Register
Login
Resources
Docs Blog Datasets Glossary Case Studies Tutorials & Webinars
Product
Data Engine LLMs Platform Enterprise
Pricing Explore
Connect to our Discord channel

same_if_short_relative_edit_distance.py 1.1 KB

You have to be logged in to leave a comment. Sign In
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
  1. from typing import Optional, Tuple
  2. import Levenshtein
  3. from bohrapi.artifacts.identity import Identity
  4. from bohrapi.core import Heuristic
  5. from bohrlabels.core import OneOrManyLabels
  6. from bohrlabels.labels import MatchLabel
  7. @Heuristic(Identity, Identity)
  8. def same_if_short_relative_edit_distance(
  9. identities: Tuple[Identity, Identity]
  10. ) -> Optional[OneOrManyLabels]:
  11. """
  12. >>> same_if_same_names((Identity({"names": ["Hlib Babii"]}), Identity({"names": ["Hlib Babiy"]})))
  13. MatchLabel.Match
  14. >>> same_if_same_names((Identity({"names": ["Hlib Babii"]}), Identity({"names": ["Andrew Babii"]})))
  15. MatchLabel.NoMatch
  16. >>> same_if_same_names((Identity({}), Identity({}))) is None
  17. True
  18. """
  19. name1 = identities[0].name
  20. name2 = identities[1].name
  21. if name1 is not None and name2 is not None:
  22. distance = Levenshtein.distance(name1, name2)
  23. max_length = max(len(name1), len(name2))
  24. return (
  25. MatchLabel.Match
  26. if (max_length - distance) / max_length >= 0.8
  27. else MatchLabel.NoMatch
  28. )
Tip!

Press p to see the previous file, or n to see the next file.

Comments

Loading...