File tree Expand file tree Collapse file tree 2 files changed +14
-3
lines changed
Expand file tree Collapse file tree 2 files changed +14
-3
lines changed Original file line number Diff line number Diff line change 77
88import bib_dedupe .match_conditions
99import bib_dedupe .sim
10- import bib_dedupe .util
1110from bib_dedupe import verbose_print
1211from bib_dedupe .constants .colors import END
1312from bib_dedupe .constants .colors import GREEN
Original file line number Diff line number Diff line change @@ -23,9 +23,21 @@ def mismatch(*keys: str) -> str:
2323
2424
2525def match (* args : str , threshold : float = 1.0 ) -> str :
26+ """
27+ Build a pandas-query expression requiring:
28+ 1) similarity meets the threshold, and
29+ 2) BOTH raw fields (<arg>_1 and <arg>_2) are non-empty.
30+
31+ Assumes columns like: doi + doi_1 + doi_2, title + title_1 + title_2, ...
32+ """
2633 if threshold == 1.0 :
27- return "&" .join (f" ({ arg } == { threshold } ) " for arg in args )
28- return "&" .join (f" ({ arg } > { threshold } ) " for arg in args )
34+ sim_expr = " & " .join (f" ({ arg } == 1.0) " for arg in args )
35+ else :
36+ sim_expr = " & " .join (f" ({ arg } > { threshold } ) " for arg in args )
37+
38+ non_empty_expr = " & " .join (f" ({ arg } _1 != '' & { arg } _2 != '') " for arg in args )
39+
40+ return f"({ sim_expr } ) & ({ non_empty_expr } )"
2941
3042
3143def non_contradicting (* keys : str ) -> str :
You can’t perform that action at this time.
0 commit comments