Skip to content

Commit 4c5d883

Browse files
author
Gerit Wagner
committed
extend match-conditions
1 parent 4c95d0c commit 4c5d883

File tree

1 file changed

+14
-0
lines changed

1 file changed

+14
-0
lines changed

bib_dedupe/match_conditions.py

Lines changed: 14 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,7 @@ def both_entrytypes(entrytype: str) -> str:
8383
# no CONTAINER_TITLE
8484
f"({au10_ti10_ctNC} & {match(VOLUME, YEAR)} & {non_contradicting(NUMBER, PAGES, DOI, ABSTRACT)})",
8585
f"({au10_ti10_ctNC} & {match(YEAR, DOI)} & {non_contradicting(VOLUME, NUMBER, PAGES, ABSTRACT)})", # GROBID
86+
f"({au10_ti10_ctNC} & {match(YEAR)} & {non_contradicting(VOLUME, NUMBER, PAGES, DOI, ABSTRACT)})", # Missing fields
8687
f"({au09_ti09_ctXX} & {match(PAGES, DOI)} & {non_contradicting(VOLUME, NUMBER, ABSTRACT)} & {YEAR} > 0.9)",
8788
f"({au09_ti09_ctXX} & ({match(NUMBER)} & {non_contradicting(PAGES)} | {non_contradicting(NUMBER)} & {match(PAGES)}) & {non_contradicting(VOLUME, YEAR, DOI, ABSTRACT)})",
8889
f"({au09_ti09_ctXX} & {match(VOLUME, PAGES)})",
@@ -98,4 +99,17 @@ def both_entrytypes(entrytype: str) -> str:
9899
f"({mismatch(VOLUME, NUMBER, PAGES)})",
99100
# Editorials: minor differences in volume/number/pages can be meaningful
100101
f'(title_1.str.contains("editor") & title_1.str.len() < 60 & ( {mismatch(VOLUME)} | {mismatch(NUMBER)} | {mismatch(PAGES)}))',
102+
103+
# Journal vs. conference/workshop: same title/authors/year but different venue type
104+
f'({CONTAINER_TITLE}_1.str.contains("j") & '
105+
f' ({CONTAINER_TITLE}_2.str.contains("conf") '
106+
f' | {CONTAINER_TITLE}_2.str.contains("work") '
107+
f' | {CONTAINER_TITLE}_2.str.contains("proc"))) ',
108+
109+
f'({CONTAINER_TITLE}_2.str.contains("j") & '
110+
f' ({CONTAINER_TITLE}_1.str.contains("conf") '
111+
f' | {CONTAINER_TITLE}_1.str.contains("work") '
112+
f' | {CONTAINER_TITLE}_1.str.contains("proc")))'
113+
114+
101115
]

0 commit comments

Comments
 (0)