@@ -83,6 +83,7 @@ def both_entrytypes(entrytype: str) -> str:
8383 # no CONTAINER_TITLE
8484 f"({ au10_ti10_ctNC } & { match (VOLUME , YEAR )} & { non_contradicting (NUMBER , PAGES , DOI , ABSTRACT )} )" ,
8585 f"({ au10_ti10_ctNC } & { match (YEAR , DOI )} & { non_contradicting (VOLUME , NUMBER , PAGES , ABSTRACT )} )" , # GROBID
86+ f"({ au10_ti10_ctNC } & { match (YEAR )} & { non_contradicting (VOLUME , NUMBER , PAGES , DOI , ABSTRACT )} )" , # Missing fields
8687 f"({ au09_ti09_ctXX } & { match (PAGES , DOI )} & { non_contradicting (VOLUME , NUMBER , ABSTRACT )} & { YEAR } > 0.9)" ,
8788 f"({ au09_ti09_ctXX } & ({ match (NUMBER )} & { non_contradicting (PAGES )} | { non_contradicting (NUMBER )} & { match (PAGES )} ) & { non_contradicting (VOLUME , YEAR , DOI , ABSTRACT )} )" ,
8889 f"({ au09_ti09_ctXX } & { match (VOLUME , PAGES )} )" ,
@@ -98,4 +99,17 @@ def both_entrytypes(entrytype: str) -> str:
9899 f"({ mismatch (VOLUME , NUMBER , PAGES )} )" ,
99100 # Editorials: minor differences in volume/number/pages can be meaningful
100101 f'(title_1.str.contains("editor") & title_1.str.len() < 60 & ( { mismatch (VOLUME )} | { mismatch (NUMBER )} | { mismatch (PAGES )} ))' ,
102+
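103+ # Journal vs. conference/workshop: same title/authors/year but different venue type, detected via substring tests on the two container titles (NOTE(review): "j" matches any title containing the letter j, not only journals - confirm this is intended)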
104+ f'({ CONTAINER_TITLE } _1.str.contains("j") & '
105+ f' ({ CONTAINER_TITLE } _2.str.contains("conf") '
106+ f' | { CONTAINER_TITLE } _2.str.contains("work") '
107+ f' | { CONTAINER_TITLE } _2.str.contains("proc"))) ' ,
108+
109+ f'({ CONTAINER_TITLE } _2.str.contains("j") & '
110+ f' ({ CONTAINER_TITLE } _1.str.contains("conf") '
111+ f' | { CONTAINER_TITLE } _1.str.contains("work") '
112+ f' | { CONTAINER_TITLE } _1.str.contains("proc")))'
113+
114+
101115]
0 commit comments