Skip to content

Commit 7094f28

Browse files
author
Gerit Wagner
committed
test cases and code changes
1 parent c9c6890 commit 7094f28

File tree

3 files changed

+79
-1
lines changed

3 files changed

+79
-1
lines changed

bib_dedupe/sim.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,7 @@ def sim_title(title_1: str, title_2: str, debug: bool = False) -> float:
126 126   t1 = str(title_1)
127 127   t2 = str(title_2)
128 128
129     - if t1 == "" and t2 == "":
    129 + if t1 in ["", "book review"] or t2 in ["", "book review"]:
130 130   return 0.0
131 131
132 132   if t1.replace(" ", "") == t2.replace(" ", "") and t1.replace(" ", "") != "":
@@ -140,6 +140,10 @@ def sim_title(title_1: str, title_2: str, debug: bool = False) -> float:
140 140   "comment",
141 141   "response",
142 142   "reply",
    143 + "update",
    144 + "forum",
    145 + "proposed",
    146 + "talk",
143 147   ]
144 148   ]
145 149   ):

tests/sim_test.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,13 @@ def test_sim_container_title(
149 149   "open label, multi center clinical trial eculizumab adult patients atypical hemolytic uremic syndrome",
150 150   0.0,
151 151   ),
    152 + (
    153 + "negotiation database schema integration",
    154 + "",
    155 + 0.0,
    156 + ),
    157 + ("proposed study commitment virtual teams", "commitment virtual teams", 0.0),
    158 + ("task force report", "update of the task force report", 0.0),
152 159   # ("cardiac vascular remodelling effect antihypertensive agents",
153 160   # "session 2 cardiac vascular remodelling effect antihypertensive agents",
154 161   # 1.0),

tests/test_cases.json

Lines changed: 67 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -553,6 +553,73 @@
553 553   "pages": "1--17"
554 554   },
555 555   "expected_duplicate": true
    556 + },
    557 + {
    558 + "id": "ejis_1995_book_review_unknown_vs_specific_book_review",
    559 + "note": "Book review records: one generic ('Book review' with UNKNOWN author), one specific book review -> should not match without extra book-title logic.",
    560 + "record_a": {
    561 + "ENTRYTYPE": "article",
    562 + "ID": "1",
    563 + "author": "UNKNOWN",
    564 + "title": "Book review",
    565 + "journal": "European Journal of Information Systems",
    566 + "volume": "4",
    567 + "number": "2",
    568 + "year": "1995"
    569 + },
    570 + "record_b": {
    571 + "ENTRYTYPE": "article",
    572 + "ID": "2",
    573 + "author": "Hammer, Michael and Champy, James",
    574 + "title": "Book review: Edited by TONY CORNFORD Reengineering the Corporation: A Manifesto for Business Revolution",
    575 + "journal": "European Journal of Information Systems",
    576 + "volume": "4",
    577 + "number": "2",
    578 + "year": "1995"
    579 + },
    580 + "expected_duplicate": false
    581 + },
    582 + {
    583 + "id": "sekar_2024_literature_review_vs_erf_paper",
    584 + "note": "LR vs ERF paper; related topic but different titles and authorship -> must NOT match.",
    585 + "record_a": {
    586 + "ENTRYTYPE": "inproceedings",
    587 + "ID": "sekar_tech_noteboom_2024_lit_review",
    588 + "author": "Sekar, Aravindh and Tech, Deb and Noteboom, Cherie",
    589 + "year": "2024",
    590 + "title": "Exploring the Impact of Blockchain Integration on Inventory Accuracy and Supply Chain Efficiency -A Literature Review",
    591 + "booktitle": "Americas Conference on Information Systems"
    592 + },
    593 + "record_b": {
    594 + "ENTRYTYPE": "inproceedings",
    595 + "ID": "sekar_tech_2024_erf_paper",
    596 + "author": "Sekar, Aravindh and Tech, Deb",
    597 + "year": "2024",
    598 + "title": "Exploratory Study on the Impact of Blockchain Adoption on Inventory Accuracy and Supply Chain Efficiency Emergent Research Forum (ERF) Paper",
    599 + "booktitle": "Americas Conference on Information Systems"
    600 + },
    601 + "expected_duplicate": false
    602 + },
    603 + {
    604 + "id": "powell_1999_two_distinct_pdfs_similar_authors_year",
    605 + "note": "Same author/year/venue but different titles -> must NOT match.",
    606 + "record_a": {
    607 + "ENTRYTYPE": "inproceedings",
    608 + "ID": "powell_1999_a_proposed_study",
    609 + "author": "Powell, Anne L",
    610 + "year": "1999",
    611 + "title": "A Proposed Study on Commitment in Virtual Teams",
    612 + "booktitle": "Americas Conference on Information Systems"
    613 + },
    614 + "record_b": {
    615 + "ENTRYTYPE": "inproceedings",
    616 + "ID": "powell_1999_commitment_virtual_team",
    617 + "author": "Powell, Anne L",
    618 + "year": "1999",
    619 + "title": "Commitment in a Virtual Team",
    620 + "booktitle": "Americas Conference on Information Systems"
    621 + },
    622 + "expected_duplicate": false
556 623   }
557 624   ]
558 625   }

0 commit comments

Comments
 (0)