Skip to content

Commit 154c4bf

Browse files
committed
prevent div by zero in cit common phrase %; comparing to none?
1 parent aeec310 commit 154c4bf

File tree

1 file changed

+12
-3
lines changed

1 file changed

+12
-3
lines changed

src/academic_tracker/helper_functions.py

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -672,7 +672,7 @@ def _compute_common_phrase_percent(prev_citation, new_citation, characters_to_re
672672
((int, int)|None): if either citation is None, then return None, else the percentage of common to uncommon phrase length for each citation.
673673
"""
674674
if prev_citation and new_citation:
675-
citation_strip_regex = "|".join([f"\{char}" for char in characters_to_remove])
675+
citation_strip_regex = "|".join([f"\\{char}" for char in characters_to_remove])
676676
# citation_strip_regex = r"\.|,|;|\(|\)|\[|\]|\{|\}"
677677
stripped_prev_citation = re.sub(citation_strip_regex, "", prev_citation.lower())
678678
stripped_new_citation = re.sub(citation_strip_regex, "", new_citation.lower())
@@ -685,8 +685,17 @@ def _compute_common_phrase_percent(prev_citation, new_citation, characters_to_re
685685
prev_citation_common_phrases_removed = prev_citation_common_phrases_removed.replace(phrase.strip(), "")
686686
new_citation_common_phrases_removed = new_citation_common_phrases_removed.replace(phrase.strip(), "")
687687
common_base_string = "".join(common_subphrases)
688-
prev_common_percentage = len(common_base_string) / len(common_base_string + prev_citation_common_phrases_removed.strip()) * 100
689-
new_common_percentage = len(common_base_string) / len(common_base_string + new_citation_common_phrases_removed.strip()) * 100
688+
689+
prev_common_denom = len(common_base_string + prev_citation_common_phrases_removed.strip())
690+
new_common_denom = len(common_base_string + new_citation_common_phrases_removed.strip())
691+
692+
if prev_common_denom == 0 or new_common_denom == 0:
693+
print(f"WARN: {prev_citation} vs. {new_citation} similarity divide by zero")
694+
print(f"\treturning None")
695+
return None
696+
697+
prev_common_percentage = len(common_base_string) / prev_common_denom * 100
698+
new_common_percentage = len(common_base_string) / new_common_denom * 100
690699

691700
return prev_common_percentage, new_common_percentage
692701
else:

0 commit comments

Comments
 (0)