@@ -672,7 +672,7 @@ def _compute_common_phrase_percent(prev_citation, new_citation, characters_to_re
672672 ((int, int)|None): if either citation is None, then return None, else the percentage of common to uncommon phrase length for each citation.
673673 """
674674 if prev_citation and new_citation :
675- citation_strip_regex = "|" .join ([f"\{ char } " for char in characters_to_remove ])
675+ citation_strip_regex = "|" .join ([f"\\ { char } " for char in characters_to_remove ])
676676 # citation_strip_regex = r"\.|,|;|\(|\)|\[|\]|\{|\}"
677677 stripped_prev_citation = re .sub (citation_strip_regex , "" , prev_citation .lower ())
678678 stripped_new_citation = re .sub (citation_strip_regex , "" , new_citation .lower ())
@@ -685,8 +685,17 @@ def _compute_common_phrase_percent(prev_citation, new_citation, characters_to_re
685685 prev_citation_common_phrases_removed = prev_citation_common_phrases_removed .replace (phrase .strip (), "" )
686686 new_citation_common_phrases_removed = new_citation_common_phrases_removed .replace (phrase .strip (), "" )
687687 common_base_string = "" .join (common_subphrases )
688- prev_common_percentage = len (common_base_string ) / len (common_base_string + prev_citation_common_phrases_removed .strip ()) * 100
689- new_common_percentage = len (common_base_string ) / len (common_base_string + new_citation_common_phrases_removed .strip ()) * 100
688+
689+ prev_common_denom = len (common_base_string + prev_citation_common_phrases_removed .strip ())
690+ new_common_denom = len (common_base_string + new_citation_common_phrases_removed .strip ())
691+
692+ if prev_common_denom == 0 or new_common_denom == 0 :
693+ print (f"WARN: { prev_citation } vs. { new_citation } similarity divide by zero" )
694+ print (f"\t returning None" )
695+ return None
696+
697+ prev_common_percentage = len (common_base_string ) / prev_common_denom * 100
698+ new_common_percentage = len (common_base_string ) / new_common_denom * 100
690699
691700 return prev_common_percentage , new_common_percentage
692701 else :
0 commit comments