5555TRACE_FILTER_BELOW_MIN_SCORE = False
5656TRACE_FILTER_SINGLE_WORD_GIBBERISH = False
5757TRACE_SET_LINES = False
58- TRACE_KEY_PHRASES = False
58+ TRACE_REQUIRED_PHRASES = False
5959TRACE_REGIONS = False
6060TRACE_FILTER_LICENSE_LIST = False
6161TRACE_FILTER_LICENSE_LIST_DETAILED = False
@@ -91,7 +91,7 @@ def logger_debug(*args): pass
9191 or TRACE_MATCHED_TEXT_DETAILS
9292 or TRACE_HIGHLIGHTED_TEXT
9393 or TRACE_FILTER_SINGLE_WORD_GIBBERISH
94- or TRACE_KEY_PHRASES
94+ or TRACE_REQUIRED_PHRASES
9595 or TRACE_REGIONS
9696 or TRACE_FILTER_LICENSE_LIST
9797 or TRACE_FILTER_LICENSE_LIST_DETAILED
@@ -133,7 +133,7 @@ def _debug_print_matched_query_text(match, extras=5):
133133
134134class DiscardReason (IntEnum ):
135135 NOT_DISCARDED = 0
136- MISSING_KEY_PHRASES = 1
136+ MISSING_REQUIRED_PHRASES = 1
137137 BELOW_MIN_COVERAGE = 2
138138 SPURIOUS_SINGLE_TOKEN = 3
139139 TOO_SHORT = 4
@@ -634,15 +634,15 @@ def combine(self, other):
634634 discard_reason = DiscardReason .NOT_DISCARDED
635635
636636 elif (
637- self .discard_reason == DiscardReason .MISSING_KEY_PHRASES
638- and other .discard_reason == DiscardReason .MISSING_KEY_PHRASES
637+ self .discard_reason == DiscardReason .MISSING_REQUIRED_PHRASES
638+ and other .discard_reason == DiscardReason .MISSING_REQUIRED_PHRASES
639639 ):
640- discard_reason = DiscardReason .MISSING_KEY_PHRASES
640+ discard_reason = DiscardReason .MISSING_REQUIRED_PHRASES
641641
642- elif self .discard_reason == DiscardReason .MISSING_KEY_PHRASES :
642+ elif self .discard_reason == DiscardReason .MISSING_REQUIRED_PHRASES :
643643 discard_reason = other .discard_reason
644644
645- elif other .discard_reason == DiscardReason .MISSING_KEY_PHRASES :
645+ elif other .discard_reason == DiscardReason .MISSING_REQUIRED_PHRASES :
646646 discard_reason = self .discard_reason
647647
648648 else :
@@ -2116,17 +2116,17 @@ def filter_false_positive_matches(
21162116 return kept , discarded
21172117
21182118
2119- def filter_matches_missing_key_phrases (
2119+ def filter_matches_missing_required_phrases (
21202120 matches ,
2121- trace = TRACE_KEY_PHRASES ,
2122- reason = DiscardReason .MISSING_KEY_PHRASES ,
2121+ trace = TRACE_REQUIRED_PHRASES ,
2122+ reason = DiscardReason .MISSING_REQUIRED_PHRASES ,
21232123):
21242124 """
21252125 Return a filtered list of kept LicenseMatch matches and a list of
21262126 discardable matches given a ``matches`` list of LicenseMatch by removing
2127- all ``matches`` that do not contain all key phrases defined in their matched
2127+ all ``matches`` that do not contain all required phrases defined in their matched
21282128 rule.
2129- A key phrase must be matched exactly without gaps or unknown words.
2129+ A required phrase must be matched exactly without gaps or unknown words.
21302130
21312131 A rule with "is_continuous" set to True is the same as if its whole text
21322132 was defined as a required phrase and is processed here too.
@@ -2143,14 +2143,14 @@ def filter_matches_missing_key_phrases(
21432143 discarded_append = discarded .append
21442144
21452145 if trace :
2146- logger_debug ('filter_matches_missing_key_phrases ' )
2146+ logger_debug ('filter_matches_missing_required_phrases ' )
21472147
21482148 for match in matches :
21492149 if trace :
21502150 logger_debug (' CHECKING KEY PHRASES for:' , match )
21512151
21522152 is_continuous = match .rule .is_continuous
2153- ikey_spans = match .rule .key_phrase_spans
2153+ ikey_spans = match .rule .required_phrase_spans
21542154
21552155 if not (ikey_spans or is_continuous ):
21562156 kept_append (match )
@@ -2180,11 +2180,11 @@ def filter_matches_missing_key_phrases(
21802180 # use whole ispan in this case
21812181 ikey_spans = [match .ispan ]
21822182
2183- # keep matches as candidate if they contain all key phrase positions in the ispan
2183+ # keep matches as candidate if they contain all required phrase positions in the ispan
21842184 if trace :
21852185 print (' CANDIDATE TO KEEP: all ikey_span in match.ispan:' , ikey_spans , ispan )
21862186
2187- # discard matches that contain key phrases, but interrupted by
2187+ # discard matches that contain required phrases, but interrupted by
21882188 # unknown or stop words.
21892189
21902190 unknown_by_pos = match .query .unknowns_by_pos
@@ -2195,7 +2195,7 @@ def filter_matches_missing_key_phrases(
21952195 istopwords_by_pos = match .rule .stopwords_by_pos
21962196 istopwords_by_pos_get = istopwords_by_pos .get
21972197
2198- # iterate on each key phrase span to ensure that they are continuous
2198+ # iterate on each required phrase span to ensure that they are continuous
21992199 # and contain no unknown words on the query side
22002200
22012201 is_valid = True
@@ -2204,7 +2204,7 @@ def filter_matches_missing_key_phrases(
22042204
22052205 for ikey_span in ikey_spans :
22062206
2207- # check that are no gaps in the key phrase span on the query side
2207+ # check that there are no gaps in the required phrase span on the query side
22082208 # BUT, do not redo the check for is_continuous already checked above
22092209 if is_continuous :
22102210 qkey_span = qspan
@@ -2225,13 +2225,13 @@ def filter_matches_missing_key_phrases(
22252225 is_valid = False
22262226 break
22272227
2228- # check that key phrase spans does not contain stop words and does
2228+ # check that required phrase spans do not contain stop words and do
22292229 # not contain unknown words
22302230
2231- # NOTE: we do not check the last qkey_span position of a key phrase
2231+ # NOTE: we do not check the last qkey_span position of a required phrase
22322232 # since unknown is a number of words after a given span position:
22332233 # these are pinned to the last position and we would not care for
2234- # what unknown or stop words show up after a key phrase ends.
2234+ # what unknown or stop words show up after a required phrase ends.
22352235
22362236 qkey_span_end = qkey_span .end
22372237 contains_unknown = any (
@@ -2694,7 +2694,7 @@ def _log(_matches, _discarded, msg):
26942694 # FIXME: we should have only a single loop on all the matches at once!!
26952695 # and not 10's of loops!!!
26962696
2697- matches , discarded = filter_matches_missing_key_phrases (matches )
2697+ matches , discarded = filter_matches_missing_required_phrases (matches )
26982698 all_discarded_extend (discarded )
26992699 _log (matches , discarded , 'HAS KEY PHRASES' )
27002700
0 commit comments