@@ -756,7 +756,7 @@ def matched_text(
756756 side effects as the caching depends on which index instance is being
757757 used and this index can change during testing.
758758 """
759- if TRACE_MATCHED_TEXT :
759+ if TRACE_MATCHED_TEXT and not TRACE_REPR_ALL_MATCHED_TEXTS :
760760 logger_debug (f'LicenseMatch.matched_text: self.query: { self .query } ' )
761761
762762 query = self .query
@@ -2205,7 +2205,7 @@ def filter_matches_missing_required_phrases(
22052205
22062206 # keep matches as candidate if they contain all required phrase positions in the ispan
22072207 if trace :
2208- print (' CANDIDATE TO KEEP: all ikey_span in match.ispan:' , ikey_spans , ispan )
2208+ print (' CANDIDATE TO KEEP: all ikey_span in match.ispan: ikey_spans: ' , ikey_spans , 'ispan:' , ispan )
22092209
22102210 # discard matches that contain required phrases, but interrupted by
22112211 # unknown or stop words.
@@ -2219,7 +2219,7 @@ def filter_matches_missing_required_phrases(
22192219 istopwords_by_pos_get = istopwords_by_pos .get
22202220
22212221 # iterate on each required phrase span to ensure that they are continuous
2222- # and contain no unknown words on the query side
2222+ # and contain no unknown words or stop words on the query side
22232223
22242224 is_valid = True
22252225
@@ -2239,18 +2239,15 @@ def filter_matches_missing_required_phrases(
22392239
22402240 qkey_span = Span (qkey_poss )
22412241 if len (qkey_span ) != qkey_span .magnitude ():
2242-
2243- logger_debug (
2244- ' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT NOT CONTINUOUS:' ,
2245- 'qkey_span:' , qkey_span , 'qpan:' , qspan
2246- )
2247-
2242+ if trace :
2243+ logger_debug (
2244+ ' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT NOT CONTINUOUS:' ,
2245+ 'qkey_span:' , qkey_span , 'qspan:' , qspan
2246+ )
22482247 is_valid = False
22492248 break
22502249
2251- # check that required phrase spans does not contain stop words and does
2252- # not contain unknown words
2253-
2250+ # Check that required phrase spans do not contain unknown words.
22542251 # NOTE: we do not check the last qkey_span position of a required phrase
22552252 # since unknown is a number of words after a given span position:
22562253 # these are pinned to the last position and we would not care for
@@ -2265,34 +2262,36 @@ def filter_matches_missing_required_phrases(
22652262 if contains_unknown :
22662263 logger_debug (
22672264 ' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT UNKNOWNS:' ,
2268- 'qkey_span:' , qkey_span , 'qpan :' , qspan ,
2265+ 'qkey_span:' , qkey_span , 'qspan :' , qspan ,
22692266 'unknown_by_pos:' , unknown_by_pos
22702267 )
22712268
22722269 is_valid = False
22732270 break
22742271
2275- if is_continuous :
2276- has_same_stopwords_pos = True
2277- for qpos , ipos in zip (qspan , ispan ):
2278- if qpos not in qkey_span or qpos == qkey_span_end :
2279- continue
2280-
2281- if istopwords_by_pos_get (ipos ) != qstopwords_by_pos_get (qpos ):
2282- has_same_stopwords_pos = False
2283- break
2284-
2285- if not has_same_stopwords_pos :
2286- logger_debug (
2287- ' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT STOPWORDS NOT SAME:' ,
2288- 'qkey_span:' , qkey_span , 'qpan:' , qspan ,
2289- 'istopwords_by_pos:' , istopwords_by_pos ,
2290- 'qstopwords_by_pos:' , qstopwords_by_pos
2291- )
2292-
2293- is_valid = False
2272+ # Check that, within required phrase spans, the stop words recorded at each matched position are the
2273+ # same on the query and index sides. This must hold whether the rule is continuous or not: as long as we have a key span, it cannot be interrupted.
2274+
2275+ has_same_stopwords_pos = True
2276+ for qpos , ipos in zip (qspan , ispan ):
2277+ if qpos not in qkey_span or qpos == qkey_span_end :
2278+ continue
2279+
2280+ if istopwords_by_pos_get (ipos ) != qstopwords_by_pos_get (qpos ):
2281+ has_same_stopwords_pos = False
22942282 break
22952283
2284+ if not has_same_stopwords_pos :
2285+ logger_debug (
2286+ ' ==> DISCARDING, REQUIRED PHRASES PRESENT, BUT STOPWORDS NOT SAME:' ,
2287+ 'qkey_span:' , qkey_span , 'qspan:' , qspan ,
2288+ 'istopwords_by_pos:' , istopwords_by_pos ,
2289+ 'qstopwords_by_pos:' , qstopwords_by_pos
2290+ )
2291+
2292+ is_valid = False
2293+ break
2294+
22962295 if is_valid :
22972296 logger_debug (' ==> KEEPING, REQUIRED PHRASES PRESENT, CONTINUOUS AND NO UNKNOWNS' )
22982297 kept_append (match )
0 commit comments