File tree Expand file tree Collapse file tree 4 files changed +31
-13
lines changed Expand file tree Collapse file tree 4 files changed +31
-13
lines changed Original file line number Diff line number Diff line change @@ -1156,17 +1156,6 @@ def has_extra_words(license_matches):
11561156 )
11571157
11581158
1159- def has_extra_words_spans (license_matches ):
1160- """
1161- Return True if all of the matches in ``license_matches`` (a list of LicenseMatch)
1162- use the `3-seq` matcher and their rules have `extra_phrase` markers.
1163- """
1164- return all (
1165- match .matcher == '3-seq' and match .rule .extra_phrase_spans
1166- for match in license_matches
1167- )
1168-
1169-
11701159def has_low_rule_relevance (license_matches ):
11711160 """
11721161 Return True if all on the matches in ``license_matches`` List of LicenseMatch
Original file line number Diff line number Diff line change @@ -1106,6 +1106,9 @@ def is_extra_words_position_valid(match):
11061106 # Count of extra phrase markers
11071107 extra_phrase_count = 0
11081108
1109+ rule_index = 0
1110+ matched_index = 0
1111+
11091112 for span , allowed_extra_word in extra_phrase_spans :
11101113 rule_index = span .start
11111114
@@ -1128,6 +1131,17 @@ def is_extra_words_position_valid(match):
11281131
11291132 extra_phrase_count += 1
11301133
1134+ rule_index += 1
1135+
1136+ # Check whether any extra words remain; if so, return False because these extra words are not at a marked place.
1137+ while (matched_index < len (matched_tokens ) and
1138+ matched_tokens [matched_index ] == rule_tokens [rule_index ]):
1139+ matched_index += 1
1140+ rule_index += 1
1141+
1142+ if matched_index != len (matched_tokens ):
1143+ return False
1144+
11311145 return True
11321146
11331147
Original file line number Diff line number Diff line change 1818from licensedcode import cache
1919from licensedcode import models
2020from licensedcode .detection import is_correct_detection
21- from licensedcode .detection import has_extra_words_spans
2221from licensedcode .models import licenses_data_dir
2322from licensedcode .models import rules_data_dir
2423from licensedcode .models import License
@@ -100,7 +99,7 @@ def check_rule_or_license_can_be_detected_exactly(licensish):
10099 assert results == expected
101100
102101 icm = is_correct_detection (matches )
103- if not icm and not has_extra_words_spans ( matches ) :
102+ if not icm :
104103 expected .append (f'file://{ licensish .rule_file ()} ' )
105104 assert results == expected
106105
Original file line number Diff line number Diff line change @@ -1421,6 +1421,22 @@ def test_exact_match_without_extra_markers(self):
14211421 match = idx .match (query_string = query , _skip_hash_match = True )[0 ]
14221422 assert is_extra_words_position_valid (match ) is False
14231423
1424+ def test_extra_words_one_at_right_place_and_one_at_not_right_place (self ):
1425+ rule_text = """
1426+ Redistribution and use [[3]] in source and binary forms are permitted.
1427+ """
1428+ rule = create_rule_from_text_and_expression (
1429+ license_expression = 'extra-words' ,
1430+ text = rule_text
1431+ )
1432+ idx = index .LicenseIndex ([rule ])
1433+
1434+ query = """
1435+ Redistribution and use of this software in source and binary extra-words forms are permitted.
1436+ """
1437+ match = idx .match (query_string = query , _skip_hash_match = True )[0 ]
1438+ assert is_extra_words_position_valid (match ) is False
1439+
14241440
14251441class TestLicenseMatchScore (FileBasedTesting ):
14261442 test_data_dir = TEST_DATA_DIR
You can’t perform that action at this time.
0 commit comments