Skip to content

Commit 5b933c0

Browse files
committed
improve and add more tests for is_extra_words_position_valid
Signed-off-by: Alok Kumar <[email protected]>
1 parent 68ab63d commit 5b933c0

File tree

4 files changed

+31
-13
lines changed

4 files changed

+31
-13
lines changed

src/licensedcode/detection.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -1156,17 +1156,6 @@ def has_extra_words(license_matches):
11561156
)
11571157

11581158

1159-
def has_extra_words_spans(license_matches):
1160-
"""
1161-
Return True if every match in ``license_matches`` (a list of LicenseMatch) has a rule
1162-
with an `extra_phrase` marker and also has the matcher `3-seq`.
1163-
"""
1164-
return all(
1165-
match.matcher == '3-seq' and match.rule.extra_phrase_spans
1166-
for match in license_matches
1167-
)
1168-
1169-
11701159
def has_low_rule_relevance(license_matches):
11711160
"""
11721161
Return True if all of the matches in ``license_matches`` (a list of LicenseMatch)

src/licensedcode/match.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1106,6 +1106,9 @@ def is_extra_words_position_valid(match):
11061106
# Count of extra phrase markers
11071107
extra_phrase_count = 0
11081108

1109+
rule_index = 0
1110+
matched_index = 0
1111+
11091112
for span, allowed_extra_word in extra_phrase_spans:
11101113
rule_index = span.start
11111114

@@ -1128,6 +1131,17 @@ def is_extra_words_position_valid(match):
11281131

11291132
extra_phrase_count += 1
11301133

1134+
rule_index+=1
1135+
1136+
# Check if any `extra-words` are present and return False because these `extra-words` are not at a marked place
1137+
while (matched_index < len(matched_tokens) and
1138+
matched_tokens[matched_index] == rule_tokens[rule_index]):
1139+
matched_index+=1
1140+
rule_index+=1
1141+
1142+
if matched_index != len(matched_tokens):
1143+
return False
1144+
11311145
return True
11321146

11331147

tests/licensedcode/test_detection_validate.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
from licensedcode import cache
1919
from licensedcode import models
2020
from licensedcode.detection import is_correct_detection
21-
from licensedcode.detection import has_extra_words_spans
2221
from licensedcode.models import licenses_data_dir
2322
from licensedcode.models import rules_data_dir
2423
from licensedcode.models import License
@@ -100,7 +99,7 @@ def check_rule_or_license_can_be_detected_exactly(licensish):
10099
assert results == expected
101100

102101
icm = is_correct_detection(matches)
103-
if not icm and not has_extra_words_spans(matches):
102+
if not icm:
104103
expected.append(f'file://{licensish.rule_file()}')
105104
assert results == expected
106105

tests/licensedcode/test_match.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1421,6 +1421,22 @@ def test_exact_match_without_extra_markers(self):
14211421
match = idx.match(query_string=query, _skip_hash_match=True)[0]
14221422
assert is_extra_words_position_valid(match) is False
14231423

1424+
def test_extra_words_one_at_right_place_and_one_at_not_right_place(self):
1425+
rule_text = """
1426+
Redistribution and use [[3]] in source and binary forms are permitted.
1427+
"""
1428+
rule = create_rule_from_text_and_expression(
1429+
license_expression='extra-words',
1430+
text=rule_text
1431+
)
1432+
idx = index.LicenseIndex([rule])
1433+
1434+
query = """
1435+
Redistribution and use of this software in source and binary extra-words forms are permitted.
1436+
"""
1437+
match = idx.match(query_string=query, _skip_hash_match=True)[0]
1438+
assert is_extra_words_position_valid(match) is False
1439+
14241440

14251441
class TestLicenseMatchScore(FileBasedTesting):
14261442
test_data_dir = TEST_DATA_DIR

0 commit comments

Comments
 (0)