Skip to content

Commit 8241cee

Browse files
Update and improve collecting and marking required phrases
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent f13b100 commit 8241cee

File tree

9 files changed

+415
-92
lines changed

9 files changed

+415
-92
lines changed

etc/scripts/licenses/buildrules.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
from licensedcode import models
1717
from licensedcode import match_hash
1818
from licensedcode import frontmatter
19-
from licensedcode.models import rule_exists
19+
from licensedcode.models import get_rule_id_for_text
2020
from license_expression import Licensing
2121

2222
"""
@@ -330,7 +330,7 @@ def cli(licenses_file, dump_to_file_on_errors=False):
330330

331331
text = rule.text
332332

333-
existing_rule = rule_exists(text)
333+
existing_rule = get_rule_id_for_text(text)
334334
skinny_text = " ".join(text[:80].split()).replace("{", " ").replace("}", " ")
335335

336336
existing_msg = (

src/licensedcode/data/rules/mit_1155.RULE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
license_expression: mit
33
is_license_notice: yes
44
relevance: 100
5+
skip_collecting_required_phrases: yes
56
---
67

78
For license terms see {{SLF4J}}

src/licensedcode/data/rules/mit_1302.RULE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ referenced_filenames:
55
- LICENSE
66
---
77

8-
is free software: you can redistribute it and/or modify it under the terms of {{the MIT License}} as published by the Open Source Initiative. See the {{ LICENSE file }} for more details.
8+
is free software: you can redistribute it and/or modify it under the terms of {{the MIT License}} as published by the Open Source Initiative. See the LICENSE file for more details.

src/licensedcode/data/rules/mit_9.RULE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ ignorable_urls:
66
- http://en.wikipedia.org/wiki/MIT_License
77
---
88

9-
Licensed under the MIT (http://en.wikipedia.org/wiki/MIT_License) license.
9+
{{Licensed under the MIT}} (http://en.wikipedia.org/wiki/MIT_License) license.

src/licensedcode/models.py

Lines changed: 19 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1844,7 +1844,8 @@ def validate(self, licensing=None, thorough=False):
18441844
is_false_positive = self.is_false_positive
18451845

18461846
has_license_flags = any(self.license_flag_values)
1847-
has_many_license_flags = len([l for l in self.license_flag_values if l]) != 1
1847+
has_no_license_flags = len([l for l in self.license_flag_values if l]) == 0
1848+
has_many_license_flags = len([l for l in self.license_flag_values if l]) > 1
18481849

18491850
license_expression = self.license_expression
18501851

@@ -1887,6 +1888,9 @@ def validate(self, licensing=None, thorough=False):
18871888

18881889
if not (0 <= self.relevance <= 100):
18891890
yield 'Invalid rule relevance. Should be between 0 and 100.'
1891+
1892+
if has_no_license_flags:
1893+
yield 'Invalid rule no is_license_* flags present.'
18901894

18911895
if has_many_license_flags:
18921896
yield 'Invalid rule is_license_* flags. Only one allowed.'
@@ -2243,10 +2247,7 @@ def build_required_phrase_spans(self):
22432247
if self.is_from_license:
22442248
return []
22452249
try:
2246-
return [
2247-
required_phrase.span
2248-
for required_phrase in get_required_phrase_spans(self.text)
2249-
]
2250+
return get_required_phrase_spans(self.text)
22502251
except Exception as e:
22512252
raise InvalidRule(f'Invalid rule: {self}') from e
22522253

@@ -2327,6 +2328,9 @@ def load(self, rule_file, with_checks=True):
23272328
raise e
23282329

23292330
known_attributes = set(attr.fields_dict(self.__class__))
2331+
# This is an attribute used to debug marking required phrases, and is not needed
2332+
if "sources" in data:
2333+
data.pop("sources")
23302334
data_file_attributes = set(data)
23312335
if with_checks:
23322336
unknown_attributes = data_file_attributes.difference(known_attributes)
@@ -2870,8 +2874,16 @@ def rule_exists(text):
28702874
if len(matches) > 1:
28712875
return False
28722876
match = matches[0]
2873-
if match.matcher == MATCH_HASH and match.score() == 100:
2874-
return match.rule.identifier
2877+
if match.matcher == MATCH_HASH and match.coverage() == 100:
2878+
return match.rule
2879+
2880+
2881+
def get_rule_id_for_text(text):
2882+
rule = rule_exists(text=text)
2883+
if rule:
2884+
return rule.identifier
2885+
else:
2886+
return False
28752887

28762888

28772889
def find_rule_base_location(name_prefix, rules_directory=rules_data_dir):

0 commit comments

Comments
 (0)