Skip to content

Commit aa9374c

Browse files
Add script for adding required phrases automatically
Add a script which automatically adds required phrases to existing rules, using required phrases already present in other rules and license field names. The script processes one license expression at a time. Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 745ba9c commit aa9374c

34 files changed

+1605
-329
lines changed

docs/source/how-to-guides/add_new_license_detection_rule.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ More (advanced) rules options:
7373
be present in the result license detections. These just have the license text and a
7474
`is_false_positive` flag set to True.
7575

76-
- you can specify key phrases by surrounding one or more words between the `{{`
76+
- you can specify required phrases by surrounding one or more words between the `{{`
7777
and `}}` tags. Required phrases are words that **must** be matched/present in order
7878
for a RULE to be considered a match.
7979

etc/scripts/licenses/buildrules.py

Lines changed: 2 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
from licensedcode import models
1717
from licensedcode import match_hash
1818
from licensedcode import frontmatter
19+
from licensedcode.models import get_rule_id_for_text
1920
from license_expression import Licensing
2021

2122
"""
@@ -129,23 +130,6 @@ def load_data(location="00-new-licenses.txt"):
129130
return rules
130131

131132

132-
def rule_exists(text):
133-
"""
134-
Return the matched rule identifier if the text is an existing rule matched
135-
exactly, False otherwise.
136-
"""
137-
idx = cache.get_index()
138-
139-
matches = idx.match(query_string=text)
140-
if not matches:
141-
return False
142-
if len(matches) > 1:
143-
return False
144-
match = matches[0]
145-
if match.matcher == match_hash.MATCH_HASH and match.score() == 100:
146-
return match.rule.identifier
147-
148-
149133
def all_rule_by_tokens():
150134
"""
151135
Return a mapping of {tuples of tokens: rule id}, with one item for each
@@ -346,7 +330,7 @@ def cli(licenses_file, dump_to_file_on_errors=False):
346330

347331
text = rule.text
348332

349-
existing_rule = rule_exists(text)
333+
existing_rule = get_rule_id_for_text(text)
350334
skinny_text = " ".join(text[:80].split()).replace("{", " ").replace("}", " ")
351335

352336
existing_msg = (

etc/scripts/licenses/report_license_rules.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@
6262
"is_license_reference",
6363
"is_license_intro",
6464
"is_license_clue",
65+
"is_required_phrase",
66+
"skip_collecting_required_phrases",
6567
"is_deprecated",
6668
"has_unknown",
6769
"only_known_words",

setup-mini.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ console_scripts =
158158
scancode-reindex-licenses = licensedcode.reindex:reindex_licenses
159159
scancode-license-data = licensedcode.license_db:dump_scancode_license_data
160160
regen-package-docs = packagedcode.regen_package_docs:regen_package_docs
161+
add-required-phrases = licensedcode.required_phrases:add_required_phrases
161162

162163
# These are configurations for ScanCode plugins as setuptools entry points.
163164
# Each plugin entry has this form:

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ console_scripts =
158158
scancode-reindex-licenses = licensedcode.reindex:reindex_licenses
159159
scancode-license-data = licensedcode.license_db:dump_scancode_license_data
160160
regen-package-docs = packagedcode.regen_package_docs:regen_package_docs
161+
add-required-phrases = licensedcode.required_phrases:add_required_phrases
161162

162163
# These are configurations for ScanCode plugins as setuptools entry points.
163164
# Each plugin entry has this form:
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
license_expression: cclrc
3+
is_license_notice: yes
4+
referenced_filenames:
5+
- External_License/CCLRC_CDAT_License.txt
6+
---
7+
8+
* This software may be distributed under the terms of the
9+
* {{CCLRC Licence}} for CCLRC Software
10+
* <CDATDIR>/External_License/CCLRC_CDAT_License.txt
Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
---
2+
license_expression: cclrc
3+
is_license_notice: yes
4+
---
5+
6+
* This software may be distributed under the terms of the
7+
* {{CCLRC Licence}} for CCLRC Software

src/licensedcode/data/rules/mit_1155.RULE

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
license_expression: mit
33
is_license_notice: yes
44
relevance: 100
5+
skip_collecting_required_phrases: yes
56
---
67

78
For license terms see {{SLF4J}}

src/licensedcode/data/rules/mit_1302.RULE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,4 +5,4 @@ referenced_filenames:
55
- LICENSE
66
---
77

8-
is free software: you can redistribute it and/or modify it under the terms of {{the MIT License}} as published by the Open Source Initiative. See the {{ LICENSE file }} for more details.
8+
is free software: you can redistribute it and/or modify it under the terms of {{the MIT License}} as published by the Open Source Initiative. See the LICENSE file for more details.

src/licensedcode/data/rules/mit_397.RULE

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,8 @@ relevance: 100
55
referenced_filenames:
66
- COPYING
77
ignorable_urls:
8-
- http://www.opensource.org/licenses/mit-license.php
8+
- https://www.opensource.org/licenses/mit-license.php
99
---
1010

1111
// Distributed under the MIT software license, see the accompanying
12-
// file COPYING or shttp://www.opensource.org/licenses/mit-license.php.
12+
// file COPYING or https://www.opensource.org/licenses/mit-license.php.

0 commit comments

Comments
 (0)