Skip to content

Commit 4b2b723

Browse files
Address feedback and fix tests
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent d556e4c commit 4b2b723

File tree

3 files changed

+46
-14
lines changed

3 files changed

+46
-14
lines changed

src/licensedcode/models.py

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,11 @@
2424
from licensedcode._vendor import attr
2525
from license_expression import ExpressionError
2626
from license_expression import Licensing
27-
from saneyaml import load as yaml_load
28-
from saneyaml import dump as yaml_dump
27+
from saneyaml import load as saneyaml_load
28+
from saneyaml import dump as saneyaml_dump
29+
from yaml import load as yaml_load
30+
from yaml import dump as yaml_dump
31+
from yaml import CSafeLoader
2932

3033
from commoncode.fileutils import file_base_name
3134
from commoncode.fileutils import file_name
@@ -554,7 +557,7 @@ def spdx_keys(self):
554557
yield key
555558

556559
@staticmethod
557-
def validate(licenses, verbose=False, no_dupe_urls=False):
560+
def validate(licenses, verbose=False, no_dupe_urls=False, thorough=False):
558561
"""
559562
Check that the ``licenses``, a mapping of {key: License}, are valid.
560563
Return dictionaries of infos, errors and warnings mapping a license key
@@ -659,14 +662,22 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
659662
if not len(all_licenses) == len(set(all_licenses)):
660663
warn('Some duplicated URLs')
661664

662-
# local text consistency
663665
text = lic.text
664-
665-
data = {"text": text}
666-
# We are testing whether we can dump as yaml and load from yaml
667-
# without failing (i.e. whether the text is yaml safe)
668-
yaml_string = yaml_dump(data, indent=4)
669-
loaded_yaml = yaml_load(yaml_string)
666+
if thorough:
667+
# local text consistency
668+
data = {"text": text}
669+
# We are testing whether we can dump as yaml and load from yaml
670+
# without failing (i.e. whether the text is yaml safe)
671+
# Using saneyaml
672+
try:
673+
yaml_string = saneyaml_dump(data, indent=4)
674+
loaded_yaml = saneyaml_load(yaml_string)
675+
except Exception:
676+
errors['GLOBAL'].append(
677+
f'Error invalid YAML text at: {lic.key}, failed during saneyaml.load()'
678+
)
679+
# This fails because saneyaml_dump adds a line break at the end of the text that the original text lacks
680+
# assert text == loaded_yaml["text"]
670681

671682
license_itokens = tuple(index_tokenizer(text))
672683
if not license_itokens:
@@ -750,9 +761,9 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
750761
def get_yaml_safe_text(text):
751762

752763
data = {"text": text}
753-
yaml_string = yaml_dump(data, indent=4)
764+
yaml_string = saneyaml_dump(data, indent=4)
754765
try:
755-
loaded_yaml = yaml_load(yaml_string)
766+
loaded_yaml = saneyaml_load(yaml_string)
756767
except Exception:
757768
text = text.replace('\n\n', '\n \n')
758769
return text
@@ -1028,7 +1039,7 @@ def _validate_all_rules(rules, licenses_by_key):
10281039
errors = defaultdict(list)
10291040

10301041
for rule in rules:
1031-
for err_msg in rule.validate(licensing):
1042+
for err_msg in rule.validate(licensing, thorough=True):
10321043
errors[err_msg].append(rule)
10331044
return errors
10341045

@@ -1717,7 +1728,7 @@ def has_unknown(self):
17171728
# license flag instead
17181729
return self.license_expression and 'unknown' in self.license_expression
17191730

1720-
def validate(self, licensing=None):
1731+
def validate(self, licensing=None, thorough=False):
17211732
"""
17221733
Validate this rule using the provided ``licensing`` Licensing and yield
17231734
one error message for each type of error detected.
@@ -1811,6 +1822,17 @@ def validate(self, licensing=None):
18111822
if len(set(self.referenced_filenames)) != len(self.referenced_filenames):
18121823
yield 'referenced_filenames cannot contain duplicates.'
18131824

1825+
if thorough:
1826+
text = self.text
1827+
data = {"text": text}
1828+
# We are testing whether we can dump as yaml and load from yaml
1829+
# without failing (i.e. whether the text is yaml safe)
1830+
try:
1831+
yaml_string = saneyaml_dump(data, indent=4)
1832+
loaded_yaml = saneyaml_load(yaml_string)
1833+
except Exception:
1834+
yield (f'Error invalid YAML text at: {self.identifier}, failed during saneyaml.load()')
1835+
18141836
def license_keys(self, unique=True):
18151837
"""
18161838
Return a list of license keys for this rule.

tests/formattedcode/data/yaml/package-and-licenses-expected.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ headers:
3333
platform_version: '#67-Ubuntu SMP Mon Mar 13 14:22:10 UTC 2023'
3434
python_version: "3.8.10 (default, Mar 13 2023, 10:26:41) \n[GCC 9.4.0]"
3535
spdx_license_list_version: '3.20'
36+
additional_license_directory: /home/ayansinha/nexB/write_access/scancode-extra/tests/licensedcode/data/additional_licenses/additional_dir
37+
additional_license_plugins:
38+
- /home/ayansinha/nexB/write_access/scancode-extra/venv/lib/python3.8/site-packages/licenses_to_install1
39+
- /home/ayansinha/nexB/write_access/scancode-extra/venv/lib/python3.8/site-packages/licenses_to_install2
3640
files_count: 4
3741
summary:
3842
declared_license_expression: apache-2.0 AND (apache-2.0 OR mit)
@@ -85,6 +89,7 @@ packages:
8589
code_view_url:
8690
vcs_url:
8791
copyright:
92+
holder:
8893
declared_license_expression: apache-2.0
8994
declared_license_expression_spdx: Apache-2.0
9095
license_detections:
@@ -1137,6 +1142,7 @@ files:
11371142
code_view_url:
11381143
vcs_url:
11391144
copyright:
1145+
holder:
11401146
declared_license_expression: apache-2.0
11411147
declared_license_expression_spdx: Apache-2.0
11421148
license_detections:

tests/licensedcode/test_models.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,6 +88,7 @@ def test_validate_license_library_data(self):
8888
errors, warnings, infos = models.License.validate(
8989
licenses=models.load_licenses(with_deprecated=False),
9090
verbose=False,
91+
thorough=True,
9192
)
9293
assert errors == {}
9394
assert warnings == {}
@@ -185,6 +186,9 @@ def test_rule_from_license_have_text_file_and_data_file_are_computed_correctly(s
185186
class TestRule(FileBasedTesting):
186187
test_data_dir = TEST_DATA_DIR
187188

189+
def test_validate_license_rules_data(self):
190+
rules = list(models.get_rules(validate=True))
191+
188192
def test_create_rule_ignore_punctuation(self):
189193
test_rule = create_rule_from_text_and_expression(text='A one. A two. A three.')
190194
expected = ['one', 'two', 'three']

0 commit comments

Comments
 (0)