
Commit 9f637e4

Repair error messages in validation
Signed-off-by: Philippe Ombredanne <[email protected]>
1 parent 6035d25 commit 9f637e4

File tree

3 files changed: 82 additions & 61 deletions

src/licensedcode/models.py

Lines changed: 67 additions & 50 deletions
@@ -147,8 +147,8 @@ def __attrs_post_init__(self, *args, **kwargs):
         self.load()

     def set_file_paths(self):
-        self.data_file = join(self.src_dir, self.key + '.yml')
-        self.text_file = join(self.src_dir, self.key + '.LICENSE')
+        self.data_file = join(self.src_dir, f'{self.key}.yml')
+        self.text_file = join(self.src_dir, f'{self.key}.LICENSE')

     def relocate(self, target_dir, new_key=None):
         """
@@ -263,9 +263,9 @@ def load(self):

                 if k == 'key':
                     assert self.key == v, (
-                        'The license "key" attribute in the .yml file MUST '
-                        'be the same as the base name of this license .LICENSE '
-                        'and .yml data files license files. '
+                        'The license "key" attribute in the .yml file MUST ' +
+                        'be the same as the base name of this license .LICENSE ' +
+                        'and .yml data files license files. ' +
                         f'Yet file name = {self.key} and license key = {v}'
                     )

@@ -336,7 +336,7 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
             if lic.category and lic.category not in CATEGORIES:
                 cats = '\n'.join(sorted(CATEGORIES))
                 error(
-                    f'Unknown license category: {lic.category}.\n'
+                    f'Unknown license category: {lic.category}.\n' +
                     f'Use one of these valid categories:\n{cats}'
                 )
             if not lic.owner:
@@ -400,15 +400,15 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
         if multiple_spdx_keys_used:
             for k, lkeys in multiple_spdx_keys_used.items():
                 errors['GLOBAL'].append(
-                    f'SPDX key: {k} used in multiple licenses: '
+                    f'SPDX key: {k} used in multiple licenses: ' +
                     ', '.join(sorted(lkeys)))

         # global text dedupe
         multiple_texts = {k: v for k, v in by_text.items() if len(v) > 1}
         if multiple_texts:
             for k, msgs in multiple_texts.items():
                 errors['GLOBAL'].append(
-                    'Duplicate texts in multiple licenses:'
+                    'Duplicate texts in multiple licenses: ' +
                     ', '.join(sorted(msgs))
                 )

@@ -417,7 +417,7 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
             if len(licenses) == 1:
                 continue
             errors['GLOBAL'].append(
-                f'Duplicate short name: {short_name} in licenses:'
+                f'Duplicate short name: {short_name} in licenses: ' +
                 ', '.join(l.key for l in licenses)
             )

@@ -426,7 +426,7 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
             if len(licenses) == 1:
                 continue
             errors['GLOBAL'].append(
-                f'Duplicate name: {name} in licenses:'
+                f'Duplicate name: {name} in licenses: ' +
                 ', '.join(l.key for l in licenses)
             )

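Note on the error-message fixes in validate() above: where a ', '.join(...) immediately follows the message string with no explicit '+', Python implicitly concatenates the adjacent string literals, so the intended prefix fuses with ', ' and that whole fused literal becomes the separator passed to join(). A minimal standalone sketch of the difference (the keys list is invented for illustration, not taken from the commit):

    keys = ['gpl-1.0-plus', 'gpl-1.0']

    # Implicit concatenation: the prefix and ', ' fuse into one literal,
    # and that entire literal is used as the join separator.
    broken = (
        'Duplicate short name: GPL 1.0 in licenses:'
        ', '.join(keys)
    )
    # -> 'gpl-1.0-plusDuplicate short name: GPL 1.0 in licenses:, gpl-1.0'

    # Explicit '+': the prefix stays a prefix and ', ' is the separator.
    fixed = (
        'Duplicate short name: GPL 1.0 in licenses: ' +
        ', '.join(keys)
    )
    # -> 'Duplicate short name: GPL 1.0 in licenses: gpl-1.0-plus, gpl-1.0'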
@@ -484,7 +484,7 @@ def load_licenses(licenses_data_dir=licenses_data_dir , with_deprecated=False):
     dangling = all_files.difference(used_files)
     if dangling:
         msg = (
-            f'Some License files are orphaned in "{licenses_data_dir}".\n'
+            f'Some License files are orphaned in {licenses_data_dir!r}.\n' +
             '\n'.join(f'file://{f}' for f in sorted(dangling))
         )
         raise Exception(msg)
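Note on the !r conversion used in the new message above: it applies repr() to the interpolated value, so the path comes out quoted (and escaped) without hand-written quote characters. A small illustrative sketch with a made-up path, not taken from the commit:

    licenses_data_dir = '/tmp/licenses'

    manual = f'Some License files are orphaned in "{licenses_data_dir}".'
    with_repr = f'Some License files are orphaned in {licenses_data_dir!r}.'

    print(manual)     # Some License files are orphaned in "/tmp/licenses".
    print(with_repr)  # Some License files are orphaned in '/tmp/licenses'.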
@@ -548,7 +548,7 @@ def validate_rules(rules, licenses_by_key, with_text=False):
         message.append('')
         message.append(msg)
         for rule in rules:
-            message.append(' ' + repr(rule))
+            message.append(f' {rule!r}')
             if rule.text_file:
                 message.append(f' file://{rule.text_file}')
             if rule.data_file:
@@ -640,7 +640,7 @@ def load_rules(rules_data_dir=rules_data_dir):
             base_name = file_base_name(data_file)
             if ' ' in base_name:
                 space_problems.append(data_file)
-            rule_file = join(rules_data_dir, base_name + '.RULE')
+            rule_file = join(rules_data_dir, f'{base_name}.RULE')
             try:
                 rule = Rule(data_file=data_file, text_file=rule_file)
                 yield rule
@@ -847,14 +847,11 @@ def setup(self):
         try:
             expression = self.licensing.parse(self.license_expression)
         except:
+            exp = self.license_expression
+            trace = traceback.format_exc()
             raise InvalidRule(
-                'Unable to parse rule License expression: {license_expression!r} '
-                'for: file://{data_file}'
-                '\n{trace}'.format(
-                    license_expression=self.license_expression,
-                    data_file=self.data_file,
-                    trace=traceback.format_exc(),
-                )
+                f'Unable to parse rule License expression: {exp!r} '
+                f'for: file://{self.data_file}\n{trace}'
             )

         if expression is None:
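Note on the rewritten except block above: traceback.format_exc() only returns a useful traceback while an exception is being handled, so it is captured into the trace local inside the handler before the f-string message is built. A self-contained sketch of that pattern; the failing parse is simulated with a plain ValueError rather than the commit's licensing parser:

    import traceback

    def describe_parse_failure(expression):
        try:
            raise ValueError(f'cannot parse: {expression}')  # stand-in for a real parse error
        except Exception:
            trace = traceback.format_exc()  # must be called inside the handler
            return f'Unable to parse rule License expression: {expression!r}\n{trace}'

    print(describe_parse_failure('gpl-2.0 AND'))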
@@ -1114,7 +1111,8 @@ def load_data(self):
         if not self.text_file:
             # for SPDX or tests only
             if not self.stored_text :
-                raise InvalidRule(f'Invalid rule without its corresponding text file: {self}')
+                raise InvalidRule(
+                    f'Invalid rule without its corresponding text file: {self}')
             self.identifier = '_tst_' + str(len(self.stored_text))
         else:
             self.identifier = file_name(self.text_file)
@@ -1139,7 +1137,10 @@ def tokens(self):
         # We tag this rule as being a bare URL if it starts with a scheme and is
         # on one line: this is used to determine a matching approach

-        if text.startswith(('http://', 'https://', 'ftp://')) and '\n' not in text[:1000]:
+        if (
+            text.startswith(('http://', 'https://', 'ftp://'))
+            and '\n' not in text[:1000]
+        ):
             self.minimum_coverage = 100

         for token in index_tokenizer(self.text()):
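Note on the reformatted condition above: str.startswith() accepts a tuple of prefixes, so one call covers all three URL schemes, and the newline lookup is bounded to the first 1000 characters. An equivalent standalone check; the helper name is invented for illustration:

    def looks_like_bare_url(text):
        # single-line text starting with a URL scheme
        return (
            text.startswith(('http://', 'https://', 'ftp://'))
            and '\n' not in text[:1000]
        )

    assert looks_like_bare_url('https://www.apache.org/licenses/LICENSE-2.0')
    assert not looks_like_bare_url('https://example.com\nApache License 2.0')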
@@ -1152,23 +1153,27 @@ def tokens(self):
     def compute_thresholds(self, small_rule=SMALL_RULE):
         """
         Compute and set thresholds either considering the occurrence of all
-        tokens or the occurance of unique tokens.
+        tokens or the occurence of unique tokens.
         """
-        minimum_coverage, self.min_matched_length, self.min_high_matched_length = (
+        min_cov, self.min_matched_length, self.min_high_matched_length = (
             compute_thresholds_occurences(
                 self.minimum_coverage,
                 self.length,
-                self.high_length))
+                self.high_length,
+            )
+        )
         if not self.has_stored_minimum_coverage:
-            self.minimum_coverage = minimum_coverage
+            self.minimum_coverage = min_cov

         self._minimum_containment = self.minimum_coverage / 100

         self.min_matched_length_unique, self.min_high_matched_length_unique = (
-            compute_thresholds_unique(
-                self.minimum_coverage,
-                self.length,
-                self.length_unique, self.high_length_unique))
+            compute_thresholds_unique(
+                self.minimum_coverage,
+                self.length,
+                self.length_unique, self.high_length_unique,
+            )
+        )

         self.is_small = self.length < small_rule

@@ -1184,7 +1189,8 @@ def dump(self):
             return

         def write(location, byte_string):
-            # we write as binary because rules and licenses texts and data are UTF-8-encoded bytes
+            # we write as binary because rules and licenses texts and data are
+            # UTF-8-encoded bytes
             with io.open(location, 'wb') as of:
                 of.write(byte_string)

@@ -1204,7 +1210,7 @@ def load(self):
                 data = saneyaml.load(f.read())
         except Exception as e:
             print('#############################')
-            print('INVALID LICENSE RULE FILE:', 'file://' + self.data_file)
+            print('INVALID LICENSE RULE FILE:', f'file://{self.data_file}')
             print('#############################')
             print(e)
             print('#############################')
@@ -1283,7 +1289,8 @@ def compute_relevance(self, _threshold=18.0):

         - false positive rule has 100 relevance.
         - rule length equal or larger than threshold has 100 relevance
-        - rule length smaller than threshold has 100/threshold relevance rounded down.
+        - rule length smaller than threshold has 100/threshold relevance rounded
+          down.

         The current threshold is 18 words.
         """
@@ -1327,11 +1334,11 @@ def rename_and_relocate(self, name_prefix):
             rules_directory=self.rule_dir()
         )

-        new_data_file = new_base_loc + '.yml'
+        new_data_file = f'{new_base_loc}.yml'
         shutil.move(self.data_file, new_data_file)
         self.data_file = new_data_file

-        new_text_file = new_base_loc + '.RULE'
+        new_text_file = f'{new_base_loc}.RULE'
         shutil.move(self.text_file, new_text_file)
         self.text_file = new_text_file

@@ -1435,20 +1442,22 @@ class SpdxRule(Rule):
     """

     def __attrs_post_init__(self, *args, **kwargs):
-        self.identifier = 'spdx-license-identifier: ' + self.license_expression
+        self.identifier = f'spdx-license-identifier: {self.license_expression}'
         expression = None
         try:
             expression = self.licensing.parse(self.license_expression)
         except:
             raise InvalidRule(
-                'Unable to parse License rule expression: ' +
-                repr(self.license_expression) + ' for: SPDX rule:' +
-                self.stored_text +
-                '\n' + traceback.format_exc())
+                'Unable to parse License rule expression: '
+                f'{self.license_expression!r} for: SPDX rule: '
+                f'{self.stored_text}\n' + traceback.format_exc()
+            )
+
         if expression is None:
             raise InvalidRule(
                 'Unable to parse License rule expression: '
-                +repr(self.license_expression) + ' for:' + repr(self.data_file))
+                f'{self.license_expression!r} for: {self.data_file!r}'
+            )

         self.license_expression = expression.render()
         self.license_expression_object = expression
@@ -1473,13 +1482,19 @@ def _print_rule_stats():
     rules = idx.rules_by_rid
     sizes = Counter(r.length for r in rules)
     print('Top 15 lengths: ', sizes.most_common(15))
-    print('15 smallest lengths: ', sorted(sizes.items(),
-        key=itemgetter(0))[:15])
+    print(
+        '15 smallest lengths: ',
+        sorted(sizes.items(),
+        key=itemgetter(0))[:15],
+    )

     high_sizes = Counter(r.high_length for r in rules)
     print('Top 15 high lengths: ', high_sizes.most_common(15))
-    print('15 smallest high lengths: ', sorted(high_sizes.items(),
-        key=itemgetter(0))[:15])
+    print(
+        '15 smallest high lengths: ',
+        sorted(high_sizes.items(),
+        key=itemgetter(0))[:15],
+    )


 def update_ignorables(licensish, verbose=False):
@@ -1491,7 +1506,7 @@
     """

     if verbose:
-        print(f'Processing:', 'file://{licensish.text_file}')
+        print(f'Processing: file://{licensish.text_file}')

     if not exists(licensish.text_file):
         return licensish
@@ -1575,20 +1590,22 @@ def find_rule_base_location(name_prefix, rules_directory=rules_data_dir):
     without overwriting any existing rule. Use the ``name_prefix`` string as a
     prefix for this name.
     """
-    template = (
+
+    cleaned = (
         name_prefix
         .lower()
         .strip()
         .replace(' ', '_')
         .replace('(', '')
         .replace(')', '')
         .strip('_-')
-    ) + '_{}'
+    )
+    template = cleaned + '_{idx}'

     idx = 1
     while True:
-        base_name = template.format(idx)
+        base_name = template.format(idx=idx)
         base_loc = join(rules_directory, base_name)
-        if not exists(base_loc + '.RULE'):
+        if not exists(f'{base_loc}.RULE'):
             return base_loc
         idx += 1
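Note on the refactor above: the cleaned prefix and the '_{idx}' suffix are now built separately, and the placeholder is named, so the later format(idx=idx) call reads unambiguously. A simplified sketch of the probing loop, under the assumption that rule files sit directly in the given directory (not the exact function):

    import os

    def next_free_rule_base(cleaned_prefix, rules_directory):
        # Probe prefix_1, prefix_2, ... until no '<name>.RULE' file exists yet.
        template = cleaned_prefix + '_{idx}'
        idx = 1
        while True:
            base_name = template.format(idx=idx)
            if not os.path.exists(os.path.join(rules_directory, f'{base_name}.RULE')):
                return base_name
            idx += 1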

tests/licensedcode/test_detection_validate.py

Lines changed: 1 addition & 1 deletion
@@ -118,7 +118,7 @@ def check_rule_or_license_can_be_self_detected_exactly(rule):
     assert '\n'.join(failure_trace) == '\n'.join(expected)


-def check_ignorable_clues(licensish, regen=False, verbose=True):
+def check_ignorable_clues(licensish, regen=False, verbose=False):
     """
     Validate that all current ignorable clues declared in a `licensish` License
     or Rule object are properly detected in that rule text file. Optionally

tests/licensedcode/test_models.py

Lines changed: 14 additions & 10 deletions
@@ -103,7 +103,9 @@ def test_build_rules_from_licenses(self):

     def test_validate_license_library(self):
         errors, warnings, infos = models.License.validate(
-            cache.get_licenses_db(), verbose=True)
+            cache.get_licenses_db(),
+            verbose=False,
+        )
         assert errors == {}
         assert warnings == {}
         assert infos
@@ -112,13 +114,16 @@ def test_validate_license_library_can_return_errors(self):
         test_dir = self.get_test_loc('models/validate')
         lics = models.load_licenses(test_dir)
         errors, warnings, infos = models.License.validate(
-            lics, no_dupe_urls=True, verbose=True)
+            lics,
+            no_dupe_urls=True,
+            verbose=False,
+        )

         expected_errors = {
             'GLOBAL': [
-                'Duplicate texts in multiple licenses:apache-2.0: TEXT, bsd-ack-carrot2: TEXT',
-                'Duplicate short name:GPL 1.0 in licenses:gpl-1.0-plus, gpl-1.0',
-                'Duplicate name:GNU General Public License 1.0 in licenses:gpl-1.0-plus, gpl-1.0'],
+                'Duplicate texts in multiple licenses: apache-2.0: TEXT, bsd-ack-carrot2: TEXT',
+                'Duplicate short name: GPL 1.0 in licenses: gpl-1.0-plus, gpl-1.0',
+                'Duplicate name: GNU General Public License 1.0 in licenses: gpl-1.0-plus, gpl-1.0'],
             'bsd-ack-carrot2': [
                 'No short name',
                 'No name',
@@ -162,7 +167,7 @@ def test_load_licenses_fails_if_directory_contains_orphaned_files(self):
             list(models.load_licenses(test_dir))
             self.fail('Exception not raised')
         except Exception as e:
-            assert 'Some License data or text files are orphaned' in str(e)
+            assert 'Some License files are orphaned in' in str(e)


 class TestRule(FileBasedTesting):
@@ -200,9 +205,9 @@ def test_rules_types_has_only_boolean_values(self):
         rule_consitency_errors = []

         for r in rules:
-            list_rule_types = [r.is_license_text, r.is_license_notice, 
+            list_rule_types = [r.is_license_text, r.is_license_notice,
                 r.is_license_tag, r.is_license_reference]
-
+
             if any(type(rule_type) != bool for rule_type in list_rule_types):
                 rule_consitency_errors.append((r.data_file, r.text_file))

@@ -213,7 +218,7 @@ def test_rules_have_only_one_rule_type(self):
         rule_consitency_errors = []

         for r in rules:
-            list_rule_types = [r.is_license_text, r.is_license_notice, 
+            list_rule_types = [r.is_license_text, r.is_license_notice,
                 r.is_license_tag, r.is_license_reference]

             if sum(list_rule_types) > 1:
@@ -317,7 +322,6 @@ def test_compute_thresholds_occurences(self):
         expected = expected_min_matched_length_unique, expected_min_high_matched_length_unique
         assert results == expected

-
     def test_Thresholds(self):
         r1_text = 'licensed under the GPL, licensed under the GPL'
         r1 = models.Rule(text_file='r1', license_expression='apache-1.1', stored_text=r1_text)
