@@ -147,8 +147,8 @@ def __attrs_post_init__(self, *args, **kwargs):
147147 self .load ()
148148
149149 def set_file_paths (self ):
150- self .data_file = join (self .src_dir , self .key + ' .yml' )
151- self .text_file = join (self .src_dir , self .key + ' .LICENSE' )
150+ self .data_file = join (self .src_dir , f' { self .key } .yml' )
151+ self .text_file = join (self .src_dir , f' { self .key } .LICENSE' )
152152
153153 def relocate (self , target_dir , new_key = None ):
154154 """
@@ -263,9 +263,9 @@ def load(self):
263263
264264 if k == 'key' :
265265 assert self .key == v , (
266- 'The license "key" attribute in the .yml file MUST '
267- 'be the same as the base name of this license .LICENSE '
268- 'and .yml data files license files. '
266+ 'The license "key" attribute in the .yml file MUST ' +
267+ 'be the same as the base name of this license .LICENSE ' +
268+ 'and .yml data files license files. ' +
269269 f'Yet file name = { self .key } and license key = { v } '
270270 )
271271
@@ -336,7 +336,7 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
336336 if lic .category and lic .category not in CATEGORIES :
337337 cats = '\n ' .join (sorted (CATEGORIES ))
338338 error (
339- f'Unknown license category: { lic .category } .\n '
339+ f'Unknown license category: { lic .category } .\n ' +
340340 f'Use one of these valid categories:\n { cats } '
341341 )
342342 if not lic .owner :
@@ -400,15 +400,15 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
400400 if multiple_spdx_keys_used :
401401 for k , lkeys in multiple_spdx_keys_used .items ():
402402 errors ['GLOBAL' ].append (
403- f'SPDX key: { k } used in multiple licenses: '
403+ f'SPDX key: { k } used in multiple licenses: ' +
404404 ', ' .join (sorted (lkeys )))
405405
406406 # global text dedupe
407407 multiple_texts = {k : v for k , v in by_text .items () if len (v ) > 1 }
408408 if multiple_texts :
409409 for k , msgs in multiple_texts .items ():
410410 errors ['GLOBAL' ].append (
411- 'Duplicate texts in multiple licenses:'
411+ 'Duplicate texts in multiple licenses: ' +
412412 ', ' .join (sorted (msgs ))
413413 )
414414
@@ -417,7 +417,7 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
417417 if len (licenses ) == 1 :
418418 continue
419419 errors ['GLOBAL' ].append (
420- f'Duplicate short name: { short_name } in licenses:'
420+ f'Duplicate short name: { short_name } in licenses: ' +
421421 ', ' .join (l .key for l in licenses )
422422 )
423423
@@ -426,7 +426,7 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
426426 if len (licenses ) == 1 :
427427 continue
428428 errors ['GLOBAL' ].append (
429- f'Duplicate name: { name } in licenses:'
429+ f'Duplicate name: { name } in licenses: ' +
430430 ', ' .join (l .key for l in licenses )
431431 )
432432
@@ -484,7 +484,7 @@ def load_licenses(licenses_data_dir=licenses_data_dir , with_deprecated=False):
484484 dangling = all_files .difference (used_files )
485485 if dangling :
486486 msg = (
487- f'Some License files are orphaned in " { licenses_data_dir } " .\n '
487+ f'Some License files are orphaned in { licenses_data_dir !r } .\n ' +
488488 '\n ' .join (f'file://{ f } ' for f in sorted (dangling ))
489489 )
490490 raise Exception (msg )
@@ -548,7 +548,7 @@ def validate_rules(rules, licenses_by_key, with_text=False):
548548 message .append ('' )
549549 message .append (msg )
550550 for rule in rules :
551- message .append (' ' + repr ( rule ) )
551+ message .append (f ' { rule !r } ' )
552552 if rule .text_file :
553553 message .append (f' file://{ rule .text_file } ' )
554554 if rule .data_file :
@@ -640,7 +640,7 @@ def load_rules(rules_data_dir=rules_data_dir):
640640 base_name = file_base_name (data_file )
641641 if ' ' in base_name :
642642 space_problems .append (data_file )
643- rule_file = join (rules_data_dir , base_name + ' .RULE' )
643+ rule_file = join (rules_data_dir , f' { base_name } .RULE' )
644644 try :
645645 rule = Rule (data_file = data_file , text_file = rule_file )
646646 yield rule
@@ -847,14 +847,11 @@ def setup(self):
847847 try :
848848 expression = self .licensing .parse (self .license_expression )
849849 except :
850+ exp = self .license_expression
851+ trace = traceback .format_exc ()
850852 raise InvalidRule (
851- 'Unable to parse rule License expression: {license_expression!r} '
852- 'for: file://{data_file}'
853- '\n {trace}' .format (
854- license_expression = self .license_expression ,
855- data_file = self .data_file ,
856- trace = traceback .format_exc (),
857- )
853+ f'Unable to parse rule License expression: { exp !r} '
854+ f'for: file://{ self .data_file } \n { trace } '
858855 )
859856
860857 if expression is None :
@@ -1114,7 +1111,8 @@ def load_data(self):
11141111 if not self .text_file :
11151112 # for SPDX or tests only
11161113 if not self .stored_text :
1117- raise InvalidRule (f'Invalid rule without its corresponding text file: { self } ' )
1114+ raise InvalidRule (
1115+ f'Invalid rule without its corresponding text file: { self } ' )
11181116 self .identifier = '_tst_' + str (len (self .stored_text ))
11191117 else :
11201118 self .identifier = file_name (self .text_file )
@@ -1139,7 +1137,10 @@ def tokens(self):
11391137 # We tag this rule as being a bare URL if it starts with a scheme and is
11401138 # on one line: this is used to determine a matching approach
11411139
1142- if text .startswith (('http://' , 'https://' , 'ftp://' )) and '\n ' not in text [:1000 ]:
1140+ if (
1141+ text .startswith (('http://' , 'https://' , 'ftp://' ))
1142+ and '\n ' not in text [:1000 ]
1143+ ):
11431144 self .minimum_coverage = 100
11441145
11451146 for token in index_tokenizer (self .text ()):
@@ -1152,23 +1153,27 @@ def tokens(self):
11521153 def compute_thresholds (self , small_rule = SMALL_RULE ):
11531154 """
11541155 Compute and set thresholds either considering the occurrence of all
1155- tokens or the occurance of unique tokens.
1156+ tokens or the occurence of unique tokens.
11561157 """
1157- minimum_coverage , self .min_matched_length , self .min_high_matched_length = (
1158+ min_cov , self .min_matched_length , self .min_high_matched_length = (
11581159 compute_thresholds_occurences (
11591160 self .minimum_coverage ,
11601161 self .length ,
1161- self .high_length ))
1162+ self .high_length ,
1163+ )
1164+ )
11621165 if not self .has_stored_minimum_coverage :
1163- self .minimum_coverage = minimum_coverage
1166+ self .minimum_coverage = min_cov
11641167
11651168 self ._minimum_containment = self .minimum_coverage / 100
11661169
11671170 self .min_matched_length_unique , self .min_high_matched_length_unique = (
1168- compute_thresholds_unique (
1169- self .minimum_coverage ,
1170- self .length ,
1171- self .length_unique , self .high_length_unique ))
1171+ compute_thresholds_unique (
1172+ self .minimum_coverage ,
1173+ self .length ,
1174+ self .length_unique , self .high_length_unique ,
1175+ )
1176+ )
11721177
11731178 self .is_small = self .length < small_rule
11741179
@@ -1184,7 +1189,8 @@ def dump(self):
11841189 return
11851190
11861191 def write (location , byte_string ):
1187- # we write as binary because rules and licenses texts and data are UTF-8-encoded bytes
1192+ # we write as binary because rules and licenses texts and data are
1193+ # UTF-8-encoded bytes
11881194 with io .open (location , 'wb' ) as of :
11891195 of .write (byte_string )
11901196
@@ -1204,7 +1210,7 @@ def load(self):
12041210 data = saneyaml .load (f .read ())
12051211 except Exception as e :
12061212 print ('#############################' )
1207- print ('INVALID LICENSE RULE FILE:' , 'file://' + self .data_file )
1213+ print ('INVALID LICENSE RULE FILE:' , f 'file://{ self .data_file } ' )
12081214 print ('#############################' )
12091215 print (e )
12101216 print ('#############################' )
@@ -1283,7 +1289,8 @@ def compute_relevance(self, _threshold=18.0):
12831289
12841290 - false positive rule has 100 relevance.
12851291 - rule length equal or larger than threshold has 100 relevance
1286- - rule length smaller than threshold has 100/threshold relevance rounded down.
1292+ - rule length smaller than threshold has 100/threshold relevance rounded
1293+ down.
12871294
12881295 The current threshold is 18 words.
12891296 """
@@ -1327,11 +1334,11 @@ def rename_and_relocate(self, name_prefix):
13271334 rules_directory = self .rule_dir ()
13281335 )
13291336
1330- new_data_file = new_base_loc + ' .yml'
1337+ new_data_file = f' { new_base_loc } .yml'
13311338 shutil .move (self .data_file , new_data_file )
13321339 self .data_file = new_data_file
13331340
1334- new_text_file = new_base_loc + ' .RULE'
1341+ new_text_file = f' { new_base_loc } .RULE'
13351342 shutil .move (self .text_file , new_text_file )
13361343 self .text_file = new_text_file
13371344
@@ -1435,20 +1442,22 @@ class SpdxRule(Rule):
14351442 """
14361443
14371444 def __attrs_post_init__ (self , * args , ** kwargs ):
1438- self .identifier = 'spdx-license-identifier: ' + self .license_expression
1445+ self .identifier = f 'spdx-license-identifier: { self .license_expression } '
14391446 expression = None
14401447 try :
14411448 expression = self .licensing .parse (self .license_expression )
14421449 except :
14431450 raise InvalidRule (
1444- 'Unable to parse License rule expression: ' +
1445- repr (self .license_expression ) + ' for: SPDX rule:' +
1446- self .stored_text +
1447- '\n ' + traceback .format_exc ())
1451+ 'Unable to parse License rule expression: '
1452+ f'{ self .license_expression !r} for: SPDX rule: '
1453+ f'{ self .stored_text } \n ' + traceback .format_exc ()
1454+ )
1455+
14481456 if expression is None :
14491457 raise InvalidRule (
14501458 'Unable to parse License rule expression: '
1451- + repr (self .license_expression ) + ' for:' + repr (self .data_file ))
1459+ f'{ self .license_expression !r} for: { self .data_file !r} '
1460+ )
14521461
14531462 self .license_expression = expression .render ()
14541463 self .license_expression_object = expression
@@ -1473,13 +1482,19 @@ def _print_rule_stats():
14731482 rules = idx .rules_by_rid
14741483 sizes = Counter (r .length for r in rules )
14751484 print ('Top 15 lengths: ' , sizes .most_common (15 ))
1476- print ('15 smallest lengths: ' , sorted (sizes .items (),
1477- key = itemgetter (0 ))[:15 ])
1485+ print (
1486+ '15 smallest lengths: ' ,
1487+ sorted (sizes .items (),
1488+ key = itemgetter (0 ))[:15 ],
1489+ )
14781490
14791491 high_sizes = Counter (r .high_length for r in rules )
14801492 print ('Top 15 high lengths: ' , high_sizes .most_common (15 ))
1481- print ('15 smallest high lengths: ' , sorted (high_sizes .items (),
1482- key = itemgetter (0 ))[:15 ])
1493+ print (
1494+ '15 smallest high lengths: ' ,
1495+ sorted (high_sizes .items (),
1496+ key = itemgetter (0 ))[:15 ],
1497+ )
14831498
14841499
14851500def update_ignorables (licensish , verbose = False ):
@@ -1491,7 +1506,7 @@ def update_ignorables(licensish, verbose=False):
14911506 """
14921507
14931508 if verbose :
1494- print (f'Processing:' , ' file://{licensish.text_file}' )
1509+ print (f'Processing: file://{ licensish .text_file } ' )
14951510
14961511 if not exists (licensish .text_file ):
14971512 return licensish
@@ -1575,20 +1590,22 @@ def find_rule_base_location(name_prefix, rules_directory=rules_data_dir):
15751590 without overwriting any existing rule. Use the ``name_prefix`` string as a
15761591 prefix for this name.
15771592 """
1578- template = (
1593+
1594+ cleaned = (
15791595 name_prefix
15801596 .lower ()
15811597 .strip ()
15821598 .replace (' ' , '_' )
15831599 .replace ('(' , '' )
15841600 .replace (')' , '' )
15851601 .strip ('_-' )
1586- ) + '_{}'
1602+ )
1603+ template = cleaned + '_{idx}'
15871604
15881605 idx = 1
15891606 while True :
1590- base_name = template .format (idx )
1607+ base_name = template .format (idx = idx )
15911608 base_loc = join (rules_directory , base_name )
1592- if not exists (base_loc + ' .RULE' ):
1609+ if not exists (f' { base_loc } .RULE' ):
15931610 return base_loc
15941611 idx += 1
0 commit comments