3333from licensedcode import MIN_MATCH_HIGH_LENGTH
3434from licensedcode import MIN_MATCH_LENGTH
3535from licensedcode import SMALL_RULE
36+ from licensedcode .languages import LANG_INFO as known_languages
3637from licensedcode .spans import Span
3738from licensedcode .tokenize import index_tokenizer
3839from licensedcode .tokenize import index_tokenizer_with_stopwords
@@ -535,7 +536,7 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
535536 by_short_name_lowered [lic .short_name ].append (lic )
536537
537538 if lic .key != lic .key .lower ():
538- error ('Incorrect license key case. Should be lowercase.' )
539+ error ('Incorrect license key case: must be all lowercase.' )
539540
540541 if len (lic .key ) > 50 :
541542 error ('key must be 50 characters or less.' )
@@ -560,13 +561,19 @@ def validate(licenses, verbose=False, no_dupe_urls=False):
560561 if not lic .owner :
561562 error ('No owner: Use "Unspecified" if not known.' )
562563
564+ lang = lic .language
565+ if lang and lang != 'en' and lang not in known_languages :
566+ error (f'Unknown language: { lang } ' )
567+
563568 if lic .is_unknown :
564569 if not 'unknown' in lic .key :
565- error ('is_unknown can be true only for licenses with '
566- '"unknown " in their key string.' )
570+ error (
571+ 'is_unknown can be true only for licenses with '
572+ '"unknown " in their key string.'
573+ )
567574
568575 if lic .is_generic and lic .is_unknown :
569- error ('is_generic and is_unknown are incompatible' )
576+ error ('is_generic and is_unknown flags are incompatible' )
570577
571578 # URLS dedupe and consistency
572579 if no_dupe_urls :
@@ -1474,10 +1481,14 @@ def validate(self, licensing=None):
14741481 if any (ignorables ):
14751482 yield 'is_false_positive rule cannot have ignorable_* attributes.'
14761483
1477- if not (0 <= self .minimum_coverage <= 100 ):
1478- yield 'Invalid rule minimum_coverage. Should be between 0 and 100.'
1484+ lang = self .language
1485+ if lang and lang != 'en' and lang not in known_languages :
1486+ yield f'Unknown language: { lang } '
14791487
14801488 if not is_false_positive :
1489+ if not (0 <= self .minimum_coverage <= 100 ):
1490+ yield 'Invalid rule minimum_coverage. Should be between 0 and 100.'
1491+
14811492 if not (0 <= self .relevance <= 100 ):
14821493 yield 'Invalid rule relevance. Should be between 0 and 100.'
14831494
@@ -1877,6 +1888,8 @@ def load(self):
18771888 self .ignorable_urls = data .get ('ignorable_urls' , [])
18781889 self .ignorable_emails = data .get ('ignorable_emails' , [])
18791890
1891+ self .language = data .get ('language' ) or 'en'
1892+
18801893 return self
18811894
18821895 def set_relevance (self ):
0 commit comments