5555from license_expression ._pyahocorasick import Trie as AdvancedTokenizer
5656from license_expression ._pyahocorasick import Token
5757
58+
# Absolute path of the directory that contains this module.
curr_dir = dirname(abspath(__file__))

# "data" directory located next to this module.
data_dir = join(curr_dir, 'data')

# Default license index file: the vendored copy of the license index from
# https://scancode-licensedb.aboutcode.org/
vendored_scancode_licensedb_index_location = join(
    data_dir,
    'scancode-licensedb-index.json',
)
62+
63+
5864# append new error codes to PARSE_ERRORS by monkey patching
5965PARSE_EXPRESSION_NOT_UNICODE = 100
6066if PARSE_EXPRESSION_NOT_UNICODE not in PARSE_ERRORS :
@@ -127,6 +133,8 @@ class ExpressionInfo:
127133 Licensing.validate().
128134
129135 The ExpressionInfo class has the following fields:
136+ - original_license_expression: str.
137+ - This is the license expression that was originally passed into Licensing.validate()
130138 - normalized_license_expression: str.
131139 - If a valid license expression has been passed into `validate()`,
132140 then the license expression string will be set in this field.
@@ -137,20 +145,28 @@ class ExpressionInfo:
137145 - If a valid license expression has been passed into `validate()`,
138146 then the license symbols from the license expression will be
139147 appended here.
148+ - valid_exception_symbols: list
149+ - If a license symbol in the license expression is a license exception,
150+ then that license symbol will be appended here.
140151 - invalid_symbols: list
141152 - If an invalid license expression has been passed into `validate()`,
142153 then the invalid license symbols from the license expression will be
143154 appended here.
144- - exception_symbols: list
145- - If a license symbol in the license expression is a license exception,
146- then that license symbol will be appended here.
147155 """
148- def __init__ (self ):
149- self .normalized_license_expression = ''
150- self .errors = []
151- self .valid_symbols = []
152- self .invalid_symbols = []
153- self .exception_symbols = []
def __init__(
    self,
    original_license_expression,
    normalized_license_expression=None,
    errors=None,
    valid_symbols=None,
    valid_exception_symbols=None,
    invalid_symbols=None,
):
    """
    Initialize the report for `original_license_expression`.

    Every optional argument that is omitted or falsy is replaced by a fresh
    empty value: an empty string for `normalized_license_expression` and a
    new empty list for each of the list fields.
    """
    self.original_license_expression = original_license_expression
    self.normalized_license_expression = (
        normalized_license_expression if normalized_license_expression else ''
    )
    # A new list is created per instance so that reports never share state.
    self.errors = errors if errors else []
    self.valid_symbols = valid_symbols if valid_symbols else []
    self.valid_exception_symbols = (
        valid_exception_symbols if valid_exception_symbols else []
    )
    self.invalid_symbols = invalid_symbols if invalid_symbols else []
154170
155171
156172class Licensing (boolean .BooleanAlgebra ):
@@ -657,7 +673,7 @@ def dedup(self, expression):
def validate(self, expression, strict=True, **kwargs):
    """
    Return an ExpressionInfo object describing the validation of the
    `expression` license expression string.

    The returned object always carries `str(expression)` in
    `original_license_expression`. Validation runs up to three parsing
    passes:

    - A plain parse. If it raises an ExpressionError, the message is stored
      in `ExpressionInfo.errors` and the report is returned right away.
    - If `strict` is True, a strict parse. This checks that symbols are
      used strictly as intended: in a "WITH" expression such as
      "XXX with YYY", a validation error message is included if the XXX
      symbol has `is_exception` set to True or the YYY symbol has
      `is_exception` set to False. On an ExpressionParseError the message
      is stored in `ExpressionInfo.errors`, the offending token string is
      added to `ExpressionInfo.invalid_symbols`, and validation continues.
    - A parse with `validate=True` to check license keys. On an
      ExpressionError the message is stored in `ExpressionInfo.errors` and
      the unknown license keys are added to
      `ExpressionInfo.invalid_symbols`.

    Unless the first pass failed, `normalized_license_expression`,
    `valid_symbols` and `valid_exception_symbols` are then filled in from
    the most recent successful parse.

    Extra keyword arguments are accepted and ignored.
    """
    report = ExpressionInfo(original_license_expression=str(expression))

    def fill_report_from(parsed):
        # Copy everything derivable from a parsed expression onto the report.
        parsed_symbols = list(parsed.symbols)
        report.normalized_license_expression = str(parsed)
        report.valid_symbols = [sym.render() for sym in parsed_symbols]
        exception_symbols = []
        for sym in parsed_symbols:
            if isinstance(sym, LicenseWithExceptionSymbol) or sym.is_exception:
                exception_symbols.append(sym.render())
        report.valid_exception_symbols = exception_symbols
        return report

    # Pass 1: check the `expression` type. Nothing else can be reported
    # when this fails, so bail out with the error only.
    try:
        parsed = self.parse(expression)
    except ExpressionError as exc:
        report.errors.append(str(exc))
        return report

    # Pass 2: check the `expression` syntax, only on request. A failure is
    # recorded but does not stop validation.
    if strict:
        try:
            parsed = self.parse(expression, strict=strict)
        except ExpressionParseError as exc:
            report.errors.append(str(exc))
            report.invalid_symbols.append(exc.token_string)

    # Pass 3: check the `expression` keys.
    try:
        parsed = self.parse(expression, validate=True)
    except ExpressionError as exc:
        report.errors.append(str(exc))
        report.invalid_symbols.extend(self.unknown_license_keys(expression))
        # NOTE(review): the report is still filled in below from the
        # earlier successful parse, so symbols reported as invalid here may
        # also be listed in `valid_symbols` -- confirm this is intended.

    # `parsed` is the result of the last parse that did not raise.
    return fill_report_from(parsed)
711742
712743
713- def get_license_key_info ( license_key_index_location = None ):
def get_license_index(license_index_location=vendored_scancode_licensedb_index_location):
    """
    Return a list of dictionaries that contain license key information
    loaded from the JSON file at `license_index_location`.

    The default value of `license_index_location` points to a vendored copy
    of the license index from https://scancode-licensedb.aboutcode.org/
    """
    # JSON text is UTF-8: decode it explicitly instead of relying on the
    # platform default encoding, which is locale-dependent and can garble
    # or reject non-ASCII characters.
    with open(license_index_location, encoding='utf-8') as index_file:
        return json.load(index_file)
754+
755+
def load_licensing_from_license_index(license_index):
    """
    Return a Licensing object that has been loaded with license keys and
    attributes from `license_index`.

    Each item of `license_index` is a mapping that is expanded as the keyword
    arguments of one LicenseSymbol.
    """
    symbols = []
    for symbol_attributes in license_index:
        symbols.append(LicenseSymbol(**symbol_attributes))
    return Licensing(symbols)
763+
736764
737- If `license_key_index_location` is present, then license key information
738- will be loaded from `license_key_index_location`, otherwise license key
739- information will come from a vendored license key index file .
def build_licensing(license_index):
    """
    Return a Licensing object that has been loaded with license keys.

    `license_index` is an iterable of mappings. Each mapping contributes one
    license symbol whose key is the mapping's 'license_key' value and whose
    exception flag is the mapping's 'is_exception' value.
    """
    symbol_attributes = [
        {
            'key': entry.get('license_key', ''),
            # Fall back to a real boolean (not an empty string) when the
            # flag is missing, so the symbol attribute keeps a single type.
            'is_exception': entry.get('is_exception', False),
        }
        for entry in license_index
    ]
    return load_licensing_from_license_index(symbol_attributes)
750776
751777
752- def build_spdx_licensing (license_key_index_location = None ):
def build_spdx_licensing(license_index):
    """
    Return a Licensing object that has been loaded with SPDX license keys.

    `license_index` is an iterable of mappings. Mappings without a truthy
    'spdx_license_key' value are skipped. Each remaining mapping contributes
    one license symbol keyed by its SPDX license key, with its
    'other_spdx_license_keys' as aliases and its 'is_exception' value as the
    exception flag.
    """
    # Massage data such that SPDX license key is the primary license key
    spdx_symbol_attributes = [
        {
            # The filter below guarantees the key is present and non-empty.
            'key': entry['spdx_license_key'],
            # A missing or empty alias value becomes an empty tuple rather
            # than an empty string, so aliases are always a collection.
            'aliases': entry.get('other_spdx_license_keys') or (),
            # Fall back to a real boolean (not an empty string) when the
            # flag is missing.
            'is_exception': entry.get('is_exception', False),
        }
        for entry in license_index
        if entry.get('spdx_license_key')
    ]
    return load_licensing_from_license_index(spdx_symbol_attributes)
770791
771792
772793def build_symbols_from_unknown_tokens (tokens ):
0 commit comments