Skip to content

Commit ec9242f

Browse files
committed
Address PR comments #56
* Set original license expression in ExpressionInfo * Set vendored licensedb info location as a global * Create function that loads license index json * Update tests Signed-off-by: Jono Yang <[email protected]>
1 parent 8d198cd commit ec9242f

File tree

5 files changed

+17960
-116
lines changed

5 files changed

+17960
-116
lines changed

src/license_expression/__init__.py

Lines changed: 86 additions & 65 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,12 @@
5555
from license_expression._pyahocorasick import Trie as AdvancedTokenizer
5656
from license_expression._pyahocorasick import Token
5757

58+
59+
curr_dir = dirname(abspath(__file__))
60+
data_dir = join(curr_dir, 'data')
61+
vendored_scancode_licensedb_index_location = join(data_dir, 'scancode-licensedb-index.json')
62+
63+
5864
# append new error codes to PARSE_ERRORS by monkey patching
5965
PARSE_EXPRESSION_NOT_UNICODE = 100
6066
if PARSE_EXPRESSION_NOT_UNICODE not in PARSE_ERRORS:
@@ -127,6 +133,8 @@ class ExpressionInfo:
127133
Licensing.validate().
128134
129135
The ExpressionInfo class has the following fields:
136+
- original_license_expression: str.
137+
- This is the license expression that was originally passed into Licensing.validate()
130138
- normalized_license_expression: str.
131139
- If a valid license expression has been passed into `validate()`,
132140
then the license expression string will be set in this field.
@@ -137,20 +145,28 @@ class ExpressionInfo:
137145
- If a valid license expression has been passed into `validate()`,
138146
then the license symbols from the license expression will be
139147
appended here.
148+
- valid_exception_symbols: list
149+
- If a license symbol in the license expression is a license exception,
150+
then that license symbol will be appended here.
140151
- invalid_symbols: list
141152
- If an invalid license expression has been passed into `validate()`,
142153
then the invalid license symbols from the license expression will be
143154
appended here.
144-
- exception_symbols: list
145-
- If a license symbol in the license expression is a license exception,
146-
then that license symbol will be appended here.
147155
"""
148-
def __init__(self):
149-
self.normalized_license_expression = ''
150-
self.errors = []
151-
self.valid_symbols = []
152-
self.invalid_symbols = []
153-
self.exception_symbols = []
156+
def __init__(
157+
self,
158+
original_license_expression,
159+
normalized_license_expression=None,
160+
errors=None,
161+
valid_symbols=None,
162+
valid_exception_symbols=None,
163+
invalid_symbols=None):
164+
self.original_license_expression = original_license_expression
165+
self.normalized_license_expression = normalized_license_expression or ''
166+
self.errors = errors or []
167+
self.valid_symbols = valid_symbols or []
168+
self.valid_exception_symbols = valid_exception_symbols or []
169+
self.invalid_symbols = invalid_symbols or []
154170

155171

156172
class Licensing(boolean.BooleanAlgebra):
@@ -657,7 +673,7 @@ def dedup(self, expression):
657673
def validate(self, expression, strict=True, **kwargs):
658674
"""
659675
Return an ExpressionInfo object that contains information about
660-
`expression` by parsing `expression` using Licensing.parse()
676+
the validation of an `expression` license expression string.
661677
662678
If `expression` is valid, then
663679
`ExpressionInfo.normalized_license_expression` is set, along with a list
@@ -669,104 +685,109 @@ def validate(self, expression, strict=True, **kwargs):
669685
license symbols, the offending symbols will be present in
670686
`ExpressionInfo.invalid_symbols`
671687
672-
If `strict` is True, additional exceptions will be raised if in a "WITH"
688+
If `strict` is True, validation error messages will be included when, in a "WITH"
673689
expression such as "XXX with ZZZ", the XXX symbol has `is_exception`
674690
set to True or the ZZZ symbol has `is_exception` set to False. This
675-
checks that symbols are used strictly as constructed.
691+
checks that symbols are used strictly as intended.
676692
"""
677-
expression_info = ExpressionInfo()
693+
def set_ExpressionInfo_fields(parsed_expression, expression_info):
694+
symbols = list(parsed_expression.symbols)
695+
expression_info.normalized_license_expression = str(parsed_expression)
696+
expression_info.valid_symbols = [s.render() for s in symbols]
697+
expression_info.valid_exception_symbols = [
698+
s.render()
699+
for s in symbols
700+
if isinstance(s, LicenseWithExceptionSymbol)
701+
or s.is_exception
702+
]
703+
return expression_info
704+
705+
expression_info = ExpressionInfo(
706+
original_license_expression=str(expression)
707+
)
678708

679709
# Check `expression` type
680710
try:
681-
self.parse(expression)
711+
parsed_expression = self.parse(expression)
682712
except ExpressionError as e:
683713
expression_info.errors.append(str(e))
684714
return expression_info
685715

686-
# Check `expression` syntax
687-
try:
688-
self.parse(expression, strict=strict)
689-
except ExpressionParseError as e:
690-
expression_info.errors.append(str(e))
691-
expression_info.invalid_symbols.append(e.token_string)
692-
return expression_info
716+
if strict:
717+
# Check `expression` syntax
718+
try:
719+
parsed_expression = self.parse(expression, strict=strict)
720+
except ExpressionParseError as e:
721+
expression_info.errors.append(str(e))
722+
expression_info.invalid_symbols.append(e.token_string)
693723

694724
# Check `expression` keys
695725
try:
696-
parsed_expression = self.parse(expression, strict=strict, validate=True)
726+
parsed_expression = self.parse(expression, validate=True)
697727
except ExpressionError as e:
698728
error_message = str(e)
699729
expression_info.errors.append(error_message)
700-
if 'Unknown license key' in error_message:
701-
unknown_keys = self.unknown_license_keys(expression)
702-
expression_info.invalid_symbols.extend(unknown_keys)
703-
return expression_info
730+
unknown_keys = self.unknown_license_keys(expression)
731+
expression_info.invalid_symbols.extend(unknown_keys)
732+
return set_ExpressionInfo_fields(
733+
parsed_expression=parsed_expression,
734+
expression_info=expression_info
735+
)
704736

705737
# If we have not hit an exception, load `expression_info` and return it
706-
symbols = list(parsed_expression.symbols)
707-
expression_info.normalized_license_expression = parsed_expression.render()
708-
expression_info.valid_symbols = [s.render() for s in symbols]
709-
expression_info.exception_symbols = [s.render() for s in symbols if isinstance(s, LicenseWithExceptionSymbol) or s.is_exception]
710-
return expression_info
738+
return set_ExpressionInfo_fields(
739+
parsed_expression=parsed_expression,
740+
expression_info=expression_info
741+
)
711742

712743

713-
def get_license_key_info(license_key_index_location=None):
744+
def get_license_index(license_index_location=vendored_scancode_licensedb_index_location):
714745
"""
715746
Return a list of dictionaries that contain license key information from
716-
`license_key_index_location`
747+
`license_index_location`
717748
718-
If `license_key_index_location` is not present, then we use a vendored copy
719-
of the license key index from https://scancode-licensedb.aboutcode.org/
749+
The default value of `license_index_location` points to a vendored copy
750+
of the license index from https://scancode-licensedb.aboutcode.org/
720751
"""
721-
if license_key_index_location:
722-
with open(license_key_index_location, 'r') as f:
723-
license_key_info = json.load(f)
724-
else:
725-
curr_dir = dirname(abspath(__file__))
726-
data_dir = join(curr_dir, 'data')
727-
vendored_license_key_index_location = join(data_dir, 'license_key_index.json')
728-
with open(vendored_license_key_index_location, 'r') as f:
729-
license_key_info = json.load(f)
730-
return license_key_info
731-
732-
733-
def build_licensing(license_key_index_location=None):
752+
with open(license_index_location) as f:
753+
return json.load(f)
754+
755+
756+
def load_licensing_from_license_index(license_index):
734757
"""
735-
Return a Licensing object that has been loaded with license keys.
758+
Return a Licensing object that has been loaded with license keys and
759+
attributes from `license_index`.
760+
"""
761+
syms = [LicenseSymbol(**l) for l in license_index]
762+
return Licensing(syms)
763+
736764

737-
If `license_key_index_location` is present, then license key information
738-
will be loaded from `license_key_index_location`, otherwise license key
739-
information will come from a vendored license key index file.
765+
def build_licensing(license_index):
766+
"""
767+
Return a Licensing object that has been loaded with license keys.
740768
"""
741-
license_key_info = get_license_key_info(license_key_index_location)
742769
lics = [
743770
{
744771
'key': l.get('license_key', ''),
745772
'is_exception': l.get('is_exception', ''),
746-
} for l in license_key_info
773+
} for l in license_index
747774
]
748-
syms = [LicenseSymbol(**l) for l in lics]
749-
return Licensing(syms)
775+
return load_licensing_from_license_index(lics)
750776

751777

752-
def build_spdx_licensing(license_key_index_location=None):
778+
def build_spdx_licensing(license_index):
753779
"""
754780
Return a Licensing object that has been loaded with SPDX license keys.
755-
756-
If `license_key_index_location` is present, then license key information
757-
will be loaded from `license_key_index_location`, otherwise license key
758-
information will come from a vendored license key index file.
759781
"""
760-
license_key_info = get_license_key_info(license_key_index_location)
782+
# Massage data such that SPDX license key is the primary license key
761783
lics = [
762784
{
763785
'key': l.get('spdx_license_key', ''),
764786
'aliases': l.get('other_spdx_license_keys', ''),
765787
'is_exception': l.get('is_exception', ''),
766-
} for l in license_key_info if l.get('spdx_license_key')
788+
} for l in license_index if l.get('spdx_license_key')
767789
]
768-
syms = [LicenseSymbol(**l) for l in lics]
769-
return Licensing(syms)
790+
return load_licensing_from_license_index(lics)
770791

771792

772793
def build_symbols_from_unknown_tokens(tokens):

src/license_expression/data/license_key_index.json

Lines changed: 0 additions & 1 deletion
This file was deleted.
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
1-
about_resource: .
2-
name: license_key_index.json
1+
about_resource: scancode-licensedb-index.json
2+
name: scancode-licensedb-index.json
33
license_expression: cc-by-4.0
44
copyright: Copyright (c) nexB Inc. and others.
55
homepage_url: https://scancode-licensedb.aboutcode.org/

0 commit comments

Comments
 (0)