Skip to content

Commit 97d1fe6

Browse files
committed
Vendor license key index #56
* Refactor validate() to call parse() instead of duplicating the parsing code from parse()
Signed-off-by: Jono Yang <[email protected]>
1 parent 3601e2e commit 97d1fe6

File tree

4 files changed, with 438 additions and 62 deletions.

src/license_expression/__init__.py

Lines changed: 37 additions & 62 deletions
Original file line number | Diff line number | Diff line change
@@ -38,6 +38,9 @@
3838
from copy import copy
3939
from copy import deepcopy
4040
from functools import total_ordering
41+
from os.path import abspath
42+
from os.path import dirname
43+
from os.path import join
4144
import itertools
4245
import json
4346
import re
@@ -600,7 +603,7 @@ def simple_tokenizer(self, expression):
600603
sym = LicenseSymbol(key=sym_or_op)
601604
yield Token(start, end, sym_or_op, sym)
602605

603-
def validate(self, expression, strict=False, simple=False, **kwargs):
606+
def validate(self, expression, strict=False, **kwargs):
604607
data = {
605608
'normalized_license_expression': '',
606609
'errors': [],
@@ -609,94 +612,66 @@ def validate(self, expression, strict=False, simple=False, **kwargs):
609612
'exception_symbols': [],
610613
}
611614

612-
if expression is None:
613-
return data
614-
615-
if isinstance(expression, LicenseExpression):
616-
data['normalized_license_expression'] = expression.render()
617-
return data
618-
619-
if isinstance(expression, bytes):
620-
try:
621-
expression = str(expression)
622-
except:
623-
ext = type(expression)
624-
data['errors'].append('expression must be a string and not: %(ext)r' % locals())
625-
return data
626-
627-
if not isinstance(expression, str):
628-
ext = type(expression)
629-
data['errors'].append('expression must be a string and not: %(ext)r' % locals())
615+
# Check `expression` type
616+
try:
617+
self.parse(expression)
618+
except ExpressionError as e:
619+
data['errors'].append(str(e))
630620
return data
631621

632-
if not expression or not expression.strip():
633-
return
622+
# Check `expression` syntax
634623
try:
635-
# this will raise a ParseError on errors
636-
tokens = list(self.tokenize(expression, strict=strict, simple=simple))
637-
expression = super(Licensing, self).parse(tokens)
638-
except ParseError as e:
639-
new_error = ExpressionParseError(
640-
token_type=e.token_type, token_string=e.token_string,
641-
position=e.position, error_code=e.error_code)
642-
data['errors'].append(str(new_error))
624+
self.parse(expression, strict=strict)
625+
except ExpressionParseError as e:
626+
data['errors'].append(str(e))
643627
data['invalid_symbols'].append(e.token_string)
644628
return data
645629

646-
if not isinstance(expression, LicenseExpression):
647-
data['errors'].append('expression must be a LicenseExpression once parsed.')
648-
return data
649-
650-
unknown_keys = self.unknown_license_keys(expression, unique=True)
651-
if unknown_keys:
652-
msg = 'Unknown license key(s): {}'.format(', '.join(unknown_keys))
653-
data['errors'].append(msg)
654-
data['invalid_symbols'].extend(unknown_keys)
630+
# Check `expression` keys
631+
try:
632+
parsed_expression = self.parse(expression, strict=strict, validate=True)
633+
except ExpressionError as e:
634+
error_message = str(e)
635+
data['errors'].append(error_message)
636+
if 'Unknown license key' in error_message:
637+
unknown_keys = self.unknown_license_keys(expression)
638+
data['invalid_symbols'].extend(unknown_keys)
655639
return data
656640

657-
symbols = list(expression.symbols)
658-
data['normalized_license_expression'] = expression.render()
641+
# If we have not hit an exception, load `data` and return it
642+
symbols = list(parsed_expression.symbols)
643+
data['normalized_license_expression'] = parsed_expression.render()
659644
data['valid_symbols'] = [s.render() for s in symbols]
660645
data['exception_symbols'] = [s.render() for s in symbols if isinstance(s, LicenseWithExceptionSymbol) or s.is_exception]
661646
return data
662647

663648

664-
def build_spdx_licensing(index_json_location=None):
665-
# if no index_json, use vendored version
666-
# TODO: vendor index.json
667-
668-
if index_json_location:
669-
with open(index_json_location, 'r') as f:
649+
def build_spdx_licensing(license_key_index_location=None):
650+
"""
651+
Return a Licensing object that has been loaded with SPDX license keys
652+
"""
653+
if license_key_index_location:
654+
with open(license_key_index_location, 'r') as f:
670655
license_info = json.load(f)
671656
else:
672-
with open(vendored_index_json_location, 'r') as f:
657+
# Use vendored license key index if `license_key_index_location` has not been provided
658+
curr_dir = dirname(abspath(__file__))
659+
data_dir = join(curr_dir, 'data')
660+
vendored_license_key_index_location = join(data_dir, 'license_key_index.json')
661+
with open(vendored_license_key_index_location, 'r') as f:
673662
license_info = json.load(f)
674663

675664
lics = [
676665
{
677666
'key': l.get('spdx_license_key', ''),
678667
'aliases': l.get('other_spdx_license_keys', ''),
679668
'is_exception': l.get('is_exception', ''),
680-
} for l in license_info
669+
} for l in license_info if l.get('spdx_license_key')
681670
]
682671
syms = [LicenseSymbol(**l) for l in lics]
683672
return Licensing(syms)
684673

685674

686-
def build_spdx_licensing_scancode():
687-
from licensedcode.cache import get_licenses_db
688-
ld = get_licenses_db()
689-
ld_spdx = [
690-
{
691-
'key': l.spdx_license_key,
692-
'aliases': l.other_spdx_license_keys,
693-
'is_exception': l.is_exception
694-
} for _, l in ld.items()
695-
]
696-
syms = [LicenseSymbol(**l) for l in ld_spdx]
697-
return Licensing(syms)
698-
699-
700675
def build_symbols_from_unknown_tokens(tokens):
701676
"""
702677
Yield Token given a sequence of Token replacing unmatched contiguous Tokens

0 commit comments

Comments
 (0)