|
19 | 19 | """ |
20 | 20 |
|
21 | 21 | import itertools |
| 22 | +import json |
22 | 23 | import re |
23 | 24 | import string |
24 | 25 | from collections import defaultdict |
|
27 | 28 | from copy import copy |
28 | 29 | from copy import deepcopy |
29 | 30 | from functools import total_ordering |
| 31 | +from os.path import abspath |
| 32 | +from os.path import dirname |
| 33 | +from os.path import join |
30 | 34 |
|
31 | 35 | import boolean |
32 | 36 | from boolean import Expression as LicenseExpression |
|
51 | 55 | from license_expression._pyahocorasick import Trie as AdvancedTokenizer |
52 | 56 | from license_expression._pyahocorasick import Token |
53 | 57 |
|
| 58 | + |
# Locations are resolved relative to this module file rather than the current
# working directory of the calling process.
curr_dir = dirname(abspath(__file__))
data_dir = join(curr_dir, 'data')
vendored_scancode_licensedb_index_location = join(
    curr_dir,
    'data',
    'scancode-licensedb-index.json',
)
| 62 | + |
| 63 | + |
54 | 64 | # append new error codes to PARSE_ERRORS by monkey patching |
55 | 65 | PARSE_EXPRESSION_NOT_UNICODE = 100 |
56 | 66 | if PARSE_EXPRESSION_NOT_UNICODE not in PARSE_ERRORS: |
@@ -116,6 +126,50 @@ class ExpressionParseError(ParseError, ExpressionError): |
116 | 126 | ).finditer |
117 | 127 |
|
118 | 128 |
|
class ExpressionInfo:
    """
    Hold the outcome of validating one license expression with
    Licensing.validate().

    Attributes:
    - original_expression: str.
        - The license expression as it was given to Licensing.validate().
    - normalized_expression: str.
        - The license expression string, set only when the expression given
          to `validate()` is valid. None otherwise.
    - errors: list
        - Error messages collected while validating the license expression.
    - invalid_symbols: list
        - Symbols of the license expression found to be invalid: license keys
          that are unknown or not used in the right context, or a symbol that
          makes the syntax incorrect.
    """

    def __init__(
        self,
        original_expression,
        normalized_expression=None,
        errors=None,
        invalid_symbols=None,
    ):
        self.original_expression = original_expression
        self.normalized_expression = normalized_expression
        # Any falsy value (None included) is replaced by a new list so that
        # instances never share a default list.
        self.errors = errors if errors else []
        self.invalid_symbols = invalid_symbols if invalid_symbols else []

    def __repr__(self):
        lines = [
            'ExpressionInfo(',
            f'  original_expression={self.original_expression!r},',
            f'  normalized_expression={self.normalized_expression!r},',
            f'  errors={self.errors!r},',
            f'  invalid_symbols={self.invalid_symbols!r}',
            ')',
        ]
        return '\n'.join(lines)
| 171 | + |
| 172 | + |
119 | 173 | class Licensing(boolean.BooleanAlgebra): |
120 | 174 | """ |
121 | 175 | Licensing defines a mini language to parse, validate and compare license |
@@ -355,6 +409,12 @@ def unknown_license_keys(self, expression, unique=True, **kwargs): |
355 | 409 | symbols = self.unknown_license_symbols(expression, unique=False, **kwargs) |
356 | 410 | return self._keys(symbols, unique) |
357 | 411 |
|
| 412 | + def validate_license_keys(self, expression): |
| 413 | + unknown_keys = self.unknown_license_keys(expression, unique=True) |
| 414 | + if unknown_keys: |
| 415 | + msg = 'Unknown license key(s): {}'.format(', '.join(unknown_keys)) |
| 416 | + raise ExpressionError(msg) |
| 417 | + |
358 | 418 | def parse(self, expression, validate=False, strict=False, simple=False, **kwargs): |
359 | 419 | """ |
360 | 420 | Return a new license LicenseExpression object by parsing a license |
@@ -422,10 +482,7 @@ def parse(self, expression, validate=False, strict=False, simple=False, **kwargs |
422 | 482 | raise ExpressionError('expression must be a LicenseExpression once parsed.') |
423 | 483 |
|
424 | 484 | if validate: |
425 | | - unknown_keys = self.unknown_license_keys(expression, unique=True) |
426 | | - if unknown_keys: |
427 | | - msg = 'Unknown license key(s): {}'.format(', '.join(unknown_keys)) |
428 | | - raise ExpressionError(msg) |
| 485 | + self.validate_license_keys(expression) |
429 | 486 |
|
430 | 487 | return expression |
431 | 488 |
|
@@ -617,6 +674,104 @@ def dedup(self, expression): |
617 | 674 | raise Exception(f'Unknown expression type: {expression!r}') |
618 | 675 | return deduped |
619 | 676 |
|
| 677 | + def validate(self, expression, strict=True, **kwargs): |
| 678 | + """ |
| 679 | + Return a ExpressionInfo object that contains information about |
| 680 | + the validation of an `expression` license expression string. |
| 681 | +
|
| 682 | + If the syntax and license keys of `expression` is valid, then |
| 683 | + `ExpressionInfo.normalized_license_expression` is set. |
| 684 | +
|
| 685 | + If an error was encountered when validating `expression`, |
| 686 | + `ExpressionInfo.errors` will be populated with strings containing the |
| 687 | + error message that has occured. If an error has occured due to unknown |
| 688 | + license keys or an invalid license symbol, the offending keys or symbols |
| 689 | + will be present in `ExpressionInfo.invalid_symbols` |
| 690 | +
|
| 691 | + If `strict` is True, validation error messages will be included if in a "WITH" |
| 692 | + expression such as "XXX with ZZZ" if the XXX symbol has `is_exception` |
| 693 | + set to True or the YYY symbol has `is_exception` set to False. This |
| 694 | + checks that symbols are used strictly as intended. |
| 695 | + """ |
| 696 | + expression_info = ExpressionInfo( |
| 697 | + original_expression=str(expression) |
| 698 | + ) |
| 699 | + |
| 700 | + # Check `expression` type and syntax |
| 701 | + try: |
| 702 | + parsed_expression = self.parse(expression, strict=strict) |
| 703 | + except ExpressionError as e: |
| 704 | + expression_info.errors.append(str(e)) |
| 705 | + expression_info.invalid_symbols.append(e.token_string) |
| 706 | + return expression_info |
| 707 | + |
| 708 | + # Check `expression` keys (validate) |
| 709 | + try: |
| 710 | + self.validate_license_keys(expression) |
| 711 | + except ExpressionError as e: |
| 712 | + expression_info.errors.append(str(e)) |
| 713 | + unknown_keys = self.unknown_license_keys(expression) |
| 714 | + expression_info.invalid_symbols.extend(unknown_keys) |
| 715 | + return expression_info |
| 716 | + |
| 717 | + # If we have not hit an exception, set `normalized_expression` in |
| 718 | + # `expression_info` only if we did not encounter any errors |
| 719 | + # along the way |
| 720 | + if not expression_info.errors and not expression_info.invalid_symbols: |
| 721 | + expression_info.normalized_expression = str(parsed_expression) |
| 722 | + return expression_info |
| 723 | + |
| 724 | + |
def get_license_index(license_index_location=vendored_scancode_licensedb_index_location):
    """
    Return a list of dictionaries that contain license key information from
    the JSON file at `license_index_location`.

    The default value of `license_index_location` points to a vendored copy
    of the license index from https://scancode-licensedb.aboutcode.org/
    """
    # Decode explicitly as UTF-8 rather than relying on the platform default
    # encoding, which is not UTF-8 on every system.
    with open(license_index_location, encoding='utf-8') as f:
        return json.load(f)
| 735 | + |
| 736 | + |
def load_licensing_from_license_index(license_index):
    """
    Return a Licensing object built from the entries of `license_index`.

    Each entry of `license_index` is a mapping that is expanded as the keyword
    arguments used to create one LicenseSymbol.
    """
    symbols = []
    for attributes in license_index:
        symbols.append(LicenseSymbol(**attributes))
    return Licensing(symbols)
| 744 | + |
| 745 | + |
def build_licensing(license_index):
    """
    Return a Licensing object that has been loaded with license keys.

    `license_index` is an iterable of mappings. The `license_key` and
    `is_exception` values of each mapping are used, and mappings with a true
    `is_deprecated` value are skipped.
    """
    lics = [
        {
            'key': lic.get('license_key', ''),
            # `is_exception` is a boolean flag: default to False rather than
            # to an empty string when the index entry does not provide it.
            'is_exception': lic.get('is_exception', False),
        }
        for lic in license_index
        if not lic.get('is_deprecated', False)
    ]
    return load_licensing_from_license_index(lics)
| 757 | + |
| 758 | + |
def build_spdx_licensing(license_index):
    """
    Return a Licensing object that has been loaded with SPDX license keys.

    `license_index` is an iterable of mappings. Mappings without an
    `spdx_license_key` value or with a true `is_deprecated` value are skipped.
    """
    # Massage data such that SPDX license key is the primary license key
    lics = [
        {
            'key': lic.get('spdx_license_key', ''),
            'aliases': lic.get('other_spdx_license_keys', []),
            # `is_exception` is a boolean flag: default to False rather than
            # to an empty string when the index entry does not provide it.
            'is_exception': lic.get('is_exception', False),
        }
        for lic in license_index
        if lic.get('spdx_license_key')
        and not lic.get('is_deprecated', False)
    ]
    return load_licensing_from_license_index(lics)
| 774 | + |
620 | 775 |
|
621 | 776 | def build_symbols_from_unknown_tokens(tokens): |
622 | 777 | """ |
|
0 commit comments