Skip to content

Commit b147361

Browse files
committed
#331 - Multiple licenses support
* Remove loads and load_lines, as we no longer read the ABOUT file line by line as strings; we are now using yaml.load instead. * Add a YAML loader class to check for duplicated keys * Use YAML dump to write files instead of building a string object Signed-off-by: Chin Yeung Li <[email protected]>
1 parent d1b45eb commit b147361

File tree

3 files changed

+119
-88
lines changed

3 files changed

+119
-88
lines changed

src/attributecode/model.py

Lines changed: 59 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -34,16 +34,20 @@
3434
import os
3535
import posixpath
3636
from posixpath import dirname
37+
38+
import yaml
3739
import re
3840
import sys
3941

4042
if sys.version_info[0] < 3: # Python 2
4143
import backports.csv as csv
44+
from itertools import izip_longest as zip_longest
4245
from urlparse import urljoin, urlparse
4346
from urllib2 import urlopen, Request, HTTPError
4447
else: # Python 3
4548
basestring = str
4649
import csv
50+
from itertools import zip_longest
4751
from urllib.parse import urljoin, urlparse
4852
from urllib.request import urlopen, Request
4953
from urllib.error import HTTPError
@@ -60,7 +64,6 @@
6064
from attributecode import util
6165
from attributecode.util import add_unc
6266
from attributecode.util import copy_license_notice_files
63-
from attributecode.util import check_duplicate_keys_about_file
6467
from attributecode.util import on_windows
6568
from attributecode.util import UNC_PREFIX
6669
from attributecode.util import UNC_PREFIX_POSIX
@@ -107,11 +110,6 @@ def validate(self, *args, **kwargs):
107110
msg = u'Field %(name)s is required'
108111
errors.append(Error(CRITICAL, msg % locals()))
109112
return errors
110-
"""else:
111-
# no error for not present non required fields
112-
# FIXME: should we add an info?
113-
# CY: I don't think so.
114-
pass"""
115113
else:
116114
# present fields should have content ...
117115
if not self.has_content:
@@ -986,51 +984,34 @@ def load(self, location, use_mapping=False, mapping_file=None):
986984
loc = add_unc(loc)
987985
with codecs.open(loc, encoding='utf-8') as txt:
988986
input_text = txt.read()
987+
# Check for duplicated key
988+
yaml.load(input_text, Loader=util.NoDuplicateLoader)
989+
'''
989990
dup_keys = check_duplicate_keys_about_file(input_text)
990991
if dup_keys:
991992
msg = ('Duplicated key name(s): %(dup_keys)s' % locals())
992993
errors.append(Error(ERROR, msg % locals()))
993994
else:
994-
"""
995-
The running_inventory defines if the current process is 'inventory' or not.
996-
This is used for the validation of the about_resource_path.
997-
In the 'inventory' command, the code will use the parent of the about_file_path
998-
location and join with the 'about_resource_path' for the validation.
999-
On the other hand, in the 'gen' command, the code will use the
1000-
generated location (aka base_dir) along with the parent of the about_file_path
1001-
and then join with the 'about_resource_path'
1002-
"""
1003-
running_inventory = True
1004-
errs = self.load_dict(saneyaml.load(input_text), base_dir, running_inventory, use_mapping, mapping_file)
1005-
errors.extend(errs)
995+
'''
996+
"""
997+
The running_inventory defines if the current process is 'inventory' or not.
998+
This is used for the validation of the about_resource_path.
999+
In the 'inventory' command, the code will use the parent of the about_file_path
1000+
location and join with the 'about_resource_path' for the validation.
1001+
On the other hand, in the 'gen' command, the code will use the
1002+
generated location (aka base_dir) along with the parent of the about_file_path
1003+
and then join with the 'about_resource_path'
1004+
"""
1005+
running_inventory = True
1006+
errs = self.load_dict(saneyaml.load(input_text), base_dir, running_inventory, use_mapping, mapping_file)
1007+
errors.extend(errs)
10061008
except Exception as e:
10071009
msg = 'Cannot load invalid ABOUT file: %(location)r: %(e)r\n' + str(e)
10081010
errors.append(Error(CRITICAL, msg % locals()))
10091011

10101012
self.errors = errors
10111013
return errors
10121014

1013-
def loads(self, string, base_dir):
1014-
"""
1015-
Load the ABOUT file from string. Return a list of errors.
1016-
"""
1017-
lines = string.splitlines(True)
1018-
errors = self.load_lines(lines, base_dir)
1019-
self.errors = errors
1020-
return errors
1021-
1022-
def load_lines(self, lines, base_dir):
1023-
"""
1024-
Load the ABOUT file from a lines list. Return a list of errors.
1025-
"""
1026-
errors = []
1027-
parse_errors, fields = parse(lines)
1028-
errors.extend(parse_errors)
1029-
process_errors = self.process(fields, base_dir)
1030-
errors.extend(process_errors)
1031-
self.errors = errors
1032-
return errors
1033-
10341015
def load_dict(self, fields_dict, base_dir, running_inventory=False,
10351016
use_mapping=False, mapping_file=None,
10361017
license_notice_text_location=None, with_empty=True):
@@ -1050,17 +1031,52 @@ def load_dict(self, fields_dict, base_dir, running_inventory=False,
10501031
self.errors = errors
10511032
return errors
10521033

1034+
10531035
def dumps(self, with_absent=False, with_empty=True):
    """
    Return self as a formatted ABOUT string.
    If with_absent, include absent (not present) fields.
    If with_empty, include empty fields.

    The license_key, license_name, license_file and license_url fields are
    not written as separate entries: their values are grouped by position
    into a list of mappings (with 'key', 'name', 'file' and 'url' entries)
    stored under 'licenses'.
    """
    about_data = {}
    # Group the same license information (key, name, file, url) together
    license_key = []
    license_name = []
    license_file = []
    license_url = []

    for field in self.all_fields(with_absent, with_empty):
        if field.name == 'license_key':
            license_key = field.value
        elif field.name == 'license_name':
            license_name = field.value
        elif field.name == 'license_file':
            # Only the keys of the mapping are serialized.
            license_file = field.value.keys()
        elif field.name == 'license_url':
            license_url = field.value
        # No multiple 'about_resource' and 'about_resource_path' reference supported.
        # Take the first element (should only be one) in the list for the
        # value of 'about_resource' and 'about_resource_path'
        elif field.name == 'about_resource':
            about_data[field.name] = field.value[0]
        elif field.name == 'about_resource_path':
            # keys() is a non-subscriptable view on Python 3: materialize
            # it as a list before taking the first element.
            about_data[field.name] = list(field.value.keys())[0]
        else:
            about_data[field.name] = field.value

    # Group the same license information in a list. zip_longest pads the
    # shorter sequences with None so that every group has four items.
    license_parts = ('key', 'name', 'file', 'url')
    grouped = zip_longest(license_key, license_name, license_file, license_url)
    for lic_group in grouped:
        lic_dict = {}
        for part, part_value in zip(license_parts, lic_group):
            # Empty/missing parts are only written when with_empty is set.
            if part_value or with_empty:
                lic_dict[part] = part_value
        about_data.setdefault('licenses', []).append(lic_dict)
    return saneyaml.dump(about_data)
10641080

10651081
def dump(self, location, with_absent=False, with_empty=True):
10661082
"""

src/attributecode/util.py

Lines changed: 60 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
from __future__ import print_function
1818
from __future__ import unicode_literals
1919

20+
import collections
2021
from collections import OrderedDict
2122
import codecs
2223
import errno
@@ -32,6 +33,15 @@
3233
import string
3334
import sys
3435

36+
import yaml
37+
from yaml.reader import Reader
38+
from yaml.scanner import Scanner
39+
from yaml.parser import Parser
40+
from yaml.composer import Composer
41+
from yaml.constructor import Constructor, ConstructorError
42+
from yaml.resolver import Resolver
43+
from yaml.nodes import MappingNode
44+
3545
if sys.version_info[0] < 3:
3646
# Python 2
3747
import backports.csv as csv
@@ -635,4 +645,53 @@ def update_about_dictionary_keys(about_dictionary_list, mapping_output):
635645
if not update_key:
636646
updated_ordered_dict[about_key] = value
637647
updated_dict_list.append(updated_ordered_dict)
638-
return updated_dict_list
648+
return updated_dict_list
649+
650+
class NoDuplicateConstructor(Constructor):
    """
    YAML constructor that refuses mappings containing the same key twice.
    """

    def construct_mapping(self, node, deep=False):
        """
        Return a dict built from the mapping `node`.

        Raise a ConstructorError if `node` is not a mapping node or if one of
        its keys cannot be hashed. Raise a KeyError if the same key is found
        more than once in the mapping.
        """
        if not isinstance(node, MappingNode):
            raise ConstructorError(
                None, None,
                "expected a mapping node, but found %s" % node.id,
                node.start_mark)

        mapping = {}
        for key_node, value_node in node.value:
            # keys can be list -> deep
            key = self.construct_object(key_node, deep=True)
            # lists are not hashable, but tuples are
            if isinstance(key, list):
                key = tuple(key)

            # Probe with hash() instead of isinstance(key, collections.Hashable):
            # that alias no longer exists in `collections` on Python 3.10+, and
            # an isinstance check accepts tuples holding unhashable items.
            try:
                hash(key)
            except TypeError as exc:
                # Keep the message historically emitted on each major version.
                if sys.version_info[0] < 3:
                    problem = "found unacceptable key (%s)" % exc
                else:
                    problem = "found unhashable key"
                raise ConstructorError(
                    "while constructing a mapping", node.start_mark,
                    problem, key_node.start_mark)

            value = self.construct_object(value_node, deep=deep)

            # Actually do the check.
            if key in mapping:
                raise KeyError("Got duplicate key: {!r}".format(key))

            mapping[key] = value
        return mapping
688+
689+
690+
class NoDuplicateLoader(Reader, Scanner, Parser, Composer, NoDuplicateConstructor, Resolver):
    """
    YAML loader combining the reader, scanner, parser, composer and resolver
    stages with NoDuplicateConstructor, so that a mapping with a duplicated
    key raises an error while loading.

    NOTE(review): NoDuplicateConstructor derives from the imported
    `Constructor`, not from a safe constructor -- confirm that this loader is
    only ever used on trusted input.
    """

    def __init__(self, stream):
        # The reader is the only stage that receives the input stream.
        Reader.__init__(self, stream)
        # The remaining stages take no argument; initialize them in order.
        for stage in (Scanner, Parser, Composer, NoDuplicateConstructor, Resolver):
            stage.__init__(self)

tests/test_model.py

Lines changed: 0 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -940,34 +940,6 @@ def test_About_as_dict_with_nothing(self):
940940
# FIXME: why converting back to dict?
941941
assert expected == dict(result)
942942

943-
def test_loads_dumps_is_idempotent(self):
944-
test1 = u'''about_resource: .
945-
name: AboutCode
946-
version: 0.11.0
947-
copyright: |
948-
multi
949-
line
950-
'''
951-
952-
test2 = u'''about_resource: .
953-
name: AboutCode
954-
version: 0.11.0
955-
copyright: >
956-
multi
957-
line
958-
'''
959-
a = model.About()
960-
base_dir = 'some_dir'
961-
a.loads(test1, base_dir)
962-
dumped = a.dumps(with_absent=False, with_empty=False)
963-
assert test1 == dumped
964-
965-
b = model.About()
966-
base_dir = 'some_dir'
967-
b.loads(test2, base_dir)
968-
dumped = b.dumps(with_absent=False, with_empty=False)
969-
assert test2 == dumped
970-
971943
def test_load_dump_is_idempotent(self):
972944
test_file = get_test_loc('load/this.ABOUT')
973945
a = model.About()
@@ -1075,22 +1047,6 @@ def test_write_output_json(self):
10751047
expected = get_test_loc('load/expected.json')
10761048
check_json(expected, result)
10771049

1078-
def test_colon_in_value(self):
1079-
test = u'''about_resource: .
1080-
name: AboutCode
1081-
version: v: 0.11.0
1082-
'''
1083-
expected = u'''about_resource: .
1084-
name: AboutCode
1085-
version: |
1086-
v: 0.11.0
1087-
'''
1088-
a = model.About()
1089-
base_dir = 'some_dir'
1090-
a.loads(test, base_dir)
1091-
dumped = a.dumps(with_absent=False, with_empty=False)
1092-
assert dumped == expected
1093-
10941050
class CollectorTest(unittest.TestCase):
10951051

10961052
def test_collect_inventory_in_directory_with_correct_about_file_path(self):

0 commit comments

Comments
 (0)