Skip to content

Commit 3b82c21

Browse files
committed
Merge with #479
Signed-off-by: Chin Yeung Li <[email protected]>
2 parents 6d4aab1 + 6ae711b commit 3b82c21

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

63 files changed

+2967
-382
lines changed

setup.cfg

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ install_requires =
5151
license_expression >= 0.94
5252
openpyxl
5353
packageurl_python >= 0.9.0
54+
openpyxl
5455

5556
setup_requires = setuptools_scm[toml] >= 4
5657
python_requires = >=3.6.*, <4

src/attributecode/attrib.py

Lines changed: 131 additions & 119 deletions
Original file line numberDiff line numberDiff line change
@@ -26,16 +26,20 @@
2626
from attributecode import ERROR
2727
from attributecode import Error
2828
from attributecode.licenses import COMMON_LICENSES
29-
from attributecode.model import detect_special_char
3029
from attributecode.model import parse_license_expression
30+
from attributecode.model import License
3131
from attributecode.util import add_unc
3232
from attributecode.attrib_util import multi_sort
3333

3434
DEFAULT_TEMPLATE_FILE = os.path.join(
3535
os.path.dirname(os.path.realpath(__file__)), '../../templates', 'default_html.template')
3636

37+
DEFAULT_TEMPLATE_SCANCODE_FILE = os.path.join(
38+
os.path.dirname(os.path.realpath(__file__)), '../../templates', 'scancode_html.template')
3739

38-
def generate(abouts, template=None, variables=None):
40+
DEFAULT_LICENSE_SCORE = 100
41+
42+
def generate(abouts, is_about_input, license_dict, scancode, min_license_score, template=None, variables=None):
3943
"""
4044
Generate an attribution text from an `abouts` list of About objects, a
4145
`template` template text and a `variables` optional dict of extra
@@ -56,121 +60,122 @@ def generate(abouts, template=None, variables=None):
5660
return error, None
5761

5862
template = jinja2.Template(template)
63+
# Get the current UTC time
64+
utcnow = datetime.datetime.utcnow()
5965

60-
try:
61-
captured_license = []
62-
license_file_key_and_context = {}
63-
sorted_license_file_key_and_context = {}
64-
license_file_name_and_license_file_key = {}
65-
license_key_and_license_name = {}
66-
license_name_and_license_key = {}
67-
license_key_and_license_file_name = {}
68-
license_file_key_and_license_key = {}
69-
# FIXME: This needs to be simplified
66+
licenses_list = []
67+
lic_name_expression_list = []
68+
if is_about_input:
7069
for about in abouts:
7170
# about.license_file.value is an OrderedDict with license_file_name as
7271
# the key and the license text as the value
73-
if about.license_file:
74-
# We want to create a dictionary which have the license file key as
75-
# the key and license text as the value
76-
# The reason we want to use license file key as the key instead of the
77-
# license key is because there is a scenario such that the input only provide
78-
# license_file but not license_key
79-
# The license file key is basically a license_key or a license file
80-
# name if it's not generated from DJE. The reason for not using
81-
# license file name as the key at the first place is because
82-
# we need the license_key to match with the common license list
83-
for license_file_name in about.license_file.value:
84-
if not license_file_name in captured_license:
85-
captured_license.append(license_file_name)
86-
license_file_key = get_license_file_key(license_file_name)
87-
license_file_key_and_context[license_file_key] = about.license_file.value[license_file_name]
88-
sorted_license_file_key_and_context = collections.OrderedDict(sorted(license_file_key_and_context.items()))
89-
license_file_name_and_license_file_key[license_file_name] = license_file_key
90-
91-
lic_list = []
92-
lic_name_list = []
93-
lic_name_expression_list = []
94-
# Convert/map the key to name
95-
if about.license_name.value:
96-
if about.license_expression.value or about.license_key.value:
97-
if about.license_expression.value:
98-
special_char, lic_list = parse_license_expression(about.license_expression.value)
99-
about.license_key.value = lic_list
100-
else:
101-
lic_list = about.license_key.value
102-
special_char = []
103-
for lic in lic_list:
104-
special_char_list = detect_special_char(lic)
105-
if special_char_list:
106-
for char in special_char_list:
107-
special_char.append(char)
108-
if special_char:
109-
error = Error(CRITICAL, 'Special character(s) are not allowed in '
110-
'license_expression or license_key: %s' % special_char)
111-
return error, ''
72+
index = 0
73+
for lic_name in about.license_name.value:
74+
if about.license_key.value:
75+
key = about.license_key.value[index]
11276
else:
113-
# No license_key or license_expression present. We will put
114-
# None as the value of license key
115-
about.license_key.value = about.license_file.value.keys()
116-
lic_list = about.license_file.value.keys()
117-
118-
lic_name_list = about.license_name.value
119-
120-
# The order of the license_name and key should be the same
121-
# The length for both list should be the same
122-
assert len(lic_name_list) == len(lic_list)
123-
124-
# Map the license key to license name
125-
index_for_license_name_list = 0
126-
for key in lic_list:
127-
license_key_and_license_file_name[key] = list(about.license_file.value.keys())[index_for_license_name_list]
128-
license_key_and_license_name[key] = lic_name_list[index_for_license_name_list]
129-
license_name_and_license_key[lic_name_list[index_for_license_name_list]] = key
130-
license_file_key = license_file_name_and_license_file_key[license_key_and_license_file_name[key]]
131-
license_file_key_and_license_key[license_file_key] = key
132-
index_for_license_name_list = index_for_license_name_list + 1
133-
134-
# Create a license expression with license name instead of key
135-
for segment in about.license_expression.value.split():
136-
if segment in license_key_and_license_name:
137-
lic_name_expression_list.append(license_key_and_license_name[segment])
77+
key = lic_name
78+
captured = False
79+
for lic in licenses_list:
80+
if key in lic.key:
81+
captured = True
82+
if not captured or not licenses_list:
83+
name = lic_name
84+
filename = list(about.license_file.value.keys())[index]
85+
if about.license_url.value:
86+
url = about.license_url.value[index]
13887
else:
139-
lic_name_expression_list.append(segment)
140-
141-
# Join the license name expression into a single string
142-
lic_name_expression = ' '.join(lic_name_expression_list)
143-
144-
# Add the license name expression string into the about object
145-
about.license_name_expression = lic_name_expression
146-
147-
# Get the current UTC time
148-
utcnow = datetime.datetime.utcnow()
149-
rendered = template.render(
150-
abouts=abouts, common_licenses=COMMON_LICENSES,
151-
license_file_key_and_context=sorted_license_file_key_and_context,
152-
license_file_key_and_license_key=license_file_key_and_license_key,
153-
license_file_name_and_license_file_key=license_file_name_and_license_file_key,
154-
license_key_and_license_file_name=license_key_and_license_file_name,
155-
license_key_and_license_name=license_key_and_license_name,
156-
license_name_and_license_key=license_name_and_license_key,
157-
utcnow=utcnow,
158-
tkversion=__version__,
159-
variables=variables
160-
)
161-
except Exception as e:
162-
lineno = getattr(e, 'lineno', '') or ''
163-
if lineno:
164-
lineno = ' at line: {}'.format(lineno)
165-
err = getattr(e, 'message', '') or ''
166-
# error = Error(
167-
# CRITICAL,
168-
# 'Template processing error {lineno}: {err}'.format(**locals()),
169-
# )
170-
error = Error(
171-
CRITICAL,
172-
'Template processing error:' + str(e),
173-
)
88+
url = ''
89+
text = list(about.license_file.value.values())[index]
90+
license_object = License(key, name, filename, url, text)
91+
licenses_list.append(license_object)
92+
index = index + 1
93+
else:
94+
for key in license_dict:
95+
name = license_dict[key][0]
96+
filename = license_dict[key][1]
97+
text = license_dict[key][2]
98+
url = license_dict[key][3]
99+
license_object = License(key, name, filename, url, text)
100+
licenses_list.append(license_object)
101+
102+
103+
# We need special treatment for scancode input.
104+
# Each about object may have duplicate license keys with the same or different license scores.
105+
# We will only keep the unique license key with the highest license score.
106+
# The process will update the license_key, license_name and license_score.
107+
if scancode:
108+
meet_score_licenses_list = []
109+
for about in abouts:
110+
# We will use a dictionary to keep the unique license key
111+
# in which the dictionary key is the license key and the dictionary value
112+
# is (lic_score, lic_name)
113+
if about.license_key.value:
114+
updated_dict = {}
115+
lic_key = about.license_key.value
116+
lic_name = about.license_name.value
117+
lic_score = about.license_score.value
118+
assert len(lic_key) == len(lic_name)
119+
assert len(lic_key) == len(lic_score)
120+
if lic_key:
121+
index = 0
122+
for key in lic_key:
123+
if key in updated_dict:
124+
previous_score, _name = updated_dict[key]
125+
current_score = lic_score[index]
126+
if current_score > previous_score:
127+
updated_dict[key] = (lic_score[index], lic_name[index])
128+
else:
129+
updated_dict[key] = (lic_score[index], lic_name[index])
130+
index = index + 1
131+
updated_lic_key = []
132+
updated_lic_name = []
133+
updated_lic_score = []
134+
for lic in updated_dict:
135+
score, name = updated_dict[lic]
136+
if score >= min_license_score:
137+
updated_lic_key.append(lic)
138+
updated_lic_score.append(score)
139+
updated_lic_name.append(name)
140+
if not lic in meet_score_licenses_list:
141+
meet_score_licenses_list.append(lic)
142+
about.license_key.value = updated_lic_key
143+
about.license_name.value = updated_lic_name
144+
about.license_score.value = updated_lic_score
145+
146+
for lic in licenses_list:
147+
if not lic.key in meet_score_licenses_list:
148+
licenses_list.remove(lic)
149+
150+
for about in abouts:
151+
# Create a license expression with license name
152+
if about.license_expression.value:
153+
for segment in about.license_expression.value.split():
154+
not_lic = True
155+
for lic in licenses_list:
156+
if segment == lic.key:
157+
lic_name_expression_list.append(lic.name)
158+
not_lic = False
159+
break
160+
if not_lic:
161+
lic_name_expression_list.append(segment)
162+
# Join the license name expression into a single string
163+
lic_name_expression = ' '.join(lic_name_expression_list)
164+
165+
# Add the license name expression string into the about object
166+
about.license_name_expression = lic_name_expression
167+
168+
# Sort the license object by key
169+
licenses_list = sorted(licenses_list, key=lambda x: x.key)
170+
171+
rendered = template.render(
172+
abouts=abouts,
173+
common_licenses=COMMON_LICENSES,
174+
licenses_list=licenses_list,
175+
utcnow=utcnow,
176+
tkversion=__version__,
177+
variables=variables
178+
)
174179
return error, rendered
175180

176181

@@ -195,7 +200,7 @@ def check_template(template_string):
195200
return e.lineno, e.message
196201

197202

198-
def generate_from_file(abouts, template_loc=DEFAULT_TEMPLATE_FILE, variables=None):
203+
def generate_from_file(abouts, is_about_input, license_dict, scancode, min_license_score, template_loc=None, variables=None):
199204
"""
200205
Generate an attribution text from an `abouts` list of About objects, a
201206
`template_loc` template file location and a `variables` optional
@@ -204,14 +209,19 @@ def generate_from_file(abouts, template_loc=DEFAULT_TEMPLATE_FILE, variables=Non
204209
Return a tuple of (error, attribution text) where error is an Error object
205210
or None and attribution text is the generated text or None.
206211
"""
207-
208-
template_loc = add_unc(template_loc)
212+
if not template_loc:
213+
if scancode:
214+
template_loc = add_unc(DEFAULT_TEMPLATE_SCANCODE_FILE)
215+
else:
216+
template_loc = add_unc(DEFAULT_TEMPLATE_FILE)
217+
else:
218+
template_loc = add_unc(template_loc)
209219
with io.open(template_loc, encoding='utf-8') as tplf:
210220
tpls = tplf.read()
211-
return generate(abouts, template=tpls, variables=variables)
221+
return generate(abouts, is_about_input, license_dict, scancode, min_license_score, template=tpls, variables=variables)
212222

213223

214-
def generate_and_save(abouts, output_location, template_loc=None, variables=None):
224+
def generate_and_save(abouts, is_about_input, license_dict, output_location, scancode=False, min_license_score=0, template_loc=None, variables=None):
215225
"""
216226
Generate an attribution text from an `abouts` list of About objects, a
217227
`template_loc` template file location and a `variables` optional
@@ -220,7 +230,6 @@ def generate_and_save(abouts, output_location, template_loc=None, variables=None
220230
Return a list of Error objects if any.
221231
"""
222232
errors = []
223-
224233
# Parse license_expression and save to the license list
225234
for about in abouts:
226235
if not about.license_expression.value:
@@ -230,11 +239,14 @@ def generate_and_save(abouts, output_location, template_loc=None, variables=None
230239
msg = (u"The following character(s) cannot be in the license_expression: " +
231240
str(special_char_in_expression))
232241
errors.append(Error(ERROR, msg))
233-
234242
rendering_error, rendered = generate_from_file(
235243
abouts,
244+
is_about_input,
245+
license_dict,
246+
scancode=scancode,
247+
min_license_score=min_license_score,
236248
template_loc=template_loc,
237-
variables=variables
249+
variables=variables,
238250
)
239251

240252
if rendering_error:

src/attributecode/attrib_util.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
# ============================================================================
1616

1717
from jinja2 import Environment
18-
from jinja2.filters import environmentfilter
18+
from jinja2.filters import pass_environment
1919
from jinja2.filters import make_attrgetter
2020
from jinja2.filters import ignore_case
2121
from jinja2.filters import FilterArgumentError
@@ -38,7 +38,7 @@ def get_template(template_text):
3838
return env.from_string(template_text)
3939

4040

41-
@environmentfilter
41+
@pass_environment
4242
def multi_sort(environment, value, reverse=False, case_sensitive=False,
4343
attributes=None):
4444
"""
@@ -72,7 +72,7 @@ def key(v):
7272
return sorted(value, key=key, reverse=reverse)
7373

7474

75-
@environmentfilter
75+
@pass_environment
7676
def unique_together(environment, value, case_sensitive=False, attributes=None):
7777
"""
7878
Return a list of unique items from an iterable. Unicity is checked when

0 commit comments

Comments
 (0)