Skip to content

Commit aa2ec9d

Browse files
committed
#518 - SCTK input to work with attrib
* Update doc * attrib is now supported with the latest SCTK * code cleanup/enhancement * add/remove tests * update sctk template * update code format Signed-off-by: Chin Yeung Li <[email protected]>
1 parent 08a541a commit aa2ec9d

24 files changed

+2022
-856
lines changed

docs/source/reference.rst

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -263,14 +263,14 @@ Options
263263

264264
.. code-block:: none
265265
266-
--from-inventory FILE Path to an inventory CSV/JSON file as the base list
267-
for files/directories that need to be copied which
268-
have the 'redistribute' flagged.
269-
--with-structures Copy sources with directory structure.
270-
--zip Zip the copied sources to the output location.
271-
-q, --quiet Do not print error or warning messages.
272-
--verbose Show all error and warning messages.
273-
-h, --help Show this message and exit.
266+
--from-inventory FILE Path to an inventory CSV/JSON/XLSX file as the base
267+
list for files/directories that need to be copied
268+
which have the 'redistribute' flagged.
269+
--with-structures Copy sources with directory structure.
270+
--zip Zip the copied sources to the output location.
271+
-q, --quiet Do not print error or warning messages.
272+
--verbose Show all error and warning messages.
273+
-h, --help Show this message and exit.
274274
275275
Purpose
276276
-------

src/attributecode/attrib.py

Lines changed: 80 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@
3939

4040
DEFAULT_LICENSE_SCORE = 100
4141

42+
4243
def generate(abouts, is_about_input, license_dict, scancode, min_license_score, template=None, vartext=None):
4344
"""
4445
Generate an attribution text from an `abouts` list of About objects, a
@@ -55,7 +56,8 @@ def generate(abouts, is_about_input, license_dict, scancode, min_license_score,
5556
lineno, message = template_error
5657
error = Error(
5758
CRITICAL,
58-
'Template validation error at line: {lineno}: "{message}"'.format(**locals())
59+
'Template validation error at line: {lineno}: "{message}"'.format(
60+
**locals())
5961
)
6062
errors.append(error)
6163
return error, None
@@ -87,17 +89,19 @@ def generate(abouts, is_about_input, license_dict, scancode, min_license_score,
8789
filename = list(about.license_file.value.keys())[index]
8890
text = list(about.license_file.value.values())[index]
8991
else:
90-
error = Error(CRITICAL, 'No license file found for ' + name)
92+
error = Error(
93+
CRITICAL, 'No license file found for ' + name)
9194
errors.append(error)
9295
break
93-
if about.license_url.value:
96+
if about.license_url.value:
9497
url = about.license_url.value[index]
9598
else:
9699
url = ''
97100
license_object = License(key, name, filename, url, text)
98101
licenses_list.append(license_object)
99102
index = index + 1
100103
else:
104+
# Create license object
101105
for key in license_dict:
102106
name = license_dict[key][0]
103107
filename = license_dict[key][1]
@@ -106,81 +110,103 @@ def generate(abouts, is_about_input, license_dict, scancode, min_license_score,
106110
license_object = License(key, name, filename, url, text)
107111
licenses_list.append(license_object)
108112

109-
110113
# We need special treatment for scancode input.
111114
# Each about_object may have duplicated license key and same/different license score
112115
# We will only keep the unique license key with the highest license score.
113116
# The process will update the license_key, license_name and license_score.
114117
if scancode:
115118
meet_score_licenses_list = []
116119
for about in abouts:
117-
# See if the input has 'matched_text'
118-
matched_text_exist = False
119-
try:
120-
if about.matched_text:
121-
matched_text_exist = True
122-
except:
123-
pass
124120
# We will use a dictionary to keep the unique license key
125121
# where the dictionary key is the license key and the dictionary value
126122
# is (lic_score, lic_name) or (lic_score, lic_name, matched_text)
127123
if about.license_key.value:
128124
updated_dict = {}
129125
lic_key = about.license_key.value
130-
lic_name = about.license_name.value
126+
lic_name = []
127+
if about.license_name.value:
128+
lic_name = about.license_name.value
129+
else:
130+
lic_name = []
131+
for key_list in lic_key:
132+
lic_name_list = []
133+
for k in key_list:
134+
try:
135+
lic_name_list.append(license_dict[k][0])
136+
except:
137+
lic_name_list.append(k)
138+
lic_name.append(lic_name_list)
139+
about.license_name.value = lic_name
140+
141+
if not lic_name:
142+
lic_name = []
143+
for key in lic_key:
144+
lic_name.append(license_dict[key][0])
131145
lic_score = about.license_score.value
132-
if matched_text_exist:
133-
matched_text = about.matched_text.value
134-
assert len(lic_key) == len(matched_text)
135146
assert len(lic_key) == len(lic_name)
136147
assert len(lic_key) == len(lic_score)
137-
if lic_key:
138-
index = 0
139-
for key in lic_key:
148+
149+
lic_key_expression = about.license_key_expression.value
150+
if lic_key_expression:
151+
updated_lic_key_expression = []
152+
removed_index = []
153+
for index, key in enumerate(lic_key_expression):
140154
if key in updated_dict:
141-
if matched_text_exist:
142-
previous_score, _name, _detected_text = updated_dict[key]
143-
else:
144-
previous_score, _name = updated_dict[key]
155+
previous_score, _name = updated_dict[key]
145156
current_score = lic_score[index]
146157
if current_score > previous_score:
147-
if matched_text_exist:
148-
updated_dict[key] = (lic_score[index], lic_name[index], matched_text[index])
149-
else:
150-
updated_dict[key] = (lic_score[index], lic_name[index])
158+
updated_dict[key] = (
159+
lic_score[index], lic_name[index])
160+
# Track the duplicated index
161+
removed_index.append(index)
151162
else:
152-
if matched_text_exist:
153-
updated_dict[key] = (lic_score[index], lic_name[index], matched_text[index])
154-
else:
155-
updated_dict[key] = (lic_score[index], lic_name[index])
156-
index = index + 1
163+
updated_dict[key] = (
164+
lic_score[index], lic_name[index])
165+
updated_lic_key_expression.append(key)
166+
# Remove the duplication
167+
for index, key in enumerate(about.license_key.value):
168+
if index in removed_index:
169+
del about.license_key.value[index]
170+
del about.license_name.value[index]
171+
del about.license_score.value[index]
172+
173+
lic_key_expression = updated_lic_key_expression
157174
updated_lic_key = []
158175
updated_lic_name = []
159176
updated_lic_score = []
160-
if matched_text_exist:
161-
updated_matched_text = []
162-
for lic in updated_dict:
163-
if matched_text_exist:
164-
score, name, text = updated_dict[lic]
165-
else:
166-
score, name = updated_dict[lic]
177+
for index, lic in enumerate(updated_dict):
178+
_sp_char, lic_keys = parse_license_expression(lic)
179+
score, name = updated_dict[lic]
167180
if score >= min_license_score:
168-
updated_lic_key.append(lic)
169-
updated_lic_score.append(score)
170-
updated_lic_name.append(name)
171-
if matched_text_exist:
172-
updated_matched_text.append(text)
173-
if not lic in meet_score_licenses_list:
174-
meet_score_licenses_list.append(lic)
181+
for lic_key in lic_keys:
182+
if not lic_key in meet_score_licenses_list:
183+
meet_score_licenses_list.append(lic_key)
184+
185+
updated_lic_key.append(lic_keys)
186+
updated_lic_name.append(name)
187+
updated_lic_score.append(score)
188+
189+
# Remove items that don't meet the minimum score
190+
for index, score in enumerate(updated_lic_score):
191+
if score < min_license_score:
192+
del updated_lic_key[index]
193+
del updated_lic_name[index]
194+
del updated_lic_score[index]
195+
del lic_key_expression[index]
196+
175197
about.license_key.value = updated_lic_key
176198
about.license_name.value = updated_lic_name
177199
about.license_score.value = updated_lic_score
178-
if matched_text_exist:
179-
about.matched_text.value = updated_matched_text
200+
about.license_key_expression.value = lic_key_expression
180201

202+
# Remove the license object
203+
remove_list = []
181204
for lic in licenses_list:
182205
if not lic.key in meet_score_licenses_list:
183-
licenses_list.remove(lic)
206+
remove_list.append(lic)
207+
208+
for lic in remove_list:
209+
licenses_list.remove(lic)
184210

185211
for about in abouts:
186212
# Create a license expression with license name
@@ -200,14 +226,16 @@ def generate(abouts, is_about_input, license_dict, scancode, min_license_score,
200226
lic_name_expression = ' '.join(lic_name_expression_list)
201227

202228
# Add the license name expression string into the about object as a custom field
203-
custom_field = StringField(name='license_name_expression', value=lic_name_expression, present=True)
229+
custom_field = StringField(
230+
name='license_name_expression', value=lic_name_expression, present=True)
204231
setattr(about, 'license_name_expression', custom_field)
205232

206233
# Sort the about objects by name
207234
abouts = sorted(abouts, key=lambda x: x.name.value.lower())
208235

209236
# Sort the license object by key
210237
licenses_list = sorted(licenses_list, key=lambda x: x.key)
238+
211239
rendered = template.render(
212240
abouts=abouts,
213241
common_licenses=COMMON_LICENSES,
@@ -219,6 +247,7 @@ def generate(abouts, is_about_input, license_dict, scancode, min_license_score,
219247

220248
return errors, rendered
221249

250+
222251
def get_license_file_key(license_text_name):
223252
if license_text_name.endswith('.LICENSE'):
224253
# See https://github.com/nexB/aboutcode-toolkit/issues/439
@@ -274,7 +303,8 @@ def generate_and_save(abouts, is_about_input, license_dict, output_location, sca
274303
for about in abouts:
275304
if not about.license_expression.value:
276305
continue
277-
special_char_in_expression, lic_list = parse_license_expression(about.license_expression.value)
306+
special_char_in_expression, lic_list = parse_license_expression(
307+
about.license_expression.value)
278308
if special_char_in_expression:
279309
msg = (u"The following character(s) cannot be in the license_expression: " +
280310
str(special_char_in_expression))

0 commit comments

Comments
 (0)