Skip to content

Commit b698c6b

Browse files
committed
#479 - Code related to using a scancode scan as the input
* Create a scancode default template. * Use a different default template for the usual input and for scancode input. I tried to use the template to handle the min_license_score situation, but that turned out to be too hard and too complex, so I updated the code to handle it instead. Tests are expected to fail, as I haven't updated them yet. Signed-off-by: Chin Yeung Li <[email protected]>
1 parent ecbc398 commit b698c6b

File tree

5 files changed

+122
-110
lines changed

5 files changed

+122
-110
lines changed

src/attributecode/attrib.py

Lines changed: 63 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -34,9 +34,12 @@
3434
DEFAULT_TEMPLATE_FILE = os.path.join(
3535
os.path.dirname(os.path.realpath(__file__)), '../../templates', 'default_html.template')
3636

37+
DEFAULT_TEMPLATE_SCANCODE_FILE = os.path.join(
38+
os.path.dirname(os.path.realpath(__file__)), '../../templates', 'scancode_html.template')
39+
3740
DEFAULT_LICENSE_SCORE = 100
3841

39-
def generate(abouts, is_about_input, license_dict, min_license_score, template=None, variables=None):
42+
def generate(abouts, is_about_input, license_dict, scancode, min_license_score, template=None, variables=None):
4043
"""
4144
Generate an attribution text from an `abouts` list of About objects, a
4245
`template` template text and a `variables` optional dict of extra
@@ -96,6 +99,54 @@ def generate(abouts, is_about_input, license_dict, min_license_score, template=N
9699
license_object = License(key, name, filename, url, text)
97100
licenses_list.append(license_object)
98101

102+
103+
# We need special treatment for scancode input.
104+
# Each about_object may have duplicated license keys with the same or different license scores.
105+
# We will only keep the unique license key with the highest license score.
106+
# The process will update the license_key, license_name and license_score.
107+
if scancode:
108+
meet_score_licenses_list = []
109+
for about in abouts:
110+
# We will use a dictionary to keep the unique license key
111+
# where the dictionary key is the license key and the dictionary value
112+
# is (lic_score, lic_name)
113+
if about.license_key.value:
114+
updated_dict = {}
115+
lic_key = about.license_key.value
116+
lic_name = about.license_name.value
117+
lic_score = about.license_score.value
118+
assert len(lic_key) == len(lic_name)
119+
assert len(lic_key) == len(lic_score)
120+
if lic_key:
121+
index = 0
122+
for key in lic_key:
123+
if key in updated_dict:
124+
previous_score, _name = updated_dict[key]
125+
current_score = lic_score[index]
126+
if current_score > previous_score:
127+
updated_dict[key] = (lic_score[index], lic_name[index])
128+
else:
129+
updated_dict[key] = (lic_score[index], lic_name[index])
130+
index = index + 1
131+
updated_lic_key = []
132+
updated_lic_name = []
133+
updated_lic_score = []
134+
for lic in updated_dict:
135+
score, name = updated_dict[lic]
136+
if score >= min_license_score:
137+
updated_lic_key.append(lic)
138+
updated_lic_score.append(score)
139+
updated_lic_name.append(name)
140+
if not lic in meet_score_licenses_list:
141+
meet_score_licenses_list.append(lic)
142+
about.license_key.value = updated_lic_key
143+
about.license_name.value = updated_lic_name
144+
about.license_score.value = updated_lic_score
145+
146+
for lic in licenses_list:
147+
if not lic.key in meet_score_licenses_list:
148+
licenses_list.remove(lic)
149+
99150
for about in abouts:
100151
# Create a license expression with license name
101152
if about.license_expression.value:
@@ -121,7 +172,6 @@ def generate(abouts, is_about_input, license_dict, min_license_score, template=N
121172
abouts=abouts,
122173
common_licenses=COMMON_LICENSES,
123174
licenses_list=licenses_list,
124-
min_license_score=min_license_score,
125175
utcnow=utcnow,
126176
tkversion=__version__,
127177
variables=variables
@@ -150,7 +200,7 @@ def check_template(template_string):
150200
return e.lineno, e.message
151201

152202

153-
def generate_from_file(abouts, is_about_input, license_dict, min_license_score, template_loc=DEFAULT_TEMPLATE_FILE, variables=None):
203+
def generate_from_file(abouts, is_about_input, license_dict, scancode, min_license_score, template_loc=None, variables=None):
154204
"""
155205
Generate an attribution text from an `abouts` list of About objects, a
156206
`template_loc` template file location and a `variables` optional
@@ -159,13 +209,19 @@ def generate_from_file(abouts, is_about_input, license_dict, min_license_score,
159209
Return a tuple of (error, attribution text) where error is an Error object
160210
or None and attribution text is the generated text or None.
161211
"""
162-
template_loc = add_unc(template_loc)
212+
if not template_loc:
213+
if scancode:
214+
template_loc = add_unc(DEFAULT_TEMPLATE_SCANCODE_FILE)
215+
else:
216+
template_loc = add_unc(DEFAULT_TEMPLATE_FILE)
217+
else:
218+
template_loc = add_unc(template_loc)
163219
with io.open(template_loc, encoding='utf-8') as tplf:
164220
tpls = tplf.read()
165-
return generate(abouts, is_about_input, license_dict, min_license_score, template=tpls, variables=variables)
221+
return generate(abouts, is_about_input, license_dict, scancode, min_license_score, template=tpls, variables=variables)
166222

167223

168-
def generate_and_save(abouts, is_about_input, license_dict, output_location, min_license_score=0, template_loc=None, variables=None):
224+
def generate_and_save(abouts, is_about_input, license_dict, output_location, scancode=False, min_license_score=0, template_loc=None, variables=None):
169225
"""
170226
Generate an attribution text from an `abouts` list of About objects, a
171227
`template_loc` template file location and a `variables` optional
@@ -187,6 +243,7 @@ def generate_and_save(abouts, is_about_input, license_dict, output_location, min
187243
abouts,
188244
is_about_input,
189245
license_dict,
246+
scancode=scancode,
190247
min_license_score=min_license_score,
191248
template_loc=template_loc,
192249
variables=variables,

src/attributecode/cmd.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -279,7 +279,7 @@ def gen(location, output, android, fetch_license, fetch_license_djc, reference,
279279

280280
def validate_template(ctx, param, value):
281281
if not value:
282-
return DEFAULT_TEMPLATE_FILE
282+
return None
283283

284284
with io.open(value, encoding='utf-8') as templatef:
285285
template_error = check_template(templatef.read())
@@ -383,7 +383,7 @@ def attrib(input, output, api_url, api_key, scancode, min_license_score, referen
383383
msg = 'The input file from scancode toolkit needs to be in JSON format.'
384384
click.echo(msg)
385385
sys.exit(1)
386-
if not min_license_score:
386+
if not min_license_score and not min_license_score == 0:
387387
min_license_score=DEFAULT_LICENSE_SCORE
388388

389389
if min_license_score:
@@ -452,6 +452,7 @@ def attrib(input, output, api_url, api_key, scancode, min_license_score, referen
452452
is_about_input=is_about_input,
453453
license_dict=dict(sorted(license_dict.items())),
454454
output_location=output,
455+
scancode=scancode,
455456
min_license_score=min_license_score,
456457
template_loc=template,
457458
variables=vartext,

src/attributecode/gen.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -211,6 +211,7 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
211211
ld_errors = about.load_dict(
212212
fields,
213213
base_dir,
214+
scancode=scancode,
214215
from_attrib=from_attrib,
215216
running_inventory=False,
216217
reference_dir=reference_dir,
@@ -237,6 +238,7 @@ def load_inventory(location, from_attrib=False, base_dir=None, scancode=False, r
237238
about.license_score.value = score_list
238239
except:
239240
pass
241+
240242
return unique(errors), abouts
241243

242244

src/attributecode/model.py

Lines changed: 20 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -944,7 +944,7 @@ def hydrate(self, fields):
944944
return errors
945945

946946
def process(self, fields, about_file_path, running_inventory=False,
947-
base_dir=None, from_attrib=False, reference_dir=None):
947+
base_dir=None, scancode=False, from_attrib=False, reference_dir=None):
948948
"""
949949
Validate and set as attributes on this About object a sequence of
950950
`fields` name/value tuples. Return a list of errors.
@@ -961,13 +961,18 @@ def process(self, fields, about_file_path, running_inventory=False,
961961
errors.extend(copy_err)
962962

963963
# TODO: why? we validate all fields, not only these hydrated
964-
validation_errors = validate_fields(
965-
self.all_fields(),
966-
about_file_path,
967-
running_inventory,
968-
self.base_dir,
969-
self.reference_dir)
970-
errors.extend(validation_errors)
964+
# The validation functions do not allow duplicated entries in a list, meaning
965+
# they will cause problems when using scancode license detection as an input, as
966+
# it usually returns duplicated license_key values and many licenses have duplicated
967+
# scores such as 100. We need to handle this scenario using a different method.
968+
if not scancode:
969+
validation_errors = validate_fields(
970+
self.all_fields(),
971+
about_file_path,
972+
running_inventory,
973+
self.base_dir,
974+
self.reference_dir)
975+
errors.extend(validation_errors)
971976
return errors
972977

973978
def load(self, location):
@@ -1015,7 +1020,7 @@ def load(self, location):
10151020

10161021
# FIXME: should be a from_dict class factory instead
10171022
# FIXME: running_inventory: remove this : this should be done in the commands, not here
1018-
def load_dict(self, fields_dict, base_dir, from_attrib=False, running_inventory=False, reference_dir=None,):
1023+
def load_dict(self, fields_dict, base_dir, scancode=False, from_attrib=False, running_inventory=False, reference_dir=None,):
10191024
"""
10201025
Load this About object file from a `fields_dict` name/value dict.
10211026
Return a list of errors.
@@ -1046,14 +1051,17 @@ def load_dict(self, fields_dict, base_dir, from_attrib=False, running_inventory=
10461051
# 'Field licenses is a custom field.'
10471052
licenses_field = (key, value)
10481053
fields.remove(licenses_field)
1054+
10491055
errors = self.process(
10501056
fields=fields,
10511057
about_file_path=self.about_file_path,
10521058
running_inventory=running_inventory,
10531059
base_dir=base_dir,
1060+
scancode=scancode,
10541061
from_attrib=from_attrib,
10551062
reference_dir=reference_dir,
10561063
)
1064+
10571065
self.errors = errors
10581066
return errors
10591067

@@ -1570,20 +1578,10 @@ def pre_process_and_fetch_license_dict(abouts, api_url=None, api_key=None, scanc
15701578
if scancode:
15711579
lic_exp = ''
15721580
lic_list = []
1573-
# Since the model treats license_expressions (from scancode scan) as a custom field
1574-
# in string format, we need to capture this string to convert to a list
1575-
# and then use the `AND` condition if multiple licenses exist.
1576-
# See https://github.com/nexB/aboutcode-toolkit/issues/479#issuecomment-946328428
1581+
# The license_expressions value returned from scancode is a list of license keys.
1582+
# Therefore, we will combine it with the 'AND' condition
15771583
if about.license_expressions.value:
1578-
# Stripping '[', ']', quote and spaces
1579-
converted_lic_exp = about.license_expressions.value.strip("[").strip("]").replace('\'','').replace(' ','')
1580-
# Convert the updated lic_exp string to list
1581-
converted_lic_list = converted_lic_exp.split(',')
1582-
for lic in converted_lic_list:
1583-
# Only keep unique license keys
1584-
if not lic in lic_list:
1585-
lic_list.append(lic)
1586-
lic_exp = " AND ".join(lic_list)
1584+
lic_exp = " AND ".join(about.license_expressions.value)
15871585
about.license_expression.value = lic_exp
15881586
about.license_expression.present = True
15891587

templates/scancode_html.template

Lines changed: 34 additions & 80 deletions
Original file line numberDiff line numberDiff line change
@@ -18,94 +18,50 @@
1818
</div>
1919

2020
<div class="oss-table-of-contents">
21-
{% set index = namespace(value=0) %}
21+
{% set index = namespace(value=0) %}
2222
{% for about_object in abouts %}
23-
{% set captured = {} %}
24-
{% if about_object.license_expression.value %}
25-
{% for lic_score in about_object.license_score.value %}
26-
{% if lic_score | float >= min_license_score %}
27-
{% if not captured[about_object.name.value] %}
28-
<p><a href="#component_{{ index.value }}">{{ about_object.name.value }}{% if about_object.version.value %} {{ about_object.version.value }}{% endif %}</a></p>
29-
{% set _ = captured.update({ about_object.name.value: true }) %}
30-
{% set index.value = index.value + 1 %}
31-
{% endif %}
32-
{% endif %}
33-
{% endfor %}
34-
{% endif %}
23+
{% set captured = {} %}
24+
{% if about_object.license_key.value %}
25+
{% if not captured[about_object.name.value] %}
26+
<p><a href="#component_{{ index.value }}">{{ about_object.name.value }}{% if about_object.version.value %} {{ about_object.version.value }}{% endif %}</a></p>
27+
{% set _ = captured.update({ about_object.name.value: true }) %}
28+
{% set index.value = index.value + 1 %}
29+
{% endif %}
30+
{% endif %}
3531
{% endfor %}
3632
</div>
3733

3834
<hr/>
3935

40-
{% set common_licenses_meet_score = {} %}
41-
{% set index = namespace(value=0) %}
36+
{% set index = namespace(value=0) %}
4237
{% for about_object in abouts %}
43-
{% set captured = {} %}
44-
{% if about_object.license_expression.value %}
45-
{% set count = namespace(value=0) %}
46-
{{ about_object.license_key.value }}
47-
{{ about_object.license_score.value }}
48-
{% for lic_score in about_object.license_score.value %}
49-
{% if lic_score | float >= min_license_score %}
50-
{% if not captured[about_object.name.value] %}
51-
<div class="oss-component" id="component_{{ index.value }}">
52-
<h3 class="component-name">{{ about_object.name.value }} {% if about_object.version.value %}{{ about_object.version.value }}{% endif %} </h3>
53-
{% set _ = captured.update({ about_object.name.value: true }) %}
54-
{% set index.value = index.value + 1 %}
55-
{% endif %}
56-
<p>This component is licensed under {{ about_object.license_name.value[count.value] }}</p>
57-
{% endif %}
58-
{% set count.value = count.value + 1 %}
59-
{% endfor %}
60-
{% if about_object.copyright.value %}
61-
<pre>{{about_object.copyright.value}}</pre>
62-
{% endif %}
63-
{% if about_object.notice_file.value %}
64-
{% for notice in about_object.notice_file.value %}
65-
<pre class="component-notice">{{ about_object.notice_file.value[notice] }}</pre>
66-
{% endfor %}
38+
{% set captured = {} %}
39+
{% if about_object.license_key.value %}
40+
{% if not captured[about_object.name.value] %}
41+
<div class="oss-component" id="component_{{ index.value }}">
42+
<h3 class="component-name">{{ about_object.name.value }} {% if about_object.version.value %}{{ about_object.version.value }}{% endif %} </h3>
43+
{% set _ = captured.update({ about_object.name.value: true }) %}
44+
{% set index.value = index.value + 1 %}
6745
{% endif %}
68-
{% if about_object.license_key.value %}
69-
{% if about_object.license_file.value %}
70-
{% for lic_file_name in about_object.license_file.value %}
71-
{% for license in licenses_list %}
72-
{% if license.filename == lic_file_name %}
73-
{% if not license.key in common_licenses %}
74-
<pre> {{ license.text | e}} </pre>
75-
{% endif %}
76-
{% endif %}
77-
{% endfor %}
78-
{% endfor %}
46+
{% if about_object.copyright.value %}
47+
<pre>{{about_object.copyright.value}}</pre>
48+
{% endif %}
49+
50+
51+
{% for lic_key in about_object.license_key.value %}
52+
<p>This component is licensed under {{ lic_key }}</p>
53+
{% if lic_key in common_licenses %}
54+
<p>Full text of <a class="{{ lic_key }}" href="#component-license-{{ lic_key }}"> {{ lic_key }}</a> is available at the end of this document.</p>
7955
{% else %}
80-
{% set count = namespace(value=0) %}
81-
{% for lic_score in about_object.license_score.value %}
82-
{% if lic_score | float >= min_license_score %}
83-
{% if about_object.license_key.value[count.value] in common_licenses %}
84-
{% if not about_object.license_key.value[count.value] in common_licenses_meet_score %}
85-
{% set _ = common_licenses_meet_score.update({ about_object.license_key.value[count.value]: true }) %}
86-
<p>Full text of <a class="{{ about_object.license_key.value[count.value] }}" href="#component-license-{{ about_object.license_key.value[count.value] }}"> {{ about_object.license_key.value[count.value] }}</a> is available at the end of this document.</p>
87-
{% endif %}
88-
{% else %}
89-
{% for license in licenses_list %}
90-
{% if about_object.license_key.value[count.value] == license.key %}
91-
<h3 id="component-license-{{ license.key }}">{{ license.key }}</h3>
92-
<pre> {{ license.text | e }} </pre>
93-
{% endif %}
94-
{% endfor %}
95-
{% endif %}
96-
{% endif %}
97-
{% endfor %}
98-
{% endif %}
99-
{% else %}
100-
{% if about_object.license_file.value %}
101-
{% for lic_file_name in about_object.license_file.value %}
102-
{% if about_object.license_file.value[lic_file_name] %}
103-
<pre> {{ about_object.license_file.value[lic_file_name] | e}} </pre>
56+
{% for license in licenses_list %}
57+
{% if lic_key == license.key %}
58+
<h3 id="component-license-{{ license.key }}">{{ license.key }}</h3>
59+
<pre> {{ license.text | e }} </pre>
10460
{% endif %}
10561
{% endfor %}
10662
{% endif %}
107-
{% endif %}
108-
{% endif %}
63+
{% endfor %}
64+
{% endif %}
10965
</div>
11066
{% endfor %}
11167

@@ -114,10 +70,8 @@
11470
<h3>Common Licenses Used in This Product</h3>
11571
{% for license in licenses_list %}
11672
{% if license.key in common_licenses %}
117-
{% if license.key in common_licenses_meet_score %}
118-
<h3 id="component-license-{{ license.key }}">{{ license.key }}</h3>
119-
<pre> {{ license.text | e }} </pre>
120-
{% endif %}
73+
<h3 id="component-license-{{ license.key }}">{{ license.key }}</h3>
74+
<pre> {{ license.text | e }} </pre>
12175
{% endif %}
12276
{% endfor %}
12377

0 commit comments

Comments
 (0)