Skip to content

Commit 9b6186a

Browse files
Merge pull request #2685 from nexB/2566-report-license-rules
Add script to report rules
2 parents 6c6b25a + 716d959 commit 9b6186a

File tree

1 file changed

+195
-0
lines changed

1 file changed

+195
-0
lines changed
Lines changed: 195 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,195 @@
1+
# -*- coding: utf-8 -*-
2+
#
3+
# Copyright (c) nexB Inc. and others. All rights reserved.
4+
# ScanCode is a trademark of nexB Inc.
5+
# SPDX-License-Identifier: Apache-2.0
6+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
7+
# See https://github.com/nexB/scancode-toolkit for support or download.
8+
# See https://aboutcode.org for more information about nexB OSS projects.
9+
#
10+
11+
import io
12+
13+
import click
14+
import csv
15+
16+
from commoncode.cliutils import PluggableCommandLineOption
17+
from licensedcode.models import load_licenses
18+
from licensedcode.models import load_rules
19+
20+
21+
# Ordered column names of the licenses CSV report: this list is passed as the
# ``fieldnames`` of the CSV writer when the licenses output is written.
LICENSES_FIELDNAMES = [
    'key',
    'short_name',
    'name',
    'category',
    'owner',
    'text',
    'words_count',
    'notes',
    'minimum_coverage',
    'homepage_url',
    'is_exception',
    'language',
    'is_unknown',
    'spdx_license_key',
    'reference_url',
    'text_urls',
    'other_urls',
    'standard_notice',
    'license_filename',
    'faq_url',
    'ignorable_authors',
    'ignorable_copyrights',
    'ignorable_holders',
    'ignorable_urls',
    'ignorable_emails',
    'osi_license_key',
    'osi_url',
    'other_spdx_license_keys',
]
29+
30+
31+
# Ordered column names of the rules CSV report: this list is passed as the
# ``fieldnames`` of the CSV writer when the rules output is written.
RULES_FIELDNAMES = [
    'identifier',
    'license_expression',
    'relevance',
    'text',
    'words_count',
    'category',
    'is_false_positive',
    'is_license_text',
    'is_license_notice',
    'is_license_tag',
    'is_license_reference',
    'is_license_intro',
    'has_unknown',
    'only_known_words',
    'notes',
    'referenced_filenames',
    'minimum_coverage',
    'ignorable_copyrights',
    'ignorable_holders',
    'ignorable_authors',
    'ignorable_urls',
    'ignorable_emails',
]
38+
39+
40+
# URL template for a license page: the ``{}`` placeholder is filled with a
# license key to build the ``reference_url`` column of the licenses report.
SCANCODE_LICENSEDB_URL = 'https://scancode-licensedb.aboutcode.org/{}'
41+
42+
43+
def write_data_to_csv(data, output_csv, fieldnames):
    """
    Write the ``data`` iterable of mappings as rows of the CSV file at the
    ``output_csv`` path, preceded by a header row made of ``fieldnames``.

    Keys missing from an entry are written as empty cells. With the default
    ``csv.DictWriter`` settings used here, an entry holding a key that is not
    in ``fieldnames`` raises a ValueError.

    The file is opened with ``newline=''`` as required by the ``csv`` module
    (otherwise an extra carriage return is emitted on platforms that translate
    line endings) and is always encoded as UTF-8 so that non-ASCII text does
    not depend on the locale default encoding.
    """
    with open(output_csv, 'w', newline='', encoding='utf-8') as f:
        w = csv.DictWriter(f, fieldnames=fieldnames)
        w.writeheader()
        w.writerows(data)
50+
51+
52+
def filter_by_attribute(data, attribute, required_key):
    """
    Return a list of the entries of ``data`` (an iterable of mappings) whose
    value for ``attribute`` is equal to ``required_key``.
    Example `attribute`: `category`. Example `required_key`: `Permissive`.

    The comparison is an exact, case-sensitive equality. Entries that do not
    have ``attribute`` at all never match.
    """
    # Use a private sentinel as the "missing" default: a string default such
    # as 'None' would make entries lacking the attribute match a filter for
    # the literal string 'None'.
    missing = object()
    return [entry for entry in data if entry.get(attribute, missing) == required_key]
58+
59+
def flatten_output(data):
    """
    Return a new list of mappings built from the ``data`` list of dicts where
    every value is a string suitable for a CSV cell:

    - keys with a None value are dropped,
    - list values are joined in a single space-separated string,
    - string values are kept as-is,
    - any other value is replaced by its ``repr()``.

    The input entries are not modified.
    """
    assert isinstance(data, list)

    output = []
    for entry in data:
        assert isinstance(entry, dict)

        output_entry = {}
        for key, value in entry.items():
            if value is None:
                continue

            if isinstance(value, list):
                # Items that are not strings are rendered with repr(), as done
                # for scalar values below: joining them directly would raise
                # a TypeError.
                value = ' '.join(
                    item if isinstance(item, str) else repr(item)
                    for item in value
                )
            elif not isinstance(value, str):
                value = repr(value)

            output_entry[key] = value

        output.append(output_entry)

    return output
82+
83+
@click.command()
@click.option('-l', '--licenses',
    type=click.Path(dir_okay=False, writable=True, readable=False),
    default=None,
    metavar='FILE',
    help='Write all Licenses data to the csv FILE.',
    cls=PluggableCommandLineOption,
)
@click.option('-r', '--rules',
    type=click.Path(dir_okay=False, writable=True, readable=False),
    default=None,
    metavar='FILE',
    help='Write all Rules data to the csv FILE.',
    cls=PluggableCommandLineOption,
)
@click.option('-c', '--category',
    type=str,
    default=None,
    metavar='STRING',
    help='An optional filter to only output licenses/rules of this category. '
         'Example STRING: `permissive`.',
    cls=PluggableCommandLineOption,
)
@click.option('-k', '--license-key',
    type=str,
    default=None,
    metavar='STRING',
    help='An optional filter to only output licenses/rules which has this license key. '
         'Example STRING: `mit`.',
    cls=PluggableCommandLineOption,
)
@click.option('-t', '--with-text',
    is_flag=True,
    default=False,
    help='Also include the license/rules texts (First 200 characters). '
         'Note that this increases the file size significantly.',
    cls=PluggableCommandLineOption,
)
@click.help_option('-h', '--help')
def cli(licenses, rules, category, license_key, with_text):
    """
    Write Licenses/Rules from scancode into a CSV file with all details.
    Output can be optionally filtered by category/license-key.
    """
    # Arguments, as received from the command line options:
    # - licenses: path of the licenses CSV to write, or None to skip it.
    # - rules: path of the rules CSV to write, or None to skip it.
    # - category: optional category string; only exact matches are kept.
    # - license_key: optional string; only licenses with this exact key and
    #   rules with this exact license expression are kept.
    # - with_text: if True, add the first 200 characters of each text.

    # Licenses are always loaded: they are also needed to find the category
    # of a rule from its license expression.
    licenses_data = load_licenses()

    if licenses:
        licenses_output = []
        for lic in licenses_data.values():
            license_text = lic.text
            license_data = lic.to_dict()
            if with_text:
                license_data['text'] = license_text[:200]
            license_data['is_unknown'] = lic.is_unknown
            # Count whitespace-separated words, not characters.
            license_data['words_count'] = len(license_text.split())
            license_data['reference_url'] = SCANCODE_LICENSEDB_URL.format(lic.key)
            licenses_output.append(license_data)

        if category:
            licenses_output = filter_by_attribute(
                data=licenses_output,
                attribute='category',
                required_key=category,
            )

        if license_key:
            licenses_output = filter_by_attribute(
                data=licenses_output,
                attribute='key',
                required_key=license_key,
            )

        licenses_output = flatten_output(data=licenses_output)
        write_data_to_csv(
            data=licenses_output,
            output_csv=licenses,
            fieldnames=LICENSES_FIELDNAMES,
        )

    if rules:
        rules_output = []
        for rule in load_rules():
            # Fetch the text once: it is used for both the excerpt and the count.
            rule_text = rule.text()
            rule_data = rule.to_dict()
            rule_data['identifier'] = rule.identifier
            rule_data['referenced_filenames'] = rule.referenced_filenames
            if with_text:
                rule_data['text'] = rule_text[:200]
            rule_data['has_unknown'] = rule.has_unknown
            # Count whitespace-separated words, not characters.
            rule_data['words_count'] = len(rule_text.split())
            try:
                # Best effort: the lookup only succeeds when the expression is
                # exactly a known license key. Otherwise the rule is left
                # without a category.
                rule_data['category'] = licenses_data[rule_data['license_expression']].category
            except KeyError:
                pass
            rules_output.append(rule_data)

        if category:
            rules_output = filter_by_attribute(
                data=rules_output,
                attribute='category',
                required_key=category,
            )

        if license_key:
            rules_output = filter_by_attribute(
                data=rules_output,
                attribute='license_expression',
                required_key=license_key,
            )

        rules_output = flatten_output(rules_output)
        write_data_to_csv(
            data=rules_output,
            output_csv=rules,
            fieldnames=RULES_FIELDNAMES,
        )
192+
193+
194+
# Run the command line interface only when this file is executed as a script.
if __name__ == '__main__':
    cli()

0 commit comments

Comments
 (0)