Skip to content

Commit d8c95e9

Browse files
Merge pull request #53 from AyanSinhaMahapatra/add-match-class
Add LicenseMatch class
2 parents b1dee2e + cfb0a27 commit d8c95e9

File tree

62 files changed

+2195
-1786
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

62 files changed

+2195
-1786
lines changed

src/results_analyze/analyzer.py

Lines changed: 85 additions & 89 deletions
Large diffs are not rendered by default.

src/results_analyze/analyzer_plugin.py

Lines changed: 94 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
import traceback
1111

1212
import attr
13+
from license_expression import Licensing
1314

1415
from commoncode.cliutils import PluggableCommandLineOption
1516
from commoncode.cliutils import POST_SCAN_GROUP
@@ -77,8 +78,8 @@ def process_codebase(self, codebase, **kwargs):
7778
break
7879

7980
# Where the resource does not have any detected license
80-
license_matches = getattr(resource, "licenses", [])
81-
if not license_matches:
81+
license_matches_serialized = getattr(resource, "licenses", [])
82+
if not license_matches_serialized:
8283
continue
8384

8485
# Case where any attribute essential for analysis is missing
@@ -87,6 +88,17 @@ def process_codebase(self, codebase, **kwargs):
8788
break
8889

8990
count_has_license += 1
91+
92+
try:
93+
license_matches = LicenseMatch.from_files_licenses(
94+
license_matches_serialized
95+
)
96+
except KeyError as e:
97+
trace = traceback.format_exc()
98+
msg = f"Cannot convert scancode data to LicenseMatch class: {e}\n{trace}"
99+
codebase.errors.append(msg)
100+
raise ScancodeDataChangedError(msg)
101+
90102
try:
91103
ars = list(analyzer.LicenseDetectionIssue.from_license_matches(
92104
license_matches=license_matches,
@@ -126,6 +138,86 @@ def process_codebase(self, codebase, **kwargs):
126138
codebase.save_resource(resource)
127139

128140

141+
class ScancodeDataChangedError(Exception):
    """
    Signal that the scan results data is not in the format this plugin expects.
    """
146+
147+
148+
@attr.s
class LicenseMatch:
    """
    Represent a license match to a rule.

    Every attribute holds one value copied from a serialized scancode
    ``files.licenses`` entry: either from the entry itself or from its nested
    ``matched_rule`` mapping (see ``from_files_licenses``).
    """
    # Read from matched_rule["license_expression"].
    license_expression = attr.ib()
    # Read from the top level of the entry.
    score = attr.ib()
    start_line = attr.ib()
    end_line = attr.ib()
    # Read from matched_rule["identifier"].
    rule_identifier = attr.ib()
    # The remaining rule attributes are read from the same-named keys of
    # the nested "matched_rule" mapping.
    is_license_text = attr.ib()
    is_license_notice = attr.ib()
    is_license_reference = attr.ib()
    is_license_tag = attr.ib()
    is_license_intro = attr.ib()
    matcher = attr.ib()
    matched_length = attr.ib()
    rule_length = attr.ib()
    match_coverage = attr.ib()
    rule_relevance = attr.ib()
    # Read from the top level of the entry.
    matched_text = attr.ib()

    @classmethod
    def from_files_licenses(cls, license_matches):
        """
        Return a list of LicenseMatch built from the scancode files.licenses
        data structure.

        ``license_matches`` is an iterable of mappings. Each mapping must have
        the keys "score", "start_line", "end_line", "matched_text" and a nested
        "matched_rule" mapping with the keys "license_expression",
        "identifier", "is_license_text", "is_license_notice",
        "is_license_reference", "is_license_tag", "is_license_intro",
        "matcher", "matched_length", "rule_length", "match_coverage" and
        "rule_relevance".

        Raise a KeyError if any of these keys is missing.

        When the expression of a matched rule has N license keys, the N - 1
        entries that follow are skipped. This assumes the serialized data
        repeats the same match once per license key, consecutively.
        """
        matches = []
        licensing = Licensing()
        # Whenever we have multiple matches with the same expression, we want
        # to only keep the first and skip the secondary matches
        skip_secondary_matches = 0

        for license_match in license_matches:
            if skip_secondary_matches:
                skip_secondary_matches -= 1
                continue

            matched_rule = license_match["matched_rule"]
            license_expression = matched_rule["license_expression"]
            expression_keys = licensing.license_keys(license_expression)

            # Clamp at zero: if no key is reported for the expression
            # (presumably possible for an empty expression), a counter of -1
            # would be truthy and never return to zero, silently dropping
            # every remaining match.
            skip_secondary_matches = max(len(expression_keys) - 1, 0)

            matches.append(
                cls(
                    license_expression=license_expression,
                    score=license_match["score"],
                    start_line=license_match["start_line"],
                    end_line=license_match["end_line"],
                    rule_identifier=matched_rule["identifier"],
                    is_license_text=matched_rule["is_license_text"],
                    is_license_notice=matched_rule["is_license_notice"],
                    is_license_reference=matched_rule["is_license_reference"],
                    is_license_tag=matched_rule["is_license_tag"],
                    is_license_intro=matched_rule["is_license_intro"],
                    matcher=matched_rule["matcher"],
                    matched_length=matched_rule["matched_length"],
                    rule_length=matched_rule["rule_length"],
                    match_coverage=matched_rule["match_coverage"],
                    rule_relevance=matched_rule["rule_relevance"],
                    matched_text=license_match["matched_text"],
                )
            )

        return matches

    def to_dict(self):
        """
        Return a mapping of attribute name to value for this match, as
        produced by ``attr.asdict``.
        """
        return attr.asdict(self)
219+
220+
129221
def is_analyzable(resource):
130222
"""
131223
Return True if resource has all the data required for the analysis.
Lines changed: 212 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,212 @@
1+
[
2+
{
3+
"key": "unknown",
4+
"score": 27.0,
5+
"name": "Unknown license detected but not recognized",
6+
"short_name": "unknown",
7+
"category": "Unstated License",
8+
"is_exception": false,
9+
"owner": "Unspecified",
10+
"homepage_url": null,
11+
"text_url": "",
12+
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:unknown",
13+
"spdx_license_key": null,
14+
"spdx_url": "",
15+
"start_line": 14,
16+
"end_line": 14,
17+
"matched_rule": {
18+
"identifier": "lead-in_unknown_67.RULE",
19+
"license_expression": "unknown",
20+
"licenses": [
21+
"unknown"
22+
],
23+
"is_license_text": false,
24+
"is_license_notice": false,
25+
"is_license_reference": true,
26+
"is_license_tag": false,
27+
"is_license_intro": false,
28+
"matcher": "2-aho",
29+
"rule_length": 5,
30+
"matched_length": 5,
31+
"match_coverage": 100.0,
32+
"rule_relevance": 27
33+
},
34+
"matched_text": " * licensed under the terms of the LGPL. The redistributable library"
35+
},
36+
{
37+
"key": "lgpl-2.0-plus",
38+
"score": 5.0,
39+
"name": "GNU Library General Public License 2.0 or later",
40+
"short_name": "LGPL 2.0 or later",
41+
"category": "Copyleft Limited",
42+
"is_exception": false,
43+
"owner": "Free Software Foundation (FSF)",
44+
"homepage_url": "http://www.gnu.org/licenses/old-licenses/lgpl-2.0.html",
45+
"text_url": "http://www.gnu.org/licenses/old-licenses/lgpl-2.0-standalone.html",
46+
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:lgpl-2.0-plus",
47+
"spdx_license_key": "LGPL-2.0-or-later",
48+
"spdx_url": "https://spdx.org/licenses/LGPL-2.0-or-later",
49+
"start_line": 14,
50+
"end_line": 14,
51+
"matched_rule": {
52+
"identifier": "lgpl_bare_single_word.RULE",
53+
"license_expression": "lgpl-2.0-plus",
54+
"licenses": [
55+
"lgpl-2.0-plus"
56+
],
57+
"is_license_text": false,
58+
"is_license_notice": false,
59+
"is_license_reference": true,
60+
"is_license_tag": false,
61+
"is_license_intro": false,
62+
"matcher": "2-aho",
63+
"rule_length": 1,
64+
"matched_length": 1,
65+
"match_coverage": 100.0,
66+
"rule_relevance": 5
67+
},
68+
"matched_text": " * licensed under the terms of the LGPL. The redistributable library"
69+
},
70+
{
71+
"key": "bsd-new",
72+
"score": 88.93,
73+
"name": "BSD-3-Clause",
74+
"short_name": "BSD-3-Clause",
75+
"category": "Permissive",
76+
"is_exception": false,
77+
"owner": "Regents of the University of California",
78+
"homepage_url": "http://www.opensource.org/licenses/BSD-3-Clause",
79+
"text_url": "http://www.opensource.org/licenses/BSD-3-Clause",
80+
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:bsd-new",
81+
"spdx_license_key": "BSD-3-Clause",
82+
"spdx_url": "https://spdx.org/licenses/BSD-3-Clause",
83+
"start_line": 5,
84+
"end_line": 45,
85+
"matched_rule": {
86+
"identifier": "bsd-new_and_gpl-2.0-plus_with_bison-exception-2.2_1.RULE",
87+
"license_expression": "bsd-new AND gpl-2.0-plus WITH bison-exception-2.2",
88+
"licenses": [
89+
"bsd-new",
90+
"gpl-2.0-plus",
91+
"bison-exception-2.2"
92+
],
93+
"is_license_text": false,
94+
"is_license_notice": true,
95+
"is_license_reference": false,
96+
"is_license_tag": false,
97+
"is_license_intro": false,
98+
"matcher": "3-seq",
99+
"rule_length": 257,
100+
"matched_length": 257,
101+
"match_coverage": 100.0,
102+
"rule_relevance": 100
103+
},
104+
"matched_text": " Distributed under the OSI-approved BSD License (the \"License\");\n see accompanying file Copyright.txt for details.\n\n This software is distributed WITHOUT ANY WARRANTY; without even the\n implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n See the License for more information.\n============================================================================*/\n/* A Bison parser, made by GNU Bison 2.3. */\n\n/* Skeleton implementation for Bison's Yacc-like parsers in C\n\n Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006\n Free Software Foundation, Inc.\n\n This program is free software; you can redistribute it and/or modify\n it under the terms of the GNU General Public License as published by\n the Free Software Foundation; either version 2, or (at your option)\n any later version.\n\n This program is distributed in the hope that it will be useful,\n but WITHOUT ANY WARRANTY; without even the implied warranty of\n MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n GNU General Public License for more details.\n\n You should have received a copy of the GNU General Public License\n along with this program; if not, write to the Free Software\n Foundation, Inc., 51 Franklin Street, Fifth Floor,\n Boston, MA 02110-1301, USA. */\n\n/* As a special exception, you may create a larger work that contains\n part or all of the Bison parser skeleton and distribute that work\n under terms of your choice, so long as that work isn't itself a\n parser generator using the skeleton or a modified version thereof\n as a parser skeleton. 
Alternatively, if you modify or redistribute\n the parser skeleton itself, you may (at your option) remove this\n special exception, which will cause the skeleton and the resulting\n Bison output files to be licensed under the GNU General Public\n License without this special exception.\n\n This special exception was added by the Free Software Foundation in\n version 2.2 of Bison. */"
105+
},
106+
{
107+
"key": "gpl-2.0-plus",
108+
"score": 88.93,
109+
"name": "GNU General Public License 2.0 or later",
110+
"short_name": "GPL 2.0 or later",
111+
"category": "Copyleft",
112+
"is_exception": false,
113+
"owner": "Free Software Foundation (FSF)",
114+
"homepage_url": "http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html",
115+
"text_url": "http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html",
116+
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:gpl-2.0-plus",
117+
"spdx_license_key": "GPL-2.0-or-later",
118+
"spdx_url": "https://spdx.org/licenses/GPL-2.0-or-later",
119+
"start_line": 5,
120+
"end_line": 45,
121+
"matched_rule": {
122+
"identifier": "bsd-new_and_gpl-2.0-plus_with_bison-exception-2.2_1.RULE",
123+
"license_expression": "bsd-new AND gpl-2.0-plus WITH bison-exception-2.2",
124+
"licenses": [
125+
"bsd-new",
126+
"gpl-2.0-plus",
127+
"bison-exception-2.2"
128+
],
129+
"is_license_text": false,
130+
"is_license_notice": true,
131+
"is_license_reference": false,
132+
"is_license_tag": false,
133+
"is_license_intro": false,
134+
"matcher": "3-seq",
135+
"rule_length": 257,
136+
"matched_length": 257,
137+
"match_coverage": 100.0,
138+
"rule_relevance": 100
139+
},
140+
"matched_text": " Distributed under the OSI-approved BSD License (the \"License\");\n see accompanying file Copyright.txt for details.\n\n This software is distributed WITHOUT ANY WARRANTY; without even the\n implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n See the License for more information.\n============================================================================*/\n/* A Bison parser, made by GNU Bison 2.3. */\n\n/* Skeleton implementation for Bison's Yacc-like parsers in C\n\n Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006\n Free Software Foundation, Inc.\n\n This program is free software; you can redistribute it and/or modify\n it under the terms of the GNU General Public License as published by\n the Free Software Foundation; either version 2, or (at your option)\n any later version.\n\n This program is distributed in the hope that it will be useful,\n but WITHOUT ANY WARRANTY; without even the implied warranty of\n MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n GNU General Public License for more details.\n\n You should have received a copy of the GNU General Public License\n along with this program; if not, write to the Free Software\n Foundation, Inc., 51 Franklin Street, Fifth Floor,\n Boston, MA 02110-1301, USA. */\n\n/* As a special exception, you may create a larger work that contains\n part or all of the Bison parser skeleton and distribute that work\n under terms of your choice, so long as that work isn't itself a\n parser generator using the skeleton or a modified version thereof\n as a parser skeleton. 
Alternatively, if you modify or redistribute\n the parser skeleton itself, you may (at your option) remove this\n special exception, which will cause the skeleton and the resulting\n Bison output files to be licensed under the GNU General Public\n License without this special exception.\n\n This special exception was added by the Free Software Foundation in\n version 2.2 of Bison. */"
141+
},
142+
{
143+
"key": "bison-exception-2.2",
144+
"score": 88.93,
145+
"name": "Bison 2.2 exception to GPL 2.0 or later",
146+
"short_name": "Bison 2.2 exception to GPL 2.0 or later",
147+
"category": "Copyleft Limited",
148+
"is_exception": true,
149+
"owner": "Free Software Foundation (FSF)",
150+
"homepage_url": null,
151+
"text_url": "",
152+
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:bison-exception-2.2",
153+
"spdx_license_key": "Bison-exception-2.2",
154+
"spdx_url": "https://spdx.org/licenses/Bison-exception-2.2",
155+
"start_line": 5,
156+
"end_line": 45,
157+
"matched_rule": {
158+
"identifier": "bsd-new_and_gpl-2.0-plus_with_bison-exception-2.2_1.RULE",
159+
"license_expression": "bsd-new AND gpl-2.0-plus WITH bison-exception-2.2",
160+
"licenses": [
161+
"bsd-new",
162+
"gpl-2.0-plus",
163+
"bison-exception-2.2"
164+
],
165+
"is_license_text": false,
166+
"is_license_notice": true,
167+
"is_license_reference": false,
168+
"is_license_tag": false,
169+
"is_license_intro": false,
170+
"matcher": "3-seq",
171+
"rule_length": 257,
172+
"matched_length": 257,
173+
"match_coverage": 100.0,
174+
"rule_relevance": 100
175+
},
176+
"matched_text": " Distributed under the OSI-approved BSD License (the \"License\");\n see accompanying file Copyright.txt for details.\n\n This software is distributed WITHOUT ANY WARRANTY; without even the\n implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n See the License for more information.\n============================================================================*/\n/* A Bison parser, made by GNU Bison 2.3. */\n\n/* Skeleton implementation for Bison's Yacc-like parsers in C\n\n Copyright (C) 1984, 1989, 1990, 2000, 2001, 2002, 2003, 2004, 2005, 2006\n Free Software Foundation, Inc.\n\n This program is free software; you can redistribute it and/or modify\n it under the terms of the GNU General Public License as published by\n the Free Software Foundation; either version 2, or (at your option)\n any later version.\n\n This program is distributed in the hope that it will be useful,\n but WITHOUT ANY WARRANTY; without even the implied warranty of\n MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the\n GNU General Public License for more details.\n\n You should have received a copy of the GNU General Public License\n along with this program; if not, write to the Free Software\n Foundation, Inc., 51 Franklin Street, Fifth Floor,\n Boston, MA 02110-1301, USA. */\n\n/* As a special exception, you may create a larger work that contains\n part or all of the Bison parser skeleton and distribute that work\n under terms of your choice, so long as that work isn't itself a\n parser generator using the skeleton or a modified version thereof\n as a parser skeleton. 
Alternatively, if you modify or redistribute\n the parser skeleton itself, you may (at your option) remove this\n special exception, which will cause the skeleton and the resulting\n Bison output files to be licensed under the GNU General Public\n License without this special exception.\n\n This special exception was added by the Free Software Foundation in\n version 2.2 of Bison. */"
177+
},
178+
{
179+
"key": "unknown",
180+
"score": 27.0,
181+
"name": "Unknown license detected but not recognized",
182+
"short_name": "unknown",
183+
"category": "Unstated License",
184+
"is_exception": false,
185+
"owner": "Unspecified",
186+
"homepage_url": null,
187+
"text_url": "",
188+
"reference_url": "https://enterprise.dejacode.com/urn/urn:dje:license:unknown",
189+
"spdx_license_key": null,
190+
"spdx_url": "",
191+
"start_line": 19,
192+
"end_line": 19,
193+
"matched_rule": {
194+
"identifier": "lead-in_unknown_77.RULE",
195+
"license_expression": "unknown",
196+
"licenses": [
197+
"unknown"
198+
],
199+
"is_license_text": false,
200+
"is_license_notice": false,
201+
"is_license_reference": true,
202+
"is_license_tag": false,
203+
"is_license_intro": false,
204+
"matcher": "2-aho",
205+
"rule_length": 5,
206+
"matched_length": 5,
207+
"match_coverage": 100.0,
208+
"rule_relevance": 27
209+
},
210+
"matched_text": " * This source file is copyrighted and licensed under the following terms:"
211+
}
212+
]

0 commit comments

Comments
 (0)