Skip to content

Commit 1abf7e3

Browse files
Add tests for unknown license detection #3343
Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 5213880 commit 1abf7e3

File tree

7 files changed

+294
-0
lines changed

7 files changed

+294
-0
lines changed
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
---
2+
key: apache-2.0
3+
short_name: Apache 2.0
4+
name: Apache License 2.0
5+
category: Permissive
6+
owner: Apache Software Foundation
7+
homepage_url: http://www.apache.org/licenses/
8+
spdx_license_key: Apache-2.0
9+
---
10+
11+
7. Disclaimer of Warranty. Unless required by applicable law or
12+
agreed to in writing, Licensor provides the Work (and each
13+
Contributor provides its Contributions) on an "AS IS" BASIS,
14+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
15+
implied, including, without limitation, any warranties or conditions
16+
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
17+
PARTICULAR PURPOSE. You are solely responsible for determining the
18+
appropriateness of using or redistributing the Work and assume any
19+
risks associated with Your exercise of permissions under this License.
20+
21+
22+
Licensed under the Apache License, Version 2.0 (the "License");
23+
you may not use this file except in compliance with the License.
24+
You may obtain a copy of the License at
25+
26+
http://www.apache.org/licenses/LICENSE-2.0
27+
28+
Unless required by applicable law or agreed to in writing, software
29+
distributed under the License is distributed on an "AS IS" BASIS,
30+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
31+
See the License for the specific language governing permissions and
32+
limitations under the License.
Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
---
2+
key: gpl-2.0-plus
3+
short_name: GPL 2.0 or later
4+
name: GNU General Public License 2.0 or later
5+
category: Copyleft
6+
owner: Free Software Foundation (FSF)
7+
homepage_url: http://www.gnu.org/licenses/old-licenses/gpl-2.0-standalone.html
8+
spdx_license_key: GPL-2.0-or-later
9+
---
10+
11+
12+
This program is distributed in the hope that it will be useful, but WITHOUT ANY
13+
WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A
14+
PARTICULAR PURPOSE. See the GNU General Public License for more details.
15+
16+
17+
BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY
18+
FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN
19+
OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES
20+
PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED
21+
OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
22+
MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS
23+
TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE
24+
PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING,
25+
REPAIR OR CORRECTION.
26+
27+
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
28+
WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR
29+
REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES,
30+
INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING
31+
OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED
32+
TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY
33+
YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER
34+
PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE
35+
POSSIBILITY OF SUCH DAMAGES.
36+
37+
If you develop a new program, and you want it to be of the greatest
38+
possible use to the public, the best way to achieve this is to make it
39+
free software which everyone can redistribute and change under these terms.
40+
41+
To do so, attach the following notices to the program. It is safest
42+
to attach them to the start of each source file to most effectively
43+
convey the exclusion of warranty; and each file should have at least
44+
the "copyright" line and a pointer to where the full notice is found.
45+
46+
This program is free software; you can redistribute it and/or modify
47+
it under the terms of the GNU General Public License as published by
48+
the Free Software Foundation; either version 2 of the License, or
49+
(at your option) any later version.
50+
51+
This program is distributed in the hope that it will be useful,
52+
but WITHOUT ANY WARRANTY; without even the implied warranty of
53+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
54+
GNU General Public License for more details.
55+
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
---
2+
license_expression: gpl-2.0-plus
3+
is_license_notice: yes
4+
---
5+
6+
License:
7+
8+
This package is free software; you can redistribute it and/or modify
9+
it under the terms of the GNU General Public License as published by
10+
the Free Software Foundation; either version 2 of the License, or
11+
(at your option) any later version.
12+
13+
This package is distributed in the hope that it will be useful,
14+
but WITHOUT ANY WARRANTY; without even the implied warranty of
15+
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16+
GNU General Public License for more details.
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
{
2+
"license_detections": [
3+
{
4+
"identifier": "unknown-b0897d47-1c91-9898-2364-2e4d1a34b6fd",
5+
"license_expression": "unknown",
6+
"detection_count": 1
7+
}
8+
],
9+
"files": [
10+
{
11+
"path": "unknown.txt",
12+
"type": "file",
13+
"detected_license_expression": "unknown",
14+
"detected_license_expression_spdx": "LicenseRef-scancode-unknown",
15+
"license_detections": [
16+
{
17+
"license_expression": "unknown",
18+
"matches": [
19+
{
20+
"score": 86.89,
21+
"start_line": 1,
22+
"end_line": 10,
23+
"matched_length": 53,
24+
"match_coverage": 100.0,
25+
"matcher": "6-unknown",
26+
"license_expression": "unknown",
27+
"rule_identifier": "license-detection-unknown-296da2cbc15d2bba73baa1359cda5fc8bf39b942",
28+
"rule_relevance": 100,
29+
"rule_url": null
30+
}
31+
],
32+
"identifier": "unknown-b0897d47-1c91-9898-2364-2e4d1a34b6fd"
33+
}
34+
],
35+
"license_clues": [],
36+
"percentage_of_license_text": 86.89,
37+
"scan_errors": []
38+
}
39+
]
40+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
{
2+
"license_detections": [
3+
{
4+
"identifier": "unknown-b0897d47-1c91-9898-2364-2e4d1a34b6fd",
5+
"license_expression": "unknown",
6+
"detection_count": 1,
7+
"detection_log": []
8+
}
9+
],
10+
"files": [
11+
{
12+
"path": "unknown.txt",
13+
"type": "file",
14+
"detected_license_expression": "unknown",
15+
"detected_license_expression_spdx": "LicenseRef-scancode-unknown",
16+
"license_detections": [
17+
{
18+
"license_expression": "unknown",
19+
"matches": [
20+
{
21+
"score": 86.89,
22+
"start_line": 1,
23+
"end_line": 10,
24+
"matched_length": 53,
25+
"match_coverage": 100.0,
26+
"matcher": "6-unknown",
27+
"license_expression": "unknown",
28+
"rule_identifier": "license-detection-unknown-296da2cbc15d2bba73baa1359cda5fc8bf39b942",
29+
"rule_relevance": 100,
30+
"rule_url": null,
31+
"matched_text": "form shall mean the preferred form for making\nthe purposes of this definition control\n[software] [is] [modified] [by] [someone] [else]\n\n\n\nrepresent, as a whole, an original work of authorship. For the purposes\n of this License, Derivative Works shall not include works that remain\n separable from, or merely link (or bind by name) [to] [the] interfaces of,\n the Work and Derivative Works thereof."
32+
}
33+
],
34+
"detection_log": [
35+
"unknown-match"
36+
],
37+
"identifier": "unknown-b0897d47-1c91-9898-2364-2e4d1a34b6fd"
38+
}
39+
],
40+
"license_clues": [],
41+
"percentage_of_license_text": 86.89,
42+
"scan_errors": []
43+
}
44+
]
45+
}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
form shall mean the preferred form for making
2+
the purposes of this definition control
3+
software is modified by someone else
4+
5+
6+
7+
represent, as a whole, an original work of authorship. For the purposes
8+
of this License, Derivative Works shall not include works that remain
9+
separable from, or merely link (or bind by name) to the interfaces of,
10+
the Work and Derivative Works thereof.
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# ScanCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/scancode-toolkit for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
11+
import os
12+
13+
from commoncode.testcase import FileBasedTesting
14+
from licensedcode.index import LicenseIndex
15+
from scancode_config import REGEN_TEST_FIXTURES
16+
from scancode.cli_test_utils import check_json_scan
17+
from scancode.cli_test_utils import run_scan_click
18+
19+
from licensedcode.query import build_query
20+
21+
from licensedcode.models import load_licenses
22+
from licensedcode.models import get_rules
23+
from licensedcode.models import get_all_spdx_key_tokens
24+
from licensedcode.models import get_license_tokens
25+
26+
from licensedcode.match_unknown import match_unknowns
27+
from licensedcode.match_unknown import MATCH_UNKNOWN
28+
from licensedcode.detection import LicenseMatchFromResult
29+
30+
# Fixture root for these tests: the 'data' directory next to this module.
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), 'data')
31+
32+
33+
class TestUnknownLicenses(FileBasedTesting):
    """
    Tests for unknown license matching: one calls ``match_unknowns`` directly
    against an index built from fixture licenses and rules, the other two run
    a scan with ``--unknown-licenses`` and compare its JSON output to an
    expected file.
    """
    test_data_dir = TEST_DATA_DIR

    def test_match_unknowns_works(self):
        """
        Run ``match_unknowns`` over the apache-2.0.LICENSE fixture and check
        the resulting match.
        """
        rules_location = self.get_test_loc('match_unknown/index_mini/rules/')
        licenses_location = self.get_test_loc('match_unknown/index_mini/licenses/')
        licenses_by_key = load_licenses(licenses_location)

        # The index is built only from the fixture licenses and rules loaded
        # above, plus the SPDX key tokens and license tokens.
        index = LicenseIndex(
            rules=list(get_rules(
                licenses_db=licenses_by_key,
                rules_data_dir=rules_location,
            )),
            _spdx_tokens=set(get_all_spdx_key_tokens(licenses_by_key)),
            _license_tokens=set(get_license_tokens()),
        )

        query = build_query(
            location=self.get_test_loc('match_unknown/apache-2.0.LICENSE'),
            idx=index,
        )

        whole_run = query.whole_query_run()
        unknown_match = match_unknowns(
            idx=index,
            query_run=whole_run,
            automaton=index.unknown_automaton,
        )
        unknown_match.set_lines(query.line_by_pos)

        assert unknown_match.matcher == MATCH_UNKNOWN
        assert unknown_match.matched_text()

        # The match must also convert to a mapping and back.
        as_mapping = unknown_match.to_dict()
        assert LicenseMatchFromResult.from_dict(as_mapping)

    def test_unknown_licenses_works(self):
        """
        Scan the unknown.txt fixture with ``--unknown-licenses`` and compare
        the JSON output against unknown-license-expected.json.
        """
        scanned_file = self.get_test_loc('match_unknown/unknown.txt', copy=True)
        json_output = self.get_temp_file('json')

        scan_options = ['--license', '--unknown-licenses']
        output_options = ['--strip-root', '--verbose', '--json', json_output]
        run_scan_click(scan_options + output_options + [scanned_file])

        expected_json = self.get_test_loc('match_unknown/unknown-license-expected.json')
        check_json_scan(expected_json, json_output, regen=REGEN_TEST_FIXTURES)

    def test_unknown_licenses_works_with_license_text(self):
        """
        Same scan as above with the license text and diagnostics options
        added, compared against unknown-license-text-expected.json.
        """
        scanned_file = self.get_test_loc('match_unknown/unknown.txt', copy=True)
        json_output = self.get_temp_file('json')

        scan_options = [
            '--license',
            '--license-text',
            '--license-text-diagnostics',
            '--license-diagnostics',
            '--unknown-licenses',
        ]
        output_options = ['--strip-root', '--verbose', '--json', json_output]
        run_scan_click(scan_options + output_options + [scanned_file])

        expected_json = self.get_test_loc('match_unknown/unknown-license-text-expected.json')
        check_json_scan(expected_json, json_output, regen=REGEN_TEST_FIXTURES)
96+

0 commit comments

Comments
 (0)