Skip to content

Commit 7e3e7cf

Browse files
alok1304 authored and AyanSinhaMahapatra committed
Show extra-words in detection_log correctly
Reference: #4400 Signed-off-by: Alok Kumar <[email protected]>
1 parent a4415e7 commit 7e3e7cf

File tree

7 files changed

+271
-5
lines changed

7 files changed

+271
-5
lines changed

src/licensedcode/detection.py

Lines changed: 27 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -124,6 +124,7 @@ class DetectionRule(Enum):
124124
These are logged in LicenseDetection.detection_log for verbosity.
125125
"""
126126
UNKNOWN_MATCH = 'unknown-match'
127+
EXTRA_WORDS = 'extra-words'
127128
LICENSE_CLUES = 'license-clues'
128129
LOW_QUALITY_MATCH_FRAGMENTS = 'low-quality-matches'
129130
FALSE_POSITIVE = 'possible-false-positive'
@@ -1054,10 +1055,27 @@ def is_correct_detection_non_unknown(license_matches):
10541055
are correct/perfect license detections and also there aren't any unknowns.
10551056
"""
10561057
return (
1057-
is_correct_detection(license_matches)
1058+
is_correct_detection_2(license_matches)
10581059
and not has_unknown_matches(license_matches)
10591060
)
10601061

1062+
def is_correct_detection_2(license_matches):
    """
    Return True if all the matches in ``license_matches`` (an iterable of
    LicenseMatch) are perfect license detections: every match has a
    ``matcher`` of either `1-hash` or `1-spdx-id` and a ``coverage()`` of
    exactly 100.

    An empty ``license_matches`` returns True, as there is no match that
    fails the check.
    """
    exact_matchers = ("1-hash", "1-spdx-id")

    # Check both conditions in a single pass: iterating ``license_matches``
    # twice would silently skip the matcher check when given a one-shot
    # iterator such as a generator, because the first full iteration
    # exhausts it.
    return all(
        license_match.matcher in exact_matchers
        and license_match.coverage() == 100
        for license_match in license_matches
    )
1078+
10611079

10621080
def is_correct_detection(license_matches):
10631081
"""
@@ -1545,6 +1563,13 @@ def get_detected_license_expression(
15451563
# in detections but ideally we should return synthetic unknowns for these
15461564
detection_log.append(DetectionRule.LOW_QUALITY_MATCH_FRAGMENTS.value)
15471565
return detection_log, combined_expression
1566+
1567+
elif analysis == DetectionCategory.EXTRA_WORDS.value:
1568+
if TRACE_ANALYSIS:
1569+
logger_debug(f'analysis {DetectionCategory.EXTRA_WORDS.value}')
1570+
# Apply filtering or handling logic if needed
1571+
matches_for_expression = license_matches
1572+
detection_log.append(DetectionRule.EXTRA_WORDS.value)
15481573

15491574
else:
15501575
if TRACE_ANALYSIS:
@@ -1726,7 +1751,7 @@ def analyze_detection(license_matches, package_license=False):
17261751
):
17271752
return DetectionCategory.LICENSE_CLUES.value
17281753

1729-
# Case where all matches have `matcher` as `1-hash` or `4-spdx-id`
1754+
# Case where all matches have `matcher` as `1-hash` or `1-spdx-id`
17301755
elif is_correct_detection_non_unknown(license_matches=license_matches):
17311756
return DetectionCategory.PERFECT_DETECTION.value
17321757

Lines changed: 70 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,70 @@
1+
{
2+
"license_detections": [
3+
{
4+
"identifier": "bsd_new-fbfc5955-0c63-4c98-2ce9-08e1e1796f50",
5+
"license_expression": "bsd-new",
6+
"license_expression_spdx": "BSD-3-Clause",
7+
"detection_count": 1,
8+
"detection_log": [
9+
"extra-words"
10+
],
11+
"reference_matches": [
12+
{
13+
"license_expression": "bsd-new",
14+
"license_expression_spdx": "BSD-3-Clause",
15+
"from_file": "scan-extra-words-2-aho-license/LICENSE",
16+
"start_line": 4,
17+
"end_line": 27,
18+
"matcher": "2-aho",
19+
"score": 99.53,
20+
"matched_length": 210,
21+
"match_coverage": 100.0,
22+
"rule_relevance": 100,
23+
"rule_identifier": "bsd-new_158.RULE",
24+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_158.RULE",
25+
"matched_text": "Redistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n* Redistributions of source code must retain the above copyright notice, this\r\n list of conditions and the following disclaimer.\r\n\r\n* Redistributions in binary form must reproduce the above copyright notice,\r\n this list of conditions and the following disclaimer in the documentation\r\n and/or other materials provided with the distribution.\r\n\r\n* Neither the name of filesize nor the names of its\r\n contributors may be used to endorse or promote products derived from\r\n this software without specific prior written permission.\r\n\r\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\r\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\r\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\r\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\r\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.",
26+
"matched_text_diagnostics": "Redistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n* Redistributions of source code must retain the above copyright notice, this\r\n list of conditions and the following disclaimer.\r\n\r\n* Redistributions in binary form must reproduce the above copyright notice,\r\n this list of conditions and the following disclaimer in the documentation\r\n and/or other materials provided with the distribution.\r\n\r\n* Neither the name of [filesize] nor the names of its\r\n contributors may be used to endorse or promote products derived from\r\n this software without specific prior written permission.\r\n\r\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\r\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\r\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\r\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\r\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
27+
}
28+
]
29+
}
30+
],
31+
"files": [
32+
{
33+
"path": "LICENSE",
34+
"type": "file",
35+
"detected_license_expression": "bsd-new",
36+
"detected_license_expression_spdx": "BSD-3-Clause",
37+
"license_detections": [
38+
{
39+
"license_expression": "bsd-new",
40+
"license_expression_spdx": "BSD-3-Clause",
41+
"matches": [
42+
{
43+
"license_expression": "bsd-new",
44+
"license_expression_spdx": "BSD-3-Clause",
45+
"from_file": "scan-extra-words-2-aho-license/LICENSE",
46+
"start_line": 4,
47+
"end_line": 27,
48+
"matcher": "2-aho",
49+
"score": 99.53,
50+
"matched_length": 210,
51+
"match_coverage": 100.0,
52+
"rule_relevance": 100,
53+
"rule_identifier": "bsd-new_158.RULE",
54+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_158.RULE",
55+
"matched_text": "Redistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n* Redistributions of source code must retain the above copyright notice, this\r\n list of conditions and the following disclaimer.\r\n\r\n* Redistributions in binary form must reproduce the above copyright notice,\r\n this list of conditions and the following disclaimer in the documentation\r\n and/or other materials provided with the distribution.\r\n\r\n* Neither the name of filesize nor the names of its\r\n contributors may be used to endorse or promote products derived from\r\n this software without specific prior written permission.\r\n\r\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\r\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\r\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\r\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\r\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.",
56+
"matched_text_diagnostics": "Redistribution and use in source and binary forms, with or without\r\nmodification, are permitted provided that the following conditions are met:\r\n\r\n* Redistributions of source code must retain the above copyright notice, this\r\n list of conditions and the following disclaimer.\r\n\r\n* Redistributions in binary form must reproduce the above copyright notice,\r\n this list of conditions and the following disclaimer in the documentation\r\n and/or other materials provided with the distribution.\r\n\r\n* Neither the name of [filesize] nor the names of its\r\n contributors may be used to endorse or promote products derived from\r\n this software without specific prior written permission.\r\n\r\nTHIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS \"AS IS\"\r\nAND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE\r\nIMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE\r\nDISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE\r\nFOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL\r\nDAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR\r\nSERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER\r\nCAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,\r\nOR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE\r\nOF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE."
57+
}
58+
],
59+
"detection_log": [
60+
"extra-words"
61+
],
62+
"identifier": "bsd_new-fbfc5955-0c63-4c98-2ce9-08e1e1796f50"
63+
}
64+
],
65+
"license_clues": [],
66+
"percentage_of_license_text": 95.89,
67+
"scan_errors": []
68+
}
69+
]
70+
}
Lines changed: 28 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,28 @@
1+
Copyright (c) 2022, Jason Mulligan
2+
All rights reserved.
3+
4+
Redistribution and use in source and binary forms, with or without
5+
modification, are permitted provided that the following conditions are met:
6+
7+
* Redistributions of source code must retain the above copyright notice, this
8+
list of conditions and the following disclaimer.
9+
10+
* Redistributions in binary form must reproduce the above copyright notice,
11+
this list of conditions and the following disclaimer in the documentation
12+
and/or other materials provided with the distribution.
13+
14+
* Neither the name of filesize nor the names of its
15+
contributors may be used to endorse or promote products derived from
16+
this software without specific prior written permission.
17+
18+
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19+
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20+
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
21+
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
22+
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23+
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
24+
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
25+
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
26+
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
27+
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
28+

0 commit comments

Comments
 (0)