Skip to content

Commit d27a33c

Browse files
Fix misc license/package detection bugs
* Add rules for license detection fixes
* Fix detection for pom.properties file
* Fix --todo plugin for package detection
* Fix false positive case for bare word rules
* Fix issue of unknown license detection in package manifest

Signed-off-by: Ayan Sinha Mahapatra <[email protected]>
1 parent 1af074f commit d27a33c

File tree

22 files changed

+1232
-63
lines changed

22 files changed

+1232
-63
lines changed
Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
---
2+
license_expression: bsd-new
3+
is_license_tag: yes
4+
referenced_filenames:
5+
- https://github.com/jimsch/COSE-JAVA/blob/master/LICENSE
6+
ignorable_urls:
7+
- https://github.com/jimsch/COSE-JAVA/blob/master/LICENSE
8+
---
9+
10+
name: BSD3
11+
url: {{https://github.com/jimsch/COSE-JAVA/blob/master/LICENSE}}
Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
---
2+
license_expression: cc0-1.0
3+
is_license_tag: yes
4+
relevance: 100
5+
ignorable_urls:
6+
- http://www.creativecommons.org/publicdomain/zero/1.0/
7+
---
8+
9+
name: CC0 universal
10+
url: http://www.creativecommons.org/publicdomain/zero/1.0/

src/licensedcode/detection.py

Lines changed: 67 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -667,7 +667,14 @@ def to_dict(
667667

668668
def collect_license_detections(codebase, include_license_clues=True):
669669
"""
670-
Return a list of LicenseDetectionFromResult from a ``codebase``
670+
Return a list of LicenseDetectionFromResult objects rehydrated from
671+
LicenseDetection mappings, from resources and packages in a ``codebase``.
672+
673+
As a side effect, this also corrects `declared_license_expression` in packages
674+
according to their license detections. This is required because package fields
675+
are populated in the package plugin, which runs before the license plugin, and thus
676+
the license plugin step where unknown references to other files are dereferenced
677+
does not show up automatically in package attributes.
671678
"""
672679
has_packages = hasattr(codebase.root, 'package_data')
673680
has_licenses = hasattr(codebase.root, 'license_detections')
@@ -711,13 +718,33 @@ def collect_license_detections(codebase, include_license_clues=True):
711718
package_data = getattr(resource, 'package_data', []) or []
712719

713720
package_license_detection_mappings = []
721+
modified = False
714722
for package in package_data:
715723

716-
if package["license_detections"]:
717-
package_license_detection_mappings.extend(package["license_detections"])
718-
719-
if package["other_license_detections"]:
720-
package_license_detection_mappings.extend(package["other_license_detections"])
724+
package_license_detections = package["license_detections"]
725+
if package_license_detections:
726+
package_license_detection_mappings.extend(package_license_detections)
727+
detection_is_same, license_expression = verify_package_license_expression(
728+
license_detection_mappings=package_license_detections,
729+
license_expression=package["declared_license_expression"]
730+
)
731+
if not detection_is_same:
732+
package["declared_license_expression"] = license_expression
733+
modified = True
734+
735+
other_license_detections = package["other_license_detections"]
736+
if other_license_detections:
737+
package_license_detection_mappings.extend(other_license_detections)
738+
detection_is_same, license_expression = verify_package_license_expression(
739+
license_detection_mappings=other_license_detections,
740+
license_expression=package["other_license_expression"]
741+
)
742+
if not detection_is_same:
743+
package["other_license_expression"] = license_expression
744+
modified = True
745+
746+
if modified:
747+
codebase.save_resource(resource)
721748

722749
if package_license_detection_mappings:
723750
package_license_detection_objects = detections_from_license_detection_mappings(
@@ -729,6 +756,33 @@ def collect_license_detections(codebase, include_license_clues=True):
729756
return all_license_detections
730757

731758

759+
760+
def verify_package_license_expression(license_detection_mappings, license_expression):
761+
"""
762+
Returns a tuple of two values: `detection_is_same` and `license_expression` depending
763+
on whether the `license_expression` is the same as the license_expression computed from
764+
`license_detection_mappings`:
765+
1. If they are the same, we return True and None for the `license_expression`
766+
2. If they are not the same, we return False, and the computed `license_expression`
767+
"""
768+
license_expressions_from_detections = [
769+
detection["license_expression"]
770+
for detection in license_detection_mappings
771+
]
772+
773+
license_expression_from_detections = str(combine_expressions(
774+
expressions=license_expressions_from_detections,
775+
relation='AND',
776+
unique=True,
777+
))
778+
779+
if not license_expression_from_detections == license_expression:
780+
return False, license_expression_from_detections
781+
else:
782+
return True, None
783+
784+
785+
732786
@attr.s
733787
class UniqueDetection:
734788
"""
@@ -978,9 +1032,12 @@ def is_false_positive(license_matches, package_license=False):
9781032
match_rule_length == 1
9791033
for match_rule_length in match_rule_length_values
9801034
)
981-
982-
is_gpl_bare = all(
983-
'gpl_bare' in license_match.rule.identifier
1035+
bare_rules = ['gpl_bare', 'freeware_bare', 'public-domain_bare']
1036+
is_bare_rule = all(
1037+
any([
1038+
bare_rule in license_match.rule.identifier
1039+
for bare_rule in bare_rules
1040+
])
9841041
for license_match in license_matches
9851042
)
9861043

@@ -995,7 +1052,7 @@ def is_false_positive(license_matches, package_license=False):
9951052

9961053
is_single_match = len(license_matches) == 1
9971054

998-
if is_single_match and is_gpl_bare:
1055+
if is_single_match and is_bare_rule:
9991056
return True
10001057

10011058
if is_gpl and all_match_rule_length_one:

src/packagedcode/maven.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -313,12 +313,27 @@ def parse(cls, location):
313313
if TRACE:
314314
logger.debug(f'MavenPomPropertiesHandler.parse: properties: {properties!r}')
315315
if properties:
316-
yield models.PackageData(
317-
datasource_id=cls.datasource_id,
318-
type=cls.default_package_type,
319-
primary_language=cls.default_primary_language,
320-
extra_data=dict(pom_properties=properties)
321-
)
316+
yield from cls.parse_pom_properties(properties=properties)
317+
318+
@classmethod
319+
def parse_pom_properties(cls, properties):
320+
namespace = properties.pop("groupId", None)
321+
name = properties.pop("artifactId", None)
322+
version = properties.pop("version", None)
323+
if properties:
324+
extra_data = dict(pom_properties=properties)
325+
else:
326+
extra_data = {}
327+
328+
yield models.PackageData(
329+
datasource_id=cls.datasource_id,
330+
type=cls.default_package_type,
331+
primary_language=cls.default_primary_language,
332+
name=name,
333+
namespace=namespace,
334+
version=version,
335+
extra_data=extra_data,
336+
)
322337

323338

324339
def build_url(

src/summarycode/todo.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
from licensedcode.detection import get_uuid_on_content
2222
from licensedcode.detection import UniqueDetection
2323
from plugincode.post_scan import PostScanPlugin, post_scan_impl
24+
from packageurl import PackageURL
2425

2526
TRACE = os.environ.get('SCANCODE_DEBUG_REVIEW', False)
2627

@@ -171,7 +172,7 @@ def get_ambiguous_package_detections(codebase):
171172
for package in package_data:
172173
detection_type = None
173174
if not package["purl"]:
174-
if resource.path not in deps_datafile_paths:
175+
if resource.path not in deps_datafile_paths and not resource.for_packages:
175176
detection_type=PackageDetectionCategory.CANNOT_CREATE_PURL.value
176177
else:
177178
if package["purl"] not in codebase_packages_purls:
@@ -211,6 +212,11 @@ def get_package_identifier(package_data, file_path):
211212
return get_uuid_on_content(content=[identifier_elements])
212213

213214

215+
def get_unknown_purl(package_type):
216+
purl = PackageURL(type=package_type, name="unknown")
217+
return purl.to_string()
218+
219+
214220
@attr.s
215221
class AmbiguousDetection:
216222
"""
@@ -252,6 +258,8 @@ class AmbiguousDetection:
252258
@classmethod
253259
def from_package(cls, package_data, detection_log, file_path):
254260
purl = package_data["purl"]
261+
if not purl:
262+
purl = get_unknown_purl(package_data["type"])
255263
identifier = get_package_identifier(package_data, file_path)
256264
detection_id = f"{purl}-{identifier}"
257265
file_region = FileRegion(

tests/packagedcode/data/license_detection/reference-to-package/samba.expected.json

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -795,7 +795,7 @@
795795
"vcs_url": null,
796796
"copyright": null,
797797
"holder": null,
798-
"declared_license_expression": "gpl-3.0 AND (gpl-3.0 AND lgpl-3.0 AND gpl-2.0) AND (gpl-2.0-plus AND free-unknown AND gpl-1.0-plus) AND (gpl-1.0-plus AND lgpl-3.0-plus AND gpl-3.0 AND lgpl-3.0) AND (cc-by-sa-3.0 AND cc-by-sa-4.0 AND dco-1.1) AND gpl-2.0 AND gpl-1.0-plus",
798+
"declared_license_expression": "gpl-3.0 AND (gpl-3.0 AND lgpl-3.0 AND gpl-2.0) AND (gpl-2.0-plus AND free-unknown AND gpl-1.0-plus) AND (gpl-1.0-plus AND lgpl-3.0-plus AND gpl-3.0 AND lgpl-3.0) AND (cc-by-sa-3.0 AND cc-by-sa-4.0 AND dco-1.1) AND gpl-2.0",
799799
"declared_license_expression_spdx": "GPL-3.0-only AND (GPL-3.0-only AND LGPL-3.0-only AND GPL-2.0-only) AND (GPL-2.0-or-later AND LicenseRef-scancode-free-unknown AND GPL-1.0-or-later) AND (GPL-1.0-or-later AND LGPL-3.0-or-later AND GPL-3.0-only AND LGPL-3.0-only) AND (CC-BY-SA-3.0 AND CC-BY-SA-4.0 AND LicenseRef-scancode-dco-1.1) AND GPL-2.0-only AND GPL-1.0-or-later",
800800
"license_detections": [
801801
{
@@ -1157,7 +1157,7 @@
11571157
"vcs_url": null,
11581158
"copyright": null,
11591159
"holder": null,
1160-
"declared_license_expression": "gpl-3.0 AND (gpl-3.0 AND lgpl-3.0 AND gpl-2.0) AND (gpl-2.0-plus AND free-unknown AND gpl-1.0-plus) AND (gpl-1.0-plus AND lgpl-3.0-plus AND gpl-3.0 AND lgpl-3.0) AND (cc-by-sa-3.0 AND cc-by-sa-4.0 AND dco-1.1) AND gpl-2.0 AND gpl-1.0-plus",
1160+
"declared_license_expression": "gpl-3.0 AND (gpl-3.0 AND lgpl-3.0 AND gpl-2.0) AND (gpl-2.0-plus AND free-unknown AND gpl-1.0-plus) AND (gpl-1.0-plus AND lgpl-3.0-plus AND gpl-3.0 AND lgpl-3.0) AND (cc-by-sa-3.0 AND cc-by-sa-4.0 AND dco-1.1) AND gpl-2.0",
11611161
"declared_license_expression_spdx": "GPL-3.0-only AND (GPL-3.0-only AND LGPL-3.0-only AND GPL-2.0-only) AND (GPL-2.0-or-later AND LicenseRef-scancode-free-unknown AND GPL-1.0-or-later) AND (GPL-1.0-or-later AND LGPL-3.0-or-later AND GPL-3.0-only AND LGPL-3.0-only) AND (CC-BY-SA-3.0 AND CC-BY-SA-4.0 AND LicenseRef-scancode-dco-1.1) AND GPL-2.0-only AND GPL-1.0-or-later",
11621162
"license_detections": [
11631163
{

0 commit comments

Comments
 (0)