Skip to content

Commit cb623c1

Browse files
committed
Implement "update_package_license_from_resource_if_missing" function #1763
- Update package's license if missing while the same package has license detected in RESOURCES Signed-off-by: Chin Yeung Li <[email protected]>
1 parent 13e8e88 commit cb623c1

File tree

4 files changed

+104
-2
lines changed

4 files changed

+104
-2
lines changed

scanpipe/pipelines/scan_maven_package.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ def steps(cls):
4747
cls.extract_archives,
4848
cls.run_scan,
4949
cls.fetch_and_scan_remote_pom,
50+
cls.update_package_license_from_resource_if_missing,
5051
cls.load_inventory_from_toolkit_scan,
5152
cls.make_summary_from_scan_results,
5253
)
@@ -65,9 +66,9 @@ def fetch_and_scan_remote_pom(self):
6566
pom_file_list = download_pom_files(pom_url_list)
6667
scanned_pom_packages, scanned_dependencies = scan_pom_files(pom_file_list)
6768

68-
updated_pacakges = packages + scanned_pom_packages
69+
updated_packages = packages + scanned_pom_packages
6970
# Replace/Update the package and dependencies section
70-
data["packages"] = updated_pacakges
71+
data["packages"] = updated_packages
7172
data["dependencies"] = scanned_dependencies
7273
with open(self.scan_output_location, "w") as file:
7374
json.dump(data, file, indent=2)

scanpipe/pipelines/scan_single_package.py

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@
3131
from scanpipe.pipes import scancode
3232
from scanpipe.pipes.input import copy_input
3333
from scanpipe.pipes.input import is_archive
34+
from scanpipe.pipes.resolve import update_package_license_from_resource_if_missing
3435

3536

3637
class ScanSinglePackage(Pipeline):
@@ -51,6 +52,7 @@ def steps(cls):
5152
cls.extract_input_to_codebase_directory,
5253
cls.extract_archives,
5354
cls.run_scan,
55+
cls.update_package_license_from_resource_if_missing,
5456
cls.load_inventory_from_toolkit_scan,
5557
cls.make_summary_from_scan_results,
5658
)
@@ -126,6 +128,23 @@ def run_scan(self):
126128
if not scan_output_path.exists():
127129
raise FileNotFoundError("ScanCode output not available.")
128130

131+
def update_package_license_from_resource_if_missing(self):
    """Fill in missing PACKAGE licenses using licenses detected in RESOURCES."""
    with open(self.scan_output_location) as scan_file:
        scan_data = json.load(scan_file)

    scan_packages = scan_data.get("packages", [])
    scan_resources = scan_data.get("files", [])
    # Leave the scan output untouched unless both sections are populated.
    if not (scan_packages and scan_resources):
        return

    # Swap in the license-updated packages section, then persist the result.
    scan_data["packages"] = update_package_license_from_resource_if_missing(
        scan_packages, scan_resources
    )
    with open(self.scan_output_location, "w") as scan_file:
        json.dump(scan_data, scan_file, indent=2)
147+
129148
def load_inventory_from_toolkit_scan(self):
130149
"""Process a JSON Scan results to populate codebase resources and packages."""
131150
input.load_inventory_from_toolkit_scan(self.project, self.scan_output_location)

scanpipe/pipes/resolve.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -723,3 +723,32 @@ def scan_pom_files(pom_file_list):
723723
scanned_dep["datafile_path"] = ""
724724
scanned_pom_deps.append(scanned_dep)
725725
return scanned_pom_packages, scanned_pom_deps
726+
727+
728+
def update_package_license_from_resource_if_missing(packages, resources):
    """
    Populate missing licenses to packages based on resource data.

    For each mapping in ``packages`` whose "declared_license_expression" is
    missing or empty, collect the distinct "detected_license_expression"
    values of the ``resources`` that list the package's "package_uid" in
    their "for_packages", combine them with AND, and store the deduplicated
    result as the package's "declared_license_expression".

    The package mappings are updated in place. The returned list holds the
    same mappings in the same order as ``packages``. Packages that already
    declare a license, or for which no resource license is found, are left
    unchanged.
    """
    # Index the resources once: package_uid -> distinct detected license
    # expressions, in the order the resources list them. This avoids
    # rescanning every resource for every package.
    expressions_by_uid = {}
    for resource in resources:
        expression = resource.get("detected_license_expression")
        if not expression:
            continue
        # "for_packages" may be absent or null: treat both as "no package".
        for uid in resource.get("for_packages") or []:
            expressions = expressions_by_uid.setdefault(uid, [])
            if expression not in expressions:
                expressions.append(expression)

    licensing = None
    updated_packages = []
    for package in packages:
        updated_packages.append(package)
        if package.get("declared_license_expression"):
            continue

        detected = expressions_by_uid.get(package.get("package_uid"))
        if not detected:
            continue

        if licensing is None:
            # Imported on first use and shared by all packages, so inputs
            # that need no combination do not require the library at all.
            from license_expression import Licensing

            licensing = Licensing()

        if len(detected) == 1:
            combined = detected[0]
        else:
            # AND binds tighter than OR, so each operand is parenthesized to
            # keep a compound expression such as "MIT OR GPL-2.0" grouped.
            combined = " AND ".join(f"({expression})" for expression in detected)

        package["declared_license_expression"] = str(licensing.dedup(combined))

    return updated_packages

scanpipe/tests/pipes/test_resolve.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -586,3 +586,56 @@ def test_scanpipe_resolve_get_pom_url_list_with_invalid_filename(self):
586586
input_source = {"filename": "not-a-jar.txt"}
587587
result = resolve.get_pom_url_list(input_source, [])
588588
self.assertEqual(result, [])
589+
590+
def test_scanpipe_resolve_update_package_license_from_resource_if_missing(self):
    # pkg1 and pkg2 have no declared license; pkg3 already declares one.
    package_data = [
        {"package_uid": "pkg1", "declared_license_expression": ""},
        {"package_uid": "pkg2", "declared_license_expression": None},
        {"package_uid": "pkg3", "declared_license_expression": "MIT"},
    ]
    gpl_resource = {
        "for_packages": ["pkg1", "pkg2"],
        "detected_license_expression": "GPL-2.0",
    }
    mit_resource = {"for_packages": ["pkg1"], "detected_license_expression": "MIT"}

    results = resolve.update_package_license_from_resource_if_missing(
        package_data, [gpl_resource, mit_resource]
    )

    # Expected declared license per package, in input order.
    expected_expressions = ["GPL-2.0 AND MIT", "GPL-2.0", "MIT"]
    for index, expected in enumerate(expected_expressions):
        self.assertEqual(results[index]["declared_license_expression"], expected)
614+
615+
def test_scanpipe_resolve_update_package_license_from_resource_if_missing_no_match(
    self,
):
    # The only resource belongs to another package, so nothing is filled in.
    unlicensed_package = {
        "package_uid": "pkgX",
        "declared_license_expression": None,
    }
    unrelated_resource = {
        "for_packages": ["pkgY"],
        "detected_license_expression": "MIT",
    }

    results = resolve.update_package_license_from_resource_if_missing(
        [unlicensed_package], [unrelated_resource]
    )

    first_result = results[0]
    self.assertEqual(first_result["declared_license_expression"], None)
625+
626+
def test_scanpipe_resolve_update_package_license_from_resource_if_missing_no_change(
    self,
):
    # Both packages already declare a license, so the MIT detection on the
    # shared resource must not replace either of them.
    gpl_package = {"package_uid": "pkg1", "declared_license_expression": "GPL-2.0"}
    apache_package = {
        "package_uid": "pkg2",
        "declared_license_expression": "Apache-2.0",
    }
    shared_resource = {
        "for_packages": ["pkg1", "pkg2"],
        "detected_license_expression": "MIT",
    }

    results = resolve.update_package_license_from_resource_if_missing(
        [gpl_package, apache_package], [shared_resource]
    )

    for index, expected in enumerate(["GPL-2.0", "Apache-2.0"]):
        self.assertEqual(results[index]["declared_license_expression"], expected)

0 commit comments

Comments
 (0)