From 45990c4455f58641ad1f79d679df913b46fe79d5 Mon Sep 17 00:00:00 2001 From: Adrian Braemer Date: Mon, 17 Mar 2025 14:55:05 +0100 Subject: [PATCH 1/5] Fix conditions for bzl package versions Signed-off-by: Adrian Braemer --- src/packagedcode/build.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/src/packagedcode/build.py b/src/packagedcode/build.py index 1c7fb152a7..c1cf7cb87a 100644 --- a/src/packagedcode/build.py +++ b/src/packagedcode/build.py @@ -375,12 +375,11 @@ def parse(cls, location, package_only=True): ) if ( - 'upstream_type' - and 'name' - and 'version' - and 'licenses' - and 'upstream_address' - in metadata_fields + 'upstream_type' in metadata_fields + and 'name' in metadata_fields + and 'version' in metadata_fields + and 'licenses' in metadata_fields + and 'upstream_address' in metadata_fields ): # TODO: Create function that determines package type from download URL, # then create a package of that package type from the metadata info @@ -397,16 +396,15 @@ def parse(cls, location, package_only=True): yield models.PackageData.from_data(package_data, package_only=True) if ( - 'package_type' - and 'name' - and 'version' - and 'license_expression' - and 'homepage_url' - and 'download_url' - and 'vcs_url' - and 'download_archive_sha1' - and 'vcs_commit_hash' - in metadata_fields + 'package_type' in metadata_fields + and 'name' in metadata_fields + and 'version' in metadata_fields + and 'license_expression' in metadata_fields + and 'homepage_url' in metadata_fields + and 'download_url' in metadata_fields + and 'vcs_url' in metadata_fields + and 'download_archive_sha1' in metadata_fields + and 'vcs_commit_hash' in metadata_fields ): package_data = dict( datasource_id=cls.datasource_id, From bdb908c9b0d56fc06bc6b1531e77c23adf8b9383 Mon Sep 17 00:00:00 2001 From: Adrian Braemer Date: Mon, 17 Mar 2025 15:08:35 +0100 Subject: [PATCH 2/5] add myself to AUTHORS Signed-off-by: Adrian Braemer --- AUTHORS.rst | 1 + 
1 file changed, 1 insertion(+) diff --git a/AUTHORS.rst b/AUTHORS.rst index a8e99cfc0b..43e7ca97b0 100644 --- a/AUTHORS.rst +++ b/AUTHORS.rst @@ -3,6 +3,7 @@ The following organizations or individuals have contributed to ScanCode: - Abhigyan Kumar Singh @Abhigyankrsingh - Abhishek Kumar @Abhishek-Dev09 - Aditya Viki @adityaviki +- Adrian Braemer @abraemer - Agni Bhattacharyya @PyAgni - Akanksha Garg @akugarg - Alex Blekhman @a-tinsmith From 8f7875b6fc162228ca40e56b798bb1fc1e6dfe5e Mon Sep 17 00:00:00 2001 From: Adrian Braemer Date: Mon, 17 Mar 2025 16:04:01 +0100 Subject: [PATCH 3/5] combine paths for different bzl versions * by combining the paths we ensure that as much information as possible is extracted * I also added the possibility to extract information from a 'package_url' field Signed-off-by: Adrian Braemer --- src/packagedcode/build.py | 68 ++++++------------- .../metadatabzl/with-package-url/METADATA.bzl | 12 ++++ tests/packagedcode/test_build.py | 22 ++++++ 3 files changed, 56 insertions(+), 46 deletions(-) create mode 100644 tests/packagedcode/data/build/metadatabzl/with-package-url/METADATA.bzl diff --git a/src/packagedcode/build.py b/src/packagedcode/build.py index c1cf7cb87a..6dc1624a18 100644 --- a/src/packagedcode/build.py +++ b/src/packagedcode/build.py @@ -13,6 +13,7 @@ from collections import defaultdict from commoncode import fileutils +from packageurl import PackageURL from licensedcode.cache import build_spdx_license_expression from licensedcode.cache import get_cache @@ -374,52 +375,27 @@ def parse(cls, location, package_only=True): ) ) - if ( - 'upstream_type' in metadata_fields - and 'name' in metadata_fields - and 'version' in metadata_fields - and 'licenses' in metadata_fields - and 'upstream_address' in metadata_fields - ): - # TODO: Create function that determines package type from download URL, - # then create a package of that package type from the metadata info - package_data = dict( - datasource_id=cls.datasource_id, - 
type=metadata_fields.get('upstream_type', cls.default_package_type), - name=metadata_fields.get('name'), - version=metadata_fields.get('version'), - extracted_license_statement=metadata_fields.get('licenses', []), - parties=parties, - homepage_url=metadata_fields.get('upstream_address', ''), - # TODO: Store 'upstream_hash` somewhere - ) - yield models.PackageData.from_data(package_data, package_only=True) - - if ( - 'package_type' in metadata_fields - and 'name' in metadata_fields - and 'version' in metadata_fields - and 'license_expression' in metadata_fields - and 'homepage_url' in metadata_fields - and 'download_url' in metadata_fields - and 'vcs_url' in metadata_fields - and 'download_archive_sha1' in metadata_fields - and 'vcs_commit_hash' in metadata_fields - ): - package_data = dict( - datasource_id=cls.datasource_id, - type=metadata_fields.get('package_type', cls.default_package_type), - name=metadata_fields.get('name'), - version=metadata_fields.get('version'), - extracted_license_statement=metadata_fields.get('license_expression', ''), - parties=parties, - homepage_url=metadata_fields.get('homepage_url', ''), - download_url=metadata_fields.get('download_url', ''), - vcs_url=metadata_fields.get('vcs_url', ''), - sha1=metadata_fields.get('download_archive_sha1', ''), - extra_data=dict(vcs_commit_hash=metadata_fields.get('vcs_commit_hash', '')) - ) - yield models.PackageData.from_data(package_data, package_only=True) + # TODO: Create function that determines package type from download URL, + # then create a package of that package type from the metadata info + package_data = dict( + datasource_id=cls.datasource_id, + type=metadata_fields.get('upstream_type', metadata_fields.get('package_type', cls.default_package_type)), + name=metadata_fields.get('name'), + version=metadata_fields.get('version'), + extracted_license_statement=metadata_fields.get('licenses', metadata_fields.get('license_expression')), + parties=parties, + 
homepage_url=metadata_fields.get('upstream_address', metadata_fields.get('homepage_url')), + download_url=metadata_fields.get('download_url'), + vcs_url=metadata_fields.get('vcs_url'), + sha1=metadata_fields.get('download_archive_sha1'), + # TODO: Store 'upstream_hash` somewhere + ) + if 'vcs_commit_hash' in metadata_fields: + package_data["extra_data"] = dict(vcs_commit_hash=metadata_fields['vcs_commit_hash']) + if 'package_url' in metadata_fields: + package_data.update(PackageURL.from_string(metadata_fields['package_url']).to_dict()) + yield models.PackageData.from_data(package_data, package_only=True) + @classmethod def assign_package_to_resources(cls, package, resource, codebase, package_adder): diff --git a/tests/packagedcode/data/build/metadatabzl/with-package-url/METADATA.bzl b/tests/packagedcode/data/build/metadatabzl/with-package-url/METADATA.bzl new file mode 100644 index 0000000000..80949f4cc2 --- /dev/null +++ b/tests/packagedcode/data/build/metadatabzl/with-package-url/METADATA.bzl @@ -0,0 +1,12 @@ +METADATA = { + "licenses": [ + "BSD-3-Clause", + ], + "maintainers": [ + "oss_foundation", + ], + "name": "androidx.compose.animation:animation", + "upstream_address": "https://developer.android.com/jetpack/androidx/releases/compose-animation#0.0.1", + "version": "0.0.1", + "package_url" : "pkg:maven/androidx.compose.animation/animation@0.0.1" +} \ No newline at end of file diff --git a/tests/packagedcode/test_build.py b/tests/packagedcode/test_build.py index 147a075b15..3d7ceb835a 100644 --- a/tests/packagedcode/test_build.py +++ b/tests/packagedcode/test_build.py @@ -103,6 +103,28 @@ def test_MetadataBzl_parse(self): ) expected_packages = [models.PackageData.from_data(package_data=package_data, package_only=True)] compare_package_results(expected_packages, result_packages) + + def test_MetadataBzl_parse_with_package_url(self): + test_file = self.get_test_loc('metadatabzl/with-package-url/METADATA.bzl') + result_packages = 
build.BuckMetadataBzlHandler.parse(test_file, package_only=True) + package_data = dict( + datasource_id=build.BuckMetadataBzlHandler.datasource_id, + name='animation', + namespace='androidx.compose.animation', + type='maven', + version='0.0.1', + extracted_license_statement=['BSD-3-Clause'], + parties=[ + models.Party( + type=models.party_org, + name='oss_foundation', + role='maintainer' + ) + ], + homepage_url='https://developer.android.com/jetpack/androidx/releases/compose-animation#0.0.1', + ) + expected_packages = [models.PackageData.from_data(package_data=package_data, package_only=True)] + compare_package_results(expected_packages, result_packages) def test_MetadataBzl_recognize_new_format(self): test_file = self.get_test_loc('metadatabzl/new-format/METADATA.bzl') From 658f0d8f0e227fdc6b7e5b440f3940cb99e1f401 Mon Sep 17 00:00:00 2001 From: Adrian Braemer Date: Fri, 21 Mar 2025 08:07:11 +0100 Subject: [PATCH 4/5] fix: minor changes to address review comments * always create extra_data dictionary * use explicit branches to extract information from metadata_fields instead of nested get calls * also extract upstream_hash Signed-off-by: Adrian Braemer --- src/packagedcode/build.py | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) diff --git a/src/packagedcode/build.py b/src/packagedcode/build.py index 6dc1624a18..2f2d964758 100644 --- a/src/packagedcode/build.py +++ b/src/packagedcode/build.py @@ -377,23 +377,47 @@ def parse(cls, location, package_only=True): # TODO: Create function that determines package type from download URL, # then create a package of that package type from the metadata info + + if 'upstream_type' in metadata_fields: + package_type = metadata_fields['upstream_type'] + elif 'package_type' in metadata_fields: + package_type = metadata_fields['package_type'] + else: + package_type = cls.default_package_type + + if 'licenses' in metadata_fields: + extracted_license_statement = metadata_fields['licenses'] + else: + 
extracted_license_statement = metadata_fields.get('license_expression') + + if 'upstream_address' in metadata_fields: + homepage_url = metadata_fields['upstream_address'] + else: + homepage_url = metadata_fields.get('homepage_url') + + + extra_data = {} + if 'vcs_commit_hash' in metadata_fields: + extra_data['vcs_commit_hash'] = metadata_fields['vcs_commit_hash'] + if 'upstream_hash' in metadata_fields: + extra_data['upstream_hash'] = metadata_fields['upstream_hash'] + package_data = dict( datasource_id=cls.datasource_id, - type=metadata_fields.get('upstream_type', metadata_fields.get('package_type', cls.default_package_type)), + type=package_type, name=metadata_fields.get('name'), version=metadata_fields.get('version'), - extracted_license_statement=metadata_fields.get('licenses', metadata_fields.get('license_expression')), + extracted_license_statement=extracted_license_statement, parties=parties, - homepage_url=metadata_fields.get('upstream_address', metadata_fields.get('homepage_url')), + homepage_url=homepage_url, download_url=metadata_fields.get('download_url'), vcs_url=metadata_fields.get('vcs_url'), sha1=metadata_fields.get('download_archive_sha1'), - # TODO: Store 'upstream_hash` somewhere + extra_data=extra_data ) - if 'vcs_commit_hash' in metadata_fields: - package_data["extra_data"] = dict(vcs_commit_hash=metadata_fields['vcs_commit_hash']) if 'package_url' in metadata_fields: package_data.update(PackageURL.from_string(metadata_fields['package_url']).to_dict()) + yield models.PackageData.from_data(package_data, package_only=True) From 454910fee6d154769be55fb72eae82444beb5f9a Mon Sep 17 00:00:00 2001 From: Adrian Braemer Date: Thu, 17 Apr 2025 16:30:28 +0200 Subject: [PATCH 5/5] fix: Adapt test to include new upstream hash As we now store the upstream hash in the field `extra_data`, we need to make the test aware of that. 
Signed-off-by: Adrian Braemer --- tests/packagedcode/test_build.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/packagedcode/test_build.py b/tests/packagedcode/test_build.py index 3d7ceb835a..11b2c8cb5f 100644 --- a/tests/packagedcode/test_build.py +++ b/tests/packagedcode/test_build.py @@ -99,6 +99,7 @@ def test_MetadataBzl_parse(self): role='maintainer' ) ], + extra_data=dict(upstream_hash='deadbeef'), homepage_url='https://github.com/example/example', ) expected_packages = [models.PackageData.from_data(package_data=package_data, package_only=True)]