Skip to content

Commit 44c77b5

Browse files
authored
Add composer collector (#689)
* Add composer collector Signed-off-by: Tushar Goel <[email protected]> * Push test JSON files Signed-off-by: Tushar Goel <[email protected]> --------- Signed-off-by: Tushar Goel <[email protected]>
1 parent e23ede7 commit 44c77b5

File tree

4 files changed

+5360
-0
lines changed

4 files changed

+5360
-0
lines changed

minecode/collectors/composer.py

Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
import requests
12+
from packageurl import PackageURL
13+
from minecode.miners.composer import build_packages
14+
15+
from minecode import priority_router
16+
from packagedb.models import PackageContentType
17+
18+
# Module-level logger for the composer collector: it gets its own stream
# handler and emits records at INFO level and above.
handler = logging.StreamHandler()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
22+
23+
24+
def get_composer_package_json(name, timeout=30):
    """
    Return the contents of the JSON file of the package from Packagist.
    Example: https://repo.packagist.org/p2/laravel/laravel.json

    `name` is the Packagist package name, such as "laravel/laravel".
    `timeout` is the number of seconds to wait for the server. Without a
    timeout a stalled connection would block the caller indefinitely.

    Return None, after logging the error, when the server answers with an
    HTTP error status. Other `requests` exceptions such as connection errors
    and timeouts are not caught here and propagate to the caller.
    """
    url = f"https://repo.packagist.org/p2/{name}.json"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
        return None
37+
38+
39+
def map_composer_package(package_url, pipelines, priority=0):
    """
    Add a composer `package_url` to the PackageDB.

    Fetch the Packagist metadata for the package named by `package_url`,
    build packages from it, and merge or create each one in the database.
    Each package stored in the database is added to the scan queue with the
    given `pipelines` and `priority`.

    Return an error message when the package metadata cannot be fetched,
    otherwise return the error value reported by the last call to
    merge_or_create_package (None when no package was processed).
    """
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package

    # Packagist names are "vendor/package"; fall back to the bare name when
    # the PURL carries no namespace.
    full_name = package_url.name
    if package_url.namespace:
        full_name = f"{package_url.namespace}/{package_url.name}"

    metadata = get_composer_package_json(name=full_name)
    if not metadata:
        message = f"Package does not exist on packagist.org: {package_url}"
        logger.error(message)
        return message

    error = None
    for scanned_package in build_packages(metadata, package_url):
        scanned_package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(scanned_package, visit_level=0)
        if error:
            # Stop at the first failure and report it to the caller.
            break

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
70+
71+
72+
@priority_router.route("pkg:composer/.*")
def process_request(purl_str, **kwargs):
    """
    Process `priority_resource_uri` containing a composer Package URL (PURL).

    Recognized keyword arguments:
    - `addon_pipelines`: extra pipelines appended to DEFAULT_PIPELINES.
    - `priority`: scan queue priority, 0 when not provided.

    Return the error message from map_composer_package() if there is one,
    otherwise return None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    priority = kwargs.get("priority", 0)
    extra_pipelines = tuple(kwargs.get("addon_pipelines", []))
    pipelines = DEFAULT_PIPELINES + extra_pipelines

    parsed_purl = PackageURL.from_string(purl_str)
    error_msg = map_composer_package(parsed_purl, pipelines, priority)

    # Any falsy result means "no error" and is reported as None.
    return error_msg or None

minecode/miners/composer.py

Lines changed: 74 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,74 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
from packagedcode import models as scan_models
11+
12+
13+
def _expand_minified_versions(versions):
    """
    Yield one complete metadata mapping per entry of `versions`, a list in
    the Packagist "composer/2.0" minified form.

    In that form the first entry is complete and each following entry holds
    only the keys that changed from the previous entry; the special value
    "__unset" means the key is removed.
    """
    current = None
    for delta in versions:
        if current is None:
            current = dict(delta)
        else:
            for key, value in delta.items():
                if value == "__unset":
                    current.pop(key, None)
                else:
                    current[key] = value
        # Hand out a copy so later deltas never alter an entry already yielded.
        yield dict(current)


def build_packages(metadata_dict, purl):
    """
    Yield ScannedPackage built from packagist.org API.

    metadata_dict format:
    {
        "packages": {
            "vendor/package": [
                { version metadata... }
            ]
        }
    }

    When `metadata_dict` carries the "minified": "composer/2.0" marker, the
    version entries are expanded to complete mappings before use.

    When `purl` has a version, only entries whose "version" or
    "version_normalized" equals it are considered. Entries without a
    "dist" download URL are skipped. Null "source", "dist" or "authors"
    values are treated as missing.
    """
    purl_version = purl.version
    package_name = f"{purl.namespace}/{purl.name}" if purl.namespace else purl.name

    packages = metadata_dict.get("packages") or {}
    versions = packages.get(package_name) or []

    if metadata_dict.get("minified") == "composer/2.0":
        # Expand over every entry, in order, before filtering by version:
        # each entry only makes sense relative to the ones before it.
        versions = _expand_minified_versions(versions)

    for version_info in versions:
        version = version_info.get("version")
        version_normalized = version_info.get("version_normalized")
        if purl_version and purl_version not in (version, version_normalized):
            continue

        # "dist" may be present with a null value, hence `or {}`.
        dist = version_info.get("dist") or {}
        download_url = dist.get("url")
        if not download_url:
            # Nothing to download: no package is produced for this entry.
            continue

        source = version_info.get("source") or {}
        parties = [
            scan_models.Party(name=author.get("name"), role="author")
            for author in version_info.get("authors") or []
        ]

        package_data = dict(
            datasource_id="composer_pkginfo",
            type="composer",
            download_url=download_url,
            sha1=dist.get("shasum"),
            name=purl.name,
            version=version,
            description=version_info.get("description"),
            homepage_url=version_info.get("homepage"),
            repository_homepage_url=source.get("url"),
            extracted_license_statement=version_info.get("license") or [],
            parties=parties,
        )
        package = scan_models.PackageData.from_data(package_data)
        package.datasource_id = "composer_api_metadata"
        package.set_purl(purl)
        yield package
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import os
12+
13+
from django.test import TestCase as DjangoTestCase
14+
from packageurl import PackageURL
15+
16+
import packagedb
17+
from minecode.collectors import composer
18+
from minecode.utils_test import JsonBasedTesting
19+
20+
21+
class ComposerPriorityQueueTests(JsonBasedTesting, DjangoTestCase):
    """
    Tests for the composer collector functions get_composer_package_json()
    and map_composer_package().
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def setUp(self):
        """
        Load the sample Packagist metadata for laravel/laravel from the test
        data directory.
        """
        super().setUp()
        # Sample Packagist metadata for laravel/laravel
        self.expected_json_loc = self.get_test_loc("composer/laravel-laravel.json")
        with open(self.expected_json_loc) as json_file:
            self.expected_json_contents = json.load(json_file)

    def test_get_package_json(self):
        """
        Verify that get_composer_package_json() fetches metadata and contains
        the expected "packages" structure, with laravel/laravel present.
        """
        # NOTE: this calls the collector without any stubbing, so the
        # metadata is requested from repo.packagist.org.
        fetched = composer.get_composer_package_json(name="laravel/laravel")
        self.assertIn("packages", fetched)
        self.assertIn("laravel/laravel", fetched["packages"])

    def test_map_composer_package(self):
        """
        Verify that map_composer_package() creates a Package in the DB with the
        correct PURL and download URL from Packagist metadata.
        """
        # The database starts empty.
        self.assertEqual(0, packagedb.models.Package.objects.count())

        # NOTE(review): the version part of the two PURL strings below looks
        # garbled in this view -- confirm against the repository.
        package_url = PackageURL.from_string("pkg:composer/laravel/[email protected]")
        composer.map_composer_package(package_url, ("test_pipeline",))

        # Exactly one package must have been created.
        self.assertEqual(1, packagedb.models.Package.objects.count())

        created = packagedb.models.Package.objects.first()
        expected_purl_str = "pkg:composer/laravel/[email protected]"

        # dist.url from Packagist metadata is expected to be something like:
        # https://api.github.com/repos/laravel/laravel/zipball/<commit>
        self.assertEqual(expected_purl_str, created.purl)
        self.assertTrue(created.download_url.startswith("https://"))
        self.assertIn("laravel", created.download_url.lower())

0 commit comments

Comments
 (0)