Skip to content

Commit d61be1a

Browse files
authored
Add support cpan on demand collection (#693)
* Add support for cpan on demand collection Signed-off-by: Tushar Goel <[email protected]> * Fix license header file Signed-off-by: Tushar Goel <[email protected]> --------- Signed-off-by: Tushar Goel <[email protected]>
1 parent edff9e1 commit d61be1a

File tree

4 files changed

+528
-0
lines changed

4 files changed

+528
-0
lines changed

minecode/collectors/cpan.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,95 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
import requests
12+
from packageurl import PackageURL
13+
14+
from minecode import priority_router
15+
from packagedb.models import PackageContentType
16+
17+
logger = logging.getLogger(__name__)
18+
handler = logging.StreamHandler()
19+
logger.addHandler(handler)
20+
logger.setLevel(logging.INFO)
21+
22+
23+
def get_cpan_release_json(distribution, version):
    """
    Return the MetaCPAN release JSON for a given distribution@version.

    Query the MetaCPAN release search endpoint and return the ``_source``
    mapping of the first hit. Return None when there is no hit, when the
    server answers with an HTTP error status, when the request itself fails
    (connection error, timeout) or when the response body is not valid JSON.

    Example:
        https://fastapi.metacpan.org/v1/release/_search?q=distribution:Mojolicious%20AND%20version:9.22
    """
    from urllib.parse import quote

    # Percent-encode the caller-supplied values so that characters such as
    # "&", "#" or spaces cannot change the structure of the query string.
    safe_distribution = quote(str(distribution), safe="")
    safe_version = quote(str(version), safe="")
    url = (
        "https://fastapi.metacpan.org/v1/release/_search?"
        f"q=distribution:{safe_distribution}%20AND%20version:{safe_version}"
    )

    try:
        # Without a timeout a stalled connection would block the caller forever.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        results = response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
        return None
    except (requests.exceptions.RequestException, ValueError) as err:
        # Network failures, timeouts and undecodable bodies are reported the
        # same way as HTTP errors: log and signal "no data" to the caller.
        logger.error(f"Error fetching CPAN release data from {url}: {err}")
        return None

    hits = results.get("hits", {}).get("hits", [])
    if not hits:
        return None
    return hits[0].get("_source")
47+
48+
49+
def map_cpan_package(package_url, pipelines, priority=0):
    """
    Add a CPAN distribution `package_url` to the PackageDB.

    Fetch the release metadata for the name and version of `package_url`,
    store every package built from it and queue each stored package for
    scanning with `pipelines` at `priority`.

    Return an error message string when `package_url` has no version or when
    no release data is found. Otherwise return the error value reported by
    the last `merge_or_create_package` call, which is None when no package
    was processed.
    """
    # NOTE(review): presumably imported locally to avoid import cycles — confirm.
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package
    from minecode.miners.cpan import build_packages

    name = package_url.name
    version = package_url.version

    # The release lookup needs an exact version; without one the query would
    # be built with the literal text "None" and could never match.
    if not version:
        error = f"Version is required to collect a CPAN distribution: {package_url}"
        logger.error(error)
        return error

    release_json = get_cpan_release_json(name, version)

    if not release_json:
        error = f"Distribution does not exist on CPAN: {package_url}"
        logger.error(error)
        return error

    packages = build_packages(release_json, package_url)

    error = None
    for package in packages:
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            # Stop at the first failure and hand the error back to the caller.
            break

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
78+
79+
80+
@priority_router.route("pkg:cpan/.*")
def process_request(purl_str, **kwargs):
    """
    Process CPAN Package URL (PURL).

    Parse `purl_str`, combine the default pipelines with any
    ``addon_pipelines`` given in `kwargs`, and map the package using the
    ``priority`` given in `kwargs` (0 when absent).

    Return the error message from the mapping step, or None when it
    reported no error.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    pipelines = DEFAULT_PIPELINES + tuple(kwargs.get("addon_pipelines", []))
    scan_priority = kwargs.get("priority", 0)

    parsed_purl = PackageURL.from_string(purl_str)
    outcome = map_cpan_package(parsed_purl, pipelines, scan_priority)

    # A falsy outcome means the mapping step reported no error.
    return outcome if outcome else None

minecode/miners/cpan.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,3 +484,84 @@ def get_vcs_repo_fromstring(content):
484484
return "git", repo[repo.index("<") + 1 : repo.index(">")]
485485
else:
486486
return None, None
487+
488+
489+
def build_packages(release_json, purl):
    """
    Yield a PackageData built from a MetaCPAN release API `release_json`
    mapping, with its Package URL set from `purl`.

    Optional nested sections ("resources", "repository", "bugtracker",
    "stat") may be missing or null; both cases are treated as empty.

    Example release_json (_source):
    {
        "name": "Mojolicious-9.22",
        "distribution": "Mojolicious",
        "version": "9.22",
        "abstract": "A next-generation web framework for Perl",
        "license": ["perl_5"],
        "author": "SRI",
        "resources": {
            "homepage": "https://mojolicious.org",
            "repository": { "url": "https://github.com/mojolicious/mojo" }
        },
        "download_url": "https://cpan.metacpan.org/authors/id/S/SR/SRI/Mojolicious-9.22.tar.gz"
    }
    """
    name = release_json.get("distribution") or purl.name
    version = release_json.get("version")
    description = release_json.get("abstract")
    release_date = release_json.get("date")
    license_list = release_json.get("license", [])

    # `.get(key, {})` only covers a missing key; `or {}` also covers a key
    # that is present with a null value, which would otherwise make the
    # chained `.get()` calls below fail.
    resources = release_json.get("resources") or {}
    homepage_url = resources.get("homepage")
    repo = resources.get("repository") or {}
    bugtracker = resources.get("bugtracker") or {}

    # Normalize an empty or missing repository URL to None.
    vcs_url = repo.get("url") or None

    parties = []
    author = release_json.get("author")
    if author:
        parties.append(scan_models.Party(name=author, role="author"))

    download_url = release_json.get("download_url")
    size = (release_json.get("stat") or {}).get("size")
    md5 = release_json.get("checksum_md5")
    sha256 = release_json.get("checksum_sha256")

    keywords = release_json.get("keywords") or []

    common_data = dict(
        name=name,
        version=version,
        primary_language="Perl",
        description=description,
        release_date=release_date,
        homepage_url=homepage_url,
        vcs_url=vcs_url,
        bug_tracking_url=bugtracker.get("web"),
        code_view_url=repo.get("web"),
        repository_homepage_url=f"https://metacpan.org/release/{name}",
        repository_download_url=download_url,
        api_data_url=f"https://fastapi.metacpan.org/v1/release/{name}",
        extracted_license_statement=license_list,
        declared_license_expression=" OR ".join(license_list) if license_list else None,
        parties=parties,
        keywords=keywords,
        size=size,
        md5=md5,
        sha256=sha256,
    )

    # NOTE(review): this datasource_id is overwritten right after the object
    # is built; confirm whether from_data() needs the initial value.
    download_data = dict(
        datasource_id="cpan_pkginfo",
        type="cpan",
        download_url=download_url,
    )
    download_data.update(common_data)

    package = scan_models.PackageData.from_data(download_data)
    package.datasource_id = "cpan_api_metadata"
    package.set_purl(purl)
    yield package
Lines changed: 60 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,60 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import os
12+
13+
from django.test import TestCase as DjangoTestCase
14+
from packageurl import PackageURL
15+
16+
import packagedb
17+
from minecode.collectors import cpan
18+
from minecode.utils_test import JsonBasedTesting
19+
20+
21+
class CpanPriorityQueueTests(JsonBasedTesting, DjangoTestCase):
    """
    Tests for the CPAN on-demand collector functions.

    NOTE(review): nothing in this class mocks the HTTP call made by the
    collector, so these tests appear to need access to the MetaCPAN API.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def setUp(self):
        super().setUp()
        # Load the stored release JSON fixture for Mojolicious 9.22.
        self.expected_json_loc = self.get_test_loc("cpan/Mojolicious-9.22.json")
        with open(self.expected_json_loc) as f:
            self.expected_json_contents = json.load(f)

    def test_get_cpan_release_json(self):
        """
        Verify get_cpan_release_json() returns expected keys for CPAN distribution.
        """
        json_contents = cpan.get_cpan_release_json(distribution="Mojolicious", version="9.22")
        # Fail with a clear message when the lookup returned no data at all.
        self.assertIsNotNone(json_contents)
        self.assertIn("distribution", json_contents)
        self.assertEqual("Mojolicious", json_contents["distribution"])
        self.assertEqual("9.22", json_contents["version"])

    def test_map_cpan_package(self):
        """
        Verify map_cpan_package() creates a Package in the DB with correct PURL
        and download URL.
        """
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(0, package_count)

        package_url = PackageURL.from_string("pkg:cpan/Mojolicious@9.22")
        cpan.map_cpan_package(package_url, ("test_pipeline",))

        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(1, package_count)

        package = packagedb.models.Package.objects.all().first()
        expected_purl_str = "pkg:cpan/Mojolicious@9.22"
        expected_download_url = (
            "https://cpan.metacpan.org/authors/id/S/SR/SRI/Mojolicious-9.22.tar.gz"
        )

        self.assertEqual(expected_purl_str, package.purl)
        self.assertEqual(expected_download_url, package.download_url)

0 commit comments

Comments
 (0)