Skip to content

Commit c36c2dc

Browse files
add on demand package data collection for nuget #597 (#606)
* Fixed #597 - Add on-demand package data collection for nuget Signed-off-by: Chin Yeung Li <[email protected]> * #597 - ran `make valid` to fix code format Signed-off-by: Chin Yeung Li <[email protected]> * Update minecode/tests/collectors/test_nuget.py Signed-off-by: Chin Yeung Li <[email protected]> Co-authored-by: Jono Yang <[email protected]> * Update minecode/collectors/nuget.py Signed-off-by: Chin Yeung Li <[email protected]> Co-authored-by: Jono Yang <[email protected]> --------- Signed-off-by: Chin Yeung Li <[email protected]> Co-authored-by: Jono Yang <[email protected]>
1 parent b1da1fc commit c36c2dc

File tree

4 files changed

+159
-0
lines changed

4 files changed

+159
-0
lines changed

minecode/collectors/nuget.py

Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
12+
import requests
13+
from packageurl import PackageURL
14+
15+
from minecode import priority_router
16+
from minecode.miners.nuget import build_packages_with_json
17+
from packagedb.models import PackageContentType
18+
19+
"""
20+
Collect Nuget packages from nuget registries.
21+
"""
22+
23+
# Module-level logger: records at INFO and above are emitted through a
# dedicated stream handler attached at import time.
handler = logging.StreamHandler()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
27+
28+
29+
def get_package_json(name, timeout=30):
    """
    Return the contents of the JSON file of the package.

    Fetch the NuGet registration index of the package `name` from
    api.nuget.org and return the decoded JSON. `name` is lowercased before
    being placed in the URL, as the registration resource expects a
    lowercase package id.

    `timeout` is the number of seconds passed to `requests.get`; requests
    does not time out on its own, so without it a stalled connection would
    block the caller indefinitely.

    Return None, after logging the problem, when the request fails (HTTP
    error status, connection error, timeout) or when the response body is
    not valid JSON.
    """
    # Create URLs using project name.
    url = f"https://api.nuget.org/v3/registration5-gz-semver2/{name.lower()}/index.json"

    try:
        response = requests.get(url, timeout=timeout)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
    except (requests.exceptions.RequestException, ValueError) as err:
        # Connection errors, timeouts and undecodable bodies are reported the
        # same way as HTTP errors: log and let the caller see "no data".
        logger.error(f"Unable to fetch {url}: {err}")
    return None
42+
43+
44+
def _get_registration_leaves(page, timeout=30):
    """
    Return the list of registration leaves of the registration `page`.

    The leaves are taken from the inline "items" of `page` when present.
    When a page carries no inline "items", it is fetched from the URL in its
    "@id" and the "items" of that document are returned.

    Raise requests.exceptions.RequestException (or ValueError for an
    undecodable body) when the page cannot be fetched.
    """
    leaves = page.get("items")
    if leaves is None:
        page_url = page.get("@id")
        if not page_url:
            return []
        response = requests.get(page_url, timeout=timeout)
        response.raise_for_status()
        leaves = response.json().get("items")
    return leaves or []


def map_nuget_package(package_url, pipelines, priority=0):
    """
    Add a nuget `package_url` to the PackageDB.

    Every registration page of the package is visited. When `package_url`
    has a version, only the matching entry is processed; otherwise every
    listed version is processed. Each package that is merged or created
    without error is submitted to the scan queue with `pipelines` and
    `priority`.

    Return an error string if any errors are encountered during the process
    (several errors are joined with newlines), or an empty string otherwise.
    """
    from minecode.model_utils import add_package_to_scan_queue
    from minecode.model_utils import merge_or_create_package

    package_json = get_package_json(name=package_url.name.lower())

    if not package_json:
        error = f"Package does not exist on nuget.org: {package_url}"
        logger.error(error)
        return error

    errors = []
    version_found = False

    # The registration index is a list of pages; reading only the first one
    # would miss versions of packages that span several pages.
    for page in package_json.get("items") or []:
        try:
            leaves = _get_registration_leaves(page)
        except (requests.exceptions.RequestException, ValueError) as err:
            error = f"Unable to fetch nuget registration page for {package_url}: {err}"
            logger.error(error)
            errors.append(error)
            continue

        for leaf in leaves:
            metadata = leaf["catalogEntry"]
            if package_url.version:
                if metadata.get("version") != package_url.version:
                    continue
                version_found = True

            for package in build_packages_with_json(metadata, package_url):
                package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
                db_package, _, _, merge_error = merge_or_create_package(package, visit_level=0)
                if merge_error:
                    # Keep every error instead of letting a later success
                    # overwrite it.
                    errors.append(str(merge_error))
                    break

                # Submit package for scanning
                if db_package:
                    add_package_to_scan_queue(
                        package=db_package, pipelines=pipelines, priority=priority
                    )

        if version_found:
            # The requested version was handled; no need to read more pages.
            break

    # Only report a missing version when every page could be inspected.
    if package_url.version and not version_found and not errors:
        error = f"Package version does not exist on nuget.org: {package_url}"
        logger.error(error)
        errors.append(error)

    return "\n".join(errors)
86+
87+
88+
@priority_router.route("pkg:nuget/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the nuget Package URL string `purl_str`.

    The pipelines to run are DEFAULT_PIPELINES followed by the optional
    `addon_pipelines` keyword argument; the optional `priority` keyword
    argument defaults to 0. `purl_str` is parsed into a PackageURL and
    handed to `map_nuget_package` together with those pipelines and the
    priority.

    Return the error message reported by `map_nuget_package` when there is
    one, otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = tuple(kwargs.get("addon_pipelines", []))
    all_pipelines = DEFAULT_PIPELINES + extra_pipelines
    requested_priority = kwargs.get("priority", 0)

    outcome = map_nuget_package(
        PackageURL.from_string(purl_str),
        all_pipelines,
        requested_priority,
    )

    # An empty outcome means success and is reported as None.
    return outcome if outcome else None

minecode/miners/nuget.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,8 @@ def build_packages_with_json(metadata, purl=None):
217217
copyright=copyr,
218218
parties=authors,
219219
keywords=keywords,
220+
declared_license_expression=metadata.get("licenseExpression"),
221+
download_url=metadata.get("packageContent"),
220222
)
221223
package = scan_models.PackageData.from_data(package_data=package_mapping)
222224
package.set_purl(purl)
Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,47 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/nexB/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import json
11+
import os
12+
13+
from django.test import TestCase as DjangoTestCase
14+
15+
from packageurl import PackageURL
16+
17+
import packagedb
18+
from minecode.collectors import nuget
19+
from minecode.utils_test import JsonBasedTesting
20+
21+
22+
class NugetPriorityQueueTests(JsonBasedTesting, DjangoTestCase):
    """
    Tests for the on-demand nuget collector in `minecode.collectors.nuget`.

    NOTE(review): neither test stubs the HTTP layer, so they appear to
    require live access to api.nuget.org - confirm this is intended.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def setUp(self):
        """Load the stored entityframework JSON fixture."""
        super().setUp()
        self.expected_json_loc = self.get_test_loc("nuget/entityframework2.json")
        with open(self.expected_json_loc, encoding="utf-8") as f:
            self.expected_json_contents = json.load(f)

    def test_get_package_json(self):
        """The fetched registration index identifies itself by its own URL."""
        json_contents = nuget.get_package_json(name="entityframework")
        expected_id = "https://api.nuget.org/v3/registration5-gz-semver2/entityframework/index.json"
        self.assertEqual(json_contents.get("@id"), expected_id)

    def test_map_nuget_package(self):
        """Mapping a versioned purl creates exactly one matching Package."""
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(0, package_count)
        package_url = PackageURL.from_string("pkg:nuget/entityframework@6.1.3")
        # Pipelines are passed as a tuple; ("test_pipeline") without the
        # trailing comma is just a string.
        nuget.map_nuget_package(package_url, ("test_pipeline",))
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(1, package_count)
        package = packagedb.models.Package.objects.all().first()
        expected_purl_str = "pkg:nuget/entityframework@6.1.3"
        expected_download_url = "https://api.nuget.org/v3-flatcontainer/entityframework/6.1.3/entityframework.6.1.3.nupkg"
        self.assertEqual(expected_purl_str, package.purl)
        self.assertEqual(expected_download_url, package.download_url)

packagedb/api.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -975,6 +975,7 @@ def _reindex_package(package, reindexed_packages, **kwargs):
975975
"pypi",
976976
"cargo",
977977
"gem",
978+
"nuget",
978979
]
979980

980981
unique_packages, unsupported_packages, unsupported_vers = get_resolved_packages(

0 commit comments

Comments
 (0)