Skip to content

Commit 32e3817

Browse files
authored
Add support for Conda on-demand data collection (#713)
1 parent 8dde864 commit 32e3817

File tree

8 files changed

+246105
-0
lines changed

8 files changed

+246105
-0
lines changed

minecode/collectors/conda.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
from urllib.parse import urljoin
12+
13+
import requests
14+
from packageurl import PackageURL
15+
from minecode import priority_router
16+
from minecode.miners.conda import build_packages
17+
from minecode.utils import fetch_http, get_temp_file
18+
from packagedb.models import PackageContentType
19+
from packageurl.contrib.purl2url import build_conda_download_url
20+
21+
# Module-level logging setup: messages from this collector are emitted at
# INFO level and above through a dedicated stream handler.
handler = logging.StreamHandler()
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
logger.addHandler(handler)
25+
26+
27+
def map_conda_package(package_url, pipelines, priority=0):
    """
    Add a Conda distribution `package_url` to the PackageDB.

    Build the download URL for `package_url`, fetch the ``repodata.json.bz2``
    index located next to it, build package data from the matching index
    entry and merge it in the PackageDB. Each stored package is then
    submitted to the scan queue with `pipelines` and `priority`.

    Return the error reported by ``merge_or_create_package`` if any, or None.
    None is also returned when no download URL can be built or when the index
    has no entry for the package.
    """
    import os

    from minecode.model_utils import add_package_to_scan_queue
    from minecode.model_utils import merge_or_create_package

    download_url = build_conda_download_url(str(package_url))
    if not download_url:
        return None

    # The last URL segment is the archive file name; it is also the key used
    # to look up the package entry in the index.
    package_identifier = download_url.split("/")[-1]
    package_indexes_url = urljoin(download_url, "./repodata.json.bz2")

    content = fetch_http(package_indexes_url)
    location = get_temp_file("NonPersistentHttpVisitor")
    try:
        with open(location, "wb") as tmp:
            tmp.write(content)

        package_info = None
        if package_url.namespace == "conda-forge":
            package_info = get_package_info(package_url.name)

        # build_packages is a generator: the index file is only read while
        # iterating below, so it must not be removed before the loop is done.
        packages = build_packages(
            location, download_url, package_info, package_identifier, package_url
        )

        error = None
        for package in packages:
            package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
            db_package, _, _, error = merge_or_create_package(package, visit_level=0)
            if error:
                break

            # Queue inside the loop so that `db_package` is never read when
            # the generator yields nothing (previously a NameError).
            if db_package:
                add_package_to_scan_queue(
                    package=db_package, pipelines=pipelines, priority=priority
                )

        return error
    finally:
        # Best-effort cleanup of the downloaded index; a failure to remove
        # the temp file must not mask the actual result or exception.
        try:
            os.remove(location)
        except OSError:
            pass
62+
63+
64+
def get_package_info(name):
    """
    Return the JSON metadata of the conda-forge package `name` as fetched
    from the api.anaconda.org package endpoint, or None if it cannot be
    retrieved.

    This metadata is optional enrichment for the caller, so any request
    failure (HTTP error status, connection problem, timeout) or invalid JSON
    payload is logged and reported as None rather than raised.
    """
    from urllib.parse import quote

    # Quote the name so that unexpected characters cannot alter the URL path.
    url = f"https://api.anaconda.org/package/conda-forge/{quote(name, safe='')}"
    try:
        # Always bound the request: without a timeout a stalled server would
        # block the worker indefinitely.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
    except requests.exceptions.RequestException as err:
        logger.error(f"Request error occurred: {err}")
    except ValueError as err:
        # Raised by response.json() when the body is not valid JSON.
        logger.error(f"Invalid JSON response from {url}: {err}")
    return None
73+
74+
75+
@priority_router.route("pkg:conda/.*")
def process_request(purl_str, **kwargs):
    """
    Process Conda Package URL (PURL).

    Parse `purl_str` and map the corresponding Conda package using the
    default pipelines extended with the optional ``addon_pipelines`` keyword
    argument, at the optional ``priority`` (0 when not provided).

    Return the error message from the mapping if there is one, None otherwise.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    pipelines = DEFAULT_PIPELINES + tuple(kwargs.get("addon_pipelines", []))
    priority = kwargs.get("priority", 0)

    error_msg = map_conda_package(PackageURL.from_string(purl_str), pipelines, priority)
    # A falsy error (None or empty) is reported as None.
    return error_msg or None

minecode/miners/conda.py

Lines changed: 81 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,81 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import bz2
11+
import json
12+
import packagedcode.models as scan_models
13+
from packageurl import PackageURL
14+
15+
16+
def build_packages(location, download_url, package_info, package_identifier, package_url):
    """
    Yield a PackageData built from the Conda repodata index at `location`.

    `location` is the path to a bz2-compressed ``repodata.json`` file.
    `package_identifier` is the archive file name used as key in the index:
    names ending with ``.conda`` are looked up in the "packages.conda"
    section, all others in the "packages" section. Nothing is yielded when
    the index has no entry for `package_identifier`.

    `package_url` provides the name, namespace and version of the package and
    is set as its purl. `package_info` is an optional mapping of extra
    metadata; it is only used when the namespace is "conda-forge" to add the
    description, URLs and an additional license statement.
    """
    with bz2.open(location, "rt", encoding="utf-8") as f:
        repodata = json.load(f)

    # Pick the index section from the archive format; tolerate an index that
    # lacks the section altogether instead of failing with a KeyError.
    section = "packages.conda" if package_identifier.endswith(".conda") else "packages"
    metadata_dict = (repodata.get(section) or {}).get(package_identifier)
    if not metadata_dict:
        return

    download_data = dict(
        datasource_id="conda_api_metadata",
        type="conda",
        download_url=download_url,
    )

    extracted_license_statement = []
    declared_license = metadata_dict.get("license")
    if declared_license:
        extracted_license_statement.append(declared_license)

    dependencies = []
    for dep in metadata_dict.get("depends") or []:
        # A dependency spec is "<name> [<version> [<build>]]": only the name
        # is kept. Skip blank specs rather than failing on parts[0].
        parts = dep.split()
        if not parts:
            continue

        # Dependencies of a Conda package are Conda packages. This used to be
        # the unrelated "conan" type; expected-result fixtures generated with
        # the former type must be regenerated.
        dep_purl = PackageURL(type="conda", name=parts[0])
        dependencies.append(scan_models.DependentPackage(purl=dep_purl.to_string()))

    common_data = dict(
        name=package_url.name,
        namespace=package_url.namespace,
        version=package_url.version,
        sha256=metadata_dict.get("sha256"),
        md5=metadata_dict.get("md5"),
        size=metadata_dict.get("size"),
        extracted_license_statement=extracted_license_statement,
        dependencies=dependencies,
    )

    if package_url.namespace == "conda-forge" and package_info:
        description = package_info.get("description") or package_info.get("summary")
        html_url = package_info.get("html_url")
        dev_url = package_info.get("dev_url")

        license_conda_forge = package_info.get("license")
        if license_conda_forge:
            common_data["extracted_license_statement"].append(license_conda_forge)

        conda_forge_data = dict(
            description=description,
            homepage_url=html_url,
            repository_homepage_url=dev_url,
        )

        download_data.update(conda_forge_data)

    download_data.update(common_data)
    package = scan_models.PackageData.from_data(download_data)
    package.set_purl(package_url)
    yield package
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import os
11+
from django.test import TestCase
12+
from packageurl import PackageURL
13+
import packagedb
14+
from minecode.collectors import conda
15+
from minecode.utils_test import JsonBasedTesting
16+
17+
18+
class CondaPriorityQueueTests(JsonBasedTesting, TestCase):
    """
    Check that a Conda purl is mapped to a package stored in the PackageDB.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def setUp(self):
        super().setUp()
        self.package_url = PackageURL.from_string(
            "pkg:conda/numpy@1.11.3?subdir=linux-64&build=py27h1b885b7_8&type=tar.bz2"
        )
        self.download_url = (
            "https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.11.3-py27h1b885b7_8.tar.bz2"
        )

    def test_map_conda_package(self):
        # The database starts empty.
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(package_count, 0)

        # Pipelines are passed as a tuple: note the trailing comma, without
        # it the parentheses are a no-op and a plain string is passed.
        conda.map_conda_package(self.package_url, ("test_pipelines",))
        package_count = packagedb.models.Package.objects.all().count()
        self.assertEqual(package_count, 1)
        package = packagedb.models.Package.objects.all().first()
        expected_conda_download_url = self.download_url

        self.assertEqual(package.purl, str(self.package_url))
        self.assertEqual(package.download_url, expected_conda_download_url)
Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
import json
10+
import os
11+
from packageurl import PackageURL
12+
from minecode.miners import conda
13+
from minecode.tests import FIXTURES_REGEN
14+
from minecode.utils_test import JsonBasedTesting
15+
from django.test import TestCase as DjangoTestCase
16+
17+
18+
class CondaMapperTest(JsonBasedTesting, DjangoTestCase):
    """
    Check the package data built by the Conda miner from a repodata index
    against expected JSON results.
    """

    test_data_dir = os.path.join(os.path.dirname(os.path.dirname(__file__)), "testfiles")

    def test_build_packages_metafile_conda1(self):
        # ".conda" archive from the default channel: no extra package info.
        package_url1 = PackageURL.from_string(
            "pkg:conda/numpy@1.11.3?subdir=linux-64&build=py27h1b885b7_8&type=conda"
        )
        package_identifier1 = "numpy-1.11.3-py27h1b885b7_8.conda"
        package_info1 = None
        download_url1 = (
            "https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.11.3-py27h1b885b7_8.conda"
        )
        location1 = self.get_test_loc("conda/repodata.json.bz2")

        result = conda.build_packages(
            location1, download_url1, package_info1, package_identifier1, package_url1
        )
        result = [p.to_dict() for p in result]
        expected_loc = self.get_test_loc("conda/mapper_numpy_expected.json")
        self.check_expected_results(result, expected_loc, regen=FIXTURES_REGEN)

    def test_build_packages_metafile_conda2(self):
        # ".tar.bz2" archive in the conda-forge namespace, enriched with the
        # package info loaded from a test file.
        package_url2 = PackageURL.from_string(
            "pkg:conda/conda-forge/sqlalchemy@1.1.13?subdir=linux-64&build=py27hb0a01da_0&type=tar.bz2"
        )
        package_identifier2 = "sqlalchemy-1.1.13-py27hb0a01da_0.tar.bz2"

        with open(self.get_test_loc("conda/package_info_sqlalchemy.json")) as f:
            package_info2 = json.load(f)

        download_url2 = (
            "https://repo.anaconda.com/pkgs/main/linux-64/sqlalchemy-1.1.13-py27hb0a01da_0.tar.bz2"
        )
        location2 = self.get_test_loc("conda/repodata.json.bz2")

        result = conda.build_packages(
            location2, download_url2, package_info2, package_identifier2, package_url2
        )
        result = [p.to_dict() for p in result]
        expected_loc = self.get_test_loc("conda/mapper_sqlalchemy_expected.json")
        self.check_expected_results(result, expected_loc, regen=FIXTURES_REGEN)
Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,118 @@
1+
[
2+
{
3+
"api_data_url": null,
4+
"bug_tracking_url": null,
5+
"code_view_url": null,
6+
"copyright": null,
7+
"datasource_id": "conda_api_metadata",
8+
"declared_license_expression": "bsd-new",
9+
"declared_license_expression_spdx": "BSD-3-Clause",
10+
"dependencies": [
11+
{
12+
"extra_data": {},
13+
"extracted_requirement": null,
14+
"is_direct": true,
15+
"is_optional": false,
16+
"is_pinned": false,
17+
"is_runtime": true,
18+
"purl": "pkg:conan/libgcc-ng",
19+
"resolved_package": {},
20+
"scope": null
21+
},
22+
{
23+
"extra_data": {},
24+
"extracted_requirement": null,
25+
"is_direct": true,
26+
"is_optional": false,
27+
"is_pinned": false,
28+
"is_runtime": true,
29+
"purl": "pkg:conan/libgfortran-ng",
30+
"resolved_package": {},
31+
"scope": null
32+
},
33+
{
34+
"extra_data": {},
35+
"extracted_requirement": null,
36+
"is_direct": true,
37+
"is_optional": false,
38+
"is_pinned": false,
39+
"is_runtime": true,
40+
"purl": "pkg:conan/numpy-base",
41+
"resolved_package": {},
42+
"scope": null
43+
},
44+
{
45+
"extra_data": {},
46+
"extracted_requirement": null,
47+
"is_direct": true,
48+
"is_optional": false,
49+
"is_pinned": false,
50+
"is_runtime": true,
51+
"purl": "pkg:conan/python",
52+
"resolved_package": {},
53+
"scope": null
54+
}
55+
],
56+
"description": null,
57+
"download_url": "https://repo.anaconda.com/pkgs/main/linux-64/numpy-1.11.3-py27h1b885b7_8.conda",
58+
"extra_data": {},
59+
"extracted_license_statement": "- BSD 3-Clause\n",
60+
"file_references": [],
61+
"holder": null,
62+
"homepage_url": null,
63+
"is_private": false,
64+
"is_virtual": false,
65+
"keywords": [],
66+
"license_detections": [
67+
{
68+
"identifier": "bsd_new-50fa5753-f24d-ec04-33a1-36bb8ac0492c",
69+
"license_expression": "bsd-new",
70+
"license_expression_spdx": "BSD-3-Clause",
71+
"matches": [
72+
{
73+
"end_line": 1,
74+
"from_file": null,
75+
"license_expression": "bsd-new",
76+
"license_expression_spdx": "BSD-3-Clause",
77+
"match_coverage": 100.0,
78+
"matched_length": 3,
79+
"matched_text": "BSD 3-Clause",
80+
"matcher": "1-hash",
81+
"rule_identifier": "bsd-new_10.RULE",
82+
"rule_relevance": 100,
83+
"rule_url": "https://github.com/nexB/scancode-toolkit/tree/develop/src/licensedcode/data/rules/bsd-new_10.RULE",
84+
"score": 100.0,
85+
"start_line": 1
86+
}
87+
]
88+
}
89+
],
90+
"md5": "57d14eb0098432d8a03d87bb09ab3fa4",
91+
"name": "numpy",
92+
"namespace": null,
93+
"notice_text": null,
94+
"other_license_detections": [],
95+
"other_license_expression": null,
96+
"other_license_expression_spdx": null,
97+
"parties": [],
98+
"primary_language": null,
99+
"purl": "pkg:conda/numpy@1.11.3?build=py27h1b885b7_8&subdir=linux-64&type=conda",
100+
"qualifiers": {
101+
"build": "py27h1b885b7_8",
102+
"subdir": "linux-64",
103+
"type": "conda"
104+
},
105+
"release_date": null,
106+
"repository_download_url": null,
107+
"repository_homepage_url": null,
108+
"sha1": null,
109+
"sha256": "fabbdc2d870a26bf24707e301da84377d0aae09f9a97add4cca2a53e075c57ed",
110+
"sha512": null,
111+
"size": 10127,
112+
"source_packages": [],
113+
"subpath": null,
114+
"type": "conda",
115+
"vcs_url": null,
116+
"version": "1.11.3"
117+
}
118+
]

0 commit comments

Comments
 (0)