Skip to content

Commit a35a351

Browse files
committed
Merge branch 'main' into 660-purl-next-maven
2 parents 9449ee2 + c56e390 commit a35a351

34 files changed

+464479
-2
lines changed

minecode/collectors/alpine.py

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
from packageurl import PackageURL
12+
from minecode import priority_router
13+
from minecode.miners.alpine import build_packages
14+
from minecode.utils import fetch_http, get_temp_file
15+
from minecode.utils import extract_file
16+
from packagedb.models import PackageContentType
17+
18+
# Module logger: emits INFO and above through its own stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)
22+
23+
24+
def map_apk_package(package_url, pipelines, priority=0):
    """
    Collect the Alpine Linux (APK) package described by `package_url` and
    store it in the PackageDB.

    The purl must provide a name, a version and the `arch`, `repo` and
    `alpine_version` qualifiers; if any of these is missing, nothing is
    fetched and None is returned. Otherwise the repository APKINDEX.tar.gz is
    downloaded to a temporary file, extracted and passed to `build_packages`
    together with the .apk download URL. Each package produced is merged into
    the database and, when a database package comes back, added to the scan
    queue with `pipelines` and `priority`.

    Processing stops at the first error reported by
    `merge_or_create_package`. Return that error, or the (falsy) value from
    the last merge, or None when no package was produced.
    """
    from minecode.model_utils import add_package_to_scan_queue
    from minecode.model_utils import merge_or_create_package

    name = package_url.name
    version = package_url.version
    qualifiers = package_url.qualifiers
    arch = qualifiers.get("arch")
    repo = qualifiers.get("repo")
    alpine_version = qualifiers.get("alpine_version")

    # Every one of these is needed to build the download URLs below.
    if not all((name, version, arch, repo, alpine_version)):
        return None

    download_url = (
        f"https://dl-cdn.alpinelinux.org/alpine/{alpine_version}/{repo}/{arch}/APKINDEX.tar.gz"
    )
    apk_download_url = (
        f"https://dl-cdn.alpinelinux.org/alpine/{alpine_version}/{repo}/{arch}/{name}-{version}.apk"
    )

    index_bytes = fetch_http(download_url)
    location = get_temp_file("NonPersistentHttpVisitor")
    with open(location, "wb") as index_file:
        index_file.write(index_bytes)

    extracted_location = extract_file(location)

    error = None
    for package in build_packages(extracted_location, apk_download_url, package_url):
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            return error

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
67+
68+
69+
@priority_router.route("pkg:apk/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the Alpine Linux (APK) purl string
    `purl_str`.

    Recognized keyword arguments: `addon_pipelines`, extra pipelines appended
    to DEFAULT_PIPELINES, and `priority` (default 0), passed on to
    `map_apk_package`.

    Return the error reported by `map_apk_package` when there is one,
    otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = tuple(kwargs.get("addon_pipelines", []))
    pipelines = DEFAULT_PIPELINES + extra_pipelines
    priority = kwargs.get("priority", 0)

    error_msg = map_apk_package(PackageURL.from_string(purl_str), pipelines, priority)

    # Falsy results are normalized to None.
    return error_msg or None

minecode/collectors/alpm.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
from packageurl import PackageURL
12+
from minecode import priority_router
13+
from minecode.miners.alpm import build_packages
14+
from minecode.utils import fetch_http, get_temp_file
15+
from minecode.utils import extract_file
16+
from packagedb.models import PackageContentType
17+
18+
# Module logger: emits INFO and above through its own stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)
22+
23+
24+
def map_alpm_package(package_url, pipelines, priority=0):
    """
    Add an Arch Linux (alpm) distribution `package_url` to the PackageDB.

    The purl must provide a name and a version; otherwise nothing is fetched
    and None is returned. The `arch` qualifier defaults to "any". The package
    archive is downloaded from archive.archlinux.org to a temporary file,
    extracted and passed to `build_packages`. Each package produced is merged
    into the database and, when a database package comes back, added to the
    scan queue with `pipelines` and `priority`.

    Processing stops at the first error reported by
    `merge_or_create_package`. Return that error, or the value from the last
    merge, or None when no package was produced.
    """
    name = package_url.name
    version = package_url.version

    # Validate before touching `name[0]`: indexing an empty or missing name
    # would raise instead of returning None as intended.
    if not name or not version:
        return None

    from minecode.model_utils import add_package_to_scan_queue
    from minecode.model_utils import merge_or_create_package

    arch = package_url.qualifiers.get("arch", "any")
    # The archive groups packages in directories named after their first letter.
    first_letter = name[0]

    download_url = f"https://archive.archlinux.org/packages/{first_letter}/{name}/{name}-{version}-{arch}.pkg.tar.zst"
    content = fetch_http(download_url)
    location = get_temp_file("NonPersistentHttpVisitor")
    with open(location, "wb") as tmp:
        tmp.write(content)

    extracted_location = extract_file(location)

    packages = build_packages(extracted_location, download_url, package_url)

    error = None
    for package in packages:
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
60+
61+
62+
@priority_router.route("pkg:alpm/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the Arch Linux (alpm) purl string
    `purl_str`.

    Recognized keyword arguments: `addon_pipelines`, extra pipelines appended
    to DEFAULT_PIPELINES, and `priority` (default 0), passed on to
    `map_alpm_package`.

    Return the error reported by `map_alpm_package` when there is one,
    otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = tuple(kwargs.get("addon_pipelines", []))
    pipelines = DEFAULT_PIPELINES + extra_pipelines
    priority = kwargs.get("priority", 0)

    error_msg = map_alpm_package(PackageURL.from_string(purl_str), pipelines, priority)

    # Falsy results are normalized to None.
    return error_msg or None

minecode/collectors/conda.py

Lines changed: 90 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,90 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
from urllib.parse import urljoin
12+
13+
import requests
14+
from packageurl import PackageURL
15+
from minecode import priority_router
16+
from minecode.miners.conda import build_packages
17+
from minecode.utils import fetch_http, get_temp_file
18+
from packagedb.models import PackageContentType
19+
from packageurl.contrib.purl2url import build_conda_download_url
20+
21+
# Module logger: emits INFO and above through its own stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)
25+
26+
27+
def map_conda_package(package_url, pipelines, priority=0):
    """
    Collect the Conda package described by `package_url` and store it in the
    PackageDB.

    The download URL is derived from the purl with
    `build_conda_download_url`; when none can be built, nothing is fetched
    and None is returned. The channel's `repodata.json.bz2`, located next to
    the download URL, is saved to a temporary file. For the `conda-forge`
    namespace, extra package information is requested through
    `get_package_info`. All of this is passed to `build_packages`; each
    package produced is merged into the database and, when a database package
    comes back, added to the scan queue with `pipelines` and `priority`.

    Processing stops at the first error reported by
    `merge_or_create_package`. Return that error, or the (falsy) value from
    the last merge, or None when no package was produced.
    """
    from minecode.model_utils import add_package_to_scan_queue
    from minecode.model_utils import merge_or_create_package

    download_url = build_conda_download_url(str(package_url))
    if not download_url:
        return None

    # Last path segment of the download URL, i.e. the archive file name.
    package_identifier = download_url.rsplit("/", 1)[-1]
    package_indexes_url = urljoin(download_url, "./repodata.json.bz2")

    index_bytes = fetch_http(package_indexes_url)
    location = get_temp_file("NonPersistentHttpVisitor")
    with open(location, "wb") as index_file:
        index_file.write(index_bytes)

    is_conda_forge = package_url.namespace == "conda-forge"
    package_info = get_package_info(package_url.name) if is_conda_forge else None

    error = None
    for package in build_packages(
        location, download_url, package_info, package_identifier, package_url
    ):
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            return error

        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
62+
63+
64+
def get_package_info(name):
    """
    Return the decoded JSON document that the anaconda.org API serves for the
    conda-forge package `name`, or None when it cannot be retrieved.

    Any `requests` failure (HTTP error status, connection problem, timeout,
    invalid response) is logged and reported as None rather than raised, so
    a metadata lookup failure does not abort package collection.
    """
    url = f"https://api.anaconda.org/package/conda-forge/{name}"
    try:
        response = requests.get(url)
        response.raise_for_status()
        return response.json()
    except requests.exceptions.HTTPError as err:
        logger.error(f"HTTP error occurred: {err}")
    except requests.exceptions.RequestException as err:
        # Connection errors, timeouts and similar are not HTTPError
        # subclasses; handle them here instead of letting them propagate.
        logger.error(f"Error fetching package data from Anaconda: {err}")
    return None
73+
74+
75+
@priority_router.route("pkg:conda/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the Conda purl string `purl_str`.

    Recognized keyword arguments: `addon_pipelines`, extra pipelines appended
    to DEFAULT_PIPELINES, and `priority` (default 0), passed on to
    `map_conda_package`.

    Return the error reported by `map_conda_package` when there is one,
    otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = tuple(kwargs.get("addon_pipelines", []))
    pipelines = DEFAULT_PIPELINES + extra_pipelines
    priority = kwargs.get("priority", 0)

    error_msg = map_conda_package(PackageURL.from_string(purl_str), pipelines, priority)

    # Falsy results are normalized to None.
    return error_msg or None

minecode/collectors/hackage.py

Lines changed: 91 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,91 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
from packageurl import PackageURL
12+
import requests
13+
from minecode import priority_router
14+
15+
from packagedb.models import PackageContentType
16+
from packagedcode import models as scan_models
17+
18+
# Module logger: emits INFO and above through its own stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)
22+
23+
24+
def get_hackage_package_json(name):
    """
    Fetch the JSON document Hackage publishes for the package `name` and
    return it decoded, or None when the request fails.
    Example: https://hackage.haskell.org/package/aeson.json

    Any `requests.RequestException` raised while fetching or decoding is
    logged and turned into a None result.
    """
    package_json_url = f"https://hackage.haskell.org/package/{name}.json"

    try:
        response = requests.get(package_json_url)
        response.raise_for_status()
        payload = response.json()
    except requests.RequestException as err:
        logger.error(f"Error fetching package data from Hackage: {err}")
        return None

    return payload
38+
39+
40+
def map_hackage_package(package_url, pipelines, priority=0):
    """
    Add a hackage `package_url` to the PackageDB.

    The package JSON is fetched from Hackage with `get_hackage_package_json`
    and must contain the requested version (presumably as a key of a mapping
    of versions; verify against the Hackage API). A source archive package
    pointing at the Hackage download URL is then merged into the database
    and, when a database package comes back, added to the scan queue with
    `pipelines` and `priority`.

    Return an error message when the metadata cannot be fetched or the
    version is unknown, otherwise the error value reported by
    `merge_or_create_package`.
    """
    from minecode.model_utils import add_package_to_scan_queue, merge_or_create_package

    name = package_url.name
    version = package_url.version

    versions = get_hackage_package_json(name=name)
    # The fetch helper returns None on any request failure; a membership test
    # against None would raise TypeError, so report the failure explicitly.
    if versions is None:
        error = f"Package data for {name} could not be fetched from hackage"
        logger.error(error)
        return error

    if version not in versions:
        error = f"Version {version} not found for {name} on hackage"
        logger.error(error)
        return error

    download_url = f"https://hackage.haskell.org/package/{name}-{version}/{name}-{version}.tar.gz"
    homepage_url = f"https://hackage.haskell.org/package/{name}-{version}"

    package = scan_models.Package(
        type="hackage",
        name=name,
        version=version,
        download_url=download_url,
        homepage_url=homepage_url,
        primary_language="haskell",
    )

    package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
    db_package, _, _, error = merge_or_create_package(package, visit_level=0)

    if db_package:
        add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
74+
75+
76+
@priority_router.route("pkg:hackage/.*")
def process_request(purl_str, **kwargs):
    """
    Handle a priority request for the Hackage purl string `purl_str`.

    Recognized keyword arguments: `addon_pipelines`, extra pipelines appended
    to DEFAULT_PIPELINES, and `priority` (default 0), passed on to
    `map_hackage_package`.

    Return the error reported by `map_hackage_package` when there is one,
    otherwise None.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = tuple(kwargs.get("addon_pipelines", []))
    pipelines = DEFAULT_PIPELINES + extra_pipelines
    priority = kwargs.get("priority", 0)

    error_msg = map_hackage_package(PackageURL.from_string(purl_str), pipelines, priority)

    # Falsy results are normalized to None.
    return error_msg or None

0 commit comments

Comments
 (0)