Skip to content

Commit 322eb43

Browse files
authored
Add support for Arch Linux ALPM on-demand data collection (#709)
* Add initial support for Arch Linux ALPM on-demand data collection Signed-off-by: ziad hany <[email protected]> * Add a test for Alpm on-demand data collection. Handle package dependencies. Signed-off-by: ziad hany <[email protected]> --------- Signed-off-by: ziad hany <[email protected]>
1 parent fba0889 commit 322eb43

File tree

7 files changed

+2219
-0
lines changed

7 files changed

+2219
-0
lines changed

minecode/collectors/alpm.py

Lines changed: 77 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,77 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
from packageurl import PackageURL
12+
from minecode import priority_router
13+
from minecode.miners.alpm import build_packages
14+
from minecode.utils import fetch_http, get_temp_file
15+
from minecode.utils import extract_file
16+
from packagedb.models import PackageContentType
17+
18+
# Module-level logger for the ALPM collector: the threshold is set to INFO
# and records are emitted through a dedicated stream handler.
logger = logging.getLogger(__name__)
logger.setLevel(logging.INFO)
handler = logging.StreamHandler()
logger.addHandler(handler)
22+
23+
24+
def map_alpm_package(package_url, pipelines, priority=0):
    """
    Add an Arch Linux distribution `package_url` to the PackageDB.

    Download the package archive from the Arch Linux Archive, extract it,
    build package data from the extracted metadata, store each package with
    `merge_or_create_package` and queue each stored package for scanning
    with `pipelines` at `priority`.

    Return None when `package_url` has no name or no version. Otherwise
    return the error reported by `merge_or_create_package` (processing stops
    at the first error), or None when every package was stored cleanly.
    """
    name = package_url.name
    version = package_url.version

    # Validate first: the archive URL is keyed on the first letter of the
    # name, so an empty or missing name must never reach the indexing below.
    if not name or not version:
        return None

    from minecode.model_utils import add_package_to_scan_queue
    from minecode.model_utils import merge_or_create_package

    qualifiers = package_url.qualifiers or {}
    arch = qualifiers.get("arch", "any")
    first_letter = name[0]

    # NOTE(review): only the ".pkg.tar.zst" extension is tried; older archive
    # entries may use a different compression suffix -- confirm if needed.
    download_url = f"https://archive.archlinux.org/packages/{first_letter}/{name}/{name}-{version}-{arch}.pkg.tar.zst"
    content = fetch_http(download_url)
    location = get_temp_file("NonPersistentHttpVisitor")
    with open(location, "wb") as tmp:
        tmp.write(content)

    extracted_location = extract_file(location)

    packages = build_packages(extracted_location, download_url, package_url)

    error = None
    for package in packages:
        # NOTE(review): ".pkg.tar.zst" files look like built packages rather
        # than source archives -- confirm SOURCE_ARCHIVE is the intended type.
        package.extra_data["package_content"] = PackageContentType.SOURCE_ARCHIVE
        db_package, _, _, error = merge_or_create_package(package, visit_level=0)
        if error:
            break

        # Submit the stored package for scanning.
        if db_package:
            add_package_to_scan_queue(package=db_package, pipelines=pipelines, priority=priority)

    return error
60+
61+
62+
@priority_router.route("pkg:alpm/.*")
def process_request(purl_str, **kwargs):
    """
    Handle an on-demand request for the Arch Linux (ALPM) PURL `purl_str`.

    The optional `addon_pipelines` keyword is appended to the default
    pipelines and the optional `priority` keyword (default 0) is passed along
    to `map_alpm_package`. Return the error message reported by
    `map_alpm_package` if there is one, None otherwise.
    """
    from minecode.model_utils import DEFAULT_PIPELINES

    extra_pipelines = kwargs.get("addon_pipelines", [])
    all_pipelines = DEFAULT_PIPELINES + tuple(extra_pipelines)
    requested_priority = kwargs.get("priority", 0)

    parsed_purl = PackageURL.from_string(purl_str)
    outcome = map_alpm_package(parsed_purl, all_pipelines, requested_priority)

    # A falsy outcome means nothing went wrong: report it as None.
    return outcome or None

minecode/miners/alpm.py

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,71 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# purldb is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/purldb for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
from packageurl import PackageURL
10+
11+
from minecode import debutils
12+
from minecode.msys2 import parse_pkginfo
13+
from pathlib import Path
14+
from packagedcode import models as scan_models
15+
16+
17+
def _split_dependency(spec):
    """
    Return a (name, requirement) tuple from a dependency `spec` string such
    as "glibc>=2.38". `requirement` is None when `spec` has no constraint.
    """
    for index, char in enumerate(spec):
        if char in "<>=":
            return spec[:index].strip(), spec[index:].strip() or None
    return spec.strip(), None


def build_packages(extracted_location, download_url, purl=None):
    """
    Yield a PackageData built from the `.PKGINFO` and `.BUILDINFO` metadata
    files found in the `extracted_location` directory of an extracted Arch
    Linux package.

    `download_url` is stored as the package download URL. `purl` is the
    PackageURL (object or string) of the package: it provides the package
    name and qualifiers and is set as the purl of the yielded package.

    Raise a ValueError on iteration when `purl` is not provided, since the
    package name and qualifiers cannot be determined without it.
    """
    if purl is None:
        raise ValueError("A purl is required to build an Arch Linux package.")
    if isinstance(purl, str):
        purl = PackageURL.from_string(purl)

    build_file = Path(extracted_location) / ".BUILDINFO"
    pkginfo_file = Path(extracted_location) / ".PKGINFO"

    with open(pkginfo_file, encoding="utf-8") as f:
        extracted_pkginfo = parse_pkginfo(f.read())

    with open(build_file, encoding="utf-8") as f:
        extracted_build = parse_pkginfo(f.read())

    description = extracted_pkginfo.get("desc")
    version = extracted_pkginfo.get("version")
    extracted_license_statement = extracted_pkginfo.get("licenses")

    parties = []
    maintainers = extracted_build.get("packager")
    if maintainers:
        maintainer_name, maintainer_email = debutils.parse_email(maintainers)
        if maintainer_name:
            party = scan_models.Party(
                name=maintainer_name,
                role="maintainer",
                email=maintainer_email,
            )
            parties.append(party)

    repository_homepage_url = extracted_pkginfo.get("url")
    size = extracted_pkginfo.get("size")
    # NOTE(review): BUILDINFO(5) describes `pkgbuild_sha256sum` as the checksum
    # of the PKGBUILD, not of the downloaded archive -- confirm that it belongs
    # in the package `sha256` field.
    sha256 = extracted_build.get("pkgbuild_sha256sum")

    depends = extracted_pkginfo.get("depends") or []
    if isinstance(depends, str):
        # A lone dependency must not be iterated character by character.
        depends = [depends]

    dependencies = []
    for spec in depends:
        # Keep any version constraint out of the purl name; presumably specs
        # look like "name" or "name>=version" -- verify against parse_pkginfo.
        dep_name, requirement = _split_dependency(spec)
        if not dep_name:
            continue
        dep_purl = PackageURL(type="alpm", name=dep_name)
        dep = scan_models.DependentPackage(
            purl=dep_purl.to_string(),
            extracted_requirement=requirement,
        )
        dependencies.append(dep)

    download_data = dict(
        type="alpm",
        name=purl.name,
        version=version,
        qualifiers=purl.qualifiers,
        description=description,
        repository_homepage_url=repository_homepage_url,
        extracted_license_statement=extracted_license_statement,
        parties=parties,
        size=size,
        sha256=sha256,
        dependencies=dependencies,
        download_url=download_url,
    )

    package = scan_models.PackageData.from_data(download_data)
    package.datasource_id = "alpm_metadata"
    package.set_purl(purl)
    yield package

0 commit comments

Comments
 (0)