#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
import re

from packagedcode import models
from packageurl import PackageURL

"""
Handle Arch Linux .SRCINFO files from makepkg.

.SRCINFO files contain package metadata in a simple, unambiguous format.
They are key=value pairs, separated into sections.

See: https://wiki.archlinux.org/title/.SRCINFO
"""

logger = logging.getLogger(__name__)


class SrcinfoHandler(models.DatafileHandler):
    """
    Handler for Arch Linux .SRCINFO files.

    .SRCINFO files are generated by makepkg and contain package metadata
    for the Arch User Repository (AUR) and Arch Linux packages.
    """

    datasource_id = 'arch_srcinfo'
    path_patterns = ('*/.SRCINFO', '*.SRCINFO')
    default_package_type = 'arch'
    default_primary_language = None  # Can be any language
    description = 'Arch Linux .SRCINFO file'
    documentation_url = 'https://wiki.archlinux.org/title/.SRCINFO'

    # (key, is_runtime, is_optional) for each kind of dependency reported.
    # Each key may also appear with an "_<arch>" suffix, e.g. "depends_x86_64".
    _DEPENDENCY_KINDS = (
        ('depends', True, False),
        ('makedepends', False, False),
        ('checkdepends', False, False),
        ('optdepends', True, True),
    )

    # Checksum keys copied to extra_data, together with their "_<arch>" variants.
    _CHECKSUM_KEYS = (
        'cksums',
        'md5sums',
        'sha1sums',
        'sha224sums',
        'sha256sums',
        'sha384sums',
        'sha512sums',
        'b2sums',
    )

    # Package relation keys copied to extra_data as lists.
    _RELATION_KEYS = ('conflicts', 'provides', 'replaces')

    # First character that ends the package name in a dependency entry:
    # a version operator ("foo>=1.0") or the optdepends description
    # separator ("foo: why it is useful").
    _DEPENDENCY_NAME_END = re.compile(r'[<>=:]')

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData for each package declared in the .SRCINFO file
        at ``location``.

        Each ``pkgname`` section is merged on top of the ``pkgbase`` section:
        a key set in the package section replaces the same key of the pkgbase
        section, every other key is inherited. ``package_only`` is passed
        through to ``PackageData.from_data``.
        """
        with open(location, encoding='utf-8') as srcinfo_file:
            content = srcinfo_file.read()

        srcinfo_data = cls._parse_srcinfo(content)
        pkgbase_data = srcinfo_data['pkgbase']

        for pkg_data in srcinfo_data['packages']:
            # Arch-specific keys are kept under their own name by
            # _parse_srcinfo, so overriding "depends_x86_64" in a package
            # section does not discard the inherited generic "depends".
            merged_data = {**pkgbase_data, **pkg_data}

            package = cls._create_package_from_data(
                merged_data,
                package_only=package_only,
            )
            if package:
                yield package

    @classmethod
    def _parse_srcinfo(cls, content):
        """
        Return a mapping built from the text ``content`` of a .SRCINFO file:

        - 'pkgbase': mapping of the keys found before the first ``pkgname``
        - 'packages': list of mappings, one per ``pkgname`` section, or a
          list holding the pkgbase mapping when there is no ``pkgname``

        In a section mapping, a key seen once maps to a string and a key seen
        several times maps to a list of strings in file order. Keys are kept
        verbatim, including any architecture suffix such as "depends_x86_64".
        Blank lines, "#" comment lines and lines without "=" are skipped.
        """
        pkgbase_data = {}
        packages = []
        current_section = pkgbase_data

        for line_num, raw_line in enumerate(content.splitlines(), 1):
            line = raw_line.strip()
            if not line or line.startswith('#'):
                continue

            key, separator, value = line.partition('=')
            if not separator:
                logger.debug('Line %d: No = found, skipping: %s', line_num, line)
                continue

            key = key.strip()
            value = value.strip()

            if key == 'pkgbase':
                pkgbase_data['pkgbase'] = value
                current_section = pkgbase_data
                continue

            if key == 'pkgname':
                # Every pkgname line starts a new package section.
                current_section = {'pkgname': value}
                packages.append(current_section)
                continue

            if key not in current_section:
                current_section[key] = value
                continue

            # Repeated key: promote the stored string to a list and append.
            existing = current_section[key]
            if not isinstance(existing, list):
                existing = current_section[key] = [existing]
            existing.append(value)

        return {
            'pkgbase': pkgbase_data,
            'packages': packages or [pkgbase_data],
        }

    @staticmethod
    def _as_list(value):
        """
        Return ``value`` unchanged if it is a list, else wrapped in a list.
        """
        return value if isinstance(value, list) else [value]

    @staticmethod
    def _keys_for(data, base_key):
        """
        Return the keys of ``data`` that are ``base_key`` itself or
        ``base_key`` followed by an "_<arch>" suffix, in ``data`` order.
        """
        prefix = f'{base_key}_'
        return [
            key for key in data
            if key == base_key or key.startswith(prefix)
        ]

    @classmethod
    def _build_dependencies(cls, data, base_key, is_runtime, is_optional):
        """
        Return a list of DependentPackage built from the ``base_key`` entries
        of ``data`` and from its architecture-specific variants.

        The scope of each dependency is the key it was found under, for
        instance "depends" or "depends_x86_64". Entries without a package
        name are skipped because a PackageURL requires a name.
        """
        dependencies = []
        for key in cls._keys_for(data, base_key):
            for requirement in cls._as_list(data[key]):
                name = cls._DEPENDENCY_NAME_END.split(
                    requirement, maxsplit=1)[0].strip()
                if not name:
                    continue

                dependencies.append(
                    models.DependentPackage(
                        purl=PackageURL(
                            type=cls.default_package_type,
                            name=name,
                        ).to_string(),
                        extracted_requirement=requirement,
                        scope=key,
                        is_runtime=is_runtime,
                        is_optional=is_optional,
                    )
                )
        return dependencies

    @classmethod
    def _create_package_from_data(cls, data, package_only=False):
        """
        Return a PackageData built from ``data``, the merged mapping of a
        pkgbase section and one package section, or None when ``data`` has
        neither a ``pkgname`` nor a ``pkgbase``.
        """
        pkgname = data.get('pkgname') or data.get('pkgbase')
        if not pkgname:
            return None

        pkgver = data.get('pkgver', '')
        pkgrel = data.get('pkgrel', '')

        # Arch Linux version format: pkgver-pkgrel
        if pkgver and pkgrel:
            version = f'{pkgver}-{pkgrel}'
        else:
            version = pkgver or None

        purl = PackageURL(
            type=cls.default_package_type,
            name=pkgname,
            version=version,
        ).to_string()

        # The license values are free text from the PKGBUILD, not a ScanCode
        # license expression. They are reported as the extracted statement.
        # NOTE(review): this relies on PackageData.from_data deriving the
        # declared expression from the statement -- confirm against models.py.
        extracted_license_statement = None
        licenses = data.get('license')
        if licenses:
            extracted_license_statement = ' AND '.join(cls._as_list(licenses))

        dependencies = []
        for base_key, is_runtime, is_optional in cls._DEPENDENCY_KINDS:
            dependencies.extend(
                cls._build_dependencies(data, base_key, is_runtime, is_optional)
            )

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=pkgname,
            version=version,
            description=data.get('pkgdesc', ''),
            homepage_url=data.get('url'),
            extracted_license_statement=extracted_license_statement,
            dependencies=dependencies,
            purl=purl,
        )

        # Metadata without a dedicated PackageData field goes to extra_data.
        extra_data = {}

        arch = data.get('arch')
        if arch:
            extra_data['arch'] = ', '.join(cls._as_list(arch))

        # Sources and checksums, each with their architecture-specific
        # variants stored under their own key such as "source_x86_64".
        for base_key in ('source',) + cls._CHECKSUM_KEYS:
            for key in cls._keys_for(data, base_key):
                if data[key]:
                    extra_data[key] = cls._as_list(data[key])

        if 'epoch' in data:
            extra_data['epoch'] = data['epoch']

        for base_key in cls._RELATION_KEYS:
            for key in cls._keys_for(data, base_key):
                extra_data[key] = cls._as_list(data[key])

        if extra_data:
            package_data['extra_data'] = extra_data

        return models.PackageData.from_data(package_data, package_only=package_only)
+pkgname = rust-multiarch \ No newline at end of file diff --git a/tests/packagedcode/data/srcinfo/rust-basic/.SRCINFO b/tests/packagedcode/data/srcinfo/rust-basic/.SRCINFO new file mode 100644 index 0000000000..2f96fcee2a --- /dev/null +++ b/tests/packagedcode/data/srcinfo/rust-basic/.SRCINFO @@ -0,0 +1,13 @@ +pkgbase = rust-basic + pkgdesc = A basic Rust package + pkgver = 1.0.0 + pkgrel = 1 + url = https://github.com/example/rust-basic + arch = x86_64 + license = MIT + makedepends = rust + makedepends = cargo + source = https://github.com/example/rust-basic/archive/1.0.0.tar.gz + sha256sums = SKIP + +pkgname = rust-basic \ No newline at end of file diff --git a/tests/packagedcode/data/srcinfo/split-package/.SRCINFO b/tests/packagedcode/data/srcinfo/split-package/.SRCINFO new file mode 100644 index 0000000000..33d9467abb --- /dev/null +++ b/tests/packagedcode/data/srcinfo/split-package/.SRCINFO @@ -0,0 +1,17 @@ +pkgbase = rust-split + pkgdesc = Split package example + pkgver = 1.0.0 + pkgrel = 1 + url = https://example.com + arch = x86_64 + license = MIT + makedepends = rust + source = source.tar.gz + +pkgname = rust-split-bin + pkgdesc = Binary package + depends = glibc + +pkgname = rust-split-lib + pkgdesc = Library package + depends = gcc-libs \ No newline at end of file diff --git a/tests/packagedcode/data/srcinfo/with-checksums/.SRCINFO b/tests/packagedcode/data/srcinfo/with-checksums/.SRCINFO new file mode 100644 index 0000000000..fd27b414eb --- /dev/null +++ b/tests/packagedcode/data/srcinfo/with-checksums/.SRCINFO @@ -0,0 +1,12 @@ +pkgbase = rust-checksums + pkgdesc = Package with checksums + pkgver = 1.0.0 + pkgrel = 1 + arch = x86_64 + license = MIT + source = file1.tar.gz + source = file2.tar.gz + sha256sums = abc123def456 + sha256sums = 789ghi012jkl + +pkgname = rust-checksums \ No newline at end of file diff --git a/tests/packagedcode/data/srcinfo/with-deps/.SRCINFO b/tests/packagedcode/data/srcinfo/with-deps/.SRCINFO new file mode 100644 index 
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
#

import os

from packagedcode import srcinfo
from packages_test_utils import PackageTester


class TestSrcinfo(PackageTester):
    """
    Tests for the Arch Linux .SRCINFO handler, run against the fixture files
    stored under data/srcinfo/.
    """
    test_data_dir = os.path.join(os.path.dirname(__file__), 'data')

    def _parse(self, test_path):
        """
        Return the list of packages parsed from the fixture at ``test_path``.
        """
        test_file = self.get_test_loc(test_path)
        return list(srcinfo.SrcinfoHandler.parse(test_file))

    def test_parse_srcinfo_basic(self):
        packages = self._parse('srcinfo/rust-basic/.SRCINFO')

        assert len(packages) == 1
        package = packages[0]

        assert package.type == 'arch'
        assert package.name == 'rust-basic'
        # The version is pkgver-pkgrel.
        assert package.version == '1.0.0-1'
        assert package.description == 'A basic Rust package'

    def test_parse_srcinfo_with_dependencies(self):
        packages = self._parse('srcinfo/with-deps/.SRCINFO')

        assert len(packages) == 1
        package = packages[0]

        requirements_by_scope = {}
        for dep in package.dependencies:
            requirements_by_scope.setdefault(dep.scope, []).append(
                dep.extracted_requirement)

        assert requirements_by_scope == {
            'depends': ['gcc-libs', 'glibc'],
            'makedepends': ['rust', 'cargo'],
            'optdepends': ['python: for scripts', 'bash: for shell completion'],
        }

        # Runtime, build and optional dependencies carry distinct flags.
        runtime_deps = [
            d for d in package.dependencies
            if d.is_runtime and not d.is_optional
        ]
        build_deps = [d for d in package.dependencies if not d.is_runtime]
        opt_deps = [d for d in package.dependencies if d.is_optional]

        assert len(runtime_deps) == 2
        assert len(build_deps) == 2
        assert len(opt_deps) == 2

    def test_parse_srcinfo_arch_specific(self):
        packages = self._parse('srcinfo/arch-specific/.SRCINFO')

        assert len(packages) == 1
        package = packages[0]

        requirements_by_scope = {}
        for dep in package.dependencies:
            requirements_by_scope.setdefault(dep.scope, []).append(
                dep.extracted_requirement)

        # Generic and architecture-specific dependencies use separate scopes.
        assert requirements_by_scope['depends'] == ['glibc']
        assert requirements_by_scope['depends_aarch64'] == ['aarch64-specific-lib']
        assert requirements_by_scope['makedepends'] == ['rust']

    def test_parse_srcinfo_split_package(self):
        """Test parsing split packages (multiple pkgname sections)"""
        packages = self._parse('srcinfo/split-package/.SRCINFO')

        # One package per pkgname section, in file order.
        assert [p.name for p in packages] == ['rust-split-bin', 'rust-split-lib']

        # Package sections override pkgdesc and inherit the pkgbase version.
        assert [p.description for p in packages] == [
            'Binary package',
            'Library package',
        ]
        assert [p.version for p in packages] == ['1.0.0-1', '1.0.0-1']

    def test_parse_srcinfo_with_checksums(self):
        """Test that checksums are captured in extra_data"""
        packages = self._parse('srcinfo/with-checksums/.SRCINFO')

        assert len(packages) == 1
        package = packages[0]

        assert package.extra_data['source'] == ['file1.tar.gz', 'file2.tar.gz']
        assert package.extra_data['sha256sums'] == ['abc123def456', '789ghi012jkl']