Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
337 changes: 337 additions & 0 deletions src/packagedcode/srcinfo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,337 @@
#
# Copyright (c) nexB Inc. and others. All rights reserved.
# ScanCode is a trademark of nexB Inc.
# SPDX-License-Identifier: Apache-2.0
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
# See https://github.com/aboutcode-org/scancode-toolkit for support or download.
# See https://aboutcode.org for more information about nexB OSS projects.
#

import logging
import re

from packagedcode import models
from packageurl import PackageURL

"""
Handle Arch Linux .SRCINFO files from makepkg.

.SRCINFO files contain package metadata in a simple, unambiguous format.
They are key=value pairs, separated into sections.

See: https://wiki.archlinux.org/title/.SRCINFO
"""

# Module-level logger; the .SRCINFO parser uses it to report lines it skips.
logger = logging.getLogger(__name__)


class SrcinfoHandler(models.DatafileHandler):
    """
    Handler for Arch Linux .SRCINFO files.

    .SRCINFO files are generated by makepkg and contain package metadata
    for the Arch User Repository (AUR) and Arch Linux packages.

    A file starts with a ``pkgbase`` section holding the shared metadata and
    is followed by one ``pkgname`` section per built package. One package is
    yielded per ``pkgname`` section, with the ``pkgbase`` values as defaults.
    """

    datasource_id = 'arch_srcinfo'
    path_patterns = ('*/.SRCINFO', '*.SRCINFO')
    default_package_type = 'arch'
    default_primary_language = None  # Can be any language
    description = 'Arch Linux .SRCINFO file'
    documentation_url = 'https://wiki.archlinux.org/title/.SRCINFO'

    # Keys that may carry an architecture suffix, as in "depends_x86_64".
    # None of these base names contains an underscore, which is what makes
    # splitting a key on its FIRST underscore safe (architecture names such
    # as "x86_64" do contain one).
    _ARCH_SPECIFIC_KEYS = frozenset([
        'source',
        'depends',
        'checkdepends',
        'makedepends',
        'optdepends',
        'provides',
        'conflicts',
        'replaces',
        'cksums',
        'md5sums',
        'sha1sums',
        'sha224sums',
        'sha256sums',
        'sha384sums',
        'sha512sums',
        'b2sums',
    ])

    # (data key and base scope, is_runtime, is_optional), in output order.
    _DEPENDENCY_KINDS = (
        ('depends', True, False),
        ('makedepends', False, False),
        ('optdepends', True, True),
    )

    # List-valued keys copied to extra_data.
    _CHECKSUM_KEYS = ('md5sums', 'sha1sums', 'sha256sums', 'sha512sums')
    _RELATION_KEYS = ('conflicts', 'provides', 'replaces')

    # A requirement is "name", "name<op>version" or, for optional
    # dependencies, "name: description". The name ends at the first of these.
    _REQUIREMENT_SPLITTER = re.compile(r'[<>=:]')

    @classmethod
    def parse(cls, location):
        """
        Parse the .SRCINFO file at ``location`` and yield one PackageData per
        ``pkgname`` section. Sections without any usable name are skipped.
        """
        with open(location, 'r', encoding='utf-8') as f:
            content = f.read()

        srcinfo_data = cls._parse_srcinfo(content)
        pkgbase_data = srcinfo_data['pkgbase']

        # _parse_srcinfo always returns at least one entry in "packages"
        # (it falls back to the pkgbase section itself).
        for pkg_data in srcinfo_data['packages']:
            # Package-specific values override pkgbase values.
            merged_data = {**pkgbase_data, **pkg_data}
            package = cls._create_package_from_data(merged_data)
            if package:
                yield package

    @staticmethod
    def _as_list(value):
        """
        Return ``value`` unchanged if it is a list, else wrapped in a list.
        """
        return value if isinstance(value, list) else [value]

    @staticmethod
    def _store_value(section, key, value, always_list=False):
        """
        Record ``value`` under ``key`` in the ``section`` mapping.

        The first plain value is stored as a scalar; a repeated key is
        promoted to a list. With ``always_list`` the value is stored in a
        list even on first occurrence.
        """
        if key not in section:
            section[key] = [value] if always_list else value
            return

        existing = section[key]
        if not isinstance(existing, list):
            existing = section[key] = [existing]
        existing.append(value)

    @classmethod
    def _parse_srcinfo(cls, content):
        """
        Parse .SRCINFO ``content`` text into structured data.

        Return a mapping with two keys:
        - "pkgbase": mapping of the global (pkgbase) section
        - "packages": list of mappings, one per pkgname section, or a list
          holding the pkgbase mapping when there is no pkgname section.

        In each mapping a key seen once maps to a string and a repeated key
        maps to a list of strings. Architecture-specific entries such as
        ``depends_x86_64 = foo`` are stored under the base key ("depends")
        as ``(value, arch)`` tuples, always inside a list.
        """
        pkgbase_data = {}
        packages = []
        current_section = pkgbase_data

        for line_num, line in enumerate(content.splitlines(), 1):
            line = line.strip()

            # Skip empty lines and comments
            if not line or line.startswith('#'):
                continue

            key, separator, value = line.partition('=')
            if not separator:
                logger.debug('Line %d: No = found, skipping: %s', line_num, line)
                continue

            key = key.strip()
            value = value.strip()

            # Section headers switch the mapping that receives the next keys.
            if key == 'pkgbase':
                pkgbase_data['pkgbase'] = value
                current_section = pkgbase_data
                continue

            if key == 'pkgname':
                current_section = {'pkgname': value}
                packages.append(current_section)
                continue

            # Split on the first underscore: the architecture itself may
            # contain underscores ("x86_64") while the base keys never do.
            base_key, underscore, arch = key.partition('_')
            if underscore and arch and base_key in cls._ARCH_SPECIFIC_KEYS:
                cls._store_value(
                    current_section, base_key, (value, arch), always_list=True)
            else:
                cls._store_value(current_section, key, value)

        return {
            'pkgbase': pkgbase_data,
            'packages': packages if packages else [pkgbase_data],
        }

    @classmethod
    def _build_dependency(cls, entry, scope, is_runtime, is_optional):
        """
        Return a DependentPackage for one parsed dependency ``entry`` or None
        when no package name can be extracted from it.

        ``entry`` is either a requirement string or a ``(requirement, arch)``
        tuple for an architecture-specific dependency, in which case the
        architecture is appended to ``scope`` (e.g. "depends_x86_64").
        """
        if isinstance(entry, tuple):
            requirement, dep_arch = entry
            scope = f'{scope}_{dep_arch}'
        else:
            requirement = entry

        name = cls._REQUIREMENT_SPLITTER.split(requirement, maxsplit=1)[0].strip()
        if not name:
            # e.g. an empty "depends =" line: there is nothing to point to.
            return None

        return models.DependentPackage(
            purl=PackageURL(type=cls.default_package_type, name=name).to_string(),
            extracted_requirement=requirement,
            scope=scope,
            is_runtime=is_runtime,
            is_optional=is_optional,
        )

    @classmethod
    def _add_extra_values(cls, extra_data, key, values):
        """
        Append ``values`` (a string or a list) to the ``extra_data`` mapping.

        Plain values go under ``key``; architecture-specific ``(value, arch)``
        tuples go under ``<key>_<arch>``, mirroring the .SRCINFO key name.
        """
        for value in cls._as_list(values):
            target_key = key
            if isinstance(value, tuple):
                value, value_arch = value
                target_key = f'{key}_{value_arch}'
            extra_data.setdefault(target_key, []).append(value)

    @classmethod
    def _create_package_from_data(cls, data):
        """
        Return a PackageData built from the ``data`` mapping of one merged
        (pkgbase + pkgname) section, or None if it has neither a ``pkgname``
        nor a ``pkgbase`` value.
        """
        pkgname = data.get('pkgname') or data.get('pkgbase')
        if not pkgname:
            return None

        pkgver = data.get('pkgver', '')
        pkgrel = data.get('pkgrel', '')

        # Arch Linux version format: pkgver-pkgrel
        if pkgver and pkgrel:
            version = f'{pkgver}-{pkgrel}'
        else:
            version = pkgver or None

        purl = PackageURL(
            type=cls.default_package_type,
            name=pkgname,
            version=version,
        ).to_string()

        # NOTE(review): the raw license values are passed through as the
        # declared license expression. Confirm whether they should instead be
        # reported as an extracted statement and normalized by detection.
        declared_license_expression = None
        licenses = data.get('license')
        if licenses:
            declared_license_expression = ' AND '.join(cls._as_list(licenses))

        arch = data.get('arch')
        if isinstance(arch, list):
            arch = ', '.join(arch)

        dependencies = []
        for scope, is_runtime, is_optional in cls._DEPENDENCY_KINDS:
            for entry in cls._as_list(data.get(scope, [])):
                dependency = cls._build_dependency(
                    entry, scope, is_runtime, is_optional)
                if dependency:
                    dependencies.append(dependency)

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=pkgname,
            version=version,
            description=data.get('pkgdesc', ''),
            homepage_url=data.get('url'),
            declared_license_expression=declared_license_expression,
            dependencies=dependencies,
            purl=purl,
        )

        # Everything without a dedicated PackageData field goes to extra_data.
        extra_data = {}

        if arch:
            extra_data['arch'] = arch

        source = data.get('source')
        if source:
            cls._add_extra_values(extra_data, 'source', source)

        for checksum_type in cls._CHECKSUM_KEYS:
            if checksum_type in data:
                cls._add_extra_values(extra_data, checksum_type, data[checksum_type])

        if 'epoch' in data:
            extra_data['epoch'] = data['epoch']

        for key in cls._RELATION_KEYS:
            if key in data:
                cls._add_extra_values(extra_data, key, data[key])

        if extra_data:
            package_data['extra_data'] = extra_data

        return models.PackageData.from_data(package_data, package_only=False)


# Save this as: src/packagedcode/srcinfo.py
16 changes: 16 additions & 0 deletions tests/packagedcode/data/srcinfo/arch-specific/.SRCINFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
pkgbase = rust-multiarch
pkgdesc = Multi-architecture Rust package
pkgver = 1.5.0
pkgrel = 2
url = https://example.com
arch = x86_64
arch = aarch64
license = GPL
depends = glibc
depends_x86_64 = lib32-glibc
depends_aarch64 = aarch64-specific-lib
makedepends = rust
source = source.tar.gz
sha256sums = SKIP

pkgname = rust-multiarch
13 changes: 13 additions & 0 deletions tests/packagedcode/data/srcinfo/rust-basic/.SRCINFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
pkgbase = rust-basic
pkgdesc = A basic Rust package
pkgver = 1.0.0
pkgrel = 1
url = https://github.com/example/rust-basic
arch = x86_64
license = MIT
makedepends = rust
makedepends = cargo
source = https://github.com/example/rust-basic/archive/1.0.0.tar.gz
sha256sums = SKIP

pkgname = rust-basic
17 changes: 17 additions & 0 deletions tests/packagedcode/data/srcinfo/split-package/.SRCINFO
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
pkgbase = rust-split
pkgdesc = Split package example
pkgver = 1.0.0
pkgrel = 1
url = https://example.com
arch = x86_64
license = MIT
makedepends = rust
source = source.tar.gz

pkgname = rust-split-bin
pkgdesc = Binary package
depends = glibc

pkgname = rust-split-lib
pkgdesc = Library package
depends = gcc-libs
Loading