Skip to content

Commit 8f5ae89

Browse files
committed
Add .SRCINFO parser for Arch Linux packages
Closes #4406 Signed-off-by: karthiknew07 <[email protected]>
1 parent 85219e6 commit 8f5ae89

File tree

7 files changed

+488
-0
lines changed

7 files changed

+488
-0
lines changed

src/packagedcode/srcinfo.py

Lines changed: 337 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,337 @@
1+
#
2+
# Copyright (c) nexB Inc. and others. All rights reserved.
3+
# ScanCode is a trademark of nexB Inc.
4+
# SPDX-License-Identifier: Apache-2.0
5+
# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+
# See https://github.com/aboutcode-org/scancode-toolkit for support or download.
7+
# See https://aboutcode.org for more information about nexB OSS projects.
8+
#
9+
10+
import logging
11+
import re
12+
13+
from packagedcode import models
14+
from packageurl import PackageURL
15+
16+
"""
17+
Handle Arch Linux .SRCINFO files from makepkg.
18+
19+
.SRCINFO files contain package metadata in a simple, unambiguous format.
20+
They are key=value pairs, separated into sections.
21+
22+
See: https://wiki.archlinux.org/title/.SRCINFO
23+
"""
24+
25+
logger = logging.getLogger(__name__)
26+
27+
28+
class SrcinfoHandler(models.DatafileHandler):
    """
    Handler for Arch Linux .SRCINFO files.

    .SRCINFO files are generated by makepkg and contain package metadata
    for the Arch User Repository (AUR) and Arch Linux packages.

    A file is a flat list of ``key = value`` lines. A ``pkgbase`` line opens
    the global section and each ``pkgname`` line opens a per-package section.
    One package is yielded per ``pkgname`` section, with the global values
    used as defaults that the package section may override.
    """

    datasource_id = 'arch_srcinfo'
    path_patterns = ('*/.SRCINFO', '*.SRCINFO')
    default_package_type = 'arch'
    default_primary_language = None  # Can be any language
    description = 'Arch Linux .SRCINFO file'
    documentation_url = 'https://wiki.archlinux.org/title/.SRCINFO'

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Yield one PackageData for each package declared in the .SRCINFO file
        at ``location``.

        ``package_only`` is forwarded unchanged to
        ``models.PackageData.from_data``. Sections that have neither a
        ``pkgname`` nor a ``pkgbase`` are skipped.
        """
        with open(location, 'r', encoding='utf-8') as f:
            content = f.read()

        srcinfo_data = cls._parse_srcinfo(content)
        if not srcinfo_data:
            return

        pkgbase_data = srcinfo_data.get('pkgbase', {})

        # Without any explicit pkgname section, the pkgbase section alone
        # describes the package.
        packages = srcinfo_data.get('packages') or [pkgbase_data]

        for pkg_data in packages:
            # Package-specific values override the pkgbase (global) values.
            merged_data = {**pkgbase_data, **pkg_data}

            package = cls._create_package_from_data(
                merged_data,
                package_only=package_only,
            )
            if package:
                yield package

    @classmethod
    def _parse_srcinfo(cls, content):
        """
        Parse the text ``content`` of a .SRCINFO file into structured data.

        Return a mapping with two keys:

        - ``pkgbase``: dict of the global section.
        - ``packages``: list of dicts, one per ``pkgname`` section. When there
          is no ``pkgname`` line, this is a one-item list holding the pkgbase
          dict.

        In each section dict a key seen once maps to a string and a key seen
        several times maps to a list of strings. An architecture-specific key
        such as ``depends_x86_64`` is stored under its base key (``depends``)
        as a ``(value, arch)`` tuple, always inside a list.

        Blank lines, ``#`` comment lines and lines without ``=`` are ignored.
        """
        pkgbase_data = {}
        packages = []
        current_section = pkgbase_data

        for line_num, raw_line in enumerate(content.splitlines(), 1):
            line = raw_line.strip()

            if not line or line.startswith('#'):
                continue

            key, separator, value = line.partition('=')
            if not separator:
                logger.debug(
                    'Line %s: No = found, skipping: %s', line_num, line)
                continue

            key = key.strip()
            value = value.strip()

            # Section headers switch the dict that later keys are stored in.
            if key == 'pkgbase':
                pkgbase_data['pkgbase'] = value
                current_section = pkgbase_data
                continue

            if key == 'pkgname':
                current_section = {'pkgname': value}
                packages.append(current_section)
                continue

            # Architecture-specific keys look like "<key>_<arch>". Split at
            # the FIRST underscore: base keys never contain one, while an
            # architecture name may (x86_64). Splitting at the last underscore
            # would turn "depends_x86_64" into ("depends_x86", "64").
            arch_match = re.match(r'([^_]+)_(.+)$', key)
            if arch_match:
                key, arch = arch_match.groups()
                value = (value, arch)
                # Arch-specific values are always collected in a list.
                current_section.setdefault(key, [])
            elif key not in current_section:
                # First plain occurrence of a key is stored as a scalar.
                current_section[key] = value
                continue

            # Repeated key: promote an existing scalar to a list, then append.
            existing = current_section[key]
            if not isinstance(existing, list):
                existing = current_section[key] = [existing]
            existing.append(value)

        return {
            'pkgbase': pkgbase_data,
            'packages': packages or [pkgbase_data],
        }

    @classmethod
    def _build_dependencies(cls, data):
        """
        Return a list of DependentPackage built from the ``depends``,
        ``makedepends`` and ``optdepends`` values of the section ``data``.

        Each value is either a requirement string or a
        ``(requirement, arch)`` tuple; for a tuple the scope becomes
        ``<key>_<arch>``. Entries whose package name is empty (for instance
        from a ``depends =`` line with no value) are skipped because a
        PackageURL cannot be built without a name.
        """
        # (key, is_runtime, is_optional)
        dependency_kinds = (
            ('depends', True, False),
            ('makedepends', False, False),
            ('optdepends', True, True),
        )

        dependencies = []
        for key, is_runtime, is_optional in dependency_kinds:
            entries = data.get(key, [])
            if not isinstance(entries, list):
                entries = [entries]

            for entry in entries:
                if isinstance(entry, tuple):
                    requirement, dep_arch = entry
                    scope = f'{key}_{dep_arch}'
                else:
                    requirement = entry
                    scope = key

                if key == 'optdepends':
                    # optdepends format: "pkgname: description"
                    name = requirement.split(':')[0].strip()
                else:
                    # Drop any version constraint such as "foo>=1.2".
                    name = re.split(r'[<>=]', requirement, maxsplit=1)[0].strip()

                if not name:
                    continue

                dependencies.append(
                    models.DependentPackage(
                        purl=PackageURL(type='arch', name=name).to_string(),
                        extracted_requirement=requirement,
                        scope=scope,
                        is_runtime=is_runtime,
                        is_optional=is_optional,
                    )
                )

        return dependencies

    @classmethod
    def _create_package_from_data(cls, data, package_only=False):
        """
        Return a PackageData built from the merged section mapping ``data``,
        or None when ``data`` has neither a ``pkgname`` nor a ``pkgbase``.

        ``data`` uses the value shapes produced by ``_parse_srcinfo``.
        ``package_only`` is forwarded unchanged to
        ``models.PackageData.from_data``.
        """
        pkgname = data.get('pkgname') or data.get('pkgbase')
        if not pkgname:
            return None

        pkgver = data.get('pkgver', '')
        pkgrel = data.get('pkgrel', '')

        # Arch Linux version format: pkgver-pkgrel
        if pkgver and pkgrel:
            version = f'{pkgver}-{pkgrel}'
        else:
            version = pkgver or None

        purl = PackageURL(
            type='arch',
            name=pkgname,
            version=version,
        ).to_string()

        # NOTE(review): the raw `license` values are used directly as the
        # declared license expression (several values joined with AND).
        # Confirm whether they should be stored as
        # `extracted_license_statement` instead.
        declared_license_expression = None
        licenses = data.get('license')
        if licenses:
            if isinstance(licenses, list):
                declared_license_expression = ' AND '.join(licenses)
            else:
                declared_license_expression = licenses

        arch = data.get('arch')
        if arch and isinstance(arch, list):
            arch = ', '.join(arch)

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=pkgname,
            version=version,
            description=data.get('pkgdesc', ''),
            homepage_url=data.get('url'),
            declared_license_expression=declared_license_expression,
            dependencies=cls._build_dependencies(data),
            purl=purl,
        )

        def as_list(value):
            return value if isinstance(value, list) else [value]

        # Metadata without a dedicated PackageData field goes to extra_data.
        extra_data = {}

        if arch:
            extra_data['arch'] = arch

        source = data.get('source')
        if source:
            extra_data['source'] = as_list(source)

        for checksum_type in ('md5sums', 'sha1sums', 'sha256sums', 'sha512sums'):
            if checksum_type in data:
                extra_data[checksum_type] = as_list(data[checksum_type])

        if 'epoch' in data:
            extra_data['epoch'] = data['epoch']

        for key in ('conflicts', 'provides', 'replaces'):
            if key in data:
                extra_data[key] = as_list(data[key])

        if extra_data:
            package_data['extra_data'] = extra_data

        return models.PackageData.from_data(
            package_data,
            package_only=package_only,
        )
335+
336+
337+
# Save this as: src/packagedcode/srcinfo.py
Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
pkgbase = rust-multiarch
2+
pkgdesc = Multi-architecture Rust package
3+
pkgver = 1.5.0
4+
pkgrel = 2
5+
url = https://example.com
6+
arch = x86_64
7+
arch = aarch64
8+
license = GPL
9+
depends = glibc
10+
depends_x86_64 = lib32-glibc
11+
depends_aarch64 = aarch64-specific-lib
12+
makedepends = rust
13+
source = source.tar.gz
14+
sha256sums = SKIP
15+
16+
pkgname = rust-multiarch
Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
pkgbase = rust-basic
2+
pkgdesc = A basic Rust package
3+
pkgver = 1.0.0
4+
pkgrel = 1
5+
url = https://github.com/example/rust-basic
6+
arch = x86_64
7+
license = MIT
8+
makedepends = rust
9+
makedepends = cargo
10+
source = https://github.com/example/rust-basic/archive/1.0.0.tar.gz
11+
sha256sums = SKIP
12+
13+
pkgname = rust-basic
Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
pkgbase = rust-split
2+
pkgdesc = Split package example
3+
pkgver = 1.0.0
4+
pkgrel = 1
5+
url = https://example.com
6+
arch = x86_64
7+
license = MIT
8+
makedepends = rust
9+
source = source.tar.gz
10+
11+
pkgname = rust-split-bin
12+
pkgdesc = Binary package
13+
depends = glibc
14+
15+
pkgname = rust-split-lib
16+
pkgdesc = Library package
17+
depends = gcc-libs

0 commit comments

Comments
 (0)