1+ #
2+ # Copyright (c) nexB Inc. and others. All rights reserved.
3+ # ScanCode is a trademark of nexB Inc.
4+ # SPDX-License-Identifier: Apache-2.0
5+ # See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
6+ # See https://github.com/aboutcode-org/scancode-toolkit for support or download.
7+ # See https://aboutcode.org for more information about nexB OSS projects.
8+ #
9+
10+ import logging
11+ import re
12+
13+ from packagedcode import models
14+ from packageurl import PackageURL
15+
16+ """
17+ Handle Arch Linux .SRCINFO files from makepkg.
18+
19+ .SRCINFO files contain package metadata in a simple, unambiguous format.
20+ They are key=value pairs, separated into sections.
21+
22+ See: https://wiki.archlinux.org/title/.SRCINFO
23+ """
24+
# Module-level logger named after this module; the .SRCINFO parser below
# uses it to report, at debug level, lines it skips because they have no "=".
25+ logger = logging .getLogger (__name__ )
26+
27+
class SrcinfoHandler(models.DatafileHandler):
    """
    Handler for Arch Linux .SRCINFO files.

    .SRCINFO files are generated by makepkg and contain package metadata
    for the Arch User Repository (AUR) and Arch Linux packages.

    A file is a list of ``key = value`` lines. The lines that follow a
    ``pkgbase`` line hold metadata shared by every package, and each
    ``pkgname`` line starts a section whose values override the shared ones.
    One package is yielded per ``pkgname`` section.
    """

    datasource_id = 'arch_srcinfo'
    path_patterns = ('*/.SRCINFO', '*.SRCINFO')
    # NOTE(review): the purl specification appears to register Arch packages
    # under the "alpm" type; "arch" is kept as-is for backward compatibility.
    # Confirm before changing.
    default_package_type = 'arch'
    default_primary_language = None  # Can be any language
    description = 'Arch Linux .SRCINFO file'
    documentation_url = 'https://wiki.archlinux.org/title/.SRCINFO'

    # Keys that may carry an architecture suffix such as ``depends_x86_64``.
    # Only these are ever split into (key, arch); none of them contains an
    # underscore, so everything after the first "_" is the architecture.
    _ARCH_SPECIFIC_KEYS = frozenset((
        'source',
        'depends',
        'checkdepends',
        'makedepends',
        'optdepends',
        'provides',
        'conflicts',
        'replaces',
        'cksums',
        'md5sums',
        'sha1sums',
        'sha224sums',
        'sha256sums',
        'sha384sums',
        'sha512sums',
        'b2sums',
    ))

    # Dependency keys reported as DependentPackage: (key, is_runtime, is_optional).
    _DEPENDENCY_KEYS = (
        ('depends', True, False),
        ('makedepends', False, False),
        ('optdepends', True, True),
    )

    # Keys copied to extra_data as lists whenever they are present.
    _CHECKSUM_KEYS = (
        'md5sums',
        'sha1sums',
        'sha224sums',
        'sha256sums',
        'sha384sums',
        'sha512sums',
        'b2sums',
    )
    _RELATION_KEYS = ('conflicts', 'provides', 'replaces')

    @classmethod
    def parse(cls, location, package_only=False):
        """
        Parse the .SRCINFO file at ``location`` and yield one PackageData per
        ``pkgname`` section (or a single one built from the ``pkgbase``
        section when the file has no ``pkgname`` line).

        ``package_only`` is passed through to ``PackageData.from_data``.
        """
        with open(location, encoding='utf-8') as srcinfo_file:
            content = srcinfo_file.read()

        srcinfo_data = cls._parse_srcinfo(content)
        pkgbase_data = srcinfo_data['pkgbase']

        for pkg_data in srcinfo_data['packages']:
            # Package-specific values override the shared pkgbase values.
            merged_data = cls._merge_sections(pkgbase_data, pkg_data)
            package = cls._create_package_from_data(
                merged_data,
                package_only=package_only,
            )
            if package:
                yield package

    @classmethod
    def _parse_srcinfo(cls, content):
        """
        Parse .SRCINFO text ``content`` and return a mapping with two keys:

        - ``pkgbase``: dict of the shared (global) metadata
        - ``packages``: list of dicts, one per ``pkgname`` section; when there
          is no ``pkgname`` line this is a one-item list holding the pkgbase
          dict itself

        In each dict a key seen once maps to a string and a key seen several
        times maps to a list of strings. A value given with an architecture
        suffix (``depends_x86_64 = foo``) is stored in the list of its base
        key as a ``(value, arch)`` tuple.
        """
        pkgbase_data = {}
        packages = []
        current_section = pkgbase_data

        for line_num, raw_line in enumerate(content.splitlines(), 1):
            line = raw_line.strip()

            # Skip empty lines and comments
            if not line or line.startswith('#'):
                continue

            key, separator, value = line.partition('=')
            if not separator:
                logger.debug('Line %d: No = found, skipping: %s', line_num, line)
                continue

            key = key.strip()
            value = value.strip()

            # Section headers switch the dict that receives following keys.
            if key == 'pkgbase':
                pkgbase_data['pkgbase'] = value
                current_section = pkgbase_data
                continue
            if key == 'pkgname':
                current_section = {'pkgname': value}
                packages.append(current_section)
                continue

            # Split on the FIRST underscore: architectures such as x86_64
            # contain underscores themselves, the base keys never do.
            base_key, underscore, arch = key.partition('_')
            if underscore and arch and base_key in cls._ARCH_SPECIFIC_KEYS:
                cls._add_value(
                    current_section,
                    base_key,
                    (value, arch),
                    force_list=True,
                )
            else:
                cls._add_value(current_section, key, value)

        return {
            'pkgbase': pkgbase_data,
            'packages': packages if packages else [pkgbase_data],
        }

    @staticmethod
    def _add_value(section, key, value, force_list=False):
        """
        Record ``value`` under ``key`` in the ``section`` dict. The first
        value is stored bare (or as a one-item list with ``force_list``);
        later values turn the entry into a list and are appended to it.
        """
        if key not in section:
            section[key] = [value] if force_list else value
            return

        existing = section[key]
        if not isinstance(existing, list):
            existing = section[key] = [existing]
        existing.append(value)

    @staticmethod
    def _group_by_arch(value):
        """
        Return the items of a parsed ``value`` (None, a string or a list)
        grouped in a dict keyed by architecture, in order of appearance.
        Plain strings are grouped under the None key.
        """
        groups = {}
        if value is None:
            return groups

        items = value if isinstance(value, list) else [value]
        for item in items:
            arch = item[1] if isinstance(item, tuple) else None
            groups.setdefault(arch, []).append(item)
        return groups

    @classmethod
    def _merge_sections(cls, pkgbase_data, pkg_data):
        """
        Return a new dict with ``pkgbase_data`` overridden by ``pkg_data``.

        Generic values and the values of each architecture are overridden
        independently: a section that only sets ``depends_x86_64`` keeps the
        generic ``depends`` inherited from pkgbase.
        """
        merged = dict(pkgbase_data)

        for key, override in pkg_data.items():
            inherited_groups = cls._group_by_arch(merged.get(key))
            override_groups = cls._group_by_arch(override)

            has_arch_values = any(
                arch is not None
                for arch in (*inherited_groups, *override_groups)
            )
            if not has_arch_values:
                # Plain override: keep the parsed string-or-list shape as-is.
                merged[key] = override
                continue

            inherited_groups.update(override_groups)
            merged[key] = [
                item
                for group in inherited_groups.values()
                for item in group
            ]

        return merged

    @staticmethod
    def _dependency_name(requirement, is_optional=False):
        """
        Return the bare package name of a dependency ``requirement`` string,
        that is the text before any ``<``, ``>`` or ``=`` version operator.
        Optional dependencies use a "pkgname: description" format and their
        description is dropped first.
        """
        name = requirement
        if is_optional:
            name = name.partition(':')[0]
        for operator in '<>=':
            name = name.partition(operator)[0]
        return name.strip()

    @classmethod
    def _build_dependencies(cls, data):
        """
        Return a list of DependentPackage built from the ``depends``,
        ``makedepends`` and ``optdepends`` values of the ``data`` dict.
        Architecture-specific entries get a ``<key>_<arch>`` scope.
        """
        dependencies = []

        for key, is_runtime, is_optional in cls._DEPENDENCY_KEYS:
            entries = data.get(key) or []
            if not isinstance(entries, list):
                entries = [entries]

            for entry in entries:
                if isinstance(entry, tuple):
                    requirement, dep_arch = entry
                    scope = f'{key}_{dep_arch}'
                else:
                    requirement = entry
                    scope = key

                name = cls._dependency_name(requirement, is_optional=is_optional)
                if not name:
                    # An empty value ("depends =") clears an inherited list;
                    # there is nothing to report and PackageURL rejects an
                    # empty name.
                    continue

                dependencies.append(
                    models.DependentPackage(
                        purl=PackageURL(
                            type=cls.default_package_type,
                            name=name,
                        ).to_string(),
                        extracted_requirement=requirement,
                        scope=scope,
                        is_runtime=is_runtime,
                        is_optional=is_optional,
                    )
                )

        return dependencies

    @classmethod
    def _create_package_from_data(cls, data, package_only=False):
        """
        Return a PackageData built from the merged section ``data`` dict, or
        None when it has neither a ``pkgname`` nor a ``pkgbase``.

        ``package_only`` is passed through to ``PackageData.from_data``.
        """
        pkgname = data.get('pkgname') or data.get('pkgbase')
        if not pkgname:
            return None

        # Arch Linux full version format: [epoch:]pkgver-pkgrel
        pkgver = data.get('pkgver')
        pkgrel = data.get('pkgrel')
        epoch = data.get('epoch')

        version = None
        if pkgver:
            version = f'{pkgver}-{pkgrel}' if pkgrel else pkgver
            # An epoch of 0 is the default and is not part of the version.
            if epoch and epoch != '0':
                version = f'{epoch}:{version}'

        purl = PackageURL(
            type=cls.default_package_type,
            name=pkgname,
            version=version,
        ).to_string()

        # The license values are raw text from the file, not a normalized
        # expression: report them as the extracted statement so that
        # from_data() can run license detection on them.
        extracted_license_statement = None
        licenses = data.get('license')
        if licenses:
            if isinstance(licenses, list):
                extracted_license_statement = ' AND '.join(licenses)
            else:
                extracted_license_statement = licenses

        arch = data.get('arch')
        if isinstance(arch, list):
            arch = ', '.join(arch)

        package_data = dict(
            datasource_id=cls.datasource_id,
            type=cls.default_package_type,
            name=pkgname,
            version=version,
            description=data.get('pkgdesc', ''),
            homepage_url=data.get('url'),
            extracted_license_statement=extracted_license_statement,
            dependencies=cls._build_dependencies(data),
            purl=purl,
        )

        # Store additional metadata in extra_data
        extra_data = {}

        if arch:
            extra_data['arch'] = arch

        source = data.get('source')
        if source:
            extra_data['source'] = source if isinstance(source, list) else [source]

        for checksum_type in cls._CHECKSUM_KEYS:
            if checksum_type in data:
                checksums = data[checksum_type]
                extra_data[checksum_type] = (
                    checksums if isinstance(checksums, list) else [checksums]
                )

        if 'epoch' in data:
            extra_data['epoch'] = data['epoch']

        for key in cls._RELATION_KEYS:
            if key in data:
                values = data[key]
                extra_data[key] = values if isinstance(values, list) else [values]

        if extra_data:
            package_data['extra_data'] = extra_data

        return models.PackageData.from_data(package_data, package_only=package_only)
335+
336+
337+ # Save this as: src/packagedcode/srcinfo.py
0 commit comments