Skip to content

Commit 350e4a4

Browse files
sethmlarson, pre-commit-ci[bot], auvipy, mayeut, and Copilot
authored
Generate SBOMs for repaired libraries (#577)
* Initial support for generating SBOMs for repaired libraries * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Use pathlib, add whichprovides to vendor * Also log error output * Give glob.glob() a string instead of Path * Add an integration test for included SBOMs * Make assertions more broad for exact software IDs * Add better messages to asserts * Fix dependencies for all policies * Skip unsupported policies * Allow both Ubuntu and CentOS * Update tests/integration/test_manylinux.py Co-authored-by: Matthieu Darbois <[email protected]> * Update tests/integration/test_manylinux.py Co-authored-by: Matthieu Darbois <[email protected]> * Update .pre-commit-config.yaml Co-authored-by: Matthieu Darbois <[email protected]> * Update src/auditwheel/repair.py Co-authored-by: Matthieu Darbois <[email protected]> * Add 'file_name' qualifier to PURL * Use 'repaired_wheel' for full tags list * Add unit tests for sbom module * add assertion message Co-authored-by: Copilot <[email protected]> * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Asif Saif Uddin <[email protected]> Co-authored-by: Matthieu Darbois <[email protected]> Co-authored-by: Copilot <[email protected]>
1 parent 909b1bd commit 350e4a4

File tree

8 files changed

+669
-10
lines changed

8 files changed

+669
-10
lines changed
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
MIT License
2+
3+
Copyright (c) 2025 Seth Larson
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a copy
6+
of this software and associated documentation files (the "Software"), to deal
7+
in the Software without restriction, including without limitation the rights
8+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
copies of the Software, and to permit persons to whom the Software is
10+
furnished to do so, subject to the following conditions:
11+
12+
The above copyright notice and this permission notice shall be included in all
13+
copies or substantial portions of the Software.
14+
15+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
SOFTWARE.
Lines changed: 359 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,359 @@
1+
# SPDX-License-Identifier: MIT
2+
3+
"""
4+
Module which provides (heh) 'yum provides'
5+
functionality across many package managers.
6+
"""
7+
8+
import dataclasses
9+
import pathlib
10+
import re
11+
import shutil
12+
import subprocess
13+
import sys
14+
import typing
15+
from urllib.parse import quote
16+
17+
__all__ = ["ProvidedBy", "whichprovides"]
18+
__version__ = "0.4.0"
19+
20+
_OS_RELEASE_LINES_RE = re.compile(r"^([A-Z_]+)=(?:\"([^\"]*)\"|(.*))$", re.MULTILINE)
21+
_APK_WHO_OWNS_RE = re.compile(r" is owned by ([^\s\-]+)-([^\s]+)$", re.MULTILINE)
22+
_DPKG_SEARCH_RE = re.compile(r"^([^:]+):")
23+
_DPKG_VERSION_RE = re.compile(r"^Version: ([^\s]+)", re.MULTILINE)
24+
_APT_FILE_SEARCH_RE = re.compile(r"^([^:]+): (.+)$", re.MULTILINE)
25+
26+
27+
@dataclasses.dataclass
class ProvidedBy:
    """Identity of the package that provides a file.

    The fields map directly onto the components of a Package URL (PURL),
    which is available through the ``purl`` property.
    """

    # PURL type component (lower-cased when the PURL is rendered).
    package_type: str
    # PURL name component.
    package_name: str
    # PURL version component.
    package_version: str
    # Optional segment rendered between the type and the name.
    distro: typing.Union[str, None] = None

    @property
    def purl(self) -> str:
        """The Package URL (PURL) of the providing package.

        Raises:
            ValueError: if ``package_type`` contains anything other than
                ASCII letters, digits, ``+``, ``-`` and ``.``.
        """

        def _quote_purl(value: str) -> str:
            """
            Quotes according to PURL rules which are different from
            typical URL percent encoding.
            """
            return quote(value, safe="")

        # PURL disallows many characters in the package type field.
        # fullmatch() is used because '$' with match() would also accept a
        # trailing newline and leak it into the generated PURL.
        if not re.fullmatch(r"[a-zA-Z0-9\+\-\.]+", self.package_type):
            raise ValueError("Package type must be ASCII letters, numbers, +, -, and .")

        parts = ["pkg:", self.package_type.lower(), "/"]
        if self.distro:
            parts.extend((_quote_purl(self.distro), "/"))
        parts.extend(
            (_quote_purl(self.package_name), "@", _quote_purl(self.package_version))
        )
        return "".join(parts)
56+
57+
58+
class PackageProvider:
    """Base class for package-manager backends that map files to packages.

    Subclasses override ``is_available`` and ``whichprovides``; the base
    implementations report "not available" and raise ``NotImplementedError``.
    """

    # Order in which the provider should be resolved.
    # Lower is attempted earlier than higher numbers.
    _resolve_order: int = 0
    # Shared cache: requested binary name -> resolved path, or False when the
    # binary is missing or failed its '--version' probe.
    _has_bin_cache: dict[str, typing.Union[str, bool]] = {}

    @staticmethod
    def os_release() -> dict[str, str]:
        """Dumb method of finding os-release information.

        Returns the ``KEY=value`` pairs parsed from ``/etc/os-release``,
        or an empty dict when the file cannot be read.
        """
        try:
            # Decode explicitly as UTF-8 rather than with the locale default,
            # and replace undecodable bytes so a damaged file cannot raise.
            with open("/etc/os-release", encoding="utf-8", errors="replace") as f:
                contents = f.read()
        except OSError:
            return {}
        os_release = {}
        for name, value_quoted, value_unquoted in _OS_RELEASE_LINES_RE.findall(
            contents
        ):
            os_release[name] = value_quoted or value_unquoted
        return os_release

    @staticmethod
    def distro() -> typing.Optional[str]:
        """Return the ``ID`` field of os-release, or None when absent."""
        return PackageProvider.os_release().get("ID", None)

    @classmethod
    def which(
        cls, bin: str, *, allowed_returncodes: typing.Optional[set[int]] = None
    ) -> typing.Optional[str]:
        """which, but tries to execute the program, too!

        Args:
            bin: name (or path) of the program to locate.
            allowed_returncodes: non-zero exit codes of ``<bin> --version``
                that still count as "the program works".

        Returns:
            The resolved path, or None when the program is missing, cannot
            be executed, or exits with a code that is not allowed. The
            outcome is cached per ``bin``.
        """
        cached_bin = cls._has_bin_cache.get(bin)
        assert cached_bin is not True
        if cached_bin is False:
            return None
        if cached_bin is not None:
            return cached_bin
        bin_which = shutil.which(bin)
        if bin_which is None:  # Cache the 'not-found' result.
            cls._has_bin_cache[bin] = False
            return None
        try:
            subprocess.check_call(
                [bin_which, "--version"],
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError as e:
            # If running --version returns an non-zero exit we
            # explicitly allow that here.
            usable = bool(allowed_returncodes) and e.returncode in allowed_returncodes
        except OSError:
            # The file exists and is marked executable but could not be
            # started (e.g. exec format error); treat it as unavailable
            # instead of letting the exception escape.
            usable = False
        else:
            usable = True
        if not usable:
            cls._has_bin_cache[bin] = False
            return None
        cls._has_bin_cache[bin] = bin_which
        return bin_which

    @classmethod
    def is_available(cls) -> bool:
        """Report whether this provider can be used; the base class cannot."""
        return False

    @classmethod
    def whichprovides(
        cls, filepaths: typing.Collection[str]
    ) -> "dict[str, ProvidedBy]":
        """Map each path that has a known owner to its providing package.

        Must be overridden by subclasses.
        """
        raise NotImplementedError()
122+
123+
124+
class _SinglePackageProvider(PackageProvider):
    """Base for providers whose backend answers one file path per query."""

    @classmethod
    def whichprovides(cls, filepaths: typing.Collection[str]) -> dict[str, ProvidedBy]:
        """Run ``whichprovides1`` on every path and keep only the hits."""
        return {
            path: owner
            for path in filepaths
            if (owner := cls.whichprovides1(path))
        }

    @classmethod
    def whichprovides1(cls, filepath: str) -> typing.Optional[ProvidedBy]:
        """Look up a single path; concrete providers must override this."""
        raise NotImplementedError()
138+
139+
140+
class ApkPackageProvider(_SinglePackageProvider):
    """Provider that asks ``apk info --who-owns`` which package owns a file."""

    @classmethod
    def is_available(cls) -> bool:
        """True when ``apk`` can be run and the distro ID is known."""
        if not cls.which("apk"):
            return False
        return bool(cls.distro())

    @classmethod
    def whichprovides1(cls, filepath: str) -> typing.Optional[ProvidedBy]:
        """Return the owning package of ``filepath``, or None if unknown."""
        apk_bin = cls.which("apk")
        distro = cls.distro()
        assert apk_bin is not None and distro is not None
        # $ apk info --who-owns /bin/bash
        # /bin/bash is owned by bash-5.2.26-r0
        command = [apk_bin, "info", "--who-owns", str(filepath)]
        try:
            raw_output = subprocess.check_output(command, stderr=subprocess.DEVNULL)
        except subprocess.CalledProcessError:
            return None
        owner = _APK_WHO_OWNS_RE.search(raw_output.decode())
        if owner is None:
            return None
        return ProvidedBy(
            package_type="apk",
            distro=distro,
            package_name=owner.group(1),
            package_version=owner.group(2),
        )
167+
168+
169+
class RpmPackageProvider(_SinglePackageProvider):
    """Provider that asks ``rpm -qf`` which package owns a file."""

    @classmethod
    def is_available(cls) -> bool:
        """True when ``rpm`` can be run and the distro ID is known."""
        return bool(cls.which("rpm") and cls.distro())

    @classmethod
    def whichprovides1(cls, filepath: str) -> typing.Optional[ProvidedBy]:
        """Return the owning package of ``filepath``, or None if unknown.

        The reported version is ``<VERSION>-<RELEASE>``. None is returned
        when ``rpm`` exits non-zero or prints fewer than three fields.
        """
        rpm_bin = cls.which("rpm")
        distro = cls.distro()
        assert rpm_bin is not None and distro is not None
        try:
            # $ rpm -qf --queryformat "%{NAME} %{VERSION} %{RELEASE} %{ARCH}" /bin/bash
            # bash 4.4.20 4.el8_6 x86_64
            stdout = subprocess.check_output(
                [
                    rpm_bin,
                    "-qf",
                    "--queryformat",
                    "%{NAME} %{VERSION} %{RELEASE} %{ARCH}",
                    str(filepath),
                ],
                stderr=subprocess.DEVNULL,
            ).decode()
        except subprocess.CalledProcessError:
            return None
        fields = stdout.strip().split(" ", 4)
        if len(fields) < 3:
            # Unexpected output shape: report "unknown" instead of failing
            # the unpacking below with a ValueError.
            return None
        package_name, package_version, package_release = fields[:3]
        return ProvidedBy(
            package_type="rpm",
            distro=distro,
            package_name=package_name,
            package_version=f"{package_version}-{package_release}",
        )
204+
205+
206+
class DpkgPackageProvider(_SinglePackageProvider):
    """Provider that uses ``dpkg -S`` and ``dpkg -s`` to find a file's owner."""

    @classmethod
    def is_available(cls) -> bool:
        """True when ``dpkg`` can be run and the distro ID is known."""
        if not cls.which("dpkg"):
            return False
        return bool(cls.distro())

    @classmethod
    def whichprovides1(cls, filepath: str) -> typing.Optional[ProvidedBy]:
        """Return the owning package of ``filepath``, or None if unknown."""
        dpkg_bin = cls.which("dpkg")
        distro = cls.distro()
        assert dpkg_bin is not None and distro is not None
        try:
            # $ dpkg -S /bin/bash
            # bash: /bin/bash
            search_output = subprocess.check_output(
                [dpkg_bin, "-S", str(filepath)],
                stderr=subprocess.DEVNULL,
            ).decode()
            owner = _DPKG_SEARCH_RE.search(search_output)
            if owner is None:
                return None
            package_name = owner.group(1)
            # $ dpkg -s bash
            # ...
            # Version: 5.1-6ubuntu1.1
            status_output = subprocess.check_output(
                [dpkg_bin, "-s", package_name],
                stderr=subprocess.DEVNULL,
            ).decode()
        except subprocess.CalledProcessError:
            return None
        version = _DPKG_VERSION_RE.search(status_output)
        if version is None:
            return None
        return ProvidedBy(
            package_type="deb",
            distro=distro,
            package_name=package_name,
            package_version=version.group(1),
        )
242+
243+
244+
class AptFilePackageProvider(PackageProvider):
    """Provider that resolves many paths at once through ``apt-file search``."""

    # apt-file is slow, so resolve this one later.
    _resolve_order = 100

    @classmethod
    def is_available(cls) -> bool:
        """True when ``apt`` and ``apt-file`` run and the distro ID is known."""
        return bool(
            cls.which("apt")
            and cls.which("apt-file", allowed_returncodes={2})
            and cls.distro()
        )

    @classmethod
    def whichprovides(cls, filepaths: typing.Collection[str]) -> dict[str, ProvidedBy]:
        """Map each requested path that apt-file knows to its package.

        Only paths that were actually requested appear in the result, and
        a package whose version cannot be determined is skipped without
        affecting the other packages.
        """
        apt_bin = cls.which("apt")
        apt_file_bin = cls.which("apt-file", allowed_returncodes={2})
        distro = cls.distro()
        assert apt_bin is not None and apt_file_bin is not None and distro is not None
        results: dict[str, ProvidedBy] = {}
        requested = {str(filepath) for filepath in filepaths}
        if not requested:
            return results
        try:
            # $ echo '\n'.join(paths) | apt-file search --from-file -
            # Finding relevant cache files to search ...
            # ...
            # libwebpdemux2: /usr/lib/x86_64-linux-gnu/libwebpdemux.so.2.0.9
            stdout = subprocess.check_output(
                [apt_file_bin, "search", "--from-file", "-"],
                stderr=subprocess.DEVNULL,
                input=b"\n".join(
                    [str(filepath).encode("utf-8") for filepath in filepaths]
                ),
            ).decode()
        except subprocess.CalledProcessError:
            return results

        # Package name -> version (None when it could not be determined), so
        # the slow `apt show` call runs at most once per package.
        versions: dict[str, typing.Optional[str]] = {}
        for package_name, filepath in _APT_FILE_SEARCH_RE.findall(stdout):
            # The search patterns are not exact matches, so the output can
            # list paths nobody asked about; ignore those.
            if filepath not in requested:
                continue
            if package_name not in versions:
                try:
                    show_stdout = subprocess.check_output(
                        [apt_bin, "show", package_name],
                        stderr=subprocess.DEVNULL,
                    ).decode()
                except subprocess.CalledProcessError:
                    versions[package_name] = None
                else:
                    match = _DPKG_VERSION_RE.search(show_stdout)
                    versions[package_name] = match.group(1) if match else None
            package_version = versions[package_name]
            if package_version is None:
                continue
            results[filepath] = ProvidedBy(
                package_type="deb",
                distro=distro,
                package_name=package_name,
                package_version=package_version,
            )
        return results
291+
292+
293+
def _package_providers() -> list[type[PackageProvider]]:
    """Returns a list of package providers sorted in
    the order that they should be attempted.

    Providers with the same ``_resolve_order`` keep their class definition
    order, so the result is the same on every run.
    """

    def all_subclasses(cls):
        # A dict is used as an insertion-ordered set: __subclasses__()
        # yields classes in definition order, and a plain set would make
        # the order among equal _resolve_order values vary between runs.
        subclasses = {}
        for subcls in cls.__subclasses__():
            subclasses[subcls] = None
            subclasses.update(all_subclasses(subcls))
        return subclasses

    # sorted() is stable, so ties keep the discovery order from above.
    return sorted(all_subclasses(PackageProvider), key=lambda p: p._resolve_order)
306+
307+
308+
def whichprovides(filepath: typing.Union[str, list[str]]) -> dict[str, ProvidedBy]:
    """Find the packages that provide one or more files.

    Args:
        filepath: a single path or a list of paths.

    Returns:
        A dict mapping each given path, exactly as passed in, to the
        ``ProvidedBy`` describing its package. Paths for which no
        available provider knows an owner are omitted.
    """
    if isinstance(filepath, str):
        filepaths = [filepath]
    else:
        filepaths = filepath

    # Link each resolved path back to every original path that resolves to
    # it, so that several inputs pointing at the same file (for example
    # through symlinks) all appear in the result.
    resolved_filepaths: dict[str, list[str]] = {}
    for original in filepaths:
        resolved = str(pathlib.Path(original).resolve())
        resolved_filepaths.setdefault(resolved, []).append(original)

    filepath_provided_by: dict[str, ProvidedBy] = {}
    for package_provider in _package_providers():
        remaining = set(resolved_filepaths) - set(filepath_provided_by)
        if not remaining:
            break
        if not package_provider.is_available():
            continue
        results = package_provider.whichprovides(remaining)
        # Keep only answers for paths that were asked about; an unexpected
        # key would otherwise raise KeyError in the mapping below.
        for resolved, provided_by in results.items():
            if resolved in remaining:
                filepath_provided_by[resolved] = provided_by

    return {
        original: provided_by
        for resolved, provided_by in filepath_provided_by.items()
        for original in resolved_filepaths[resolved]
    }
334+
335+
336+
def _main():
    """Command-line entry point: print the PURL of each path's package.

    Exits with status 1 when no paths are given or when at least one path
    has no known providing package, and with status 0 otherwise.
    """
    cli_paths = sys.argv[1:]
    if not cli_paths:
        print(
            "Must provide one or more path argument "
            "('$ python -m whichprovides <paths>')",
            file=sys.stderr,
        )
        sys.exit(1)

    owners = whichprovides(cli_paths)
    any_unresolved = False
    for path in cli_paths:
        owner = owners.get(path)
        if not owner:
            print(f"No known package providing {path}", file=sys.stderr)
            any_unresolved = True
            continue
        print(f"{path}: {owner.purl}")
    sys.exit(1 if any_unresolved else 0)


if __name__ == "__main__":
    _main()
Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Entry point for `python -m <package>`: run the package's command-line main.
from . import _main as _run_cli

_run_cli()

0 commit comments

Comments
 (0)