Skip to content

Commit 1340415

Browse files
feat: generate build specification for pure python wheels (#1221)
Signed-off-by: Abhinav Pradeep <[email protected]>
1 parent 4172e54 commit 1340415

File tree

13 files changed

+370
-10
lines changed

13 files changed

+370
-10
lines changed

src/macaron/build_spec_generator/common_spec/base_spec.py

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,7 @@ class BaseBuildSpecDict(TypedDict, total=False):
5050
newline: NotRequired[str]
5151

5252
#: The version of the programming language or runtime, e.g., '11' for JDK, '3.11' for Python.
53-
language_version: Required[str]
53+
language_version: Required[list[str]]
5454

5555
#: List of release dependencies.
5656
dependencies: NotRequired[list[str]]
@@ -73,6 +73,11 @@ class BaseBuildSpecDict(TypedDict, total=False):
7373
#: Entry point script, class, or binary for running the project.
7474
entry_point: NotRequired[str | None]
7575

76+
#: A "back end" is a tool that a "front end" (such as pip/build) would call to
77+
#: package the source distribution into the wheel format. build_backends would
78+
#: be a mapping from each of these used in building the wheel to its version specifier.
79+
build_backends: NotRequired[dict[str, str]]
80+
7681

7782
class BaseBuildSpec(ABC):
7883
"""Abstract base class for build specification behavior and field resolution."""

src/macaron/build_spec_generator/common_spec/core.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -442,7 +442,7 @@ def gen_generic_build_spec(
442442
"git_repo": latest_component_repository.remote_path,
443443
"git_tag": latest_component_repository.commit_sha,
444444
"newline": "lf",
445-
"language_version": lang_version or "",
445+
"language_version": [lang_version] if lang_version else [],
446446
"ecosystem": purl.type,
447447
"purl": str(purl),
448448
"language": target_language,

src/macaron/build_spec_generator/common_spec/maven_spec.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,12 +58,14 @@ def resolve_fields(self, purl: PackageURL) -> None:
5858
jdk_from_jar or "Cannot find any.",
5959
)
6060

61+
existing = self.data["language_version"][0] if self.data["language_version"] else None
62+
6163
# Select JDK from jar or another source, with a default of version 8.
62-
selected_jdk_version = jdk_from_jar or self.data["language_version"] if self.data["language_version"] else "8"
64+
selected_jdk_version = jdk_from_jar or existing if existing else "8"
6365

6466
major_jdk_version = normalize_jdk_version(selected_jdk_version)
6567
if not major_jdk_version:
6668
logger.error("Failed to obtain the major version of %s", selected_jdk_version)
6769
return
6870

69-
self.data["language_version"] = major_jdk_version
71+
self.data["language_version"] = [major_jdk_version]

src/macaron/build_spec_generator/common_spec/pypi_spec.py

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,10 +3,22 @@
33

44
"""This module includes build specification and helper classes for PyPI packages."""
55

6+
import logging
7+
import os
8+
import re
69

10+
import tomli
711
from packageurl import PackageURL
12+
from packaging.requirements import InvalidRequirement, Requirement
13+
from packaging.utils import InvalidWheelFilename, parse_wheel_filename
814

915
from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict
16+
from macaron.config.defaults import defaults
17+
from macaron.errors import SourceCodeError
18+
from macaron.slsa_analyzer.package_registry import pypi_registry
19+
from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo
20+
21+
logger: logging.Logger = logging.getLogger(__name__)
1022

1123

1224
class PyPIBuildSpec(
@@ -34,3 +46,161 @@ def resolve_fields(self, purl: PackageURL) -> None:
3446
purl: str
3547
The target software component Package URL.
3648
"""
49+
if purl.type != "pypi":
50+
return
51+
52+
registry = pypi_registry.PyPIRegistry()
53+
registry.load_defaults()
54+
55+
registry_info = PackageRegistryInfo(
56+
build_tool_name="pip",
57+
build_tool_purl_type="pypi",
58+
package_registry=registry,
59+
metadata=[],
60+
)
61+
62+
pypi_package_json = pypi_registry.find_or_create_pypi_asset(purl.name, purl.version, registry_info)
63+
64+
if pypi_package_json is not None:
65+
if pypi_package_json.package_json or pypi_package_json.download(dest=""):
66+
requires_array: list[str] = []
67+
build_backends: dict[str, str] = {}
68+
python_version_list: list[str] = []
69+
try:
70+
with pypi_package_json.wheel():
71+
logger.debug("Wheel at %s", pypi_package_json.wheel_path)
72+
# Should only have .dist-info directory
73+
logger.debug("It has directories %s", ",".join(os.listdir(pypi_package_json.wheel_path)))
74+
wheel_contents, metadata_contents = self.read_directory(pypi_package_json.wheel_path, purl)
75+
generator, version = self.read_generator_line(wheel_contents)
76+
if generator != "":
77+
build_backends[generator] = "==" + version
78+
if generator != "setuptools":
79+
# Apply METADATA heuristics to determine setuptools version
80+
if "License-File" in metadata_contents:
81+
build_backends["setuptools"] = "==" + defaults.get(
82+
"heuristic.pypi", "setuptools_version_emitting_license"
83+
)
84+
elif "Platform: UNKNOWN" in metadata_contents:
85+
build_backends["setuptools"] = "==" + defaults.get(
86+
"heuristic.pypi", "setuptools_version_emitting_platform_unknown"
87+
)
88+
else:
89+
build_backends["setuptools"] = "==" + defaults.get(
90+
"heuristic.pypi", "default_setuptools"
91+
)
92+
except SourceCodeError:
93+
logger.debug("Could not find pure wheel matching this PURL")
94+
95+
logger.debug("From .dist_info:")
96+
logger.debug(build_backends)
97+
98+
try:
99+
with pypi_package_json.sourcecode():
100+
try:
101+
pyproject_content = pypi_package_json.get_sourcecode_file_contents("pyproject.toml")
102+
content = tomli.loads(pyproject_content.decode("utf-8"))
103+
build_system: dict[str, list[str]] = content.get("build-system", {})
104+
requires_array = build_system.get("requires", [])
105+
python_version_constraint = content.get("project", {}).get("requires-python")
106+
if python_version_constraint:
107+
python_version_list.append(python_version_constraint)
108+
logger.debug("From pyproject.toml:")
109+
logger.debug(requires_array)
110+
except SourceCodeError:
111+
logger.debug("No pyproject.toml found")
112+
except SourceCodeError:
113+
logger.debug("No source distribution found")
114+
115+
# Merge in pyproject.toml information only when the wheel .dist-info does not already name the same backend.
116+
# Hatch is an interesting example of this merge being required.
117+
for requirement in requires_array:
118+
try:
119+
parsed_requirement = Requirement(requirement)
120+
if parsed_requirement.name not in build_backends:
121+
build_backends[parsed_requirement.name] = str(parsed_requirement.specifier)
122+
except InvalidRequirement:
123+
logger.debug("Malformed requirement encountered:")
124+
logger.debug(requirement)
125+
126+
logger.debug("Combined:")
127+
logger.debug(build_backends)
128+
self.data["build_backends"] = build_backends
129+
130+
if not python_version_list:
131+
try:
132+
# Get python version specified in the wheel file name
133+
logger.debug(pypi_package_json.wheel_filename)
134+
_, _, _, tags = parse_wheel_filename(pypi_package_json.wheel_filename)
135+
for tag in tags:
136+
python_version_list.append(tag.interpreter)
137+
logger.debug(python_version_list)
138+
except InvalidWheelFilename:
139+
logger.debug("Could not parse wheel file name to extract version")
140+
141+
self.data["language_version"] = python_version_list
142+
143+
def read_directory(self, wheel_path: str, purl: PackageURL) -> tuple[str, str]:
    """
    Read in the WHEEL and METADATA files from the .dist-info directory.

    Parameters
    ----------
    wheel_path : str
        Path to the temporary directory where the wheel was
        downloaded into.
    purl: PackageURL
        PURL corresponding to the package being analyzed.

    Returns
    -------
    tuple[str, str]
        Tuple where the first element is a string of the .dist-info/WHEEL
        contents and the second element is a string of the .dist-info/METADATA
        contents. An element is the empty string when its file is missing, and
        both are empty when the .dist-info directory itself is missing.
    """
    # From https://peps.python.org/pep-0427/#escaping-and-unicode
    # The flag must be passed by keyword: the fourth positional parameter of
    # re.sub is ``count``, so a positional re.UNICODE would silently cap the
    # number of substitutions instead of setting the flag.
    normalized_name = re.sub(r"[^\w\d.]+", "_", purl.name, flags=re.UNICODE)
    dist_info = f"{normalized_name}-{purl.version}.dist-info"
    logger.debug(dist_info)

    dist_info_path = os.path.join(wheel_path, dist_info)

    if not os.path.isdir(dist_info_path):
        return "", ""

    def read_text(file_name: str) -> str:
        """Return the UTF-8 contents of a file in .dist-info, or "" if it is absent."""
        # Open directly instead of checking existence first, so a file removed
        # between the check and the read cannot raise.
        try:
            with open(os.path.join(dist_info_path, file_name), encoding="utf-8") as handle:
                return handle.read()
        except FileNotFoundError:
            return ""

    return read_text("WHEEL"), read_text("METADATA")
186+
187+
def read_generator_line(self, wheel_contents: str) -> tuple[str, str]:
    """
    Parse through the "Generator: {build backend} {version}" line of .dist-info/WHEEL.

    Only the first line starting with "Generator:" is considered. Some back ends
    write the version in parentheses (e.g. "Generator: bdist_wheel (0.37.1)");
    the parentheses are stripped so that the returned version is a bare version
    string.

    Parameters
    ----------
    wheel_contents : str
        String of the contents of the .dist-info/WHEEL file

    Returns
    -------
    tuple[str, str]
        Tuple where the first element is the generating build backend and
        the second element is its version. Both elements are empty strings
        when there is no Generator line, or when that line does not carry
        both a backend name and a version.
    """
    for line in wheel_contents.splitlines():
        if not line.startswith("Generator:"):
            continue

        # Split on any run of whitespace so repeated spaces or tabs do not
        # produce empty fields.
        fields = line.removeprefix("Generator:").split()
        if len(fields) < 2:
            # A name without a version cannot be turned into a pinned
            # requirement, so report it as "not found" rather than raising.
            return "", ""

        version = fields[1].strip("()")
        if not version:
            return "", ""
        return fields[0], version

    return "", ""

src/macaron/build_spec_generator/reproducible_central/reproducible_central.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ def gen_reproducible_central_build_spec(build_spec: BaseBuildSpecDict) -> str |
9595
"tool": ReproducibleCentralBuildTool[build_spec["build_tool"].upper()].value,
9696
"newline": build_spec["newline"],
9797
"buildinfo": f"target/{build_spec['artifact_id']}-{build_spec['version']}.buildinfo",
98-
"jdk": build_spec["language_version"],
98+
"jdk": build_spec["language_version"][0],
9999
"command": compose_shell_commands(build_spec["build_commands"]),
100100
}
101101

src/macaron/config/defaults.ini

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -644,3 +644,9 @@ custom_semgrep_rules_path =
644644
# .yaml prefix. Note, this will be ignored if a path to custom semgrep rules is not provided. This list may not contain
645645
# duplicated elements, meaning that ruleset names must be unique.
646646
disabled_custom_rulesets =
647+
# As per https://peps.python.org/pep-0639/appendix-examples/, presumably most versions < 59.1.1 will work here
648+
setuptools_version_emitting_license = 56.2.0
649+
# TODO: Investigate if other versions would be suitable
650+
setuptools_version_emitting_platform_unknown = 57.5.0
651+
# TODO: Investigate if other versions would be suitable
652+
default_setuptools = 67.7.2

src/macaron/repo_finder/repo_finder_pypi.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ def find_repo(
5858
pypi_registry = next((registry for registry in PACKAGE_REGISTRIES if isinstance(registry, PyPIRegistry)), None)
5959
if not pypi_registry:
6060
return "", RepoFinderInfo.PYPI_NO_REGISTRY
61-
pypi_asset = PyPIPackageJsonAsset(purl.name, purl.version, False, pypi_registry, {}, "")
61+
pypi_asset = PyPIPackageJsonAsset(purl.name, purl.version, False, pypi_registry, {}, "", "", "")
6262

6363
if not pypi_asset:
6464
# This should be unreachable, as the pypi_registry has already been confirmed to be of type PyPIRegistry.

0 commit comments

Comments
 (0)