|
3 | 3 |
|
4 | 4 | """This module includes build specification and helper classes for PyPI packages.""" |
5 | 5 |
|
| 6 | +import logging |
| 7 | +import os |
| 8 | +import re |
6 | 9 |
|
| 10 | +import tomli |
7 | 11 | from packageurl import PackageURL |
| 12 | +from packaging.requirements import InvalidRequirement, Requirement |
| 13 | +from packaging.utils import InvalidWheelFilename, parse_wheel_filename |
8 | 14 |
|
9 | 15 | from macaron.build_spec_generator.common_spec.base_spec import BaseBuildSpec, BaseBuildSpecDict |
| 16 | +from macaron.config.defaults import defaults |
| 17 | +from macaron.errors import SourceCodeError |
| 18 | +from macaron.slsa_analyzer.package_registry import pypi_registry |
| 19 | +from macaron.slsa_analyzer.specs.package_registry_spec import PackageRegistryInfo |
| 20 | + |
| 21 | +logger: logging.Logger = logging.getLogger(__name__) |
10 | 22 |
|
11 | 23 |
|
12 | 24 | class PyPIBuildSpec( |
@@ -34,3 +46,161 @@ def resolve_fields(self, purl: PackageURL) -> None: |
34 | 46 | purl: str |
35 | 47 | The target software component Package URL. |
36 | 48 | """ |
| 49 | + if purl.type != "pypi": |
| 50 | + return |
| 51 | + |
| 52 | + registry = pypi_registry.PyPIRegistry() |
| 53 | + registry.load_defaults() |
| 54 | + |
| 55 | + registry_info = PackageRegistryInfo( |
| 56 | + build_tool_name="pip", |
| 57 | + build_tool_purl_type="pypi", |
| 58 | + package_registry=registry, |
| 59 | + metadata=[], |
| 60 | + ) |
| 61 | + |
| 62 | + pypi_package_json = pypi_registry.find_or_create_pypi_asset(purl.name, purl.version, registry_info) |
| 63 | + |
| 64 | + if pypi_package_json is not None: |
| 65 | + if pypi_package_json.package_json or pypi_package_json.download(dest=""): |
| 66 | + requires_array: list[str] = [] |
| 67 | + build_backends: dict[str, str] = {} |
| 68 | + python_version_list: list[str] = [] |
| 69 | + try: |
| 70 | + with pypi_package_json.wheel(): |
| 71 | + logger.debug("Wheel at %s", pypi_package_json.wheel_path) |
| 72 | + # Should only have .dist-info directory |
| 73 | + logger.debug("It has directories %s", ",".join(os.listdir(pypi_package_json.wheel_path))) |
| 74 | + wheel_contents, metadata_contents = self.read_directory(pypi_package_json.wheel_path, purl) |
| 75 | + generator, version = self.read_generator_line(wheel_contents) |
| 76 | + if generator != "": |
| 77 | + build_backends[generator] = "==" + version |
| 78 | + if generator != "setuptools": |
| 79 | + # Apply METADATA heuristics to determine setuptools version |
| 80 | + if "License-File" in metadata_contents: |
| 81 | + build_backends["setuptools"] = "==" + defaults.get( |
| 82 | + "heuristic.pypi", "setuptools_version_emitting_license" |
| 83 | + ) |
| 84 | + elif "Platform: UNKNOWN" in metadata_contents: |
| 85 | + build_backends["setuptools"] = "==" + defaults.get( |
| 86 | + "heuristic.pypi", "setuptools_version_emitting_platform_unknown" |
| 87 | + ) |
| 88 | + else: |
| 89 | + build_backends["setuptools"] = "==" + defaults.get( |
| 90 | + "heuristic.pypi", "default_setuptools" |
| 91 | + ) |
| 92 | + except SourceCodeError: |
| 93 | + logger.debug("Could not find pure wheel matching this PURL") |
| 94 | + |
| 95 | + logger.debug("From .dist_info:") |
| 96 | + logger.debug(build_backends) |
| 97 | + |
| 98 | + try: |
| 99 | + with pypi_package_json.sourcecode(): |
| 100 | + try: |
| 101 | + pyproject_content = pypi_package_json.get_sourcecode_file_contents("pyproject.toml") |
| 102 | + content = tomli.loads(pyproject_content.decode("utf-8")) |
| 103 | + build_system: dict[str, list[str]] = content.get("build-system", {}) |
| 104 | + requires_array = build_system.get("requires", []) |
| 105 | + python_version_constraint = content.get("project", {}).get("requires-python") |
| 106 | + if python_version_constraint: |
| 107 | + python_version_list.append(python_version_constraint) |
| 108 | + logger.debug("From pyproject.toml:") |
| 109 | + logger.debug(requires_array) |
| 110 | + except SourceCodeError: |
| 111 | + logger.debug("No pyproject.toml found") |
| 112 | + except SourceCodeError: |
| 113 | + logger.debug("No source distribution found") |
| 114 | + |
| 115 | + # Merge in pyproject.toml information only when the wheel dist_info does not contain the same |
| 116 | + # Hatch is an interesting example of this merge being required. |
| 117 | + for requirement in requires_array: |
| 118 | + try: |
| 119 | + parsed_requirement = Requirement(requirement) |
| 120 | + if parsed_requirement.name not in build_backends: |
| 121 | + build_backends[parsed_requirement.name] = str(parsed_requirement.specifier) |
| 122 | + except InvalidRequirement: |
| 123 | + logger.debug("Malformed requirement encountered:") |
| 124 | + logger.debug(requirement) |
| 125 | + |
| 126 | + logger.debug("Combined:") |
| 127 | + logger.debug(build_backends) |
| 128 | + self.data["build_backends"] = build_backends |
| 129 | + |
| 130 | + if not python_version_list: |
| 131 | + try: |
| 132 | + # Get python version specified in the wheel file name |
| 133 | + logger.debug(pypi_package_json.wheel_filename) |
| 134 | + _, _, _, tags = parse_wheel_filename(pypi_package_json.wheel_filename) |
| 135 | + for tag in tags: |
| 136 | + python_version_list.append(tag.interpreter) |
| 137 | + logger.debug(python_version_list) |
| 138 | + except InvalidWheelFilename: |
| 139 | + logger.debug("Could not parse wheel file name to extract version") |
| 140 | + |
| 141 | + self.data["language_version"] = python_version_list |
| 142 | + |
def read_directory(self, wheel_path: str, purl: PackageURL) -> tuple[str, str]:
    """
    Read in the WHEEL and METADATA files from the .dist-info directory.

    Parameters
    ----------
    wheel_path : str
        Path to the temporary directory where the wheel was
        downloaded into.
    purl: PackageURL
        PURL corresponding to the package being analyzed.

    Returns
    -------
    tuple[str, str]
        Tuple where the first element is a string of the .dist-info/WHEEL
        contents and the second element is a string of the .dist-info/METADATA
        contents. An element is the empty string when its file does not exist,
        and both are empty when the expected .dist-info directory is missing.
    """
    # From https://peps.python.org/pep-0427/#escaping-and-unicode
    # The flag has to be passed by keyword: the fourth positional parameter of
    # re.sub is ``count``, so a positional re.UNICODE would silently cap the
    # number of substitutions instead of acting as a flag.
    normalized_name = re.sub(r"[^\w\d.]+", "_", purl.name, flags=re.UNICODE)
    dist_info = f"{normalized_name}-{purl.version}.dist-info"
    logger.debug(dist_info)

    dist_info_path = os.path.join(wheel_path, dist_info)
    if not os.path.isdir(dist_info_path):
        return "", ""

    # Read both files the same way; a missing file contributes an empty string.
    # A separate local is used for each file path so the ``wheel_path``
    # parameter keeps meaning "the extracted wheel directory".
    contents: list[str] = []
    for file_name in ("WHEEL", "METADATA"):
        file_path = os.path.join(dist_info_path, file_name)
        if os.path.exists(file_path):
            with open(file_path, encoding="utf-8") as dist_info_file:
                contents.append(dist_info_file.read())
        else:
            contents.append("")

    wheel_contents, metadata_contents = contents
    return wheel_contents, metadata_contents
| 186 | + |
def read_generator_line(self, wheel_contents: str) -> tuple[str, str]:
    """
    Parse the "Generator: {build backend} {version}" line of .dist-info/WHEEL.

    Parameters
    ----------
    wheel_contents : str
        String of the contents of the .dist-info/WHEEL file

    Returns
    -------
    tuple[str, str]
        Tuple where the first element is the generating build backend and
        the second element is its version, without any surrounding
        parentheses. Both elements are empty strings when no Generator line
        with both a backend name and a version is present.
    """
    prefix = "Generator:"
    for line in wheel_contents.splitlines():
        if not line.startswith(prefix):
            continue
        # Split on any run of whitespace so extra spaces do not yield empty tokens.
        fields = line[len(prefix):].split()
        if len(fields) < 2:
            # A Generator line without a version cannot be turned into a
            # pinned requirement; treat it as if no generator was recorded
            # rather than raising IndexError.
            continue
        backend, version = fields[0], fields[1]
        # Some generators write the version in parentheses, e.g.
        # "Generator: bdist_wheel (0.37.1)"; strip them so the value can be
        # used directly in a version specifier.
        return backend, version.strip("()")
    return "", ""
0 commit comments