Skip to content
Merged
Show file tree
Hide file tree
Changes from 5 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,14 @@ Unreleased changes template.

{#v0-0-0-added}
### Added
* (pypi) From now on, `sha256` values in the `requirements.txt` are no longer
mandatory when enabling the {attr}`pip.parse.experimental_index_url` feature.
This means that `rules_python` will attempt to fetch metadata for all
packages through SimpleAPI unless they are pulled through direct URL
references. Fixes [#2023](https://github.com/bazel-contrib/rules_python/issues/2023).
In case you see issues with `rules_python` being too eager to fetch the SimpleAPI
metadata, you can use the newly added {attr}`pip.parse.simpleapi_skip`
to skip metadata fetching for those packages.
* (uv) A {obj}`lock` rule that is the replacement for the
{obj}`compile_pip_requirements`. This may still have rough corners
so please report issues with it in the
Expand Down
12 changes: 7 additions & 5 deletions docs/pypi-dependencies.md
Original file line number Diff line number Diff line change
Expand Up @@ -386,11 +386,13 @@ This does not mean that `rules_python` is fetching the wheels eagerly, but it
rather means that it is calling the PyPI server to get the Simple API response
to get the list of all available source and wheel distributions. Once it has
got all of the available distributions, it will select the right ones depending
on the `sha256` values in your `requirements_lock.txt` file. The compatible
distribution URLs will be then written to the `MODULE.bazel.lock` file. Currently
users wishing to use the lock file with `rules_python` with this feature have
to set an environment variable `RULES_PYTHON_OS_ARCH_LOCK_FILE=0` which will
become default in the next release.
on the `sha256` values in your `requirements_lock.txt` file. If `sha256` hashes
are not present in the requirements file, we will fall back to matching by the
version specified in the lock file. The compatible distribution URLs will then be
written to the `MODULE.bazel.lock` file. Currently users wishing to use the
lock file with `rules_python` with this feature have to set an environment
variable `RULES_PYTHON_OS_ARCH_LOCK_FILE=0` which will become default in the
next release.

Fetching the distribution information from the PyPI allows `rules_python` to
know which `whl` should be used on which target platform and it will determine
Expand Down
27 changes: 26 additions & 1 deletion python/private/pypi/extension.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -477,13 +477,21 @@ You cannot use both the additive_build_content and additive_build_content_file a
get_index_urls = None
if pip_attr.experimental_index_url:
is_reproducible = False
skip_sources = [
normalize_name(s)
for s in pip_attr.simpleapi_skip
]
get_index_urls = lambda ctx, distributions: simpleapi_download(
ctx,
attr = struct(
index_url = pip_attr.experimental_index_url,
extra_index_urls = pip_attr.experimental_extra_index_urls or [],
index_url_overrides = pip_attr.experimental_index_url_overrides or {},
sources = distributions,
sources = [
d
for d in distributions
if normalize_name(d) not in skip_sources
],
envsubst = pip_attr.envsubst,
# Auth related info
netrc = pip_attr.netrc,
Expand Down Expand Up @@ -700,6 +708,11 @@ This is equivalent to `--index-url` `pip` option.
If {attr}`download_only` is set, then `sdist` archives will be discarded and `pip.parse` will
operate in wheel-only mode.
:::

:::{versionchanged} VERSION_NEXT_FEATURE
Index metadata will be used to deduce `sha256` values for packages even if the
`sha256` values are not present in the requirements.txt lock file.
:::
""",
),
"experimental_index_url_overrides": attr.string_dict(
Expand Down Expand Up @@ -767,6 +780,18 @@ The Python version the dependencies are targetting, in Major.Minor format
If an interpreter isn't explicitly provided (using `python_interpreter` or
`python_interpreter_target`), then the version specified here must have
a corresponding `python.toolchain()` configured.
""",
),
"simpleapi_skip": attr.string_list(
doc = """\
The list of packages for which metadata should not be fetched from the SimpleAPI
index. You should normally not need this attribute, but in case you do, please
report this as a bug to `rules_python` and use this attribute until the bug is fixed.

EXPERIMENTAL: this may be removed without notice.

:::{versionadded} VERSION_NEXT_FEATURE
:::
""",
),
"whl_modifications": attr.label_keyed_string_dict(
Expand Down
15 changes: 10 additions & 5 deletions python/private/pypi/parse_requirements.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def parse_requirements(
req.distribution: None
for reqs in requirements_by_platform.values()
for req in reqs.values()
if req.srcs.shas
if not req.srcs.url
}),
)

Expand Down Expand Up @@ -315,10 +315,15 @@ def _add_dists(*, requirement, index_urls, logger = None):
whls = []
sdist = None

# TODO @aignas 2024-05-22: it is in theory possible to add all
# requirements by version instead of by sha256. This may be useful
# for some projects.
for sha256 in requirement.srcs.shas:
# First try to find distributions by SHA256 if provided
shas_to_use = requirement.srcs.shas
if not shas_to_use:
version = requirement.srcs.version
shas_to_use = index_urls.sha256s_by_version.get(version, [])
if logger:
logger.warn(lambda: "requirement file has been generated without hashes, will use all hashes for the given version {} that could find on the index:\n {}".format(version, shas_to_use))

for sha256 in shas_to_use:
# For now if the artifact is marked as yanked we just ignore it.
#
# See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
Expand Down
35 changes: 33 additions & 2 deletions python/private/pypi/parse_simpleapi_html.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def parse_simpleapi_html(*, url, content):
Returns:
A list of structs with:
* filename: The filename of the artifact.
* version: The version of the artifact.
* url: The URL to download the artifact.
* sha256: The sha256 of the artifact.
* metadata_sha256: The whl METADATA sha256 if we can download it. If this is
Expand All @@ -51,15 +52,20 @@ def parse_simpleapi_html(*, url, content):

# Each line follows the following pattern
# <a href="https://...#sha256=..." attribute1="foo" ... attributeN="bar">filename</a><br />
sha256_by_version = {}
for line in lines[1:]:
dist_url, _, tail = line.partition("#sha256=")
dist_url = _absolute_url(url, dist_url)

sha256, _, tail = tail.partition("\"")

# See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api
yanked = "data-yanked" in line

head, _, _ = tail.rpartition("</a>")
maybe_metadata, _, filename = head.rpartition(">")
version = _version(filename)
sha256_by_version.setdefault(version, []).append(sha256)

metadata_sha256 = ""
metadata_url = ""
Expand All @@ -75,7 +81,8 @@ def parse_simpleapi_html(*, url, content):
if filename.endswith(".whl"):
whls[sha256] = struct(
filename = filename,
url = _absolute_url(url, dist_url),
version = version,
url = dist_url,
sha256 = sha256,
metadata_sha256 = metadata_sha256,
metadata_url = _absolute_url(url, metadata_url) if metadata_url else "",
Expand All @@ -84,7 +91,8 @@ def parse_simpleapi_html(*, url, content):
else:
sdists[sha256] = struct(
filename = filename,
url = _absolute_url(url, dist_url),
version = version,
url = dist_url,
sha256 = sha256,
metadata_sha256 = "",
metadata_url = "",
Expand All @@ -94,8 +102,31 @@ def parse_simpleapi_html(*, url, content):
return struct(
sdists = sdists,
whls = whls,
sha256_by_version = sha256_by_version,
)

# Archive suffixes that identify a source distribution. Matching on ".tar"
# covers every compressed variant (".tar.gz", ".tar.bz2", ".tar.xz", ...).
_SDIST_EXTS = [
    ".tar",  # handles any compression
    ".zip",
]

def _version(filename):
    """Extract the version component from a distribution filename.

    Two filename layouts are understood:

    * wheels: `{name}-{version}(-{build})?-{python}-{abi}-{platform}.whl`, see
      https://packaging.python.org/en/latest/specifications/binary-distribution-format/#binary-distribution-format
      The name component of a wheel is escaped and never contains `-`, so the
      version is the text between the first and the second `-`.
    * sdists: `{name}-{version}.{ext}`. Legacy sdists keep `-` in the project
      name (e.g. `python-dateutil-2.8.2.tar.gz`), so the version is the text
      after the LAST `-` once the archive extension has been removed.

    Args:
        filename: {type}`str` the basename of the distribution file.

    Returns:
        {type}`str` the version string, or an empty string if the filename
        contains no `-` separator at all.
    """
    if not filename.endswith(".whl"):
        for ext in _SDIST_EXTS:
            stem, found, _ = filename.rpartition(ext)
            if not found:
                continue

            # {name}-{version}.{ext}: split on the last "-" so that hyphens
            # inside the project name do not get mistaken for the separator.
            _, sep, version = stem.rpartition("-")
            return version if sep else ""

    # NOTE @aignas 2025-03-29: most of the files are wheels, so this is the
    # common path. Anything that is not a recognised sdist archive is also
    # parsed with the wheel layout.
    _, _, tail = filename.partition("-")
    version, _, _ = tail.partition("-")
    return version

def _get_root_directory(url):
scheme_end = url.find("://")
if scheme_end == -1:
Expand Down
15 changes: 11 additions & 4 deletions python/private/pypi/simpleapi_download.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -127,10 +127,17 @@ def simpleapi_download(

failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
if failed_sources:
_fail("Failed to download metadata for {} for from urls: {}".format(
failed_sources,
index_urls,
))
_fail(
"\n".join([
"Failed to download metadata for {} for from urls: {}.".format(
failed_sources,
index_urls,
),
"If you would like to skip downloading metadata for these packages please add 'simpleapi_skip={}' to your 'pip.parse' call.".format(
render.list(failed_sources),
),
]),
)
return None

if warn_overrides:
Expand Down
Loading