Commit a6810e9
fix(pypi): change the parallelisation scheme for querying SimpleAPI
Instead of querying every index in parallel and yielding a lot of 404 warnings, let's query the main index first and then query the other indexes only for the packages that were not yet found. This should make the problem described in #2100 less severe. What is more, we can print a suggested `experimental_index_url_overrides` value for users to use.

Work towards #2100
1 parent b5729b4 commit a6810e9
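
In outline, the new scheme works like the following standalone sketch (plain Starlark with illustrative names; `read_index` is a stand-in for the real `_read_simpleapi` call, and the actual implementation in `simpleapi_download.bzl` below additionally handles Bazel's non-blocking downloads):

def fetch_metadata(index_urls, packages, read_index):
    # `read_index(index_url, pkg)` returns metadata, or None on a 404.
    contents = {}
    found_on_index = {}
    for index_url in index_urls:
        # Only query this index for packages not found on earlier indexes,
        # so the main index (first in the list) absorbs most queries and
        # the extra indexes no longer emit a 404 warning per package.
        for pkg in [p for p in packages if p not in found_on_index]:
            result = read_index(index_url, pkg)
            if result != None:
                contents[pkg] = result
                found_on_index[pkg] = index_url

    missing = [p for p in packages if p not in found_on_index]
    if missing:
        fail("Failed to download metadata for {} from urls: {}".format(missing, index_urls))

    return contents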

File tree

3 files changed (+54, -36 lines)

CHANGELOG.md

Lines changed: 4 additions & 0 deletions

@@ -56,6 +56,10 @@ Unreleased changes template.
 * Bazel 6 support is dropped and Bazel 7.4.1 is the minimum supported
   version, per our Bazel support matrix. Earlier versions are not
   tested by CI, so functionality cannot be guaranteed.
+* ({bzl:obj}`pip.parse`) From now on, we will make fewer calls to indexes when
+  fetching the metadata from SimpleAPI. The calls will be done in parallel to
+  each index separately, so the extension evaluation time might slow down if
+  not using {bzl:obj}`pip.parse.experimental_index_url_overrides`.

 {#v0-0-0-fixed}
 ### Fixed
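
For users hitting the slowdown mentioned in the changelog entry, here is a minimal `MODULE.bazel` sketch of pinning packages to the index that serves them; the package name and the extra index URL are hypothetical, while the attributes are existing `pip.parse` attributes:

pip = use_extension("@rules_python//python/extensions:pip.bzl", "pip")
pip.parse(
    hub_name = "pypi",
    python_version = "3.11",
    requirements_lock = "//:requirements_lock.txt",
    experimental_index_url = "https://pypi.org/simple",
    experimental_extra_index_urls = ["https://download.pytorch.org/whl/cpu"],
    # Pinning a package to its index skips probing the other indexes for it,
    # avoiding both the extra SimpleAPI calls and the 404 warnings.
    experimental_index_url_overrides = {
        "torch": "https://download.pytorch.org/whl/cpu",
    },
)
use_repo(pip, "pypi")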

python/private/pypi/extension.bzl

Lines changed: 5 additions & 0 deletions

@@ -653,6 +653,11 @@ The indexes must support Simple API as described here:
 https://packaging.python.org/en/latest/specifications/simple-repository-api/

 This is equivalent to `--extra-index-urls` `pip` option.
+
+:::{versionchanged} 1.1.0
+Starting with this version we will iterate over each index specified until
+we find metadata for all referenced distributions.
+:::
 """,
             default = [],
         ),

python/private/pypi/simpleapi_download.bzl

Lines changed: 45 additions & 36 deletions

@@ -20,6 +20,7 @@ load("@bazel_features//:features.bzl", "bazel_features")
 load("//python/private:auth.bzl", "get_auth")
 load("//python/private:envsubst.bzl", "envsubst")
 load("//python/private:normalize_name.bzl", "normalize_name")
+load("//python/private:text_util.bzl", "render")
 load(":parse_simpleapi_html.bzl", "parse_simpleapi_html")

 def simpleapi_download(ctx, *, attr, cache, parallel_download = True):
@@ -64,14 +65,20 @@ def simpleapi_download(ctx, *, attr, cache, parallel_download = True):

     # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes
     # to replicate how `pip` would handle this case.
-    async_downloads = {}
     contents = {}
     index_urls = [attr.index_url] + attr.extra_index_urls
-    for pkg in attr.sources:
-        pkg_normalized = normalize_name(pkg)

-        success = False
-        for index_url in index_urls:
+    found_on_index = {}
+    warn_overrides = False
+    for i, index_url in enumerate(index_urls):
+        if i != 0:
+            # Warn the user about a potential fix for the overrides
+            warn_overrides = True
+
+        async_downloads = {}
+        sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
+        for pkg in sources:
+            pkg_normalized = normalize_name(pkg)
             result = _read_simpleapi(
                 ctx = ctx,
                 url = "{}/{}/".format(
@@ -84,42 +91,44 @@ def simpleapi_download(ctx, *, attr, cache, parallel_download = True):
             )
             if hasattr(result, "wait"):
                 # We will process it in a separate loop:
-                async_downloads.setdefault(pkg_normalized, []).append(
-                    struct(
-                        pkg_normalized = pkg_normalized,
-                        wait = result.wait,
-                    ),
+                async_downloads[pkg] = struct(
+                    pkg_normalized = pkg_normalized,
+                    wait = result.wait,
                 )
-                continue
-
-            if result.success:
+            else:
                 contents[pkg_normalized] = result.output
-                success = True
-                break
-
-        if not async_downloads and not success:
-            fail("Failed to download metadata from urls: {}".format(
-                ", ".join(index_urls),
-            ))
-
-        if not async_downloads:
-            return contents
-
-        # If we use `block` == False, then we need to have a second loop that is
-        # collecting all of the results as they were being downloaded in parallel.
-        for pkg, downloads in async_downloads.items():
-            success = False
-            for download in downloads:
+                found_on_index[pkg] = index_url
+
+        if not async_downloads:
+            continue
+
+        # If we use `block` == False, then we need to have a second loop that is
+        # collecting all of the results as they were being downloaded in parallel.
+        for pkg, download in async_downloads.items():
             result = download.wait()

-            if result.success and download.pkg_normalized not in contents:
+            if result.success:
                 contents[download.pkg_normalized] = result.output
-                success = True
-
-        if not success:
-            fail("Failed to download metadata from urls: {}".format(
-                ", ".join(index_urls),
-            ))
+                found_on_index[pkg] = index_url
+
+    failed_sources = [pkg for pkg in attr.sources if pkg not in found_on_index]
+    if failed_sources:
+        fail("Failed to download metadata for {} from urls: {}".format(
+            failed_sources,
+            index_urls,
+        ))
+
+    if warn_overrides:
+        index_url_overrides = {
+            pkg: found_on_index[pkg]
+            for pkg in attr.sources
+            if found_on_index[pkg] != attr.index_url
+        }
+
+        # buildifier: disable=print
+        print("You can use the following `index_url_overrides` to avoid the 404 warnings:\n{}".format(
+            render.dict(index_url_overrides),
+        ))

     return contents
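
The `wait()` handling above relies on Bazel's non-blocking repository downloads. A minimal sketch of that two-phase pattern (the real file guards availability via the `bazel_features` check; the URLs and output names here are illustrative):

def _download_all(ctx, urls):
    pending = {}
    for url in urls:
        # `block = False` starts the download and returns a handle instead
        # of a result; calling `wait()` on the handle later yields the usual
        # download result struct.
        pending[url] = ctx.download(
            url = [url],
            output = url.rpartition("/")[-1],
            block = False,
        )

    # Second loop: collect results only after every download has started,
    # so the downloads run in parallel rather than one at a time.
    return {url: p.wait() for url, p in pending.items()}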
