Skip to content

Commit 98d7003

Browse files
committed
fix: Avoid creating URLs with empty path segments
1 parent 4db0d91 commit 98d7003

File tree

3 files changed

+143
-4
lines changed

3 files changed

+143
-4
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,7 @@ Unreleased changes template.
5757
{#v0-0-0-fixed}
5858
### Fixed
5959
* (gazelle) Providing multiple input requirements files to `gazelle_python_manifest` now works correctly.
60+
* (bazel downloader) Handle trailing slashes in pip index URLs in environment variables
6061

6162
{#v0-0-0-added}
6263
### Added

python/private/pypi/simpleapi_download.bzl

Lines changed: 26 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -169,11 +169,11 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
169169
# them to ctx.download if we want to correctly handle the relative URLs.
170170
# TODO: Add a test that env subbed index urls do not leak into the lock file.
171171

172-
real_url = envsubst(
172+
real_url = strip_empty_path_segments(envsubst(
173173
url,
174174
attr.envsubst,
175175
ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get,
176-
)
176+
))
177177

178178
cache_key = real_url
179179
if cache_key in cache:
@@ -194,11 +194,13 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
194194

195195
output = ctx.path(output_str.strip("_").lower() + ".html")
196196

197+
_get_auth = ctx.get_auth if hasattr(ctx, "get_auth") else get_auth
198+
197199
# NOTE: this may have block = True or block = False in the download_kwargs
198200
download = ctx.download(
199201
url = [real_url],
200202
output = output,
201-
auth = get_auth(ctx, [real_url], ctx_attr = attr),
203+
auth = _get_auth(ctx, [real_url], ctx_attr = attr),
202204
allow_fail = True,
203205
**download_kwargs
204206
)
@@ -211,6 +213,27 @@ def _read_simpleapi(ctx, url, attr, cache, **download_kwargs):
211213

212214
return _read_index_result(ctx, download, output, real_url, cache, cache_key)
213215

216+
def strip_empty_path_segments(url):
217+
"""Removes empty path segments from a URL. Does nothing for urls with no scheme.
218+
219+
Public only for testing.
220+
221+
Args:
222+
url: The url to remove empty path segments from
223+
224+
Returns:
225+
The url with empty path segments removed and any trailing slash preserved.
226+
If the url had no scheme it is returned unchanged.
227+
"""
228+
scheme, _, rest = url.partition("://")
229+
if rest == "":
230+
return url
231+
stripped = "/".join([p for p in rest.split("/") if p])
232+
if url.endswith("/"):
233+
return "{}://{}/".format(scheme, stripped)
234+
else:
235+
return "{}://{}".format(scheme, stripped)
236+
214237
def _read_index_result(ctx, result, output, url, cache, cache_key):
215238
if not result.success:
216239
return struct(success = False)

tests/pypi/simpleapi_download/simpleapi_download_tests.bzl

Lines changed: 116 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
""
1616

1717
load("@rules_testing//lib:test_suite.bzl", "test_suite")
18-
load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download") # buildifier: disable=bzl-visibility
18+
load("//python/private/pypi:simpleapi_download.bzl", "simpleapi_download", "strip_empty_path_segments") # buildifier: disable=bzl-visibility
1919

2020
_tests = []
2121

@@ -119,6 +119,121 @@ def _test_fail(env):
119119

120120
_tests.append(_test_fail)
121121

122+
def _test_download_url(env):
123+
downloads = {}
124+
125+
def download(url, output, **kwargs):
126+
_ = kwargs # buildifier: disable=unused-variable
127+
downloads[url[0]] = output
128+
return struct(success = True)
129+
130+
simpleapi_download(
131+
ctx = struct(
132+
os = struct(environ = {}),
133+
download = download,
134+
read = lambda i: "contents of " + i,
135+
path = lambda i: "path/for/" + i,
136+
get_auth = lambda ctx, urls, ctx_attr: struct(),
137+
),
138+
attr = struct(
139+
index_url_overrides = {},
140+
index_url = "https://example.com/main/simple/",
141+
extra_index_urls = [],
142+
sources = ["foo", "bar", "baz"],
143+
envsubst = [],
144+
),
145+
cache = {},
146+
parallel_download = False,
147+
)
148+
149+
env.expect.that_dict(downloads).contains_exactly({
150+
"https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
151+
"https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
152+
"https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
153+
})
154+
155+
_tests.append(_test_download_url)
156+
157+
def _test_download_url_parallel(env):
158+
downloads = {}
159+
160+
def download(url, output, **kwargs):
161+
_ = kwargs # buildifier: disable=unused-variable
162+
downloads[url[0]] = output
163+
return struct(wait = lambda: struct(success = True))
164+
165+
simpleapi_download(
166+
ctx = struct(
167+
os = struct(environ = {}),
168+
download = download,
169+
read = lambda i: "contents of " + i,
170+
path = lambda i: "path/for/" + i,
171+
get_auth = lambda ctx, urls, ctx_attr: struct(),
172+
),
173+
attr = struct(
174+
index_url_overrides = {},
175+
index_url = "https://example.com/main/simple/",
176+
extra_index_urls = [],
177+
sources = ["foo", "bar", "baz"],
178+
envsubst = [],
179+
),
180+
cache = {},
181+
parallel_download = True,
182+
)
183+
184+
env.expect.that_dict(downloads).contains_exactly({
185+
"https://example.com/main/simple/foo/": "path/for/https___example_com_main_simple_foo.html",
186+
"https://example.com/main/simple/bar/": "path/for/https___example_com_main_simple_bar.html",
187+
"https://example.com/main/simple/baz/": "path/for/https___example_com_main_simple_baz.html",
188+
})
189+
190+
_tests.append(_test_download_url_parallel)
191+
192+
def _test_download_envsubst_url(env):
193+
downloads = {}
194+
195+
def download(url, output, **kwargs):
196+
_ = kwargs # buildifier: disable=unused-variable
197+
downloads[url[0]] = output
198+
return struct(success = True)
199+
200+
simpleapi_download(
201+
ctx = struct(
202+
os = struct(environ = {"INDEX_URL": "https://example.com/main/simple/"}),
203+
download = download,
204+
read = lambda i: "contents of " + i,
205+
path = lambda i: "path/for/" + i,
206+
get_auth = lambda ctx, urls, ctx_attr: struct(),
207+
),
208+
attr = struct(
209+
index_url_overrides = {},
210+
index_url = "$INDEX_URL",
211+
extra_index_urls = [],
212+
sources = ["foo", "bar", "baz"],
213+
envsubst = ["INDEX_URL"],
214+
),
215+
cache = {},
216+
parallel_download = False,
217+
)
218+
219+
env.expect.that_dict(downloads).contains_exactly({
220+
"https://example.com/main/simple/foo/": "path/for/~index_url~_foo.html",
221+
"https://example.com/main/simple/bar/": "path/for/~index_url~_bar.html",
222+
"https://example.com/main/simple/baz/": "path/for/~index_url~_baz.html",
223+
})
224+
225+
_tests.append(_test_download_envsubst_url)
226+
227+
def _test_strip_empty_path_segments(env):
228+
env.expect.that_str(strip_empty_path_segments("no/scheme//is/unchanged")).equals("no/scheme//is/unchanged")
229+
env.expect.that_str(strip_empty_path_segments("scheme://with/no/empty/segments")).equals("scheme://with/no/empty/segments")
230+
env.expect.that_str(strip_empty_path_segments("scheme://with//empty/segments")).equals("scheme://with/empty/segments")
231+
env.expect.that_str(strip_empty_path_segments("scheme://with///multiple//empty/segments")).equals("scheme://with/multiple/empty/segments")
232+
env.expect.that_str(strip_empty_path_segments("scheme://with//trailing/slash/")).equals("scheme://with/trailing/slash/")
233+
env.expect.that_str(strip_empty_path_segments("scheme://with/trailing/slashes///")).equals("scheme://with/trailing/slashes/")
234+
235+
_tests.append(_test_strip_empty_path_segments)
236+
122237
def simpleapi_download_test_suite(name):
123238
"""Create the test suite.
124239

0 commit comments

Comments
 (0)