Skip to content

Commit 64bcaf4

Browse files
aignasrickeylev
authored andcommitted
fix(pipstar): correctly handle platlib and purelib in .data (#3501)
Some packages like use `platlib` in the data to put the main files. This PR is implementing correct handling of such packages by recursively merging two trees. If we have any collisions, we will print an error and stop. That is unlikely but better to be safe. Users can patch the failure to be a warning if necessary. In order to make this more testable, move the functions to a separate file. Fixes #3500 Fixes #2949 To be cherry-picked as part of #3466 (cherry picked from commit 62acad6)
1 parent e58f396 commit 64bcaf4

File tree

7 files changed

+152
-45
lines changed

7 files changed

+152
-45
lines changed

examples/pip_parse/BUILD.bazel

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -79,5 +79,8 @@ py_test(
7979
"WHEEL_DIST_INFO_CONTENTS": "$(rootpaths @pypi//requests:dist_info)",
8080
"YAMLLINT_ENTRY_POINT": "$(rlocationpath :yamllint)",
8181
},
82-
deps = ["@rules_python//python/runfiles"],
82+
deps = [
83+
"@pypi//libclang",
84+
"@rules_python//python/runfiles",
85+
],
8386
)

examples/pip_parse/requirements.in

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ s3cmd~=2.1.0
33
yamllint~=1.28.0
44
sphinx
55
sphinxcontrib-serializinghtml
6+
libclang

examples/pip_parse/requirements_lock.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,18 @@ jinja2==3.1.6 \
4242
--hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \
4343
--hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67
4444
# via sphinx
45+
libclang==18.1.1 \
46+
--hash=sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a \
47+
--hash=sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8 \
48+
--hash=sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb \
49+
--hash=sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592 \
50+
--hash=sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f \
51+
--hash=sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5 \
52+
--hash=sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8 \
53+
--hash=sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250 \
54+
--hash=sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b \
55+
--hash=sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe
56+
# via -r requirements.in
4557
markupsafe==2.1.3 \
4658
--hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \
4759
--hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \

examples/pip_parse/requirements_windows.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,18 @@ jinja2==3.1.6 \
4646
--hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \
4747
--hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67
4848
# via sphinx
49+
libclang==18.1.1 \
50+
--hash=sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a \
51+
--hash=sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8 \
52+
--hash=sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb \
53+
--hash=sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592 \
54+
--hash=sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f \
55+
--hash=sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5 \
56+
--hash=sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8 \
57+
--hash=sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250 \
58+
--hash=sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b \
59+
--hash=sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe
60+
# via -r requirements.in
4961
markupsafe==2.1.3 \
5062
--hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \
5163
--hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \

python/private/pypi/BUILD.bazel

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -415,6 +415,16 @@ bzl_library(
415415
srcs = ["whl_config_setting.bzl"],
416416
)
417417

418+
bzl_library(
419+
name = "whl_extract_bzl",
420+
srcs = ["whl_extract.bzl"],
421+
deps = [
422+
":whl_metadata_bzl",
423+
"//python/private:repo_utils_bzl",
424+
"@rules_python_internal//:rules_python_config_bzl",
425+
],
426+
)
427+
418428
bzl_library(
419429
name = "whl_library_alias_bzl",
420430
srcs = ["whl_library_alias.bzl"],
@@ -435,6 +445,7 @@ bzl_library(
435445
":patch_whl_bzl",
436446
":pep508_requirement_bzl",
437447
":pypi_repo_utils_bzl",
448+
":whl_extract_bzl",
438449
":whl_metadata_bzl",
439450
":whl_target_platforms_bzl",
440451
"//python/private:auth_bzl",
Lines changed: 109 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,109 @@
1+
"""A simple whl extractor."""
2+
3+
load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config")
4+
load("//python/private:repo_utils.bzl", "repo_utils")
5+
load(":whl_metadata.bzl", "find_whl_metadata")
6+
7+
def whl_extract(rctx, *, whl_path, logger):
8+
"""Extract whls in Starlark.
9+
10+
Args:
11+
rctx: the repository ctx.
12+
whl_path: the whl path to extract.
13+
logger: The logger to use
14+
"""
15+
install_dir_path = whl_path.dirname.get_child("site-packages")
16+
repo_utils.extract(
17+
rctx,
18+
archive = whl_path,
19+
output = install_dir_path,
20+
supports_whl_extraction = rp_config.supports_whl_extraction,
21+
)
22+
metadata_file = find_whl_metadata(
23+
install_dir = install_dir_path,
24+
logger = logger,
25+
)
26+
27+
# Get the <prefix>.dist_info dir name
28+
dist_info_dir = metadata_file.dirname
29+
rctx.file(
30+
dist_info_dir.get_child("INSTALLER"),
31+
"https://github.com/bazel-contrib/rules_python#pipstar",
32+
)
33+
repo_root_dir = whl_path.dirname
34+
35+
# Get the <prefix>.dist_info dir name
36+
data_dir = dist_info_dir.dirname.get_child(dist_info_dir.basename[:-len(".dist-info")] + ".data")
37+
if data_dir.exists:
38+
for prefix, dest_prefix in {
39+
# https://docs.python.org/3/library/sysconfig.html#posix-prefix
40+
# We are taking this from the legacy whl installer config
41+
"data": "data",
42+
"headers": "include",
43+
# In theory there may be directory collisions here, so it would be best to
44+
# merge the paths here. We are doing for quite a few levels deep. What is
45+
# more, this code has to be reasonably efficient because some packages like
46+
# to not put everything to the top level, but to indicate explicitly if
47+
# something is in `platlib` or `purelib` (e.g. libclang wheel).
48+
"platlib": "site-packages",
49+
"purelib": "site-packages",
50+
"scripts": "bin",
51+
}.items():
52+
src = data_dir.get_child(prefix)
53+
if not src.exists:
54+
# The prefix does not exist in the wheel, we can continue
55+
continue
56+
57+
for (src, dest) in merge_trees(src, repo_root_dir.get_child(dest_prefix)):
58+
logger.debug(lambda: "Renaming: {} -> {}".format(src, dest))
59+
rctx.rename(src, dest)
60+
61+
# TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
62+
# shebang to be something else, for inspiration look at the hermetic
63+
# toolchain wrappers
64+
65+
# Ensure that there is no data dir left
66+
rctx.delete(data_dir)
67+
68+
def merge_trees(src, dest):
69+
"""Merge src into the destination path.
70+
71+
This will attempt to merge-move src files to the destination directory if there are
72+
existing files. Fails at directory depth is 10000 or if there are collisions.
73+
74+
Args:
75+
src: {type}`path` a src path to rename.
76+
dest: {type}`path` a dest path to rename to.
77+
78+
Returns:
79+
A list of tuples for src and destination paths.
80+
"""
81+
ret = []
82+
remaining = [(src, dest)]
83+
collisions = []
84+
for _ in range(10000):
85+
if collisions or not remaining:
86+
break
87+
88+
tmp = []
89+
for (s, d) in remaining:
90+
if not d.exists:
91+
ret.append((s, d))
92+
continue
93+
94+
if not s.is_dir or not d.is_dir:
95+
collisions.append(s)
96+
continue
97+
98+
for file_or_dir in s.readdir():
99+
tmp.append((file_or_dir, d.get_child(file_or_dir.basename)))
100+
101+
remaining = tmp
102+
103+
if remaining:
104+
fail("Exceeded maximum directory depth of 10000 during tree merge.")
105+
106+
if collisions:
107+
fail("Detected collisions between {} and {}: {}".format(src, dest, collisions))
108+
109+
return ret

python/private/pypi/whl_library.bzl

Lines changed: 3 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,8 @@ load(":parse_whl_name.bzl", "parse_whl_name")
2626
load(":patch_whl.bzl", "patch_whl")
2727
load(":pep508_requirement.bzl", "requirement")
2828
load(":pypi_repo_utils.bzl", "pypi_repo_utils")
29-
load(":whl_metadata.bzl", "find_whl_metadata", "whl_metadata")
29+
load(":whl_extract.bzl", "whl_extract")
30+
load(":whl_metadata.bzl", "whl_metadata")
3031
load(":whl_target_platforms.bzl", "whl_target_platforms")
3132

3233
_CPPFLAGS = "CPPFLAGS"
@@ -265,48 +266,6 @@ def _create_repository_execution_environment(rctx, python_interpreter, logger =
265266
env[_CPPFLAGS] = " ".join(cppflags)
266267
return env
267268

268-
def _extract_whl_star(rctx, *, whl_path, logger):
269-
install_dir_path = whl_path.dirname.get_child("site-packages")
270-
repo_utils.extract(
271-
rctx,
272-
archive = whl_path,
273-
output = install_dir_path,
274-
supports_whl_extraction = rp_config.supports_whl_extraction,
275-
)
276-
metadata_file = find_whl_metadata(
277-
install_dir = install_dir_path,
278-
logger = logger,
279-
)
280-
281-
# Get the <prefix>.dist_info dir name
282-
dist_info_dir = metadata_file.dirname
283-
rctx.file(
284-
dist_info_dir.get_child("INSTALLER"),
285-
"https://github.com/bazel-contrib/rules_python#pipstar",
286-
)
287-
repo_root_dir = whl_path.dirname
288-
289-
# Get the <prefix>.dist_info dir name
290-
data_dir = dist_info_dir.dirname.get_child(dist_info_dir.basename[:-len(".dist-info")] + ".data")
291-
if data_dir.exists:
292-
for prefix, dest in {
293-
# https://docs.python.org/3/library/sysconfig.html#posix-prefix
294-
# We are taking this from the legacy whl installer config
295-
"data": "data",
296-
"headers": "include",
297-
"platlib": "site-packages",
298-
"purelib": "site-packages",
299-
"scripts": "bin",
300-
}.items():
301-
src = data_dir.get_child(prefix)
302-
dest = repo_root_dir.get_child(dest)
303-
if src.exists:
304-
rctx.rename(src, dest)
305-
306-
# TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
307-
# shebang to be something else, for inspiration look at the hermetic
308-
# toolchain wrappers
309-
310269
def _extract_whl_py(rctx, *, python_interpreter, args, whl_path, environment, logger):
311270
target_platforms = rctx.attr.experimental_target_platforms or []
312271
if target_platforms:
@@ -448,7 +407,7 @@ def _whl_library_impl(rctx):
448407
)
449408

450409
if enable_pipstar_extract:
451-
_extract_whl_star(rctx, whl_path = whl_path, logger = logger)
410+
whl_extract(rctx, whl_path = whl_path, logger = logger)
452411
else:
453412
_extract_whl_py(
454413
rctx,

0 commit comments

Comments
 (0)