Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion examples/pip_parse/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,8 @@ py_test(
"WHEEL_DIST_INFO_CONTENTS": "$(rootpaths @pypi//requests:dist_info)",
"YAMLLINT_ENTRY_POINT": "$(rlocationpath :yamllint)",
},
deps = ["@rules_python//python/runfiles"],
deps = [
"@pypi//libclang",
"@rules_python//python/runfiles",
],
)
1 change: 1 addition & 0 deletions examples/pip_parse/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ s3cmd~=2.1.0
yamllint~=1.28.0
sphinx
sphinxcontrib-serializinghtml
libclang
12 changes: 12 additions & 0 deletions examples/pip_parse/requirements_lock.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ jinja2==3.1.6 \
--hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \
--hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67
# via sphinx
libclang==18.1.1 \
--hash=sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a \
--hash=sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8 \
--hash=sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb \
--hash=sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592 \
--hash=sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f \
--hash=sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5 \
--hash=sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8 \
--hash=sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250 \
--hash=sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b \
--hash=sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe
# via -r requirements.in
markupsafe==2.1.3 \
--hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \
--hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \
Expand Down
12 changes: 12 additions & 0 deletions examples/pip_parse/requirements_windows.txt
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,18 @@ jinja2==3.1.6 \
--hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \
--hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67
# via sphinx
libclang==18.1.1 \
--hash=sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a \
--hash=sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8 \
--hash=sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb \
--hash=sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592 \
--hash=sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f \
--hash=sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5 \
--hash=sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8 \
--hash=sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250 \
--hash=sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b \
--hash=sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe
# via -r requirements.in
markupsafe==2.1.3 \
--hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \
--hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \
Expand Down
11 changes: 11 additions & 0 deletions python/private/pypi/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,16 @@ bzl_library(
srcs = ["whl_config_setting.bzl"],
)

# Starlark library target for `whl_extract.bzl`. The deps mirror the `load()`
# statements of that file: `whl_metadata.bzl`, `//python/private:repo_utils.bzl`
# and the generated `rules_python_config.bzl`.
bzl_library(
    name = "whl_extract_bzl",
    srcs = ["whl_extract.bzl"],
    deps = [
        ":whl_metadata_bzl",
        "//python/private:repo_utils_bzl",
        "@rules_python_internal//:rules_python_config_bzl",
    ],
)

bzl_library(
name = "whl_library_alias_bzl",
srcs = ["whl_library_alias.bzl"],
Expand All @@ -435,6 +445,7 @@ bzl_library(
":patch_whl_bzl",
":pep508_requirement_bzl",
":pypi_repo_utils_bzl",
":whl_extract_bzl",
":whl_metadata_bzl",
":whl_target_platforms_bzl",
"//python/private:auth_bzl",
Expand Down
109 changes: 109 additions & 0 deletions python/private/pypi/whl_extract.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
"""A simple whl extractor."""

load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config")
load("//python/private:repo_utils.bzl", "repo_utils")
load(":whl_metadata.bzl", "find_whl_metadata")

def whl_extract(rctx, *, whl_path, logger):
    """Unpack a wheel into the repository using only Starlark.

    The archive is extracted into a `site-packages` directory next to the
    wheel and an `INSTALLER` marker is written into the `.dist-info` directory.
    If the wheel ships a `<prefix>.data` directory, its known sub-directories
    are merge-moved to their install locations under the repository root and
    the `.data` directory is deleted afterwards.

    Args:
        rctx: the repository ctx.
        whl_path: the whl path to extract.
        logger: The logger to use
    """
    repo_root = whl_path.dirname
    site_packages = repo_root.get_child("site-packages")
    repo_utils.extract(
        rctx,
        archive = whl_path,
        output = site_packages,
        supports_whl_extraction = rp_config.supports_whl_extraction,
    )

    # The parent of the metadata file is the `<prefix>.dist-info` directory.
    dist_info = find_whl_metadata(
        install_dir = site_packages,
        logger = logger,
    ).dirname
    rctx.file(
        dist_info.get_child("INSTALLER"),
        "https://github.com/bazel-contrib/rules_python#pipstar",
    )

    # Derive the sibling `<prefix>.data` directory from the dist-info name.
    wheel_prefix = dist_info.basename[:-len(".dist-info")]
    data_dir = dist_info.dirname.get_child(wheel_prefix + ".data")
    if not data_dir.exists:
        # Nothing to relocate for wheels without a data directory.
        return

    # Maps each `.data` sub-directory to its location under the repo root.
    # See https://docs.python.org/3/library/sysconfig.html#posix-prefix; the
    # values are taken from the legacy whl installer config.
    install_locations = {
        "data": "data",
        "headers": "include",
        # `platlib` and `purelib` share a destination that already holds the
        # extracted wheel contents, so directories may collide and have to be
        # merged, potentially several levels deep. This has to stay reasonably
        # efficient because some packages (e.g. the libclang wheel) put
        # everything under `platlib` or `purelib` rather than the top level.
        "platlib": "site-packages",
        "purelib": "site-packages",
        "scripts": "bin",
    }
    for category, location in install_locations.items():
        category_dir = data_dir.get_child(category)
        if category_dir.exists:
            moves = merge_trees(category_dir, repo_root.get_child(location))
            for (move_src, move_dest) in moves:
                logger.debug(lambda: "Renaming: {} -> {}".format(move_src, move_dest))
                rctx.rename(move_src, move_dest)

        # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
        # shebang to be something else, for inspiration look at the hermetic
        # toolchain wrappers

    # Ensure that there is no data dir left
    rctx.delete(data_dir)

def merge_trees(src, dest):
    """Plan a merge-move of the src tree into the destination path.

    Nothing is moved here; the returned pairs are meant to be renamed by the
    caller. The trees are walked one directory level at a time:

    * a src entry whose destination does not exist is scheduled as a whole,
    * if both the src entry and its destination are directories, their
      children are examined on the next level,
    * any other combination is recorded as a collision.

    Fails if there are collisions or if the directory depth exceeds 10000.

    Args:
        src: {type}`path` a src path to rename.
        dest: {type}`path` a dest path to rename to.

    Returns:
        A list of tuples for src and destination paths.
    """
    max_depth = 10000
    ret = []
    remaining = [(src, dest)]
    collisions = []

    # Starlark has neither `while` loops nor recursion, hence the bounded loop
    # that processes one directory level per iteration.
    for _ in range(max_depth):
        if collisions or not remaining:
            break

        next_level = []
        for (s, d) in remaining:
            if not d.exists:
                ret.append((s, d))
                continue

            if not s.is_dir or not d.is_dir:
                collisions.append(s)
                continue

            for file_or_dir in s.readdir():
                next_level.append((file_or_dir, d.get_child(file_or_dir.basename)))

        remaining = next_level

    # Collisions have to be reported first: the walk stops right after the
    # level that found them, so `remaining` may still hold entries queued on
    # that level which have nothing to do with the depth limit.
    if collisions:
        fail("Detected collisions between {} and {}: {}".format(src, dest, collisions))

    if remaining:
        fail("Exceeded maximum directory depth of {} during tree merge.".format(max_depth))

    return ret
47 changes: 3 additions & 44 deletions python/private/pypi/whl_library.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ load(":parse_whl_name.bzl", "parse_whl_name")
load(":patch_whl.bzl", "patch_whl")
load(":pep508_requirement.bzl", "requirement")
load(":pypi_repo_utils.bzl", "pypi_repo_utils")
load(":whl_metadata.bzl", "find_whl_metadata", "whl_metadata")
load(":whl_extract.bzl", "whl_extract")
load(":whl_metadata.bzl", "whl_metadata")
load(":whl_target_platforms.bzl", "whl_target_platforms")

_CPPFLAGS = "CPPFLAGS"
Expand Down Expand Up @@ -265,48 +266,6 @@ def _create_repository_execution_environment(rctx, python_interpreter, logger =
env[_CPPFLAGS] = " ".join(cppflags)
return env

def _extract_whl_star(rctx, *, whl_path, logger):
    """Extract a wheel in Starlark and relocate its `.data` sub-directories.

    The archive is extracted into a `site-packages` directory next to the
    wheel and an `INSTALLER` marker is written into the `.dist-info` directory.
    Each known sub-directory of the optional `<prefix>.data` directory is then
    renamed as a whole to its install location under the repository root.

    Args:
        rctx: the repository ctx.
        whl_path: the whl path to extract.
        logger: the logger handed to the metadata lookup.
    """
    repo_root = whl_path.dirname
    site_packages = repo_root.get_child("site-packages")
    repo_utils.extract(
        rctx,
        archive = whl_path,
        output = site_packages,
        supports_whl_extraction = rp_config.supports_whl_extraction,
    )

    # The parent of the metadata file is the `<prefix>.dist-info` directory.
    dist_info = find_whl_metadata(
        install_dir = site_packages,
        logger = logger,
    ).dirname
    rctx.file(
        dist_info.get_child("INSTALLER"),
        "https://github.com/bazel-contrib/rules_python#pipstar",
    )

    # Derive the sibling `<prefix>.data` directory from the dist-info name.
    wheel_prefix = dist_info.basename[:-len(".dist-info")]
    data_dir = dist_info.dirname.get_child(wheel_prefix + ".data")
    if not data_dir.exists:
        return

    # Maps each `.data` sub-directory to its location under the repo root.
    # See https://docs.python.org/3/library/sysconfig.html#posix-prefix; the
    # values are taken from the legacy whl installer config.
    install_locations = {
        "data": "data",
        "headers": "include",
        "platlib": "site-packages",
        "purelib": "site-packages",
        "scripts": "bin",
    }
    for category, location in install_locations.items():
        category_dir = data_dir.get_child(category)
        if category_dir.exists:
            rctx.rename(category_dir, repo_root.get_child(location))

        # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
        # shebang to be something else, for inspiration look at the hermetic
        # toolchain wrappers

def _extract_whl_py(rctx, *, python_interpreter, args, whl_path, environment, logger):
target_platforms = rctx.attr.experimental_target_platforms or []
if target_platforms:
Expand Down Expand Up @@ -448,7 +407,7 @@ def _whl_library_impl(rctx):
)

if enable_pipstar_extract:
_extract_whl_star(rctx, whl_path = whl_path, logger = logger)
whl_extract(rctx, whl_path = whl_path, logger = logger)
else:
_extract_whl_py(
rctx,
Expand Down