Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion examples/pip_parse/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,8 @@ py_test(
"WHEEL_DIST_INFO_CONTENTS": "$(rootpaths @pypi//requests:dist_info)",
"YAMLLINT_ENTRY_POINT": "$(rlocationpath :yamllint)",
},
deps = ["@rules_python//python/runfiles"],
deps = [
"@pypi//libclang",
"@rules_python//python/runfiles",
],
)
1 change: 1 addition & 0 deletions examples/pip_parse/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@ s3cmd~=2.1.0
yamllint~=1.28.0
sphinx
sphinxcontrib-serializinghtml
libclang
12 changes: 12 additions & 0 deletions examples/pip_parse/requirements_lock.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,18 @@ jinja2==3.1.6 \
--hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \
--hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67
# via sphinx
libclang==18.1.1 \
--hash=sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a \
--hash=sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8 \
--hash=sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb \
--hash=sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592 \
--hash=sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f \
--hash=sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5 \
--hash=sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8 \
--hash=sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250 \
--hash=sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b \
--hash=sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe
# via -r requirements.in
markupsafe==2.1.3 \
--hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \
--hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \
Expand Down
11 changes: 11 additions & 0 deletions python/private/pypi/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,16 @@ bzl_library(
srcs = ["whl_config_setting.bzl"],
)

# Starlark library target for `whl_extract.bzl`. The deps cover the files that
# `whl_extract.bzl` pulls in through its `load()` statements (the rules_python
# config, `repo_utils.bzl` and `whl_metadata.bzl`).
bzl_library(
    name = "whl_extract_bzl",
    srcs = ["whl_extract.bzl"],
    deps = [
        ":whl_metadata_bzl",
        "//python/private:repo_utils_bzl",
        "@rules_python_internal//:rules_python_config_bzl",
    ],
)

bzl_library(
name = "whl_library_alias_bzl",
srcs = ["whl_library_alias.bzl"],
Expand All @@ -435,6 +445,7 @@ bzl_library(
":patch_whl_bzl",
":pep508_requirement_bzl",
":pypi_repo_utils_bzl",
":whl_extract_bzl",
":whl_metadata_bzl",
":whl_target_platforms_bzl",
"//python/private:auth_bzl",
Expand Down
106 changes: 106 additions & 0 deletions python/private/pypi/whl_extract.bzl
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
"""A simple whl extractor."""

load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config")
load("//python/private:repo_utils.bzl", "repo_utils")
load(":whl_metadata.bzl", "find_whl_metadata")

def whl_extract(rctx, *, whl_path, logger):
    """Extract a whl in Starlark and lay its contents out in the repository.

    The archive is unpacked into a `site-packages` directory next to the whl,
    an `INSTALLER` file is written into the `<prefix>.dist-info` directory and,
    if the wheel ships a `<prefix>.data` directory, its known sub-directories
    are merged into their destinations under the repository root before the
    `.data` directory itself is deleted.

    Args:
        rctx: the repository ctx.
        whl_path: the whl path to extract; its parent directory is used as
            the repository root for the extracted layout.
        logger: The logger to use; it is handed to `find_whl_metadata` and
            used for debug messages about every rename.
    """
    repo_root = whl_path.dirname
    site_packages = repo_root.get_child("site-packages")
    repo_utils.extract(
        rctx,
        archive = whl_path,
        output = site_packages,
        supports_whl_extraction = rp_config.supports_whl_extraction,
    )

    # The directory holding the metadata file is the `<prefix>.dist-info` dir.
    dist_info = find_whl_metadata(
        install_dir = site_packages,
        logger = logger,
    ).dirname
    rctx.file(
        dist_info.get_child("INSTALLER"),
        "https://github.com/bazel-contrib/rules_python#pipstar",
    )

    # Derive the sibling `<prefix>.data` dir name from the `.dist-info` one.
    data_dir_name = dist_info.basename[:-len(".dist-info")] + ".data"
    data_dir = dist_info.dirname.get_child(data_dir_name)
    if not data_dir.exists:
        return

    # Maps a `.data` sub-directory to its destination under the repo root.
    #
    # https://docs.python.org/3/library/sysconfig.html#posix-prefix
    # We are taking this from the legacy whl installer config
    #
    # `platlib` and `purelib` share a destination (and `site-packages` already
    # holds the extracted wheel), so directories may collide. The trees are
    # therefore merged instead of renamed wholesale. This has to stay
    # reasonably efficient because some packages do not put everything at the
    # top level but state explicitly whether something belongs to `platlib`
    # or `purelib` (e.g. the libclang wheel).
    destinations = {
        "data": "data",
        "headers": "include",
        "platlib": "site-packages",
        "purelib": "site-packages",
        "scripts": "bin",
    }
    for data_subdir, repo_subdir in destinations.items():
        origin = data_dir.get_child(data_subdir)
        if origin.exists:
            for (move_from, move_to) in merge_trees(origin, repo_root.get_child(repo_subdir)):
                logger.debug(lambda: "Renaming: {} -> {}".format(move_from, move_to))
                rctx.rename(move_from, move_to)

        # Otherwise the prefix does not exist in the wheel and is skipped.

        # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
        # shebang to be something else, for inspiration look at the hermetic
        # toolchain wrappers

    # Ensure that there is no data dir left
    rctx.delete(data_dir)

def merge_trees(src, dest):
    """Plan the renames needed to merge the `src` tree into `dest`.

    Nothing is moved here; the function only computes which paths have to be
    renamed. A path whose destination does not exist yet is renamed as a
    whole. If both sides are directories, the children of `src` are examined
    one level deeper instead.

    Fails if there are collisions, i.e. a destination already exists and
    either side is not a directory, or if the trees are nested deeper than
    10000 levels.

    Args:
        src: {type}`path` a src path to rename.
        dest: {type}`path` a dest path to rename to.

    Returns:
        A list of `(src, dest)` tuples of paths to rename.
    """
    renames = []
    pending = [(src, dest)]
    collisions = []

    # Starlark has neither recursion nor `while` loops, so the tree is walked
    # breadth-first with a bounded `for`; each iteration handles one depth level.
    for _ in range(10000):
        if collisions or not pending:
            break

        next_level = []
        for (from_path, to_path) in pending:
            if not to_path.exists:
                # Nothing in the way: the whole path can be moved at once.
                renames.append((from_path, to_path))
                continue

            # Only two directories can be merged. A file on either side of an
            # existing destination cannot be merged and is a collision.
            if not from_path.is_dir or not to_path.is_dir:
                collisions.append(from_path)
                continue

            for child in from_path.readdir():
                next_level.append((child, to_path.get_child(child.basename)))

        pending = next_level

    if collisions:
        # `fail` needs the formatted string itself; a lambda would be printed
        # as a function object and hide the colliding paths.
        fail("detected collisions between platlib and purelib data: {}".format(collisions))

    if pending:
        # The depth bound was exhausted, so the plan would be incomplete.
        fail("exceeded the maximum directory depth of 10000 while merging {} into {}".format(src, dest))

    return renames
47 changes: 3 additions & 44 deletions python/private/pypi/whl_library.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ load(":parse_whl_name.bzl", "parse_whl_name")
load(":patch_whl.bzl", "patch_whl")
load(":pep508_requirement.bzl", "requirement")
load(":pypi_repo_utils.bzl", "pypi_repo_utils")
load(":whl_metadata.bzl", "find_whl_metadata", "whl_metadata")
load(":whl_extract.bzl", "whl_extract")
load(":whl_metadata.bzl", "whl_metadata")
load(":whl_target_platforms.bzl", "whl_target_platforms")

_CPPFLAGS = "CPPFLAGS"
Expand Down Expand Up @@ -265,48 +266,6 @@ def _create_repository_execution_environment(rctx, python_interpreter, logger =
env[_CPPFLAGS] = " ".join(cppflags)
return env

def _extract_whl_star(rctx, *, whl_path, logger):
    """Extract a whl in Starlark and relocate its `.data` sub-directories.

    The archive is unpacked into a `site-packages` directory next to the whl,
    an `INSTALLER` file is written into the `<prefix>.dist-info` directory and
    every known sub-directory of `<prefix>.data` that exists is renamed to its
    destination under the repository root.

    Args:
        rctx: the repository ctx.
        whl_path: the whl path to extract; its parent directory is used as
            the repository root.
        logger: the logger handed to `find_whl_metadata`.
    """
    repo_root = whl_path.dirname
    site_packages = repo_root.get_child("site-packages")
    repo_utils.extract(
        rctx,
        archive = whl_path,
        output = site_packages,
        supports_whl_extraction = rp_config.supports_whl_extraction,
    )

    # The directory holding the metadata file is the `<prefix>.dist-info` dir.
    dist_info = find_whl_metadata(
        install_dir = site_packages,
        logger = logger,
    ).dirname
    rctx.file(
        dist_info.get_child("INSTALLER"),
        "https://github.com/bazel-contrib/rules_python#pipstar",
    )

    # Derive the sibling `<prefix>.data` dir name from the `.dist-info` one.
    data_dir_name = dist_info.basename[:-len(".dist-info")] + ".data"
    data_dir = dist_info.dirname.get_child(data_dir_name)
    if not data_dir.exists:
        return

    # Maps a `.data` sub-directory to its destination under the repo root.
    #
    # https://docs.python.org/3/library/sysconfig.html#posix-prefix
    # We are taking this from the legacy whl installer config
    #
    # NOTE(review): `site-packages` is created by the extraction above and is
    # the destination of both `platlib` and `purelib`, so those renames look
    # like they target an existing path - confirm how `rctx.rename` handles it.
    destinations = {
        "data": "data",
        "headers": "include",
        "platlib": "site-packages",
        "purelib": "site-packages",
        "scripts": "bin",
    }
    for data_subdir, repo_subdir in destinations.items():
        origin = data_dir.get_child(data_subdir)
        target = repo_root.get_child(repo_subdir)
        if not origin.exists:
            continue
        rctx.rename(origin, target)

    # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
    # shebang to be something else, for inspiration look at the hermetic
    # toolchain wrappers

def _extract_whl_py(rctx, *, python_interpreter, args, whl_path, environment, logger):
target_platforms = rctx.attr.experimental_target_platforms or []
if target_platforms:
Expand Down Expand Up @@ -448,7 +407,7 @@ def _whl_library_impl(rctx):
)

if enable_pipstar_extract:
_extract_whl_star(rctx, whl_path = whl_path, logger = logger)
whl_extract(rctx, whl_path = whl_path, logger = logger)
else:
_extract_whl_py(
rctx,
Expand Down