diff --git a/examples/pip_parse/BUILD.bazel b/examples/pip_parse/BUILD.bazel index 6ed8d26286..37a25fe873 100644 --- a/examples/pip_parse/BUILD.bazel +++ b/examples/pip_parse/BUILD.bazel @@ -79,5 +79,8 @@ py_test( "WHEEL_DIST_INFO_CONTENTS": "$(rootpaths @pypi//requests:dist_info)", "YAMLLINT_ENTRY_POINT": "$(rlocationpath :yamllint)", }, - deps = ["@rules_python//python/runfiles"], + deps = [ + "@pypi//libclang", + "@rules_python//python/runfiles", + ], ) diff --git a/examples/pip_parse/requirements.in b/examples/pip_parse/requirements.in index 9d9e766d21..e4af3b1efe 100644 --- a/examples/pip_parse/requirements.in +++ b/examples/pip_parse/requirements.in @@ -3,3 +3,4 @@ s3cmd~=2.1.0 yamllint~=1.28.0 sphinx sphinxcontrib-serializinghtml +libclang diff --git a/examples/pip_parse/requirements_lock.txt b/examples/pip_parse/requirements_lock.txt index dc34b45a45..13a2bba1e6 100644 --- a/examples/pip_parse/requirements_lock.txt +++ b/examples/pip_parse/requirements_lock.txt @@ -42,6 +42,18 @@ jinja2==3.1.6 \ --hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \ --hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67 # via sphinx +libclang==18.1.1 \ + --hash=sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a \ + --hash=sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8 \ + --hash=sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb \ + --hash=sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592 \ + --hash=sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f \ + --hash=sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5 \ + --hash=sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8 \ + --hash=sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250 \ + --hash=sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b \ + 
--hash=sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe + # via -r requirements.in markupsafe==2.1.3 \ --hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \ --hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \ diff --git a/examples/pip_parse/requirements_windows.txt b/examples/pip_parse/requirements_windows.txt index 78c1a45690..7a1329d521 100644 --- a/examples/pip_parse/requirements_windows.txt +++ b/examples/pip_parse/requirements_windows.txt @@ -46,6 +46,18 @@ jinja2==3.1.6 \ --hash=sha256:0137fb05990d35f1275a587e9aee6d56da821fc83491a0fb838183be43f66d6d \ --hash=sha256:85ece4451f492d0c13c5dd7c13a64681a86afae63a5f347908daf103ce6d2f67 # via sphinx +libclang==18.1.1 \ + --hash=sha256:0b2e143f0fac830156feb56f9231ff8338c20aecfe72b4ffe96f19e5a1dbb69a \ + --hash=sha256:3f0e1f49f04d3cd198985fea0511576b0aee16f9ff0e0f0cad7f9c57ec3c20e8 \ + --hash=sha256:4dd2d3b82fab35e2bf9ca717d7b63ac990a3519c7e312f19fa8e86dcc712f7fb \ + --hash=sha256:54dda940a4a0491a9d1532bf071ea3ef26e6dbaf03b5000ed94dd7174e8f9592 \ + --hash=sha256:69f8eb8f65c279e765ffd28aaa7e9e364c776c17618af8bff22a8df58677ff4f \ + --hash=sha256:6f14c3f194704e5d09769108f03185fce7acaf1d1ae4bbb2f30a72c2400cb7c5 \ + --hash=sha256:83ce5045d101b669ac38e6da8e58765f12da2d3aafb3b9b98d88b286a60964d8 \ + --hash=sha256:a1214966d08d73d971287fc3ead8dfaf82eb07fb197680d8b3859dbbbbf78250 \ + --hash=sha256:c533091d8a3bbf7460a00cb6c1a71da93bffe148f172c7d03b1c31fbf8aa2a0b \ + --hash=sha256:cf4a99b05376513717ab5d82a0db832c56ccea4fd61a69dbb7bccf2dfb207dbe + # via -r requirements.in markupsafe==2.1.3 \ --hash=sha256:05fb21170423db021895e1ea1e1f3ab3adb85d1c2333cbc2310f2a26bc77272e \ --hash=sha256:0a4e4a1aff6c7ac4cd55792abf96c915634c2b97e3cc1c7129578aa68ebd754e \ diff --git a/python/private/pypi/BUILD.bazel b/python/private/pypi/BUILD.bazel index aa96cf86a5..8194bb520d 100644 --- a/python/private/pypi/BUILD.bazel +++ b/python/private/pypi/BUILD.bazel @@ -415,6 
+415,16 @@ bzl_library( srcs = ["whl_config_setting.bzl"], ) +bzl_library( + name = "whl_extract_bzl", + srcs = ["whl_extract.bzl"], + deps = [ + ":whl_metadata_bzl", + "//python/private:repo_utils_bzl", + "@rules_python_internal//:rules_python_config_bzl", + ], +) + bzl_library( name = "whl_library_alias_bzl", srcs = ["whl_library_alias.bzl"], @@ -435,6 +445,7 @@ bzl_library( ":patch_whl_bzl", ":pep508_requirement_bzl", ":pypi_repo_utils_bzl", + ":whl_extract_bzl", ":whl_metadata_bzl", ":whl_target_platforms_bzl", "//python/private:auth_bzl", diff --git a/python/private/pypi/whl_extract.bzl b/python/private/pypi/whl_extract.bzl new file mode 100644 index 0000000000..6b2e0507ac --- /dev/null +++ b/python/private/pypi/whl_extract.bzl @@ -0,0 +1,109 @@ +"""A simple whl extractor.""" + +load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config") +load("//python/private:repo_utils.bzl", "repo_utils") +load(":whl_metadata.bzl", "find_whl_metadata") + +def whl_extract(rctx, *, whl_path, logger): + """Extract whls in Starlark. + + Args: + rctx: the repository ctx. + whl_path: the whl path to extract. 
+ logger: the logger to use. + """ + install_dir_path = whl_path.dirname.get_child("site-packages") + repo_utils.extract( + rctx, + archive = whl_path, + output = install_dir_path, + supports_whl_extraction = rp_config.supports_whl_extraction, + ) + metadata_file = find_whl_metadata( + install_dir = install_dir_path, + logger = logger, + ) + + # Get the .dist-info dir name + dist_info_dir = metadata_file.dirname + rctx.file( + dist_info_dir.get_child("INSTALLER"), + "https://github.com/bazel-contrib/rules_python#pipstar", + ) + repo_root_dir = whl_path.dirname + + # Get the sibling .data dir name + data_dir = dist_info_dir.dirname.get_child(dist_info_dir.basename[:-len(".dist-info")] + ".data") + if data_dir.exists: + for prefix, dest_prefix in { + # https://docs.python.org/3/library/sysconfig.html#posix-prefix + # We are taking this from the legacy whl installer config + "data": "data", + "headers": "include", + # In theory there may be directory collisions here, so it would be best to + # merge the paths here. We are doing this for quite a few levels deep. What is + # more, this code has to be reasonably efficient because some packages like + # to not put everything to the top level, but to indicate explicitly if + # something is in `platlib` or `purelib` (e.g. libclang wheel). + "platlib": "site-packages", + "purelib": "site-packages", + "scripts": "bin", + }.items(): + src = data_dir.get_child(prefix) + if not src.exists: + # The prefix does not exist in the wheel, we can continue + continue + + for (src, dest) in merge_trees(src, repo_root_dir.get_child(dest_prefix)): + logger.debug(lambda: "Renaming: {} -> {}".format(src, dest)) + rctx.rename(src, dest) + + # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python + # shebang to be something else, for inspiration look at the hermetic + # toolchain wrappers + + # Ensure that there is no data dir left + rctx.delete(data_dir) + +def merge_trees(src, dest): + """Merge src into the destination path. 
+ + This will attempt to merge-move src files to the destination directory if there are + existing files. Fails if the directory depth exceeds 10000 or if there are collisions. + + Args: + src: {type}`path` a src path to rename. + dest: {type}`path` a dest path to rename to. + + Returns: + A list of tuples for src and destination paths. + """ + ret = [] + remaining = [(src, dest)] + collisions = [] + for _ in range(10000): + if collisions or not remaining: + break + + tmp = [] + for (s, d) in remaining: + if not d.exists: + ret.append((s, d)) + continue + + if not s.is_dir or not d.is_dir: + collisions.append(s) + continue + + for file_or_dir in s.readdir(): + tmp.append((file_or_dir, d.get_child(file_or_dir.basename))) + + remaining = tmp + + if collisions: + fail("Detected collisions between {} and {}: {}".format(src, dest, collisions)) + + if remaining: + fail("Exceeded maximum directory depth of 10000 during tree merge.") + + return ret diff --git a/python/private/pypi/whl_library.bzl b/python/private/pypi/whl_library.bzl index c368dea733..3c4b6beeaf 100644 --- a/python/private/pypi/whl_library.bzl +++ b/python/private/pypi/whl_library.bzl @@ -26,7 +26,8 @@ load(":parse_whl_name.bzl", "parse_whl_name") load(":patch_whl.bzl", "patch_whl") load(":pep508_requirement.bzl", "requirement") load(":pypi_repo_utils.bzl", "pypi_repo_utils") -load(":whl_metadata.bzl", "find_whl_metadata", "whl_metadata") +load(":whl_extract.bzl", "whl_extract") +load(":whl_metadata.bzl", "whl_metadata") load(":whl_target_platforms.bzl", "whl_target_platforms") _CPPFLAGS = "CPPFLAGS" @@ -265,48 +266,6 @@ def _create_repository_execution_environment(rctx, python_interpreter, logger = env[_CPPFLAGS] = " ".join(cppflags) return env -def _extract_whl_star(rctx, *, whl_path, logger): - install_dir_path = whl_path.dirname.get_child("site-packages") - repo_utils.extract( - rctx, - archive = whl_path, - output = install_dir_path, - supports_whl_extraction = rp_config.supports_whl_extraction, - ) - 
metadata_file = find_whl_metadata( - install_dir = install_dir_path, - logger = logger, - ) - - # Get the .dist_info dir name - dist_info_dir = metadata_file.dirname - rctx.file( - dist_info_dir.get_child("INSTALLER"), - "https://github.com/bazel-contrib/rules_python#pipstar", - ) - repo_root_dir = whl_path.dirname - - # Get the .dist_info dir name - data_dir = dist_info_dir.dirname.get_child(dist_info_dir.basename[:-len(".dist-info")] + ".data") - if data_dir.exists: - for prefix, dest in { - # https://docs.python.org/3/library/sysconfig.html#posix-prefix - # We are taking this from the legacy whl installer config - "data": "data", - "headers": "include", - "platlib": "site-packages", - "purelib": "site-packages", - "scripts": "bin", - }.items(): - src = data_dir.get_child(prefix) - dest = repo_root_dir.get_child(dest) - if src.exists: - rctx.rename(src, dest) - - # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python - # shebang to be something else, for inspiration look at the hermetic - # toolchain wrappers - def _extract_whl_py(rctx, *, python_interpreter, args, whl_path, environment, logger): target_platforms = rctx.attr.experimental_target_platforms or [] if target_platforms: @@ -448,7 +407,7 @@ def _whl_library_impl(rctx): ) if enable_pipstar_extract: - _extract_whl_star(rctx, whl_path = whl_path, logger = logger) + whl_extract(rctx, whl_path = whl_path, logger = logger) else: _extract_whl_py( rctx,