|
| 1 | +"""A simple whl extractor.""" |
| 2 | + |
| 3 | +load("@rules_python_internal//:rules_python_config.bzl", rp_config = "config") |
| 4 | +load("//python/private:repo_utils.bzl", "repo_utils") |
| 5 | +load(":whl_metadata.bzl", "find_whl_metadata") |
| 6 | + |
def whl_extract(rctx, *, whl_path, logger):
    """Unpack a whl using Starlark only.

    The archive is extracted into a `site-packages` directory next to the whl,
    an `INSTALLER` file is written into the directory holding the whl metadata
    and, when the whl ships a `<prefix>.data` directory, its contents are moved
    to their destinations under the directory that contains the whl.

    Args:
        rctx: the repository ctx.
        whl_path: the whl path to extract.
        logger: The logger to use
    """
    repo_root_dir = whl_path.dirname
    install_dir_path = repo_root_dir.get_child("site-packages")
    repo_utils.extract(
        rctx,
        archive = whl_path,
        output = install_dir_path,
        supports_whl_extraction = rp_config.supports_whl_extraction,
    )

    # The directory holding the metadata file is the <prefix>.dist-info dir.
    dist_info_dir = find_whl_metadata(
        install_dir = install_dir_path,
        logger = logger,
    ).dirname
    installer_file = dist_info_dir.get_child("INSTALLER")
    rctx.file(
        installer_file,
        "https://github.com/bazel-contrib/rules_python#pipstar",
    )

    # Derive the sibling <prefix>.data dir from the <prefix>.dist-info dir name.
    wheel_prefix = dist_info_dir.basename[:-len(".dist-info")]
    data_dir = dist_info_dir.dirname.get_child(wheel_prefix + ".data")
    if not data_dir.exists:
        # Nothing to relocate.
        return

    # Maps each sub-directory of the .data dir to its destination directory.
    # https://docs.python.org/3/library/sysconfig.html#posix-prefix
    # The values are taken from the legacy whl installer config.
    data_destinations = {
        "data": "data",
        "headers": "include",
        # Both `platlib` and `purelib` land in `site-packages`, which already
        # holds the extracted whl, so directories may collide and have to be
        # merged, potentially many levels deep. This has to stay reasonably
        # efficient, because some packages explicitly place their files under
        # `platlib` or `purelib` instead of the top level (e.g. libclang wheel).
        "platlib": "site-packages",
        "purelib": "site-packages",
        "scripts": "bin",
    }
    for scheme, dest_prefix in data_destinations.items():
        scheme_dir = data_dir.get_child(scheme)
        if scheme_dir.exists:
            target_root = repo_root_dir.get_child(dest_prefix)
            for (from_path, to_path) in merge_trees(scheme_dir, target_root):
                logger.debug(lambda: "Renaming: {} -> {}".format(from_path, to_path))
                rctx.rename(from_path, to_path)

        # Otherwise the whl does not ship this sub-directory; move on.

    # TODO @aignas 2025-12-16: when moving scripts to `bin`, rewrite the #!python
    # shebang to be something else, for inspiration look at the hermetic
    # toolchain wrappers

    # Ensure that there is no data dir left
    rctx.delete(data_dir)
| 67 | + |
def merge_trees(src, dest):
    """Plan the renames needed to merge the src tree into the destination path.

    Nothing is moved by this function, it only computes what should be moved.
    The trees are walked level by level:
      * an entry whose destination does not exist is scheduled to be renamed
        as a whole,
      * if the destination exists and both sides are directories, the children
        of the source directory are examined at the next level,
      * if the destination exists and either side is not a directory, it is a
        collision.

    Fails if there are collisions, or if the trees are still not fully merged
    after walking 10000 levels.

    Args:
        src: {type}`path` a src path to rename.
        dest: {type}`path` a dest path to rename to.

    Returns:
        A list of tuples for src and destination paths.
    """
    ret = []
    remaining = [(src, dest)]
    collisions = []

    # Each iteration handles one directory level; the loop is bounded, so the
    # number of levels that can be walked is capped at 10000.
    for _ in range(10000):
        if collisions or not remaining:
            break

        tmp = []
        for (s, d) in remaining:
            if not d.exists:
                # Free destination: the whole entry can be moved in one rename.
                ret.append((s, d))
                continue

            if not s.is_dir or not d.is_dir:
                # The destination is taken and the two cannot be merged.
                collisions.append(s)
                continue

            # Both are directories: descend and merge the children.
            for file_or_dir in s.readdir():
                tmp.append((file_or_dir, d.get_child(file_or_dir.basename)))

        remaining = tmp

    # Collisions must be reported before the depth check: the walk stops as
    # soon as a collision is seen, which may leave not yet visited entries in
    # `remaining` that have nothing to do with the depth limit.
    if collisions:
        fail("Detected collisions between {} and {}: {}".format(src, dest, collisions))

    if remaining:
        fail("Exceeded maximum directory depth of 10000 during tree merge.")

    return ret
0 commit comments