Skip to content

Commit cdf4f55

Browse files
feat(pypi): generate filegroup with all extracted wheel files (#3011)
Adds a filegroup with all the files that came from the extracted wheel. This has two benefits over using `whl_filegroup`: it avoids copying the wheel and makes the set of files directly visible to the analysis phase. Some wheels are multiple gigabytes in size (e.g. torch, cuda, tensorflow), so avoiding the copy and archive processing saves a decent amount of time. Knowing the specific files at analysis time is generally beneficial. The particular case I ran into was that the CC rules were unhappy with a TreeArtifact of header files because they couldn't enforce some check about who was properly providing headers that were included (layering check?). Another example is using the unused_inputs_list optimization, which allows an action to ignore inputs that aren't actually used. For example, an action could take all the wheel's files as inputs, only care about the headers, and then tell Bazel that all the non-header files aren't relevant, so changes to other files don't re-run the action that only cares about headers. --------- Co-authored-by: google-labs-jules[bot] <161369871+google-labs-jules[bot]@users.noreply.github.com>
1 parent b2c3926 commit cdf4f55

File tree

9 files changed

+77
-11
lines changed

9 files changed

+77
-11
lines changed

CHANGELOG.md

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,10 @@ END_UNRELEASED_TEMPLATE
9090
* (pypi) To configure the environment for `requirements.txt` evaluation, use the newly added
9191
developer preview of the `pip.default` tag class. Only `rules_python` and root modules can use
9292
this feature. You can also configure custom `config_settings` using `pip.default`.
93+
* (pypi) PyPI dependencies now expose an `:extracted_whl_files` filegroup target
94+
of all the files extracted from the wheel. This can be used in lieu of
95+
{obj}`whl_filegroup` to avoid copying/extracting wheels multiple times to
96+
get a subset of their files.
9397
* (gazelle) New directive `gazelle:python_generate_pyi_deps`; when `true`,
9498
dependencies added to satisfy type-only imports (`if TYPE_CHECKING`) and type
9599
stub packages are added to `pyi_deps` instead of `deps`.

docs/pypi/use.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,16 @@ Note that the hub repo contains the following targets for each package:
4040
* `@pypi//numpy:data` - the {obj}`filegroup` for all of the extra files that are included
4141
as data in the `pkg` target.
4242
* `@pypi//numpy:dist_info` - the {obj}`filegroup` for all of the files in the `<pkg prefix with version>.dist-info` directory.
43+
* `@pypi//numpy:extracted_whl_files` - a {obj}`filegroup` of all the files
44+
extracted from the whl file.
4345
* `@pypi//numpy:whl` - the {obj}`filegroup` that is the `.whl` file itself, which includes all
4446
transitive dependencies via the {attr}`filegroup.data` attribute.
4547

48+
:::{versionadded} VERSION_NEXT_FEATURE
49+
50+
The `:extracted_whl_files` target was added.
51+
:::
52+
4653
## Entry points
4754

4855
If you would like to access [entry points][whl_ep], see the `py_console_script_binary` rule documentation,

python/private/pypi/labels.bzl

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,7 @@
1414

1515
"""Constants used by parts of pip_repository for naming libraries and wheels."""
1616

17+
EXTRACTED_WHEEL_FILES = "extracted_whl_files"
1718
WHEEL_FILE_PUBLIC_LABEL = "whl"
1819
WHEEL_FILE_IMPL_LABEL = "_whl"
1920
PY_LIBRARY_PUBLIC_LABEL = "pkg"

python/private/pypi/pkg_aliases.bzl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ load(
7979
":labels.bzl",
8080
"DATA_LABEL",
8181
"DIST_INFO_LABEL",
82+
"EXTRACTED_WHEEL_FILES",
8283
"PY_LIBRARY_IMPL_LABEL",
8384
"PY_LIBRARY_PUBLIC_LABEL",
8485
"WHEEL_FILE_IMPL_LABEL",
@@ -151,6 +152,7 @@ def pkg_aliases(
151152
WHEEL_FILE_PUBLIC_LABEL: WHEEL_FILE_IMPL_LABEL if group_name else WHEEL_FILE_PUBLIC_LABEL,
152153
DATA_LABEL: DATA_LABEL,
153154
DIST_INFO_LABEL: DIST_INFO_LABEL,
155+
EXTRACTED_WHEEL_FILES: EXTRACTED_WHEEL_FILES,
154156
} | {
155157
x: x
156158
for x in extra_aliases or []

python/private/pypi/whl_library.bzl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -248,6 +248,7 @@ def _whl_library_impl(rctx):
248248
environment = _create_repository_execution_environment(rctx, python_interpreter, logger = logger)
249249

250250
whl_path = None
251+
sdist_filename = None
251252
if rctx.attr.whl_file:
252253
rctx.watch(rctx.attr.whl_file)
253254
whl_path = rctx.path(rctx.attr.whl_file)
@@ -277,6 +278,8 @@ def _whl_library_impl(rctx):
277278
if filename.endswith(".whl"):
278279
whl_path = rctx.path(filename)
279280
else:
281+
sdist_filename = filename
282+
280283
# It is an sdist and we need to tell PyPI to use a file in this directory
281284
# and, allow getting build dependencies from PYTHONPATH, which we
282285
# setup in this repository rule, but still download any necessary
@@ -382,6 +385,7 @@ def _whl_library_impl(rctx):
382385

383386
build_file_contents = generate_whl_library_build_bazel(
384387
name = whl_path.basename,
388+
sdist_filename = sdist_filename,
385389
dep_template = rctx.attr.dep_template or "@{}{{name}}//:{{target}}".format(rctx.attr.repo_prefix),
386390
entry_points = entry_points,
387391
metadata_name = metadata.name,
@@ -455,6 +459,7 @@ def _whl_library_impl(rctx):
455459

456460
build_file_contents = generate_whl_library_build_bazel(
457461
name = whl_path.basename,
462+
sdist_filename = sdist_filename,
458463
dep_template = rctx.attr.dep_template or "@{}{{name}}//:{{target}}".format(rctx.attr.repo_prefix),
459464
entry_points = entry_points,
460465
# TODO @aignas 2025-05-17: maybe have a build flag for this instead

python/private/pypi/whl_library_targets.bzl

Lines changed: 37 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ load(
2424
":labels.bzl",
2525
"DATA_LABEL",
2626
"DIST_INFO_LABEL",
27+
"EXTRACTED_WHEEL_FILES",
2728
"PY_LIBRARY_IMPL_LABEL",
2829
"PY_LIBRARY_PUBLIC_LABEL",
2930
"WHEEL_ENTRY_POINT_PREFIX",
@@ -33,6 +34,16 @@ load(
3334
load(":namespace_pkgs.bzl", _create_inits = "create_inits")
3435
load(":pep508_deps.bzl", "deps")
3536

37+
# Files that are special to the Bazel processing of things.
38+
_BAZEL_REPO_FILE_GLOBS = [
39+
"BUILD",
40+
"BUILD.bazel",
41+
"REPO.bazel",
42+
"WORKSPACE",
43+
"WORKSPACE",
44+
"WORKSPACE.bazel",
45+
]
46+
3647
def whl_library_targets_from_requires(
3748
*,
3849
name,
@@ -97,14 +108,12 @@ def whl_library_targets(
97108
*,
98109
name,
99110
dep_template,
111+
sdist_filename = None,
100112
data_exclude = [],
101113
srcs_exclude = [],
102114
tags = [],
103-
filegroups = {
104-
DIST_INFO_LABEL: ["site-packages/*.dist-info/**"],
105-
DATA_LABEL: ["data/**"],
106-
},
107115
dependencies = [],
116+
filegroups = None,
108117
dependencies_by_platform = {},
109118
dependencies_with_markers = {},
110119
group_deps = [],
@@ -129,14 +138,16 @@ def whl_library_targets(
129138
filegroup. This may be also parsed to generate extra metadata.
130139
dep_template: {type}`str` The dep_template to use for dependency
131140
interpolation.
141+
sdist_filename: {type}`str | None` If the wheel was built from an sdist,
142+
the filename of the sdist.
132143
tags: {type}`list[str]` The tags set on the `py_library`.
133144
dependencies: {type}`list[str]` A list of dependencies.
134145
dependencies_by_platform: {type}`dict[str, list[str]]` A list of
135146
dependencies by platform key.
136147
dependencies_with_markers: {type}`dict[str, str]` A marker to evaluate
137148
in order for the dep to be included.
138-
filegroups: {type}`dict[str, list[str]]` A dictionary of the target
139-
names and the glob matches.
149+
filegroups: {type}`dict[str, list[str]] | None` A dictionary of the target
150+
names and the glob matches. If `None`, defaults will be used.
140151
group_name: {type}`str` name of the dependency group (if any) which
141152
contains this library. If set, this library will behave as a shim
142153
to group implementation rules which will provide simultaneously
@@ -169,10 +180,28 @@ def whl_library_targets(
169180
tags = sorted(tags)
170181
data = [] + data
171182

172-
for filegroup_name, glob in filegroups.items():
183+
if filegroups == None:
184+
filegroups = {
185+
EXTRACTED_WHEEL_FILES: dict(
186+
include = ["**"],
187+
exclude = (
188+
_BAZEL_REPO_FILE_GLOBS +
189+
[sdist_filename] if sdist_filename else []
190+
),
191+
),
192+
DIST_INFO_LABEL: dict(
193+
include = ["site-packages/*.dist-info/**"],
194+
),
195+
DATA_LABEL: dict(
196+
include = ["data/**"],
197+
),
198+
}
199+
200+
for filegroup_name, glob_kwargs in filegroups.items():
201+
glob_kwargs = {"allow_empty": True} | glob_kwargs
173202
native.filegroup(
174203
name = filegroup_name,
175-
srcs = native.glob(glob, allow_empty = True),
204+
srcs = native.glob(**glob_kwargs),
176205
visibility = ["//visibility:public"],
177206
)
178207

python/private/whl_filegroup/whl_filegroup.bzl

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,14 @@ cc_library(
4242
includes = ["numpy_includes/numpy/core/include"],
4343
deps = ["@rules_python//python/cc:current_py_cc_headers"],
4444
)
45+
4546
```
47+
48+
:::{seealso}
49+
50+
The `:extracted_whl_files` target, which is a filegroup of all the files
51+
from the already extracted whl file.
52+
:::
4653
""",
4754
attrs = {
4855
"pattern": attr.string(default = "", doc = "Only file paths matching this regex pattern will be extracted."),

tests/pypi/pkg_aliases/pkg_aliases_test.bzl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ def _test_legacy_aliases(env):
4343
"whl": "@repo//:whl",
4444
"data": "@repo//:data",
4545
"dist_info": "@repo//:dist_info",
46+
"extracted_whl_files": "@repo//:extracted_whl_files",
4647
"my_special": "@repo//:my_special",
4748
}
4849

@@ -242,6 +243,10 @@ def _test_group_aliases(env):
242243
"name": "dist_info",
243244
"actual": "@repo//:dist_info",
244245
},
246+
{
247+
"name": "extracted_whl_files",
248+
"actual": "@repo//:extracted_whl_files",
249+
},
245250
{
246251
"name": "pkg",
247252
"actual": "//_groups:my_group_pkg",

tests/pypi/whl_library_targets/whl_library_targets_tests.bzl

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -27,9 +27,10 @@ _tests = []
2727
def _test_filegroups(env):
2828
calls = []
2929

30-
def glob(match, *, allow_empty):
30+
def glob(include, *, exclude = [], allow_empty):
31+
_ = exclude # @unused
3132
env.expect.that_bool(allow_empty).equals(True)
32-
return match
33+
return include
3334

3435
whl_library_targets(
3536
name = "",
@@ -41,7 +42,7 @@ def _test_filegroups(env):
4142
rules = struct(),
4243
)
4344

44-
env.expect.that_collection(calls).contains_exactly([
45+
env.expect.that_collection(calls, expr = "filegroup calls").contains_exactly([
4546
{
4647
"name": "dist_info",
4748
"srcs": ["site-packages/*.dist-info/**"],
@@ -52,6 +53,11 @@ def _test_filegroups(env):
5253
"srcs": ["data/**"],
5354
"visibility": ["//visibility:public"],
5455
},
56+
{
57+
"name": "extracted_whl_files",
58+
"srcs": ["**"],
59+
"visibility": ["//visibility:public"],
60+
},
5561
{
5662
"name": "whl",
5763
"srcs": [""],

0 commit comments

Comments
 (0)