Skip to content

Commit 608ddb7

Browse files
authored
refactor(whl_library): move bazel file generation to Starlark (#1336)
Before this PR, the `wheel_installer` was doing three things: 1. Downloading the right wheel. 2. Extracting it into the output directory. 3. Generating BUILD.bazel files based on the extracted contents. This PR moves the third part into the `whl_library` repository rule, which has the following benefits: * We can reduce code duplication and consolidate the label sanitization functions in rules_python. * There are many things that the `wheel_installer` no longer needs to care about, so we need to change less code when extending `whl_library`, as many things can now be done directly in Starlark. * It becomes easier to change the API of how we expose the generated BUILD.bazel patching, because we only need to change the Starlark functions. Work towards #1330.
1 parent e355bec commit 608ddb7

18 files changed

+613
-772
lines changed

python/pip_install/BUILD.bazel

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@ filegroup(
44
"BUILD.bazel",
55
"//python/pip_install/private:distribution",
66
"//python/pip_install/tools/dependency_resolver:distribution",
7-
"//python/pip_install/tools/lib:distribution",
87
"//python/pip_install/tools/wheel_installer:distribution",
98
],
109
visibility = ["//:__pkg__"],
@@ -22,7 +21,6 @@ filegroup(
2221
name = "py_srcs",
2322
srcs = [
2423
"//python/pip_install/tools/dependency_resolver:py_srcs",
25-
"//python/pip_install/tools/lib:py_srcs",
2624
"//python/pip_install/tools/wheel_installer:py_srcs",
2725
],
2826
visibility = ["//python/pip_install/private:__pkg__"],

python/pip_install/pip_repository.bzl

Lines changed: 67 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ load("//python:repositories.bzl", "get_interpreter_dirname", "is_standalone_inte
1818
load("//python:versions.bzl", "WINDOWS_NAME")
1919
load("//python/pip_install:repositories.bzl", "all_requirements")
2020
load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse")
21+
load("//python/pip_install/private:generate_whl_library_build_bazel.bzl", "generate_whl_library_build_bazel")
2122
load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS")
2223
load("//python/private:bzlmod_enabled.bzl", "BZLMOD_ENABLED")
2324
load("//python/private:normalize_name.bzl", "normalize_name")
@@ -27,6 +28,8 @@ CPPFLAGS = "CPPFLAGS"
2728

2829
COMMAND_LINE_TOOLS_PATH_SLUG = "commandlinetools"
2930

31+
_WHEEL_ENTRY_POINT_PREFIX = "rules_python_wheel_entry_point"
32+
3033
def _construct_pypath(rctx):
3134
"""Helper function to construct a PYTHONPATH.
3235
@@ -663,16 +666,7 @@ def _whl_library_impl(rctx):
663666
"python.pip_install.tools.wheel_installer.wheel_installer",
664667
"--requirement",
665668
rctx.attr.requirement,
666-
"--repo",
667-
rctx.attr.repo,
668-
"--repo-prefix",
669-
rctx.attr.repo_prefix,
670669
]
671-
if rctx.attr.annotation:
672-
args.extend([
673-
"--annotation",
674-
rctx.path(rctx.attr.annotation),
675-
])
676670

677671
args = _parse_optional_attrs(rctx, args)
678672

@@ -687,8 +681,72 @@ def _whl_library_impl(rctx):
687681
if result.return_code:
688682
fail("whl_library %s failed: %s (%s) error code: '%s'" % (rctx.attr.name, result.stdout, result.stderr, result.return_code))
689683

684+
metadata = json.decode(rctx.read("metadata.json"))
685+
rctx.delete("metadata.json")
686+
687+
entry_points = {}
688+
for item in metadata["entry_points"]:
689+
name = item["name"]
690+
module = item["module"]
691+
attribute = item["attribute"]
692+
693+
# There is an extreme edge-case with entry_points that end with `.py`
694+
# See: https://github.com/bazelbuild/bazel/blob/09c621e4cf5b968f4c6cdf905ab142d5961f9ddc/src/test/java/com/google/devtools/build/lib/rules/python/PyBinaryConfiguredTargetTest.java#L174
695+
entry_point_without_py = name[:-3] + "_py" if name.endswith(".py") else name
696+
entry_point_target_name = (
697+
_WHEEL_ENTRY_POINT_PREFIX + "_" + entry_point_without_py
698+
)
699+
entry_point_script_name = entry_point_target_name + ".py"
700+
701+
rctx.file(
702+
entry_point_script_name,
703+
_generate_entry_point_contents(module, attribute),
704+
)
705+
entry_points[entry_point_without_py] = entry_point_script_name
706+
707+
build_file_contents = generate_whl_library_build_bazel(
708+
repo_prefix = rctx.attr.repo_prefix,
709+
dependencies = metadata["deps"],
710+
data_exclude = rctx.attr.pip_data_exclude,
711+
tags = [
712+
"pypi_name=" + metadata["name"],
713+
"pypi_version=" + metadata["version"],
714+
],
715+
entry_points = entry_points,
716+
annotation = None if not rctx.attr.annotation else struct(**json.decode(rctx.read(rctx.attr.annotation))),
717+
)
718+
rctx.file("BUILD.bazel", build_file_contents)
719+
690720
return
691721

722+
def _generate_entry_point_contents(
        module,
        attribute,
        shebang = "#!/usr/bin/env python3"):
    """Generate the contents of an entry point script.

    The generated script imports the entry point object from `module` and
    passes the result of calling it to `sys.exit`.

    Args:
        module (str): The name of the module to import from.
        attribute (str): The object reference to call. It may be a dotted
            path (e.g. `Cls.method`); in that case only the first component
            is imported and the full dotted path is called.
        shebang (str, optional): The shebang to use for the entry point python
            file.

    Returns:
        str: A string of python code.
    """

    # `from mod import a.b` is not valid Python, so import only the leading
    # name of the object reference. For a plain (non-dotted) attribute this
    # is the attribute itself, which keeps the generated script unchanged.
    importable = attribute.split(".")[0]

    contents = """\
{shebang}
import sys
from {module} import {importable}
if __name__ == "__main__":
    sys.exit({attribute}())
""".format(
        shebang = shebang,
        module = module,
        importable = importable,
        attribute = attribute,
    )
    return contents
749+
692750
whl_library_attrs = {
693751
"annotation": attr.label(
694752
doc = (
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
# Copyright 2023 The Bazel Authors. All rights reserved.
2+
#
3+
# Licensed under the Apache License, Version 2.0 (the "License");
4+
# you may not use this file except in compliance with the License.
5+
# You may obtain a copy of the License at
6+
#
7+
# http://www.apache.org/licenses/LICENSE-2.0
8+
#
9+
# Unless required by applicable law or agreed to in writing, software
10+
# distributed under the License is distributed on an "AS IS" BASIS,
11+
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
# See the License for the specific language governing permissions and
13+
# limitations under the License.
14+
15+
"""Generate the BUILD.bazel contents for a repo defined by a whl_library."""
16+
17+
load("//python/private:normalize_name.bzl", "normalize_name")
18+
19+
_WHEEL_FILE_LABEL = "whl"
20+
_PY_LIBRARY_LABEL = "pkg"
21+
_DATA_LABEL = "data"
22+
_DIST_INFO_LABEL = "dist_info"
23+
_WHEEL_ENTRY_POINT_PREFIX = "rules_python_wheel_entry_point"
24+
25+
_COPY_FILE_TEMPLATE = """\
26+
copy_file(
27+
name = "{dest}.copy",
28+
src = "{src}",
29+
out = "{dest}",
30+
is_executable = {is_executable},
31+
)
32+
"""
33+
34+
_ENTRY_POINT_RULE_TEMPLATE = """\
35+
py_binary(
36+
name = "{name}",
37+
srcs = ["{src}"],
38+
# This makes this directory a top-level in the python import
39+
# search path for anything that depends on this.
40+
imports = ["."],
41+
deps = ["{pkg}"],
42+
)
43+
"""
44+
45+
_BUILD_TEMPLATE = """\
46+
load("@rules_python//python:defs.bzl", "py_library", "py_binary")
47+
load("@bazel_skylib//rules:copy_file.bzl", "copy_file")
48+
49+
package(default_visibility = ["//visibility:public"])
50+
51+
filegroup(
52+
name = "{dist_info_label}",
53+
srcs = glob(["site-packages/*.dist-info/**"], allow_empty = True),
54+
)
55+
56+
filegroup(
57+
name = "{data_label}",
58+
srcs = glob(["data/**"], allow_empty = True),
59+
)
60+
61+
filegroup(
62+
name = "{whl_file_label}",
63+
srcs = glob(["*.whl"], allow_empty = True),
64+
data = {whl_file_deps},
65+
)
66+
67+
py_library(
68+
name = "{name}",
69+
srcs = glob(
70+
["site-packages/**/*.py"],
71+
exclude={srcs_exclude},
72+
# Empty sources are allowed to support wheels that don't have any
73+
# pure-Python code, e.g. pymssql, which is written in Cython.
74+
allow_empty = True,
75+
),
76+
data = {data} + glob(
77+
["site-packages/**/*"],
78+
exclude={data_exclude},
79+
),
80+
# This makes this directory a top-level in the python import
81+
# search path for anything that depends on this.
82+
imports = ["site-packages"],
83+
deps = {dependencies},
84+
tags = {tags},
85+
)
86+
"""
87+
88+
def generate_whl_library_build_bazel(
        repo_prefix,
        dependencies,
        data_exclude,
        tags,
        entry_points,
        annotation = None):
    """Render the BUILD.bazel contents for the repo of an extracted wheel.

    Args:
        repo_prefix: prefix prepended to each normalized dependency name to
            form the name of the repository holding that dependency.
        dependencies: list of PyPI package names the py_library depends on.
        data_exclude: extra glob patterns to exclude from the `data` attribute
            of the generated py_library. The list is not modified.
        tags: list of tags to apply to the generated py_library.
        entry_points: dict mapping an entry point name to the script file
            that a py_binary should be generated for.
        annotation: optional struct with `copy_files`, `copy_executables`,
            `data`, `data_exclude_glob`, `srcs_exclude_glob` and
            `additive_build_content` fields used to customize the output.

    Returns:
        The complete BUILD file as a string, terminated by a single newline.
    """

    # Work on a copy so that the caller's list is left untouched.
    requested_data_exclude = [] + data_exclude
    extra_data = []
    extra_srcs_exclude = []

    # Every entry point becomes a py_binary depending on the py_library.
    extra_rules = [
        _generate_entry_point_rule(
            name = "{}_{}".format(_WHEEL_ENTRY_POINT_PREFIX, ep_name),
            script = ep_script,
            pkg = ":" + _PY_LIBRARY_LABEL,
        )
        for ep_name, ep_script in entry_points.items()
    ]

    if annotation:
        # Copied files are exposed to the py_library through `data`.
        for src, dest in annotation.copy_files.items():
            extra_data.append(dest)
            extra_rules.append(_generate_copy_commands(src, dest))
        for src, dest in annotation.copy_executables.items():
            extra_data.append(dest)
            extra_rules.append(
                _generate_copy_commands(src, dest, is_executable = True),
            )
        extra_data.extend(annotation.data)
        requested_data_exclude.extend(annotation.data_exclude_glob)
        extra_srcs_exclude.extend(annotation.srcs_exclude_glob)
        if annotation.additive_build_content:
            extra_rules.append(annotation.additive_build_content)

    # Patterns that are always excluded from `data`.
    all_data_exclude = [
        "**/* *",
        "**/*.py",
        "**/*.pyc",
        # Temporary files named *.pyc.NNNN are created while pyc files are
        # being written.
        "**/*.pyc.*",
        # RECORD holds sha256 checksums that may cover files generated when
        # the wheel is installed; it is ignored to avoid Bazel caching issues.
        "**/*.dist-info/RECORD",
    ]

    # Append the requested patterns, skipping any that are already present.
    for pattern in requested_data_exclude:
        if pattern not in all_data_exclude:
            all_data_exclude.append(pattern)

    # Labels of the dependencies share the same "@<repo>//:" root and differ
    # only in the target name.
    dep_roots = [
        "@" + repo_prefix + normalize_name(dep) + "//:"
        for dep in sorted(dependencies)
    ]
    lib_dependencies = [root + _PY_LIBRARY_LABEL for root in dep_roots]
    whl_file_deps = [root + _WHEEL_FILE_LABEL for root in dep_roots]

    main_content = _BUILD_TEMPLATE.format(
        name = _PY_LIBRARY_LABEL,
        dependencies = repr(lib_dependencies),
        data_exclude = repr(all_data_exclude),
        whl_file_label = _WHEEL_FILE_LABEL,
        whl_file_deps = repr(whl_file_deps),
        tags = repr(sorted(tags)),
        data_label = _DATA_LABEL,
        dist_info_label = _DIST_INFO_LABEL,
        entry_point_prefix = _WHEEL_ENTRY_POINT_PREFIX,
        srcs_exclude = repr(extra_srcs_exclude),
        data = repr(extra_data),
    )

    # NOTE: Strip trailing whitespace and end with exactly one newline.
    sections = [main_content] + extra_rules
    return "\n".join(sections).rstrip() + "\n"
183+
184+
def _generate_copy_commands(src, dest, is_executable = False):
    """Render a [@bazel_skylib//rules:copy_file.bzl%copy_file][cf] target.

    The generated target is named `<dest>.copy`.

    [cf]: https://github.com/bazelbuild/bazel-skylib/blob/1.1.1/docs/copy_file_doc.md

    Args:
        src (str): The value for the `src` attribute of [copy_file][cf].
        dest (str): The value for the `out` attribute of [copy_file][cf].
        is_executable (bool, optional): The value for the `is_executable`
            attribute of [copy_file][cf].

    Returns:
        str: A `copy_file` instantiation.
    """
    substitutions = {
        "dest": dest,
        "is_executable": is_executable,
        "src": src,
    }
    return _COPY_FILE_TEMPLATE.format(**substitutions)
203+
204+
def _generate_entry_point_rule(*, name, script, pkg):
    """Render a Bazel `py_binary` target for an entry point script.

    The name of an entry point target should be unique so that it does not
    conflict with existing sources or directories within a wheel.

    Args:
        name (str): The name of the generated py_binary.
        script (str): The path to the entry point's python file. Backslashes
            in the path are converted to forward slashes.
        pkg (str): The label of the target owning the entry point. This is
            expected to be the `py_library` defined for the repository.

    Returns:
        str: A `py_binary` instantiation.
    """

    # Normalize path separators before writing the path into `srcs`.
    normalized_script = "/".join(script.split("\\"))
    return _ENTRY_POINT_RULE_TEMPLATE.format(
        name = name,
        src = normalized_script,
        pkg = pkg,
    )

python/pip_install/private/srcs.bzl

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,10 +9,7 @@ This file is auto-generated from the `@rules_python//python/pip_install/private:
99
PIP_INSTALL_PY_SRCS = [
1010
"@rules_python//python/pip_install/tools/dependency_resolver:__init__.py",
1111
"@rules_python//python/pip_install/tools/dependency_resolver:dependency_resolver.py",
12-
"@rules_python//python/pip_install/tools/lib:__init__.py",
13-
"@rules_python//python/pip_install/tools/lib:annotation.py",
14-
"@rules_python//python/pip_install/tools/lib:arguments.py",
15-
"@rules_python//python/pip_install/tools/lib:bazel.py",
12+
"@rules_python//python/pip_install/tools/wheel_installer:arguments.py",
1613
"@rules_python//python/pip_install/tools/wheel_installer:namespace_pkgs.py",
1714
"@rules_python//python/pip_install/tools/wheel_installer:wheel.py",
1815
"@rules_python//python/pip_install/tools/wheel_installer:wheel_installer.py",

0 commit comments

Comments
 (0)