Skip to content

Commit da23db5

Browse files
authored
refactor(gazelle): Generate a modules map per wheel, then merge (#3415)
This change internally splits modules mapping generation to be per-wheel, with a final quick "merge" action at the end. The idea is to make this process both concurrent and cached (courtesy of Bazel), which can be ideal for codebases with a large set of requirements, as many monorepos end up having. Note that the `generator.py` interface changed. This seemed internal, so I didn't mark it breaking (this change could also have left the generator alone, since the current implementation is fine with a single wheel). I ran this on the work repo and saw no change in output, and since I had edited only a single requirement, the overall process was fast ⚡.
1 parent 3440572 commit da23db5

File tree

6 files changed

+174
-28
lines changed

6 files changed

+174
-28
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ END_UNRELEASED_TEMPLATE
6262
{#v0-0-0-changed}
6363
### Changed
6464
* (toolchains) Use toolchains from the [20251031] release.
65+
* (gazelle) Internally split modules mapping generation to be per-wheel for concurrency and caching.
6566

6667
{#v0-0-0-fixed}
6768
### Fixed

gazelle/modules_mapping/BUILD.bazel

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@ py_binary(
99
visibility = ["//visibility:public"],
1010
)
1111

12+
py_binary(
13+
name = "merger",
14+
srcs = ["merger.py"],
15+
visibility = ["//visibility:public"],
16+
)
17+
1218
copy_file(
1319
name = "pytest_wheel",
1420
src = "@pytest//file",
@@ -33,6 +39,18 @@ py_test(
3339
deps = [":generator"],
3440
)
3541

42+
py_test(
43+
name = "test_merger",
44+
srcs = ["test_merger.py"],
45+
data = [
46+
"django_types_wheel",
47+
"pytest_wheel",
48+
],
49+
imports = ["."],
50+
main = "test_merger.py",
51+
deps = [":merger"],
52+
)
53+
3654
filegroup(
3755
name = "distribution",
3856
srcs = glob(["**"]),

gazelle/modules_mapping/def.bzl

Lines changed: 32 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,25 +30,39 @@ def _modules_mapping_impl(ctx):
3030
transitive = [dep[DefaultInfo].files for dep in ctx.attr.wheels] + [dep[DefaultInfo].data_runfiles.files for dep in ctx.attr.wheels],
3131
)
3232

33-
args = ctx.actions.args()
33+
# Run the generator once per-wheel (to leverage caching)
34+
per_wheel_outputs = []
35+
for idx, whl in enumerate(all_wheels.to_list()):
36+
wheel_modules_mapping = ctx.actions.declare_file("{}.{}".format(modules_mapping.short_path, idx))
37+
args = ctx.actions.args()
38+
args.add("--output_file", wheel_modules_mapping.path)
39+
if ctx.attr.include_stub_packages:
40+
args.add("--include_stub_packages")
41+
args.add_all("--exclude_patterns", ctx.attr.exclude_patterns)
42+
args.add("--wheel", whl.path)
3443

35-
# Spill parameters to a file prefixed with '@'. Note, the '@' prefix is the same
36-
# prefix as used in the `generator.py` in `fromfile_prefix_chars` attribute.
37-
args.use_param_file(param_file_arg = "@%s")
38-
args.set_param_file_format(format = "multiline")
39-
if ctx.attr.include_stub_packages:
40-
args.add("--include_stub_packages")
41-
args.add("--output_file", modules_mapping)
42-
args.add_all("--exclude_patterns", ctx.attr.exclude_patterns)
43-
args.add_all("--wheels", all_wheels)
44+
ctx.actions.run(
45+
inputs = [whl],
46+
outputs = [wheel_modules_mapping],
47+
executable = ctx.executable._generator,
48+
arguments = [args],
49+
use_default_shell_env = False,
50+
)
51+
per_wheel_outputs.append(wheel_modules_mapping)
52+
53+
# Then merge the individual JSONs together
54+
merge_args = ctx.actions.args()
55+
merge_args.add("--output", modules_mapping.path)
56+
merge_args.add_all("--inputs", [f.path for f in per_wheel_outputs])
4457

4558
ctx.actions.run(
46-
inputs = all_wheels,
59+
inputs = per_wheel_outputs,
4760
outputs = [modules_mapping],
48-
executable = ctx.executable._generator,
49-
arguments = [args],
61+
executable = ctx.executable._merger,
62+
arguments = [merge_args],
5063
use_default_shell_env = False,
5164
)
65+
5266
return [DefaultInfo(files = depset([modules_mapping]))]
5367

5468
modules_mapping = rule(
@@ -79,6 +93,11 @@ modules_mapping = rule(
7993
default = "//modules_mapping:generator",
8094
executable = True,
8195
),
96+
"_merger": attr.label(
97+
cfg = "exec",
98+
default = "//modules_mapping:merger",
99+
executable = True,
100+
),
82101
},
83102
doc = "Creates a modules_mapping.json file for mapping module names to wheel distribution names.",
84103
)

gazelle/modules_mapping/generator.py

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -96,23 +96,28 @@ def module_for_path(self, path, whl):
9696
ext = "".join(pathlib.Path(root).suffixes)
9797
module = root[: -len(ext)].replace("/", ".")
9898
if not self.is_excluded(module):
99-
if not self.is_excluded(module):
100-
self.mapping[module] = wheel_name
99+
self.mapping[module] = wheel_name
101100

102101
def is_excluded(self, module):
103102
for pattern in self.excluded_patterns:
104103
if pattern.search(module):
105104
return True
106105
return False
107106

108-
# run is the entrypoint for the generator.
109-
def run(self, wheels):
110-
for whl in wheels:
111-
try:
112-
self.dig_wheel(whl)
113-
except AssertionError as error:
114-
print(error, file=self.stderr)
115-
return 1
107+
def run(self, wheel: pathlib.Path) -> int:
108+
"""
109+
Entrypoint for the generator.
110+
111+
Args:
112+
wheel: The path to the wheel file (`.whl`)
113+
Returns:
114+
Exit code (for `sys.exit`)
115+
"""
116+
try:
117+
self.dig_wheel(wheel)
118+
except AssertionError as error:
119+
print(error, file=self.stderr)
120+
return 1
116121
self.simplify()
117122
mapping_json = json.dumps(self.mapping)
118123
with open(self.output_file, "w") as f:
@@ -152,16 +157,13 @@ def data_has_purelib_or_platlib(path):
152157
parser = argparse.ArgumentParser(
153158
prog="generator",
154159
description="Generates the modules mapping used by the Gazelle manifest.",
155-
# Automatically read parameters from a file. Note, the '@' is the same prefix
156-
# as set in the 'args.use_param_file' in the bazel rule.
157-
fromfile_prefix_chars="@",
158160
)
159161
parser.add_argument("--output_file", type=str)
160162
parser.add_argument("--include_stub_packages", action="store_true")
161163
parser.add_argument("--exclude_patterns", nargs="+", default=[])
162-
parser.add_argument("--wheels", nargs="+", default=[])
164+
parser.add_argument("--wheel", type=pathlib.Path)
163165
args = parser.parse_args()
164166
generator = Generator(
165167
sys.stderr, args.output_file, args.exclude_patterns, args.include_stub_packages
166168
)
167-
sys.exit(generator.run(args.wheels))
169+
sys.exit(generator.run(args.wheel))

gazelle/modules_mapping/merger.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
#!/usr/bin/env python3
2+
"""Merges multiple modules_mapping.json files into a single file."""
3+
4+
import argparse
5+
import json
6+
from pathlib import Path
7+
8+
9+
def merge_modules_mappings(input_files: list[Path], output_file: Path) -> None:
    """Combine several modules_mapping.json files into a single mapping.

    Args:
        input_files: Paths of the per-wheel JSON mapping files to combine.
        output_file: Destination path for the combined JSON mapping.

    Note:
        When the same module key appears in more than one input, the entry
        from the file appearing later in ``input_files`` wins.
    """
    combined: dict[str, str] = {}
    for source in input_files:
        # Later files overwrite earlier ones on key conflicts.
        combined |= json.loads(source.read_text())
    output_file.write_text(json.dumps(combined))
24+
25+
26+
if __name__ == "__main__":
    # Command-line entry point: merge the given per-wheel mapping files
    # into a single modules_mapping.json.
    arg_parser = argparse.ArgumentParser(
        description="Merge multiple modules_mapping.json files"
    )
    arg_parser.add_argument(
        "--output",
        required=True,
        type=Path,
        help="Output file path for merged mapping",
    )
    arg_parser.add_argument(
        "--inputs",
        required=True,
        nargs="+",
        type=Path,
        help="Input JSON files to merge",
    )
    parsed = arg_parser.parse_args()
    merge_modules_mappings(parsed.inputs, parsed.output)
Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
import pathlib
2+
import unittest
3+
import json
4+
import tempfile
5+
6+
from merger import merge_modules_mappings
7+
8+
9+
class MergerTest(unittest.TestCase):
10+
_tmpdir: tempfile.TemporaryDirectory
11+
12+
def setUp(self) -> None:
13+
super().setUp()
14+
self._tmpdir = tempfile.TemporaryDirectory()
15+
16+
def tearDown(self) -> None:
17+
super().tearDown()
18+
self._tmpdir.cleanup()
19+
del self._tmpdir
20+
21+
@property
22+
def tmppath(self) -> pathlib.Path:
23+
return pathlib.Path(self._tmpdir.name)
24+
25+
def make_input(self, mapping: dict[str, str]) -> pathlib.Path:
26+
_fd, file = tempfile.mkstemp(suffix=".json", dir=self._tmpdir.name)
27+
path = pathlib.Path(file)
28+
path.write_text(json.dumps(mapping))
29+
return path
30+
31+
def test_merger(self):
32+
output_path = self.tmppath / "output.json"
33+
merge_modules_mappings(
34+
[
35+
self.make_input(
36+
{
37+
"_pytest": "pytest",
38+
"_pytest.__init__": "pytest",
39+
"_pytest._argcomplete": "pytest",
40+
"_pytest.config.argparsing": "pytest",
41+
}
42+
),
43+
self.make_input({"django_types": "django_types"}),
44+
],
45+
output_path,
46+
)
47+
48+
self.assertEqual(
49+
{
50+
"_pytest": "pytest",
51+
"_pytest.__init__": "pytest",
52+
"_pytest._argcomplete": "pytest",
53+
"_pytest.config.argparsing": "pytest",
54+
"django_types": "django_types",
55+
},
56+
json.loads(output_path.read_text()),
57+
)
58+
59+
60+
if __name__ == "__main__":
    # Allow running this test module directly, outside of Bazel.
    unittest.main()

0 commit comments

Comments (0)