Skip to content

Commit f266829

Browse files
jklukasgemini-code-assist[bot]rickeylev
authored
feat(runfiles): support for --incompatible_compact_repo_mapping_manifest (#3277)
Under bzlmod, the repo mapping can become quite large (i.e. tens of megabytes) because its size scales as a factor of the number of repos in the transitive dependencies. To address this, the --incompatible_compact_repo_mapping_manifest flag was introduced. This changes the repo mapping formation to use prefixes (instead of exact repo names) for mapping things. To make this work with the runfiles library, the code has to be updated to handle these prefixes instead of just exact strings. Fixes #3022. --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Richard Levasseur <[email protected]>
1 parent c566258 commit f266829

File tree

3 files changed

+292
-32
lines changed

3 files changed

+292
-32
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,8 @@ END_UNRELEASED_TEMPLATE
8585

8686
{#v0-0-0-added}
8787
### Added
88+
* (runfiles) The Python runfiles library now supports Bazel's
89+
`--incompatible_compact_repo_mapping_manifest` flag.
8890
* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the
8991
{obj}`main_module` attribute.
9092
* (bootstrap) {obj}`--bootstrap_impl=system_python` now supports the

python/runfiles/runfiles.py

Lines changed: 130 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,131 @@
1515
"""Runfiles lookup library for Bazel-built Python binaries and tests.
1616
1717
See @rules_python//python/runfiles/README.md for usage instructions.
18+
19+
:::{versionadded} VERSION_NEXT_FEATURE
20+
Support for Bazel's `--incompatible_compact_repo_mapping_manifest` flag was added.
21+
This enables prefix-based repository mappings to reduce memory usage for large
22+
dependency graphs under bzlmod.
23+
:::
1824
"""
25+
import collections.abc
1926
import inspect
2027
import os
2128
import posixpath
2229
import sys
30+
from collections import defaultdict
2331
from typing import Dict, Optional, Tuple, Union
2432

2533

34+
class _RepositoryMapping:
35+
"""Repository mapping for resolving apparent repository names to canonical ones.
36+
37+
Handles both exact mappings and prefix-based mappings introduced by the
38+
--incompatible_compact_repo_mapping_manifest flag.
39+
"""
40+
41+
def __init__(
42+
self,
43+
exact_mappings: Dict[Tuple[str, str], str],
44+
prefixed_mappings: Dict[Tuple[str, str], str],
45+
) -> None:
46+
"""Initialize repository mapping with exact and prefixed mappings.
47+
48+
Args:
49+
exact_mappings: Dict mapping (source_canonical, target_apparent) -> target_canonical
50+
prefixed_mappings: Dict mapping (source_prefix, target_apparent) -> target_canonical
51+
"""
52+
self._exact_mappings = exact_mappings
53+
54+
# Group prefixed mappings by target_apparent for faster lookups
55+
self._grouped_prefixed_mappings = defaultdict(list)
56+
for (
57+
prefix_source,
58+
target_app,
59+
), target_canonical in prefixed_mappings.items():
60+
self._grouped_prefixed_mappings[target_app].append(
61+
(prefix_source, target_canonical)
62+
)
63+
64+
@staticmethod
65+
def create_from_file(repo_mapping_path: Optional[str]) -> "_RepositoryMapping":
66+
"""Create RepositoryMapping from a repository mapping manifest file.
67+
68+
Args:
69+
repo_mapping_path: Path to the repository mapping file, or None if not available
70+
71+
Returns:
72+
RepositoryMapping instance with parsed mappings
73+
"""
74+
# If the repository mapping file can't be found, that is not an error: We
75+
# might be running without Bzlmod enabled or there may not be any runfiles.
76+
# In this case, just apply empty repo mappings.
77+
if not repo_mapping_path:
78+
return _RepositoryMapping({}, {})
79+
80+
try:
81+
with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
82+
content = f.read()
83+
except FileNotFoundError:
84+
return _RepositoryMapping({}, {})
85+
86+
exact_mappings = {}
87+
prefixed_mappings = {}
88+
for line in content.splitlines():
89+
source_canonical, target_apparent, target_canonical = line.split(",")
90+
if source_canonical.endswith("*"):
91+
# This is a prefixed mapping - remove the '*' for prefix matching
92+
prefix = source_canonical[:-1]
93+
prefixed_mappings[(prefix, target_apparent)] = target_canonical
94+
else:
95+
# This is an exact mapping
96+
exact_mappings[(source_canonical, target_apparent)] = target_canonical
97+
98+
return _RepositoryMapping(exact_mappings, prefixed_mappings)
99+
100+
def lookup(self, source_repo: Optional[str], target_apparent: str) -> Optional[str]:
101+
"""Look up repository mapping for the given source and target.
102+
103+
This handles both exact mappings and prefix-based mappings introduced by the
104+
--incompatible_compact_repo_mapping_manifest flag. Exact mappings are tried
105+
first, followed by prefix-based mappings where order matters.
106+
107+
Args:
108+
source_repo: Source canonical repository name
109+
target_apparent: Target apparent repository name
110+
111+
Returns:
112+
target_canonical repository name, or None if no mapping exists
113+
"""
114+
if source_repo is None:
115+
return None
116+
117+
key = (source_repo, target_apparent)
118+
119+
# Try exact mapping first
120+
if key in self._exact_mappings:
121+
return self._exact_mappings[key]
122+
123+
# Try prefixed mapping if no exact match found
124+
if target_apparent in self._grouped_prefixed_mappings:
125+
for prefix_source, target_canonical in self._grouped_prefixed_mappings[
126+
target_apparent
127+
]:
128+
if source_repo.startswith(prefix_source):
129+
return target_canonical
130+
131+
# No mapping found
132+
return None
133+
134+
def is_empty(self) -> bool:
135+
"""Check if this repository mapping is empty (no exact or prefixed mappings).
136+
137+
Returns:
138+
True if there are no mappings, False otherwise
139+
"""
140+
return len(self._exact_mappings) == 0 and len(self._grouped_prefixed_mappings) == 0
141+
142+
26143
class _ManifestBased:
27144
"""`Runfiles` strategy that parses a runfiles-manifest to look up runfiles."""
28145

@@ -130,7 +247,7 @@ class Runfiles:
130247
def __init__(self, strategy: Union[_ManifestBased, _DirectoryBased]) -> None:
131248
self._strategy = strategy
132249
self._python_runfiles_root = _FindPythonRunfilesRoot()
133-
self._repo_mapping = _ParseRepoMapping(
250+
self._repo_mapping = _RepositoryMapping.create_from_file(
134251
strategy.RlocationChecked("_repo_mapping")
135252
)
136253

@@ -179,7 +296,7 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
179296
if os.path.isabs(path):
180297
return path
181298

182-
if source_repo is None and self._repo_mapping:
299+
if source_repo is None and not self._repo_mapping.is_empty():
183300
# Look up runfiles using the repository mapping of the caller of the
184301
# current method. If the repo mapping is empty, determining this
185302
# name is not necessary.
@@ -188,7 +305,8 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
188305
# Split off the first path component, which contains the repository
189306
# name (apparent or canonical).
190307
target_repo, _, remainder = path.partition("/")
191-
if not remainder or (source_repo, target_repo) not in self._repo_mapping:
308+
target_canonical = self._repo_mapping.lookup(source_repo, target_repo)
309+
if not remainder or target_canonical is None:
192310
# One of the following is the case:
193311
# - not using Bzlmod, so the repository mapping is empty and
194312
# apparent and canonical repository names are the same
@@ -202,11 +320,15 @@ def Rlocation(self, path: str, source_repo: Optional[str] = None) -> Optional[st
202320
source_repo is not None
203321
), "BUG: if the `source_repo` is None, we should never go past the `if` statement above"
204322

205-
# target_repo is an apparent repository name. Look up the corresponding
206-
# canonical repository name with respect to the current repository,
207-
# identified by its canonical name.
208-
target_canonical = self._repo_mapping[(source_repo, target_repo)]
209-
return self._strategy.RlocationChecked(target_canonical + "/" + remainder)
323+
# Look up the target repository using the repository mapping
324+
if target_canonical is not None:
325+
return self._strategy.RlocationChecked(
326+
target_canonical + "/" + remainder
327+
)
328+
329+
# No mapping found - assume target_repo is already canonical or
330+
# we're not using Bzlmod
331+
return self._strategy.RlocationChecked(path)
210332

211333
def EnvVars(self) -> Dict[str, str]:
212334
"""Returns environment variables for subprocesses.
@@ -359,30 +481,6 @@ def _FindPythonRunfilesRoot() -> str:
359481
return root
360482

361483

362-
def _ParseRepoMapping(repo_mapping_path: Optional[str]) -> Dict[Tuple[str, str], str]:
363-
"""Parses the repository mapping manifest."""
364-
# If the repository mapping file can't be found, that is not an error: We
365-
# might be running without Bzlmod enabled or there may not be any runfiles.
366-
# In this case, just apply an empty repo mapping.
367-
if not repo_mapping_path:
368-
return {}
369-
try:
370-
with open(repo_mapping_path, "r", encoding="utf-8", newline="\n") as f:
371-
content = f.read()
372-
except FileNotFoundError:
373-
return {}
374-
375-
repo_mapping = {}
376-
for line in content.split("\n"):
377-
if not line:
378-
# Empty line following the last line break
379-
break
380-
current_canonical, target_local, target_canonical = line.split(",")
381-
repo_mapping[(current_canonical, target_local)] = target_canonical
382-
383-
return repo_mapping
384-
385-
386484
def CreateManifestBased(manifest_path: str) -> Runfiles:
387485
return Runfiles.CreateManifestBased(manifest_path)
388486

tests/runfiles/runfiles_test.py

Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
from typing import Any, List, Optional
1919

2020
from python.runfiles import runfiles
21+
from python.runfiles.runfiles import _RepositoryMapping
2122

2223

2324
class RunfilesTest(unittest.TestCase):
@@ -525,6 +526,165 @@ def testDirectoryBasedRlocationWithRepoMappingFromOtherRepo(self) -> None:
525526
r.Rlocation("config.json", "protobuf~3.19.2"), dir + "/config.json"
526527
)
527528

529+
def testDirectoryBasedRlocationWithCompactRepoMappingFromMain(self) -> None:
530+
"""Test repository mapping with prefix-based entries (compact format)."""
531+
with _MockFile(
532+
name="_repo_mapping",
533+
contents=[
534+
# Exact mappings (no asterisk)
535+
"_,config.json,config.json~1.2.3",
536+
",my_module,_main",
537+
",my_workspace,_main",
538+
# Prefixed mappings (with asterisk) - these apply to any repo starting with the prefix
539+
"deps+*,external_dep,external_dep~1.0.0",
540+
"test_deps+*,test_lib,test_lib~2.1.0",
541+
],
542+
) as rm:
543+
dir = os.path.dirname(rm.Path())
544+
r = runfiles.CreateDirectoryBased(dir)
545+
546+
# Test exact mappings still work
547+
self.assertEqual(
548+
r.Rlocation("my_module/bar/runfile", ""), dir + "/_main/bar/runfile"
549+
)
550+
self.assertEqual(
551+
r.Rlocation("my_workspace/bar/runfile", ""), dir + "/_main/bar/runfile"
552+
)
553+
554+
# Test prefixed mappings - should match any repo starting with "deps+"
555+
self.assertEqual(
556+
r.Rlocation("external_dep/foo/file", "deps+dep1"),
557+
dir + "/external_dep~1.0.0/foo/file",
558+
)
559+
self.assertEqual(
560+
r.Rlocation("external_dep/bar/file", "deps+dep2"),
561+
dir + "/external_dep~1.0.0/bar/file",
562+
)
563+
self.assertEqual(
564+
r.Rlocation("external_dep/nested/path/file", "deps+some_long_dep_name"),
565+
dir + "/external_dep~1.0.0/nested/path/file",
566+
)
567+
568+
# Test that prefixed mappings work for test_deps+ prefix too
569+
self.assertEqual(
570+
r.Rlocation("test_lib/test/file", "test_deps+junit"),
571+
dir + "/test_lib~2.1.0/test/file",
572+
)
573+
574+
# Test that non-matching prefixes don't match
575+
self.assertEqual(
576+
r.Rlocation("external_dep/foo/file", "other_prefix"),
577+
dir + "/external_dep/foo/file", # No mapping applied, use as-is
578+
)
579+
580+
def testDirectoryBasedRlocationWithCompactRepoMappingPrecedence(self) -> None:
581+
"""Test that exact mappings take precedence over prefixed mappings."""
582+
with _MockFile(
583+
name="_repo_mapping",
584+
contents=[
585+
# Exact mapping for a specific source repo
586+
"deps+specific_repo,external_dep,external_dep~exact",
587+
# Prefixed mapping for repos starting with "deps+"
588+
"deps+*,external_dep,external_dep~prefix",
589+
# Another prefixed mapping with different prefix
590+
"other+*,external_dep,external_dep~other",
591+
],
592+
) as rm:
593+
dir = os.path.dirname(rm.Path())
594+
r = runfiles.CreateDirectoryBased(dir)
595+
596+
# Exact mapping should take precedence over prefix
597+
self.assertEqual(
598+
r.Rlocation("external_dep/foo/file", "deps+specific_repo"),
599+
dir + "/external_dep~exact/foo/file",
600+
)
601+
602+
# Other repos with deps+ prefix should use the prefixed mapping
603+
self.assertEqual(
604+
r.Rlocation("external_dep/foo/file", "deps+other_repo"),
605+
dir + "/external_dep~prefix/foo/file",
606+
)
607+
608+
# Different prefix should use its own mapping
609+
self.assertEqual(
610+
r.Rlocation("external_dep/foo/file", "other+some_repo"),
611+
dir + "/external_dep~other/foo/file",
612+
)
613+
614+
def testDirectoryBasedRlocationWithCompactRepoMappingOrderMatters(self) -> None:
615+
"""Test that order matters for prefixed mappings (first match wins)."""
616+
with _MockFile(
617+
name="_repo_mapping",
618+
contents=[
619+
# More specific prefix comes first
620+
"deps+specific+*,lib,lib~specific",
621+
# More general prefix comes second
622+
"deps+*,lib,lib~general",
623+
],
624+
) as rm:
625+
dir = os.path.dirname(rm.Path())
626+
r = runfiles.CreateDirectoryBased(dir)
627+
628+
# Should match the more specific prefix first
629+
self.assertEqual(
630+
r.Rlocation("lib/foo/file", "deps+specific+repo"),
631+
dir + "/lib~specific/foo/file",
632+
)
633+
634+
# Should match the general prefix for non-specific repos
635+
self.assertEqual(
636+
r.Rlocation("lib/foo/file", "deps+other_repo"),
637+
dir + "/lib~general/foo/file",
638+
)
639+
640+
def testRepositoryMappingLookup(self) -> None:
641+
"""Test _RepositoryMapping.lookup() method for both exact and prefix-based mappings."""
642+
exact_mappings = {
643+
("", "my_workspace"): "_main",
644+
("", "config_lib"): "config_lib~1.0.0",
645+
("deps+specific_repo", "external_dep"): "external_dep~exact",
646+
}
647+
prefixed_mappings = {
648+
("deps+", "external_dep"): "external_dep~prefix",
649+
("test_deps+", "test_lib"): "test_lib~2.1.0",
650+
}
651+
652+
repo_mapping = _RepositoryMapping(exact_mappings, prefixed_mappings)
653+
654+
# Test exact lookups
655+
self.assertEqual(repo_mapping.lookup("", "my_workspace"), "_main")
656+
self.assertEqual(repo_mapping.lookup("", "config_lib"), "config_lib~1.0.0")
657+
self.assertEqual(
658+
repo_mapping.lookup("deps+specific_repo", "external_dep"),
659+
"external_dep~exact",
660+
)
661+
662+
# Test prefix-based lookups
663+
self.assertEqual(
664+
repo_mapping.lookup("deps+some_repo", "external_dep"), "external_dep~prefix"
665+
)
666+
self.assertEqual(
667+
repo_mapping.lookup("test_deps+another_repo", "test_lib"), "test_lib~2.1.0"
668+
)
669+
670+
# Test that exact takes precedence over prefix
671+
self.assertEqual(
672+
repo_mapping.lookup("deps+specific_repo", "external_dep"),
673+
"external_dep~exact",
674+
)
675+
676+
# Test non-existent mapping
677+
self.assertIsNone(repo_mapping.lookup("nonexistent", "repo"))
678+
self.assertIsNone(repo_mapping.lookup("unknown+repo", "missing"))
679+
680+
# Test empty mapping
681+
empty_mapping = _RepositoryMapping({}, {})
682+
self.assertIsNone(empty_mapping.lookup("any", "repo"))
683+
684+
# Test is_empty() method
685+
self.assertFalse(repo_mapping.is_empty()) # Should have mappings
686+
self.assertTrue(empty_mapping.is_empty()) # Should be empty
687+
528688
def testCurrentRepository(self) -> None:
529689
# Under bzlmod, the current repository name is the empty string instead
530690
# of the name in the workspace file.

0 commit comments

Comments
 (0)