Skip to content

Commit e05cf00

Browse files
fix: correctly merge conflicting paths when files (instead of dirs) are being linked (#3458)
When a file (instead of directory) was duplicated, it was incorrectly be treated as a directory. The conflicting path was turned into a directory and all the files were made child paths of it. In practice, this usually worked out OK still because the conflicting file was an `__init__.py` file for a pkgutil-style namespace package, which effectively converted it to an implicit namespace package. I think this was introduced by #3448, but am not 100% sure. To fix, first check for exact path equality when merging conflicting paths. If the candidate file has the same link_to_path as the grouping, then just link to that file and skip any remaining files in the group. The rest of the group's files are skipped because none of them can contribute links anymore: * If they have the same path, then they are ignored (first set wins) * If they are a sub-path, then it violates the preconditions of the group (all files in the group should be exact matches or sub-paths of a common prefix) * If they aren't under the common prefix, then it violates the preconditions of the group and are skipped. --------- Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>
1 parent 0816a1f commit e05cf00

File tree

2 files changed

+94
-62
lines changed

2 files changed

+94
-62
lines changed

python/private/venv_runfiles.bzl

Lines changed: 36 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -163,8 +163,10 @@ def _group_venv_path_entries(entries):
163163
current_group = None
164164
current_group_prefix = None
165165
for entry in entries:
166-
prefix = entry.venv_path
167-
anchored_prefix = prefix + "/"
166+
# NOTE: When a file is being directly linked, the anchored prefix can look
167+
# odd, e.g. "foo/__init__.py/". This is OK; it's just used to prevent
168+
# incorrect prefix substring matching.
169+
anchored_prefix = entry.venv_path + "/"
168170
if (current_group_prefix == None or
169171
not anchored_prefix.startswith(current_group_prefix)):
170172
current_group_prefix = anchored_prefix
@@ -193,26 +195,41 @@ def _merge_venv_path_group(ctx, group, keep_map):
193195
# be symlinked directly, not the directory containing them, due to
194196
# dynamic linker symlink resolution semantics on Linux.
195197
for entry in group:
196-
prefix = entry.venv_path
198+
root_venv_path = entry.venv_path
199+
anchored_link_to_path = entry.link_to_path + "/"
197200
for file in entry.files.to_list():
198-
# Compute the file-specific venv path. i.e. the relative
199-
# path of the file under entry.venv_path, joined with
200-
# entry.venv_path
201201
rf_root_path = runfiles_root_path(ctx, file.short_path)
202-
if not rf_root_path.startswith(entry.link_to_path):
203-
# This generally shouldn't occur in practice, but just
204-
# in case, skip them, for lack of a better option.
205-
continue
206-
venv_path = "{}/{}".format(
207-
prefix,
208-
rf_root_path.removeprefix(entry.link_to_path + "/"),
209-
)
210202

211-
# For lack of a better option, first added wins. We happen to
212-
# go in top-down prefix order, so the highest level namespace
213-
# package typically wins.
214-
if venv_path not in keep_map:
215-
keep_map[venv_path] = file
203+
# It's a file (or directory) being directly linked and
204+
# must be directly linked.
205+
if rf_root_path == entry.link_to_path:
206+
# For lack of a better option, first added wins.
207+
if entry.venv_path not in keep_map:
208+
keep_map[entry.venv_path] = file
209+
210+
# Skip anything remaining: anything left is either
211+
# the same path (first set wins), a suffix (violates
212+
# preconditions and can't link anyways), or not under
213+
# the prefix (violates preconditions).
214+
break
215+
else:
216+
# Compute the file-specific venv path. i.e. the relative
217+
# path of the file under entry.venv_path, joined with
218+
# entry.venv_path
219+
head, match, rel_venv_path = rf_root_path.partition(anchored_link_to_path)
220+
if not match or head:
221+
# If link_to_path didn't match, then obviously skip.
222+
# If head is non-empty, it means link_to_path wasn't
223+
# found at the start
224+
# This shouldn't occur in practice, but guard against it
225+
# just in case
226+
continue
227+
228+
venv_path = paths.join(root_venv_path, rel_venv_path)
229+
230+
# For lack of a better option, first added wins.
231+
if venv_path not in keep_map:
232+
keep_map[venv_path] = file
216233

217234
def get_venv_symlinks(ctx, files, package, version_str, site_packages_root):
218235
"""Compute the VenvSymlinkEntry objects for a library.

tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl

Lines changed: 58 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
""
22

3-
load("@bazel_skylib//lib:paths.bzl", "paths")
43
load("@rules_testing//lib:analysis_test.bzl", "analysis_test")
54
load("@rules_testing//lib:test_suite.bzl", "test_suite")
65
load("//python/private:py_info.bzl", "VenvSymlinkEntry", "VenvSymlinkKind") # buildifier: disable=bzl-visibility
@@ -58,34 +57,16 @@ def _venv_symlinks_from_entries(entries):
5857
))
5958
return sorted(result, key = lambda e: (e.link_to_path, e.venv_path))
6059

61-
def _entry(venv_path, link_to_path, files = [], **kwargs):
60+
def _entry(venv_path, link_to_path, files, **kwargs):
6261
kwargs.setdefault("kind", VenvSymlinkKind.LIB)
6362
kwargs.setdefault("package", None)
6463
kwargs.setdefault("version", None)
65-
66-
def short_pathify(path):
67-
path = paths.join(link_to_path, path)
68-
69-
# In tests, `../` is used to step out of the link_to_path scope.
70-
path = paths.normalize(path)
71-
72-
# Treat paths starting with "+" as external references. This matches
73-
# how bzlmod names things.
74-
if link_to_path.startswith("+"):
75-
# File.short_path to external repos have `../` prefixed
76-
path = paths.join("../", path)
77-
else:
78-
# File.short_path in main repo is main-repo relative
79-
_, _, path = path.partition("/")
80-
return path
64+
kwargs.setdefault("link_to_file", None)
8165

8266
return VenvSymlinkEntry(
8367
venv_path = venv_path,
8468
link_to_path = link_to_path,
85-
files = depset([
86-
_file(short_pathify(f))
87-
for f in files
88-
]),
69+
files = depset(files),
8970
**kwargs
9071
)
9172

@@ -100,15 +81,34 @@ _tests.append(_test_conflict_merging)
10081

10182
def _test_conflict_merging_impl(env, _):
10283
entries = [
103-
_entry("a", "+pypi_a/site-packages/a", ["a.txt"]),
104-
_entry("a-1.0.dist-info", "+pypi_a/site-packages/a-1.0.dist-info", ["METADATA"]),
105-
_entry("a/b", "+pypi_a_b/site-packages/a/b", ["b.txt"]),
106-
_entry("x", "_main/src/x", ["x.txt"]),
107-
_entry("x/p", "_main/src-dev/x/p", ["p.txt"]),
108-
_entry("duplicate", "+dupe_a/site-packages/duplicate", ["d.py"]),
109-
# This entry also provides a/x.py, but since the "a" entry is shorter
110-
# and comes first, its version of x.py should win.
111-
_entry("duplicate", "+dupe_b/site-packages/duplicate", ["d.py"]),
84+
_entry("a", "+pypi_a/site-packages/a", [
85+
_file("../+pypi_a/site-packages/a/a.txt"),
86+
]),
87+
_entry("a-1.0.dist-info", "+pypi_a/site-packages/a-1.0.dist-info", [
88+
_file("../+pypi_a/site-packages/a-1.0.dist-info/METADATA"),
89+
]),
90+
_entry("a/b", "+pypi_a_b/site-packages/a/b", [
91+
_file("../+pypi_a_b/site-packages/a/b/b.txt"),
92+
]),
93+
_entry("x", "_main/src/x", [
94+
_file("src/x/x.txt"),
95+
]),
96+
_entry("x/p", "_main/src-dev/x/p", [
97+
_file("src-dev/x/p/p.txt"),
98+
]),
99+
_entry("duplicate", "+dupe_a/site-packages/duplicate", [
100+
_file("../+dupe_a/site-packages/duplicate/d.py"),
101+
]),
102+
_entry("duplicate", "+dupe_b/site-packages/duplicate", [
103+
_file("../+dupe_b/site-packages/duplicate/d.py"),
104+
]),
105+
# Case: two distributions provide the same file (instead of directory)
106+
_entry("ff/fmod.py", "+ff_a/site-packages/ff/fmod.py", [
107+
_file("../+ff_a/site-packages/ff/fmod.py"),
108+
]),
109+
_entry("ff/fmod.py", "+ff_b/site-packages/ff/fmod.py", [
110+
_file("../+ff_b/site-packages/ff/fmod.py"),
111+
]),
112112
]
113113

114114
actual, conflicts = build_link_map(_ctx(), entries, return_conflicts = True)
@@ -117,6 +117,7 @@ def _test_conflict_merging_impl(env, _):
117117
"a/a.txt": _file("../+pypi_a/site-packages/a/a.txt"),
118118
"a/b/b.txt": _file("../+pypi_a_b/site-packages/a/b/b.txt"),
119119
"duplicate/d.py": _file("../+dupe_a/site-packages/duplicate/d.py"),
120+
"ff/fmod.py": _file("../+ff_a/site-packages/ff/fmod.py"),
120121
"x/p/p.txt": _file("src-dev/x/p/p.txt"),
121122
"x/x.txt": _file("src/x/x.txt"),
122123
}
@@ -274,8 +275,12 @@ _tests.append(_test_package_version_filtering)
274275

275276
def _test_package_version_filtering_impl(env, _):
276277
entries = [
277-
_entry("foo", "+pypi_v1/site-packages/foo", ["foo.txt"], package = "foo", version = "1.0"),
278-
_entry("foo", "+pypi_v2/site-packages/foo", ["bar.txt"], package = "foo", version = "2.0"),
278+
_entry("foo", "+pypi_v1/site-packages/foo", [
279+
_file("../+pypi_v1/site-packages/foo/foo.txt"),
280+
], package = "foo", version = "1.0"),
281+
_entry("foo", "+pypi_v2/site-packages/foo", [
282+
_file("../+pypi_v2/site-packages/foo/bar.txt"),
283+
], package = "foo", version = "2.0"),
279284
]
280285

281286
actual = build_link_map(_ctx(), entries)
@@ -300,15 +305,15 @@ def _test_malformed_entry_impl(env, _):
300305
"a",
301306
"+pypi_a/site-packages/a",
302307
# This file is outside the link_to_path, so it should be ignored.
303-
["../outside.txt"],
308+
[_file("../+pypi_a/site-packages/outside.txt")],
304309
),
305310
# A second, conflicting, entry is added to force merging of the known
306311
# files. Without this, there's no conflict, so files is never
307312
# considered.
308313
_entry(
309314
"a",
310315
"+pypi_b/site-packages/a",
311-
["../outside.txt"],
316+
[_file("../+pypi_b/site-packages/outside.txt")],
312317
),
313318
]
314319

@@ -328,11 +333,21 @@ _tests.append(_test_complex_namespace_packages)
328333

329334
def _test_complex_namespace_packages_impl(env, _):
330335
entries = [
331-
_entry("a/b", "+pypi_a_b/site-packages/a/b", ["b.txt"]),
332-
_entry("a/c", "+pypi_a_c/site-packages/a/c", ["c.txt"]),
333-
_entry("x/y/z", "+pypi_x_y_z/site-packages/x/y/z", ["z.txt"]),
334-
_entry("foo", "+pypi_foo/site-packages/foo", ["foo.txt"]),
335-
_entry("foobar", "+pypi_foobar/site-packages/foobar", ["foobar.txt"]),
336+
_entry("a/b", "+pypi_a_b/site-packages/a/b", [
337+
_file("../+pypi_a_b/site-packages/a/b/b.txt"),
338+
]),
339+
_entry("a/c", "+pypi_a_c/site-packages/a/c", [
340+
_file("../+pypi_a_c/site-packages/a/cc.txt"),
341+
]),
342+
_entry("x/y/z", "+pypi_x_y_z/site-packages/x/y/z", [
343+
_file("../+pypi_x_y_z/site-packages/x/y/z/z.txt"),
344+
]),
345+
_entry("foo", "+pypi_foo/site-packages/foo", [
346+
_file("../+pypi_foo/site-packages/foo/foo.txt"),
347+
]),
348+
_entry("foobar", "+pypi_foobar/site-packages/foobar", [
349+
_file("../+pypi_foobar/site-packages/foobar/foobar.txt"),
350+
]),
336351
]
337352

338353
actual = build_link_map(_ctx(), entries)
@@ -380,19 +395,19 @@ def _test_multiple_venv_symlink_kinds_impl(env, _):
380395
_entry(
381396
"libfile",
382397
"+pypi_lib/site-packages/libfile",
383-
["lib.txt"],
398+
[_file("../+pypi_lib/site-packages/libfile/lib.txt")],
384399
kind = VenvSymlinkKind.LIB,
385400
),
386401
_entry(
387402
"binfile",
388403
"+pypi_bin/bin/binfile",
389-
["bin.txt"],
404+
[_file("../+pypi_bin/bin/binfile/bin.txt")],
390405
kind = VenvSymlinkKind.BIN,
391406
),
392407
_entry(
393408
"includefile",
394409
"+pypi_include/include/includefile",
395-
["include.h"],
410+
[_file("../+pypi_include/include/includefile/include.h")],
396411
kind =
397412
VenvSymlinkKind.INCLUDE,
398413
),

0 commit comments

Comments
 (0)