33load ("@bazel_skylib//lib:paths.bzl" , "paths" )
44load (
55 ":common.bzl" ,
6- "PYTHON_FILE_EXTENSIONS" ,
76 "is_file" ,
87 "relative_path" ,
98 "runfiles_root_path" ,
@@ -58,7 +57,6 @@ def create_venv_app_files(ctx, deps, venv_dir_map):
5857 if is_file (link_to ):
5958 symlink_from = "{}/{}" .format (ctx .label .package , bin_venv_path )
6059 runfiles_symlinks [symlink_from ] = link_to
61-
6260 else :
6361 venv_link = ctx .actions .declare_symlink (bin_venv_path )
6462 venv_link_rf_path = runfiles_root_path (ctx , venv_link .short_path )
@@ -77,14 +75,16 @@ def create_venv_app_files(ctx, deps, venv_dir_map):
7775 )
7876
7977# Visible for testing
80- def build_link_map (ctx , entries ):
78+ def build_link_map (ctx , entries , return_conflicts = False ):
8179 """Compute the mapping of venv paths to their backing objects.
8280
83-
8481 Args:
8582 ctx: {type}`ctx` current ctx.
8683 entries: {type}`list[VenvSymlinkEntry]` the entries that describe the
8784 venv-relative
85+ return_conflicts: {type}`bool`. Only present for testing. If True,
86+ also return a list of the groups that had overlapping paths and had
87+ to be resolved and merged.
8888
8989 Returns:
9090 {type}`dict[str, dict[str, str|File]]` Mappings of venv paths to their
@@ -114,6 +114,7 @@ def build_link_map(ctx, entries):
114114
115115 # final paths to keep, grouped by kind
116116 keep_link_map = {} # dict[str kind, dict[path, str|File]]
117+ conflicts = [] if return_conflicts else None
117118 for kind , entries in entries_by_kind .items ():
118119 # dict[str kind-relative path, str|File link_to]
119120 keep_kind_link_map = {}
@@ -129,12 +130,17 @@ def build_link_map(ctx, entries):
129130 else :
130131 keep_kind_link_map [entry .venv_path ] = entry .link_to_path
131132 else :
133+ if return_conflicts :
134+ conflicts .append (group )
135+
132136 # Merge a group of overlapping prefixes
133137 _merge_venv_path_group (ctx , group , keep_kind_link_map )
134138
135139 keep_link_map [kind ] = keep_kind_link_map
136-
137- return keep_link_map
140+ if return_conflicts :
141+ return keep_link_map , conflicts
142+ else :
143+ return keep_link_map
138144
139145def _group_venv_path_entries (entries ):
140146 """Group entries by VenvSymlinkEntry.venv_path overlap.
@@ -235,109 +241,115 @@ def get_venv_symlinks(ctx, files, package, version_str, site_packages_root):
235241 # Append slash to prevent incorrect prefix-string matches
236242 site_packages_root += "/"
237243
238- # We have to build a list of (runfiles path, site-packages path) pairs of the files to
239- # create in the consuming binary's venv site-packages directory. To minimize the number of
240- # files to create, we just return the paths to the directories containing the code of
241- # interest.
242- #
243- # However, namespace packages complicate matters: multiple distributions install in the
244- # same directory in site-packages. This works out because they don't overlap in their
245- # files. Typically, they install to different directories within the namespace package
246- # directory. We also need to ensure that we can handle a case where the main package (e.g.
247- # airflow) has directories only containing data files and then namespace packages coming
248- # along and being next to it.
249- #
250- # Lastly we have to assume python modules just being `.py` files (e.g. typing-extensions)
251- # is just a single Python file.
244+ all_files = sorted (files , key = lambda f : f .short_path )
252245
253- dir_symlinks = {} # dirname -> runfile path
254- venv_symlinks = []
246+ # venv paths that cannot be directly linked. Dict acting as set.
247+ cannot_be_linked_directly = {}
255248
256- # Sort so order is top-down
257- all_files = sorted (files , key = lambda f : f .short_path )
249+ # dict[str path, VenvSymlinkEntry]
250+ # Where path is the venv path (i.e. relative to site_packages_prefix)
251+ venv_symlinks = {}
258252
253+ # List of (File, str venv_path) tuples
254+ files_left_to_link = []
255+
256+ # We want to minimize the number of files symlinked. Ideally, only the
257+ # top-level directories are symlinked. Unfortunately, shared libraries
258+ # complicate matters: if a shared library's directory is linked, then the
259+ # dynamic linker computes the wrong search path.
260+ #
261+ # To fix, we have to directly link shared libraries. This then means that
262+ # all the parent directories of the shared library can't be linked
263+ # directly.
259264 for src in all_files :
260- path = _repo_relative_short_path (src .short_path )
261- if not path .startswith (site_packages_root ):
265+ rf_root_path = runfiles_root_path (ctx , src .short_path )
266+ _ , _ , repo_rel_path = rf_root_path .partition ("/" )
267+ head , found_sp_root , venv_path = repo_rel_path .partition (site_packages_root )
268+ if head or not found_sp_root :
269+ # If head is set, then the path didn't start with site_packages_root
270+ # if found_sp_root is empty, then it means it wasn't found at all.
262271 continue
263- path = path .removeprefix (site_packages_root )
264- dir_name , _ , filename = path .rpartition ("/" )
265- runfiles_dir_name , _ , _ = runfiles_root_path (ctx , src .short_path ).partition ("/" )
266272
273+ filename = paths .basename (venv_path )
267274 if _is_linker_loaded_library (filename ):
268- entry = VenvSymlinkEntry (
275+ venv_symlinks [ venv_path ] = VenvSymlinkEntry (
269276 kind = VenvSymlinkKind .LIB ,
270- link_to_path = paths . join ( runfiles_dir_name , site_packages_root , filename ) ,
277+ link_to_path = rf_root_path ,
271278 link_to_file = src ,
272279 package = package ,
273280 version = version_str ,
274- venv_path = path ,
275281 files = depset ([src ]),
282+ venv_path = venv_path ,
276283 )
277- venv_symlinks .append (entry )
278- continue
279-
280- if dir_name in dir_symlinks :
281- # we already have this dir, this allows us to short-circuit since most of the
282- # ctx.files.data might share the same directories as ctx.files.srcs
284+ parent = paths .dirname (venv_path )
285+ for _ in range (len (venv_path ) + 1 ): # Iterate enough times to traverse up
286+ if not parent :
287+ break
288+ if cannot_be_linked_directly .get (parent , False ):
289+ # Already seen
290+ break
291+ cannot_be_linked_directly [parent ] = True
292+ parent = paths .dirname (parent )
293+ else :
294+ files_left_to_link .append ((src , venv_path ))
295+
296+ # At this point, venv_symlinks has entries for the shared libraries
297+ # and cannot_be_linked_directly has the directories that cannot be
298+ # directly linked. Next, we loop over the remaining files and group
299+ # them into the highest level directory that can be linked.
300+
301+ # dict[str venv_path, list[File]]
302+ optimized_groups = {}
303+
304+ for src , venv_path in files_left_to_link :
305+ parent = paths .dirname (venv_path )
306+ if not parent :
307+ # File in root, must be linked directly
308+ optimized_groups .setdefault (venv_path , [])
309+ optimized_groups [venv_path ].append (src )
283310 continue
284311
285- if dir_name :
286- # This can be either:
287- # * a directory with libs (e.g. numpy.libs, created by auditwheel)
288- # * a directory with `__init__.py` file that potentially also needs to be
289- # symlinked.
290- # * `.dist-info` directory
291- #
292- # This could be also regular files, that just need to be symlinked, so we will
293- # add the directory here.
294- dir_symlinks [dir_name ] = runfiles_dir_name
295- elif src .extension in PYTHON_FILE_EXTENSIONS :
296- # This would be files that do not have directories and we just need to add
297- # direct symlinks to them as is, we only allow Python files in here
298- entry = VenvSymlinkEntry (
299- kind = VenvSymlinkKind .LIB ,
300- link_to_path = paths .join (runfiles_dir_name , site_packages_root , filename ),
301- link_to_file = src ,
302- package = package ,
303- version = version_str ,
304- venv_path = path ,
305- files = depset ([src ]),
306- )
307- venv_symlinks .append (entry )
308-
309- # Sort so that we encounter `foo` before `foo/bar`. This ensures we
310- # see the top-most explicit package first.
311- dirnames = sorted (dir_symlinks .keys ())
312- first_level_explicit_packages = []
313- for d in dirnames :
314- is_sub_package = False
315- for existing in first_level_explicit_packages :
316- # Suffix with / to prevent foo matching foobar
317- if d .startswith (existing + "/" ):
318- is_sub_package = True
319- break
320- if not is_sub_package :
321- first_level_explicit_packages .append (d )
322-
323- for dirname in first_level_explicit_packages :
324- prefix = dir_symlinks [dirname ]
325- link_to_path = paths .join (prefix , site_packages_root , dirname )
326- entry = VenvSymlinkEntry (
312+ if parent in cannot_be_linked_directly :
313+ # File in a directory that cannot be directly linked,
314+ # so link the file directly
315+ optimized_groups .setdefault (venv_path , [])
316+ optimized_groups [venv_path ].append (src )
317+ else :
318+ # This path can be grouped. Find the highest-level directory to link.
319+ venv_path = parent
320+ next_parent = paths .dirname (parent )
321+ for _ in range (len (venv_path ) + 1 ): # Iterate enough times
322+ if next_parent :
323+ if next_parent not in cannot_be_linked_directly :
324+ venv_path = next_parent
325+ next_parent = paths .dirname (next_parent )
326+ else :
327+ break
328+ else :
329+ break
330+
331+ optimized_groups .setdefault (venv_path , [])
332+ optimized_groups [venv_path ].append (src )
333+
334+ # Finally, for each group, we create the VenvSymlinkEntry objects
335+ for venv_path , files in optimized_groups .items ():
336+ link_to_path = (
337+ _get_label_runfiles_repo (ctx , files [0 ].owner ) +
338+ "/" +
339+ site_packages_root +
340+ venv_path
341+ )
342+ venv_symlinks [venv_path ] = VenvSymlinkEntry (
327343 kind = VenvSymlinkKind .LIB ,
328344 link_to_path = link_to_path ,
345+ link_to_file = None ,
329346 package = package ,
330347 version = version_str ,
331- venv_path = dirname ,
332- files = depset ([
333- f
334- for f in all_files
335- if runfiles_root_path (ctx , f .short_path ).startswith (link_to_path + "/" )
336- ]),
348+ venv_path = venv_path ,
349+ files = depset (files ),
337350 )
338- venv_symlinks .append (entry )
339351
340- return venv_symlinks
352+ return venv_symlinks . values ()
341353
342354def _is_linker_loaded_library (filename ):
343355 """Tells if a filename is one that `dlopen()` or the runtime linker handles.
@@ -357,9 +369,10 @@ def _is_linker_loaded_library(filename):
357369 return True
358370 return False
359371
360- def _repo_relative_short_path ( short_path ):
361- # Convert `../+pypi+foo/some/file.py` to `some/file.py`
362- if short_path . startswith ( "../" ) :
363- return short_path [ 3 :]. partition ( "/" )[ 2 ]
372+ def _get_label_runfiles_repo ( ctx , label ):
373+ repo = label . repo_name
374+ if repo :
375+ return repo
364376 else :
365- return short_path
377+ # For files, empty repo means the main repo
378+ return ctx .workspace_name
0 commit comments