@@ -224,16 +224,12 @@ def _get_imports_and_venv_symlinks(ctx, semantics):
224224 imports = depset ()
225225 venv_symlinks = depset ()
226226 if VenvsSitePackages .is_enabled (ctx ):
227- dist_info_metadata = _get_distinfo_metadata (ctx )
228- venv_symlinks = _get_venv_symlinks (
229- ctx ,
230- dist_info_metadata ,
231- )
227+ venv_symlinks = _get_venv_symlinks (ctx )
232228 else :
233229 imports = collect_imports (ctx , semantics )
234230 return imports , venv_symlinks
235231
236- def _get_venv_symlinks (ctx , dist_info_metadata ):
232+ def _get_venv_symlinks (ctx ):
237233 imports = ctx .attr .imports
238234 if len (imports ) == 0 :
239235 fail ("When venvs_site_packages is enabled, exactly one `imports` " +
@@ -254,24 +250,13 @@ def _get_venv_symlinks(ctx, dist_info_metadata):
254250 # Append slash to prevent incorrectly prefix-string matches
255251 site_packages_root += "/"
256252
257- # We have to build a list of (runfiles path, site-packages path) pairs of
258- # the files to create in the consuming binary's venv site-packages directory.
259- # To minimize the number of files to create, we just return the paths
260- # to the directories containing the code of interest.
261- #
262- # However, namespace packages complicate matters: multiple
263- # distributions install in the same directory in site-packages. This
264- # works out because they don't overlap in their files. Typically, they
265- # install to different directories within the namespace package
266- # directory. Namespace package directories are simply directories
267- # within site-packages that *don't* have an `__init__.py` file, which
268- # can be arbitrarily deep. Thus, we simply have to look for the
269- # directories that _do_ have an `__init__.py` file and treat those as
270- # the path to symlink to.
253+ # If the package comes from PyPI then it will have a `.dist-info` as part of `data`, which
254+ # allows us to get the name of the package and its version. This means that we can ensure that
255+ # package usage closer to the terminal node can override dependencies.
271256
272- dir_symlinks = {} # dirname -> runfile path
273- venv_symlinks = []
274257 package = None
258+ version_str = None
259+ dist_info_metadata = _get_distinfo_metadata (ctx )
275260 if dist_info_metadata :
276261 # in order to be able to have replacements in the venv, we have to add a
277262 # third value into the venv_symlinks, which would be the normalized
@@ -280,38 +265,59 @@ def _get_venv_symlinks(ctx, dist_info_metadata):
280265 dist_info_dir = paths .basename (dist_info_metadata .dirname )
281266 package , _ , _suffix = dist_info_dir .rpartition (".dist-info" )
282267 package , _ , version_str = package .rpartition ("-" )
283- package = "{}-{}" . format (
284- normalize_name (package ),
285- version .normalize (version_str ),
268+ package , version_str = (
269+ normalize_name (package ), # will have no dashes
270+ version .normalize (version_str ), # will have no dashes either
286271 )
287272
288- for src in ctx .files .srcs + ctx .files .data :
273+ # We have to build a list of (runfiles path, site-packages path) pairs of the files to
274+ # create in the consuming binary's venv site-packages directory. To minimize the number of
275+ # files to create, we just return the paths to the directories containing the code of
276+ # interest.
277+ #
278+ # However, namespace packages complicate matters: multiple distributions install in the
279+ # same directory in site-packages. This works out because they don't overlap in their
280+ # files. Typically, they install to different directories within the namespace package
281+ # directory. We also need to ensure that we can handle a case where the main package (e.g.
282+ # airflow) has directories only containing data files and then namespace packages coming
283+ # along and being next to it.
284+ #
285+ # Lastly we have to assume python modules just being `.py` files (e.g. typing-extensions)
286+ # is just a single Python file.
287+
288+ dir_symlinks = {} # dirname -> runfile path
289+ venv_symlinks = []
290+ for src in ctx .files .srcs + ctx .files .data + ctx .files .pyi_srcs :
289291 path = _repo_relative_short_path (src .short_path )
290292 if not path .startswith (site_packages_root ):
291293 continue
292294 path = path .removeprefix (site_packages_root )
293295 dir_name , _ , filename = path .rpartition ("/" )
294296
295297 if dir_name in dir_symlinks :
296- # we already have this dir.
298+ # we already have this dir, this allows us to short-circuit since most of the
299+ # ctx.files.data might share the same directories as ctx.files.srcs
297300 continue
298301
299302 runfiles_dir_name , _ , _ = runfiles_root_path (ctx , src .short_path ).partition ("/" )
300303 if dir_name :
301- # This can be either a directory with libs (e.g. numpy.libs)
302- # or a directory with `__init__.py` file that potentially also needs to be
303- # symlinked.
304+ # This can be either:
305+ # * a directory with libs (e.g. numpy.libs, created by auditwheel)
306+ # * a directory with `__init__.py` file that potentially also needs to be
307+ # symlinked.
308+ # * `.dist-info` directory
304309 #
305310 # This could be also regular files, that just need to be symlinked, so we will
306311 # add the directory here.
307312 dir_symlinks [dir_name ] = runfiles_dir_name
308313 elif src .extension in PYTHON_FILE_EXTENSIONS :
309314 # This would be files that do not have directories and we just need to add
310- # direct symlinks to them as is:
315+ # direct symlinks to them as is, we only allow Python files in here
311316 entry = VenvSymlinkEntry (
312317 kind = VenvSymlinkKind .LIB ,
313318 link_to_path = paths .join (runfiles_dir_name , site_packages_root , filename ),
314319 package = package ,
320+ version = version_str ,
315321 venv_path = filename ,
316322 )
317323 venv_symlinks .append (entry )
@@ -336,6 +342,7 @@ def _get_venv_symlinks(ctx, dist_info_metadata):
336342 kind = VenvSymlinkKind .LIB ,
337343 link_to_path = paths .join (prefix , site_packages_root , dirname ),
338344 package = package ,
345+ version = version_str ,
339346 venv_path = dirname ,
340347 )
341348 venv_symlinks .append (entry )
0 commit comments