Skip to content

Commit 1d34260

Browse files
committed
add comments and add an extra value into the PyInfo
1 parent 20d3c25 commit 1d34260

File tree

3 files changed

+55
-45
lines changed

3 files changed

+55
-45
lines changed

python/private/py_executable.bzl

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -690,21 +690,16 @@ def _build_link_map(entries):
690690
# We overwrite duplicates by design. The dependency closer to the
691691
# binary gets precedence due to the topological ordering.
692692

693-
package = ""
694-
if entry.package:
695-
# We have normalized the version/package to PEP440 spec
696-
package, _, version = entry.package.partition("-")
693+
if entry.package and version_by_pkg.get(entry.package) != entry.version:
694+
# If we detect that we are adding a different package version, clear the
695+
# previously added values.
696+
version_by_pkg[entry.package] = entry.version
697697

698-
if version_by_pkg.get(package) != version:
699-
# If we detect that we are adding a different package version, clear the
700-
# previously added values.
701-
version_by_pkg[package] = version
698+
# Overwrite any existing values because this is closer to the terminal
699+
# node
700+
pkg_link_map.pop(entry.package, None)
702701

703-
# Overwrite any existing values because this is closer to the terminal
704-
# node
705-
pkg_link_map.pop(package, None)
706-
707-
link_map = pkg_link_map.setdefault(package, {})
702+
link_map = pkg_link_map.setdefault(entry.package, {})
708703
kind_map = link_map.setdefault(entry.kind, {})
709704

710705
if entry.venv_path in kind_map:

python/private/py_info.bzl

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,12 +71,20 @@ it means to not create a symlink.
7171
"package": """
7272
:type: str | None
7373
74-
Represents the PyPI package that the code originates from.
74+
Represents the PyPI package name that the code originates from. It is normalized according to the
75+
PEP440 with all `-` replaced with `_`, i.e. the same as the package name in the hub repository that
76+
it would come from.
7577
""",
7678
"venv_path": """
7779
:type: str
7880
7981
A path relative to the `kind` directory within the venv.
82+
""",
83+
"version": """
84+
:type: str | None
85+
86+
Represents the PyPI package version that the code originates from. It is normalized according to the
87+
PEP440 standard.
8088
""",
8189
},
8290
)

python/private/py_library.bzl

Lines changed: 38 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -224,16 +224,12 @@ def _get_imports_and_venv_symlinks(ctx, semantics):
224224
imports = depset()
225225
venv_symlinks = depset()
226226
if VenvsSitePackages.is_enabled(ctx):
227-
dist_info_metadata = _get_distinfo_metadata(ctx)
228-
venv_symlinks = _get_venv_symlinks(
229-
ctx,
230-
dist_info_metadata,
231-
)
227+
venv_symlinks = _get_venv_symlinks(ctx)
232228
else:
233229
imports = collect_imports(ctx, semantics)
234230
return imports, venv_symlinks
235231

236-
def _get_venv_symlinks(ctx, dist_info_metadata):
232+
def _get_venv_symlinks(ctx):
237233
imports = ctx.attr.imports
238234
if len(imports) == 0:
239235
fail("When venvs_site_packages is enabled, exactly one `imports` " +
@@ -254,24 +250,13 @@ def _get_venv_symlinks(ctx, dist_info_metadata):
254250
# Append slash to prevent incorrectly prefix-string matches
255251
site_packages_root += "/"
256252

257-
# We have to build a list of (runfiles path, site-packages path) pairs of
258-
# the files to create in the consuming binary's venv site-packages directory.
259-
# To minimize the number of files to create, we just return the paths
260-
# to the directories containing the code of interest.
261-
#
262-
# However, namespace packages complicate matters: multiple
263-
# distributions install in the same directory in site-packages. This
264-
# works out because they don't overlap in their files. Typically, they
265-
# install to different directories within the namespace package
266-
# directory. Namespace package directories are simply directories
267-
# within site-packages that *don't* have an `__init__.py` file, which
268-
# can be arbitrarily deep. Thus, we simply have to look for the
269-
# directories that _do_ have an `__init__.py` file and treat those as
270-
# the path to symlink to.
253+
# If the package comes from PyPI then it will have a `.dist-info` as part of `data`, which
254+
# allows us to get the name of the package and its version. This means that we can ensure that
255+
# package usage closer to the terminal node can override dependencies.
271256

272-
dir_symlinks = {} # dirname -> runfile path
273-
venv_symlinks = []
274257
package = None
258+
version_str = None
259+
dist_info_metadata = _get_distinfo_metadata(ctx)
275260
if dist_info_metadata:
276261
# in order to be able to have replacements in the venv, we have to add a
277262
# third value into the venv_symlinks, which would be the normalized
@@ -280,38 +265,59 @@ def _get_venv_symlinks(ctx, dist_info_metadata):
280265
dist_info_dir = paths.basename(dist_info_metadata.dirname)
281266
package, _, _suffix = dist_info_dir.rpartition(".dist-info")
282267
package, _, version_str = package.rpartition("-")
283-
package = "{}-{}".format(
284-
normalize_name(package),
285-
version.normalize(version_str),
268+
package, version_str = (
269+
normalize_name(package), # will have no dashes
270+
version.normalize(version_str), # will have no dashes either
286271
)
287272

288-
for src in ctx.files.srcs + ctx.files.data:
273+
# We have to build a list of (runfiles path, site-packages path) pairs of the files to
274+
# create in the consuming binary's venv site-packages directory. To minimize the number of
275+
# files to create, we just return the paths to the directories containing the code of
276+
# interest.
277+
#
278+
# However, namespace packages complicate matters: multiple distributions install in the
279+
# same directory in site-packages. This works out because they don't overlap in their
280+
# files. Typically, they install to different directories within the namespace package
281+
# directory. We also need to ensure that we can handle a case where the main package (e.g.
282+
# airflow) has directories only containing data files and then namespace packages coming
283+
# along and being next to it.
284+
#
285+
# Lastly we have to assume python modules just being `.py` files (e.g. typing-extensions)
286+
# is just a single Python file.
287+
288+
dir_symlinks = {} # dirname -> runfile path
289+
venv_symlinks = []
290+
for src in ctx.files.srcs + ctx.files.data + ctx.files.pyi_srcs:
289291
path = _repo_relative_short_path(src.short_path)
290292
if not path.startswith(site_packages_root):
291293
continue
292294
path = path.removeprefix(site_packages_root)
293295
dir_name, _, filename = path.rpartition("/")
294296

295297
if dir_name in dir_symlinks:
296-
# we already have this dir.
298+
# we already have this dir, this allows us to short-circuit since most of the
299+
# ctx.files.data might share the same directories as ctx.files.srcs
297300
continue
298301

299302
runfiles_dir_name, _, _ = runfiles_root_path(ctx, src.short_path).partition("/")
300303
if dir_name:
301-
# This can be either a directory with libs (e.g. numpy.libs)
302-
# or a directory with `__init__.py` file that potentially also needs to be
303-
# symlinked.
304+
# This can be either:
305+
# * a directory with libs (e.g. numpy.libs, created by auditwheel)
306+
# * a directory with `__init__.py` file that potentially also needs to be
307+
# symlinked.
308+
# * `.dist-info` directory
304309
#
305310
# This could be also regular files, that just need to be symlinked, so we will
306311
# add the directory here.
307312
dir_symlinks[dir_name] = runfiles_dir_name
308313
elif src.extension in PYTHON_FILE_EXTENSIONS:
309314
# This would be files that do not have directories and we just need to add
310-
# direct symlinks to them as is:
315+
# direct symlinks to them as is, we only allow Python files in here
311316
entry = VenvSymlinkEntry(
312317
kind = VenvSymlinkKind.LIB,
313318
link_to_path = paths.join(runfiles_dir_name, site_packages_root, filename),
314319
package = package,
320+
version = version_str,
315321
venv_path = filename,
316322
)
317323
venv_symlinks.append(entry)
@@ -336,6 +342,7 @@ def _get_venv_symlinks(ctx, dist_info_metadata):
336342
kind = VenvSymlinkKind.LIB,
337343
link_to_path = paths.join(prefix, site_packages_root, dirname),
338344
package = package,
345+
version = version_str,
339346
venv_path = dirname,
340347
)
341348
venv_symlinks.append(entry)

0 commit comments

Comments
 (0)