diff --git a/AGENTS.md b/AGENTS.md index e21b15bd03..3d5219c2bc 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -21,10 +21,16 @@ into the sentence, not verbatim. ### bzl_library targets for bzl source files -* `.bzl` files should have `bzl_library` defined for them. +* A `bzl_library` target should be defined for every `.bzl` file outside + of the `tests/` directory. * They should have a single `srcs` file and be named after the file with `_bzl` appended. -* Their deps should be based on the `load()` statements in the source file. +* Their deps should be based on the `load()` statements in the source file + and refer to the `bzl_library` target containing the loaded file. + * For files in rules_python: replace `.bzl` with `_bzl`. + e.g. given `load("//foo:bar.bzl", ...)`, the target is `//foo:bar_bzl`. + * For files outside rules_python: remove the `.bzl` suffix. e.g. given + `load("@foo//foo:bar.bzl", ...)`, the target is `@foo//foo:bar`. * `bzl_library()` targets should be kept in alphabetical order by name. Example: diff --git a/CHANGELOG.md b/CHANGELOG.md index abd89e51b5..382096f826 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -86,6 +86,9 @@ END_UNRELEASED_TEMPLATE {attr}`pip.defaults.whl_platform_tags` attribute to configure that. If `musllinux_*_x86_64` is specified, we will chose the lowest available wheel version. Fixes [#3250](https://github.com/bazel-contrib/rules_python/issues/3250). +* (venvs) {obj}`--vens_site_packages=yes` no longer errors when packages with + overlapping files or directories are used together. + ([#3204](https://github.com/bazel-contrib/rules_python/issues/3204)). {#v0-0-0-added} ### Added diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel index 916c14f9f2..5e2043c0c5 100644 --- a/python/private/BUILD.bazel +++ b/python/private/BUILD.bazel @@ -419,6 +419,7 @@ bzl_library( ":rules_cc_srcs_bzl", ":toolchain_types_bzl", ":transition_labels_bzl", + ":venv_runfiles_bzl", "@bazel_skylib//lib:dicts", "@bazel_skylib//lib:paths", "@bazel_skylib//lib:structs", @@ -468,6 +469,7 @@ bzl_library( ":py_internal_bzl", ":rule_builders_bzl", ":toolchain_types_bzl", + ":venv_runfiles_bzl", ":version_bzl", "@bazel_skylib//lib:dicts", "@bazel_skylib//rules:common_settings", @@ -727,6 +729,16 @@ bzl_library( ], ) +bzl_library( + name = "venv_runfiles_bzl", + srcs = ["venv_runfiles.bzl"], + deps = [ + ":common_bzl", + ":py_info.bzl", + "@bazel_skylib//lib:paths", + ], +) + # Needed to define bzl_library targets for docgen. (We don't define the # bzl_library target here because it'd give our users a transitive dependency # on Skylib.) diff --git a/python/private/common.bzl b/python/private/common.bzl index 9fc366818d..33b175c247 100644 --- a/python/private/common.bzl +++ b/python/private/common.bzl @@ -495,6 +495,9 @@ _BOOL_TYPE = type(True) def is_bool(v): return type(v) == _BOOL_TYPE +def is_file(v): + return type(v) == "File" + def target_platform_has_any_constraint(ctx, constraints): """Check if target platform has any of a list of constraints. @@ -511,6 +514,37 @@ def target_platform_has_any_constraint(ctx, constraints): return True return False +def relative_path(from_, to): + """Compute a relative path from one path to another. + + Args: + from_: {type}`str` the starting directory. Note that it should be + a directory because relative-symlinks are relative to the + directory the symlink resides in. + to: {type}`str` the path that `from_` wants to point to + + Returns: + {type}`str` a relative path + """ + from_parts = from_.split("/") + to_parts = to.split("/") + + # Strip common leading parts from both paths + n = min(len(from_parts), len(to_parts)) + for _ in range(n): + if from_parts[0] == to_parts[0]: + from_parts.pop(0) + to_parts.pop(0) + else: + break + + # Impossible to compute a relative path without knowing what ".." is + if from_parts and from_parts[0] == "..": + fail("cannot compute relative path from '%s' to '%s'", from_, to) + + parts = ([".."] * len(from_parts)) + to_parts + return paths.join(*parts) + def runfiles_root_path(ctx, short_path): """Compute a runfiles-root relative path from `File.short_path` diff --git a/python/private/py_executable.bzl b/python/private/py_executable.bzl index 59800da566..bef5934729 100644 --- a/python/private/py_executable.bzl +++ b/python/private/py_executable.bzl @@ -48,6 +48,7 @@ load( "filter_to_py_srcs", "get_imports", "is_bool", + "relative_path", "runfiles_root_path", "target_platform_has_any_constraint", ) @@ -63,6 +64,7 @@ load(":reexports.bzl", "BuiltinPyInfo", "BuiltinPyRuntimeInfo") load(":rule_builders.bzl", "ruleb") load(":toolchain_types.bzl", "EXEC_TOOLS_TOOLCHAIN_TYPE", "TARGET_TOOLCHAIN_TYPE", TOOLCHAIN_TYPE = "TARGET_TOOLCHAIN_TYPE") load(":transition_labels.bzl", "TRANSITION_LABELS") +load(":venv_runfiles.bzl", "create_venv_app_files") _py_builtins = py_internal _EXTERNAL_PATH_PREFIX = "external" @@ -499,37 +501,6 @@ def _create_zip_main(ctx, *, stage2_bootstrap, runtime_details, venv): ) return output -def relative_path(from_, to): - """Compute a relative path from one path to another. - - Args: - from_: {type}`str` the starting directory. Note that it should be - a directory because relative-symlinks are relative to the - directory the symlink resides in. - to: {type}`str` the path that `from_` wants to point to - - Returns: - {type}`str` a relative path - """ - from_parts = from_.split("/") - to_parts = to.split("/") - - # Strip common leading parts from both paths - n = min(len(from_parts), len(to_parts)) - for _ in range(n): - if from_parts[0] == to_parts[0]: - from_parts.pop(0) - to_parts.pop(0) - else: - break - - # Impossible to compute a relative path without knowing what ".." is - if from_parts and from_parts[0] == "..": - fail("cannot compute relative path from '%s' to '%s'", from_, to) - - parts = ([".."] * len(from_parts)) + to_parts - return paths.join(*parts) - # Create a venv the executable can use. # For venv details and the venv startup process, see: # * https://docs.python.org/3/library/venv.html @@ -636,9 +607,9 @@ def _create_venv(ctx, output_prefix, imports, runtime_details): VenvSymlinkKind.BIN: bin_dir, VenvSymlinkKind.LIB: site_packages, } - venv_symlinks = _create_venv_symlinks(ctx, venv_dir_map) + venv_app_files = create_venv_app_files(ctx, ctx.attr.deps, venv_dir_map) - files_without_interpreter = [pth, site_init] + venv_symlinks + files_without_interpreter = [pth, site_init] + venv_app_files if pyvenv_cfg: files_without_interpreter.append(pyvenv_cfg) @@ -663,94 +634,6 @@ def _create_venv(ctx, output_prefix, imports, runtime_details): ), ) -def _create_venv_symlinks(ctx, venv_dir_map): - """Creates symlinks within the venv. - - Args: - ctx: current rule ctx - venv_dir_map: mapping of VenvSymlinkKind constants to the - venv path. - - Returns: - {type}`list[File]` list of the File symlink objects created. - """ - - # maps venv-relative path to the runfiles path it should point to - entries = depset( - transitive = [ - dep[PyInfo].venv_symlinks - for dep in ctx.attr.deps - if PyInfo in dep - ], - ).to_list() - - link_map = _build_link_map(entries) - venv_files = [] - for kind, kind_map in link_map.items(): - base = venv_dir_map[kind] - for venv_path, link_to in kind_map.items(): - venv_link = ctx.actions.declare_symlink(paths.join(base, venv_path)) - venv_link_rf_path = runfiles_root_path(ctx, venv_link.short_path) - rel_path = relative_path( - # dirname is necessary because a relative symlink is relative to - # the directory the symlink resides within. - from_ = paths.dirname(venv_link_rf_path), - to = link_to, - ) - ctx.actions.symlink(output = venv_link, target_path = rel_path) - venv_files.append(venv_link) - - return venv_files - -def _build_link_map(entries): - # dict[str package, dict[str kind, dict[str rel_path, str link_to_path]]] - pkg_link_map = {} - - # dict[str package, str version] - version_by_pkg = {} - - for entry in entries: - link_map = pkg_link_map.setdefault(entry.package, {}) - kind_map = link_map.setdefault(entry.kind, {}) - - if version_by_pkg.setdefault(entry.package, entry.version) != entry.version: - # We ignore duplicates by design. - continue - elif entry.venv_path in kind_map: - # We ignore duplicates by design. - continue - else: - kind_map[entry.venv_path] = entry.link_to_path - - # An empty link_to value means to not create the site package symlink. Because of the - # ordering, this allows binaries to remove entries by having an earlier dependency produce - # empty link_to values. - for link_map in pkg_link_map.values(): - for kind, kind_map in link_map.items(): - for dir_path, link_to in kind_map.items(): - if not link_to: - kind_map.pop(dir_path) - - # dict[str kind, dict[str rel_path, str link_to_path]] - keep_link_map = {} - - # Remove entries that would be a child path of a created symlink. - # Earlier entries have precedence to match how exact matches are handled. - for link_map in pkg_link_map.values(): - for kind, kind_map in link_map.items(): - keep_kind_map = keep_link_map.setdefault(kind, {}) - for _ in range(len(kind_map)): - if not kind_map: - break - dirname, value = kind_map.popitem() - keep_kind_map[dirname] = value - prefix = dirname + "/" # Add slash to prevent /X matching /XY - for maybe_suffix in kind_map.keys(): - maybe_suffix += "/" # Add slash to prevent /X matching /XY - if maybe_suffix.startswith(prefix) or prefix.startswith(maybe_suffix): - kind_map.pop(maybe_suffix) - return keep_link_map - def _map_each_identity(v): return v diff --git a/python/private/py_info.bzl b/python/private/py_info.bzl index 31df5cfbde..f96dec554b 100644 --- a/python/private/py_info.bzl +++ b/python/private/py_info.bzl @@ -56,6 +56,14 @@ VenvSymlinkEntry = provider( An entry in `PyInfo.venv_symlinks` """, fields = { + "files": """ +:type: depset[File] + +Files under `link_to_path`. + +This is only used when multiple targets have overlapping `venv_path` paths. e.g. +if one adds files to `venv_path=a/` and another adds files to `venv_path=a/b/`. +""", "kind": """ :type: str diff --git a/python/private/py_library.bzl b/python/private/py_library.bzl index fc8e5839a0..b2a9fdd3be 100644 --- a/python/private/py_library.bzl +++ b/python/private/py_library.bzl @@ -28,7 +28,6 @@ load( load(":builders.bzl", "builders") load( ":common.bzl", - "PYTHON_FILE_EXTENSIONS", "collect_cc_info", "collect_imports", "collect_runfiles", @@ -38,14 +37,13 @@ load( "create_py_info", "filter_to_py_srcs", "get_imports", - "runfiles_root_path", ) load(":common_labels.bzl", "labels") load(":flags.bzl", "AddSrcsToRunfilesFlag", "PrecompileFlag", "VenvsSitePackages") load(":normalize_name.bzl", "normalize_name") load(":precompile.bzl", "maybe_precompile") load(":py_cc_link_params_info.bzl", "PyCcLinkParamsInfo") -load(":py_info.bzl", "PyInfo", "VenvSymlinkEntry", "VenvSymlinkKind") +load(":py_info.bzl", "PyInfo") load(":reexports.bzl", "BuiltinPyInfo") load(":rule_builders.bzl", "ruleb") load( @@ -53,6 +51,7 @@ load( "EXEC_TOOLS_TOOLCHAIN_TYPE", TOOLCHAIN_TYPE = "TARGET_TOOLCHAIN_TYPE", ) +load(":venv_runfiles.bzl", "get_venv_symlinks") load(":version.bzl", "version") LIBRARY_ATTRS = dicts.add( @@ -235,117 +234,27 @@ def _get_imports_and_venv_symlinks(ctx, semantics): venv_symlinks = [] if VenvsSitePackages.is_enabled(ctx): package, version_str = _get_package_and_version(ctx) - venv_symlinks = _get_venv_symlinks(ctx, package, version_str) - else: - imports = collect_imports(ctx, semantics) - return imports, venv_symlinks - -def _get_venv_symlinks(ctx, package, version_str): - imports = ctx.attr.imports - if len(imports) == 0: - fail("When venvs_site_packages is enabled, exactly one `imports` " + - "value must be specified, got 0") - elif len(imports) > 1: - fail("When venvs_site_packages is enabled, exactly one `imports` " + - "value must be specified, got {}".format(imports)) - else: - site_packages_root = imports[0] - - if site_packages_root.endswith("/"): - fail("The site packages root value from `imports` cannot end in " + - "slash, got {}".format(site_packages_root)) - if site_packages_root.startswith("/"): - fail("The site packages root value from `imports` cannot start with " + - "slash, got {}".format(site_packages_root)) - - # Append slash to prevent incorrectly prefix-string matches - site_packages_root += "/" - - # We have to build a list of (runfiles path, site-packages path) pairs of the files to - # create in the consuming binary's venv site-packages directory. To minimize the number of - # files to create, we just return the paths to the directories containing the code of - # interest. - # - # However, namespace packages complicate matters: multiple distributions install in the - # same directory in site-packages. This works out because they don't overlap in their - # files. Typically, they install to different directories within the namespace package - # directory. We also need to ensure that we can handle a case where the main package (e.g. - # airflow) has directories only containing data files and then namespace packages coming - # along and being next to it. - # - # Lastly we have to assume python modules just being `.py` files (e.g. typing-extensions) - # is just a single Python file. - - dir_symlinks = {} # dirname -> runfile path - venv_symlinks = [] - for src in ctx.files.srcs + ctx.files.data + ctx.files.pyi_srcs: - path = _repo_relative_short_path(src.short_path) - if not path.startswith(site_packages_root): - continue - path = path.removeprefix(site_packages_root) - dir_name, _, filename = path.rpartition("/") - if dir_name in dir_symlinks: - # we already have this dir, this allows us to short-circuit since most of the - # ctx.files.data might share the same directories as ctx.files.srcs - continue - - runfiles_dir_name, _, _ = runfiles_root_path(ctx, src.short_path).partition("/") - if dir_name: - # This can be either: - # * a directory with libs (e.g. numpy.libs, created by auditwheel) - # * a directory with `__init__.py` file that potentially also needs to be - # symlinked. - # * `.dist-info` directory - # - # This could be also regular files, that just need to be symlinked, so we will - # add the directory here. - dir_symlinks[dir_name] = runfiles_dir_name - elif src.extension in PYTHON_FILE_EXTENSIONS: - # This would be files that do not have directories and we just need to add - # direct symlinks to them as is, we only allow Python files in here - entry = VenvSymlinkEntry( - kind = VenvSymlinkKind.LIB, - link_to_path = paths.join(runfiles_dir_name, site_packages_root, filename), - package = package, - version = version_str, - venv_path = filename, - ) - venv_symlinks.append(entry) - - # Sort so that we encounter `foo` before `foo/bar`. This ensures we - # see the top-most explicit package first. - dirnames = sorted(dir_symlinks.keys()) - first_level_explicit_packages = [] - for d in dirnames: - is_sub_package = False - for existing in first_level_explicit_packages: - # Suffix with / to prevent foo matching foobar - if d.startswith(existing + "/"): - is_sub_package = True - break - if not is_sub_package: - first_level_explicit_packages.append(d) - - for dirname in first_level_explicit_packages: - prefix = dir_symlinks[dirname] - entry = VenvSymlinkEntry( - kind = VenvSymlinkKind.LIB, - link_to_path = paths.join(prefix, site_packages_root, dirname), - package = package, - version = version_str, - venv_path = dirname, + # NOTE: Already a list, but buildifier thinks its a depset and + # adds to_list() calls later. + imports = list(ctx.attr.imports) + if len(imports) == 0: + fail("When venvs_site_packages is enabled, exactly one `imports` " + + "value must be specified, got 0") + elif len(imports) > 1: + fail("When venvs_site_packages is enabled, exactly one `imports` " + + "value must be specified, got {}".format(imports)) + + venv_symlinks = get_venv_symlinks( + ctx, + ctx.files.srcs + ctx.files.data + ctx.files.pyi_srcs, + package, + version_str, + site_packages_root = imports[0], ) - venv_symlinks.append(entry) - - return venv_symlinks - -def _repo_relative_short_path(short_path): - # Convert `../+pypi+foo/some/file.py` to `some/file.py` - if short_path.startswith("../"): - return short_path[3:].partition("/")[2] else: - return short_path + imports = collect_imports(ctx, semantics) + return imports, venv_symlinks _MaybeBuiltinPyInfo = [BuiltinPyInfo] if BuiltinPyInfo != None else [] diff --git a/python/private/venv_runfiles.bzl b/python/private/venv_runfiles.bzl new file mode 100644 index 0000000000..291920b848 --- /dev/null +++ b/python/private/venv_runfiles.bzl @@ -0,0 +1,318 @@ +"""Code for constructing venvs.""" + +load("@bazel_skylib//lib:paths.bzl", "paths") +load( + ":common.bzl", + "PYTHON_FILE_EXTENSIONS", + "is_file", + "relative_path", + "runfiles_root_path", +) +load( + ":py_info.bzl", + "PyInfo", + "VenvSymlinkEntry", + "VenvSymlinkKind", +) + +def create_venv_app_files(ctx, deps, venv_dir_map): + """Creates the tree of app-specific files for a venv for a binary. + + App specific files are the files that come from dependencies. + + Args: + ctx: {type}`ctx` current ctx. + deps: {type}`list[Target]` the targets whose venv information + to put into the returned venv files. + venv_dir_map: mapping of VenvSymlinkKind constants to the + venv path. This tells the directory name of + platform/configuration-dependent directories. The values are + paths within the current ctx's venv (e.g. `_foo.venv/bin`). + + Returns: + {type}`list[File]` of the files that were created. + """ + + # maps venv-relative path to the runfiles path it should point to + entries = depset( + transitive = [ + dep[PyInfo].venv_symlinks + for dep in deps + if PyInfo in dep + ], + ).to_list() + + link_map = build_link_map(ctx, entries) + venv_files = [] + for kind, kind_map in link_map.items(): + base = venv_dir_map[kind] + for venv_path, link_to in kind_map.items(): + bin_venv_path = paths.join(base, venv_path) + if is_file(link_to): + if link_to.is_directory: + venv_link = ctx.actions.declare_directory(bin_venv_path) + else: + venv_link = ctx.actions.declare_file(bin_venv_path) + ctx.actions.symlink(output = venv_link, target_file = link_to) + else: + venv_link = ctx.actions.declare_symlink(bin_venv_path) + venv_link_rf_path = runfiles_root_path(ctx, venv_link.short_path) + rel_path = relative_path( + # dirname is necessary because a relative symlink is relative to + # the directory the symlink resides within. + from_ = paths.dirname(venv_link_rf_path), + to = link_to, + ) + ctx.actions.symlink(output = venv_link, target_path = rel_path) + venv_files.append(venv_link) + + return venv_files + +# Visible for testing +def build_link_map(ctx, entries): + """Compute the mapping of venv paths to their backing objects. + + + Args: + ctx: {type}`ctx` current ctx. + entries: {type}`list[VenvSymlinkEntry]` the entries that describe the + venv-relative + + Returns: + {type}`dict[str, dict[str, str|File]]` Mappings of venv paths to their + backing files. The first key is a `VenvSymlinkKind` value. + The inner dict keys are venv paths relative to the kind's diretory. The + inner dict values are strings or Files to link to. + """ + + version_by_pkg = {} # dict[str pkg, str version] + entries_by_kind = {} # dict[str kind, list[entry]] + + # Group by path kind and reduce to a single package's version of entries + for entry in entries: + entries_by_kind.setdefault(entry.kind, []) + if not entry.package: + entries_by_kind[entry.kind].append(entry) + continue + if entry.package not in version_by_pkg: + version_by_pkg[entry.package] = entry.version + entries_by_kind[entry.kind].append(entry) + continue + if entry.version == version_by_pkg[entry.package]: + entries_by_kind[entry.kind].append(entry) + continue + + # else: ignore it; not the selected version + + # final paths to keep, grouped by kind + keep_link_map = {} # dict[str kind, dict[path, str|File]] + for kind, entries in entries_by_kind.items(): + # dict[str kind-relative path, str|File link_to] + keep_kind_link_map = {} + + groups = _group_venv_path_entries(entries) + + for group in groups: + # If there's just one group, we can symlink to the directory + if len(group) == 1: + entry = group[0] + keep_kind_link_map[entry.venv_path] = entry.link_to_path + else: + # Merge a group of overlapping prefixes + _merge_venv_path_group(ctx, group, keep_kind_link_map) + + keep_link_map[kind] = keep_kind_link_map + + return keep_link_map + +def _group_venv_path_entries(entries): + """Group entries by VenvSymlinkEntry.venv_path overlap. + + This does an initial grouping by the top-level venv path an entry wants. + Entries that are underneath another entry are put into the same group. + + Returns: + {type}`list[list[VenvSymlinkEntry]]` The inner list is the entries under + a common venv path. The inner list is ordered from shortest to longest + path. + """ + + # Sort so order is top-down, ensuring grouping by short common prefix + entries = sorted(entries, key = lambda e: e.venv_path) + + groups = [] + current_group = None + current_group_prefix = None + for entry in entries: + prefix = entry.venv_path + anchored_prefix = prefix + "/" + if (current_group_prefix == None or + not anchored_prefix.startswith(current_group_prefix)): + current_group_prefix = anchored_prefix + current_group = [entry] + groups.append(current_group) + else: + current_group.append(entry) + + return groups + +def _merge_venv_path_group(ctx, group, keep_map): + """Merges a group of overlapping prefixes. + + Args: + ctx: {type}`ctx` current ctx. + group: {type}`list[VenvSymlinkEntry]` a group of entries with overlapping + `venv_path` prefixes, ordered from shortest to longest path. + keep_map: {type}`dict[str, str|File]` files kept after merging are + populated into this map. + """ + + # TODO: Compute the minimum number of entries to create. This can't avoid + # flattening the files depset, but can lower the number of materialized + # files significantly. Usually overlaps are limited to a small number + # of directories. + for entry in group: + prefix = entry.venv_path + for file in entry.files.to_list(): + # Compute the file-specific venv path. i.e. the relative + # path of the file under entry.venv_path, joined with + # entry.venv_path + rf_root_path = runfiles_root_path(ctx, file.short_path) + if not rf_root_path.startswith(entry.link_to_path): + # This generally shouldn't occur in practice, but just + # in case, skip them, for lack of a better option. + continue + venv_path = "{}/{}".format( + prefix, + rf_root_path.removeprefix(entry.link_to_path + "/"), + ) + + # For lack of a better option, first added wins. We happen to + # go in top-down prefix order, so the highest level namespace + # package typically wins. + if venv_path not in keep_map: + keep_map[venv_path] = file + +def get_venv_symlinks(ctx, files, package, version_str, site_packages_root): + """Compute the VenvSymlinkEntry objects for a library. + + Args: + ctx: {type}`ctx` the current ctx. + files: {type}`list[File]` the underlying files that are under + `site_packages_root` and intended to be part of the venv + contents. + package: {type}`str` the Python distribution name. + version_str: {type}`str` the distribution's version. + site_packages_root: {type}`str` prefix under which files are + considered to be part of the installed files. + + Returns: + {type}`list[VenvSymlinkEntry]` the entries that describe how + to map the files into a venv. + """ + if site_packages_root.endswith("/"): + fail("The `site_packages_root` value cannot end in " + + "slash, got {}".format(site_packages_root)) + if site_packages_root.startswith("/"): + fail("The `site_packages_root` cannot start with " + + "slash, got {}".format(site_packages_root)) + + # Append slash to prevent incorrect prefix-string matches + site_packages_root += "/" + + # We have to build a list of (runfiles path, site-packages path) pairs of the files to + # create in the consuming binary's venv site-packages directory. To minimize the number of + # files to create, we just return the paths to the directories containing the code of + # interest. + # + # However, namespace packages complicate matters: multiple distributions install in the + # same directory in site-packages. This works out because they don't overlap in their + # files. Typically, they install to different directories within the namespace package + # directory. We also need to ensure that we can handle a case where the main package (e.g. + # airflow) has directories only containing data files and then namespace packages coming + # along and being next to it. + # + # Lastly we have to assume python modules just being `.py` files (e.g. typing-extensions) + # is just a single Python file. + + dir_symlinks = {} # dirname -> runfile path + venv_symlinks = [] + + # Sort so order is top-down + all_files = sorted(files, key = lambda f: f.short_path) + + for src in all_files: + path = _repo_relative_short_path(src.short_path) + if not path.startswith(site_packages_root): + continue + path = path.removeprefix(site_packages_root) + dir_name, _, filename = path.rpartition("/") + + if dir_name in dir_symlinks: + # we already have this dir, this allows us to short-circuit since most of the + # ctx.files.data might share the same directories as ctx.files.srcs + continue + + runfiles_dir_name, _, _ = runfiles_root_path(ctx, src.short_path).partition("/") + if dir_name: + # This can be either: + # * a directory with libs (e.g. numpy.libs, created by auditwheel) + # * a directory with `__init__.py` file that potentially also needs to be + # symlinked. + # * `.dist-info` directory + # + # This could be also regular files, that just need to be symlinked, so we will + # add the directory here. + dir_symlinks[dir_name] = runfiles_dir_name + elif src.extension in PYTHON_FILE_EXTENSIONS: + # This would be files that do not have directories and we just need to add + # direct symlinks to them as is, we only allow Python files in here + entry = VenvSymlinkEntry( + kind = VenvSymlinkKind.LIB, + link_to_path = paths.join(runfiles_dir_name, site_packages_root, filename), + package = package, + version = version_str, + venv_path = filename, + files = depset([src]), + ) + venv_symlinks.append(entry) + + # Sort so that we encounter `foo` before `foo/bar`. This ensures we + # see the top-most explicit package first. + dirnames = sorted(dir_symlinks.keys()) + first_level_explicit_packages = [] + for d in dirnames: + is_sub_package = False + for existing in first_level_explicit_packages: + # Suffix with / to prevent foo matching foobar + if d.startswith(existing + "/"): + is_sub_package = True + break + if not is_sub_package: + first_level_explicit_packages.append(d) + + for dirname in first_level_explicit_packages: + prefix = dir_symlinks[dirname] + link_to_path = paths.join(prefix, site_packages_root, dirname) + entry = VenvSymlinkEntry( + kind = VenvSymlinkKind.LIB, + link_to_path = link_to_path, + package = package, + version = version_str, + venv_path = dirname, + files = depset([ + f + for f in all_files + if runfiles_root_path(ctx, f.short_path).startswith(link_to_path + "/") + ]), + ) + venv_symlinks.append(entry) + + return venv_symlinks + +def _repo_relative_short_path(short_path): + # Convert `../+pypi+foo/some/file.py` to `some/file.py` + if short_path.startswith("../"): + return short_path[3:].partition("/")[2] + else: + return short_path diff --git a/tests/bootstrap_impls/venv_relative_path_tests.bzl b/tests/bootstrap_impls/venv_relative_path_tests.bzl index ad4870fe08..72b5012809 100644 --- a/tests/bootstrap_impls/venv_relative_path_tests.bzl +++ b/tests/bootstrap_impls/venv_relative_path_tests.bzl @@ -15,7 +15,7 @@ "Unit tests for relative_path computation" load("@rules_testing//lib:test_suite.bzl", "test_suite") -load("//python/private:py_executable.bzl", "relative_path") # buildifier: disable=bzl-visibility +load("//python/private:common.bzl", "relative_path") # buildifier: disable=bzl-visibility _tests = [] diff --git a/tests/venv_site_packages_libs/BUILD.bazel b/tests/venv_site_packages_libs/BUILD.bazel index e64299e1ad..92d5dec6d3 100644 --- a/tests/venv_site_packages_libs/BUILD.bazel +++ b/tests/venv_site_packages_libs/BUILD.bazel @@ -26,6 +26,8 @@ py_reconfig_test( ":closer_lib", "//tests/venv_site_packages_libs/nspkg_alpha", "//tests/venv_site_packages_libs/nspkg_beta", + "//tests/venv_site_packages_libs/pkgutil_top", + "//tests/venv_site_packages_libs/pkgutil_top_sub", "@other//nspkg_delta", "@other//nspkg_gamma", "@other//nspkg_single", diff --git a/tests/venv_site_packages_libs/app_files_building/BUILD.bazel b/tests/venv_site_packages_libs/app_files_building/BUILD.bazel new file mode 100644 index 0000000000..60afd34c38 --- /dev/null +++ b/tests/venv_site_packages_libs/app_files_building/BUILD.bazel @@ -0,0 +1,5 @@ +load(":app_files_building_tests.bzl", "app_files_building_test_suite") + +app_files_building_test_suite( + name = "app_files_building_tests", +) diff --git a/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl new file mode 100644 index 0000000000..0a0265eb8c --- /dev/null +++ b/tests/venv_site_packages_libs/app_files_building/app_files_building_tests.bzl @@ -0,0 +1,247 @@ +"" + +load("@bazel_skylib//lib:paths.bzl", "paths") +load("@rules_testing//lib:analysis_test.bzl", "analysis_test") +load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("//python/private:py_info.bzl", "VenvSymlinkEntry", "VenvSymlinkKind") # buildifier: disable=bzl-visibility +load("//python/private:venv_runfiles.bzl", "build_link_map") # buildifier: disable=bzl-visibility + +_tests = [] + +def _ctx(workspace_name = "_main"): + return struct( + workspace_name = workspace_name, + ) + +def _file(short_path): + return struct( + short_path = short_path, + ) + +def _entry(venv_path, link_to_path, files = [], **kwargs): + kwargs.setdefault("kind", VenvSymlinkKind.LIB) + kwargs.setdefault("package", None) + kwargs.setdefault("version", None) + + def short_pathify(path): + path = paths.join(link_to_path, path) + + # In tests, `../` is used to step out of the link_to_path scope. + path = paths.normalize(path) + + # Treat paths starting with "+" as external references. This matches + # how bzlmod names things. + if link_to_path.startswith("+"): + # File.short_path to external repos have `../` prefixed + path = paths.join("../", path) + else: + # File.short_path in main repo is main-repo relative + _, _, path = path.partition("/") + return path + + return VenvSymlinkEntry( + venv_path = venv_path, + link_to_path = link_to_path, + files = depset([ + _file(short_pathify(f)) + for f in files + ]), + **kwargs + ) + +def _test_conflict_merging(name): + analysis_test( + name = name, + impl = _test_conflict_merging_impl, + target = "//python:none", + ) + +_tests.append(_test_conflict_merging) + +def _test_conflict_merging_impl(env, _): + entries = [ + _entry("a", "+pypi_a/site-packages/a", ["a.txt"]), + _entry("a/b", "+pypi_a_b/site-packages/a/b", ["b.txt"]), + _entry("x", "_main/src/x", ["x.txt"]), + _entry("x/p", "_main/src-dev/x/p", ["p.txt"]), + _entry("duplicate", "+dupe_a/site-packages/duplicate", ["d.py"]), + # This entry also provides a/x.py, but since the "a" entry is shorter + # and comes first, its version of x.py should win. + _entry("duplicate", "+dupe_b/site-packages/duplicate", ["d.py"]), + ] + + actual = build_link_map(_ctx(), entries) + expected_libs = { + "a/a.txt": _file("../+pypi_a/site-packages/a/a.txt"), + "a/b/b.txt": _file("../+pypi_a_b/site-packages/a/b/b.txt"), + "duplicate/d.py": _file("../+dupe_a/site-packages/duplicate/d.py"), + "x/p/p.txt": _file("src-dev/x/p/p.txt"), + "x/x.txt": _file("src/x/x.txt"), + } + env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs) + env.expect.that_dict(actual).keys().contains_exactly([VenvSymlinkKind.LIB]) + +def _test_package_version_filtering(name): + analysis_test( + name = name, + impl = _test_package_version_filtering_impl, + target = "//python:none", + ) + +_tests.append(_test_package_version_filtering) + +def _test_package_version_filtering_impl(env, _): + entries = [ + _entry("foo", "+pypi_v1/site-packages/foo", ["foo.txt"], package = "foo", version = "1.0"), + _entry("foo", "+pypi_v2/site-packages/foo", ["bar.txt"], package = "foo", version = "2.0"), + ] + + actual = build_link_map(_ctx(), entries) + + expected_libs = { + "foo": "+pypi_v1/site-packages/foo", + } + env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs) + +def _test_malformed_entry(name): + analysis_test( + name = name, + impl = _test_malformed_entry_impl, + target = "//python:none", + ) + +_tests.append(_test_malformed_entry) + +def _test_malformed_entry_impl(env, _): + entries = [ + _entry( + "a", + "+pypi_a/site-packages/a", + # This file is outside the link_to_path, so it should be ignored. + ["../outside.txt"], + ), + # A second, conflicting, entry is added to force merging of the known + # files. Without this, there's no conflict, so files is never + # considered. + _entry( + "a", + "+pypi_b/site-packages/a", + ["../outside.txt"], + ), + ] + + actual = build_link_map(_ctx(), entries) + env.expect.that_dict(actual).contains_exactly({ + VenvSymlinkKind.LIB: {}, + }) + +def _test_complex_namespace_packages(name): + analysis_test( + name = name, + impl = _test_complex_namespace_packages_impl, + target = "//python:none", + ) + +_tests.append(_test_complex_namespace_packages) + +def _test_complex_namespace_packages_impl(env, _): + entries = [ + _entry("a/b", "+pypi_a_b/site-packages/a/b", ["b.txt"]), + _entry("a/c", "+pypi_a_c/site-packages/a/c", ["c.txt"]), + _entry("x/y/z", "+pypi_x_y_z/site-packages/x/y/z", ["z.txt"]), + _entry("foo", "+pypi_foo/site-packages/foo", ["foo.txt"]), + _entry("foobar", "+pypi_foobar/site-packages/foobar", ["foobar.txt"]), + ] + + actual = build_link_map(_ctx(), entries) + expected_libs = { + "a/b": "+pypi_a_b/site-packages/a/b", + "a/c": "+pypi_a_c/site-packages/a/c", + "foo": "+pypi_foo/site-packages/foo", + "foobar": "+pypi_foobar/site-packages/foobar", + "x/y/z": "+pypi_x_y_z/site-packages/x/y/z", + } + env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs) + +def _test_empty_and_trivial_inputs(name): + analysis_test( + name = name, + impl = _test_empty_and_trivial_inputs_impl, + target = "//python:none", + ) + +_tests.append(_test_empty_and_trivial_inputs) + +def _test_empty_and_trivial_inputs_impl(env, _): + # Test with empty list of entries + actual = build_link_map(_ctx(), []) + env.expect.that_dict(actual).contains_exactly({}) + + # Test with an entry with no files + entries = [_entry("a", "+pypi_a/site-packages/a", [])] + actual = build_link_map(_ctx(), entries) + env.expect.that_dict(actual).contains_exactly({ + VenvSymlinkKind.LIB: {"a": "+pypi_a/site-packages/a"}, + }) + +def _test_multiple_venv_symlink_kinds(name): + analysis_test( + name = name, + impl = _test_multiple_venv_symlink_kinds_impl, + target = "//python:none", + ) + +_tests.append(_test_multiple_venv_symlink_kinds) + +def _test_multiple_venv_symlink_kinds_impl(env, _): + entries = [ + _entry( + "libfile", + "+pypi_lib/site-packages/libfile", + ["lib.txt"], + kind = + VenvSymlinkKind.LIB, + ), + _entry( + "binfile", + "+pypi_bin/bin/binfile", + ["bin.txt"], + kind = VenvSymlinkKind.BIN, + ), + _entry( + "includefile", + "+pypi_include/include/includefile", + ["include.h"], + kind = + VenvSymlinkKind.INCLUDE, + ), + ] + + actual = build_link_map(_ctx(), entries) + + expected_libs = { + "libfile": "+pypi_lib/site-packages/libfile", + } + env.expect.that_dict(actual[VenvSymlinkKind.LIB]).contains_exactly(expected_libs) + + expected_bins = { + "binfile": "+pypi_bin/bin/binfile", + } + env.expect.that_dict(actual[VenvSymlinkKind.BIN]).contains_exactly(expected_bins) + + expected_includes = { + "includefile": "+pypi_include/include/includefile", + } + env.expect.that_dict(actual[VenvSymlinkKind.INCLUDE]).contains_exactly(expected_includes) + + env.expect.that_dict(actual).keys().contains_exactly([ + VenvSymlinkKind.LIB, + VenvSymlinkKind.BIN, + VenvSymlinkKind.INCLUDE, + ]) + +def app_files_building_test_suite(name): + test_suite( + name = name, + tests = _tests, + ) diff --git a/tests/venv_site_packages_libs/bin.py b/tests/venv_site_packages_libs/bin.py index 7e5838d2c2..772925f00e 100644 --- a/tests/venv_site_packages_libs/bin.py +++ b/tests/venv_site_packages_libs/bin.py @@ -14,14 +14,26 @@ def setUp(self): def assert_imported_from_venv(self, module_name): module = importlib.import_module(module_name) self.assertEqual(module.__name__, module_name) + self.assertIsNotNone( + module.__file__, + f"Expected module {module_name!r} to have" + + f"__file__ set, but got None. {module=}", + ) self.assertTrue( module.__file__.startswith(self.venv), f"\n{module_name} was imported, but not from the venv.\n" + f"venv : {self.venv}\n" + f"actual: {module.__file__}", ) + return module def test_imported_from_venv(self): + m = self.assert_imported_from_venv("pkgutil_top") + self.assertEqual(m.WHOAMI, "pkgutil_top") + + m = self.assert_imported_from_venv("pkgutil_top.sub") + self.assertEqual(m.WHOAMI, "pkgutil_top.sub") + self.assert_imported_from_venv("nspkg.subnspkg.alpha") self.assert_imported_from_venv("nspkg.subnspkg.beta") self.assert_imported_from_venv("nspkg.subnspkg.gamma") diff --git a/tests/venv_site_packages_libs/pkgutil_top/BUILD.bazel b/tests/venv_site_packages_libs/pkgutil_top/BUILD.bazel new file mode 100644 index 0000000000..c805b1ad53 --- /dev/null +++ b/tests/venv_site_packages_libs/pkgutil_top/BUILD.bazel @@ -0,0 +1,10 @@ +load("//python:py_library.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "pkgutil_top", + srcs = glob(["site-packages/**/*.py"]), + experimental_venvs_site_packages = "//python/config_settings:venvs_site_packages", + imports = [package_name() + "/site-packages"], +) diff --git a/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/__init__.py b/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/__init__.py new file mode 100644 index 0000000000..e1809a325a --- /dev/null +++ b/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/__init__.py @@ -0,0 +1,2 @@ +WHOAMI = "pkgutil_top" +__path__ = __import__("pkgutil").extend_path(__path__, __name__) diff --git a/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/top.py b/tests/venv_site_packages_libs/pkgutil_top/site-packages/pkgutil_top/top.py new file mode 100644 index 0000000000..e69de29bb2 diff --git a/tests/venv_site_packages_libs/pkgutil_top_sub/BUILD.bazel b/tests/venv_site_packages_libs/pkgutil_top_sub/BUILD.bazel new file mode 100644 index 0000000000..9d771628a0 --- /dev/null +++ b/tests/venv_site_packages_libs/pkgutil_top_sub/BUILD.bazel @@ -0,0 +1,10 @@ +load("//python:py_library.bzl", "py_library") + +package(default_visibility = ["//visibility:public"]) + +py_library( + name = "pkgutil_top_sub", + srcs = glob(["site-packages/**/*.py"]), + experimental_venvs_site_packages = "//python/config_settings:venvs_site_packages", + imports = [package_name() + "/site-packages"], +) diff --git a/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/__init__.py b/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/__init__.py new file mode 100644 index 0000000000..e7fb2340ea --- /dev/null +++ b/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/__init__.py @@ -0,0 +1 @@ +WHOAMI = "pkgutil_top.sub" diff --git a/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/suba.py b/tests/venv_site_packages_libs/pkgutil_top_sub/site-packages/pkgutil_top/sub/suba.py new file mode 100644 index 0000000000..e69de29bb2