From b5339a223625c0546ea5533ba3b182b4ce3766d2 Mon Sep 17 00:00:00 2001 From: Alexandre Miziara Date: Fri, 26 Sep 2025 20:11:27 -0300 Subject: [PATCH 1/6] Add ignore_native_libs option for hermetic manifest generation Fixes platform-specific inconsistency in gazelle_python_manifest where different native libraries (.so files) in platform-specific wheels caused different manifests on Linux vs macOS. Changes: - Add ignore_native_libs parameter to modules_mapping rule (default: False) - Skip .so file processing in generator.py when flag is enabled - Add comprehensive test coverage for the new functionality Usage: ```starlark modules_mapping( name = "modules_map", wheels = all_whl_requirements, ignore_native_libs = True, # Enables hermetic cross-platform builds ) ``` This solves the opencv-python-headless case and similar packages with platform-specific native libraries, enabling hermetic builds across different development platforms. --- gazelle/modules_mapping/def.bzl | 7 ++++++ gazelle/modules_mapping/generator.py | 10 +++++++-- gazelle/modules_mapping/test_generator.py | 27 ++++++++++++++++++++--- 3 files changed, 39 insertions(+), 5 deletions(-) diff --git a/gazelle/modules_mapping/def.bzl b/gazelle/modules_mapping/def.bzl index 48a5477b93..c9a77d153c 100644 --- a/gazelle/modules_mapping/def.bzl +++ b/gazelle/modules_mapping/def.bzl @@ -38,6 +38,8 @@ def _modules_mapping_impl(ctx): args.set_param_file_format(format = "multiline") if ctx.attr.include_stub_packages: args.add("--include_stub_packages") + if ctx.attr.ignore_native_libs: + args.add("--ignore_native_libs") args.add("--output_file", modules_mapping) args.add_all("--exclude_patterns", ctx.attr.exclude_patterns) args.add_all("--wheels", all_wheels) @@ -64,6 +66,11 @@ modules_mapping = rule( doc = "Whether to include stub packages in the mapping.", mandatory = False, ), + "ignore_native_libs": attr.bool( + default = False, + doc = "Whether to ignore native libraries (*.so files) for platform-independent 
mappings.", + mandatory = False, + ), "modules_mapping_name": attr.string( default = "modules_mapping.json", doc = "The name for the output JSON file.", diff --git a/gazelle/modules_mapping/generator.py b/gazelle/modules_mapping/generator.py index ea11f3e236..0cfedb8138 100644 --- a/gazelle/modules_mapping/generator.py +++ b/gazelle/modules_mapping/generator.py @@ -26,11 +26,12 @@ class Generator: output_file = None excluded_patterns = None - def __init__(self, stderr, output_file, excluded_patterns, include_stub_packages): + def __init__(self, stderr, output_file, excluded_patterns, include_stub_packages, ignore_native_libs=False): self.stderr = stderr self.output_file = output_file self.excluded_patterns = [re.compile(pattern) for pattern in excluded_patterns] self.include_stub_packages = include_stub_packages + self.ignore_native_libs = ignore_native_libs self.mapping = {} # dig_wheel analyses the wheel .whl file determining the modules it provides @@ -74,6 +75,10 @@ def simplify(self): def module_for_path(self, path, whl): ext = pathlib.Path(path).suffix if ext == ".py" or ext == ".so": + # Skip native libraries if ignore_native_libs is enabled + if ext == ".so" and self.ignore_native_libs: + return + if "purelib" in path or "platlib" in path: root = "/".join(path.split("/")[2:]) else: @@ -158,10 +163,11 @@ def data_has_purelib_or_platlib(path): ) parser.add_argument("--output_file", type=str) parser.add_argument("--include_stub_packages", action="store_true") + parser.add_argument("--ignore_native_libs", action="store_true") parser.add_argument("--exclude_patterns", nargs="+", default=[]) parser.add_argument("--wheels", nargs="+", default=[]) args = parser.parse_args() generator = Generator( - sys.stderr, args.output_file, args.exclude_patterns, args.include_stub_packages + sys.stderr, args.output_file, args.exclude_patterns, args.include_stub_packages, args.ignore_native_libs ) sys.exit(generator.run(args.wheels)) diff --git 
a/gazelle/modules_mapping/test_generator.py b/gazelle/modules_mapping/test_generator.py index d6d2f19039..8d7762d8de 100644 --- a/gazelle/modules_mapping/test_generator.py +++ b/gazelle/modules_mapping/test_generator.py @@ -7,7 +7,7 @@ class GeneratorTest(unittest.TestCase): def test_generator(self): whl = pathlib.Path(__file__).parent / "pytest-8.3.3-py3-none-any.whl" - gen = Generator(None, None, {}, False) + gen = Generator(None, None, {}, False, False) gen.dig_wheel(whl) self.assertLessEqual( { @@ -21,7 +21,7 @@ def test_generator(self): def test_stub_generator(self): whl = pathlib.Path(__file__).parent / "django_types-0.19.1-py3-none-any.whl" - gen = Generator(None, None, {}, True) + gen = Generator(None, None, {}, True, False) gen.dig_wheel(whl) self.assertLessEqual( { @@ -32,13 +32,34 @@ def test_stub_generator(self): def test_stub_excluded(self): whl = pathlib.Path(__file__).parent / "django_types-0.19.1-py3-none-any.whl" - gen = Generator(None, None, {}, False) + gen = Generator(None, None, {}, False, False) gen.dig_wheel(whl) self.assertEqual( {}.items(), gen.mapping.items(), ) + def test_ignore_native_libs(self): + # Test the ignore_native_libs functionality with the module_for_path method + gen_with_native_libs = Generator(None, None, {}, False, False) + gen_without_native_libs = Generator(None, None, {}, False, True) + + # Simulate a Python file - should be included in both cases + gen_with_native_libs.module_for_path("cv2/__init__.py", "opencv_python_headless-4.8.1-cp310-cp310-linux_x86_64.whl") + gen_without_native_libs.module_for_path("cv2/__init__.py", "opencv_python_headless-4.8.1-cp310-cp310-linux_x86_64.whl") + + # Simulate a native library - should be included only when ignore_native_libs=False + gen_with_native_libs.module_for_path("opencv_python_headless.libs/libopenblas-r0-f650aae0.so", "opencv_python_headless-4.8.1-cp310-cp310-linux_x86_64.whl") + 
gen_without_native_libs.module_for_path("opencv_python_headless.libs/libopenblas-r0-f650aae0.so", "opencv_python_headless-4.8.1-cp310-cp310-linux_x86_64.whl") + + # Both should have the Python module mapping + self.assertIn("cv2", gen_with_native_libs.mapping) + self.assertIn("cv2", gen_without_native_libs.mapping) + + # Only gen_with_native_libs should have the native library mapping + self.assertIn("opencv_python_headless.libs.libopenblas-r0-f650aae0", gen_with_native_libs.mapping) + self.assertNotIn("opencv_python_headless.libs.libopenblas-r0-f650aae0", gen_without_native_libs.mapping) + if __name__ == "__main__": unittest.main() From 02bba6777d16e8f5840a56c96df7f8f7df91f513 Mon Sep 17 00:00:00 2001 From: Alexandre Miziara Date: Fri, 26 Sep 2025 21:24:20 -0300 Subject: [PATCH 2/6] Update docs --- examples/build_file_generation/BUILD.bazel | 3 +++ gazelle/docs/installation_and_usage.md | 8 ++++++++ gazelle/modules_mapping/def.bzl | 5 ++++- 3 files changed, 15 insertions(+), 1 deletion(-) diff --git a/examples/build_file_generation/BUILD.bazel b/examples/build_file_generation/BUILD.bazel index a378775968..4f7d897fb8 100644 --- a/examples/build_file_generation/BUILD.bazel +++ b/examples/build_file_generation/BUILD.bazel @@ -28,6 +28,9 @@ modules_mapping( "^_|(\\._)+", # This is the default. "(\\.tests)+", # Add a custom one to get rid of the psutil tests. ], + # Uncomment the next line to enable hermetic builds across platforms + # by ignoring platform-specific native libraries (useful for opencv-python-headless, etc.) 
+ # ignore_native_libs = True, wheels = all_whl_requirements, ) diff --git a/gazelle/docs/installation_and_usage.md b/gazelle/docs/installation_and_usage.md index b151ade25e..f0eb6756d7 100644 --- a/gazelle/docs/installation_and_usage.md +++ b/gazelle/docs/installation_and_usage.md @@ -96,6 +96,14 @@ modules_mapping( # for tools like type checkers and IDEs, improving the development experience and # reducing manual overhead in managing separate stub packages. include_stub_packages = True, + + # ignore_native_libs: bool (default: False) + # If set to True, this flag ignores platform-specific native libraries (.so files) + # when generating the modules mapping. This ensures hermetic builds across different + # platforms (Linux, macOS, etc.) by producing identical manifests regardless of + # platform-specific wheel contents. Useful for packages like opencv-python-headless + # that include different native libraries on different platforms. + # ignore_native_libs = True, ) # Gazelle python extension needs a manifest file mapping from diff --git a/gazelle/modules_mapping/def.bzl b/gazelle/modules_mapping/def.bzl index c9a77d153c..ec00383789 100644 --- a/gazelle/modules_mapping/def.bzl +++ b/gazelle/modules_mapping/def.bzl @@ -68,7 +68,10 @@ modules_mapping = rule( ), "ignore_native_libs": attr.bool( default = False, - doc = "Whether to ignore native libraries (*.so files) for platform-independent mappings.", + doc = "Whether to ignore platform-specific native libraries (*.so files) when generating mappings. " + + "When True, ensures hermetic builds across different platforms by excluding native library " + + "mappings that vary between Linux, macOS, etc. 
Useful for packages like opencv-python-headless " + + "that bundle different native libraries on different platforms.", mandatory = False, ), "modules_mapping_name": attr.string( From 69c53ee2c5a9e583d515bd071628d5ca9ced5e91 Mon Sep 17 00:00:00 2001 From: Alexandre Miziara Date: Fri, 26 Sep 2025 21:34:27 -0300 Subject: [PATCH 3/6] Fix doc. --- gazelle/docs/installation_and_usage.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gazelle/docs/installation_and_usage.md b/gazelle/docs/installation_and_usage.md index f0eb6756d7..3577d6fe15 100644 --- a/gazelle/docs/installation_and_usage.md +++ b/gazelle/docs/installation_and_usage.md @@ -103,7 +103,7 @@ modules_mapping( # platforms (Linux, macOS, etc.) by producing identical manifests regardless of # platform-specific wheel contents. Useful for packages like opencv-python-headless # that include different native libraries on different platforms. - # ignore_native_libs = True, + ignore_native_libs = True, ) # Gazelle python extension needs a manifest file mapping from From f6d6483c8486ead73641d84dc5680ab1bd14b98d Mon Sep 17 00:00:00 2001 From: Alexandre Miziara Date: Fri, 26 Sep 2025 23:39:01 -0300 Subject: [PATCH 4/6] refact --- .bazelrc | 4 +- examples/build_file_generation/BUILD.bazel | 4 +- gazelle/docs/installation_and_usage.md | 15 +- gazelle/modules_mapping/def.bzl | 20 +-- gazelle/modules_mapping/generator.py | 29 +++- gazelle/modules_mapping/test_generator.py | 64 ++++++-- gazelle_python_manifest_platform_issue.md | 168 +++++++++++++++++++++ 7 files changed, 260 insertions(+), 44 deletions(-) create mode 100644 gazelle_python_manifest_platform_issue.md diff --git a/.bazelrc b/.bazelrc index d7e1771336..24d85c9771 100644 --- a/.bazelrc +++ b/.bazelrc @@ -4,8 +4,8 @@ # (Note, we cannot use `common --deleted_packages` because the bazel version command doesn't support it) # To update these lines, execute # `bazel run @rules_bazel_integration_test//tools:update_deleted_packages` -build 
--deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,gazelle/python/private,rules_python-repro,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered,tests/modules/another_module,tests/modules/other,tests/modules/other/nspkg_delta,tests/modules/other/nspkg_gamma,tests/modules/other/nspkg_single,tests/modules/other/simple_v1,tests/modules/other/simple_v2,tests/modules/other/with_external_data,tests/whl_with_build_files/testdata,tests/whl_with_build_files/testdata/somepkg,tests/whl_with_build_files/testdata/somepkg-1.0.dist-i
nfo,tests/whl_with_build_files/testdata/somepkg/subpkg -query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/pythonconfig,gazelle/python/private,rules_python-repro,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered,tests/modules/another_module,tests/modules/other,tests/modules/other/nspkg_delta,tests/modules/other/nspkg_gamma,tests/modules/other/nspkg_single,tests/modules/other/simple_v1,tests/modules/other/simple_v2,tests/modules/other/with_external_data,tests/whl_with_build_files/testdata,tests/whl_with_build_files/testdata/
somepkg,tests/whl_with_build_files/testdata/somepkg-1.0.dist-info,tests/whl_with_build_files/testdata/somepkg/subpkg +build --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/docs,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/python/private,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered,tests/modules/another_module,tests/modules/other,tests/modules/other/nspkg_delta,tests/modules/other/nspkg_gamma,tests/modules/other/nspkg_single,tests/modules/other/simple_v1,tests/modules/other/simple_v2,tests/modules/other/with_external_data,tests/whl_with_b
uild_files/testdata,tests/whl_with_build_files/testdata/somepkg,tests/whl_with_build_files/testdata/somepkg-1.0.dist-info,tests/whl_with_build_files/testdata/somepkg/subpkg +query --deleted_packages=examples/build_file_generation,examples/build_file_generation/random_number_generator,examples/bzlmod,examples/bzlmod_build_file_generation,examples/bzlmod_build_file_generation/other_module/other_module/pkg,examples/bzlmod_build_file_generation/runfiles,examples/bzlmod/entry_points,examples/bzlmod/entry_points/tests,examples/bzlmod/libs/my_lib,examples/bzlmod/other_module,examples/bzlmod/other_module/other_module/pkg,examples/bzlmod/patches,examples/bzlmod/py_proto_library,examples/bzlmod/py_proto_library/example.com/another_proto,examples/bzlmod/py_proto_library/example.com/proto,examples/bzlmod/runfiles,examples/bzlmod/tests,examples/bzlmod/tests/other_module,examples/bzlmod/whl_mods,examples/multi_python_versions/libs/my_lib,examples/multi_python_versions/requirements,examples/multi_python_versions/tests,examples/pip_parse,examples/pip_parse_vendored,examples/pip_repository_annotations,examples/py_proto_library,examples/py_proto_library/example.com/another_proto,examples/py_proto_library/example.com/proto,gazelle,gazelle/docs,gazelle/manifest,gazelle/manifest/generate,gazelle/manifest/hasher,gazelle/manifest/test,gazelle/modules_mapping,gazelle/python,gazelle/python/private,gazelle/pythonconfig,tests/integration/compile_pip_requirements,tests/integration/compile_pip_requirements_test_from_external_repo,tests/integration/custom_commands,tests/integration/ignore_root_user_error,tests/integration/ignore_root_user_error/submodule,tests/integration/local_toolchains,tests/integration/pip_parse,tests/integration/pip_parse/empty,tests/integration/py_cc_toolchain_registered,tests/modules/another_module,tests/modules/other,tests/modules/other/nspkg_delta,tests/modules/other/nspkg_gamma,tests/modules/other/nspkg_single,tests/modules/other/simple_v1,tests/modules/other/simple_v2
,tests/modules/other/with_external_data,tests/whl_with_build_files/testdata,tests/whl_with_build_files/testdata/somepkg,tests/whl_with_build_files/testdata/somepkg-1.0.dist-info,tests/whl_with_build_files/testdata/somepkg/subpkg test --test_output=errors diff --git a/examples/build_file_generation/BUILD.bazel b/examples/build_file_generation/BUILD.bazel index 4f7d897fb8..8b3b3880d9 100644 --- a/examples/build_file_generation/BUILD.bazel +++ b/examples/build_file_generation/BUILD.bazel @@ -29,8 +29,8 @@ modules_mapping( "(\\.tests)+", # Add a custom one to get rid of the psutil tests. ], # Uncomment the next line to enable hermetic builds across platforms - # by ignoring platform-specific native libraries (useful for opencv-python-headless, etc.) - # ignore_native_libs = True, + # by skipping private shared objects under .libs directories (useful for opencv-python-headless, etc.) + # skip_private_shared_objects = True, wheels = all_whl_requirements, ) diff --git a/gazelle/docs/installation_and_usage.md b/gazelle/docs/installation_and_usage.md index 3577d6fe15..e3b1494a0a 100644 --- a/gazelle/docs/installation_and_usage.md +++ b/gazelle/docs/installation_and_usage.md @@ -97,13 +97,14 @@ modules_mapping( # reducing manual overhead in managing separate stub packages. include_stub_packages = True, - # ignore_native_libs: bool (default: False) - # If set to True, this flag ignores platform-specific native libraries (.so files) - # when generating the modules mapping. This ensures hermetic builds across different - # platforms (Linux, macOS, etc.) by producing identical manifests regardless of - # platform-specific wheel contents. Useful for packages like opencv-python-headless - # that include different native libraries on different platforms. - ignore_native_libs = True, + # skip_private_shared_objects: bool (default: False) + # If set to True, this flag skips private shared objects under .libs directories + # when generating the modules mapping. 
These are non-importable dependency libraries + # (like libopenblas.so) that vary between Linux distributions and break build + # hermiticity. Ensures identical manifests across platforms by excluding libraries + # that cannot be imported in Python code. macOS uses .dylib files which are + # naturally excluded by this Linux-specific .so filtering. + skip_private_shared_objects = True, ) # Gazelle python extension needs a manifest file mapping from diff --git a/gazelle/modules_mapping/def.bzl b/gazelle/modules_mapping/def.bzl index ec00383789..85ba536a06 100644 --- a/gazelle/modules_mapping/def.bzl +++ b/gazelle/modules_mapping/def.bzl @@ -38,8 +38,8 @@ def _modules_mapping_impl(ctx): args.set_param_file_format(format = "multiline") if ctx.attr.include_stub_packages: args.add("--include_stub_packages") - if ctx.attr.ignore_native_libs: - args.add("--ignore_native_libs") + if ctx.attr.skip_private_shared_objects: + args.add("--skip_private_shared_objects") args.add("--output_file", modules_mapping) args.add_all("--exclude_patterns", ctx.attr.exclude_patterns) args.add_all("--wheels", all_wheels) @@ -66,19 +66,19 @@ modules_mapping = rule( doc = "Whether to include stub packages in the mapping.", mandatory = False, ), - "ignore_native_libs": attr.bool( - default = False, - doc = "Whether to ignore platform-specific native libraries (*.so files) when generating mappings. " + - "When True, ensures hermetic builds across different platforms by excluding native library " + - "mappings that vary between Linux, macOS, etc. Useful for packages like opencv-python-headless " + - "that bundle different native libraries on different platforms.", - mandatory = False, - ), "modules_mapping_name": attr.string( default = "modules_mapping.json", doc = "The name for the output JSON file.", mandatory = False, ), + "skip_private_shared_objects": attr.bool( + default = False, + doc = "Whether to skip private shared objects under .libs directories when generating mappings. 
" + + "When True, excludes non-importable dependency libraries (like libopenblas.so) that vary " + + "between Linux platforms and break build hermiticity. These .libs files are not actual " + + "Python modules and cannot be imported. macOS uses .dylib files which are naturally excluded.", + mandatory = False, + ), "wheels": attr.label_list( allow_files = True, doc = "The list of wheels, usually the 'all_whl_requirements' from @//:requirements.bzl", diff --git a/gazelle/modules_mapping/generator.py b/gazelle/modules_mapping/generator.py index 0cfedb8138..ebbf09e08d 100644 --- a/gazelle/modules_mapping/generator.py +++ b/gazelle/modules_mapping/generator.py @@ -26,12 +26,19 @@ class Generator: output_file = None excluded_patterns = None - def __init__(self, stderr, output_file, excluded_patterns, include_stub_packages, ignore_native_libs=False): + def __init__( + self, + stderr, + output_file, + excluded_patterns, + include_stub_packages, + skip_private_shared_objects=False, + ): self.stderr = stderr self.output_file = output_file self.excluded_patterns = [re.compile(pattern) for pattern in excluded_patterns] self.include_stub_packages = include_stub_packages - self.ignore_native_libs = ignore_native_libs + self.skip_private_shared_objects = skip_private_shared_objects self.mapping = {} # dig_wheel analyses the wheel .whl file determining the modules it provides @@ -75,9 +82,13 @@ def simplify(self): def module_for_path(self, path, whl): ext = pathlib.Path(path).suffix if ext == ".py" or ext == ".so": - # Skip native libraries if ignore_native_libs is enabled - if ext == ".so" and self.ignore_native_libs: - return + # Skip private shared objects under .libs directories on Linux. + # These are non-importable dependency libraries (like libopenblas.so) that vary + # between platforms and make builds non-hermetic. macOS uses .dylib files + # which are naturally excluded by the .so check. 
+ if ext == ".so" and self.skip_private_shared_objects: + if ".libs/" in path or path.split("/")[0].endswith(".libs"): + return if "purelib" in path or "platlib" in path: root = "/".join(path.split("/")[2:]) @@ -163,11 +174,15 @@ def data_has_purelib_or_platlib(path): ) parser.add_argument("--output_file", type=str) parser.add_argument("--include_stub_packages", action="store_true") - parser.add_argument("--ignore_native_libs", action="store_true") + parser.add_argument("--skip_private_shared_objects", action="store_true") parser.add_argument("--exclude_patterns", nargs="+", default=[]) parser.add_argument("--wheels", nargs="+", default=[]) args = parser.parse_args() generator = Generator( - sys.stderr, args.output_file, args.exclude_patterns, args.include_stub_packages, args.ignore_native_libs + sys.stderr, + args.output_file, + args.exclude_patterns, + args.include_stub_packages, + args.skip_private_shared_objects, ) sys.exit(generator.run(args.wheels)) diff --git a/gazelle/modules_mapping/test_generator.py b/gazelle/modules_mapping/test_generator.py index 8d7762d8de..e25ae7c9e1 100644 --- a/gazelle/modules_mapping/test_generator.py +++ b/gazelle/modules_mapping/test_generator.py @@ -39,26 +39,58 @@ def test_stub_excluded(self): gen.mapping.items(), ) - def test_ignore_native_libs(self): - # Test the ignore_native_libs functionality with the module_for_path method - gen_with_native_libs = Generator(None, None, {}, False, False) - gen_without_native_libs = Generator(None, None, {}, False, True) + def test_skip_private_shared_objects(self): + # Test the skip_private_shared_objects functionality with the module_for_path method + gen_with_private_libs = Generator(None, None, {}, False, False) + gen_without_private_libs = Generator(None, None, {}, False, True) - # Simulate a Python file - should be included in both cases - gen_with_native_libs.module_for_path("cv2/__init__.py", "opencv_python_headless-4.8.1-cp310-cp310-linux_x86_64.whl") - 
gen_without_native_libs.module_for_path("cv2/__init__.py", "opencv_python_headless-4.8.1-cp310-cp310-linux_x86_64.whl") + # Simulate Python files - should be included in both cases + gen_with_private_libs.module_for_path( + "cv2/__init__.py", + "opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.whl", + ) + gen_without_private_libs.module_for_path( + "cv2/__init__.py", + "opencv_python_headless-4.12.0.88-cp37-abi3-manylinux2014_x86_64.whl", + ) + gen_with_private_libs.module_for_path( + "numpy/__init__.py", "numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.whl" + ) + gen_without_private_libs.module_for_path( + "numpy/__init__.py", "numpy-2.2.6-cp310-cp310-manylinux_2_17_x86_64.whl" + ) + + # Real-world examples from wheels + private_shared_objects = [ + "opencv_python_headless.libs/libopenblas-r0-f650aae0.so", + "numpy.libs/libscipy_openblas64_-56d6093b.so", + ] + + # Add all private shared objects to both generators + for lib_path in private_shared_objects: + wheel_name = ( + "opencv_python_headless-4.12.0.88" + if "opencv" in lib_path + else "numpy-2.2.6" + ) + gen_with_private_libs.module_for_path(lib_path, f"{wheel_name}.whl") + gen_without_private_libs.module_for_path(lib_path, f"{wheel_name}.whl") - # Simulate a native library - should be included only when ignore_native_libs=False - gen_with_native_libs.module_for_path("opencv_python_headless.libs/libopenblas-r0-f650aae0.so", "opencv_python_headless-4.8.1-cp310-cp310-linux_x86_64.whl") - gen_without_native_libs.module_for_path("opencv_python_headless.libs/libopenblas-r0-f650aae0.so", "opencv_python_headless-4.8.1-cp310-cp310-linux_x86_64.whl") + # Both should have the Python module mappings + self.assertIn("cv2", gen_with_private_libs.mapping) + self.assertIn("cv2", gen_without_private_libs.mapping) + self.assertIn("numpy", gen_with_private_libs.mapping) + self.assertIn("numpy", gen_without_private_libs.mapping) - # Both should have the Python module mapping - self.assertIn("cv2", 
gen_with_native_libs.mapping) - self.assertIn("cv2", gen_without_native_libs.mapping) + # Only gen_with_private_libs should have the private shared object mappings + expected_private_mappings = [ + "opencv_python_headless.libs.libopenblas-r0-f650aae0", + "numpy.libs.libscipy_openblas64_-56d6093b", + ] - # Only gen_with_native_libs should have the native library mapping - self.assertIn("opencv_python_headless.libs.libopenblas-r0-f650aae0", gen_with_native_libs.mapping) - self.assertNotIn("opencv_python_headless.libs.libopenblas-r0-f650aae0", gen_without_native_libs.mapping) + for mapping in expected_private_mappings: + self.assertIn(mapping, gen_with_private_libs.mapping) + self.assertNotIn(mapping, gen_without_private_libs.mapping) if __name__ == "__main__": diff --git a/gazelle_python_manifest_platform_issue.md b/gazelle_python_manifest_platform_issue.md new file mode 100644 index 0000000000..5da73119dd --- /dev/null +++ b/gazelle_python_manifest_platform_issue.md @@ -0,0 +1,168 @@ +# Platform-Specific Inconsistency in gazelle_python_manifest + +## Problem Summary + +The `gazelle_python_manifest` rule generates different manifest files depending on the platform where it's executed, breaking build hermicity across development environments and CI systems. + +## Specific Issue + +When running the same codebase: +- **Local (macOS)**: `bazel test //:gazelle_python_manifest.test` **PASSES** +- **CI (Linux)**: `bazel test //:gazelle_python_manifest.test` **FAILS** with: + ``` + opencv_python_headless.libs.libopenblas-r0-f650aae0: opencv_python_headless + FAIL: files "gazelle_python_manifest.generated_manifest" and "gazelle_python.yaml" differ + ``` + +## Root Cause Analysis + +### How gazelle_python_manifest Works + +The `gazelle_python_manifest` rule doesn't just read `requirements.txt` - it **inspects the actual installed Python wheel files** in the Bazel `@pip` repository: + +1. 
**Extracts wheel contents**: Python wheels (`.whl` files) are platform-specific ZIP archives +2. **Scans for native libraries**: Looks for bundled `.so` files (Linux), `.dylib` files (macOS), etc. +3. **Maps import names to packages**: Creates mappings like `cv2: opencv_python_headless` +4. **Includes native library mappings**: Adds entries for detected native libraries + +## Practical Impact: Zero Functional Effects + +The platform-specific native library mappings (e.g., `opencv_python_headless.libs.libopenblas-r0-f650aae0`) have **no practical impact** on Python development: + +- **Real Python code never imports these**: Nobody writes `import opencv_python_headless.libs.libopenblas-r0-f650aae0` +- **Critical mappings unchanged**: `cv2: opencv_python_headless` works identically on all platforms +- **Native libraries auto-load**: When you `import cv2`, underlying .so files load automatically +- **Zero functional difference**: Python code behaves identically with or without these mappings + +The issue is purely about **build hermeticity**, not functionality. Adding `skip_private_shared_objects = True` would solve the platform inconsistency with no practical downsides. + +## Why Native Lib Mappings Exist + +These native library mappings are generated for completeness/accuracy, not functional necessity. The Python generator finds .so files in the wheel and dutifully maps them, but: + +1. Native libraries are auto-loaded: When you import cv2, the underlying native libraries (like OpenBLAS) are loaded automatically by the Python extension +2. No direct imports: Python code doesn't directly import .so files by their mangled names +3. 
Bazel doesn't need them: The actual dependency resolution for builds works through the main module mappings + +### Platform-Specific Wheel Contents + +The same Python package ships different wheels for different platforms: + +**Linux wheel** (`opencv_python_headless-4.x.x-cp310-cp310-linux_x86_64.whl`): +``` +opencv_python_headless/ +├── cv2/ +│ └── python-3.10/ +└── opencv_python_headless.libs/ + ├── libopenblas-r0-f650aae0.so ← Linux-specific OpenBLAS library + ├── libgfortran-2e0d59d6.so.5 + └── ...other Linux native libs +``` + +**macOS wheel** (`opencv_python_headless-4.x.x-cp310-cp310-macosx_11_0_arm64.whl`): +``` +opencv_python_headless/ +├── cv2/ +│ └── python-3.10/ +└── opencv_python_headless.libs/ + └── ...different macOS native libs (uses Apple's Accelerate framework) +``` + +### Resulting Manifest Differences + +**Linux CI generates**: +```yaml +modules_mapping: + cv2: opencv_python_headless + opencv_python_headless.libs.libopenblas-r0-f650aae0: opencv_python_headless # ← This line +``` + +**macOS local generates**: +```yaml +modules_mapping: + cv2: opencv_python_headless + # Missing the OpenBLAS line because macOS wheel doesn't contain it +``` + +## Reproduction Steps + +1. Have a Python project using `opencv-python-headless` with `gazelle_python_manifest` +2. Run `bazel run //:gazelle_python_manifest.update` on macOS → generates manifest A +3. Run the same command on Linux → generates manifest B +4. 
Compare: manifest B will have additional native library entries that manifest A lacks + +## Demonstrated Platform Dependency + +When attempting to force Linux platform selection on macOS: +```bash +bazel run //:gazelle_python_manifest.update --platforms=@io_bazel_rules_go//go/toolchain:linux_amd64 +``` + +This **successfully downloads the Linux wheel** but fails with: +``` +OSError: [Errno 8] Exec format error: '.../python_3_10_x86_64-unknown-linux-gnu/bin/python3' +``` + +This proves that platform specification controls which wheels are selected, but cross-platform execution is impossible. + +## Impact + +- **Breaks build hermicity**: Same source code produces different results on different platforms +- **CI/Local inconsistency**: Developers can't reproduce CI failures locally +- **Manual workarounds required**: Teams must either: + - Accept platform-specific manifest files (not hermetic) + - Generate manifests only in Linux containers + - Manually maintain manifest consistency + +## Current Workaround Attempts + +### 1. Manual Override (Not Sustainable) +Manually adding the missing entries to the manifest file, but this breaks the "DO NOT EDIT" contract and gets overwritten. + +### 2. Platform-Specific Generation (Partial Solution) +Generate the manifest only in CI/Linux environment and commit it, but this prevents local development from updating dependencies. + +### 3. Container-Based Generation (Complex) +Use Docker/containers for manifest generation, but adds complexity to the development workflow. 
+ +## Proposed Solutions + +### Option 1: Platform-Agnostic Mode +Add a configuration option to `gazelle_python_manifest` to ignore platform-specific native libraries: +```python +gazelle_python_manifest( + name = "gazelle_python_manifest", + modules_mapping = ":modules_map", + pip_repository_name = "pip", + ignore_native_libs = True, # New option +) +``` + +### Option 2: Union Mode +Generate manifests that include native libraries from all target platforms, not just the current platform. + +### Option 3: Explicit Platform Targeting +Allow specifying target platforms for manifest generation: +```python +gazelle_python_manifest( + name = "gazelle_python_manifest", + modules_mapping = ":modules_map", + pip_repository_name = "pip", + target_platforms = ["linux_x86_64", "macos_arm64"], +) +``` + +## Environment Details + +- **rules_python version**: 1.6.1 +- **rules_python_gazelle_plugin version**: 1.3.0 +- **Python version**: 3.10 +- **Package causing issue**: opencv-python-headless (but affects any package with platform-specific native libraries) +- **Bazel version**: Latest +- **Platforms tested**: macOS ARM64, Linux x86_64 + +## Related Issues + +This is a fundamental design issue where `gazelle_python_manifest` prioritizes accuracy (detecting actual wheel contents) over hermicity (consistent results across platforms). + +The behavior is **intentional but problematic** for teams requiring hermetic builds across different development platforms. 
From 1d797ee8dddfff935cc88de5181b67cc0482e08f Mon Sep 17 00:00:00 2001 From: Alexandre Miziara Date: Fri, 26 Sep 2025 23:49:30 -0300 Subject: [PATCH 5/6] remove wrong committed doc --- gazelle_python_manifest_platform_issue.md | 168 ---------------------- 1 file changed, 168 deletions(-) delete mode 100644 gazelle_python_manifest_platform_issue.md diff --git a/gazelle_python_manifest_platform_issue.md b/gazelle_python_manifest_platform_issue.md deleted file mode 100644 index 5da73119dd..0000000000 --- a/gazelle_python_manifest_platform_issue.md +++ /dev/null @@ -1,168 +0,0 @@ -# Platform-Specific Inconsistency in gazelle_python_manifest - -## Problem Summary - -The `gazelle_python_manifest` rule generates different manifest files depending on the platform where it's executed, breaking build hermicity across development environments and CI systems. - -## Specific Issue - -When running the same codebase: -- **Local (macOS)**: `bazel test //:gazelle_python_manifest.test` **PASSES** -- **CI (Linux)**: `bazel test //:gazelle_python_manifest.test` **FAILS** with: - ``` - opencv_python_headless.libs.libopenblas-r0-f650aae0: opencv_python_headless - FAIL: files "gazelle_python_manifest.generated_manifest" and "gazelle_python.yaml" differ - ``` - -## Root Cause Analysis - -### How gazelle_python_manifest Works - -The `gazelle_python_manifest` rule doesn't just read `requirements.txt` - it **inspects the actual installed Python wheel files** in the Bazel `@pip` repository: - -1. **Extracts wheel contents**: Python wheels (`.whl` files) are platform-specific ZIP archives -2. **Scans for native libraries**: Looks for bundled `.so` files (Linux), `.dylib` files (macOS), etc. -3. **Maps import names to packages**: Creates mappings like `cv2: opencv_python_headless` -4. 
**Includes native library mappings**: Adds entries for detected native libraries - -## Practical Impact: Zero Functional Effects - -The platform-specific native library mappings (e.g., `opencv_python_headless.libs.libopenblas-r0-f650aae0`) have **no practical impact** on Python development: - -- **Real Python code never imports these**: Nobody writes `import opencv_python_headless.libs.libopenblas-r0-f650aae0` -- **Critical mappings unchanged**: `cv2: opencv_python_headless` works identically on all platforms -- **Native libraries auto-load**: When you `import cv2`, underlying .so files load automatically -- **Zero functional difference**: Python code behaves identically with or without these mappings - -The issue is purely about **build hermiticity**, not functionality. Adding `ignore_native_libs=True` would solve the platform inconsistency with no practical downsides. - -## Why Native Lib Mappings Exist - -These native library mappings are generated for completeness/accuracy, not functional necessity. The Python generator finds .so files in the wheel and dutifully maps them, but: - -1. Native libraries are auto-loaded: When you import cv2, the underlying native libraries (like OpenBLAS) are loaded automatically by the Python extension -2. No direct imports: Python code doesn't directly import .so files by their mangled names -3. 
Bazel doesn't need them: The actual dependency resolution for builds works through the main module mappings - -### Platform-Specific Wheel Contents - -The same Python package ships different wheels for different platforms: - -**Linux wheel** (`opencv_python_headless-4.x.x-cp310-cp310-linux_x86_64.whl`): -``` -opencv_python_headless/ -├── cv2/ -│ └── python-3.10/ -└── opencv_python_headless.libs/ - ├── libopenblas-r0-f650aae0.so ← Linux-specific OpenBLAS library - ├── libgfortran-2e0d59d6.so.5 - └── ...other Linux native libs -``` - -**macOS wheel** (`opencv_python_headless-4.x.x-cp310-cp310-macosx_11_0_arm64.whl`): -``` -opencv_python_headless/ -├── cv2/ -│ └── python-3.10/ -└── opencv_python_headless.libs/ - └── ...different macOS native libs (uses Apple's Accelerate framework) -``` - -### Resulting Manifest Differences - -**Linux CI generates**: -```yaml -modules_mapping: - cv2: opencv_python_headless - opencv_python_headless.libs.libopenblas-r0-f650aae0: opencv_python_headless # ← This line -``` - -**macOS local generates**: -```yaml -modules_mapping: - cv2: opencv_python_headless - # Missing the OpenBLAS line because macOS wheel doesn't contain it -``` - -## Reproduction Steps - -1. Have a Python project using `opencv-python-headless` with `gazelle_python_manifest` -2. Run `bazel run //:gazelle_python_manifest.update` on macOS → generates manifest A -3. Run the same command on Linux → generates manifest B -4. 
Compare: manifest B will have additional native library entries that manifest A lacks - -## Demonstrated Platform Dependency - -When attempting to force Linux platform selection on macOS: -```bash -bazel run //:gazelle_python_manifest.update --platforms=@io_bazel_rules_go//go/toolchain:linux_amd64 -``` - -This **successfully downloads the Linux wheel** but fails with: -``` -OSError: [Errno 8] Exec format error: '.../python_3_10_x86_64-unknown-linux-gnu/bin/python3' -``` - -This proves that platform specification controls which wheels are selected, but cross-platform execution is impossible. - -## Impact - -- **Breaks build hermicity**: Same source code produces different results on different platforms -- **CI/Local inconsistency**: Developers can't reproduce CI failures locally -- **Manual workarounds required**: Teams must either: - - Accept platform-specific manifest files (not hermetic) - - Generate manifests only in Linux containers - - Manually maintain manifest consistency - -## Current Workaround Attempts - -### 1. Manual Override (Not Sustainable) -Manually adding the missing entries to the manifest file, but this breaks the "DO NOT EDIT" contract and gets overwritten. - -### 2. Platform-Specific Generation (Partial Solution) -Generate the manifest only in CI/Linux environment and commit it, but this prevents local development from updating dependencies. - -### 3. Container-Based Generation (Complex) -Use Docker/containers for manifest generation, but adds complexity to the development workflow. 
- -## Proposed Solutions - -### Option 1: Platform-Agnostic Mode -Add a configuration option to `gazelle_python_manifest` to ignore platform-specific native libraries: -```python -gazelle_python_manifest( - name = "gazelle_python_manifest", - modules_mapping = ":modules_map", - pip_repository_name = "pip", - ignore_native_libs = True, # New option -) -``` - -### Option 2: Union Mode -Generate manifests that include native libraries from all target platforms, not just the current platform. - -### Option 3: Explicit Platform Targeting -Allow specifying target platforms for manifest generation: -```python -gazelle_python_manifest( - name = "gazelle_python_manifest", - modules_mapping = ":modules_map", - pip_repository_name = "pip", - target_platforms = ["linux_x86_64", "macos_arm64"], -) -``` - -## Environment Details - -- **rules_python version**: 1.6.1 -- **rules_python_gazelle_plugin version**: 1.3.0 -- **Python version**: 3.10 -- **Package causing issue**: opencv-python-headless (but affects any package with platform-specific native libraries) -- **Bazel version**: Latest -- **Platforms tested**: macOS ARM64, Linux x86_64 - -## Related Issues - -This is a fundamental design issue where `gazelle_python_manifest` prioritizes accuracy (detecting actual wheel contents) over hermicity (consistent results across platforms). - -The behavior is **intentional but problematic** for teams requiring hermetic builds across different development platforms. From a0d52a30bf389b6eaa7ff93c469c0b6fee35a593 Mon Sep 17 00:00:00 2001 From: Alexandre Miziara Date: Fri, 26 Sep 2025 23:55:23 -0300 Subject: [PATCH 6/6] Remove comment for hermetic builds in BUILD.bazel Removed commented line about enabling hermetic builds. 
--- examples/build_file_generation/BUILD.bazel | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/build_file_generation/BUILD.bazel b/examples/build_file_generation/BUILD.bazel index 8b3b3880d9..c8b6b8b11c 100644 --- a/examples/build_file_generation/BUILD.bazel +++ b/examples/build_file_generation/BUILD.bazel @@ -28,8 +28,6 @@ modules_mapping( "^_|(\\._)+", # This is the default. "(\\.tests)+", # Add a custom one to get rid of the psutil tests. ], - # Uncomment the next line to enable hermetic builds across platforms - # by skipping private shared objects under .libs directories (useful for opencv-python-headless, etc.) # skip_private_shared_objects = True, wheels = all_whl_requirements, )