diff --git a/MODULE.bazel b/MODULE.bazel index 920b210c..b56b3e82 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -16,6 +16,7 @@ bazel_dep(name = "aspect_tools_telemetry", version = "0.2.6") bazel_dep(name = "bazel_skylib", version = "1.4.2") bazel_dep(name = "rules_python", version = "0.29.0") bazel_dep(name = "platforms", version = "0.0.7") +bazel_dep(name = "gawk", version = "5.3.2.bcr.1") # Required to manipulate mtree files bazel_lib = use_extension("@aspect_bazel_lib//lib:extensions.bzl", "toolchains") bazel_lib.expand_template() diff --git a/docs/py_image_layer.md b/docs/py_image_layer.md index 65b495fc..d3eff870 100644 --- a/docs/py_image_layer.md +++ b/docs/py_image_layer.md @@ -38,13 +38,11 @@ oci_image(
 py_image_layer(name, binary, root, layer_groups, compress, tar_args, compute_unused_inputs,
-               platform, owner, group, kwargs)
+               platform, owner, group, awk, kwargs)
 
Produce a separate tar output for each layer of a python app -> Requires `awk` to be installed on the host machine/rbe runner. - For better performance, it is recommended to split the output of a py_binary into multiple layers. This can be done by grouping files into layers based on their path by using the `layer_groups` attribute. @@ -77,6 +75,7 @@ The default layer groups are: | platform | The platform to use for the transition. Default is None. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/transitions.md#platform_transition_binary-target_platform | None | | owner | An owner uid for the uncompressed files. See mtree_mutate: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#mutating-the-tar-contents | None | | group | A group uid for the uncompressed files. See mtree_mutate: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#mutating-the-tar-contents | None | +| awk | The awk command to use. Default is @gawk. | "@gawk" | | kwargs | attribute that apply to all targets expanded by the macro | none | **RETURNS** diff --git a/py/private/py_image_layer.bzl b/py/private/py_image_layer.bzl index 736d9d7c..10be4c36 100644 --- a/py/private/py_image_layer.bzl +++ b/py/private/py_image_layer.bzl @@ -53,7 +53,7 @@ default_layer_groups = { "packages": "\\\\.runfiles/.*/site-packages", } -def _split_mtree_into_layer_groups(name, root, groups, group_names, **kwargs): +def _split_mtree_into_layer_groups(name, root, groups, group_names, awk, **kwargs): mtree_begin_blocks = "\n".join([ 'print "#mtree" >> "$(RULEDIR)/%s.%s.manifest.spec";' % (name, gn) for gn in group_names @@ -71,7 +71,7 @@ if ($$1 ~ "%s") { ]) cmd = """\ -awk < $< 'BEGIN { +$(execpath %s) < $< 'BEGIN { %s } { @@ -86,7 +86,7 @@ awk < $< 'BEGIN { # Every line that did not match the layer groups will go into the default layer. print $$0 >> "$(RULEDIR)/%s.default.manifest.spec" }' -""" % (mtree_begin_blocks, root, ifs, name) +""" % (awk, mtree_begin_blocks, root, ifs, name) native.genrule( name = "{}_manifests".format(name), @@ -95,6 +95,7 @@ awk < $< 'BEGIN { "{}.{}.manifest.spec".format(name, group_name) for group_name in group_names ], + tools = [awk], cmd = cmd, **kwargs ) @@ -110,11 +111,10 @@ def py_image_layer( platform = None, owner = None, group = None, + awk = "@gawk", **kwargs): """Produce a separate tar output for each layer of a python app - > Requires `awk` to be installed on the host machine/rbe runner. - For better performance, it is recommended to split the output of a py_binary into multiple layers. This can be done by grouping files into layers based on their path by using the `layer_groups` attribute. @@ -142,6 +142,7 @@ def py_image_layer( owner: An owner uid for the uncompressed files. See mtree_mutate: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#mutating-the-tar-contents group: A group uid for the uncompressed files. See mtree_mutate: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#mutating-the-tar-contents tar_args: Additional arguments to pass to the tar rule. Default is `[]`. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule-args + awk: The awk command to use. Default is `@gawk`. **kwargs: attribute that apply to all targets expanded by the macro Returns: @@ -176,7 +177,7 @@ def py_image_layer( groups = dict(groups, **default_layer_groups) group_names = groups.keys() + ["default"] - _split_mtree_into_layer_groups(name, root, groups, group_names, **kwargs) + _split_mtree_into_layer_groups(name, root, groups, group_names, awk, **kwargs) # Finally create layers using the tar rule srcs = [] diff --git a/py/repositories.bzl b/py/repositories.bzl index 12b58e41..d7615e8f 100644 --- a/py/repositories.bzl +++ b/py/repositories.bzl @@ -37,6 +37,28 @@ def rules_py_dependencies(): url = "https://github.com/bazel-contrib/bazel-lib/releases/download/v2.10.0/bazel-lib-v2.10.0.tar.gz", ) + # from https://github.com/bazelbuild/bazel-central-registry/tree/main/modules/gawk/5.3.2.bcr.1 + http_archive( + name = "gawk", + remote_file_urls = { + "BUILD.bazel": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/overlay/BUILD.bazel"], + "MODULE.bazel": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/MODULE.bazel"], + "posix/config_darwin.h": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/overlay/posix/config_darwin.h"], + "posix/config_linux.h": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/overlay/posix/config_linux.h"], + "test/BUILD.bazel": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/overlay/test/BUILD.bazel"], + }, + remote_file_integrity = { + "BUILD.bazel": "sha256-dt89+9IJ3UzQvoKzyXOiBoF6ok/4u4G0cb0Ja+plFy0=", + "MODULE.bazel": "sha256-zfjL5e51DbBLeIeMljPMdugNz0QWy+mCrDqSIvgHE8g=", + "posix/config_darwin.h": "sha256-gPVRlvtdXPw4Ikwd5S89wPPw5AaiB2HTHa1KOtj40mU=", + "posix/config_linux.h": "sha256-iEaeXYBUCvprsIEEi5ipwqt0JV8d73+rLgoBYTegC6Q=", + "test/BUILD.bazel": "sha256-NktOb/GQZ8AimXwLEfGFMJB3TtgAFhobM5f9aWsHwLQ=", + }, + url = "https://ftpmirror.gnu.org/gnu/gawk/gawk-5.3.2.tar.xz", + strip_prefix = "gawk-5.3.2", + integrity = "sha256-+MNIZQnecFGSE4sA7ywAu73Q6Eww1cB9I/xzqdxMycw=", + ) + http_archive( name = "rules_python", sha256 = "c68bdc4fbec25de5b5493b8819cfc877c4ea299c0dcb15c244c5a00208cde311",