diff --git a/MODULE.bazel b/MODULE.bazel index 920b210c..b56b3e82 100644 --- a/MODULE.bazel +++ b/MODULE.bazel @@ -16,6 +16,7 @@ bazel_dep(name = "aspect_tools_telemetry", version = "0.2.6") bazel_dep(name = "bazel_skylib", version = "1.4.2") bazel_dep(name = "rules_python", version = "0.29.0") bazel_dep(name = "platforms", version = "0.0.7") +bazel_dep(name = "gawk", version = "5.3.2.bcr.1") # Required to manipulate mtree files bazel_lib = use_extension("@aspect_bazel_lib//lib:extensions.bzl", "toolchains") bazel_lib.expand_template() diff --git a/docs/py_image_layer.md b/docs/py_image_layer.md index 65b495fc..d3eff870 100644 --- a/docs/py_image_layer.md +++ b/docs/py_image_layer.md @@ -38,13 +38,11 @@ oci_image(
py_image_layer(name, binary, root, layer_groups, compress, tar_args, compute_unused_inputs, - platform, owner, group, kwargs) + platform, owner, group, awk, kwargs)Produce a separate tar output for each layer of a python app -> Requires `awk` to be installed on the host machine/rbe runner. - For better performance, it is recommended to split the output of a py_binary into multiple layers. This can be done by grouping files into layers based on their path by using the `layer_groups` attribute. @@ -77,6 +75,7 @@ The default layer groups are: | platform | The platform to use for the transition. Default is None. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/transitions.md#platform_transition_binary-target_platform |
None
|
| owner | An owner uid for the uncompressed files. See mtree_mutate: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#mutating-the-tar-contents | None
|
| group | A group uid for the uncompressed files. See mtree_mutate: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#mutating-the-tar-contents | None
|
+| awk | The awk command to use. Default is `@gawk`. | `"@gawk"` |
| kwargs | attribute that apply to all targets expanded by the macro | none |
**RETURNS**
diff --git a/py/private/py_image_layer.bzl b/py/private/py_image_layer.bzl
index 736d9d7c..10be4c36 100644
--- a/py/private/py_image_layer.bzl
+++ b/py/private/py_image_layer.bzl
@@ -53,7 +53,7 @@ default_layer_groups = {
"packages": "\\\\.runfiles/.*/site-packages",
}
-def _split_mtree_into_layer_groups(name, root, groups, group_names, **kwargs):
+def _split_mtree_into_layer_groups(name, root, groups, group_names, awk, **kwargs):
mtree_begin_blocks = "\n".join([
'print "#mtree" >> "$(RULEDIR)/%s.%s.manifest.spec";' % (name, gn)
for gn in group_names
@@ -71,7 +71,7 @@ if ($$1 ~ "%s") {
])
cmd = """\
-awk < $< 'BEGIN {
+$(execpath %s) < $< 'BEGIN {
%s
}
{
@@ -86,7 +86,7 @@ awk < $< 'BEGIN {
# Every line that did not match the layer groups will go into the default layer.
print $$0 >> "$(RULEDIR)/%s.default.manifest.spec"
}'
-""" % (mtree_begin_blocks, root, ifs, name)
+""" % (awk, mtree_begin_blocks, root, ifs, name)
native.genrule(
name = "{}_manifests".format(name),
@@ -95,6 +95,7 @@ awk < $< 'BEGIN {
"{}.{}.manifest.spec".format(name, group_name)
for group_name in group_names
],
+ tools = [awk],
cmd = cmd,
**kwargs
)
@@ -110,11 +111,10 @@ def py_image_layer(
platform = None,
owner = None,
group = None,
+ awk = "@gawk",
**kwargs):
"""Produce a separate tar output for each layer of a python app
- > Requires `awk` to be installed on the host machine/rbe runner.
-
For better performance, it is recommended to split the output of a py_binary into multiple layers.
This can be done by grouping files into layers based on their path by using the `layer_groups` attribute.
@@ -142,6 +142,7 @@ def py_image_layer(
owner: An owner uid for the uncompressed files. See mtree_mutate: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#mutating-the-tar-contents
group: A group uid for the uncompressed files. See mtree_mutate: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#mutating-the-tar-contents
tar_args: Additional arguments to pass to the tar rule. Default is `[]`. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule-args
+ awk: The awk command to use. Default is `@gawk`.
**kwargs: attribute that apply to all targets expanded by the macro
Returns:
@@ -176,7 +177,7 @@ def py_image_layer(
groups = dict(groups, **default_layer_groups)
group_names = groups.keys() + ["default"]
- _split_mtree_into_layer_groups(name, root, groups, group_names, **kwargs)
+ _split_mtree_into_layer_groups(name, root, groups, group_names, awk, **kwargs)
# Finally create layers using the tar rule
srcs = []
diff --git a/py/repositories.bzl b/py/repositories.bzl
index 12b58e41..d7615e8f 100644
--- a/py/repositories.bzl
+++ b/py/repositories.bzl
@@ -37,6 +37,28 @@ def rules_py_dependencies():
url = "https://github.com/bazel-contrib/bazel-lib/releases/download/v2.10.0/bazel-lib-v2.10.0.tar.gz",
)
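+ # Mirrors the Bazel Central Registry gawk module (5.3.2.bcr.1): the upstream GNU release tarball plus the registry's overlay files. See https://github.com/bazelbuild/bazel-central-registry/tree/main/modules/gawk/5.3.2.bcr.1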
+ http_archive(
+ name = "gawk",
+ remote_file_urls = {
+ "BUILD.bazel": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/overlay/BUILD.bazel"],
+ "MODULE.bazel": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/MODULE.bazel"],
+ "posix/config_darwin.h": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/overlay/posix/config_darwin.h"],
+ "posix/config_linux.h": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/overlay/posix/config_linux.h"],
+ "test/BUILD.bazel": ["https://raw.githubusercontent.com/bazelbuild/bazel-central-registry/refs/heads/main/modules/gawk/5.3.2.bcr.1/overlay/test/BUILD.bazel"],
+ },
+ remote_file_integrity = {
+ "BUILD.bazel": "sha256-dt89+9IJ3UzQvoKzyXOiBoF6ok/4u4G0cb0Ja+plFy0=",
+ "MODULE.bazel": "sha256-zfjL5e51DbBLeIeMljPMdugNz0QWy+mCrDqSIvgHE8g=",
+ "posix/config_darwin.h": "sha256-gPVRlvtdXPw4Ikwd5S89wPPw5AaiB2HTHa1KOtj40mU=",
+ "posix/config_linux.h": "sha256-iEaeXYBUCvprsIEEi5ipwqt0JV8d73+rLgoBYTegC6Q=",
+ "test/BUILD.bazel": "sha256-NktOb/GQZ8AimXwLEfGFMJB3TtgAFhobM5f9aWsHwLQ=",
+ },
+ url = "https://ftpmirror.gnu.org/gnu/gawk/gawk-5.3.2.tar.xz",
+ strip_prefix = "gawk-5.3.2",
+ integrity = "sha256-+MNIZQnecFGSE4sA7ywAu73Q6Eww1cB9I/xzqdxMycw=",
+ )
+
http_archive(
name = "rules_python",
sha256 = "c68bdc4fbec25de5b5493b8819cfc877c4ea299c0dcb15c244c5a00208cde311",