|
| 1 | +"""py_image_layer macro for creating multiple layers from a py_binary |
| 2 | +
|
| 3 | +> [!WARNING] |
| 4 | +> This macro is EXPERIMENTAL and is not subject to our SemVer guarantees. |
| 5 | +
|
| 6 | +A py_binary that uses `torch` and `numpy` can use the following layer groups: |
| 7 | +
|
| 8 | +``` |
| 9 | +load("@rules_oci//oci:defs.bzl", "oci_image") |
| 10 | +load("@aspect_rules_py//py:defs.bzl", "py_image_layer", "py_binary") |
| 11 | +
|
| 12 | +py_binary( |
| 13 | + name = "my_app_bin", |
| 14 | + deps = [ |
| 15 | + "@pip_deps//numpy", |
| 16 | + "@pip_deps//torch" |
| 17 | + ] |
| 18 | +) |
| 19 | +
|
| 20 | +oci_image( |
| 21 | + tars = py_image_layer( |
| 22 | + name = "my_app", |
| 23 | + py_binary = ":my_app_bin", |
| 24 | + layer_groups = { |
| 25 | + "torch": "pip_deps_torch.*", |
| 26 | + "numpy": "pip_deps_numpy.*", |
| 27 | + } |
| 28 | + ) |
| 29 | +) |
| 30 | +``` |
| 31 | +""" |
| 32 | + |
| 33 | +load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar") |
| 34 | + |
| 35 | +default_layer_groups = { |
| 36 | + # match *only* external pip like repositories that contain the string "site-packages" |
| 37 | + "packages": "\\.runfiles/.*/site-packages", |
| 38 | + # match *only* external repositories that begins with the string "python" |
| 39 | + # e.g. this will match |
| 40 | + # `/hello_world/hello_world_bin.runfiles/rules_python~0.21.0~python~python3_9_aarch64-unknown-linux-gnu/bin/python3` |
| 41 | + # but not match |
| 42 | + # `/hello_world/hello_world_bin.runfiles/_main/python_app` |
| 43 | + "interpreter": "\\.runfiles/python.*-.*/", |
| 44 | +} |
| 45 | + |
| 46 | +def _split_mtree_into_layer_groups(name, root, groups, group_names, **kwargs): |
| 47 | + mtree_begin_blocks = "\n".join([ |
| 48 | + 'print "#mtree" >> "$(RULEDIR)/%s.%s.manifest.spec";' % (name, gn) |
| 49 | + for gn in group_names |
| 50 | + ]) |
| 51 | + |
| 52 | + # When an mtree entry matches a layer group, it will be moved into the mtree |
| 53 | + # for that group. |
| 54 | + ifs = "\n".join([ |
| 55 | + """\ |
| 56 | +if ($$1 ~ "%s") { |
| 57 | + print $$0 >> "$(RULEDIR)/%s.%s.manifest.spec"; |
| 58 | + next |
| 59 | +}""" % (regex, name, gn) |
| 60 | + for (gn, regex) in groups.items() |
| 61 | + ]) |
| 62 | + |
| 63 | + cmd = """\ |
| 64 | +awk < $< 'BEGIN { |
| 65 | + %s |
| 66 | +} |
| 67 | +{ |
| 68 | + # Exclude .whl files from container images |
| 69 | + if ($$1 ~ ".whl") { |
| 70 | + next |
| 71 | + } |
| 72 | + # Move everything under the specified root |
| 73 | + sub(/^/, ".%s") |
| 74 | + # Match by regexes and write to the destination. |
| 75 | + %s |
| 76 | + # Every line that did not match the layer groups will go into the default layer. |
| 77 | + print $$0 >> "$(RULEDIR)/%s.default.manifest.spec" |
| 78 | +}' |
| 79 | +""" % (mtree_begin_blocks, root, ifs, name) |
| 80 | + |
| 81 | + native.genrule( |
| 82 | + name = "_{}_manifests".format(name), |
| 83 | + srcs = [name + ".manifest"], |
| 84 | + outs = [ |
| 85 | + "{}.{}.manifest.spec".format(name, group_name) |
| 86 | + for group_name in group_names |
| 87 | + ], |
| 88 | + cmd = cmd, |
| 89 | + **kwargs |
| 90 | + ) |
| 91 | + |
| 92 | + |
| 93 | +def py_image_layer(name, py_binary, root = None, layer_groups = {}, compress = "gzip", tar_args = ["--options", "gzip:!timestamp"], **kwargs): |
| 94 | + """Produce a separate tar output for each layer of a python app |
| 95 | +
|
| 96 | + > Requires `awk` to be installed on the host machine/rbe runner. |
| 97 | +
|
| 98 | + For better performance, it is recommended to split the output of a py_binary into multiple layers. |
| 99 | + This can be done by grouping files into layers based on their path by using the `layer_groups` attribute. |
| 100 | +
|
| 101 | + The matching order for layer groups is as follows: |
| 102 | + 1. `layer_groups` are checked first. |
| 103 | + 2. If no match is found for `layer_groups`, the `default layer groups` are checked. |
| 104 | + 3. Any remaining files are placed into the default layer. |
| 105 | + |
| 106 | + The default layer groups are: |
| 107 | + ``` |
| 108 | + { |
| 109 | + "packages": "\\.runfiles/.*/site-packages",, # contains third-party deps |
| 110 | + "interpreter": "\\.runfiles/python.*-.*/", # contains the python interpreter |
| 111 | + } |
| 112 | + ``` |
| 113 | +
|
| 114 | + Args: |
| 115 | + name: base name for targets |
| 116 | + py_binary: a py_binary target |
| 117 | + root: Path to where the layers should be rooted. If not specified, the layers will be rooted at the workspace root. |
| 118 | + layer_groups: Additional layer groups to create. They are used to group files into layers based on their path. In the form of: ```{"<name>": "regex_to_match_against_file_paths"}``` |
| 119 | + compress: Compression algorithm to use. Default is gzip. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule |
| 120 | + tar_args: Additional arguments to pass to the tar rule. Default is `["--options", "gzip:!timestamp"]`. See: https://github.com/bazel-contrib/bazel-lib/blob/main/docs/tar.md#tar_rule |
| 121 | + **kwargs: attribute that apply to all targets expanded by the macro |
| 122 | +
|
| 123 | + Returns: |
| 124 | + A list of labels for each layer. |
| 125 | + """ |
| 126 | + if root != None and not root.startswith("/"): |
| 127 | + fail("root path must start with '/' but got '{root}', expected '/{root}'".format(root = root)) |
| 128 | + |
| 129 | + # Produce the manifest for a tar file of our py_binary, but don't tar it up yet, so we can split |
| 130 | + # into fine-grained layers for better pull, push and remote cache performance. |
| 131 | + mtree_spec( |
| 132 | + name = name + ".manifest", |
| 133 | + srcs = [py_binary], |
| 134 | + **kwargs |
| 135 | + ) |
| 136 | + |
| 137 | + groups = dict(**layer_groups) |
| 138 | + group_names = groups.keys() + ["default"] |
| 139 | + |
| 140 | + _split_mtree_into_layer_groups(name, root, groups, group_names, **kwargs) |
| 141 | + |
| 142 | + # Finally create layers using the tar rule |
| 143 | + result = [] |
| 144 | + for group_name in group_names: |
| 145 | + tar_target = "_{}_{}".format(name, group_name) |
| 146 | + tar( |
| 147 | + name = tar_target, |
| 148 | + srcs = [py_binary], |
| 149 | + mtree = "{}.{}.manifest.spec".format(name, group_name), |
| 150 | + compress = compress, |
| 151 | + args = tar_args, |
| 152 | + **kwargs |
| 153 | + ) |
| 154 | + result.append(tar_target) |
| 155 | + |
| 156 | + return result |
0 commit comments