Skip to content

Commit df77121

Browse files
birunts and alexeagle
authored
feat: use @gawk (#193)
Provide hermetic `awk` from `@gawk` module. Resolves #185 & Closes #185. Co-authored-by: Alex Eagle <[email protected]>
1 parent d997fb1 commit df77121

File tree

8 files changed

+43
-19
lines changed

8 files changed

+43
-19
lines changed

MODULE.bazel

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ bazel_dep(name = "bazel_lib", version = "3.0.0-rc.0")
1111
bazel_dep(name = "bazel_skylib", version = "1.5.0")
1212
bazel_dep(name = "rules_java", version = "8.8.0")
1313
bazel_dep(name = "rules_shell", version = "0.4.1")
14+
bazel_dep(name = "gawk", version = "5.3.2.bcr.3")
1415
bazel_dep(name = "tar.bzl", version = "0.6.0")
1516
bazel_dep(name = "yq.bzl", version = "0.3.1")
1617

apt/private/deb_postfix.bzl

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ def deb_postfix(name, srcs, outs, mergedusr = False, **kwargs):
4343
-s "#^\\./lib64/\\(.\\)#./usr/lib64/\\1#" \
4444
-s "#^\\./libx32/\\(.\\)#./usr/libx32/\\1#" \
4545
"@$$data_file" 2< <(
46-
$(BSDTAR_BIN) -tvf "$$data_file" | awk '{
46+
$(BSDTAR_BIN) -tvf "$$data_file" | $(location @gawk//:gawk) '{
4747
ORS=""
4848
keep="y"
4949
if (substr($$1, 1, 1) == "d" && (\\
@@ -92,5 +92,6 @@ def deb_postfix(name, srcs, outs, mergedusr = False, **kwargs):
9292
%s
9393
""" % apply,
9494
toolchains = toolchains,
95+
tools = ["@gawk//:gawk"],
9596
**kwargs
9697
)

apt/private/dpkg_status.bzl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,14 @@ def _dpkg_status_impl(ctx):
1313
args = ctx.actions.args()
1414
args.add(bsdtar.tarinfo.binary)
1515
args.add(output)
16+
args.add(ctx.executable._gawk.path)
1617
args.add_all(ctx.files.controls)
1718

1819
ctx.actions.run(
1920
executable = ctx.executable._dpkg_status_sh,
2021
inputs = ctx.files.controls,
2122
outputs = [output],
22-
tools = bsdtar.default.files,
23+
tools = [bsdtar.default.files, ctx.executable._gawk],
2324
arguments = [args],
2425
)
2526

@@ -40,6 +41,12 @@ dpkg_status = rule(
4041
allow_files = [".tar.zst", ".tar.xz", ".tar.gz", ".tar"],
4142
mandatory = True,
4243
),
44+
"_gawk": attr.label(
45+
allow_single_file = True,
46+
executable = True,
47+
cfg = "exec",
48+
default = "@gawk//:gawk",
49+
),
4350
},
4451
implementation = _dpkg_status_impl,
4552
toolchains = [tar_lib.TOOLCHAIN_TYPE],

apt/private/dpkg_status.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,13 +3,14 @@ set -o pipefail -o errexit -o nounset
33

44
readonly bsdtar="$1"
55
readonly out="$2"
6-
shift 2
6+
readonly awk="$3"
7+
shift 3
78

89
tmp_out=$(mktemp)
910

1011
while (( $# > 0 )); do
1112
$bsdtar -xf "$1" --to-stdout ./control |
12-
awk '{
13+
"${awk}" '{
1314
print $0;
1415
if (NR == 1) { print "Status: install ok installed"};
1516
} END { print "" }

apt/private/dpkg_statusd.bzl

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,13 +16,14 @@ def _dpkg_statusd_impl(ctx):
1616
args.add(output)
1717
args.add(ctx.file.control)
1818
args.add(ctx.attr.package_name)
19+
args.add(ctx.executable._gawk.path)
1920
tar_lib.common.add_compression_args(ctx.attr.compression, args)
2021

2122
ctx.actions.run(
2223
executable = ctx.executable._dpkg_statusd_sh,
2324
inputs = [ctx.file.control],
2425
outputs = [output],
25-
tools = bsdtar.default.files,
26+
tools = [bsdtar.default.files, ctx.executable._gawk],
2627
arguments = [args],
2728
)
2829

@@ -48,6 +49,12 @@ dpkg_statusd = rule(
4849
doc = "Compress the archive file with a supported algorithm.",
4950
values = tar_lib.common.accepted_compression_types,
5051
),
52+
"_gawk": attr.label(
53+
allow_single_file = True,
54+
executable = True,
55+
cfg = "exec",
56+
default = "@gawk//:gawk",
57+
),
5158
},
5259
implementation = _dpkg_statusd_impl,
5360
toolchains = [tar_lib.TOOLCHAIN_TYPE],

apt/private/dpkg_statusd.sh

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5,15 +5,16 @@ readonly bsdtar="$1"
55
readonly out="$2"
66
readonly control_path="$3"
77
readonly package_name="$4"
8-
shift 4
8+
readonly awk="$5"
9+
shift 5
910

1011
include=(--include "^./control$" --include "^./md5sums$")
1112

1213
tmp=$(mktemp -d)
1314
"$bsdtar" -xf "$control_path" "${include[@]}" -C "$tmp"
1415

1516
"$bsdtar" -cf - $@ --format=mtree "${include[@]}" --options '!gname,!uname,!sha1,!nlink,!time' "@$control_path" | \
16-
awk -v pkg="$package_name" '{
17+
"${awk}" -v pkg="$package_name" '{
1718
if ($1=="#mtree") {
1819
print $1; next
1920
};

distroless/private/flatten.bzl

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ def _flatten_impl(ctx):
1313
args = ctx.actions.args()
1414
args.add(bsdtar.tarinfo.binary)
1515
args.add(str(ctx.attr.deduplicate))
16+
args.add(ctx.executable._gawk.path)
1617
args.add_all(tar_lib.DEFAULT_ARGS)
1718
args.add("--create")
1819
tar_lib.common.add_compression_args(ctx.attr.compress, args)
@@ -23,7 +24,7 @@ def _flatten_impl(ctx):
2324
executable = ctx.executable._flatten_sh,
2425
inputs = ctx.files.tars,
2526
outputs = [output],
26-
tools = bsdtar.default.files,
27+
tools = [bsdtar.default.files, ctx.executable._gawk],
2728
arguments = [args],
2829
mnemonic = "Flatten",
2930
progress_message = "Flattening %{label}",
@@ -47,14 +48,18 @@ EXPERIMENTAL: We may change or remove it without a notice.
4748
4849
Remove duplicate entries from the archives after flattening.
4950
Deduplication is performed only for directories.
50-
51-
This requires `awk` to be available in the PATH.
5251
""", default = False),
5352
"compress": attr.string(
5453
doc = "Compress the archive file with a supported algorithm.",
5554
values = tar_lib.common.accepted_compression_types,
5655
),
5756
"_flatten_sh": attr.label(default = "//distroless/private:flatten.sh", executable = True, cfg = "exec", allow_single_file = True),
57+
"_gawk": attr.label(
58+
allow_single_file = True,
59+
executable = True,
60+
cfg = "exec",
61+
default = "@gawk//:gawk",
62+
),
5863
},
5964
implementation = _flatten_impl,
6065
toolchains = [tar_lib.TOOLCHAIN_TYPE],

distroless/private/flatten.sh

Lines changed: 10 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,37 +3,38 @@ set -o pipefail -o errexit
33

44
bsdtar="$1";
55
deduplicate="$2";
6-
shift 2;
6+
readonly awk="$3"
7+
shift 3;
78

89
# Deduplication requested, use this complex pipeline to deduplicate.
910
if [[ "$deduplicate" == "True" ]]; then
1011

1112
mtree=$(mktemp)
1213

13-
# List files in all archives and append to single column mtree.
14-
for arg in "$@"; do
14+
# List files in all archives and append to single column mtree.
15+
for arg in "$@"; do
1516
if [[ "$arg" == "@"* ]]; then
1617
"$bsdtar" -tf "${arg:1}" >> "$mtree"
1718
fi
1819
done
1920

2021
# There is not a lot happening here, but there is still too much implicit knowledge.
21-
#
22+
#
2223
# When we run bsdtar, we ask it to prompt for every entry, in the same order as the mtree we created above.
2324
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/write.c#L683
24-
#
25+
#
2526
# For every prompt (and therefore every entry), we have to write 31 bytes of data, one of which has to be either 'Y' or 'N'.
2627
# And the reason for it is that since we are not TTY and pretending to be one, we can't interleave write calls
2728
# so we have to interleave it by filling up the buffer with 31 bytes of 'Y' or 'N'.
2829
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L240
2930
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L216
30-
#
31+
#
3132
# To match the extraction behavior of tar itself, we want to preserve only the final occurrence of each file
3233
# and directory in the archive. To do this, we iterate over all the entries twice. The first pass computes the
3334
# number of occurrences of each path, and the second pass determines whether each entry is the final (or only)
3435
# occurrence of that path.
3536

36-
$bsdtar --confirmation "$@" 2< <(awk '{
37+
$bsdtar --confirmation "$@" 2< <("${awk}" '{
3738
count[$1]++;
3839
files[NR] = $1
3940
}
@@ -50,7 +51,7 @@ if [[ "$deduplicate" == "True" ]]; then
5051
}
5152
}' "$mtree")
5253
rm "$mtree"
53-
else
54+
else
5455
# No deduplication, business as usual
5556
$bsdtar "$@"
56-
fi
57+
fi

0 commit comments

Comments
 (0)