Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ bazel_dep(name = "bazel_lib", version = "3.0.0-rc.0")
bazel_dep(name = "bazel_skylib", version = "1.5.0")
bazel_dep(name = "rules_java", version = "8.8.0")
bazel_dep(name = "rules_shell", version = "0.4.1")
bazel_dep(name = "gawk", version = "5.3.2.bcr.3")
bazel_dep(name = "tar.bzl", version = "0.6.0")
bazel_dep(name = "yq.bzl", version = "0.3.1")

Expand Down
3 changes: 2 additions & 1 deletion apt/private/deb_postfix.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ def deb_postfix(name, srcs, outs, mergedusr = False, **kwargs):
-s "#^\\./lib64/\\(.\\)#./usr/lib64/\\1#" \
-s "#^\\./libx32/\\(.\\)#./usr/libx32/\\1#" \
"@$$data_file" 2< <(
$(BSDTAR_BIN) -tvf "$$data_file" | awk '{
$(BSDTAR_BIN) -tvf "$$data_file" | $(location @gawk//:gawk) '{
ORS=""
keep="y"
if (substr($$1, 1, 1) == "d" && (\\
Expand Down Expand Up @@ -92,5 +92,6 @@ def deb_postfix(name, srcs, outs, mergedusr = False, **kwargs):
%s
""" % apply,
toolchains = toolchains,
tools = ["@gawk//:gawk"],
**kwargs
)
9 changes: 8 additions & 1 deletion apt/private/dpkg_status.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,14 @@ def _dpkg_status_impl(ctx):
args = ctx.actions.args()
args.add(bsdtar.tarinfo.binary)
args.add(output)
args.add(ctx.executable._gawk.path)
args.add_all(ctx.files.controls)

ctx.actions.run(
executable = ctx.executable._dpkg_status_sh,
inputs = ctx.files.controls,
outputs = [output],
tools = bsdtar.default.files,
tools = [bsdtar.default.files, ctx.executable._gawk],
arguments = [args],
)

Expand All @@ -40,6 +41,12 @@ dpkg_status = rule(
allow_files = [".tar.zst", ".tar.xz", ".tar.gz", ".tar"],
mandatory = True,
),
"_gawk": attr.label(
allow_single_file = True,
executable = True,
cfg = "exec",
default = "@gawk//:gawk",
),
},
implementation = _dpkg_status_impl,
toolchains = [tar_lib.TOOLCHAIN_TYPE],
Expand Down
5 changes: 3 additions & 2 deletions apt/private/dpkg_status.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,14 @@ set -o pipefail -o errexit -o nounset

readonly bsdtar="$1"
readonly out="$2"
shift 2
readonly awk="$3"
shift 3

tmp_out=$(mktemp)

while (( $# > 0 )); do
$bsdtar -xf "$1" --to-stdout ./control |
awk '{
"${awk}" '{
print $0;
if (NR == 1) { print "Status: install ok installed"};
} END { print "" }
Expand Down
9 changes: 8 additions & 1 deletion apt/private/dpkg_statusd.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,14 @@ def _dpkg_statusd_impl(ctx):
args.add(output)
args.add(ctx.file.control)
args.add(ctx.attr.package_name)
args.add(ctx.executable._gawk.path)
tar_lib.common.add_compression_args(ctx.attr.compression, args)

ctx.actions.run(
executable = ctx.executable._dpkg_statusd_sh,
inputs = [ctx.file.control],
outputs = [output],
tools = bsdtar.default.files,
tools = [bsdtar.default.files, ctx.executable._gawk],
arguments = [args],
)

Expand All @@ -48,6 +49,12 @@ dpkg_statusd = rule(
doc = "Compress the archive file with a supported algorithm.",
values = tar_lib.common.accepted_compression_types,
),
"_gawk": attr.label(
allow_single_file = True,
executable = True,
cfg = "exec",
default = "@gawk//:gawk",
),
},
implementation = _dpkg_statusd_impl,
toolchains = [tar_lib.TOOLCHAIN_TYPE],
Expand Down
5 changes: 3 additions & 2 deletions apt/private/dpkg_statusd.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@ readonly bsdtar="$1"
readonly out="$2"
readonly control_path="$3"
readonly package_name="$4"
shift 4
readonly awk="$5"
shift 5

include=(--include "^./control$" --include "^./md5sums$")

tmp=$(mktemp -d)
"$bsdtar" -xf "$control_path" "${include[@]}" -C "$tmp"

"$bsdtar" -cf - $@ --format=mtree "${include[@]}" --options '!gname,!uname,!sha1,!nlink,!time' "@$control_path" | \
awk -v pkg="$package_name" '{
"${awk}" -v pkg="$package_name" '{
if ($1=="#mtree") {
print $1; next
};
Expand Down
11 changes: 8 additions & 3 deletions distroless/private/flatten.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ def _flatten_impl(ctx):
args = ctx.actions.args()
args.add(bsdtar.tarinfo.binary)
args.add(str(ctx.attr.deduplicate))
args.add(ctx.executable._gawk.path)
args.add_all(tar_lib.DEFAULT_ARGS)
args.add("--create")
tar_lib.common.add_compression_args(ctx.attr.compress, args)
Expand All @@ -23,7 +24,7 @@ def _flatten_impl(ctx):
executable = ctx.executable._flatten_sh,
inputs = ctx.files.tars,
outputs = [output],
tools = bsdtar.default.files,
tools = [bsdtar.default.files, ctx.executable._gawk],
arguments = [args],
mnemonic = "Flatten",
progress_message = "Flattening %{label}",
Expand All @@ -47,14 +48,18 @@ EXPERIMENTAL: We may change or remove it without a notice.

Remove duplicate entries from the archives after flattening.
Deduplication is performed only for directories.

This requires `awk` to be available in the PATH.
""", default = False),
"compress": attr.string(
doc = "Compress the archive file with a supported algorithm.",
values = tar_lib.common.accepted_compression_types,
),
"_flatten_sh": attr.label(default = "//distroless/private:flatten.sh", executable = True, cfg = "exec", allow_single_file = True),
"_gawk": attr.label(
allow_single_file = True,
executable = True,
cfg = "exec",
default = "@gawk//:gawk",
),
},
implementation = _flatten_impl,
toolchains = [tar_lib.TOOLCHAIN_TYPE],
Expand Down
19 changes: 10 additions & 9 deletions distroless/private/flatten.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,37 +3,38 @@ set -o pipefail -o errexit

bsdtar="$1";
deduplicate="$2";
shift 2;
readonly awk="$3"
shift 3;

# Deduplication requested, use this complex pipeline to deduplicate.
if [[ "$deduplicate" == "True" ]]; then

mtree=$(mktemp)

# List files in all archives and append to single column mtree.
for arg in "$@"; do
# List files in all archives and append to single column mtree.
for arg in "$@"; do
if [[ "$arg" == "@"* ]]; then
"$bsdtar" -tf "${arg:1}" >> "$mtree"
fi
done

# There is not a lot happening here, but there is still too much implicit knowledge.
#
#
# When we run bsdtar, we ask it to prompt for every entry, in the same order as the mtree we created above.
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/write.c#L683
#
#
# For every prompt (and therefore every entry), we have to write 31 bytes of data, one of which has to be either 'Y' or 'N'.
# The reason is that, since we are not a TTY but are pretending to be one, we can't interleave write calls,
# so we have to interleave it by filling up the buffer with 31 bytes of 'Y' or 'N'.
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L240
# See: https://github.com/libarchive/libarchive/blob/f745a848d7a81758cd9fcd49d7fd45caeebe1c3d/tar/util.c#L216
#
#
# To match the extraction behavior of tar itself, we want to preserve only the final occurrence of each file
# and directory in the archive. To do this, we iterate over all the entries twice. The first pass computes the
# number of occurrences of each path, and the second pass determines whether each entry is the final (or only)
# occurrence of that path.

$bsdtar --confirmation "$@" 2< <(awk '{
$bsdtar --confirmation "$@" 2< <("${awk}" '{
count[$1]++;
files[NR] = $1
}
Expand All @@ -50,7 +51,7 @@ if [[ "$deduplicate" == "True" ]]; then
}
}' "$mtree")
rm "$mtree"
else
else
# No deduplication, business as usual
$bsdtar "$@"
fi
fi