From b8151df98642662a405428fe19cf06f34be137f8 Mon Sep 17 00:00:00 2001 From: Zane Bitter Date: Wed, 5 Nov 2025 16:58:07 +1300 Subject: [PATCH 1/3] Report file system space usage in agent-gather --- data/data/agent/files/usr/local/bin/agent-gather | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data/data/agent/files/usr/local/bin/agent-gather b/data/data/agent/files/usr/local/bin/agent-gather index c201f08a7dc..a2a0478db78 100644 --- a/data/data/agent/files/usr/local/bin/agent-gather +++ b/data/data/agent/files/usr/local/bin/agent-gather @@ -78,6 +78,8 @@ function gather_storage_data() { cp /etc/mtab "${ARTIFACTS_DIR}/etc/mtab" ( >&2 echo -n ".") lsblk > "${ARTIFACTS_DIR}/lsblk" + ( >&2 echo -n ".") + df -h > "${ARTIFACTS_DIR}/df" ( >&2 echo " Done") } From 505bc7d69437f9f86d7080f3bcd3f81772fd15d4 Mon Sep 17 00:00:00 2001 From: Zane Bitter Date: Mon, 3 Nov 2025 15:51:56 +1300 Subject: [PATCH 2/3] OCPBUGS-62790: Resize /var tmpfs to 12GiB on live ISO Installations using ABI/assisted with 16GiB of RAM on the bootstrap node were failing with "no space left on device" during bootstrapping. The live ISO environment uses a tmpfs mounted at /var that is sized at 50% of available RAM. On systems with 16GiB of RAM, this provides only 8GiB of tmpfs space. At the beginning of the bootstrap process, node-image-pull.sh creates an ostree checkout underneath /var/ostree-container. When this is added to the regular disk space usage of the later parts of the bootstrap, the peak tmpfs usage hits around 9.4GiB. Over time, it appears that even without that extra data the bootstrap has come to consume more than 8GiB, or at least dangerously close to it. This fix resizes the /var tmpfs, the loopback backing file in it, and the filesystem on it to 12GiB, if the tmpfs was smaller than that. This provides sufficient space for the image operations while maintaining compatibility with the minimum 16GiB RAM requirement. 
--- .../systemd/system/node-image-pull.service | 3 +- .../systemd/system/resize-ephemeral.service | 10 ++++++ .../files/usr/local/bin/resize-ephemeral.sh | 33 +++++++++++++++++++ 3 files changed, 45 insertions(+), 1 deletion(-) create mode 100644 data/data/bootstrap/files/etc/systemd/system/resize-ephemeral.service create mode 100644 data/data/bootstrap/files/usr/local/bin/resize-ephemeral.sh diff --git a/data/data/bootstrap/files/etc/systemd/system/node-image-pull.service b/data/data/bootstrap/files/etc/systemd/system/node-image-pull.service index 8ac36d75b8c..4dd7dd56fe8 100644 --- a/data/data/bootstrap/files/etc/systemd/system/node-image-pull.service +++ b/data/data/bootstrap/files/etc/systemd/system/node-image-pull.service @@ -1,7 +1,8 @@ [Unit] Description=Node Image Pull Requires=network.target NetworkManager.service -After=network.target +Wants=resize-ephemeral.service +After=network.target resize-ephemeral.service [Service] Type=oneshot diff --git a/data/data/bootstrap/files/etc/systemd/system/resize-ephemeral.service b/data/data/bootstrap/files/etc/systemd/system/resize-ephemeral.service new file mode 100644 index 00000000000..26d21ae6ae5 --- /dev/null +++ b/data/data/bootstrap/files/etc/systemd/system/resize-ephemeral.service @@ -0,0 +1,10 @@ +[Unit] +Description=Resize ephemeral disk +Requires=network.target NetworkManager.service +Before=node-image-pull.service bootkube.service +ConditionPathExists=/run/ostree-live + +[Service] +Type=oneshot +RemainAfterExit=yes +ExecStart=/usr/local/bin/resize-ephemeral.sh 12G diff --git a/data/data/bootstrap/files/usr/local/bin/resize-ephemeral.sh b/data/data/bootstrap/files/usr/local/bin/resize-ephemeral.sh new file mode 100644 index 00000000000..814c891e4fc --- /dev/null +++ b/data/data/bootstrap/files/usr/local/bin/resize-ephemeral.sh @@ -0,0 +1,33 @@ +#!/bin/bash + +set -eu + +TARGET_SIZE="$1" +BASE_DIR="/run/ephemeral_base" +LOOPBACK_FILE="${BASE_DIR}/loopfs" + +function get_size_bytes() { + printf "%d" 
"$(($(stat -f -c "%b * %s" "${BASE_DIR}")))" +} + +TARGET_SIZE_BYTES="$(numfmt --from=iec "${TARGET_SIZE}")" +CURRENT_SIZE_BYTES="$(get_size_bytes)" +CURRENT_SIZE="$(numfmt --to=iec "${CURRENT_SIZE_BYTES}")" + +if ((TARGET_SIZE_BYTES > CURRENT_SIZE_BYTES)); then + echo "Expanding ephemeral base dir from ${CURRENT_SIZE} to ${TARGET_SIZE}" + mount -o remount,size="${TARGET_SIZE}" "${BASE_DIR}" + + echo "Expanding ephemeral loopback" + truncate -s "$(get_size_bytes)" "${LOOPBACK_FILE}" + + LOOPBACK_DEVICE="$(losetup -j "${LOOPBACK_FILE}" -O NAME -n)" + losetup -c "${LOOPBACK_DEVICE}" + + echo "Expanding ephemeral filesystem" + xfs_growfs -d "${LOOPBACK_DEVICE}" +else + echo "Ephemeral base dir size ${CURRENT_SIZE} is already larger than ${TARGET_SIZE}; not expanding" +fi + +df -h From 4e5cb2909169cf2a9d62a72db363bad3d9ad4f3e Mon Sep 17 00:00:00 2001 From: Zane Bitter Date: Thu, 27 Nov 2025 22:09:22 +1300 Subject: [PATCH 3/3] Log peak ramdisk usage of node-image-pull --- .../bootstrap/files/usr/local/bin/node-image-pull.sh.template | 2 ++ 1 file changed, 2 insertions(+) diff --git a/data/data/bootstrap/files/usr/local/bin/node-image-pull.sh.template b/data/data/bootstrap/files/usr/local/bin/node-image-pull.sh.template index ba7bcaf4198..7b321c3c9d0 100755 --- a/data/data/bootstrap/files/usr/local/bin/node-image-pull.sh.template +++ b/data/data/bootstrap/files/usr/local/bin/node-image-pull.sh.template @@ -85,6 +85,8 @@ ostree checkout --repo "${ostree_repo}" ${hardlink} coreos/node-image "${ostree_ # in the assisted-installer case, nuke the temporary repo to save RAM if grep -q coreos.liveiso= /proc/cmdline; then + df -h "${ostree_repo}" echo "Deleting temporary repo" rm -rf "${ostree_repo}" + df -h "${ostree_checkout}" fi