Skip to content

Commit 6b9d929

Browse files
Lachlan DonaldArturo Pie
authored andcommitted
Allow minimum disk before cleanup to be customized
1 parent db97804 commit 6b9d929

File tree

6 files changed

+94
-27
lines changed

6 files changed

+94
-27
lines changed
Lines changed: 48 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -1,25 +1,57 @@
11
#!/bin/bash
22
set -euo pipefail
33

4-
DISK_MIN_AVAILABLE=${DISK_MIN_AVAILABLE:-5242880} # 5GB
5-
DISK_MIN_INODES=${DISK_MIN_INODES:-250000} # docker needs lots
6-
7-
DOCKER_DIR="/var/lib/docker/"
8-
9-
disk_avail=$(df -k --output=avail "$DOCKER_DIR" | tail -n1)
10-
11-
echo "Disk space free: $(df -k -h --output=avail "$DOCKER_DIR" | tail -n1 | sed -e 's/^[[:space:]]//')"
12-
13-
if [[ $disk_avail -lt $DISK_MIN_AVAILABLE ]]; then
14-
echo "Not enough disk space free, cutoff is ${DISK_MIN_AVAILABLE} 🚨" >&2
15-
exit 1
4+
# Usage:
5+
# bk-check-disk-space.sh (min disk required) (min inodes required)
6+
# min disk required can be either an amount of bytes, a pattern like 10G
7+
# or 500M, or a percentage like 5%
8+
# min inodes must be a number, defaults to 250,000
9+
10+
#######################################
# Converts a size such as 1.43K, 120.3M, 5GB or a plain byte count into
# kibibytes, the unit `df -k` reports, so the result can be compared directly
# against the "avail" column of df. Fractional results are truncated.
# Arguments: $1 - a number with an optional, case-insensitive unit suffix:
#                 none or B (bytes), K, M, G or T, each optionally followed
#                 by B (e.g. 500M, 2GB)
# Outputs:   the size in KiB on stdout
# Returns:   0 on success; 1 with a message on stderr when $1 is not a size
#######################################
dehumanize() {
  awk '
    BEGIN { status = 1 }
    # No unit (or a bare B) means bytes: scale down to KiB
    $1 ~ /^[0-9]*\.?[0-9]+[bB]?$/     { printf "%u\n", $1 / 1024;                 status = 0 }
    $1 ~ /^[0-9]*\.?[0-9]+[kK][bB]?$/ { printf "%u\n", $1;                        status = 0 }
    $1 ~ /^[0-9]*\.?[0-9]+[mM][bB]?$/ { printf "%u\n", $1 * 1024;                 status = 0 }
    $1 ~ /^[0-9]*\.?[0-9]+[gG][bB]?$/ { printf "%u\n", $1 * (1024 * 1024);        status = 0 }
    $1 ~ /^[0-9]*\.?[0-9]+[tT][bB]?$/ { printf "%u\n", $1 * (1024 * 1024 * 1024); status = 0 }
    END { exit status }
  ' <<< "$1" || {
    printf 'Unrecognised size: %s\n' "$1" >&2
    return 1
  }
}
18+
19+
min_available=${1:-5G}
20+
docker_dir="/var/lib/docker/"
21+
22+
# First check the disk available
23+
24+
disk_avail=$(df -k --output=avail "$docker_dir" | tail -n1)
25+
disk_avail_human=$(df -k -h --output=avail "$docker_dir" | tail -n1 | tr -d '[:space:]')
26+
disk_used_pct=$(df -k --output=pcent "$docker_dir" | tail -n1 | tr -d '[:space:]' | tr -d '%')
27+
disk_free_pct=$((100-disk_used_pct))
28+
29+
printf "Disk space free: %s (%s%%)\\n" "$disk_avail_human" "$disk_free_pct"
30+
31+
# Check if the min_available is a percentage
32+
if [[ $min_available =~ \%$ ]] ; then
33+
if [[ $(echo "${disk_free_pct}<${min_available}" | sed 's/%//g' | bc) -gt 0 ]] ; then
34+
echo "Not enough disk space free, cutoff is ${min_available} 🚨" >&2
35+
exit 1
36+
fi
37+
else
38+
if [[ $disk_avail -lt $(dehumanize "$min_available") ]]; then
39+
echo "Not enough disk space free, cutoff is ${min_available} 🚨" >&2
40+
exit 1
41+
fi
1642
fi
1743

18-
inodes_avail=$(df -k --output=iavail "$DOCKER_DIR" | tail -n1)
44+
# Next check inodes, these can be exhausted by docker build operations
45+
46+
inodes_min_available=${2:-250000}
47+
inodes_avail=$(df -k --output=iavail "$docker_dir" | tail -n1 | tr -d '[:space:]')
48+
inodes_avail_human=$(df -k -h --output=iavail "$docker_dir" | tail -n1 | tr -d '[:space:]')
49+
inodes_used_pct=$(df -k --output=ipcent "$docker_dir" | tail -n1 | tr -d '[:space:]' | tr -d '%')
50+
inodes_free_pct=$((100-inodes_used_pct))
1951

20-
echo "Inodes free: $(df -k -h --output=iavail "$DOCKER_DIR" | tail -n1 | sed -e 's/^[[:space:]]//')"
52+
printf "Inodes free: %s (%s%%)\\n" "$inodes_avail_human" "$inodes_free_pct"
2153

22-
if [[ $inodes_avail -lt $DISK_MIN_INODES ]]; then
23-
echo "Not enough inodes free, cutoff is ${DISK_MIN_INODES} 🚨" >&2
54+
if [[ $inodes_avail -lt $inodes_min_available ]]; then
55+
echo "Not enough inodes free, cutoff is ${inodes_min_available} 🚨" >&2
2456
exit 1
2557
fi

packer/linux/conf/bin/bk-install-elastic-stack.sh

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,12 @@ export PLUGINS_ENABLED="${PLUGINS_ENABLED[*]-}"
6262
export BUILDKITE_ECR_POLICY=${BUILDKITE_ECR_POLICY:-none}
6363
EOF
6464

65+
# cron-env is sourced by crontab entries and low disk scripts.
# The values originate from stack parameters, so they are written with %q:
# the file is later `source`d, and an unquoted value containing spaces or
# shell metacharacters would break (or be executed by) the sourcing script.
# Fallbacks match the defaults used by the scripts that consume these values.
printf 'export DISK_MIN_AVAILABLE=%q\nexport DOCKER_PRUNE_UNTIL=%q\n' \
  "${DISK_MIN_AVAILABLE:-5G}" \
  "${DOCKER_PRUNE_UNTIL:-4h}" \
  > /var/lib/buildkite-agent/cron-env
70+
6571
if [[ "${BUILDKITE_AGENT_RELEASE}" == "edge" ]] ; then
6672
echo "Downloading buildkite-agent edge..."
6773
curl -Lsf -o /usr/bin/buildkite-agent-edge \

packer/linux/conf/buildkite-agent/hooks/environment

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,11 @@ source ~/cfn-env
77
echo "~~~ :llama: Setting up elastic stack environment ($BUILDKITE_STACK_VERSION)"
88
cat ~/cfn-env
99

10+
if [[ -f ~/cron-env ]] ; then
11+
# shellcheck source=/dev/null
12+
source ~/cron-env
13+
fi
14+
1015
echo "Checking docker"
1116
if ! docker ps ; then
1217
echo "^^^ +++"
@@ -17,13 +22,13 @@ if ! docker ps ; then
1722
fi
1823

1924
echo "Checking disk space"
20-
if ! /usr/local/bin/bk-check-disk-space.sh ; then
25+
if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-5G}" ; then
2126

2227
echo "Cleaning up docker resources older than ${DOCKER_PRUNE_UNTIL:-4h}"
2328
docker image prune --all --force --filter "until=${DOCKER_PRUNE_UNTIL:-4h}"
2429

2530
echo "Checking disk space again"
26-
if ! /usr/local/bin/bk-check-disk-space.sh ; then
31+
if ! /usr/local/bin/bk-check-disk-space.sh "${DISK_MIN_AVAILABLE:-5G}"; then
2732
echo "Disk health checks failed" >&2
2833
exit 1
2934
fi

packer/linux/conf/docker/cron.hourly/docker-gc

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -5,10 +5,16 @@ if [[ $EUID -eq 0 ]]; then
55
exec >> /var/log/elastic-stack.log 2>&1 # Logs to elastic-stack.log
66
fi
77

8-
DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-4h}
8+
# Load config from file if it exists
if [[ -f /var/lib/buildkite-agent/cron-env ]] ; then
  # shellcheck source=/dev/null
  source /var/lib/buildkite-agent/cron-env
fi

# Apply the default after sourcing, so a missing cron-env and a cron-env that
# leaves the value unset or empty both end up with a usable prune window.
DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-4h}
915

1016
## ------------------------------------------
1117
## Prune stuff that doesn't affect cache hits
1218

13-
docker network prune --force --filter "until=${DOCKER_PRUNE_UNTIL}"
14-
docker container prune --force --filter "until=${DOCKER_PRUNE_UNTIL}"
19+
# Use plain expansion here: ${!DOCKER_PRUNE_UNTIL} is bash indirect expansion
# and would look up a variable named after the value (e.g. "4h").
docker network prune --force --filter "until=${DOCKER_PRUNE_UNTIL}"
docker container prune --force --filter "until=${DOCKER_PRUNE_UNTIL}"

packer/linux/conf/docker/cron.hourly/docker-low-disk-gc

Lines changed: 12 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,8 +5,6 @@ if [[ $EUID -eq 0 ]]; then
55
exec >> /var/log/elastic-stack.log 2>&1 # Logs to elastic-stack.log
66
fi
77

8-
DOCKER_PRUNE_UNTIL=${DOCKER_PRUNE_UNTIL:-1h}
9-
108
mark_instance_unhealthy() {
119
# cancel any running buildkite builds
1210
killall -QUIT buildkite-agent || true
@@ -19,14 +17,22 @@ mark_instance_unhealthy() {
1917

2018
trap mark_instance_unhealthy ERR
2119

20+
# Load config from file if it exists
if [[ -f /var/lib/buildkite-agent/cron-env ]] ; then
  # shellcheck source=/dev/null
  source /var/lib/buildkite-agent/cron-env
fi

# Apply the default after sourcing, so a missing cron-env and a cron-env that
# leaves the value unset or empty both end up with a usable threshold.
DISK_MIN_AVAILABLE=${DISK_MIN_AVAILABLE:-5G}
27+
2228
## -----------------------------------------------------------------
2329
## Check disk, we only want to prune images/containers if we have to
2430

25-
if ! /usr/local/bin/bk-check-disk-space.sh ; then
26-
echo "Cleaning up docker resources older than ${DOCKER_PRUNE_UNTIL}"
27-
docker image prune --all --force --filter "until=${DOCKER_PRUNE_UNTIL}"
31+
if ! /usr/local/bin/bk-check-disk-space.sh "$DISK_MIN_AVAILABLE" ; then
32+
echo "Cleaning up docker resources older than 1h"
33+
docker image prune --all --force --filter "until=1h"
2834

29-
if ! /usr/local/bin/bk-check-disk-space.sh ; then
35+
if ! /usr/local/bin/bk-check-disk-space.sh "$DISK_MIN_AVAILABLE" ; then
3036
echo "Disk health checks failed" >&2
3137
exit 1
3238
fi

templates/aws-stack.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -375,6 +375,16 @@ Parameters:
375375
- "false"
376376
Default: "false"
377377

378+
MinimumDiskAvailableBeforeCleanup:
379+
Type: String
380+
Description: Either a percentage (%) or absolute unit (B, MB, GB) of disk below which disk cleanup is run
381+
Default: "2GB"
382+
383+
DockerPruneUntil:
384+
Type: String
385+
Description: How far back to prune docker networks images and containers on hourly cleanup
386+
Default: "4h"
387+
378388
Outputs:
379389
VpcId:
380390
Value:
@@ -857,6 +867,8 @@ Resources:
857867
BUILDKITE_ECR_POLICY=${ECRAccessPolicy} \
858868
BUILDKITE_TERMINATE_INSTANCE_AFTER_JOB=${BuildkiteTerminateInstanceAfterJob} \
859869
BUILDKITE_ADDITIONAL_SUDO_PERMISSIONS=${BuildkiteAdditionalSudoPermissions} \
870+
DISK_MIN_AVAILABLE="${MinimumDiskAvailableBeforeCleanup}" \
871+
DOCKER_PRUNE_UNTIL="${DockerPruneUntil}" \
860872
AWS_DEFAULT_REGION=${AWS::Region} \
861873
SECRETS_PLUGIN_ENABLED=${EnableSecretsPlugin} \
862874
ECR_PLUGIN_ENABLED=${EnableECRPlugin} \

0 commit comments

Comments
 (0)