Skip to content

Commit bb64864

Browse files
author
James Healy
authored
Merge pull request #737 from bshelton229/shutdown-spot-timeout
Send SIGQUIT before spot termination time
2 parents b9d66bb + 46669ae commit bb64864

File tree

1 file changed

+41
-8
lines changed

1 file changed

+41
-8
lines changed

packer/linux/conf/buildkite-agent/scripts/stop-agent-gracefully

Lines changed: 41 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -4,13 +4,46 @@ set -eu -o pipefail
44
# Import BUILDKITE_AGENTS_PER_INSTANCE
55
eval "$(cat /var/lib/buildkite-agent/cfn-env)"
66

7-
echo "Stopping buildkite-agent gracefully"
8-
systemctl stop "buildkite-agent"
7+
LIFECYCLE_TRANSITION=${1:-}
98

10-
# Need to ensure it's the buildkite-agent user, so it doesn't match this lifecycled handler script
11-
while pgrep -u buildkite-agent buildkite-agent > /dev/null; do
12-
echo "Waiting for all buildkite-agent processes to have stopped..."
13-
sleep 5
14-
done
9+
echo "Stopping buildkite-agent gracefully due to the lifecycle transition: ${LIFECYCLE_TRANSITION}"
1510

16-
echo "All buildkite-agent processes have stopped"
11+
if [[ $LIFECYCLE_TRANSITION = "ec2:SPOT_INSTANCE_TERMINATION" ]]; then
12+
# Send SIGTERM to the main buildkite-agent process in a non-blocking manner
13+
# to start a graceful shutdown
14+
systemctl kill --kill-who=main --signal=SIGTERM buildkite-agent
15+
16+
17+
# Spot termination time is provided as the third argument in RFC3339 format,
18+
# e.g. 2020-10-09T01:33:10Z
19+
SPOT_TERMINATION_TIME=$3
20+
SPOT_TERMINATION_TIME_UNIX=$(date '+%s' -d "${SPOT_TERMINATION_TIME}") # quoted so date(1) receives the timestamp as exactly one argument
21+
# Subtracts 20 seconds from the published spot termination time to give the
22+
# agent a little time to forcefully quit running jobs. If the agent doesn't
23+
# gracefully quit before this time we will send SIGQUIT.
24+
STOP_BY_TIME_UNIX=$((SPOT_TERMINATION_TIME_UNIX - 20)) # shell arithmetic: no expr subprocess, no word splitting
25+
26+
echo "Waiting for agents to quit gracefully until 20 seconds before the spot termination time: ${SPOT_TERMINATION_TIME}"
27+
while [[ $(date '+%s') -lt $STOP_BY_TIME_UNIX ]]; do
28+
if ! pgrep -u buildkite-agent buildkite-agent > /dev/null; then
29+
echo "All buildkite agents have stopped gracefully"
30+
exit 0
31+
fi
32+
sleep 1
33+
done
34+
35+
# Agents are still running and we're close to time, so send SIGQUIT. We can let
36+
# this script exit as spot will hard terminate in a matter of seconds.
37+
echo "Agents are still running. Sending SIGQUIT ahead of forced spot termination at ${SPOT_TERMINATION_TIME}"
38+
systemctl kill --kill-who=main --signal=SIGQUIT buildkite-agent
39+
else
40+
systemctl stop "buildkite-agent"
41+
42+
# Need to ensure it's the buildkite-agent user, so it doesn't match this lifecycled handler script
43+
while pgrep -u buildkite-agent buildkite-agent > /dev/null; do
44+
echo "Waiting for all buildkite-agent processes to have stopped..."
45+
sleep 5
46+
done
47+
48+
echo "All buildkite-agent processes have stopped"
49+
fi

0 commit comments

Comments
 (0)