Skip to content

Commit f9d7731

Browse files
committed
Mark instance unhealthy instead of shutting down
1 parent d9fe204 commit f9d7731

File tree

2 files changed

+36
-18
lines changed

2 files changed

+36
-18
lines changed

packer/linux/conf/buildkite-agent/scripts/terminate-instance

Lines changed: 36 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -3,28 +3,50 @@
33
set -euo pipefail
44

55
terminate() {
6-
aws autoscaling terminate-instance-in-auto-scaling-group --region "$1" --instance-id "$2" "--should-decrement-desired-capacity"
6+
aws autoscaling terminate-instance-in-auto-scaling-group \
7+
--region "$1" \
8+
--instance-id "$2" \
9+
--should-decrement-desired-capacity
710
}
811

9-
echo "sleeping for 10 seconds before terminating instance to allow agent logs to drain to cloudwatch..."
12+
mark_as_unhealthy() {
13+
aws autoscaling set-instance-health \
14+
--region "$1" \
15+
--instance-id "$2" \
16+
--health-status Unhealthy
17+
}
1018

19+
echo "sleeping for 10 seconds before terminating instance to allow agent logs to drain to cloudwatch..."
1120
sleep 10
1221

13-
token=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 60" --fail --silent --show-error --location "http://169.254.169.254/latest/api/token")
14-
instance_id=$(curl -H "X-aws-ec2-metadata-token: $token" --fail --silent --show-error --location "http://169.254.169.254/latest/meta-data/instance-id")
15-
region=$(curl -H "X-aws-ec2-metadata-token: $token" --fail --silent --show-error --location "http://169.254.169.254/latest/meta-data/placement/region")
22+
token=$(
23+
curl \
24+
--fail --silent --show-error \
25+
-X PUT \
26+
-H "X-aws-ec2-metadata-token-ttl-seconds: 60" \
27+
--location "http://169.254.169.254/latest/api/token"
28+
)
29+
instance_id=$(
30+
curl \
31+
--fail --silent --show-error \
32+
-H "X-aws-ec2-metadata-token: $token" \
33+
--location "http://169.254.169.254/latest/meta-data/instance-id"
34+
)
35+
region=$(
36+
curl \
37+
--fail --silent --show-error \
38+
-H "X-aws-ec2-metadata-token: $token" \
39+
--location "http://169.254.169.254/latest/meta-data/placement/region"
40+
)
1641

1742
echo "requesting instance termination..."
18-
1943
if [[ $BUILDKITE_TERMINATE_INSTANCE_AFTER_JOB == "true" ]]; then
20-
# If we're the final before the ASG's min size, the call to terminate-instance-in-autoscaling-group will fail, as AWS
21-
# won't allow the ASG to go below its min size. In this case, we need to call shutdown instead and force the issue -
22-
# the ASG will then spin up a new instance to replace the one we're shutting down, leaving all well in the world.
23-
#
24-
# We need to do this because if the call to terminate fails, the systemd unit will start up a new buildkite-agent process
25-
# on this machine, with state left over from the last agent, which is the opposite of what we want when $BUILDKITE_TERMINATE_INSTANCE_AFTER_JOB is true.
26-
terminate "$region" "$instance_id" || shutdown now
44+
# If the ASG is at the min capacity, the call to terminate-instance-in-auto-scaling-group will fail.
45+
# In this case, we mark the instance as unhealthy, then the ASG will spin up a new instance
46+
# to replace it.
47+
terminate "$region" "$instance_id" || mark_as_unhealthy "$region" "$instance_id"
2748
else
28-
# If we're not in terminate-after-job mode, then it's fine for this to fail, as it'll be as if the instance never got shut down.
49+
# If we're not in terminate-after-job mode, then it's fine for this to fail. Systemd will restart
50+
# the agent and it'll be as if the instance never got shut down.
2951
terminate "$region" "$instance_id"
3052
fi

templates/aws-stack.yml

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,10 +1095,6 @@ Resources:
10951095
IamInstanceProfile:
10961096
Arn: !GetAtt "IAMInstanceProfile.Arn"
10971097
InstanceType: !Select [ "0", !Split [ ",", !Join [ ",", [ !Ref InstanceTypes, "", "", "" ] ] ] ]
1098-
InstanceInitiatedShutdownBehavior: !If
1099-
- TerminateInstanceAfterJob
1100-
- terminate
1101-
- stop
11021098
MetadataOptions:
11031099
HttpTokens: !Ref IMDSv2Tokens
11041100
# Allow containers using a Docker network on the host to receive IMDSv2 responses

0 commit comments

Comments
 (0)