|
3 | 3 | set -euo pipefail
|
4 | 4 |
|
5 | 5 | terminate() {
|
6 |
| - aws autoscaling terminate-instance-in-auto-scaling-group --region "$1" --instance-id "$2" "--should-decrement-desired-capacity" |
| 6 | + aws autoscaling terminate-instance-in-auto-scaling-group \ |
| 7 | + --region "$1" \ |
| 8 | + --instance-id "$2" \ |
| 9 | + --should-decrement-desired-capacity |
7 | 10 | }
|
8 | 11 |
|
9 |
| -echo "sleeping for 10 seconds before terminating instance to allow agent logs to drain to cloudwatch..." |
| 12 | +mark_as_unhealthy() { |
| 13 | + aws autoscaling set-instance-health \ |
| 14 | + --region "$1" \ |
| 15 | + --instance-id "$2" \ |
| 16 | + --health-status Unhealthy |
| 17 | +} |
10 | 18 |
|
| 19 | +echo "sleeping for 10 seconds before terminating instance to allow agent logs to drain to cloudwatch..." |
11 | 20 | sleep 10
|
12 | 21 |
|
13 |
| -token=$(curl -X PUT -H "X-aws-ec2-metadata-token-ttl-seconds: 60" --fail --silent --show-error --location "http://169.254.169.254/latest/api/token") |
14 |
| -instance_id=$(curl -H "X-aws-ec2-metadata-token: $token" --fail --silent --show-error --location "http://169.254.169.254/latest/meta-data/instance-id") |
15 |
| -region=$(curl -H "X-aws-ec2-metadata-token: $token" --fail --silent --show-error --location "http://169.254.169.254/latest/meta-data/placement/region") |
| 22 | +token=$( |
| 23 | + curl \ |
| 24 | + --fail --silent --show-error \ |
| 25 | + -X PUT \ |
| 26 | + -H "X-aws-ec2-metadata-token-ttl-seconds: 60" \ |
| 27 | + --location "http://169.254.169.254/latest/api/token" |
| 28 | +) |
| 29 | +instance_id=$( |
| 30 | + curl \ |
| 31 | + --fail --silent --show-error \ |
| 32 | + -H "X-aws-ec2-metadata-token: $token" \ |
| 33 | + --location "http://169.254.169.254/latest/meta-data/instance-id" |
| 34 | +) |
| 35 | +region=$( |
| 36 | + curl \ |
| 37 | + --fail --silent --show-error \ |
| 38 | + -H "X-aws-ec2-metadata-token: $token" \ |
| 39 | + --location "http://169.254.169.254/latest/meta-data/placement/region" |
| 40 | +) |
16 | 41 |
|
17 | 42 | echo "requesting instance termination..."
|
18 |
| - |
19 | 43 | if [[ $BUILDKITE_TERMINATE_INSTANCE_AFTER_JOB == "true" ]]; then
|
20 |
| - # If we're the final before the ASG's min size, the call to terminate-instance-in-autoscaling-group will fail, as AWS |
21 |
| - # won't allow the ASG to go below its min size. In this case, we need to call shutdown instead and force the issue - |
22 |
| - # the ASG will then spin up a new instance to replace the one we're shutting down, leaving all well in the world. |
23 |
| - # |
24 |
| - # We need to do this because if the call to terminate fails, the systemd unit will start up a new buildkite-agent process |
25 |
| - # on this machine, with state left over from the last agent, which is the opposite of what we want when $BUILDKITE_TERMINATE_INSTANCE_AFTER_JOB is true. |
26 |
| - terminate "$region" "$instance_id" || shutdown now |
| 44 | + # If the ASG is at the min capacity, the call to terminate-instance-in-autoscaling-group |
| 45 | + # will fail. In this case, we mark the instance as unhealthy, then the ASG will spin up a new instance |
| 46 | + # to replace it. |
| 47 | + terminate "$region" "$instance_id" || mark_as_unhealthy "$region" "$instance_id" |
27 | 48 | else
|
28 |
| - # If we're not in terminate-after-job mode, then it's fine for this to fail, as it'll be as if the instance never got shut down. |
| 49 | + # If we're not in terminate-after-job mode, then it's fine for this to fail. Systemd will restart |
| 50 | + # the agent and it'll be as if the instance never got shut down. |
29 | 51 | terminate "$region" "$instance_id"
|
30 | 52 | fi
|
0 commit comments