Skip to content

Commit e3433b8

Browse files
committed
feat: custom resource to gracefully terminate agents
1 parent c614831 commit e3433b8

File tree

1 file changed

+130
-0
lines changed

1 file changed

+130
-0
lines changed

templates/aws-stack.yml

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1727,6 +1727,136 @@ Resources:
17271727
ServiceToken: !GetAtt AzRebalancingSuspenderFunction.Arn
17281728
AutoScalingGroupName: !Ref AgentAutoScaleGroup
17291729

1730+
# Execution role for StopBuildkiteAgentsFunction.
#
# Grants the Lambda service permission to assume the role, basic Lambda
# logging (AWSLambdaBasicExecutionRole), and three inline policies:
#   * DescribeASGs             - look up Auto Scaling groups and their instances.
#   * RunStopBuildkiteDocument - run the AWS-owned AWS-RunShellScript document.
#   * StopBuildkiteInstances   - target only EC2 instances tagged with the
#                                CloudFormation logical id "AgentAutoScaleGroup".
StopBuildkiteAgentsRole:
  Type: AWS::IAM::Role
  Properties:
    # The boundary is attached only when the
    # SetInstanceRolePermissionsBoundaryARN condition holds; otherwise the
    # property is omitted entirely via AWS::NoValue.
    PermissionsBoundary:
      !If [
        SetInstanceRolePermissionsBoundaryARN,
        !Ref InstanceRolePermissionsBoundaryARN,
        !Ref "AWS::NoValue",
      ]
    AssumeRolePolicyDocument:
      Version: 2012-10-17
      Statement:
        - Effect: Allow
          Principal:
            Service:
              - lambda.amazonaws.com
          Action:
            - sts:AssumeRole
    ManagedPolicyArns:
      # Use the partition pseudo parameter (as the inline policies below do)
      # so the ARN also resolves outside the standard "aws" partition.
      - !Sub arn:${AWS::Partition}:iam::aws:policy/service-role/AWSLambdaBasicExecutionRole
    Policies:
      - PolicyName: DescribeASGs
        PolicyDocument:
          Version: 2012-10-17
          Statement:
            - Effect: Allow
              Action:
                - "autoscaling:DescribeAutoScalingGroups"
              Resource: "*"
      - PolicyName: RunStopBuildkiteDocument
        PolicyDocument:
          Version: 2012-10-17
          Statement:
            - Effect: Allow
              Action:
                - "ssm:SendCommand"
              Resource:
                # The account field is empty because AWS-RunShellScript is an
                # AWS-owned document.
                - !Sub arn:${AWS::Partition}:ssm:${AWS::Region}::document/AWS-RunShellScript
      - PolicyName: StopBuildkiteInstances
        PolicyDocument:
          Version: 2012-10-17
          Statement:
            - Effect: Allow
              Action:
                - "ssm:SendCommand"
              Resource:
                - !Sub arn:${AWS::Partition}:ec2:${AWS::Region}:${AWS::AccountId}:instance/*
              # Restrict command targets to instances carrying the
              # AgentAutoScaleGroup logical-id tag.
              Condition:
                StringEquals:
                  "aws:resourceTag/aws:cloudformation:logical-id": "AgentAutoScaleGroup"
1780+
1781+
# Lambda behind the StopBuildkiteAgents custom resource.
#
# On an Update request it reads the Auto Scaling group name from the OLD
# resource properties, lists that group's instances and, through SSM Run
# Command, sends SIGTERM to the buildkite-agent process on each of them.
# Create and Delete requests are acknowledged without doing anything.
#
# The cfnresponse module is only provided for inline (ZipFile) code, and
# inline code is limited to 4096 characters, so keep additions short.
StopBuildkiteAgentsFunction:
  Type: AWS::Lambda::Function
  Properties:
    Description: "Gracefully stops all Buildkite agents in a given Auto Scaling group."
    Code:
      ZipFile: |
        import boto3
        import logging
        import cfnresponse

        logger = logging.getLogger()
        logger.setLevel(logging.INFO)

        autoscaling_client = boto3.client("autoscaling")
        ssm_client = boto3.client("ssm")

        PHYSICAL_ID = "CustomResourcePhysicalID"
        # ssm:SendCommand accepts at most 50 instance IDs per call.
        MAX_TARGETS = 50

        def handler(event, context):
            """Stop agents in the previous ASG on Update; no-op otherwise."""
            logger.info(f"Received event: {event}")

            if event["RequestType"] != "Update":
                # For Create and Delete events, just send success response
                cfnresponse.send(event, context, cfnresponse.SUCCESS, {}, PHYSICAL_ID)
                return

            try:
                # Target the group being replaced, not the new one.
                asg_name = event["OldResourceProperties"]["AutoScalingGroupName"]
                instances = get_autoscaling_instances(asg_name)
                if instances:
                    logger.info(f"Stopping Buildkite agents on instances: {instances}")
                    stop_bk_agents(instances)
                else:
                    logger.info("No instances found in the Auto Scaling group.")
            except Exception as e:
                # Always answer CloudFormation so the stack does not hang.
                logger.exception(f"Error: {e}")
                cfnresponse.send(event, context, cfnresponse.FAILED, {"Error": str(e)}, PHYSICAL_ID)
            else:
                cfnresponse.send(event, context, cfnresponse.SUCCESS, {}, PHYSICAL_ID)

        def get_autoscaling_instances(autoscaling_group_name):
            """Return the instance IDs in the group ([] if it is gone)."""
            logger.info(f"Retrieving instances in Auto Scaling group: {autoscaling_group_name}")
            response = autoscaling_client.describe_auto_scaling_groups(
                AutoScalingGroupNames=[autoscaling_group_name],
                MaxRecords=1
            )
            groups = response["AutoScalingGroups"]
            if not groups:
                # Nothing left to stop; do not fail the stack update.
                logger.info(f"Auto Scaling group {autoscaling_group_name} not found.")
                return []
            instances = [i["InstanceId"] for i in groups[0]["Instances"]]
            logger.info(f"Instances in Auto Scaling group {autoscaling_group_name}: {instances}")
            return instances

        def stop_bk_agents(instances):
            """Send SIGTERM to buildkite-agent on the given instances via SSM."""
            logger.info(f"Running agent termination command on {instances}")
            # Batch the targets to stay within the SendCommand limit.
            for start in range(0, len(instances), MAX_TARGETS):
                response = ssm_client.send_command(
                    InstanceIds=instances[start:start + MAX_TARGETS],
                    DocumentName="AWS-RunShellScript",
                    Parameters={
                        "commands": ["sudo kill -s SIGTERM $(/bin/pidof buildkite-agent)"]
                    }
                )
                logger.info(f"SSM command response: {response}")
    Handler: index.handler
    Role: !GetAtt StopBuildkiteAgentsRole.Arn
    Runtime: "python3.12"
    # Lambda's default timeout is 3 seconds. A timed-out invocation never
    # reports back to CloudFormation, so leave headroom for the API calls.
    Timeout: 60
1852+
1853+
# Custom resource that invokes StopBuildkiteAgentsFunction. Its only input is
# the current name of AgentAutoScaleGroup, which is passed to the function as
# the AutoScalingGroupName property.
StopBuildkiteAgents:
  Type: AWS::CloudFormation::CustomResource
  Version: 1.0
  Properties:
    # ARN of the Lambda function that handles this resource's requests.
    ServiceToken:
      Fn::GetAtt: [StopBuildkiteAgentsFunction, Arn]
    AutoScalingGroupName:
      Ref: AgentAutoScaleGroup
1859+
17301860
SecurityGroup:
17311861
Type: AWS::EC2::SecurityGroup
17321862
Condition: CreateSecurityGroup

0 commit comments

Comments
 (0)