Skip to content

Commit 4300f51

Browse files
authored
[EKS Performance Testing] Implement github action for scaling up and down eks performance cluster (#1776)
1 parent 219783d commit 4300f51

File tree

1 file changed

+142
-0
lines changed

1 file changed

+142
-0
lines changed
Lines changed: 142 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,142 @@
1+
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2+
# SPDX-License-Identifier: MIT
3+
name: EKS Cluster Scaling
4+
5+
on:
  schedule:
    # GitHub evaluates cron schedules in UTC. The job's step conditions compare
    # github.event.schedule against these exact strings, so keep them in sync.
    #
    # Scale up: runs every Sunday at 09:00 UTC.
    - cron: '0 9 * * 0'
    # Scale down: runs every Monday at 21:00 UTC.
    - cron: '0 21 * * 1'
  # Manual trigger: scales every node group to the requested capacity.
  workflow_dispatch:
    inputs:
      region:
        description: 'AWS Region'
        type: string
        required: true
        default: 'us-west-2'
      cluster_name:
        description: 'EKS Cluster Name'
        type: string
        required: true
        default: 'eks-performance'
      desired_capacity_per_nodegroup:
        description: 'Desired capacity for each node group'
        type: number
        required: true
        default: 500
      # Optional input (no `required` key); node groups are addressed by the
      # job as <cluster_name>-node-1 .. <cluster_name>-node-<count>.
      node_group_count:
        description: 'Count of node groups'
        type: number
        default: 10
30+
31+
env:
  # Target cluster settings. Each value takes the manual-dispatch input when
  # one is present and otherwise falls back to the literal default, which is
  # what scheduled runs end up using.
  # NOTE(review): `||` falls back whenever the left side is falsy; confirm
  # that a manual desired capacity of 0 is not replaced by 500 here.
  CLUSTER_NAME: ${{ inputs.cluster_name || 'eks-performance' }}
  AWS_REGION: ${{ inputs.region || 'us-west-2' }}
  DESIRED_CAPACITY_PER_NODEGROUP: ${{ inputs.desired_capacity_per_nodegroup || 500 }}
  NODE_GROUP_COUNT: ${{ inputs.node_group_count || 10 }}

  # IAM role assumed by the job, read from the repository/organization
  # variable of the same name, and the session length in seconds (1 hour).
  TERRAFORM_AWS_ASSUME_ROLE: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE }}
  TERRAFORM_AWS_ASSUME_ROLE_DURATION: 3600

  # Test repository that the job checks out.
  CWA_GITHUB_TEST_REPO_NAME: 'aws/amazon-cloudwatch-agent-test'
  CWA_GITHUB_TEST_REPO_URL: 'https://github.com/aws/amazon-cloudwatch-agent-test.git'
  CWA_GITHUB_TEST_REPO_BRANCH: 'main'
41+
42+
jobs:
  # Resizes every node group of the target EKS cluster, then checks that the
  # number of nodes reported by kubectl matches the requested total.
  #   - Sunday schedule   -> scale each group to DESIRED_CAPACITY_PER_NODEGROUP
  #   - Monday schedule   -> scale each group to 0
  #   - workflow_dispatch -> scale each group to DESIRED_CAPACITY_PER_NODEGROUP
  # Node groups are addressed as "$CLUSTER_NAME-node-<i>" for i in
  # 1..NODE_GROUP_COUNT.
  scale-eks-cluster:
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # needed to request the OIDC token used for role assumption
      contents: read
    steps:
      # NOTE(review): no later step in this job reads files from this
      # checkout; confirm whether it is still required.
      - uses: actions/checkout@v4
        with:
          repository: ${{ env.CWA_GITHUB_TEST_REPO_NAME }}
          ref: ${{ env.CWA_GITHUB_TEST_REPO_BRANCH }}

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
          # Reuse the workflow-level value so the region fallback lives in
          # exactly one place.
          aws-region: ${{ env.AWS_REGION }}
          role-duration-seconds: ${{ env.TERRAFORM_AWS_ASSUME_ROLE_DURATION }}

      - name: Install kubectl
        uses: azure/setup-kubectl@v3
        with:
          version: 'latest'

      - name: Update kubeconfig for EKS cluster
        run: |
          aws eks update-kubeconfig --name "$CLUSTER_NAME" --region "$AWS_REGION"

      # Runs only for the Sunday cron entry; the string must match `on.schedule`.
      - name: Scale up node groups (Sunday)
        if: github.event.schedule == '0 9 * * 0'
        run: |
          echo "Starting scale UP operation with desired capacity: $DESIRED_CAPACITY_PER_NODEGROUP"

          for i in $(seq 1 "$NODE_GROUP_COUNT"); do
            nodegroup="$CLUSTER_NAME-node-${i}"
            echo "Scaling node group: $nodegroup to $DESIRED_CAPACITY_PER_NODEGROUP"
            aws eks update-nodegroup-config \
              --cluster-name "$CLUSTER_NAME" \
              --nodegroup-name "$nodegroup" \
              --region "$AWS_REGION" \
              --scaling-config "desiredSize=$DESIRED_CAPACITY_PER_NODEGROUP"

            # Pause between groups only; there is nothing to wait for after
            # the last one.
            if [ "$i" -lt "$NODE_GROUP_COUNT" ]; then
              echo "Waiting 1 minute before scaling next node group..."
              sleep 60
            fi
          done

      # Runs only for the Monday cron entry; the string must match `on.schedule`.
      - name: Scale down node groups (Monday)
        if: github.event.schedule == '0 21 * * 1'
        run: |
          echo "Starting scale DOWN operation with desired capacity: 0"

          for i in $(seq 1 "$NODE_GROUP_COUNT"); do
            nodegroup="$CLUSTER_NAME-node-${i}"
            echo "Scaling node group: $nodegroup to 0"
            aws eks update-nodegroup-config \
              --cluster-name "$CLUSTER_NAME" \
              --nodegroup-name "$nodegroup" \
              --region "$AWS_REGION" \
              --scaling-config desiredSize=0

            # Pause between groups only; there is nothing to wait for after
            # the last one.
            if [ "$i" -lt "$NODE_GROUP_COUNT" ]; then
              echo "Waiting 1 minute before scaling next node group..."
              sleep 60
            fi
          done

      # Manual runs issue all updates back to back, without the pause used by
      # the scheduled runs.
      - name: Scale node groups (Manual)
        if: github.event_name == 'workflow_dispatch'
        run: |
          echo "Starting manual scaling operation with desired capacity: $DESIRED_CAPACITY_PER_NODEGROUP"

          for i in $(seq 1 "$NODE_GROUP_COUNT"); do
            nodegroup="$CLUSTER_NAME-node-${i}"
            echo "Scaling node group: $nodegroup to $DESIRED_CAPACITY_PER_NODEGROUP"
            aws eks update-nodegroup-config \
              --cluster-name "$CLUSTER_NAME" \
              --nodegroup-name "$nodegroup" \
              --region "$AWS_REGION" \
              --scaling-config "desiredSize=$DESIRED_CAPACITY_PER_NODEGROUP"
          done

      # Compares the node count reported by kubectl with the expected total.
      # The count covers every node in the cluster, so nodes that do not belong
      # to the scaled node groups are included as well.
      - name: Validate total node count
        env:
          # Empty for manual runs; passed through env instead of being
          # interpolated into the script text.
          TRIGGER_SCHEDULE: ${{ github.event.schedule }}
        run: |
          # Without pipefail a failing kubectl is hidden behind `wc -l` and
          # counted as zero nodes, which would let the scale-down check pass
          # even though the cluster was never queried.
          set -o pipefail

          echo "Waiting 20 minutes for scaling operations to complete..."
          sleep 1200

          echo "Validating total number of nodes in the cluster..."
          ACTUAL_NODE_COUNT=$(kubectl get nodes --no-headers | wc -l)

          # Determine expected count based on trigger type
          if [ "$TRIGGER_SCHEDULE" = "0 21 * * 1" ]; then
            EXPECTED_NODE_COUNT=0
          else
            EXPECTED_NODE_COUNT=$((NODE_GROUP_COUNT * DESIRED_CAPACITY_PER_NODEGROUP))
          fi

          echo "Expected total nodes: $EXPECTED_NODE_COUNT"
          echo "Actual total nodes: $ACTUAL_NODE_COUNT"

          if [ "$ACTUAL_NODE_COUNT" -eq "$EXPECTED_NODE_COUNT" ]; then
            echo "Validation successful! Node count matches expected value."
          else
            echo "Validation failed. Expected $EXPECTED_NODE_COUNT nodes but found $ACTUAL_NODE_COUNT nodes."
            exit 1
          fi

0 commit comments

Comments
 (0)