# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
# SPDX-License-Identifier: MIT

# Scales the managed node groups of an EKS cluster up and down.
#
# Triggers:
#   * schedule 'scale up'   - sets every node group to the default capacity.
#   * schedule 'scale down' - sets every node group to 0.
#   * workflow_dispatch     - sets every node group to the requested capacity.
#
# Node groups are addressed as "<cluster_name>-node-<i>" for i in
# 1..node_group_count. After scaling, the job waits and then checks that the
# number of nodes reported by kubectl matches the expected total.
name: EKS Cluster Scaling

on:
  schedule:
    # GitHub evaluates cron expressions in UTC. The step conditions below
    # compare github.event.schedule against these exact strings, so any change
    # here must be mirrored in the `if:` conditions and the validation step.
    - cron: '0 9 * * 0'  # Scale up: every Sunday at 09:00 UTC
    - cron: '0 21 * * 1'  # Scale down: every Monday at 21:00 UTC
  workflow_dispatch:
    inputs:
      region:
        description: 'AWS Region'
        required: true
        type: string
        default: 'us-west-2'
      cluster_name:
        description: 'EKS Cluster Name'
        required: true
        type: string
        default: 'eks-performance'
      desired_capacity_per_nodegroup:
        description: 'Desired capacity for each node group'
        required: true
        type: number
        default: 500
      node_group_count:
        description: 'Count of node groups'
        type: number
        default: 10

env:
  # `inputs` is empty on scheduled runs, so each value falls back to a default.
  AWS_REGION: ${{ inputs.region || 'us-west-2' }}
  CLUSTER_NAME: ${{ inputs.cluster_name || 'eks-performance' }}
  NODE_GROUP_COUNT: ${{ inputs.node_group_count || 10 }}
  # The number 0 is falsy in GitHub expressions, so a plain `inputs.x || 500`
  # would turn a manual "scale to 0" request into a scale-up to 500. Converting
  # the input to a string first keeps '0' (a non-empty string is truthy).
  DESIRED_CAPACITY_PER_NODEGROUP: >-
    ${{ github.event_name == 'workflow_dispatch'
    && format('{0}', inputs.desired_capacity_per_nodegroup)
    || '500' }}
  TERRAFORM_AWS_ASSUME_ROLE: ${{ vars.TERRAFORM_AWS_ASSUME_ROLE }}
  TERRAFORM_AWS_ASSUME_ROLE_DURATION: 3600  # 1 hour duration
  CWA_GITHUB_TEST_REPO_NAME: "aws/amazon-cloudwatch-agent-test"
  CWA_GITHUB_TEST_REPO_URL: "https://github.com/aws/amazon-cloudwatch-agent-test.git"
  CWA_GITHUB_TEST_REPO_BRANCH: "main"

jobs:
  scale-eks-cluster:
    runs-on: ubuntu-latest
    permissions:
      id-token: write  # lets configure-aws-credentials request an OIDC token
      contents: read
    defaults:
      run:
        # Naming the shell explicitly makes GitHub run scripts with
        # `bash -eo pipefail`; the implicit default omits pipefail, which would
        # let a failing `kubectl ... | wc -l` silently report 0 nodes.
        shell: bash
    steps:
      # NOTE(review): no later step in this workflow reads files from this
      # checkout - confirm whether it is still needed.
      - uses: actions/checkout@v4
        with:
          repository: ${{ env.CWA_GITHUB_TEST_REPO_NAME }}
          ref: ${{ env.CWA_GITHUB_TEST_REPO_BRANCH }}

      - name: Configure AWS Credentials
        uses: aws-actions/configure-aws-credentials@v4
        with:
          role-to-assume: ${{ env.TERRAFORM_AWS_ASSUME_ROLE }}
          aws-region: ${{ env.AWS_REGION }}
          role-duration-seconds: ${{ env.TERRAFORM_AWS_ASSUME_ROLE_DURATION }}

      - name: Install kubectl
        uses: azure/setup-kubectl@v4
        with:
          version: 'latest'

      - name: Update kubeconfig for EKS cluster
        run: |
          aws eks update-kubeconfig --name "$CLUSTER_NAME" --region "$AWS_REGION"

      # NOTE(review): the scaling steps only change desiredSize. They presumably
      # rely on each node group's existing minSize/maxSize permitting both 0 and
      # the requested capacity - confirm against the node group definitions.
      - name: Scale up node groups (Sunday)
        if: github.event.schedule == '0 9 * * 0'
        run: |
          echo "Starting scale UP operation with desired capacity: $DESIRED_CAPACITY_PER_NODEGROUP"

          for i in $(seq 1 "$NODE_GROUP_COUNT"); do
            nodegroup="${CLUSTER_NAME}-node-${i}"
            echo "Scaling node group: $nodegroup to $DESIRED_CAPACITY_PER_NODEGROUP"
            aws eks update-nodegroup-config \
              --cluster-name "$CLUSTER_NAME" \
              --nodegroup-name "$nodegroup" \
              --region "$AWS_REGION" \
              --scaling-config "desiredSize=$DESIRED_CAPACITY_PER_NODEGROUP"

            # Stagger the updates so the node groups do not all scale at once.
            echo "Waiting 1 minute before scaling next node group..."
            sleep 60
          done

      - name: Scale down node groups (Monday)
        if: github.event.schedule == '0 21 * * 1'
        run: |
          echo "Starting scale DOWN operation with desired capacity: 0"

          for i in $(seq 1 "$NODE_GROUP_COUNT"); do
            nodegroup="${CLUSTER_NAME}-node-${i}"
            echo "Scaling node group: $nodegroup to 0"
            aws eks update-nodegroup-config \
              --cluster-name "$CLUSTER_NAME" \
              --nodegroup-name "$nodegroup" \
              --region "$AWS_REGION" \
              --scaling-config desiredSize=0

            # Stagger the updates so the node groups do not all scale at once.
            echo "Waiting 1 minute before scaling next node group..."
            sleep 60
          done

      # Manual runs apply the requested capacity without the one-minute stagger.
      - name: Scale node groups (Manual)
        if: github.event_name == 'workflow_dispatch'
        run: |
          echo "Starting manual scaling operation with desired capacity: $DESIRED_CAPACITY_PER_NODEGROUP"

          for i in $(seq 1 "$NODE_GROUP_COUNT"); do
            nodegroup="${CLUSTER_NAME}-node-${i}"
            echo "Scaling node group: $nodegroup to $DESIRED_CAPACITY_PER_NODEGROUP"
            aws eks update-nodegroup-config \
              --cluster-name "$CLUSTER_NAME" \
              --nodegroup-name "$nodegroup" \
              --region "$AWS_REGION" \
              --scaling-config "desiredSize=$DESIRED_CAPACITY_PER_NODEGROUP"
          done

      # NOTE(review): this counts every node kubectl reports, so it assumes the
      # cluster has no nodes outside the scaled node groups - confirm.
      - name: Validate total node count
        env:
          # Passed through the environment instead of being spliced into the
          # script text; empty on manual (workflow_dispatch) runs.
          SCHEDULE: ${{ github.event.schedule }}
        run: |
          echo "Waiting 20 minutes for scaling operations to complete..."
          sleep 1200

          echo "Validating total number of nodes in the cluster..."
          # With pipefail active, a kubectl failure aborts the step here rather
          # than being counted as "0 nodes".
          ACTUAL_NODE_COUNT=$(kubectl get nodes --no-headers | wc -l)

          # Determine expected count based on trigger type: the scale-down
          # schedule targets 0 nodes, every other trigger targets
          # node groups x desired capacity.
          if [ "$SCHEDULE" = "0 21 * * 1" ]; then
            EXPECTED_NODE_COUNT=0
          else
            EXPECTED_NODE_COUNT=$((NODE_GROUP_COUNT * DESIRED_CAPACITY_PER_NODEGROUP))
          fi

          echo "Expected total nodes: $EXPECTED_NODE_COUNT"
          echo "Actual total nodes: $ACTUAL_NODE_COUNT"

          if [ "$ACTUAL_NODE_COUNT" -eq "$EXPECTED_NODE_COUNT" ]; then
            echo "Validation successful! Node count matches expected value."
          else
            echo "Validation failed. Expected $EXPECTED_NODE_COUNT nodes but found $ACTUAL_NODE_COUNT nodes."
            exit 1
          fi