Skip to content

Commit 7215c51

Browse files
authored
Fix Bugs in ECS Cluster Cleanup Script (#1798)
Co-authored-by: Akansha Agarwal <[email protected]>
1 parent bbf21cd commit 7215c51

File tree

1 file changed

+31
-28
lines changed

1 file changed

+31
-28
lines changed

tool/clean/clean_ecs/clean_ecs.go

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -19,38 +19,33 @@ import (
1919
"github.com/aws/amazon-cloudwatch-agent/tool/clean"
2020
)
2121

22-
// Clean ecs clusters if they have been open longer than 7 day
23-
func main() {
24-
err := cleanCluster()
25-
if err != nil {
26-
log.Fatalf("errors cleaning %v", err)
27-
}
28-
}
22+
// Clean ECS clusters if they have been running longer than 7 days
2923

30-
func cleanCluster() error {
31-
log.Print("Begin to clean ECS Clusters")
24+
var expirationTimeOneWeek = time.Now().UTC().Add(clean.KeepDurationOneWeek)
3225

33-
cxt := context.Background()
34-
defaultConfig, err := config.LoadDefaultConfig(cxt)
26+
func main() {
27+
ctx := context.Background()
28+
defaultConfig, err := config.LoadDefaultConfig(ctx)
3529
if err != nil {
36-
return err
30+
log.Fatalf("Error loading AWS config for ECS cleanup: %v", err)
3731
}
38-
ecsClient := ecs.NewFromConfig(defaultConfig)
3932

40-
terminateClusters(cxt, ecsClient)
41-
return err
33+
ecsClient := ecs.NewFromConfig(defaultConfig)
34+
terminateClusters(ctx, ecsClient)
4235
}
4336

4437
func terminateClusters(ctx context.Context, client *ecs.Client) {
4538
// ECS clusters can only be filtered by name or ARN
4639
// (unlike EC2, there is no filtering by a regex on the tag name)
4740
// DescribeClusters accepts at most 100 clusters per request
41+
42+
log.Print("Begin to clean ECS Clusters")
43+
4844
ecsListClusterInput := ecs.ListClustersInput{
4945
MaxResults: aws.Int32(100),
5046
}
5147
for {
5248
clusterIds := make([]*string, 0)
53-
expirationDateCluster := time.Now().UTC().Add(clean.KeepDurationOneWeek)
5449
listClusterOutput, err := client.ListClusters(ctx, &ecsListClusterInput)
5550
if err != nil || listClusterOutput.ClusterArns == nil || len(listClusterOutput.ClusterArns) == 0 {
5651
break
@@ -60,10 +55,20 @@ func terminateClusters(ctx context.Context, client *ecs.Client) {
6055
if err != nil || describeClustersOutput.Clusters == nil || len(describeClustersOutput.Clusters) == 0 {
6156
break
6257
}
58+
59+
/* Cluster should meet all criteria to be deleted:
60+
1. Prefix should match: 'cwagent-integ-test-cluster-'
61+
2. No active services on the cluster
62+
3. No running or pending tasks, OR every task started more than 1 week ago
63+
*/
64+
6365
for _, cluster := range describeClustersOutput.Clusters {
6466
if !strings.HasPrefix(*cluster.ClusterName, "cwagent-integ-test-cluster-") {
6567
continue
6668
}
69+
if cluster.ActiveServicesCount > 0 {
70+
continue
71+
}
6772
if cluster.RunningTasksCount == 0 && cluster.PendingTasksCount == 0 {
6873
clusterIds = append(clusterIds, cluster.ClusterArn)
6974
continue
@@ -75,7 +80,7 @@ func terminateClusters(ctx context.Context, client *ecs.Client) {
7580
}
7681
addCluster := true
7782
for _, task := range describeTasks.Tasks {
78-
if expirationDateCluster.After(*task.StartedAt) {
83+
if expirationTimeOneWeek.After(*task.StartedAt) {
7984
log.Printf("Task %s launch-date %s", *task.TaskArn, *task.StartedAt)
8085
} else {
8186
addCluster = false
@@ -86,17 +91,14 @@ func terminateClusters(ctx context.Context, client *ecs.Client) {
8691
clusterIds = append(clusterIds, cluster.ClusterArn)
8792
}
8893
}
89-
if len(clusterIds) == 0 {
90-
log.Printf("No clusters to terminate")
91-
return
92-
}
9394

95+
// Deletion Logic
9496
for _, clusterId := range clusterIds {
95-
log.Printf("cluster to temrinate %s", *clusterId)
97+
log.Printf("Cluster to terminate: %s", *clusterId)
9698
listContainerInstanceInput := ecs.ListContainerInstancesInput{Cluster: clusterId}
9799
listContainerInstances, err := client.ListContainerInstances(ctx, &listContainerInstanceInput)
98100
if err != nil {
99-
log.Printf("Error %v getting container instances cluster %s", err, *clusterId)
101+
log.Printf("Error getting container instances cluster %s: %v", *clusterId, err)
100102
continue
101103
}
102104
for _, instance := range listContainerInstances.ContainerInstanceArns {
@@ -107,31 +109,32 @@ func terminateClusters(ctx context.Context, client *ecs.Client) {
107109
}
108110
_, err = client.DeregisterContainerInstance(ctx, &deregisterContainerInstanceInput)
109111
if err != nil {
110-
log.Printf("Error %v deregister container instances cluster %s container %v", err, *clusterId, instance)
112+
log.Printf("Error deregister container instances cluster %s container %v: %v", err, *clusterId, instance, err)
111113
continue
112114
}
113115
}
114116
serviceInput := ecs.ListServicesInput{Cluster: clusterId}
115117
services, err := client.ListServices(ctx, &serviceInput)
116118
if err != nil {
117-
log.Printf("Error %v getting services cluster %s", err, *clusterId)
119+
log.Printf("Error getting services cluster %s: %v", *clusterId, err)
118120
continue
119121
}
120122
for _, service := range services.ServiceArns {
121123
deleteServiceInput := ecs.DeleteServiceInput{Cluster: clusterId, Service: aws.String(service)}
122124
_, err := client.DeleteService(ctx, &deleteServiceInput)
123125
if err != nil {
124-
log.Printf("Error %v deleteing service %s cluster %s", err, serviceInput, *clusterId)
126+
log.Printf("Error deleting service %s in cluster %s: %v", serviceInput, *clusterId, err)
125127
continue
126128
}
127129
}
128130
terminateClusterInput := ecs.DeleteClusterInput{Cluster: clusterId}
129131
_, err = client.DeleteCluster(ctx, &terminateClusterInput)
130132
if err != nil {
131-
log.Printf("Error %v terminating cluster %s", err, *clusterId)
133+
log.Printf("Error terminating cluster %s: %v", *clusterId, err)
132134
}
133135
}
134-
if ecsListClusterInput.NextToken == nil {
136+
// Stop paginating once ListClusters returns no further page token
137+
if listClusterOutput.NextToken == nil {
135138
break
136139
}
137140
ecsListClusterInput.NextToken = listClusterOutput.NextToken

0 commit comments

Comments
 (0)