Skip to content

Commit a2fc23a

Browse files
authored
Delete Clusters with Active Services during ECS Cleanup (#1799)
Co-authored-by: Akansha Agarwal <[email protected]>
1 parent dae12ee commit a2fc23a

File tree

1 file changed

+87
-59
lines changed

1 file changed

+87
-59
lines changed

tool/clean/clean_ecs/clean_ecs.go

Lines changed: 87 additions & 59 deletions
Original file line number | Diff line number | Diff line change
@@ -21,7 +21,9 @@ import (
2121

2222
// Clean ECS clusters if they have been running longer than 7 days
2323

24-
var expirationTimeOneWeek = time.Now().UTC().Add(clean.KeepDurationOneWeek)
24+
var expirationTimeOneWeek = time.Now().UTC().Add(-clean.KeepDurationOneWeek)
25+
26+
const cwaIntegTestClusterPrefix = "cwagent-integ-test-cluster-"
2527

2628
func main() {
2729
ctx := context.Background()
@@ -44,8 +46,9 @@ func terminateClusters(ctx context.Context, client *ecs.Client) {
4446
ecsListClusterInput := ecs.ListClustersInput{
4547
MaxResults: aws.Int32(100),
4648
}
49+
clusterIds := make([]*string, 0)
50+
4751
for {
48-
clusterIds := make([]*string, 0)
4952
listClusterOutput, err := client.ListClusters(ctx, &ecsListClusterInput)
5053
if err != nil || listClusterOutput.ClusterArns == nil || len(listClusterOutput.ClusterArns) == 0 {
5154
break
@@ -58,85 +61,110 @@ func terminateClusters(ctx context.Context, client *ecs.Client) {
5861

5962
/* Cluster should meet all criteria to be deleted:
6063
1. Prefix should match: 'cwagent-integ-test-cluster-'
61-
2. No running services on cluster
62-
3. No running or pending tasks OR Task started more than 1 week ago
64+
2. No running or pending tasks OR Task started more than 1 week ago (ie expired)
6365
*/
6466

6567
for _, cluster := range describeClustersOutput.Clusters {
66-
if !strings.HasPrefix(*cluster.ClusterName, "cwagent-integ-test-cluster-") {
67-
continue
68-
}
69-
if cluster.ActiveServicesCount > 0 {
68+
if !strings.HasPrefix(*cluster.ClusterName, cwaIntegTestClusterPrefix) {
7069
continue
7170
}
7271
if cluster.RunningTasksCount == 0 && cluster.PendingTasksCount == 0 {
7372
clusterIds = append(clusterIds, cluster.ClusterArn)
7473
continue
7574
}
76-
describeTaskInput := ecs.DescribeTasksInput{Cluster: cluster.ClusterArn}
77-
describeTasks, err := client.DescribeTasks(ctx, &describeTaskInput)
78-
if err != nil {
79-
continue
80-
}
81-
addCluster := true
82-
for _, task := range describeTasks.Tasks {
83-
if expirationTimeOneWeek.After(*task.StartedAt) {
84-
log.Printf("Task %s launch-date %s", *task.TaskArn, *task.StartedAt)
85-
} else {
86-
addCluster = false
87-
break
88-
}
89-
}
90-
if addCluster {
75+
76+
if isClusterTasksExpired(ctx, client, cluster.ClusterArn) {
9177
clusterIds = append(clusterIds, cluster.ClusterArn)
78+
continue
9279
}
9380
}
9481

95-
// Deletion Logic
96-
for _, clusterId := range clusterIds {
97-
log.Printf("Cluster to terminate: %s", *clusterId)
98-
listContainerInstanceInput := ecs.ListContainerInstancesInput{Cluster: clusterId}
99-
listContainerInstances, err := client.ListContainerInstances(ctx, &listContainerInstanceInput)
82+
// Pagination to break loop
83+
if listClusterOutput.NextToken == nil {
84+
break
85+
}
86+
ecsListClusterInput.NextToken = listClusterOutput.NextToken
87+
}
88+
89+
// Deletion Logic
90+
for _, clusterId := range clusterIds {
91+
log.Printf("Cluster to terminate: %s", *clusterId)
92+
93+
// Delete cluster services
94+
serviceInput := ecs.ListServicesInput{Cluster: clusterId}
95+
services, err := client.ListServices(ctx, &serviceInput)
96+
if err != nil {
97+
log.Printf("Error getting services cluster %s: %v", *clusterId, err)
98+
continue
99+
}
100+
101+
for _, service := range services.ServiceArns {
102+
// Scale Down Service
103+
updateServiceInput := ecs.UpdateServiceInput{Cluster: clusterId, Service: aws.String(service), DesiredCount: aws.Int32(0)}
104+
_, err := client.UpdateService(ctx, &updateServiceInput)
100105
if err != nil {
101-
log.Printf("Error getting container instances cluster %s: %v", *clusterId, err)
102-
continue
103-
}
104-
for _, instance := range listContainerInstances.ContainerInstanceArns {
105-
deregisterContainerInstanceInput := ecs.DeregisterContainerInstanceInput{
106-
ContainerInstance: aws.String(instance),
107-
Cluster: clusterId,
108-
Force: aws.Bool(true),
109-
}
110-
_, err = client.DeregisterContainerInstance(ctx, &deregisterContainerInstanceInput)
111-
if err != nil {
112-
log.Printf("Error deregister container instances cluster %s container %v: %v", err, *clusterId, instance, err)
113-
continue
114-
}
106+
log.Printf("Error scaling down service %s in cluster %s: %v", service, *clusterId, err)
107+
log.Print("Trying service deletion anyways...")
115108
}
116-
serviceInput := ecs.ListServicesInput{Cluster: clusterId}
117-
services, err := client.ListServices(ctx, &serviceInput)
109+
110+
// Delete Service
111+
deleteServiceInput := ecs.DeleteServiceInput{Cluster: clusterId, Service: aws.String(service)}
112+
_, err = client.DeleteService(ctx, &deleteServiceInput)
118113
if err != nil {
119-
log.Printf("Error getting services cluster %s: %v", *clusterId, err)
114+
log.Printf("Error deleting service %s in cluster %s: %v", service, *clusterId, err)
120115
continue
121116
}
122-
for _, service := range services.ServiceArns {
123-
deleteServiceInput := ecs.DeleteServiceInput{Cluster: clusterId, Service: aws.String(service)}
124-
_, err := client.DeleteService(ctx, &deleteServiceInput)
125-
if err != nil {
126-
log.Printf("Error deleting service %s in cluster %s: %v", serviceInput, *clusterId, err)
127-
continue
128-
}
117+
}
118+
119+
// Delete Container Instances
120+
listContainerInstanceInput := ecs.ListContainerInstancesInput{Cluster: clusterId}
121+
listContainerInstances, err := client.ListContainerInstances(ctx, &listContainerInstanceInput)
122+
if err != nil {
123+
log.Printf("Error getting container instances cluster %s: %v", *clusterId, err)
124+
}
125+
for _, instance := range listContainerInstances.ContainerInstanceArns {
126+
deregisterContainerInstanceInput := ecs.DeregisterContainerInstanceInput{
127+
ContainerInstance: aws.String(instance),
128+
Cluster: clusterId,
129+
Force: aws.Bool(true),
129130
}
130-
terminateClusterInput := ecs.DeleteClusterInput{Cluster: clusterId}
131-
_, err = client.DeleteCluster(ctx, &terminateClusterInput)
131+
_, err = client.DeregisterContainerInstance(ctx, &deregisterContainerInstanceInput)
132132
if err != nil {
133-
log.Printf("Error terminating cluster %s: %v", *clusterId, err)
133+
log.Printf("Error deregister container instances cluster %s container %s: %v", *clusterId, instance, err)
134134
}
135135
}
136-
// Pagination to break loop
137-
if listClusterOutput.NextToken == nil {
138-
break
136+
137+
// Delete Cluster
138+
terminateClusterInput := ecs.DeleteClusterInput{Cluster: clusterId}
139+
_, err = client.DeleteCluster(ctx, &terminateClusterInput)
140+
if err != nil {
141+
log.Printf("Error terminating cluster %s: %v", *clusterId, err)
142+
}
143+
log.Printf("Cluster deleted")
144+
}
145+
}
146+
147+
func isClusterTasksExpired(ctx context.Context, client *ecs.Client, clusterArn *string) bool {
148+
listTasksInput := ecs.ListTasksInput{Cluster: clusterArn}
149+
listTasksOutput, err := client.ListTasks(ctx, &listTasksInput)
150+
if err != nil {
151+
log.Printf("Failed to listTasks for cluster %s: %v", *clusterArn, err)
152+
return false
153+
}
154+
describeTaskInput := ecs.DescribeTasksInput{
155+
Cluster: clusterArn,
156+
Tasks: listTasksOutput.TaskArns,
157+
}
158+
describeTasks, err := client.DescribeTasks(ctx, &describeTaskInput)
159+
if err != nil {
160+
log.Printf("Failed to describeTasks for cluster %s: %v", *clusterArn, err)
161+
return false
162+
}
163+
for _, task := range describeTasks.Tasks {
164+
if task.StartedAt != nil && expirationTimeOneWeek.Before(*task.StartedAt) {
165+
log.Printf("Task %s launched too recently on launch-date %s.", *task.TaskArn, *task.StartedAt)
166+
return false
139167
}
140-
ecsListClusterInput.NextToken = listClusterOutput.NextToken
141168
}
169+
return true
142170
}

0 commit comments

Comments
 (0)