Skip to content

Commit 7f92c30

Browse files
Merge pull request #6 from alexander-demicev/cleanupimprove
Improve janitors cleanup logic
2 parents 5ab9bd8 + b08ce5f commit 7f92c30

File tree

6 files changed

+352
-10
lines changed

6 files changed

+352
-10
lines changed

action/action.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ func (a *action) Cleanup(ctx context.Context, input *Input) error {
4343
{Service: elb.ServiceName, Run: a.cleanLoadBalancers},
4444
{Service: ec2.ServiceName, Run: a.cleanSecurityGroups},
4545
{Service: cloudformation.ServiceName, Run: a.cleanCfStacks},
46+
{Service: ec2.ServiceName, Run: a.cleanVPCs},
4647
}
4748
inputRegions := strings.Split(input.Regions, ",")
4849

action/cleanup_asg.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@ func (a *action) cleanASGs(ctx context.Context, input *CleanupScope) error {
1616
for _, asg := range page.AutoScalingGroups {
1717
var ignore, markedForDeletion bool
1818
for _, tag := range asg.Tags {
19-
if *tag.Key == input.IgnoreTag {
19+
switch *tag.Key {
20+
case input.IgnoreTag:
2021
ignore = true
21-
} else if *tag.Key == DeletionTag {
22+
case DeletionTag:
2223
markedForDeletion = true
2324
}
2425
}

action/cleanup_cf.go

Lines changed: 54 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,10 @@ func (a *action) cleanCfStacks(ctx context.Context, input *CleanupScope) error {
1616
for _, stack := range page.Stacks {
1717
var ignore, markedForDeletion bool
1818
for _, tag := range stack.Tags {
19-
if *tag.Key == input.IgnoreTag {
19+
switch *tag.Key {
20+
case input.IgnoreTag:
2021
ignore = true
21-
} else if *tag.Key == DeletionTag {
22+
case DeletionTag:
2223
markedForDeletion = true
2324
}
2425
}
@@ -29,6 +30,12 @@ func (a *action) cleanCfStacks(ctx context.Context, input *CleanupScope) error {
2930
}
3031

3132
if !markedForDeletion {
33+
if !a.canUpdateStack(aws.StringValue(stack.StackStatus)) {
34+
LogDebug("cloudformation stack %s is in state %s and cannot be updated, skipping",
35+
*stack.StackName, aws.StringValue(stack.StackStatus))
36+
continue
37+
}
38+
3239
// NOTE: only mark for future deletion if we're not running in dry-mode
3340
if a.commit {
3441
LogDebug("cloudformation stack %s does not have deletion tag, marking for future deletion and skipping cleanup", *stack.StackName)
@@ -44,6 +51,10 @@ func (a *action) cleanCfStacks(ctx context.Context, input *CleanupScope) error {
4451
cf.ResourceStatusDeleteInProgress:
4552
LogDebug("cloudformation stack %s is already deleted/deleting, skipping cleanup", *stack.StackName)
4653
continue
54+
case cf.StackStatusDeleteFailed:
55+
LogDebug("cloudformation stack %s is in DELETE_FAILED state, adding to delete list", *stack.StackName)
56+
stacksToDelete = append(stacksToDelete, stack.StackName)
57+
continue
4758
}
4859

4960
LogDebug("adding cloudformation stack %s to delete list", *stack.StackName)
@@ -102,8 +113,27 @@ func (a *action) markCfStackForFutureDeletion(ctx context.Context, stack *cf.Sta
102113
func (a *action) deleteCfStack(ctx context.Context, stackName string, client *cf.CloudFormation) error {
103114
Log("Deleting CloudFormation stack %s", stackName)
104115

105-
if _, err := client.DeleteStackWithContext(ctx, &cf.DeleteStackInput{StackName: &stackName}); err != nil {
106-
return fmt.Errorf("failed to delete cloudformation stack %s: %w", stackName, err)
116+
stacks, err := client.DescribeStacksWithContext(ctx, &cf.DescribeStacksInput{StackName: &stackName})
117+
if err != nil {
118+
return fmt.Errorf("failed to describe cloudformation stack %s: %w", stackName, err)
119+
}
120+
121+
if len(stacks.Stacks) > 0 {
122+
stackStatus := aws.StringValue(stacks.Stacks[0].StackStatus)
123+
if stackStatus == cf.StackStatusDeleteFailed {
124+
Log("Stack %s is in DELETE_FAILED state, attempting to continue deletion", stackName)
125+
126+
if _, err := client.DeleteStackWithContext(ctx, &cf.DeleteStackInput{
127+
StackName: &stackName,
128+
RetainResources: []*string{},
129+
}); err != nil {
130+
return fmt.Errorf("failed to continue deletion of cloudformation stack %s: %w", stackName, err)
131+
}
132+
} else {
133+
if _, err := client.DeleteStackWithContext(ctx, &cf.DeleteStackInput{StackName: &stackName}); err != nil {
134+
return fmt.Errorf("failed to delete cloudformation stack %s: %w", stackName, err)
135+
}
136+
}
107137
}
108138

109139
if err := client.WaitUntilStackDeleteCompleteWithContext(ctx, &cf.DescribeStacksInput{StackName: &stackName}); err != nil {
@@ -112,3 +142,23 @@ func (a *action) deleteCfStack(ctx context.Context, stackName string, client *cf
112142

113143
return nil
114144
}
145+
146+
func (a *action) canUpdateStack(stackStatus string) bool {
147+
nonUpdatableStates := []string{
148+
cf.StackStatusCreateInProgress,
149+
cf.StackStatusDeleteInProgress,
150+
cf.StackStatusDeleteFailed,
151+
cf.StackStatusDeleteComplete,
152+
cf.StackStatusUpdateInProgress,
153+
cf.StackStatusUpdateRollbackInProgress,
154+
cf.StackStatusUpdateRollbackCompleteCleanupInProgress,
155+
cf.StackStatusReviewInProgress,
156+
}
157+
158+
for _, state := range nonUpdatableStates {
159+
if stackStatus == state {
160+
return false
161+
}
162+
}
163+
return true
164+
}

action/cleanup_lb.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,9 +22,10 @@ func (a *action) cleanLoadBalancers(ctx context.Context, input *CleanupScope) er
2222
var ignore, markedForDeletion bool
2323
for _, tagDescription := range tags.TagDescriptions {
2424
for _, tag := range tagDescription.Tags {
25-
if *tag.Key == input.IgnoreTag {
25+
switch *tag.Key {
26+
case input.IgnoreTag:
2627
ignore = true
27-
} else if *tag.Key == DeletionTag {
28+
case DeletionTag:
2829
markedForDeletion = true
2930
}
3031
}

action/cleanup_sgs.go

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ package action
33
import (
44
"context"
55
"fmt"
6+
"strings"
67
"time"
78

89
"github.com/aws/aws-sdk-go/aws"
@@ -153,8 +154,59 @@ func (a *action) deleteSecurityGroupRules(ctx context.Context, sgId string, sgIn
153154
func (a *action) deleteSecurityGroup(ctx context.Context, sgId string, client *ec2.EC2) error {
154155
Log("Deleting Security Group %s", sgId)
155156

156-
if _, err := client.DeleteSecurityGroupWithContext(ctx, &ec2.DeleteSecurityGroupInput{GroupId: &sgId}); err != nil {
157-
return fmt.Errorf("failed to delete security group %s: %w", sgId, err)
157+
maxRetries := 5
158+
retryDelay := 30 * time.Second
159+
160+
for attempt := 1; attempt <= maxRetries; attempt++ {
161+
if _, err := client.DeleteSecurityGroupWithContext(ctx, &ec2.DeleteSecurityGroupInput{GroupId: &sgId}); err != nil {
162+
if attempt < maxRetries && a.isDependencyViolation(err) {
163+
LogDebug("Security group %s has dependencies, retrying in %v (attempt %d/%d)", sgId, retryDelay, attempt, maxRetries)
164+
165+
if err := a.handleSecurityGroupDependencies(ctx, sgId, client); err != nil {
166+
LogDebug("Failed to handle dependencies for security group %s: %v", sgId, err)
167+
}
168+
169+
time.Sleep(retryDelay)
170+
continue
171+
}
172+
return fmt.Errorf("failed to delete security group %s: %w", sgId, err)
173+
}
174+
break
175+
}
176+
177+
return nil
178+
}
179+
180+
func (a *action) isDependencyViolation(err error) bool {
181+
if err == nil {
182+
return false
183+
}
184+
errStr := err.Error()
185+
return strings.Contains(errStr, "DependencyViolation") ||
186+
strings.Contains(errStr, "has a dependent object")
187+
}
188+
189+
func (a *action) handleSecurityGroupDependencies(ctx context.Context, sgId string, client *ec2.EC2) error {
190+
eniResp, err := client.DescribeNetworkInterfacesWithContext(ctx, &ec2.DescribeNetworkInterfacesInput{
191+
Filters: []*ec2.Filter{
192+
{
193+
Name: aws.String("group-id"),
194+
Values: []*string{aws.String(sgId)},
195+
},
196+
},
197+
})
198+
if err != nil {
199+
return fmt.Errorf("failed to describe network interfaces for sg %s: %w", sgId, err)
200+
}
201+
202+
for _, eni := range eniResp.NetworkInterfaces {
203+
LogDebug("Security group %s is used by network interface %s (status: %s)",
204+
sgId, aws.StringValue(eni.NetworkInterfaceId), aws.StringValue(eni.Status))
205+
206+
if aws.StringValue(eni.Status) == "available" {
207+
LogDebug("Network interface %s is available but not being deleted automatically for safety",
208+
aws.StringValue(eni.NetworkInterfaceId))
209+
}
158210
}
159211

160212
return nil

0 commit comments

Comments
 (0)