Skip to content

Commit 386ff08

Browse files
fix(aws): implement cleanup on partial creation failures
Signed-off-by: Carlos Eduardo Arango Gutierrez <eduardoa@nvidia.com>
1 parent 60d47f4 commit 386ff08

File tree

1 file changed

+59
-7
lines changed

1 file changed

+59
-7
lines changed

pkg/provider/aws/create.go

Lines changed: 59 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -28,6 +28,8 @@ import (
2828
"github.com/aws/aws-sdk-go-v2/service/ec2/types"
2929
)
3030

31+
// cleanupFunc is a single rollback step. Create appends one to its cleanup
// stack after each resource is created and, if creation later fails, runs the
// stack in reverse order. A non-nil return reports that the step failed.
type cleanupFunc func() error
32+
3133
// Create creates an EC2 instance with proper Network configuration
3234
// VPC, Subnet, Internet Gateway, Route Table, Security Group
3335
// If the environment specifies a cluster configuration, it delegates to CreateCluster()
@@ -39,46 +41,96 @@ func (p *Provider) Create() error {
3941

4042
// Single-node deployment
4143
cache := new(AWS)
44+
var cleanupStack []cleanupFunc
45+
var err error
46+
47+
// Defer cleanup on failure - execute cleanup functions in reverse order
48+
defer func() {
49+
if err != nil {
50+
p.log.Warning("Creation failed, rolling back created resources...")
51+
for i := len(cleanupStack) - 1; i >= 0; i-- {
52+
if cleanupErr := cleanupStack[i](); cleanupErr != nil {
53+
p.log.Warning("Cleanup failed: %v", cleanupErr)
54+
}
55+
}
56+
}
57+
}()
4258

4359
p.updateProgressingCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Creating AWS resources") // nolint:errcheck, gosec, staticcheck
4460

45-
if err := p.createVPC(cache); err != nil {
61+
if err = p.createVPC(cache); err != nil {
4662
p.updateDegradedCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Error creating VPC") // nolint:errcheck, gosec, staticcheck
4763
return fmt.Errorf("error creating VPC: %v", err)
4864
}
65+
// Push VPC cleanup function
66+
cleanupStack = append(cleanupStack, func() error {
67+
cleanupCache := &AWS{Vpcid: cache.Vpcid}
68+
return p.deleteVPC(cleanupCache)
69+
})
4970
p.updateProgressingCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "VPC created") // nolint:errcheck, gosec, staticcheck
5071

51-
if err := p.createSubnet(cache); err != nil {
72+
if err = p.createSubnet(cache); err != nil {
5273
p.updateDegradedCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Error creating subnet") // nolint:errcheck, gosec, staticcheck
5374
return fmt.Errorf("error creating subnet: %v", err)
5475
}
76+
// Push subnet cleanup function
77+
cleanupStack = append(cleanupStack, func() error {
78+
cleanupCache := &AWS{Subnetid: cache.Subnetid}
79+
return p.deleteSubnet(cleanupCache)
80+
})
5581
p.updateProgressingCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Subnet created") // nolint:errcheck, gosec, staticcheck
5682

57-
if err := p.createInternetGateway(cache); err != nil {
83+
if err = p.createInternetGateway(cache); err != nil {
5884
p.updateDegradedCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Error creating Internet Gateway") // nolint:errcheck, gosec, staticcheck
5985
return fmt.Errorf("error creating Internet Gateway: %v", err)
6086
}
87+
// Push Internet Gateway cleanup function
88+
cleanupStack = append(cleanupStack, func() error {
89+
cleanupCache := &AWS{
90+
InternetGwid: cache.InternetGwid,
91+
Vpcid: cache.Vpcid,
92+
}
93+
return p.deleteInternetGateway(cleanupCache)
94+
})
6195
p.updateProgressingCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Internet Gateway created") // nolint:errcheck, gosec, staticcheck
6296

63-
if err := p.createRouteTable(cache); err != nil {
97+
if err = p.createRouteTable(cache); err != nil {
6498
p.updateDegradedCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Error creating route table") // nolint:errcheck, gosec, staticcheck
6599
return fmt.Errorf("error creating route table: %v", err)
66100
}
101+
// Push route table cleanup function
102+
cleanupStack = append(cleanupStack, func() error {
103+
cleanupCache := &AWS{
104+
RouteTable: cache.RouteTable,
105+
Vpcid: cache.Vpcid,
106+
}
107+
return p.deleteRouteTable(cleanupCache)
108+
})
67109
p.updateProgressingCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Route Table created") // nolint:errcheck, gosec, staticcheck
68110

69-
if err := p.createSecurityGroup(cache); err != nil {
111+
if err = p.createSecurityGroup(cache); err != nil {
70112
p.updateDegradedCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Error creating security group") // nolint:errcheck, gosec, staticcheck
71113
return fmt.Errorf("error creating security group: %v", err)
72114
}
115+
// Push security group cleanup function
116+
cleanupStack = append(cleanupStack, func() error {
117+
cleanupCache := &AWS{SecurityGroupid: cache.SecurityGroupid}
118+
return p.deleteSecurityGroups(cleanupCache)
119+
})
73120
p.updateProgressingCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Security Group created") // nolint:errcheck, gosec, staticcheck
74121

75-
if err := p.createEC2Instance(cache); err != nil {
122+
if err = p.createEC2Instance(cache); err != nil {
76123
p.updateDegradedCondition(*p.Environment.DeepCopy(), cache, "v1alpha1.Creating", "Error creating EC2 instance") // nolint:errcheck, gosec, staticcheck
77124
return fmt.Errorf("error creating EC2 instance: %v", err)
78125
}
126+
// Push EC2 instance cleanup function
127+
cleanupStack = append(cleanupStack, func() error {
128+
cleanupCache := &AWS{Instanceid: cache.Instanceid}
129+
return p.deleteEC2Instances(cleanupCache)
130+
})
79131

80132
// Save object IDs into a cache file
81-
if err := p.updateAvailableCondition(*p.Environment, cache); err != nil {
133+
if err = p.updateAvailableCondition(*p.Environment, cache); err != nil {
82134
return fmt.Errorf("error creating cache file: %v", err)
83135
}
84136
return nil

0 commit comments

Comments
 (0)