Skip to content

Commit 972c722

Browse files
Merge pull request #8584 from r4f4/aws-delete-s3-bucket
OCPBUGS-35037: aws: delete ignition bucket on bootstrap destroy
2 parents 38c6f89 + 9d17c12 commit 972c722

File tree

4 files changed

+127
-20
lines changed

4 files changed

+127
-20
lines changed

pkg/asset/manifests/aws/cluster.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ func GenerateClusterAssets(ic *installconfig.InstallConfig, clusterID *installco
151151
},
152152
},
153153
S3Bucket: &capa.S3Bucket{
154-
Name: fmt.Sprintf("openshift-bootstrap-data-%s", clusterID.InfraID),
154+
Name: GetIgnitionBucketName(clusterID.InfraID),
155155
PresignedURLDuration: &metav1.Duration{Duration: 1 * time.Hour},
156156
BestEffortDeleteObjects: ptr.To(ic.Config.AWS.BestEffortDeleteIgnition),
157157
},
@@ -265,3 +265,8 @@ func GenerateClusterAssets(ic *installconfig.InstallConfig, clusterID *installco
265265
},
266266
}, nil
267267
}
268+
269+
// GetIgnitionBucketName returns the name of the bucket for the given cluster.
270+
func GetIgnitionBucketName(infraID string) string {
271+
return fmt.Sprintf("openshift-bootstrap-data-%s", infraID)
272+
}

pkg/infrastructure/aws/clusterapi/aws.go

Lines changed: 93 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212
"github.com/aws/aws-sdk-go/aws/session"
1313
"github.com/aws/aws-sdk-go/service/ec2"
1414
"github.com/aws/aws-sdk-go/service/elbv2"
15+
"github.com/aws/aws-sdk-go/service/s3"
16+
"github.com/aws/aws-sdk-go/service/s3/s3manager"
1517
"github.com/sirupsen/logrus"
1618
"k8s.io/apimachinery/pkg/util/wait"
1719
"k8s.io/utils/ptr"
@@ -30,12 +32,15 @@ var (
3032
_ clusterapi.PreProvider = (*Provider)(nil)
3133
_ clusterapi.InfraReadyProvider = (*Provider)(nil)
3234
_ clusterapi.BootstrapDestroyer = (*Provider)(nil)
35+
_ clusterapi.PostDestroyer = (*Provider)(nil)
3336

3437
errNotFound = errors.New("not found")
3538
)
3639

3740
// Provider implements AWS CAPI installation.
38-
type Provider struct{}
41+
type Provider struct {
42+
bestEffortDeleteIgnition bool
43+
}
3944

4045
// Name gives the name of the provider, AWS.
4146
func (*Provider) Name() string { return awstypes.Name }
@@ -220,25 +225,28 @@ func getHostedZoneIDForNLB(ctx context.Context, awsSession *session.Session, reg
220225

221226
// DestroyBootstrap removes aws bootstrap resources not handled
222227
// by the deletion of the bootstrap machine by the capi controllers.
223-
func (*Provider) DestroyBootstrap(ctx context.Context, in clusterapi.BootstrapDestroyInput) error {
224-
if err := removeSSHRule(ctx, in.Client, in.Metadata.InfraID); err != nil {
225-
return fmt.Errorf("failed to remove bootstrap SSH rule: %w", err)
226-
}
227-
return nil
228-
}
229-
230-
// removeSSHRule removes the SSH rule for accessing the bootstrap node
231-
// by removing the rule from the cluster spec and updating the object.
232-
func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string) error {
228+
func (p *Provider) DestroyBootstrap(ctx context.Context, in clusterapi.BootstrapDestroyInput) error {
233229
awsCluster := &capa.AWSCluster{}
234230
key := k8sClient.ObjectKey{
235-
Name: infraID,
231+
Name: in.Metadata.InfraID,
236232
Namespace: capiutils.Namespace,
237233
}
238-
if err := cl.Get(ctx, key, awsCluster); err != nil {
234+
if err := in.Client.Get(ctx, key, awsCluster); err != nil {
239235
return fmt.Errorf("failed to get AWSCluster: %w", err)
240236
}
241237

238+
// Save this value for use in the post-destroy hook since we don't have capi running anymore by that point.
239+
p.bestEffortDeleteIgnition = ptr.Deref(awsCluster.Spec.S3Bucket.BestEffortDeleteObjects, false)
240+
241+
if err := removeSSHRule(ctx, in.Client, in.Metadata.InfraID, awsCluster); err != nil {
242+
return fmt.Errorf("failed to remove bootstrap SSH rule: %w", err)
243+
}
244+
return nil
245+
}
246+
247+
// removeSSHRule removes the SSH rule for accessing the bootstrap node
248+
// by removing the rule from the cluster spec and updating the object.
249+
func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string, awsCluster *capa.AWSCluster) error {
242250
postBootstrapRules := []capa.IngressRule{}
243251
for _, rule := range awsCluster.Spec.NetworkSpec.AdditionalControlPlaneIngressRules {
244252
if strings.EqualFold(rule.Description, awsmanifest.BootstrapSSHDescription) {
@@ -254,6 +262,10 @@ func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string) err
254262
}
255263
logrus.Debug("Updated AWSCluster to remove bootstrap SSH rule")
256264

265+
key := k8sClient.ObjectKey{
266+
Name: infraID,
267+
Namespace: capiutils.Namespace,
268+
}
257269
timeout := 15 * time.Minute
258270
untilTime := time.Now().Add(timeout)
259271
warnTime := time.Now().Add(5 * time.Minute)
@@ -299,3 +311,71 @@ func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string) err
299311

300312
return nil
301313
}
314+
315+
// PostDestroy deletes the ignition bucket after capi stopped running, so it won't try to reconcile the bucket.
316+
func (p *Provider) PostDestroy(ctx context.Context, in clusterapi.PostDestroyerInput) error {
317+
region := in.Metadata.AWS.Region
318+
session, err := awsconfig.GetSessionWithOptions(
319+
awsconfig.WithRegion(region),
320+
awsconfig.WithServiceEndpoints(region, in.Metadata.AWS.ServiceEndpoints),
321+
)
322+
if err != nil {
323+
return fmt.Errorf("failed to create aws session: %w", err)
324+
}
325+
326+
bucketName := awsmanifest.GetIgnitionBucketName(in.Metadata.InfraID)
327+
if err := removeS3Bucket(ctx, session, bucketName); err != nil {
328+
if p.bestEffortDeleteIgnition {
329+
logrus.Warnf("failed to delete ignition bucket %s: %v", bucketName, err)
330+
return nil
331+
}
332+
return fmt.Errorf("failed to delete ignition bucket %s: %w", bucketName, err)
333+
}
334+
335+
return nil
336+
}
337+
338+
// removeS3Bucket deletes an s3 bucket given its name.
339+
func removeS3Bucket(ctx context.Context, session *session.Session, bucketName string) error {
340+
client := s3.New(session)
341+
342+
iter := s3manager.NewDeleteListIterator(client, &s3.ListObjectsInput{
343+
Bucket: aws.String(bucketName),
344+
})
345+
err := s3manager.NewBatchDeleteWithClient(client).Delete(ctx, iter)
346+
if err != nil && !isBucketNotFound(err) {
347+
return err
348+
}
349+
logrus.Debugf("bucket %q emptied", bucketName)
350+
351+
if _, err := client.DeleteBucketWithContext(ctx, &s3.DeleteBucketInput{Bucket: aws.String(bucketName)}); err != nil {
352+
if isBucketNotFound(err) {
353+
logrus.Debugf("bucket %q already deleted", bucketName)
354+
return nil
355+
}
356+
return err
357+
}
358+
return nil
359+
}
360+
361+
func isBucketNotFound(err interface{}) bool {
362+
switch s3Err := err.(type) {
363+
case awserr.Error:
364+
if s3Err.Code() == s3.ErrCodeNoSuchBucket {
365+
return true
366+
}
367+
origErr := s3Err.OrigErr()
368+
if origErr != nil {
369+
return isBucketNotFound(origErr)
370+
}
371+
case s3manager.Error:
372+
if s3Err.OrigErr != nil {
373+
return isBucketNotFound(s3Err.OrigErr)
374+
}
375+
case s3manager.Errors:
376+
if len(s3Err) == 1 {
377+
return isBucketNotFound(s3Err[0])
378+
}
379+
}
380+
return false
381+
}

pkg/infrastructure/clusterapi/clusterapi.go

Lines changed: 17 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -386,9 +386,9 @@ func (i *InfraProvider) DestroyBootstrap(ctx context.Context, dir string) error
386386

387387
machineDeletionTimeout := 5 * time.Minute
388388
logrus.Infof("Waiting up to %v for bootstrap machine deletion %s/%s...", machineDeletionTimeout, machineNamespace, machineName)
389-
ctx, cancel := context.WithTimeout(ctx, machineDeletionTimeout)
390-
wait.UntilWithContext(ctx, func(context.Context) {
391-
err := sys.Client().Get(ctx, client.ObjectKey{
389+
cctx, cancel := context.WithTimeout(ctx, machineDeletionTimeout)
390+
wait.UntilWithContext(cctx, func(context.Context) {
391+
err := sys.Client().Get(cctx, client.ObjectKey{
392392
Name: machineName,
393393
Namespace: machineNamespace,
394394
}, &clusterv1.Machine{})
@@ -402,14 +402,25 @@ func (i *InfraProvider) DestroyBootstrap(ctx context.Context, dir string) error
402402
}
403403
}, 2*time.Second)
404404

405-
err = ctx.Err()
405+
err = cctx.Err()
406406
if err != nil && !errors.Is(err, context.Canceled) {
407407
logrus.Warnf("Timeout deleting bootstrap machine: %s", err)
408-
} else {
409-
logrus.Infof("Finished destroying bootstrap resources")
410408
}
411409
clusterapi.System().Teardown()
412410

411+
if p, ok := i.impl.(PostDestroyer); ok {
412+
postDestroyInput := PostDestroyerInput{
413+
Metadata: *metadata,
414+
}
415+
if err := p.PostDestroy(ctx, postDestroyInput); err != nil {
416+
return fmt.Errorf("failed during post-destroy hook: %w", err)
417+
}
418+
logrus.Debugf("Finished running post-destroy hook")
419+
} else {
420+
logrus.Infof("no post-destroy requirements for the %s provider", i.impl.Name())
421+
}
422+
423+
logrus.Infof("Finished destroying bootstrap resources")
413424
return nil
414425
}
415426

pkg/infrastructure/clusterapi/types.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,3 +106,14 @@ type BootstrapDestroyInput struct {
106106
Client client.Client
107107
Metadata types.ClusterMetadata
108108
}
109+
110+
// PostDestroyer allows platform-specific behavior after bootstrap has been destroyed and
111+
// ClusterAPI has stopped running.
112+
type PostDestroyer interface {
113+
PostDestroy(ctx context.Context, in PostDestroyerInput) error
114+
}
115+
116+
// PostDestroyerInput collects args passed to the PostDestroyer hook.
117+
type PostDestroyerInput struct {
118+
Metadata types.ClusterMetadata
119+
}

0 commit comments

Comments
 (0)