Skip to content

Commit 82cb5b2

Browse files
committed
OCPBUGS-33877: bump aws bootstrap destroy timeout
In CI, deleting the bootstrap SSH rule frequently hits the 5-minute time limit. This change increases the time limit to 15 minutes and adds logging and a warning message so the behavior can be observed. In testing, deleting this rule takes about 10 seconds in most cases, but outliers that take around 8 minutes are not uncommon.
1 parent 90915ce commit 82cb5b2

File tree

1 file changed

+9
-1
lines changed
  • pkg/infrastructure/aws/clusterapi

1 file changed

+9
-1
lines changed

pkg/infrastructure/aws/clusterapi/aws.go

Lines changed: 9 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -233,8 +233,10 @@ func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string) err
233233
}
234234
logrus.Debug("Updated AWSCluster to remove bootstrap SSH rule")
235235

236-
timeout := 5 * time.Minute
236+
timeout := 15 * time.Minute
237237
untilTime := time.Now().Add(timeout)
238+
warnTime := time.Now().Add(5 * time.Minute)
239+
warned := false
238240
timezone, _ := untilTime.Zone()
239241
logrus.Infof("Waiting up to %v (until %v %s) for bootstrap SSH rule to be destroyed...", timeout, untilTime.Format(time.Kitchen), timezone)
240242
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
@@ -247,12 +249,18 @@ func removeSSHRule(ctx context.Context, cl k8sClient.Client, infraID string) err
247249
if err := cl.Get(ctx, key, c); err != nil {
248250
return false, err
249251
}
252+
if time.Now().After(warnTime) && !warned {
253+
logrus.Warn("Deleting bootstrap SSH rule is still progressing but taking longer than expected")
254+
warned = true
255+
}
250256
if sg, ok := c.Status.Network.SecurityGroups[capa.SecurityGroupControlPlane]; ok {
251257
for _, r := range sg.IngressRules {
252258
if r.Description == awsmanifest.BootstrapSSHDescription {
259+
logrus.Debugf("Still waiting for bootstrap SSH security rule %s to be deleted from %s...", r.Description, sg.ID)
253260
return false, nil
254261
}
255262
}
263+
logrus.Debugf("The bootstrap SSH security rule %s has been removed from %s", awsmanifest.BootstrapSSHDescription, sg.ID)
256264
return true, nil
257265
}
258266
// This shouldn't happen, but if control plane SG is not found, return an error.

0 commit comments

Comments
 (0)