Skip to content

Commit 17693a2

Browse files
committed
Fix CAPI bootstrap deletion
The local CAPI controller is torn down right after the deletion of the bootstrap machine is triggered, leaving almost no time for the machine deletion event to be handled and resulting in a leftover bootstrap machine. This commit fixes the issue by waiting up to 2 minutes for the bootstrap machine to be deleted before destroying the local control plane.
1 parent e527352 commit 17693a2

File tree

1 file changed

+29
-3
lines changed

1 file changed

+29
-3
lines changed

pkg/infrastructure/clusterapi/clusterapi.go

Lines changed: 29 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package clusterapi
22

33
import (
44
"context"
5+
"errors"
56
"fmt"
67
"time"
78

@@ -323,14 +324,39 @@ func (i *InfraProvider) DestroyBootstrap(dir string) error {
323324

324325
// TODO(padillon): start system if not running
325326
if sys := clusterapi.System(); sys.State() == clusterapi.SystemStateRunning {
327+
machineName := capiutils.GenerateBoostrapMachineName(metadata.InfraID)
328+
machineNamespace := capiutils.Namespace
326329
if err := sys.Client().Delete(context.TODO(), &clusterv1.Machine{
327330
ObjectMeta: metav1.ObjectMeta{
328-
Name: capiutils.GenerateBoostrapMachineName(metadata.InfraID),
329-
Namespace: capiutils.Namespace,
331+
Name: machineName,
332+
Namespace: machineNamespace,
330333
},
331-
}); client.IgnoreNotFound(err) != nil {
334+
}); err != nil {
332335
return fmt.Errorf("failed to delete bootstrap machine: %w", err)
333336
}
337+
338+
machineDeletionTimeout := 2 * time.Minute
339+
logrus.Infof("Waiting up to %v for bootstrap machine deletion %s/%s...", machineDeletionTimeout, machineNamespace, machineName)
340+
machineContext, cancel := context.WithTimeout(context.TODO(), machineDeletionTimeout)
341+
wait.Until(func() {
342+
err := sys.Client().Get(context.TODO(), client.ObjectKey{
343+
Name: machineName,
344+
Namespace: machineNamespace,
345+
}, &clusterv1.Machine{})
346+
if err != nil {
347+
if apierrors.IsNotFound(err) {
348+
logrus.Debugf("Machine deleted: %s", machineName)
349+
cancel()
350+
} else {
351+
logrus.Debugf("Error when deleting bootstrap machine: %s", err)
352+
}
353+
}
354+
}, 2*time.Second, machineContext.Done())
355+
356+
err = machineContext.Err()
357+
if err != nil && !errors.Is(err, context.Canceled) {
358+
logrus.Infof("Timeout deleting bootstrap machine: %s", err)
359+
}
334360
}
335361
logrus.Infof("Finished destroying bootstrap resources")
336362
clusterapi.System().Teardown()

0 commit comments

Comments
 (0)