Skip to content

Commit cfdf298

Browse files
committed
pendingCreateMachine safety map to prevent early termination
1 parent 52f840e commit cfdf298

File tree

2 files changed

+38
-12
lines changed

2 files changed

+38
-12
lines changed

pkg/util/provider/machinecontroller/controller.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,10 @@ type controller struct {
240240
// - lastAcquire time
241241
// it is used to limit removal of `health timed out` machines
242242
permitGiver permits.PermitGiver
243+
// pendingMachineCreationMap keeps track of machines that are currently in
244+
// creation flow, this is used to determine whether or not a machine should
245+
//be processed by the termination queue
246+
pendingMachineCreationMap sync.Map
243247

244248
// control listers
245249
secretLister corelisters.SecretLister

pkg/util/provider/machinecontroller/machine.go

Lines changed: 34 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ func (c *controller) addMachine(obj interface{}) {
4646
// On restart of the controller process, a machine that was marked for
4747
// deletion would be processed as part of an `add` event. This check
4848
// ensures that its enqueued in the correct queue.
49-
if machine.DeletionTimestamp != nil {
49+
if c.shouldMachineBeMovedToTerminatingQueue(machine) {
5050
c.enqueueMachineTermination(machine, "handling terminating machine object ADD event")
5151
} else {
5252
c.enqueueMachine(obj, "handling machine obj ADD event")
@@ -67,7 +67,7 @@ func (c *controller) updateMachine(oldObj, newObj interface{}) {
6767
return
6868
}
6969

70-
if newMachine.DeletionTimestamp != nil {
70+
if c.shouldMachineBeMovedToTerminatingQueue(newMachine) {
7171
c.enqueueMachineTermination(newMachine, "handling terminating machine object UPDATE event")
7272
} else {
7373
c.enqueueMachine(newObj, "handling machine object UPDATE event")
@@ -83,7 +83,7 @@ func (c *controller) deleteMachine(obj interface{}) {
8383
}
8484
machine, ok = tombstone.Obj.(*v1alpha1.Machine)
8585
if !ok {
86-
utilruntime.HandleError(fmt.Errorf("Tombstone contained object that is not a Machine Deployment %#v", obj))
86+
utilruntime.HandleError(fmt.Errorf("Tombstone contained object that is not a Machine %#v", obj))
8787
return
8888
}
8989
}
@@ -153,8 +153,15 @@ func (c *controller) reconcileClusterMachineKey(key string) error {
153153
return err
154154
}
155155

156-
if machine.DeletionTimestamp != nil {
156+
// Add finalizers if not present on machine object
157+
_, err = c.addMachineFinalizers(ctx, machine)
158+
if err != nil {
159+
return err
160+
}
161+
162+
if c.shouldMachineBeMovedToTerminatingQueue(machine) {
157163
klog.Errorf("Machine %q should be in machine termination queue", machine.Name)
164+
c.enqueueMachineTermination(machine, "handling terminating machine object")
158165
return nil
159166
}
160167

@@ -202,12 +209,6 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp
202209
return retry, err
203210
}
204211

205-
// Add finalizers if not present on machine object
206-
retry, err = c.addMachineFinalizers(ctx, machine)
207-
if err != nil {
208-
return retry, err
209-
}
210-
211212
if machine.Labels[v1alpha1.NodeLabelKey] != "" && machine.Status.CurrentStatus.Phase != "" {
212213
// If reference to node object exists execute the below
213214
retry, err := c.reconcileMachineHealth(ctx, machine)
@@ -237,6 +238,8 @@ func (c *controller) reconcileClusterMachine(ctx context.Context, machine *v1alp
237238
}
238239

239240
if machine.Spec.ProviderID == "" || machine.Status.CurrentStatus.Phase == "" || machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff {
241+
c.pendingMachineCreationMap.Store(machine.Name, "")
242+
240243
return c.triggerCreationFlow(
241244
ctx,
242245
&driver.CreateMachineRequest{
@@ -484,6 +487,10 @@ func addedOrRemovedEssentialTaints(oldNode, node *corev1.Node, taintKeys []strin
484487
*/
485488

486489
func (c *controller) triggerCreationFlow(ctx context.Context, createMachineRequest *driver.CreateMachineRequest) (machineutils.RetryPeriod, error) {
490+
defer func() {
491+
c.pendingMachineCreationMap.Delete(createMachineRequest.Machine.Name)
492+
}()
493+
487494
var (
488495
// Declarations
489496
nodeName, providerID string
@@ -780,11 +787,16 @@ func (c *controller) initializeMachine(ctx context.Context, machine *v1alpha1.Ma
780787

781788
func (c *controller) triggerDeletionFlow(ctx context.Context, deleteMachineRequest *driver.DeleteMachineRequest) (machineutils.RetryPeriod, error) {
782789
var (
783-
machine = deleteMachineRequest.Machine
784-
finalizers = sets.NewString(machine.Finalizers...)
790+
machine = deleteMachineRequest.Machine
791+
finalizers = sets.NewString(machine.Finalizers...)
792+
_, isMachineInCreationFlow = c.pendingMachineCreationMap.Load(machine.Name)
785793
)
786794

787795
switch {
796+
case isMachineInCreationFlow:
797+
err := fmt.Errorf("Machine %q is in creation flow. Deletion cannot proceed", machine.Name)
798+
return machineutils.MediumRetry, err
799+
788800
case !finalizers.Has(MCMFinalizerName):
789801
// If Finalizers are not present on machine
790802
err := fmt.Errorf("Machine %q is missing finalizers. Deletion cannot proceed", machine.Name)
@@ -857,3 +869,13 @@ func buildAddressStatus(addresses sets.Set[corev1.NodeAddress], nodeName string)
857869
})
858870
return res
859871
}
872+
873+
func (c *controller) shouldMachineBeMovedToTerminatingQueue(machine *v1alpha1.Machine) bool {
874+
_, isMachineInCreationFlow := c.pendingMachineCreationMap.Load(machine.Name)
875+
876+
if machine.DeletionTimestamp != nil && isMachineInCreationFlow {
877+
klog.Infof("Cannot delete machine %q, its deletionTimestamp is set but it is currently being processed by the creation flow\n", machine.Name)
878+
}
879+
880+
return !isMachineInCreationFlow && machine.DeletionTimestamp != nil
881+
}

0 commit comments

Comments
 (0)