Skip to content

Commit 2609f37

Browse files
authored
Remove node.machine.sapcloud.io/trigger-deletion-by-mcm annotation (#1055)
1 parent 1867051 commit 2609f37

File tree

9 files changed

+556
-219
lines changed

9 files changed

+556
-219
lines changed

machine-controller-manager

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
Subproject commit 0f18f3d7c1127c4d2daf1df4ce8310e697fb2d14

pkg/util/provider/machinecontroller/controller.go

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,9 @@ const (
4747
// MCMFinalizerName is the finalizer used to tag dependencies before deletion
4848
// of the object. This finalizer is carried over from the MCM
4949
MCMFinalizerName = "machine.sapcloud.io/machine-controller-manager"
50+
// NodeFinalizerName is the finalizer used to tag node dependencies before deletion
51+
// Added to resolve https://github.com/gardener/machine-controller-manager/issues/1051
52+
NodeFinalizerName = "node.machine.sapcloud.io/machine-controller"
5053
// MCFinalizerName is the finalizer created for the external
5154
// machine controller to differentiate it from the MCMFinalizerName
5255
// This finalizer is added only on secret-objects to avoid race between in-tree and out-of-tree controllers.
@@ -187,9 +190,9 @@ func NewController(
187190
// Machine Controller's Informers
188191
if targetCoreInformerFactory != nil {
189192
_, _ = targetCoreInformerFactory.Core().V1().Nodes().Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
190-
AddFunc: controller.addNodeToMachine,
191-
UpdateFunc: controller.updateNodeToMachine,
192-
DeleteFunc: controller.deleteNodeToMachine,
193+
AddFunc: controller.addNode,
194+
UpdateFunc: controller.updateNode,
195+
DeleteFunc: controller.deleteNode,
193196
})
194197
}
195198

pkg/util/provider/machinecontroller/machine.go

Lines changed: 3 additions & 179 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ package controller
77

88
import (
99
"context"
10-
"errors"
1110
"fmt"
1211
"maps"
1312
"slices"
@@ -17,8 +16,6 @@ import (
1716
corev1 "k8s.io/api/core/v1"
1817
apierrors "k8s.io/apimachinery/pkg/api/errors"
1918
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
20-
"k8s.io/apimachinery/pkg/labels"
21-
"k8s.io/apimachinery/pkg/selection"
2219
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
2320
"k8s.io/apimachinery/pkg/util/sets"
2421
"k8s.io/client-go/tools/cache"
@@ -302,182 +299,6 @@ func (c *controller) reconcileClusterMachineTermination(key string) error {
302299
return nil
303300
}
304301

305-
/*
306-
SECTION
307-
Machine controller - nodeToMachine
308-
*/
309-
var (
310-
errMultipleMachineMatch = errors.New("multiple machines matching node")
311-
errNoMachineMatch = errors.New("no machines matching node found")
312-
)
313-
314-
func (c *controller) addNodeToMachine(obj any) {
315-
node := obj.(*corev1.Node)
316-
if node == nil {
317-
klog.Errorf("Couldn't convert to node from object")
318-
return
319-
}
320-
321-
// If NotManagedByMCM annotation is present on node, don't process this node object
322-
if _, annotationPresent := node.ObjectMeta.Annotations[machineutils.NotManagedByMCM]; annotationPresent {
323-
return
324-
}
325-
326-
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
327-
if err != nil {
328-
klog.Errorf("Couldn't get key for object %+v: %v", obj, err)
329-
return
330-
}
331-
332-
machine, err := c.getMachineFromNode(key)
333-
if err != nil {
334-
if err == errNoMachineMatch {
335-
// errNoMachineMatch could mean that VM is still in creation hence ignoring it
336-
return
337-
}
338-
339-
klog.Errorf("Couldn't fetch machine %s, Error: %s", key, err)
340-
return
341-
}
342-
343-
isMachineCrashLooping := machine.Status.CurrentStatus.Phase == v1alpha1.MachineCrashLoopBackOff
344-
isMachineTerminating := machine.Status.CurrentStatus.Phase == v1alpha1.MachineTerminating
345-
_, _, nodeConditionsHaveChanged := nodeConditionsHaveChanged(machine.Status.Conditions, node.Status.Conditions)
346-
347-
if !isMachineCrashLooping && !isMachineTerminating && nodeConditionsHaveChanged {
348-
c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. conditions of backing node %q have changed", getNodeName(machine)))
349-
}
350-
}
351-
352-
func (c *controller) updateNodeToMachine(oldObj, newObj any) {
353-
oldNode := oldObj.(*corev1.Node)
354-
node := newObj.(*corev1.Node)
355-
if node == nil {
356-
klog.Errorf("Couldn't convert to node from object")
357-
return
358-
}
359-
360-
machine, err := c.getMachineFromNode(node.Name)
361-
if err != nil {
362-
klog.Errorf("Unable to handle update event for node %s, couldn't fetch associated machine. Error: %s", node.Name, err)
363-
return
364-
}
365-
366-
// check for the TriggerDeletionByMCM annotation on the node object
367-
// if it is present then mark the machine object for deletion
368-
if value, ok := node.Annotations[machineutils.TriggerDeletionByMCM]; ok && value == "true" {
369-
if machine.DeletionTimestamp == nil {
370-
klog.Infof("Node %s for machine %s is annotated to trigger deletion by MCM.", node.Name, machine.Name)
371-
if err := c.controlMachineClient.Machines(c.namespace).Delete(context.Background(), machine.Name, metav1.DeleteOptions{}); err != nil {
372-
klog.Errorf("Machine object %s backing the node %s could not be marked for deletion.", machine.Name, node.Name)
373-
return
374-
}
375-
klog.Infof("Machine object %s backing the node %s marked for deletion.", machine.Name, node.Name)
376-
}
377-
}
378-
379-
// to reconcile on addition/removal of essential taints in machine lifecycle, example - critical component taint
380-
if addedOrRemovedEssentialTaints(oldNode, node, machineutils.EssentialTaints) {
381-
c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. atleast one of essential taints on backing node %q has changed", getNodeName(machine)))
382-
return
383-
}
384-
385-
if inPlaceUpdateLabelsChanged(oldNode, node) {
386-
c.enqueueMachine(machine, fmt.Sprintf("handling node UPDATE event. in-place update label added or updated for node %q", getNodeName(machine)))
387-
return
388-
}
389-
390-
c.addNodeToMachine(newObj)
391-
}
392-
393-
func (c *controller) deleteNodeToMachine(obj any) {
394-
key, err := cache.DeletionHandlingMetaNamespaceKeyFunc(obj)
395-
if err != nil {
396-
klog.Errorf("Couldn't get key for object %+v: %v", obj, err)
397-
return
398-
}
399-
400-
machine, err := c.getMachineFromNode(key)
401-
if err != nil {
402-
klog.Errorf("Couldn't fetch machine %s, Error: %s", key, err)
403-
return
404-
}
405-
406-
// donot respond if machine obj is already in termination flow
407-
if machine.DeletionTimestamp == nil {
408-
c.enqueueMachine(machine, fmt.Sprintf("backing node obj %q got deleted", key))
409-
}
410-
}
411-
412-
/*
413-
SECTION
414-
NodeToMachine operations
415-
*/
416-
417-
func (c *controller) getMachineFromNode(nodeName string) (*v1alpha1.Machine, error) {
418-
var (
419-
list = []string{nodeName}
420-
selector = labels.NewSelector()
421-
req, _ = labels.NewRequirement("node", selection.Equals, list)
422-
)
423-
424-
selector = selector.Add(*req)
425-
machines, _ := c.machineLister.List(selector)
426-
427-
if len(machines) > 1 {
428-
return nil, errMultipleMachineMatch
429-
} else if len(machines) < 1 {
430-
return nil, errNoMachineMatch
431-
}
432-
433-
return machines[0], nil
434-
}
435-
436-
func inPlaceUpdateLabelsChanged(oldNode, node *corev1.Node) bool {
437-
if oldNode == nil || node == nil {
438-
return false
439-
}
440-
441-
labelKeys := []string{
442-
v1alpha1.LabelKeyNodeCandidateForUpdate,
443-
v1alpha1.LabelKeyNodeSelectedForUpdate,
444-
v1alpha1.LabelKeyNodeUpdateResult,
445-
}
446-
447-
for _, key := range labelKeys {
448-
oldVal, oldOk := oldNode.Labels[key]
449-
newVal, newOk := node.Labels[key]
450-
if (!oldOk && newOk) || (key == v1alpha1.LabelKeyNodeUpdateResult && oldVal != newVal) {
451-
return true
452-
}
453-
}
454-
455-
return false
456-
}
457-
458-
func addedOrRemovedEssentialTaints(oldNode, node *corev1.Node, taintKeys []string) bool {
459-
mapOldNodeTaintKeys := make(map[string]bool)
460-
mapNodeTaintKeys := make(map[string]bool)
461-
462-
for _, t := range oldNode.Spec.Taints {
463-
mapOldNodeTaintKeys[t.Key] = true
464-
}
465-
466-
for _, t := range node.Spec.Taints {
467-
mapNodeTaintKeys[t.Key] = true
468-
}
469-
470-
for _, tk := range taintKeys {
471-
_, oldNodeHasTaint := mapOldNodeTaintKeys[tk]
472-
_, newNodeHasTaint := mapNodeTaintKeys[tk]
473-
if oldNodeHasTaint != newNodeHasTaint {
474-
klog.V(2).Infof("Taint with key %q has been added/removed from the node %q", tk, node.Name)
475-
return true
476-
}
477-
}
478-
return false
479-
}
480-
481302
/*
482303
SECTION
483304
Machine operations - Create, Delete
@@ -820,6 +641,9 @@ func (c *controller) triggerDeletionFlow(ctx context.Context, deleteMachineReque
820641
case strings.Contains(machine.Status.LastOperation.Description, machineutils.InitiateVMDeletion):
821642
return c.deleteVM(ctx, deleteMachineRequest)
822643

644+
case strings.Contains(machine.Status.LastOperation.Description, machineutils.RemoveNodeFinalizers):
645+
return c.deleteNodeFinalizers(ctx, machine)
646+
823647
case strings.Contains(machine.Status.LastOperation.Description, machineutils.InitiateNodeDeletion):
824648
return c.deleteNodeObject(ctx, machine)
825649

pkg/util/provider/machinecontroller/machine_safety.go

Lines changed: 17 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -170,28 +170,30 @@ func (c *controller) AnnotateNodesUnmanagedByMCM(ctx context.Context) (machineut
170170
klog.Errorf("Couldn't fetch machine, Error: %s", err)
171171
} else if err == errNoMachineMatch {
172172

173-
if !node.CreationTimestamp.Time.Before(time.Now().Add(c.safetyOptions.MachineCreationTimeout.Duration * -1)) {
174-
// node creationTimestamp is NOT before now() - machineCreationTime
173+
if time.Since(node.CreationTimestamp.Time) < c.safetyOptions.MachineCreationTimeout.Duration {
174+
// node creationTimestamp is NOT before now() - machineCreationTimeout
175175
// meaning creationTimeout has not occurred since node creation
176176
// hence don't tag such nodes
177177
klog.V(3).Infof("Node %q is still too young to be tagged with NotManagedByMCM", node.Name)
178178
continue
179-
} else if _, annotationPresent := node.ObjectMeta.Annotations[machineutils.NotManagedByMCM]; annotationPresent {
180-
// annotation already exists, ignore this node
181-
continue
182-
}
183-
184-
// if no backing machine object for a node, annotate it
185-
nodeCopy := node.DeepCopy()
186-
annotations := map[string]string{
187-
machineutils.NotManagedByMCM: "1",
188179
}
189-
190-
klog.V(3).Infof("Adding NotManagedByMCM annotation to Node %q", node.Name)
191-
// err is returned only when node update fails
192-
if err := c.updateNodeWithAnnotations(ctx, nodeCopy, annotations); err != nil {
180+
// Remove MCM finalizer from orphan nodes to allow deletion
181+
if err := c.removeNodeFinalizers(ctx, node); err != nil {
182+
klog.Errorf("Failed to remove finalizer from orphan node %q: %v", node.Name, err)
193183
return machineutils.MediumRetry, err
194184
}
185+
if _, annotationPresent := node.ObjectMeta.Annotations[machineutils.NotManagedByMCM]; !annotationPresent {
186+
// if no backing machine object for a node, annotate it
187+
nodeCopy := node.DeepCopy()
188+
annotations := map[string]string{
189+
machineutils.NotManagedByMCM: "1",
190+
}
191+
klog.V(3).Infof("Adding NotManagedByMCM annotation to Node %q", node.Name)
192+
// err is returned only when node update fails
193+
if err := c.updateNodeWithAnnotations(ctx, nodeCopy, annotations); err != nil {
194+
return machineutils.MediumRetry, err
195+
}
196+
}
195197
}
196198
} else {
197199
_, hasAnnot := node.Annotations[machineutils.NotManagedByMCM]

pkg/util/provider/machinecontroller/machine_safety_test.go

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -734,6 +734,70 @@ var _ = Describe("safety_logic", func() {
734734
err: nil,
735735
},
736736
}),
737+
Entry("Annotate orphan node and remove its MCM finalizer", &data{
738+
setup: setup{
739+
node: &corev1.Node{
740+
TypeMeta: metav1.TypeMeta{
741+
APIVersion: "v1",
742+
Kind: "Node",
743+
},
744+
ObjectMeta: metav1.ObjectMeta{
745+
Name: "test-node-0",
746+
Annotations: map[string]string{
747+
"anno1": "value1",
748+
},
749+
Finalizers: []string{
750+
NodeFinalizerName,
751+
},
752+
CreationTimestamp: metav1.NewTime(metav1.Now().Add(-21 * time.Minute)),
753+
},
754+
},
755+
},
756+
action: action{},
757+
expect: expect{
758+
node0: &corev1.Node{
759+
TypeMeta: metav1.TypeMeta{
760+
APIVersion: "v1",
761+
Kind: "Node",
762+
},
763+
ObjectMeta: metav1.ObjectMeta{
764+
Name: "test-node-0",
765+
Annotations: map[string]string{
766+
"anno1": "value1",
767+
machineutils.NotManagedByMCM: "1",
768+
},
769+
// Finalizer should be removed to allow deletion
770+
Finalizers: []string{},
771+
},
772+
},
773+
node1: &corev1.Node{
774+
TypeMeta: metav1.TypeMeta{
775+
APIVersion: "v1",
776+
Kind: "Node",
777+
},
778+
ObjectMeta: metav1.ObjectMeta{
779+
Name: "test-node-1",
780+
Annotations: map[string]string{
781+
"anno1": "value1",
782+
},
783+
},
784+
},
785+
node2: &corev1.Node{
786+
TypeMeta: metav1.TypeMeta{
787+
APIVersion: "v1",
788+
Kind: "Node",
789+
},
790+
ObjectMeta: metav1.ObjectMeta{
791+
Name: "test-node-2",
792+
Annotations: map[string]string{
793+
"anno1": "value1",
794+
},
795+
},
796+
},
797+
retry: machineutils.LongRetry,
798+
err: nil,
799+
},
800+
}),
737801
Entry("Node incorrectly assigned NotManagedByMCM annotation", &data{
738802
setup: setup{
739803
node: &corev1.Node{

0 commit comments

Comments
 (0)