@@ -10,14 +10,14 @@ import (
1010 "net"
1111 "strings"
1212
13- "github.com/google/uuid"
1413 metalv1alpha1 "github.com/ironcore-dev/metal-operator/api/v1alpha1"
1514 corev1 "k8s.io/api/core/v1"
1615 apierrors "k8s.io/apimachinery/pkg/api/errors"
1716 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1817 "k8s.io/apimachinery/pkg/types"
1918 "k8s.io/client-go/tools/cache"
2019 "k8s.io/client-go/util/workqueue"
20+ "k8s.io/klog/v2"
2121 ctrl "sigs.k8s.io/controller-runtime"
2222 ctrlcache "sigs.k8s.io/controller-runtime/pkg/cache"
2323 "sigs.k8s.io/controller-runtime/pkg/client"
@@ -54,21 +54,19 @@ func NewNodeReconciler(targetClient client.Client, metalClient client.Client, no
5454func (r * NodeReconciler ) Start (ctx context.Context ) error {
5555 defer r .queue .ShutDown ()
5656
57- l := ctrl .LoggerFrom (ctx ).WithName ("node-controller" )
58-
5957 _ , err := r .informer .AddEventHandler (cache.ResourceEventHandlerFuncs {
6058 AddFunc : func (obj any ) {
6159 node , ok := obj .(* corev1.Node )
6260 if ! ok {
63- l . Error ( nil , "unexpected object type in AddFunc" , "type" , fmt . Sprintf ( " %T" , obj ) )
61+ klog . Errorf ( "unexpected object type: %T" , obj )
6462 return
6563 }
6664 r .queue .Add (client .ObjectKeyFromObject (node ))
6765 },
6866 UpdateFunc : func (oldObj , newObj any ) {
6967 node , ok := newObj .(* corev1.Node )
7068 if ! ok {
71- l . Error ( nil , "unexpected object type in UpdateFunc" , "type" , fmt . Sprintf ( " %T" , newObj ) )
69+ klog . Errorf ( "unexpected object type: %T" , newObj )
7270 return
7371 }
7472 r .queue .Add (client .ObjectKeyFromObject (node ))
@@ -79,7 +77,7 @@ func (r *NodeReconciler) Start(ctx context.Context) error {
7977 }
8078 node , ok := obj .(* corev1.Node )
8179 if ! ok {
82- l . Error ( nil , "unexpected object type in DeleteFunc" , "type" , fmt . Sprintf ( " %T" , obj ) )
80+ klog . Errorf ( "unexpected object type: %T" , obj )
8381 return
8482 }
8583 key := client .ObjectKeyFromObject (node )
@@ -99,11 +97,8 @@ func (r *NodeReconciler) Start(ctx context.Context) error {
9997 func () {
10098 defer r .queue .Done (key )
10199
102- reqL := l .WithValues ("namespace" , key .Namespace , "node-name" , key .Name , "reconcile-id" , uuid .NewString ())
103- reqCtx := ctrl .LoggerInto (ctx , reqL )
104-
105- if err = r .Reconcile (reqCtx , ctrl.Request {NamespacedName : key }); err != nil {
106- reqL .Error (err , "Failed to reconcile Node" )
100+ if err = r .Reconcile (ctx , ctrl.Request {NamespacedName : key }); err != nil {
101+ klog .Errorf ("Failed to reconcile Node %s: %v" , key , err )
107102 r .queue .AddRateLimited (key )
108103 return
109104 }
@@ -113,45 +108,32 @@ func (r *NodeReconciler) Start(ctx context.Context) error {
113108 }
114109 }()
115110 <- ctx .Done ()
116- l .Info ("Stopping Node reconciler" )
111+ klog .Info ("Stopping Node reconciler" )
117112 return nil
118113}
119114
120115func (r * NodeReconciler ) Reconcile (ctx context.Context , req ctrl.Request ) error {
121- l := ctrl .LoggerFrom (ctx )
122- l .V (2 ).Info ("Reconciling Node" )
116+ klog .V (2 ).Infof ("Reconciling Node %s" , req .NamespacedName )
123117
124118 node := & corev1.Node {}
125119 if err := r .targetClient .Get (ctx , req .NamespacedName , node ); err != nil {
126120 if client .IgnoreNotFound (err ) != nil {
127121 return err
128122 }
129- l .V (2 ).Info ("Node not found, skipping reconciliation" )
130- return nil
131- }
132-
133- if node .Spec .ProviderID == "" {
134- l .Info ("Node has empty spec.providerID, skipping reconciliation" )
123+ klog .V (2 ).Infof ("Node %s not found, skipping reconciliation" , req .NamespacedName )
135124 return nil
136125 }
137126
138- serverClaimKey , err := parseProviderID (node .Spec .ProviderID )
139- if err != nil {
140- return fmt .Errorf ("unable to parse provider ID: %w" , err )
141- }
142-
143- l = l .WithValues ("server-claim-name" , serverClaimKey .Name , "server-claim-namespace" , serverClaimKey .Namespace )
144-
145127 if ! node .DeletionTimestamp .IsZero () {
146- l . Info ( "Node is deleting, reconciling delete flow" )
147- return r .reconcileDelete (ctx , node , serverClaimKey )
128+ klog . V ( 2 ). Infof ( "Node %s is deleting, reconciling delete flow" , req . NamespacedName )
129+ return r .reconcileDelete (ctx , node )
148130 }
149131
150- if err = r .reconcilePodCIDR (ctx , node ); err != nil {
132+ if err : = r .reconcilePodCIDR (ctx , node ); err != nil {
151133 return fmt .Errorf ("unable to reconcile PodCIDR: %w" , err )
152134 }
153135
154- if err = r .reconcileMaintenance (ctx , node , serverClaimKey ); err != nil {
136+ if err : = r .reconcileMaintenance (ctx , node ); err != nil {
155137 return fmt .Errorf ("unable to reconcile maintenance: %w" , err )
156138 }
157139
@@ -180,12 +162,26 @@ func parseProviderID(providerID string) (types.NamespacedName, error) {
180162 return types.NamespacedName {Namespace : parts [0 ], Name : parts [1 ]}, nil
181163}
182164
183- func (r * NodeReconciler ) reconcileDelete (ctx context.Context , node * corev1.Node , serverClaimKey types. NamespacedName ) error {
165+ func (r * NodeReconciler ) reconcileDelete (ctx context.Context , node * corev1.Node ) error {
184166 if ! controllerutil .ContainsFinalizer (node , nodeMaintenanceFinalizer ) {
185167 return nil
186168 }
187169
188- if err := r .ensureServerMaintenanceNotExists (ctx , serverClaimKey ); err != nil {
170+ serverClaimKey , err := parseProviderID (node .Spec .ProviderID )
171+ if err != nil {
172+ klog .Errorf ("Node %s has empty/invalid spec.providerID during node deletion: %v. Skipping CR cleanup to unblock node deletion" , node .Name , err )
173+
174+ base := node .DeepCopy ()
175+ if removed := controllerutil .RemoveFinalizer (node , nodeMaintenanceFinalizer ); removed {
176+ if patchErr := r .targetClient .Patch (ctx , node , client .MergeFrom (base )); patchErr != nil {
177+ return fmt .Errorf ("unable to remove finalizer: %w" , patchErr )
178+ }
179+ }
180+
181+ return nil
182+ }
183+
184+ if err = r .ensureServerMaintenanceNotExists (ctx , serverClaimKey ); err != nil {
189185 return fmt .Errorf ("unable to cleanup ServerMaintenance: %w" , err )
190186 }
191187
@@ -200,11 +196,15 @@ func (r *NodeReconciler) reconcileDelete(ctx context.Context, node *corev1.Node,
200196}
201197
202198func (r * NodeReconciler ) ensureServerMaintenanceNotExists (ctx context.Context , key types.NamespacedName ) error {
203- maintenance := & metalv1alpha1.ServerMaintenance {
204- ObjectMeta : metav1.ObjectMeta {
205- Name : key .Name ,
206- Namespace : key .Namespace ,
207- },
199+
200+ maintenance := & metalv1alpha1.ServerMaintenance {}
201+ if err := r .metalClient .Get (ctx , key , maintenance ); err != nil {
202+ return client .IgnoreNotFound (err )
203+ }
204+
205+ if maintenance .Labels == nil || maintenance .Labels [labelKeyManagedBy ] != cloudProviderMetalName {
206+ klog .Infof ("ServerMaintenance %s exists but is not managed by CCM, skipping deletion" , key )
207+ return nil
208208 }
209209
210210 if err := r .metalClient .Delete (ctx , maintenance ); err != nil {
@@ -215,15 +215,13 @@ func (r *NodeReconciler) ensureServerMaintenanceNotExists(ctx context.Context, k
215215}
216216
217217func (r * NodeReconciler ) reconcilePodCIDR (ctx context.Context , node * corev1.Node ) error {
218- l := ctrl .LoggerFrom (ctx )
219-
220218 if PodPrefixSize <= 0 {
221219 // <= 0 disables automatic assignment of pod CIDR.
222220 return nil
223221 }
224222
225223 if node .Spec .PodCIDR != "" {
226- l . Info ("PodCIDR is already populated; patch was not done" , "PodCIDR" , node .Spec .PodCIDR )
224+ klog . InfoS ("PodCIDR is already populated; patch was not done" , "Node" , node . Name , "PodCIDR" , node .Spec .PodCIDR )
227225 return nil
228226 }
229227
@@ -248,13 +246,13 @@ func (r *NodeReconciler) reconcilePodCIDR(ctx context.Context, node *corev1.Node
248246 return fmt .Errorf ("failed to patch Node's PodCIDR with error %w" , err )
249247 }
250248
251- l . Info ("Patched Node's PodCIDR and PodCIDRs" , "PodCIDR" , podCIDR )
249+ klog . InfoS ("Patched Node's PodCIDR and PodCIDRs" , "Node" , node . Name , "PodCIDR" , podCIDR )
252250
253251 return nil
254252 }
255253 }
256254
257- l .Info ("Node does not have a NodeInternalIP, not setting podCIDR" )
255+ klog .Info ("Node does not have a NodeInternalIP, not setting podCIDR" )
258256 return nil
259257}
260258
@@ -268,20 +266,23 @@ func zeroHostBits(ip net.IP, maskSize int) net.IP {
268266 return ip .Mask (mask )
269267}
270268
271- func (r * NodeReconciler ) reconcileMaintenance (ctx context.Context , node * corev1.Node , serverClaimKey types.NamespacedName ) error {
272- l := ctrl .LoggerFrom (ctx )
269+ func (r * NodeReconciler ) reconcileMaintenance (ctx context.Context , node * corev1.Node ) error {
270+ serverClaimKey , err := parseProviderID (node .Spec .ProviderID )
271+ if err != nil {
272+ return fmt .Errorf ("unable to parse provider ID: %w" , err )
273+ }
273274
274275 serverClaim := & metalv1alpha1.ServerClaim {}
275- if err : = r .metalClient .Get (ctx , serverClaimKey , serverClaim ); err != nil {
276+ if err = r .metalClient .Get (ctx , serverClaimKey , serverClaim ); err != nil {
276277 if apierrors .IsNotFound (err ) {
277- l . Info ( "ServerClaim not found, skipping reconciliation" )
278+ klog . V ( 2 ). Infof ( "ServerClaim %s not found, skipping reconciliation" , serverClaimKey )
278279 return nil
279280 }
280281 return fmt .Errorf ("unable to get ServerClaim: %w" , err )
281282 }
282283
283284 if serverClaim .Spec .ServerRef == nil {
284- l . Info ( "ServerClaim has empty ServerRef, skipping maintenance logic" )
285+ klog . V ( 2 ). Infof ( "ServerClaim %s has empty ServerRef, skipping maintenance logic" , serverClaimKey )
285286 return nil
286287 }
287288
0 commit comments