@@ -202,7 +202,6 @@ func patchMachineSet(ctx context.Context, patchHelper *patch.Helper, machineSet
202
202
203
203
func (r * Reconciler ) reconcile (ctx context.Context , cluster * clusterv1.Cluster , machineSet * clusterv1.MachineSet ) (ctrl.Result , error ) {
204
204
log := ctrl .LoggerFrom (ctx )
205
- log .V (4 ).Info ("Reconcile MachineSet" )
206
205
207
206
// Reconcile and retrieve the Cluster object.
208
207
if machineSet .Labels == nil {
@@ -291,7 +290,7 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
291
290
continue
292
291
}
293
292
if conditions .IsFalse (machine , clusterv1 .MachineOwnerRemediatedCondition ) {
294
- log .Info ("Deleting unhealthy machine " )
293
+ log .Info ("Deleting machine because marked as unhealthy by the MachineHealthCheck controller " )
295
294
patch := client .MergeFrom (machine .DeepCopy ())
296
295
if err := r .Client .Delete (ctx , machine ); err != nil {
297
296
errs = append (errs , errors .Wrap (err , "failed to delete" ))
@@ -341,7 +340,6 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
341
340
342
341
// Quickly reconcile until the nodes become Ready.
343
342
if machineSet .Status .ReadyReplicas != replicas {
344
- log .V (4 ).Info ("Some nodes are not ready yet, requeuing until they are ready" )
345
343
return ctrl.Result {RequeueAfter : 15 * time .Second }, nil
346
344
}
347
345
@@ -358,10 +356,10 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
358
356
switch {
359
357
case diff < 0 :
360
358
diff *= - 1
361
- log .Info ("Too few replicas" , "need " , * (ms .Spec .Replicas ), "creating " , diff )
359
+ log .Info (fmt . Sprintf ( "MachineSet is scaling up to %d replicas by creating %d machines " , * ( ms . Spec . Replicas ), diff ), "replicas " , * (ms .Spec .Replicas ), "machineCount " , len ( machines ) )
362
360
if ms .Annotations != nil {
363
361
if _ , ok := ms .Annotations [clusterv1 .DisableMachineCreate ]; ok {
364
- log .V ( 2 ). Info ("Automatic creation of new machines disabled for machine set" )
362
+ log .Info ("Automatic creation of new machines disabled for machine set" )
365
363
return nil
366
364
}
367
365
}
@@ -371,11 +369,7 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
371
369
)
372
370
373
371
for i := 0 ; i < diff ; i ++ {
374
- log .Info (fmt .Sprintf ("Creating machine %d of %d, ( spec.replicas(%d) > currentMachineCount(%d) )" ,
375
- i + 1 , diff , * (ms .Spec .Replicas ), len (machines )))
376
-
377
372
machine := r .getNewMachine (ms )
378
- log = log .WithValues ("Machine" , klog .KObj (machine ))
379
373
380
374
// Clone and set the infrastructure and bootstrap references.
381
375
var (
@@ -400,9 +394,10 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
400
394
})
401
395
if err != nil {
402
396
conditions .MarkFalse (ms , clusterv1 .MachinesCreatedCondition , clusterv1 .BootstrapTemplateCloningFailedReason , clusterv1 .ConditionSeverityError , err .Error ())
403
- return errors .Wrapf (err , "failed to clone bootstrap configuration for MachineSet %q in namespace %q " , ms . Name , ms . Namespace )
397
+ return errors .Wrapf (err , "failed to clone bootstrap configuration from %s %s while creating a machine " , machine . Spec . Bootstrap . ConfigRef . Kind , klog . KRef ( machine . Spec . Bootstrap . ConfigRef . Namespace , machine . Spec . Bootstrap . ConfigRef . Name ) )
404
398
}
405
399
machine .Spec .Bootstrap .ConfigRef = bootstrapRef
400
+ log = log .WithValues (bootstrapRef .Kind , klog .KRef (bootstrapRef .Namespace , bootstrapRef .Name ))
406
401
}
407
402
408
403
infraRef , err = external .CreateFromTemplate (ctx , & external.CreateFromTemplateInput {
@@ -421,30 +416,31 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
421
416
})
422
417
if err != nil {
423
418
conditions .MarkFalse (ms , clusterv1 .MachinesCreatedCondition , clusterv1 .InfrastructureTemplateCloningFailedReason , clusterv1 .ConditionSeverityError , err .Error ())
424
- return errors .Wrapf (err , "failed to clone infrastructure configuration for MachineSet %q in namespace %q " , ms . Name , ms . Namespace )
419
+ return errors .Wrapf (err , "failed to clone infrastructure machine from %s %s while creating a machine " , machine . Spec . InfrastructureRef . Kind , klog . KRef ( machine . Spec . InfrastructureRef . Namespace , machine . Spec . InfrastructureRef . Name ) )
425
420
}
421
+ log = log .WithValues (infraRef .Kind , klog .KRef (infraRef .Namespace , infraRef .Name ))
426
422
machine .Spec .InfrastructureRef = * infraRef
427
423
428
424
if err := r .Client .Create (ctx , machine ); err != nil {
429
- log .Error (err , "Unable to create Machine " )
430
- r .recorder .Eventf (ms , corev1 .EventTypeWarning , "FailedCreate" , "Failed to create machine %q : %v" , machine . Name , err )
425
+ log .Error (err , "Error while creating a machine " )
426
+ r .recorder .Eventf (ms , corev1 .EventTypeWarning , "FailedCreate" , "Failed to create machine: %v" , err )
431
427
errs = append (errs , err )
432
428
conditions .MarkFalse (ms , clusterv1 .MachinesCreatedCondition , clusterv1 .MachineCreationFailedReason ,
433
429
clusterv1 .ConditionSeverityError , err .Error ())
434
430
435
431
// Try to cleanup the external objects if the Machine creation failed.
436
432
if err := r .Client .Delete (ctx , util .ObjectReferenceToUnstructured (* infraRef )); ! apierrors .IsNotFound (err ) {
437
- log .Error (err , "Failed to cleanup infrastructure configuration object after Machine creation error" )
433
+ log .Error (err , "Failed to cleanup infrastructure machine object after Machine creation error" , infraRef . Kind , klog . KRef ( infraRef . Namespace , infraRef . Name ) )
438
434
}
439
435
if bootstrapRef != nil {
440
436
if err := r .Client .Delete (ctx , util .ObjectReferenceToUnstructured (* bootstrapRef )); ! apierrors .IsNotFound (err ) {
441
- log .Error (err , "Failed to cleanup bootstrap configuration object after Machine creation error" )
437
+ log .Error (err , "Failed to cleanup bootstrap configuration object after Machine creation error" , bootstrapRef . Kind , klog . KRef ( bootstrapRef . Namespace , bootstrapRef . Name ) )
442
438
}
443
439
}
444
440
continue
445
441
}
446
442
447
- log .Info (fmt .Sprintf ("Created machine %d of %d with name %q " , i + 1 , diff , machine . Name ))
443
+ log .Info (fmt .Sprintf ("Created machine %d of %d" , i + 1 , diff ), "Machine" , klog . KObj ( machine ))
448
444
r .recorder .Eventf (ms , corev1 .EventTypeNormal , "SuccessfulCreate" , "Created machine %q" , machine .Name )
449
445
machineList = append (machineList , machine )
450
446
}
@@ -454,26 +450,29 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
454
450
}
455
451
return r .waitForMachineCreation (ctx , machineList )
456
452
case diff > 0 :
457
- log .Info ("Too many replicas" , "need " , * (ms .Spec .Replicas ), "deleting " , diff )
453
+ log .Info (fmt . Sprintf ( "MachineSet is scaling down to %d replicas by deleting %d machines " , * ( ms . Spec . Replicas ), diff ), "replicas " , * (ms .Spec .Replicas ), "machineCount " , len ( machines ), "deletePolicy" , ms . Spec . DeletePolicy )
458
454
459
455
deletePriorityFunc , err := getDeletePriorityFunc (ms )
460
456
if err != nil {
461
457
return err
462
458
}
463
- log .Info ("Found delete policy" , "delete-policy" , ms .Spec .DeletePolicy )
464
459
465
460
var errs []error
466
461
machinesToDelete := getMachinesToDeletePrioritized (machines , diff , deletePriorityFunc )
467
- for _ , machine := range machinesToDelete {
468
- log = log .WithValues ("Machine" , klog .KObj (machine ))
469
- if err := r .Client .Delete (ctx , machine ); err != nil {
470
- log .Error (err , "Unable to delete Machine" )
471
- r .recorder .Eventf (ms , corev1 .EventTypeWarning , "FailedDelete" , "Failed to delete machine %q: %v" , machine .Name , err )
472
- errs = append (errs , err )
473
- continue
462
+ for i , machine := range machinesToDelete {
463
+ log := log .WithValues ("Machine" , klog .KObj (machine ))
464
+ if machine .GetDeletionTimestamp ().IsZero () {
465
+ log .Info (fmt .Sprintf ("Deleting machine %d of %d" , i + 1 , diff ))
466
+ if err := r .Client .Delete (ctx , machine ); err != nil {
467
+ log .Error (err , "Unable to delete Machine" )
468
+ r .recorder .Eventf (ms , corev1 .EventTypeWarning , "FailedDelete" , "Failed to delete machine %q: %v" , machine .Name , err )
469
+ errs = append (errs , err )
470
+ continue
471
+ }
472
+ r .recorder .Eventf (ms , corev1 .EventTypeNormal , "SuccessfulDelete" , "Deleted machine %q" , machine .Name )
473
+ } else {
474
+ log .Info (fmt .Sprintf ("Waiting for machine %d of %d to be deleted" , i + 1 , diff ))
474
475
}
475
- log .Info ("Deleted machine" )
476
- r .recorder .Eventf (ms , corev1 .EventTypeNormal , "SuccessfulDelete" , "Deleted machine %q" , machine .Name )
477
476
}
478
477
479
478
if len (errs ) > 0 {
@@ -673,13 +672,13 @@ func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluste
673
672
}
674
673
675
674
if machine .Status .NodeRef == nil {
676
- log .V (2 ).Info ("Unable to retrieve Node status, missing NodeRef" )
675
+ log .V (4 ).Info ("Waiting for the machine controller to set status. NodeRef on the Machine " )
677
676
continue
678
677
}
679
678
680
679
node , err := r .getMachineNode (ctx , cluster , machine )
681
- if err != nil {
682
- log .Error (err , "Unable to retrieve Node status" )
680
+ if err != nil && machine . GetDeletionTimestamp (). IsZero () {
681
+ log .Error (err , "Unable to retrieve Node status" , "node" , klog . KObj ( node ) )
683
682
continue
684
683
}
685
684
@@ -688,6 +687,8 @@ func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluste
688
687
if noderefutil .IsNodeAvailable (node , ms .Spec .MinReadySeconds , metav1 .Now ()) {
689
688
availableReplicasCount ++
690
689
}
690
+ } else if machine .GetDeletionTimestamp ().IsZero () {
691
+ log .Info ("Waiting for the Kubernetes node on the machine to report ready state" )
691
692
}
692
693
}
693
694
@@ -702,17 +703,17 @@ func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluste
702
703
ms .Status .ReadyReplicas != newStatus .ReadyReplicas ||
703
704
ms .Status .AvailableReplicas != newStatus .AvailableReplicas ||
704
705
ms .Generation != ms .Status .ObservedGeneration {
705
- // Save the generation number we acted on, otherwise we might wrongfully indicate
706
- // that we've seen a spec update when we retry.
707
- newStatus .ObservedGeneration = ms .Generation
708
- newStatus .DeepCopyInto (& ms .Status )
709
-
710
706
log .V (4 ).Info ("Updating status: " +
711
707
fmt .Sprintf ("replicas %d->%d (need %d), " , ms .Status .Replicas , newStatus .Replicas , desiredReplicas ) +
712
708
fmt .Sprintf ("fullyLabeledReplicas %d->%d, " , ms .Status .FullyLabeledReplicas , newStatus .FullyLabeledReplicas ) +
713
709
fmt .Sprintf ("readyReplicas %d->%d, " , ms .Status .ReadyReplicas , newStatus .ReadyReplicas ) +
714
710
fmt .Sprintf ("availableReplicas %d->%d, " , ms .Status .AvailableReplicas , newStatus .AvailableReplicas ) +
715
- fmt .Sprintf ("sequence No: %v->%v" , ms .Status .ObservedGeneration , newStatus .ObservedGeneration ))
711
+ fmt .Sprintf ("observedGeneration %v->%v" , ms .Status .ObservedGeneration , ms .Generation ))
712
+
713
+ // Save the generation number we acted on, otherwise we might wrongfully indicate
714
+ // that we've seen a spec update when we retry.
715
+ newStatus .ObservedGeneration = ms .Generation
716
+ newStatus .DeepCopyInto (& ms .Status )
716
717
}
717
718
switch {
718
719
// We are scaling up
@@ -728,6 +729,9 @@ func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluste
728
729
// NOTE: we are checking the number of machines ready so we report resize completed only when the machines
729
730
// are actually provisioned (vs reporting completed immediately after the last machine object is created). This convention is also used by KCP.
730
731
if newStatus .ReadyReplicas == newStatus .Replicas {
732
+ if conditions .IsFalse (ms , clusterv1 .ResizedCondition ) {
733
+ log .Info ("All the replicas are ready" , "replicas" , newStatus .ReadyReplicas )
734
+ }
731
735
conditions .MarkTrue (ms , clusterv1 .ResizedCondition )
732
736
}
733
737
// This means that there was no error in generating the desired number of machine objects
0 commit comments