@@ -196,7 +196,6 @@ func patchMachineSet(ctx context.Context, patchHelper *patch.Helper, machineSet
196
196
197
197
func (r * Reconciler ) reconcile (ctx context.Context , cluster * clusterv1.Cluster , machineSet * clusterv1.MachineSet ) (ctrl.Result , error ) {
198
198
log := ctrl .LoggerFrom (ctx )
199
- log .V (4 ).Info ("Reconcile MachineSet" )
200
199
201
200
// Reconcile and retrieve the Cluster object.
202
201
if machineSet .Labels == nil {
@@ -285,7 +284,7 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
285
284
continue
286
285
}
287
286
if conditions .IsFalse (machine , clusterv1 .MachineOwnerRemediatedCondition ) {
288
- log .Info ("Deleting unhealthy machine " )
287
+ log .Info ("Deleting machine because marked as unhealthy by the MachineHealthCheck controller " )
289
288
patch := client .MergeFrom (machine .DeepCopy ())
290
289
if err := r .Client .Delete (ctx , machine ); err != nil {
291
290
errs = append (errs , errors .Wrap (err , "failed to delete" ))
@@ -335,7 +334,6 @@ func (r *Reconciler) reconcile(ctx context.Context, cluster *clusterv1.Cluster,
335
334
336
335
// Quickly reconcile until the nodes become Ready.
337
336
if machineSet .Status .ReadyReplicas != replicas {
338
- log .V (4 ).Info ("Some nodes are not ready yet, requeuing until they are ready" )
339
337
return ctrl.Result {RequeueAfter : 15 * time .Second }, nil
340
338
}
341
339
@@ -352,10 +350,10 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
352
350
switch {
353
351
case diff < 0 :
354
352
diff *= - 1
355
- log .Info ("Too few replicas" , "need " , * (ms .Spec .Replicas ), "creating " , diff )
353
+ log .Info (fmt . Sprintf ( "MachineSet is scaling up to %d replicas by creating %d machines " , * ( ms . Spec . Replicas ), diff ), "replicas " , * (ms .Spec .Replicas ), "machineCount " , len ( machines ) )
356
354
if ms .Annotations != nil {
357
355
if _ , ok := ms .Annotations [clusterv1 .DisableMachineCreate ]; ok {
358
- log .V ( 2 ). Info ("Automatic creation of new machines disabled for machine set" )
356
+ log .Info ("Automatic creation of new machines disabled for machine set" )
359
357
return nil
360
358
}
361
359
}
@@ -365,11 +363,7 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
365
363
)
366
364
367
365
for i := 0 ; i < diff ; i ++ {
368
- log .Info (fmt .Sprintf ("Creating machine %d of %d, ( spec.replicas(%d) > currentMachineCount(%d) )" ,
369
- i + 1 , diff , * (ms .Spec .Replicas ), len (machines )))
370
-
371
366
machine := r .getNewMachine (ms )
372
- log = log .WithValues ("Machine" , klog .KObj (machine ))
373
367
374
368
// Clone and set the infrastructure and bootstrap references.
375
369
var (
@@ -394,9 +388,10 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
394
388
})
395
389
if err != nil {
396
390
conditions .MarkFalse (ms , clusterv1 .MachinesCreatedCondition , clusterv1 .BootstrapTemplateCloningFailedReason , clusterv1 .ConditionSeverityError , err .Error ())
397
- return errors .Wrapf (err , "failed to clone bootstrap configuration for MachineSet %q in namespace %q " , ms . Name , ms . Namespace )
391
+ return errors .Wrapf (err , "failed to clone bootstrap configuration from %s %s while creating a machine " , machine . Spec . Bootstrap . ConfigRef . Kind , klog . KRef ( machine . Spec . Bootstrap . ConfigRef . Namespace , machine . Spec . Bootstrap . ConfigRef . Name ) )
398
392
}
399
393
machine .Spec .Bootstrap .ConfigRef = bootstrapRef
394
+ log = log .WithValues (bootstrapRef .Kind , klog .KRef (bootstrapRef .Namespace , bootstrapRef .Name ))
400
395
}
401
396
402
397
infraRef , err = external .CreateFromTemplate (ctx , & external.CreateFromTemplateInput {
@@ -415,30 +410,31 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
415
410
})
416
411
if err != nil {
417
412
conditions .MarkFalse (ms , clusterv1 .MachinesCreatedCondition , clusterv1 .InfrastructureTemplateCloningFailedReason , clusterv1 .ConditionSeverityError , err .Error ())
418
- return errors .Wrapf (err , "failed to clone infrastructure configuration for MachineSet %q in namespace %q " , ms . Name , ms . Namespace )
413
+ return errors .Wrapf (err , "failed to clone infrastructure machine from %s %s while creating a machine " , machine . Spec . InfrastructureRef . Kind , klog . KRef ( machine . Spec . InfrastructureRef . Namespace , machine . Spec . InfrastructureRef . Name ) )
419
414
}
415
+ log = log .WithValues (infraRef .Kind , klog .KRef (infraRef .Namespace , infraRef .Name ))
420
416
machine .Spec .InfrastructureRef = * infraRef
421
417
422
418
if err := r .Client .Create (ctx , machine ); err != nil {
423
- log .Error (err , "Unable to create Machine " )
424
- r .recorder .Eventf (ms , corev1 .EventTypeWarning , "FailedCreate" , "Failed to create machine %q : %v" , machine . Name , err )
419
+ log .Error (err , "Error while creating a machine " )
420
+ r .recorder .Eventf (ms , corev1 .EventTypeWarning , "FailedCreate" , "Failed to create machine: %v" , err )
425
421
errs = append (errs , err )
426
422
conditions .MarkFalse (ms , clusterv1 .MachinesCreatedCondition , clusterv1 .MachineCreationFailedReason ,
427
423
clusterv1 .ConditionSeverityError , err .Error ())
428
424
429
425
// Try to cleanup the external objects if the Machine creation failed.
430
426
if err := r .Client .Delete (ctx , util .ObjectReferenceToUnstructured (* infraRef )); ! apierrors .IsNotFound (err ) {
431
- log .Error (err , "Failed to cleanup infrastructure configuration object after Machine creation error" )
427
+ log .Error (err , "Failed to cleanup infrastructure machine object after Machine creation error" , infraRef . Kind , klog . KRef ( infraRef . Namespace , infraRef . Name ) )
432
428
}
433
429
if bootstrapRef != nil {
434
430
if err := r .Client .Delete (ctx , util .ObjectReferenceToUnstructured (* bootstrapRef )); ! apierrors .IsNotFound (err ) {
435
- log .Error (err , "Failed to cleanup bootstrap configuration object after Machine creation error" )
431
+ log .Error (err , "Failed to cleanup bootstrap configuration object after Machine creation error" , bootstrapRef . Kind , klog . KRef ( bootstrapRef . Namespace , bootstrapRef . Name ) )
436
432
}
437
433
}
438
434
continue
439
435
}
440
436
441
- log .Info (fmt .Sprintf ("Created machine %d of %d with name %q " , i + 1 , diff , machine . Name ))
437
+ log .Info (fmt .Sprintf ("Created machine %d of %d" , i + 1 , diff ), "Machine" , klog . KObj ( machine ))
442
438
r .recorder .Eventf (ms , corev1 .EventTypeNormal , "SuccessfulCreate" , "Created machine %q" , machine .Name )
443
439
machineList = append (machineList , machine )
444
440
}
@@ -448,26 +444,29 @@ func (r *Reconciler) syncReplicas(ctx context.Context, ms *clusterv1.MachineSet,
448
444
}
449
445
return r .waitForMachineCreation (ctx , machineList )
450
446
case diff > 0 :
451
- log .Info ("Too many replicas" , "need " , * (ms .Spec .Replicas ), "deleting " , diff )
447
+ log .Info (fmt . Sprintf ( "MachineSet is scaling down to %d replicas by deleting %d machines " , * ( ms . Spec . Replicas ), diff ), "replicas " , * (ms .Spec .Replicas ), "machineCount " , len ( machines ), "deletePolicy" , ms . Spec . DeletePolicy )
452
448
453
449
deletePriorityFunc , err := getDeletePriorityFunc (ms )
454
450
if err != nil {
455
451
return err
456
452
}
457
- log .Info ("Found delete policy" , "delete-policy" , ms .Spec .DeletePolicy )
458
453
459
454
var errs []error
460
455
machinesToDelete := getMachinesToDeletePrioritized (machines , diff , deletePriorityFunc )
461
- for _ , machine := range machinesToDelete {
462
- log = log .WithValues ("Machine" , klog .KObj (machine ))
463
- if err := r .Client .Delete (ctx , machine ); err != nil {
464
- log .Error (err , "Unable to delete Machine" )
465
- r .recorder .Eventf (ms , corev1 .EventTypeWarning , "FailedDelete" , "Failed to delete machine %q: %v" , machine .Name , err )
466
- errs = append (errs , err )
467
- continue
456
+ for i , machine := range machinesToDelete {
457
+ log := log .WithValues ("Machine" , klog .KObj (machine ))
458
+ if machine .GetDeletionTimestamp ().IsZero () {
459
+ log .Info (fmt .Sprintf ("Deleting machine %d of %d" , i + 1 , diff ))
460
+ if err := r .Client .Delete (ctx , machine ); err != nil {
461
+ log .Error (err , "Unable to delete Machine" )
462
+ r .recorder .Eventf (ms , corev1 .EventTypeWarning , "FailedDelete" , "Failed to delete machine %q: %v" , machine .Name , err )
463
+ errs = append (errs , err )
464
+ continue
465
+ }
466
+ r .recorder .Eventf (ms , corev1 .EventTypeNormal , "SuccessfulDelete" , "Deleted machine %q" , machine .Name )
467
+ } else {
468
+ log .Info (fmt .Sprintf ("Waiting for machine %d of %d to be deleted" , i + 1 , diff ))
468
469
}
469
- log .Info ("Deleted machine" )
470
- r .recorder .Eventf (ms , corev1 .EventTypeNormal , "SuccessfulDelete" , "Deleted machine %q" , machine .Name )
471
470
}
472
471
473
472
if len (errs ) > 0 {
@@ -667,13 +666,13 @@ func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluste
667
666
}
668
667
669
668
if machine .Status .NodeRef == nil {
670
- log .V (2 ).Info ("Unable to retrieve Node status, missing NodeRef" )
669
+ log .V (4 ).Info ("Waiting for the machine controller to set status. NodeRef on the Machine " )
671
670
continue
672
671
}
673
672
674
673
node , err := r .getMachineNode (ctx , cluster , machine )
675
- if err != nil {
676
- log .Error (err , "Unable to retrieve Node status" )
674
+ if err != nil && machine . GetDeletionTimestamp (). IsZero () {
675
+ log .Error (err , "Unable to retrieve Node status" , "node" , klog . KObj ( node ) )
677
676
continue
678
677
}
679
678
@@ -682,6 +681,8 @@ func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluste
682
681
if noderefutil .IsNodeAvailable (node , ms .Spec .MinReadySeconds , metav1 .Now ()) {
683
682
availableReplicasCount ++
684
683
}
684
+ } else if machine .GetDeletionTimestamp ().IsZero () {
685
+ log .Info ("Waiting for the Kubernetes node on the machine to report ready state" )
685
686
}
686
687
}
687
688
@@ -696,17 +697,17 @@ func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluste
696
697
ms .Status .ReadyReplicas != newStatus .ReadyReplicas ||
697
698
ms .Status .AvailableReplicas != newStatus .AvailableReplicas ||
698
699
ms .Generation != ms .Status .ObservedGeneration {
699
- // Save the generation number we acted on, otherwise we might wrongfully indicate
700
- // that we've seen a spec update when we retry.
701
- newStatus .ObservedGeneration = ms .Generation
702
- newStatus .DeepCopyInto (& ms .Status )
703
-
704
700
log .V (4 ).Info ("Updating status: " +
705
701
fmt .Sprintf ("replicas %d->%d (need %d), " , ms .Status .Replicas , newStatus .Replicas , desiredReplicas ) +
706
702
fmt .Sprintf ("fullyLabeledReplicas %d->%d, " , ms .Status .FullyLabeledReplicas , newStatus .FullyLabeledReplicas ) +
707
703
fmt .Sprintf ("readyReplicas %d->%d, " , ms .Status .ReadyReplicas , newStatus .ReadyReplicas ) +
708
704
fmt .Sprintf ("availableReplicas %d->%d, " , ms .Status .AvailableReplicas , newStatus .AvailableReplicas ) +
709
- fmt .Sprintf ("sequence No: %v->%v" , ms .Status .ObservedGeneration , newStatus .ObservedGeneration ))
705
+ fmt .Sprintf ("observedGeneration %v->%v" , ms .Status .ObservedGeneration , ms .Generation ))
706
+
707
+ // Save the generation number we acted on, otherwise we might wrongfully indicate
708
+ // that we've seen a spec update when we retry.
709
+ newStatus .ObservedGeneration = ms .Generation
710
+ newStatus .DeepCopyInto (& ms .Status )
710
711
}
711
712
switch {
712
713
// We are scaling up
@@ -722,6 +723,9 @@ func (r *Reconciler) updateStatus(ctx context.Context, cluster *clusterv1.Cluste
722
723
// NOTE: we are checking the number of machines ready so we report resize completed only when the machines
723
724
// are actually provisioned (vs reporting completed immediately after the last machine object is created). This convention is also used by KCP.
724
725
if newStatus .ReadyReplicas == newStatus .Replicas {
726
+ if conditions .IsFalse (ms , clusterv1 .ResizedCondition ) {
727
+ log .Info ("All the replicas are ready" , "replicas" , newStatus .ReadyReplicas )
728
+ }
725
729
conditions .MarkTrue (ms , clusterv1 .ResizedCondition )
726
730
}
727
731
// This means that there was no error in generating the desired number of machine objects
0 commit comments