Skip to content

Commit 37341af

Browse files
authored
Merge pull request #11811 from sbueringer/pr-conditions-wait-completed
🌱 Handle "waiting for completion" in KCP, MD, MS and Machine conditions
2 parents 401521b + ee17e8f commit 37341af

File tree

8 files changed: 86 additions and 10 deletions.

controlplane/kubeadm/internal/controllers/status.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -865,6 +865,9 @@ func aggregateStaleMachines(machines collections.Machines) string {
865865
if strings.Contains(deletingCondition.Message, "failed to evict Pod") {
866866
delayReasons.Insert("Pod eviction errors")
867867
}
868+
if strings.Contains(deletingCondition.Message, "waiting for completion") {
869+
delayReasons.Insert("Pods not completed yet")
870+
}
868871
}
869872
}
870873
}
@@ -889,7 +892,7 @@ func aggregateStaleMachines(machines collections.Machines) string {
889892
message += "in deletion since more than 15m"
890893
if len(delayReasons) > 0 {
891894
reasonList := []string{}
892-
for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors"} {
895+
for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors", "Pods not completed yet"} {
893896
if delayReasons.Has(r) {
894897
reasonList = append(reasonList, r)
895898
}

controlplane/kubeadm/internal/controllers/status_test.go

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -467,7 +467,28 @@ func Test_setScalingDownCondition(t *testing.T) {
467467
Status: controlplanev1.KubeadmControlPlaneStatus{Replicas: 3},
468468
},
469469
Machines: collections.FromMachines(
470-
&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1", DeletionTimestamp: ptr.To(metav1.Time{Time: time.Now().Add(-1 * time.Hour)})}},
470+
&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m1", DeletionTimestamp: ptr.To(metav1.Time{Time: time.Now().Add(-1 * time.Hour)})},
471+
Status: clusterv1.MachineStatus{
472+
V1Beta2: &clusterv1.MachineV1Beta2Status{
473+
Conditions: []metav1.Condition{
474+
{
475+
Type: clusterv1.MachineDeletingV1Beta2Condition,
476+
Status: metav1.ConditionTrue,
477+
Reason: clusterv1.MachineDeletingDrainingNodeV1Beta2Reason,
478+
Message: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
479+
* Pods pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1: deletionTimestamp set, but still not removed from the Node
480+
* Pod pod-5-to-trigger-eviction-pdb-violated-1: cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently
481+
* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1
482+
* Pod pod-9-wait-completed: waiting for completion
483+
After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`,
484+
},
485+
},
486+
},
487+
Deletion: &clusterv1.MachineDeletionStatus{
488+
NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-6 * time.Minute)},
489+
},
490+
},
491+
},
471492
&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m2"}},
472493
&clusterv1.Machine{ObjectMeta: metav1.ObjectMeta{Name: "m3"}},
473494
),
@@ -477,7 +498,7 @@ func Test_setScalingDownCondition(t *testing.T) {
477498
Status: metav1.ConditionTrue,
478499
Reason: controlplanev1.KubeadmControlPlaneScalingDownV1Beta2Reason,
479500
Message: "Scaling down from 3 to 1 replicas is blocked because:\n" +
480-
"* Machine m1 is in deletion since more than 15m",
501+
"* Machine m1 is in deletion since more than 15m, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors, Pods not completed yet",
481502
},
482503
},
483504
{

internal/controllers/machine/machine_controller_status.go

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -710,6 +710,9 @@ func calculateDeletingConditionForSummary(machine *clusterv1.Machine) v1beta2con
710710
if strings.Contains(deletingCondition.Message, "failed to evict Pod") {
711711
delayReasons = append(delayReasons, "Pod eviction errors")
712712
}
713+
if strings.Contains(deletingCondition.Message, "waiting for completion") {
714+
delayReasons = append(delayReasons, "Pods not completed yet")
715+
}
713716
if len(delayReasons) > 0 {
714717
msg += fmt.Sprintf(", delay likely due to %s", strings.Join(delayReasons, ", "))
715718
}

internal/controllers/machine/machine_controller_status_test.go

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1715,6 +1715,7 @@ func TestCalculateDeletingConditionForSummary(t *testing.T) {
17151715
* Pods pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1: deletionTimestamp set, but still not removed from the Node
17161716
* Pod pod-5-to-trigger-eviction-pdb-violated-1: cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently
17171717
* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1
1718+
* Pod pod-9-wait-completed: waiting for completion
17181719
After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`,
17191720
},
17201721
},
@@ -1733,7 +1734,7 @@ After above Pods have been removed from the Node, the following Pods will be evi
17331734
Type: clusterv1.MachineDeletingV1Beta2Condition,
17341735
Status: metav1.ConditionTrue,
17351736
Reason: clusterv1.MachineDeletingV1Beta2Reason,
1736-
Message: "Machine deletion in progress since more than 15m, stage: DrainingNode, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors",
1737+
Message: "Machine deletion in progress since more than 15m, stage: DrainingNode, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors, Pods not completed yet",
17371738
},
17381739
},
17391740
},

internal/controllers/machinedeployment/machinedeployment_status.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -581,6 +581,9 @@ func aggregateStaleMachines(machines collections.Machines) string {
581581
if strings.Contains(deletingCondition.Message, "failed to evict Pod") {
582582
delayReasons.Insert("Pod eviction errors")
583583
}
584+
if strings.Contains(deletingCondition.Message, "waiting for completion") {
585+
delayReasons.Insert("Pods not completed yet")
586+
}
584587
}
585588
}
586589
}
@@ -605,7 +608,7 @@ func aggregateStaleMachines(machines collections.Machines) string {
605608
message += "in deletion since more than 15m"
606609
if len(delayReasons) > 0 {
607610
reasonList := []string{}
608-
for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors"} {
611+
for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors", "Pods not completed yet"} {
609612
if delayReasons.Has(r) {
610613
reasonList = append(reasonList, r)
611614
}

internal/controllers/machinedeployment/machinedeployment_status_test.go

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -648,15 +648,36 @@ func Test_setScalingDownCondition(t *testing.T) {
648648
},
649649
machines: []*clusterv1.Machine{
650650
fakeMachine("m1"),
651-
fakeMachine("stale-machine-1", withStaleDeletion()),
651+
fakeMachine("stale-machine-1", withStaleDeletion(), func(m *clusterv1.Machine) {
652+
m.Status = clusterv1.MachineStatus{
653+
V1Beta2: &clusterv1.MachineV1Beta2Status{
654+
Conditions: []metav1.Condition{
655+
{
656+
Type: clusterv1.MachineDeletingV1Beta2Condition,
657+
Status: metav1.ConditionTrue,
658+
Reason: clusterv1.MachineDeletingDrainingNodeV1Beta2Reason,
659+
Message: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
660+
* Pods pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1: deletionTimestamp set, but still not removed from the Node
661+
* Pod pod-5-to-trigger-eviction-pdb-violated-1: cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently
662+
* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1
663+
* Pod pod-9-wait-completed: waiting for completion
664+
After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`,
665+
},
666+
},
667+
},
668+
Deletion: &clusterv1.MachineDeletionStatus{
669+
NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-6 * time.Minute)},
670+
},
671+
}
672+
}),
652673
},
653674
getAndAdoptMachineSetsForDeploymentSucceeded: true,
654675
expectCondition: metav1.Condition{
655676
Type: clusterv1.MachineDeploymentScalingDownV1Beta2Condition,
656677
Status: metav1.ConditionTrue,
657678
Reason: clusterv1.MachineDeploymentScalingDownV1Beta2Reason,
658679
Message: "Scaling down from 2 to 1 replicas\n" +
659-
"* Machine stale-machine-1 is in deletion since more than 15m",
680+
"* Machine stale-machine-1 is in deletion since more than 15m, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors, Pods not completed yet",
660681
},
661682
},
662683
{

internal/controllers/machineset/machineset_controller_status.go

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -457,6 +457,9 @@ func aggregateStaleMachines(machines []*clusterv1.Machine) string {
457457
if strings.Contains(deletingCondition.Message, "failed to evict Pod") {
458458
delayReasons.Insert("Pod eviction errors")
459459
}
460+
if strings.Contains(deletingCondition.Message, "waiting for completion") {
461+
delayReasons.Insert("Pods not completed yet")
462+
}
460463
}
461464
}
462465
}
@@ -481,7 +484,7 @@ func aggregateStaleMachines(machines []*clusterv1.Machine) string {
481484
message += "in deletion since more than 15m"
482485
if len(delayReasons) > 0 {
483486
reasonList := []string{}
484-
for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors"} {
487+
for _, r := range []string{"PodDisruptionBudgets", "Pods not terminating", "Pod eviction errors", "Pods not completed yet"} {
485488
if delayReasons.Has(r) {
486489
reasonList = append(reasonList, r)
487490
}

internal/controllers/machineset/machineset_controller_status_test.go

Lines changed: 23 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -420,7 +420,28 @@ func Test_setScalingDownCondition(t *testing.T) {
420420
name: "scaling down with 1 stale machine",
421421
ms: machineSet1Replica,
422422
machines: []*clusterv1.Machine{
423-
fakeMachine("stale-machine-1", withStaleDeletionTimestamp()),
423+
fakeMachine("stale-machine-1", withStaleDeletionTimestamp(), func(m *clusterv1.Machine) {
424+
m.Status = clusterv1.MachineStatus{
425+
V1Beta2: &clusterv1.MachineV1Beta2Status{
426+
Conditions: []metav1.Condition{
427+
{
428+
Type: clusterv1.MachineDeletingV1Beta2Condition,
429+
Status: metav1.ConditionTrue,
430+
Reason: clusterv1.MachineDeletingDrainingNodeV1Beta2Reason,
431+
Message: `Drain not completed yet (started at 2024-10-09T16:13:59Z):
432+
* Pods pod-2-deletionTimestamp-set-1, pod-3-to-trigger-eviction-successfully-1: deletionTimestamp set, but still not removed from the Node
433+
* Pod pod-5-to-trigger-eviction-pdb-violated-1: cannot evict pod as it would violate the pod's disruption budget. The disruption budget pod-5-pdb needs 20 healthy pods and has 20 currently
434+
* Pod pod-6-to-trigger-eviction-some-other-error: failed to evict Pod, some other error 1
435+
* Pod pod-9-wait-completed: waiting for completion
436+
After above Pods have been removed from the Node, the following Pods will be evicted: pod-7-eviction-later, pod-8-eviction-later`,
437+
},
438+
},
439+
},
440+
Deletion: &clusterv1.MachineDeletionStatus{
441+
NodeDrainStartTime: &metav1.Time{Time: time.Now().Add(-6 * time.Minute)},
442+
},
443+
}
444+
}),
424445
fakeMachine("machine-2"),
425446
},
426447
getAndAdoptMachinesForMachineSetSucceeded: true,
@@ -429,7 +450,7 @@ func Test_setScalingDownCondition(t *testing.T) {
429450
Status: metav1.ConditionTrue,
430451
Reason: clusterv1.MachineSetScalingDownV1Beta2Reason,
431452
Message: "Scaling down from 2 to 1 replicas\n" +
432-
"* Machine stale-machine-1 is in deletion since more than 15m",
453+
"* Machine stale-machine-1 is in deletion since more than 15m, delay likely due to PodDisruptionBudgets, Pods not terminating, Pod eviction errors, Pods not completed yet",
433454
},
434455
},
435456
{

0 commit comments

Comments
 (0)