@@ -64,6 +64,16 @@ type NodeDrainTimeoutSpecInput struct {
 	// Allows to inject a function to be run after test namespace is created.
 	// If not specified, this is a no-op.
 	PostNamespaceCreated func(managementClusterProxy framework.ClusterProxy, workloadClusterNamespace string)
+
+	// Enables additional verification for volumes blocking machine deletion.
+	// Requires to add appropriate resources via CreateAdditionalResources.
+	VerifyNodeVolumeDetach bool
+
+	// Allows to overwrite the default function used for unblocking volume detachments.
+	UnblockNodeVolumeDetachment func(ctx context.Context, bootstrapClusterProxy framework.ClusterProxy, cluster *clusterv1.Cluster)
+
+	// Allows to create additional resources.
+	CreateAdditionalResources func(ctx context.Context, clusterProxy framework.ClusterProxy, cluster *clusterv1.Cluster)
 }

 // NodeDrainTimeoutSpec goes through the following steps:
@@ -72,13 +82,16 @@ type NodeDrainTimeoutSpecInput struct {
 // * Deploy MachineDrainRules
 // * Deploy Deployment with unevictable Pods on CP & MD Nodes
 // * Deploy Deployment with evictable Pods with finalizer on CP & MD Nodes
+// * Deploy additional resources if defined in input
 // * Trigger Node drain by scaling down the control plane to 1 and MachineDeployments to 0
 // * Get draining control plane and MachineDeployment Machines
 // * Verify drain of Deployments with order 1
 // * Verify drain of Deployments with order 5
 // * Verify skipped Pods are still there and don't have a deletionTimestamp
 // * Verify Node drains for control plane and MachineDeployment Machines are blocked (only by PDBs)
 // * Set NodeDrainTimeout to 1s to unblock Node drain
+// * Verify machine deletion is blocked by waiting for volume detachment (only if VerifyNodeVolumeDetach is enabled)
+// * Unblock waiting for volume detachment (only if VerifyNodeVolumeDetach is enabled)
 // * Verify scale down succeeded because Node drains were unblocked.
 func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeoutSpecInput) {
 	var (
@@ -100,6 +113,10 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 		Expect(input.E2EConfig.GetIntervals(specName, "wait-deployment-available")).ToNot(BeNil())
 		Expect(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")).ToNot(BeNil())

+		if input.VerifyNodeVolumeDetach && input.UnblockNodeVolumeDetachment == nil {
+			input.UnblockNodeVolumeDetachment = unblockNodeVolumeDetachmentFunc(input.E2EConfig.GetIntervals(specName, "wait-control-plane"), input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"))
+		}
+
 		// Setup a Namespace where to host objects for this spec and create a watcher for the namespace events.
 		namespace, cancelWatches = framework.SetupSpecNamespace(ctx, specName, input.BootstrapClusterProxy, input.ArtifactFolder, input.PostNamespaceCreated)
 		clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
@@ -147,6 +164,9 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 			Cluster: cluster,
 			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
 				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if input.VerifyNodeVolumeDetach {
+					topology.NodeVolumeDetachTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				}
 				if topology.Metadata.Labels == nil {
 					topology.Metadata.Labels = map[string]string{}
 				}
@@ -159,6 +179,9 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 			Cluster: cluster,
 			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
 				topology.NodeDrainTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				if input.VerifyNodeVolumeDetach {
+					topology.NodeVolumeDetachTimeout = &metav1.Duration{Duration: time.Duration(0)}
+				}
 				if topology.Metadata.Labels == nil {
 					topology.Metadata.Labels = map[string]string{}
 				}
@@ -174,12 +197,14 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 		workloadClusterProxy := input.BootstrapClusterProxy.GetWorkloadCluster(ctx, cluster.Namespace, cluster.Name)

 		By("Deploy MachineDrainRules.")
-		Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
-			generateMachineDrainRule(namespace.Name, clusterName, "drain-order-1", 1))).To(Succeed())
-		Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
-			generateMachineDrainRule(namespace.Name, clusterName, "drain-order-5", 5))).To(Succeed())
-		Expect(input.BootstrapClusterProxy.GetClient().Create(ctx,
-			generateMachineDrainRule(namespace.Name, clusterName, "drain-order-10", 10))).To(Succeed())
+		machineDrainRules := []*clusterv1.MachineDrainRule{
+			generateMachineDrainRule(namespace.Name, clusterName, "drain-order-1", 1),
+			generateMachineDrainRule(namespace.Name, clusterName, "drain-order-5", 5),
+			generateMachineDrainRule(namespace.Name, clusterName, "drain-order-10", 10),
+		}
+		for _, rule := range machineDrainRules {
+			Expect(input.BootstrapClusterProxy.GetClient().Create(ctx, rule)).To(Succeed())
+		}

 		By("Deploy Deployment with unevictable Pods on control plane and MachineDeployment Nodes.")
 		framework.DeployUnevictablePod(ctx, framework.DeployUnevictablePodInput{
@@ -248,6 +273,10 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 			}
 		}

+		if input.CreateAdditionalResources != nil {
+			input.CreateAdditionalResources(ctx, input.BootstrapClusterProxy, cluster)
+		}
+
 		By("Trigger Node drain by scaling down the control plane to 1 and MachineDeployments to 0.")
 		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
 			ClusterProxy: input.BootstrapClusterProxy,
@@ -432,7 +461,35 @@ func NodeDrainTimeoutSpec(ctx context.Context, inputGetter func() NodeDrainTimeo
 			WaitForMachineDeployments: input.E2EConfig.GetIntervals(specName, "wait-worker-nodes"),
 		})

-		By("Verify scale down succeeded because Node drains were unblocked")
+		if input.VerifyNodeVolumeDetach {
+			By("Verify Node removal for control plane and MachineDeployment Machines are blocked (only by volume detachments)")
+			Eventually(func(g Gomega) {
+				waitingCPMachine := &clusterv1.Machine{}
+				g.Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, drainingCPMachineKey, waitingCPMachine)).To(Succeed())
+
+				condition := conditions.Get(waitingCPMachine, clusterv1.VolumeDetachSucceededCondition)
+				g.Expect(condition).ToNot(BeNil())
+				g.Expect(condition.Status).To(Equal(corev1.ConditionFalse))
+				// Deletion is still blocked because the volume is not detached yet.
+				g.Expect(condition.Message).To(ContainSubstring("Waiting for node volumes to be detached"))
+			}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+			for _, machineKey := range drainingMDMachineKeys {
+				Eventually(func(g Gomega) {
+					drainedMDMachine := &clusterv1.Machine{}
+					g.Expect(input.BootstrapClusterProxy.GetClient().Get(ctx, machineKey, drainedMDMachine)).To(Succeed())
+
+					condition := conditions.Get(drainedMDMachine, clusterv1.VolumeDetachSucceededCondition)
+					g.Expect(condition).ToNot(BeNil())
+					g.Expect(condition.Status).To(Equal(corev1.ConditionFalse)) // Deletion is still blocked because the volume is not detached yet.
+					g.Expect(condition.Message).To(ContainSubstring("Waiting for node volumes to be detached"))
+				}, input.E2EConfig.GetIntervals(specName, "wait-machine-deleted")...).Should(Succeed())
+			}
+
+			By("Executing input.UnblockNodeVolumeDetachment to unblock waiting for volume detachments")
+			input.UnblockNodeVolumeDetachment(ctx, input.BootstrapClusterProxy, cluster)
+		}
+
+		By("Verify scale down succeeded because Node drains and Volume detachments were unblocked")
 		// When we scale down the KCP, controlplane machines are deleted one by one, so it requires more time
 		// MD Machine deletion is done in parallel and will be faster.
 		nodeDrainTimeoutKCPInterval := getDrainAndDeleteInterval(input.E2EConfig.GetIntervals(specName, "wait-machine-deleted"), drainTimeout, controlPlaneReplicas)
@@ -641,3 +698,28 @@ func getDrainAndDeleteInterval(deleteInterval []interface{}, drainTimeout *metav
 	res := []interface{}{intervalDuration.String(), deleteInterval[1]}
 	return res
 }
+
+func unblockNodeVolumeDetachmentFunc(waitControlPlaneIntervals, waitWorkerNodeIntervals []interface{}) func(ctx context.Context, bootstrapClusterProxy framework.ClusterProxy, cluster *clusterv1.Cluster) {
+	return func(ctx context.Context, bootstrapClusterProxy framework.ClusterProxy, cluster *clusterv1.Cluster) {
+		By("Set NodeVolumeDetachTimeout to 1s to unblock waiting for volume detachments")
+		// Note: This also verifies that KCP & MachineDeployments are still propagating changes to NodeVolumeDetachTimeout down to
+		// Machines that already have a deletionTimestamp.
+		nodeVolumeDetachTimeout := &metav1.Duration{Duration: time.Duration(1) * time.Second}
+		modifyControlPlaneViaClusterAndWait(ctx, modifyControlPlaneViaClusterAndWaitInput{
+			ClusterProxy: bootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyControlPlaneTopology: func(topology *clusterv1.ControlPlaneTopology) {
+				topology.NodeVolumeDetachTimeout = nodeVolumeDetachTimeout
+			},
+			WaitForControlPlane: waitControlPlaneIntervals,
+		})
+		modifyMachineDeploymentViaClusterAndWait(ctx, modifyMachineDeploymentViaClusterAndWaitInput{
+			ClusterProxy: bootstrapClusterProxy,
+			Cluster:      cluster,
+			ModifyMachineDeploymentTopology: func(topology *clusterv1.MachineDeploymentTopology) {
+				topology.NodeVolumeDetachTimeout = nodeVolumeDetachTimeout
+			},
+			WaitForMachineDeployments: waitWorkerNodeIntervals,
+		})
+	}
+}
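
For reference, below is a minimal sketch of how a provider's e2e suite might wire up the new input fields. Everything beyond the NodeDrainTimeoutSpecInput fields shown in the diff above is an assumption: the suite-level variables (ctx, e2eConfig, bootstrapClusterProxy, artifactFolder), the import paths, and the provider-specific body of CreateAdditionalResources are placeholders, and UnblockNodeVolumeDetachment is left nil so the default unblockNodeVolumeDetachmentFunc (NodeVolumeDetachTimeout set to 1s) applies.

package e2e

import (
	"context"

	. "github.com/onsi/ginkgo/v2"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
	"sigs.k8s.io/cluster-api/test/framework"
)

// Hypothetical wrapper spec; only the NodeDrainTimeoutSpecInput fields shown in the diff above are real.
var _ = Describe("When testing node drain timeout with volumes blocking Machine deletion", func() {
	NodeDrainTimeoutSpec(ctx, func() NodeDrainTimeoutSpecInput {
		return NodeDrainTimeoutSpecInput{
			E2EConfig:             e2eConfig,             // assumed suite-level variable
			BootstrapClusterProxy: bootstrapClusterProxy, // assumed suite-level variable
			ArtifactFolder:        artifactFolder,        // assumed suite-level variable
			// Enable the additional volume-detach verification added by this change.
			VerifyNodeVolumeDetach: true,
			// Create the resources that make volumes block Machine deletion (provider-specific).
			CreateAdditionalResources: func(ctx context.Context, proxy framework.ClusterProxy, cluster *clusterv1.Cluster) {
				// e.g. deploy a StatefulSet with a PVC so a VolumeAttachment exists on the drained Nodes.
			},
			// UnblockNodeVolumeDetachment is intentionally nil to use the default implementation.
		}
	})
})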