Skip to content

Commit 5bc22e7

Browse files
authored
test: fix race on drain e2e (cloudnative-pg#6972)
If the storage was not local to the node, the drained pod could be recreated on a different node before the test was able to observe it in the "Pending" phase. The test now accepts either outcome as correct: the pod is "Pending", or it has already been scheduled on a node other than the drained one. Closes cloudnative-pg#6971 Signed-off-by: Francesco Canovai <francesco.canovai@enterprisedb.com>
1 parent aef7914 commit 5bc22e7

File tree

1 file changed

+14
-10
lines changed

1 file changed

+14
-10
lines changed

tests/e2e/drain_node_test.go

Lines changed: 14 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ package e2e
1818

1919
import (
2020
"fmt"
21+
"time"
2122

2223
corev1 "k8s.io/api/core/v1"
2324
"k8s.io/apimachinery/pkg/types"
@@ -462,16 +463,20 @@ var _ = Describe("E2E Drain Node", Serial, Label(tests.LabelDisruptive, tests.La
462463
It("can drain the primary node and recover the cluster when uncordoned", func() {
463464
AssertCreateCluster(namespace, clusterName, sampleFile, env)
464465

466+
var drainedNodeName string
465467
By("waiting for the jobs to be removed", func() {
466468
// Wait for jobs to be removed
467469
timeout := 180
470+
var podList *corev1.PodList
468471
Eventually(func() (int, error) {
469-
podList, err := pods.List(env.Ctx, env.Client, namespace)
472+
var err error
473+
podList, err = pods.List(env.Ctx, env.Client, namespace)
470474
if err != nil {
471475
return 0, err
472476
}
473477
return len(podList.Items), err
474478
}, timeout).Should(BeEquivalentTo(1))
479+
drainedNodeName = podList.Items[0].Spec.NodeName
475480
})
476481

477482
// Load test data
@@ -490,16 +495,15 @@ var _ = Describe("E2E Drain Node", Serial, Label(tests.LabelDisruptive, tests.La
490495
testTimeouts[testsUtils.DrainNode],
491496
)
492497

493-
By("verifying the primary is now pending", func() {
494-
timeout := 180
495-
// Expect a failover to have happened
496-
Eventually(func() (string, error) {
498+
By("verifying the primary is now pending or somewhere else", func() {
499+
Eventually(func(g Gomega) {
497500
pod, err := pods.Get(env.Ctx, env.Client, namespace, clusterName+"-1")
498-
if err != nil {
499-
return "", err
500-
}
501-
return string(pod.Status.Phase), err
502-
}, timeout).Should(BeEquivalentTo("Pending"))
501+
g.Expect(err).ToNot(HaveOccurred())
502+
g.Expect(pod).Should(SatisfyAny(
503+
HaveField("Spec.NodeName", Not(BeEquivalentTo(drainedNodeName))),
504+
HaveField("Status.Phase", BeEquivalentTo("Pending")),
505+
))
506+
}).WithTimeout(180 * time.Second).WithPolling(PollingTime * time.Second).Should(Succeed())
503507
})
504508

505509
By("uncordoning all nodes", func() {

0 commit comments

Comments
 (0)