Commit bceec5a

e2e flake: make CheckDaemonStatus assert on an async value
The util for checking the daemon status only checked once whether the DaemonSet status reported that all desired Pods are scheduled and ready. However, the pattern used in the e2e tests did not account for the controller needing to propagate the Pod status to the DaemonSet status, and asserted on the condition a single time after waiting for all the Pods to be ready. To avoid more code churn, change the CheckDaemonStatus signature so it returns a wait.ConditionWithContextFunc-compatible condition and use it in an async poll loop in the tests.
1 parent: de8f6b0

5 files changed (+35, −32 lines)
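The fix follows a common pattern in the e2e framework: instead of asserting on the DaemonSet status once, the helper now returns a condition function that the test polls until the controller has propagated the Pod status. Below is a minimal, self-contained sketch of that pattern; checkReady is a hypothetical stand-in for e2edaemonset.CheckDaemonStatus, and only wait.PollUntilContextTimeout and wait.ConditionWithContextFunc come from the real k8s.io/apimachinery/pkg/util/wait package.

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// checkReady is a hypothetical stand-in for e2edaemonset.CheckDaemonStatus: rather than
// returning a final error, it returns a condition that reports the current state each
// time it is invoked, so the caller can poll until the state converges.
func checkReady(target int, attempts *int) wait.ConditionWithContextFunc {
	return func(ctx context.Context) (bool, error) {
		*attempts++ // simulate the controller propagating status over several ticks
		if *attempts >= target {
			return true, nil
		}
		return false, nil // not ready yet: keep polling instead of failing the test
	}
}

func main() {
	attempts := 0
	// Mirrors how the tests now call
	// wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName)).
	err := wait.PollUntilContextTimeout(context.Background(), 100*time.Millisecond, 5*time.Second, true, checkReady(3, &attempts))
	fmt.Println("ready:", err == nil, "attempts:", attempts)
}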

test/e2e/apps/controller_revision.go

Lines changed: 2 additions & 2 deletions
@@ -136,8 +136,8 @@ var _ = SIGDescribe("ControllerRevision", framework.WithSerial(), func() {
  ginkgo.By("Check that daemon pods launch on every node of the cluster.")
  err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
  framework.ExpectNoError(err, "error waiting for daemon pod to start")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
- framework.ExpectNoError(err)
+ err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

  ginkgo.By(fmt.Sprintf("Confirm DaemonSet %q successfully created with %q label", dsName, dsLabelSelector))
  dsList, err := csAppsV1.DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: dsLabelSelector})

test/e2e/apps/daemon_set.go

Lines changed: 14 additions & 14 deletions
@@ -184,8 +184,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
  ginkgo.By("Check that daemon pods launch on every node of the cluster.")
  err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds))
  framework.ExpectNoError(err, "error waiting for daemon pod to start")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
- framework.ExpectNoError(err)
+ err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

  ginkgo.By("Stop a daemon pod, check that the daemon pod is revived.")
  podList := listDaemonPods(ctx, c, ns, label)

@@ -224,8 +224,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
  gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
  err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name}))
  framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
- framework.ExpectNoError(err)
+ err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

  ginkgo.By("Update the node label to green, and wait for daemons to be unscheduled")
  nodeSelector[daemonsetColorLabel] = "green"

@@ -243,8 +243,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
  gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
  err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{greenNode.Name}))
  framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
- framework.ExpectNoError(err)
+ err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
  })

  // We defer adding this test to conformance pending the disposition of moving DaemonSet scheduling logic to the

@@ -287,8 +287,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
  gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
  err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name}))
  framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
- framework.ExpectNoError(err)
+ err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

  ginkgo.By("Remove the node label and wait for daemons to be unscheduled")
  _, err = setDaemonSetNodeLabels(ctx, c, node.Name, map[string]string{})

@@ -312,8 +312,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
  ginkgo.By("Check that daemon pods launch on every node of the cluster.")
  err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds))
  framework.ExpectNoError(err, "error waiting for daemon pod to start")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
- framework.ExpectNoError(err)
+ err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

  ginkgo.By("Set a daemon pod's phase to 'Failed', check that the daemon pod is revived.")
  podList := listDaemonPods(ctx, c, ns, label)

@@ -863,8 +863,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
  ginkgo.By("Check that daemon pods launch on every node of the cluster.")
  err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
  framework.ExpectNoError(err, "error waiting for daemon pod to start")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
- framework.ExpectNoError(err)
+ err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

  ginkgo.By("listing all DaemonSets")
  dsList, err := cs.AppsV1().DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: labelSelector})

@@ -911,8 +911,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
  ginkgo.By("Check that daemon pods launch on every node of the cluster.")
  err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
  framework.ExpectNoError(err, "error waiting for daemon pod to start")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
- framework.ExpectNoError(err)
+ err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

  ginkgo.By("Getting /status")
  dsResource := schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "daemonsets"}

test/e2e/framework/daemonset/fixtures.go

Lines changed: 13 additions & 12 deletions
@@ -18,7 +18,6 @@ package daemonset

  import (
      "context"
-     "fmt"

      appsv1 "k8s.io/api/apps/v1"
      v1 "k8s.io/api/core/v1"

@@ -139,16 +138,18 @@ func checkDaemonPodStateOnNodes(ctx context.Context, c clientset.Interface, ds *
      return len(nodesToPodCount) == len(nodeNames), nil
  }

- // CheckDaemonStatus returns an error if not all desired pods are scheduled or
- // not all of them are ready.
- func CheckDaemonStatus(ctx context.Context, f *framework.Framework, dsName string) error {
-     ds, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get(ctx, dsName, metav1.GetOptions{})
-     if err != nil {
-         return err
-     }
-     desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady
-     if desired == scheduled && scheduled == ready {
-         return nil
+ // CheckDaemonStatus returns false if not all desired pods are scheduled or not all of them are ready.
+ func CheckDaemonStatus(ctx context.Context, f *framework.Framework, dsName string) func(ctx context.Context) (bool, error) {
+     return func(ctx context.Context) (bool, error) {
+         ds, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get(ctx, dsName, metav1.GetOptions{})
+         if err != nil {
+             return false, err
+         }
+         desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady
+         if desired == scheduled && scheduled == ready {
+             return true, nil
+         }
+         framework.Logf("error in daemon status. DesiredScheduled: %d, CurrentScheduled: %d, Ready: %d", desired, scheduled, ready)
+         return false, nil
      }
-     return fmt.Errorf("error in daemon status. DesiredScheduled: %d, CurrentScheduled: %d, Ready: %d", desired, scheduled, ready)
  }
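Worth noting about the new signature: the returned closure has the shape func(ctx context.Context) (bool, error), which is what wait.PollUntilContextTimeout accepts, so the tests above can pass CheckDaemonStatus(ctx, f, dsName) directly as the poll condition. A status that has not converged yet yields (false, nil) and the poll retries; an API error yields (false, err) and aborts the wait. A rough sketch of that contract, with a hypothetical fetch function and dsStatus type standing in for the DaemonSet Get call and its status fields:

package main

import (
	"context"
	"fmt"
	"time"

	"k8s.io/apimachinery/pkg/util/wait"
)

// dsStatus is a hypothetical stand-in for the three DaemonSet status fields the helper compares.
type dsStatus struct{ desired, scheduled, ready int32 }

// checkStatus mirrors the new CheckDaemonStatus contract: (true, nil) once the counters match,
// (false, nil) while the controller is still propagating status, (false, err) on an API error.
func checkStatus(fetch func(ctx context.Context) (dsStatus, error)) func(ctx context.Context) (bool, error) {
	return func(ctx context.Context) (bool, error) {
		s, err := fetch(ctx)
		if err != nil {
			return false, err // aborts the surrounding poll loop
		}
		return s.desired == s.scheduled && s.scheduled == s.ready, nil
	}
}

func main() {
	ticks := 0
	fetch := func(ctx context.Context) (dsStatus, error) {
		ticks++
		if ticks < 3 {
			return dsStatus{desired: 2, scheduled: 2, ready: 1}, nil // status not propagated yet
		}
		return dsStatus{desired: 2, scheduled: 2, ready: 2}, nil
	}
	err := wait.PollUntilContextTimeout(context.Background(), 50*time.Millisecond, 2*time.Second, true, checkStatus(fetch))
	fmt.Println("converged:", err == nil, "ticks:", ticks)
}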

test/e2e/network/loadbalancer.go

Lines changed: 2 additions & 2 deletions
@@ -1307,8 +1307,8 @@ func testRollingUpdateLBConnectivityDisruption(ctx context.Context, f *framework
  creationTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)
  err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, nodeNames))
  framework.ExpectNoError(err, "error waiting for daemon pods to start")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, name)
- framework.ExpectNoError(err)
+ err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, name))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")

  ginkgo.By(fmt.Sprintf("Creating a service %s with type=LoadBalancer externalTrafficPolicy=%s in namespace %s", name, externalTrafficPolicy, ns))
  jig := e2eservice.NewTestJig(cs, ns, name)

test/e2e/upgrades/apps/daemonsets.go

Lines changed: 4 additions & 2 deletions
@@ -94,6 +94,8 @@ func (t *DaemonSetUpgradeTest) validateRunningDaemonSet(ctx context.Context, f *

  // DaemonSet resource itself should be good
  ginkgo.By("confirming the DaemonSet resource is in a good state")
- err = e2edaemonset.CheckDaemonStatus(ctx, f, t.daemonSet.Name)
- framework.ExpectNoError(err)
+
+ err = wait.PollUntilContextTimeout(ctx, framework.Poll, framework.PodStartTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, t.daemonSet.Name))
+ framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+
  }
