
Commit e43065d

e2e daemon set: better polling in CheckDaemonStatus
As a quick fix for a flake, bceec5a introduced polling with wait.Poll in all callers of CheckDaemonStatus. This commit reverts all callers to what they were before (CheckDaemonStatus + ExpectNoError) and implements polling according to E2E best practices (https://github.com/kubernetes/community/blob/master/contributors/devel/sig-testing/writing-good-e2e-tests.md#polling-and-timeouts):

- no logging while polling
- support for progress reporting while polling
- last but not least, produce an informative failure message in case of a timeout, including a dump of the daemon set as YAML
1 parent 55b83c9 commit e43065d
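
For context, the polling pattern recommended by the linked best-practices doc and adopted here works like this: the API read is wrapped in framework.GetObject so that framework.Gomega().Eventually re-fetches the object on every attempt, the success condition lives in a framework.MakeMatcher callback, and that callback returns a failure function which is only evaluated for progress reports and the final timeout message. Below is a minimal, annotated sketch of that pattern, mirroring the new CheckDaemonStatus in test/e2e/framework/daemonset/fixtures.go; the helper name checkDaemonStatusSketch and the comments are illustrative, not part of the commit.

// Sketch only: annotated restatement of the new CheckDaemonStatus shown in the diff below.
package daemonset

import (
	"context"
	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/kubernetes/test/e2e/framework"
	"k8s.io/kubernetes/test/utils/format"
)

func checkDaemonStatusSketch(ctx context.Context, f *framework.Framework, dsName string) error {
	// framework.GetObject wraps the typed Get call so Eventually re-reads the
	// DaemonSet on every polling attempt; nothing is logged while polling.
	return framework.Gomega().Eventually(ctx, framework.GetObject(f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get, dsName, metav1.GetOptions{})).
		// Bound polling by the framework's pod-start timeout instead of an
		// ad-hoc wait.Poll period/timeout pair.
		WithTimeout(f.Timeouts.PodStart).
		Should(framework.MakeMatcher(func(ds *appsv1.DaemonSet) (failure func() string, err error) {
			desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady
			if desired == scheduled && scheduled == ready {
				// Condition met: no failure message, no error.
				return nil, nil
			}
			// Returning a failure func (rather than logging) lets the framework
			// report progress while waiting and, on timeout, produce a failure
			// message that includes a dump of the daemon set.
			return func() string {
				return fmt.Sprintf("daemon set not ready: DesiredScheduled: %d, CurrentScheduled: %d, Ready: %d\n%s",
					desired, scheduled, ready, format.Object(ds, 1))
			}, nil
		}))
}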

5 files changed: +38 -34 lines


test/e2e/apps/controller_revision.go

Lines changed: 2 additions & 2 deletions
@@ -136,8 +136,8 @@ var _ = SIGDescribe("ControllerRevision", framework.WithSerial(), func() {
	ginkgo.By("Check that daemon pods launch on every node of the cluster.")
	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
	framework.ExpectNoError(err, "error waiting for daemon pod to start")
-	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+	framework.ExpectNoError(err)

	ginkgo.By(fmt.Sprintf("Confirm DaemonSet %q successfully created with %q label", dsName, dsLabelSelector))
	dsList, err := csAppsV1.DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: dsLabelSelector})

test/e2e/apps/daemon_set.go

Lines changed: 14 additions & 14 deletions
@@ -184,8 +184,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
	ginkgo.By("Check that daemon pods launch on every node of the cluster.")
	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds))
	framework.ExpectNoError(err, "error waiting for daemon pod to start")
-	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+	framework.ExpectNoError(err)

	ginkgo.By("Stop a daemon pod, check that the daemon pod is revived.")
	podList := listDaemonPods(ctx, c, ns, label)
@@ -224,8 +224,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
	gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name}))
	framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
-	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+	framework.ExpectNoError(err)

	ginkgo.By("Update the node label to green, and wait for daemons to be unscheduled")
	nodeSelector[daemonsetColorLabel] = "green"
@@ -243,8 +243,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
	gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{greenNode.Name}))
	framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
-	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+	framework.ExpectNoError(err)
})

// We defer adding this test to conformance pending the disposition of moving DaemonSet scheduling logic to the
@@ -287,8 +287,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
	gomega.Expect(daemonSetLabels).To(gomega.HaveLen(1))
	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, []string{newNode.Name}))
	framework.ExpectNoError(err, "error waiting for daemon pods to be running on new nodes")
-	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+	framework.ExpectNoError(err)

	ginkgo.By("Remove the node label and wait for daemons to be unscheduled")
	_, err = setDaemonSetNodeLabels(ctx, c, node.Name, map[string]string{})
@@ -312,8 +312,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
	ginkgo.By("Check that daemon pods launch on every node of the cluster.")
	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, ds))
	framework.ExpectNoError(err, "error waiting for daemon pod to start")
-	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+	framework.ExpectNoError(err)

	ginkgo.By("Set a daemon pod's phase to 'Failed', check that the daemon pod is revived.")
	podList := listDaemonPods(ctx, c, ns, label)
@@ -863,8 +863,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
	ginkgo.By("Check that daemon pods launch on every node of the cluster.")
	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
	framework.ExpectNoError(err, "error waiting for daemon pod to start")
-	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+	framework.ExpectNoError(err)

	ginkgo.By("listing all DaemonSets")
	dsList, err := cs.AppsV1().DaemonSets("").List(ctx, metav1.ListOptions{LabelSelector: labelSelector})
@@ -911,8 +911,8 @@ var _ = SIGDescribe("Daemon set", framework.WithSerial(), func() {
	ginkgo.By("Check that daemon pods launch on every node of the cluster.")
	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, checkRunningOnAllNodes(f, testDaemonset))
	framework.ExpectNoError(err, "error waiting for daemon pod to start")
-	err = wait.PollUntilContextTimeout(ctx, dsRetryPeriod, dsRetryTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, dsName))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, dsName)
+	framework.ExpectNoError(err)

	ginkgo.By("Getting /status")
	dsResource := schema.GroupVersionResource{Group: "apps", Version: "v1", Resource: "daemonsets"}

test/e2e/framework/daemonset/fixtures.go

Lines changed: 18 additions & 14 deletions
@@ -18,6 +18,7 @@ package daemonset

import (
	"context"
+	"fmt"

	appsv1 "k8s.io/api/apps/v1"
	v1 "k8s.io/api/core/v1"
@@ -26,6 +27,7 @@ import (
	"k8s.io/kubectl/pkg/util/podutils"
	"k8s.io/kubernetes/pkg/controller/daemon"
	"k8s.io/kubernetes/test/e2e/framework"
+	"k8s.io/kubernetes/test/utils/format"
)

func NewDaemonSet(dsName, image string, labels map[string]string, volumes []v1.Volume, mounts []v1.VolumeMount, ports []v1.ContainerPort, args ...string) *appsv1.DaemonSet {
@@ -138,18 +140,20 @@ func checkDaemonPodStateOnNodes(ctx context.Context, c clientset.Interface, ds *
	return len(nodesToPodCount) == len(nodeNames), nil
}

-// CheckDaemonStatus returns false if not all desired pods are scheduled or not all of them are ready.
-func CheckDaemonStatus(ctx context.Context, f *framework.Framework, dsName string) func(ctx context.Context) (bool, error) {
-	return func(ctx context.Context) (bool, error) {
-		ds, err := f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get(ctx, dsName, metav1.GetOptions{})
-		if err != nil {
-			return false, err
-		}
-		desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady
-		if desired == scheduled && scheduled == ready {
-			return true, nil
-		}
-		framework.Logf("error in daemon status. DesiredScheduled: %d, CurrentScheduled: %d, Ready: %d", desired, scheduled, ready)
-		return false, nil
-	}
+// CheckDaemonStatus ensures that eventually the daemon set has the desired
+// number of pods scheduled and ready. It returns a descriptive error if that
+// state is not reached in the amount of time it takes to start
+// pods. f.Timeouts.PodStart can be changed to influence that timeout.
+func CheckDaemonStatus(ctx context.Context, f *framework.Framework, dsName string) error {
+	return framework.Gomega().Eventually(ctx, framework.GetObject(f.ClientSet.AppsV1().DaemonSets(f.Namespace.Name).Get, dsName, metav1.GetOptions{})).
+		WithTimeout(f.Timeouts.PodStart).
+		Should(framework.MakeMatcher(func(ds *appsv1.DaemonSet) (failure func() string, err error) {
+			desired, scheduled, ready := ds.Status.DesiredNumberScheduled, ds.Status.CurrentNumberScheduled, ds.Status.NumberReady
+			if desired == scheduled && scheduled == ready {
+				return nil, nil
+			}
+			return func() string {
+				return fmt.Sprintf("Expected daemon set to reach state where all desired pods are scheduled and ready. Got instead DesiredScheduled: %d, CurrentScheduled: %d, Ready: %d\n%s", desired, scheduled, ready, format.Object(ds, 1))
+			}, nil
+		}))
}

test/e2e/network/loadbalancer.go

Lines changed: 2 additions & 2 deletions
@@ -1307,8 +1307,8 @@ func testRollingUpdateLBConnectivityDisruption(ctx context.Context, f *framework
	creationTimeout := e2eservice.GetServiceLoadBalancerCreationTimeout(ctx, cs)
	err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonPodOnNodes(f, ds, nodeNames))
	framework.ExpectNoError(err, "error waiting for daemon pods to start")
-	err = wait.PollUntilContextTimeout(ctx, framework.Poll, creationTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, name))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, name)
+	framework.ExpectNoError(err)

	ginkgo.By(fmt.Sprintf("Creating a service %s with type=LoadBalancer externalTrafficPolicy=%s in namespace %s", name, externalTrafficPolicy, ns))
	jig := e2eservice.NewTestJig(cs, ns, name)

test/e2e/upgrades/apps/daemonsets.go

Lines changed: 2 additions & 2 deletions
@@ -95,7 +95,7 @@ func (t *DaemonSetUpgradeTest) validateRunningDaemonSet(ctx context.Context, f *
	// DaemonSet resource itself should be good
	ginkgo.By("confirming the DaemonSet resource is in a good state")

-	err = wait.PollUntilContextTimeout(ctx, framework.Poll, framework.PodStartTimeout, true, e2edaemonset.CheckDaemonStatus(ctx, f, t.daemonSet.Name))
-	framework.ExpectNoError(err, "error waiting for daemonset to report all pods are scheduled and ready")
+	err = e2edaemonset.CheckDaemonStatus(ctx, f, t.daemonSet.Name)
+	framework.ExpectNoError(err)

}
