Skip to content

Commit 3b1a5bf

Browse files
committed
Fail fast when waiting for job conditions in e2e tests
1 parent e9cde03 commit 3b1a5bf

File tree

2 files changed

+79
-39
lines changed

2 files changed

+79
-39
lines changed

test/e2e/apps/job.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,7 @@ var _ = SIGDescribe("Job", func() {
137137
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
138138

139139
ginkgo.By("Ensuring job fails")
140-
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
140+
err = e2ejob.WaitForJobFailed(ctx, f.ClientSet, f.Namespace.Name, job.Name)
141141
framework.ExpectNoError(err, "failed to ensure job failure in namespace: %s", f.Namespace.Name)
142142
})
143143

@@ -605,7 +605,7 @@ done`}
605605
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
606606

607607
ginkgo.By("Awaiting for the job to fail as there are failed indexes")
608-
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
608+
err = e2ejob.WaitForJobFailed(ctx, f.ClientSet, f.Namespace.Name, job.Name)
609609
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
610610

611611
ginkgo.By("Verifying the Job status fields to ensure all indexes were executed")
@@ -641,7 +641,7 @@ done`}
641641
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
642642

643643
ginkgo.By("Awaiting for the job to fail as the number of max failed indexes is exceeded")
644-
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
644+
err = e2ejob.WaitForJobFailed(ctx, f.ClientSet, f.Namespace.Name, job.Name)
645645
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
646646

647647
ginkgo.By("Verifying the Job status fields to ensure early termination of the job")
@@ -684,7 +684,7 @@ done`}
684684
framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
685685

686686
ginkgo.By("Awaiting for the job to fail as all indexes are failed")
687-
err = e2ejob.WaitForJobFailed(f.ClientSet, f.Namespace.Name, job.Name)
687+
err = e2ejob.WaitForJobFailed(ctx, f.ClientSet, f.Namespace.Name, job.Name)
688688
framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
689689

690690
ginkgo.By("Verifying the Job status fields to ensure the upper indexes didn't execute")

test/e2e/framework/job/wait.go

Lines changed: 75 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,14 @@ import (
2121
"fmt"
2222
"time"
2323

24+
"github.com/onsi/gomega"
2425
batchv1 "k8s.io/api/batch/v1"
2526
v1 "k8s.io/api/core/v1"
2627
apierrors "k8s.io/apimachinery/pkg/api/errors"
2728
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2829
"k8s.io/apimachinery/pkg/util/wait"
2930
clientset "k8s.io/client-go/kubernetes"
31+
"k8s.io/klog/v2"
3032
"k8s.io/kubernetes/test/e2e/framework"
3133
"k8s.io/kubernetes/test/utils/format"
3234
"k8s.io/utils/ptr"
@@ -82,14 +84,33 @@ func waitForJobPodsInPhase(ctx context.Context, c clientset.Interface, ns, jobNa
8284
// both conformance CI jobs with GA-only features and e2e CI jobs with all default-enabled features.
8385
// So, we need to skip "Complete" condition reason verifications in the e2e conformance test cases.
8486
func WaitForJobComplete(ctx context.Context, c clientset.Interface, ns, jobName string, reason *string, completions int32) error {
85-
if err := wait.PollUntilContextTimeout(ctx, framework.Poll, JobTimeout, false, func(ctx context.Context) (bool, error) {
86-
curr, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
87+
// This function is called by HandleRetry, which will retry
88+
// on transient API errors or stop polling in the case of other errors.
89+
get := func(ctx context.Context) (*batchv1.Job, error) {
90+
job, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
8791
if err != nil {
88-
return false, err
92+
return nil, err
93+
}
94+
if isJobFailed(job) {
95+
return nil, gomega.StopTrying("job failed while waiting for its completion").Attach("job", job)
96+
}
97+
return job, nil
98+
}
99+
match := func(job *batchv1.Job) (func() string, error) {
100+
if job.Status.Succeeded == completions {
101+
return nil, nil
89102
}
90-
return curr.Status.Succeeded == completions, nil
91-
}); err != nil {
92-
return nil
103+
return func() string {
104+
return fmt.Sprintf("expected job %q to have %v successful pods. got %v", klog.KObj(job), completions, job.Status.Succeeded)
105+
}, nil
106+
}
107+
err := framework.Gomega().
108+
Eventually(ctx, framework.HandleRetry(get)).
109+
WithTimeout(JobTimeout).
110+
WithPolling(framework.Poll).
111+
Should(framework.MakeMatcher(match))
112+
if err != nil {
113+
return err
93114
}
94115
return WaitForJobCondition(ctx, c, ns, jobName, batchv1.JobComplete, reason)
95116
}
@@ -117,48 +138,55 @@ func WaitForJobSuspend(ctx context.Context, c clientset.Interface, ns, jobName s
117138
}
118139

119140
// WaitForJobFailed uses c to wait for the Job jobName in namespace ns to fail.
// It stops polling early if the Job reaches the Complete condition instead.
120-
func WaitForJobFailed(c clientset.Interface, ns, jobName string) error {
121-
return wait.PollImmediate(framework.Poll, JobTimeout, func() (bool, error) {
122-
curr, err := c.BatchV1().Jobs(ns).Get(context.TODO(), jobName, metav1.GetOptions{})
141+
func WaitForJobFailed(ctx context.Context, c clientset.Interface, ns, jobName string) error {
142+
// This function is called by HandleRetry, which will retry
143+
// on transient API errors or stop polling in the case of other errors.
144+
get := func(ctx context.Context) (*batchv1.Job, error) {
145+
job, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
123146
if err != nil {
124-
return false, err
147+
return nil, err
125148
}
126-
127-
return isJobFailed(curr), nil
128-
})
149+
if isJobCompleted(job) {
150+
return nil, gomega.StopTrying("job completed while waiting for its failure").Attach("job", job)
151+
}
152+
return job, nil
153+
}
154+
match := func(job *batchv1.Job) (func() string, error) {
155+
if isJobFailed(job) {
156+
return nil, nil
157+
}
158+
return func() string {
159+
return fmt.Sprintf("expected job %q to fail", klog.KObj(job))
160+
}, nil
161+
}
162+
return framework.Gomega().
163+
Eventually(ctx, framework.HandleRetry(get)).
164+
WithTimeout(JobTimeout).
165+
WithPolling(framework.Poll).
166+
Should(framework.MakeMatcher(match))
129167
}
130168

131169
// WaitForJobCondition waits for the specified Job to have the expected condition with the specific reason.
132170
// When a nil reason is passed, the "reason" string in the condition is
133171
// not checked.
134172
func WaitForJobCondition(ctx context.Context, c clientset.Interface, ns, jobName string, cType batchv1.JobConditionType, reason *string) error {
135-
err := wait.PollUntilContextTimeout(ctx, framework.Poll, JobTimeout, false, func(ctx context.Context) (bool, error) {
136-
curr, err := c.BatchV1().Jobs(ns).Get(ctx, jobName, metav1.GetOptions{})
137-
if err != nil {
138-
return false, err
139-
}
140-
for _, c := range curr.Status.Conditions {
173+
match := func(job *batchv1.Job) (func() string, error) {
174+
for _, c := range job.Status.Conditions {
141175
if c.Type == cType && c.Status == v1.ConditionTrue {
142176
if reason == nil || *reason == c.Reason {
143-
return true, nil
177+
return nil, nil
144178
}
145179
}
146180
}
147-
return false, nil
148-
})
149-
if err != nil {
150-
return fmt.Errorf("waiting for Job %q to have the condition %q with reason: %v: %w", jobName, cType, reason, err)
151-
}
152-
return nil
153-
}
154-
155-
func isJobFailed(j *batchv1.Job) bool {
156-
for _, c := range j.Status.Conditions {
157-
if (c.Type == batchv1.JobFailed) && c.Status == v1.ConditionTrue {
158-
return true
159-
}
181+
return func() string {
182+
return fmt.Sprintf("expected job %q to reach the expected condition %q with reason %q", klog.KObj(job), cType, ptr.Deref(reason, "<nil>"))
183+
}, nil
160184
}
161-
return false
185+
return framework.Gomega().
186+
Eventually(ctx, framework.GetObject(c.BatchV1().Jobs(ns).Get, jobName, metav1.GetOptions{})).
187+
WithTimeout(JobTimeout).
188+
WithPolling(framework.Poll).
189+
Should(framework.MakeMatcher(match))
162190
}
163191

164192
// WaitForJobFinish uses c to wait for the Job jobName in namespace ns to finish (either Failed or Complete).
@@ -179,8 +207,20 @@ func WaitForJobFinishWithTimeout(ctx context.Context, c clientset.Interface, ns,
179207
}
180208

181209
func isJobFinished(j *batchv1.Job) bool {
210+
return isJobCompleted(j) || isJobFailed(j)
211+
}
212+
213+
func isJobFailed(j *batchv1.Job) bool {
214+
return isConditionTrue(j, batchv1.JobFailed)
215+
}
216+
217+
func isJobCompleted(j *batchv1.Job) bool {
218+
return isConditionTrue(j, batchv1.JobComplete)
219+
}
220+
221+
func isConditionTrue(j *batchv1.Job, condition batchv1.JobConditionType) bool {
182222
for _, c := range j.Status.Conditions {
183-
if (c.Type == batchv1.JobComplete || c.Type == batchv1.JobFailed) && c.Status == v1.ConditionTrue {
223+
if c.Type == condition && c.Status == v1.ConditionTrue {
184224
return true
185225
}
186226
}

0 commit comments

Comments
 (0)