
Commit 7529696

Merge pull request kubernetes#128334 from mimowo/job-windows-e2e-test
Job Pod Failure policy refactor e2e test using exit codes
2 parents daef8c2 + af77241 commit 7529696

2 files changed: +21 −37 lines changed

test/e2e/apps/job.go

Lines changed: 6 additions & 37 deletions
@@ -146,10 +146,10 @@ var _ = SIGDescribe("Job", func() {
 	/*
 		Testname: Ensure pod failure policy allows to ignore failure matching on the exit code
 		Description: This test is using an indexed job. The pod corresponding to each index
-		creates a marker file on the host and runs 'forever' until evicted. Once
-		the marker file is created the pod succeeds seeing it on restart. Thus,
-		we trigger one failure per index due to eviction, so the Job would be
-		marked as failed, if not for the ignore rule matching on exit codes.
+		creates a marker file on the host and fails. Once the marker file is
+		created the pod succeeds seeing it on restart. Thus, we trigger one
+		failure per index, so the Job would be marked as failed, if not for the
+		ignore rule matching on exit codes.
 	*/
 	ginkgo.It("should allow to use a pod failure policy to ignore failure matching on exit code", func(ctx context.Context) {
 		// We set the backoffLimit = numPods-1 so that we can tolerate random
@@ -165,53 +165,22 @@ var _ = SIGDescribe("Job", func() {
 		framework.ExpectNoError(err)
 
 		ginkgo.By("Creating a job")
-		job := e2ejob.NewTestJobOnNode("notTerminateOncePerIndex", "evicted-pod-ignore-on-exit-code", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
+		job := e2ejob.NewTestJobOnNode("failOncePerIndex", "fail-pod-ignore-on-exit-code", v1.RestartPolicyNever, parallelism, completions, nil, backoffLimit, node.Name)
 		job.Spec.CompletionMode = ptr.To(batchv1.IndexedCompletion)
 		job.Spec.PodFailurePolicy = &batchv1.PodFailurePolicy{
 			Rules: []batchv1.PodFailurePolicyRule{
 				{
-					// Ignore the pod failure caused by the eviction based on the
-					// exit code corresponding to SIGKILL.
 					Action: batchv1.PodFailurePolicyActionIgnore,
 					OnExitCodes: &batchv1.PodFailurePolicyOnExitCodesRequirement{
 						Operator: batchv1.PodFailurePolicyOnExitCodesOpIn,
-						Values:   []int32{137},
+						Values:   []int32{42},
 					},
 				},
 			},
 		}
 		job, err = e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
 		framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
 
-		ginkgo.By("Waiting for all the pods to be ready")
-		err = e2ejob.WaitForJobReady(ctx, f.ClientSet, f.Namespace.Name, job.Name, ptr.To(int32(numPods)))
-		framework.ExpectNoError(err, "failed to await for all pods to be ready for job: %s/%s", job.Name, job.Namespace)
-
-		ginkgo.By("Fetch all running pods")
-		pods, err := e2ejob.GetAllRunningJobPods(ctx, f.ClientSet, f.Namespace.Name, job.Name)
-		framework.ExpectNoError(err, "failed to get running pods for the job: %s/%s", job.Name, job.Namespace)
-		gomega.Expect(pods).To(gomega.HaveLen(numPods), "Number of running pods doesn't match parallelism")
-
-		ginkgo.By("Evict all the Pods")
-		workqueue.ParallelizeUntil(ctx, numPods, numPods, func(index int) {
-			defer ginkgo.GinkgoRecover()
-
-			pod := pods[index]
-			ginkgo.By(fmt.Sprintf("Evicting the running pod: %s/%s", pod.Name, pod.Namespace))
-			evictTarget := &policyv1.Eviction{
-				ObjectMeta: metav1.ObjectMeta{
-					Name:      pod.Name,
-					Namespace: pod.Namespace,
-				},
-			}
-			err = f.ClientSet.CoreV1().Pods(pod.Namespace).EvictV1(ctx, evictTarget)
-			framework.ExpectNoError(err, "failed to evict the pod: %s/%s", pod.Name, pod.Namespace)
-
-			ginkgo.By(fmt.Sprintf("Awaiting for the pod: %s/%s to be deleted", pod.Name, pod.Namespace))
-			err = e2epod.WaitForPodNotFoundInNamespace(ctx, f.ClientSet, pod.Name, pod.Namespace, f.Timeouts.PodDelete)
-			framework.ExpectNoError(err, "failed to await for all pods to be deleted: %s/%s", pod.Name, pod.Namespace)
-		})
-
 		ginkgo.By("Ensuring job reaches completions")
 		err = e2ejob.WaitForJobComplete(ctx, f.ClientSet, f.Namespace.Name, job.Name, nil, completions)
 		framework.ExpectNoError(err, "failed to ensure job completion in namespace: %s", f.Namespace.Name)
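
For context on the rule the test now relies on: a pod failure policy rule with OnExitCodes is evaluated against the exit codes of the failed pod's terminated containers, and the Ignore action keeps a matching failure from counting against the Job's backoffLimit. The sketch below is illustrative only and not part of this commit; the helper name podHasIgnoredExitCode is hypothetical. It merely shows the status field such a rule conceptually inspects, using the test's exit code 42.

package main

import (
	"fmt"

	corev1 "k8s.io/api/core/v1"
)

// podHasIgnoredExitCode is a hypothetical helper (not in this PR) showing the
// status field an OnExitCodes requirement conceptually evaluates: the exit
// code of a terminated container. With the test's rule (Operator: In,
// Values: [42]) and Action: Ignore, a pod that fails this way does not count
// against the Job's backoffLimit.
func podHasIgnoredExitCode(pod *corev1.Pod, ignored ...int32) bool {
	for _, cs := range pod.Status.ContainerStatuses {
		t := cs.State.Terminated
		if t == nil {
			continue
		}
		for _, code := range ignored {
			if t.ExitCode == code {
				return true
			}
		}
	}
	return false
}

func main() {
	// A pod whose only container terminated with the fixture's exit code 42.
	pod := &corev1.Pod{
		Status: corev1.PodStatus{
			ContainerStatuses: []corev1.ContainerStatus{
				{State: corev1.ContainerState{
					Terminated: &corev1.ContainerStateTerminated{ExitCode: 42},
				}},
			},
		},
	}
	fmt.Println(podHasIgnoredExitCode(pod, 42)) // true
}

In the refactored test that matching is done by the Job controller itself; the e2e code no longer evicts pods and simply waits for the Job to complete.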

test/e2e/framework/job/fixtures.go

Lines changed: 15 additions & 0 deletions
@@ -136,6 +136,21 @@ func NewTestJobOnNode(behavior, name string, rPol v1.RestartPolicy, parallelism,
 		exit 1
 		fi
 		`}
+	case "failOncePerIndex":
+		// Use marker files per index. If the given marker file already exists
+		// then terminate successfully. Otherwise create the marker file and
+		// fail with exit code 42.
+		setupHostPathDirectory(job)
+		job.Spec.Template.Spec.Containers[0].Command = []string{"/bin/sh", "-c"}
+		job.Spec.Template.Spec.Containers[0].Args = []string{`
+		if [[ -r /data/foo-$JOB_COMPLETION_INDEX ]]
+		then
+			exit 0
+		else
+			touch /data/foo-$JOB_COMPLETION_INDEX
+			exit 42
+		fi
+		`}
 	case "notTerminateOncePerIndex":
 		// Use marker files per index. If the given marker file already exists
 		// then terminate successfully. Otherwise create the marker file and
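
The added "failOncePerIndex" behavior reads as: the first run for a given completion index creates a per-index marker file under the hostPath-backed /data mount and exits 42, while a retried pod for the same index finds the marker and exits 0. Below is a rough stand-alone Go rendering of that shell logic, for illustration only; the fixture actually runs the /bin/sh script above, and the /data path is assumed to come from setupHostPathDirectory.

package main

import (
	"os"
	"path/filepath"
)

// Illustrative translation of the "failOncePerIndex" shell snippet: succeed if
// the per-index marker already exists, otherwise create it and exit with 42 so
// that the Job's Ignore-on-exit-code rule can match the failure.
func main() {
	index := os.Getenv("JOB_COMPLETION_INDEX")
	marker := filepath.Join("/data", "foo-"+index)

	if _, err := os.Stat(marker); err == nil {
		// Marker present: this index already failed once, so succeed now.
		os.Exit(0)
	}
	if f, err := os.Create(marker); err == nil {
		f.Close()
	}
	// First attempt for this index: fail with the exit code the policy ignores.
	os.Exit(42)
}

Because each index fails exactly once with exit code 42, the Ignore rule absorbs every failure and the indexed Job still reaches its completions.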
