
Commit 6f4ad7a

Merge pull request kubernetes#121491 from dejanzele/pod-replacement-policy-e2e
Switch feature flag to beta for pod replacement policy and add e2e test
2 parents f4f5d07 + e98c33b commit 6f4ad7a

5 files changed: +255 -136 lines changed
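The promoted JobPodReplacementPolicy feature lets a Job ask for replacement pods to be created only once the previous pods have fully failed, rather than as soon as they begin terminating. A minimal sketch of a Job using the field via the Go client types (the object name, image, and command are illustrative, not from this commit):

package main

import (
    "fmt"

    batchv1 "k8s.io/api/batch/v1"
    corev1 "k8s.io/api/core/v1"
    metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    "k8s.io/utils/ptr"
)

func main() {
    // A Job that only creates replacement pods once the old pods have
    // fully terminated (reached the Failed phase), instead of as soon
    // as they start terminating.
    job := &batchv1.Job{
        ObjectMeta: metav1.ObjectMeta{Name: "example"}, // hypothetical name
        Spec: batchv1.JobSpec{
            PodReplacementPolicy: ptr.To(batchv1.Failed),
            Template: corev1.PodTemplateSpec{
                Spec: corev1.PodSpec{
                    RestartPolicy: corev1.RestartPolicyNever,
                    Containers: []corev1.Container{{
                        Name:    "worker",
                        Image:   "busybox", // hypothetical image
                        Command: []string{"sh", "-c", "sleep 30"},
                    }},
                },
            },
        },
    }
    fmt.Println(*job.Spec.PodReplacementPolicy) // Failed
}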

pkg/controller/job/job_controller_test.go

Lines changed: 11 additions & 0 deletions
@@ -3452,6 +3452,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
             wantStatus: batch.JobStatus{
                 Failed: 1,
                 Succeeded: 2,
+                Terminating: ptr.To[int32](0),
                 CompletedIndexes: "0,1",
                 FailedIndexes: ptr.To(""),
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
@@ -3483,6 +3484,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
             },
             wantStatus: batch.JobStatus{
                 Active: 2,
+                Terminating: ptr.To[int32](0),
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
                 FailedIndexes: ptr.To(""),
             },
@@ -3509,6 +3511,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
             wantStatus: batch.JobStatus{
                 Active: 2,
                 Failed: 1,
+                Terminating: ptr.To[int32](0),
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
                 FailedIndexes: ptr.To(""),
             },
@@ -3535,6 +3538,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
                 Active: 1,
                 Failed: 1,
                 FailedIndexes: ptr.To("0"),
+                Terminating: ptr.To[int32](0),
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
             },
         },
@@ -3583,6 +3587,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
                 Active: 1,
                 Failed: 1,
                 FailedIndexes: ptr.To("0"),
+                Terminating: ptr.To[int32](0),
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
             },
         },
@@ -3632,6 +3637,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
                 Active: 0,
                 Failed: 1,
                 FailedIndexes: ptr.To(""),
+                Terminating: ptr.To[int32](0),
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
                 Conditions: []batch.JobCondition{
                     {
@@ -3695,6 +3701,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
                 Active: 2,
                 Failed: 0,
                 FailedIndexes: ptr.To(""),
+                Terminating: ptr.To[int32](0),
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
             },
         },
@@ -3721,6 +3728,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
                 Failed: 2,
                 Succeeded: 0,
                 FailedIndexes: ptr.To(""),
+                Terminating: ptr.To[int32](0),
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
                 Conditions: []batch.JobCondition{
                     {
@@ -3754,6 +3762,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
             wantStatus: batch.JobStatus{
                 Failed: 1,
                 Succeeded: 1,
+                Terminating: ptr.To[int32](0),
                 FailedIndexes: ptr.To("0"),
                 CompletedIndexes: "1",
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
@@ -3792,6 +3801,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
             wantStatus: batch.JobStatus{
                 Failed: 3,
                 Succeeded: 1,
+                Terminating: ptr.To[int32](0),
                 FailedIndexes: ptr.To("0,2"),
                 CompletedIndexes: "1",
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
@@ -3830,6 +3840,7 @@ func TestSyncJobWithJobBackoffLimitPerIndex(t *testing.T) {
             wantStatus: batch.JobStatus{
                 Active: 2,
                 Succeeded: 1,
+                Terminating: ptr.To[int32](0),
                 CompletedIndexes: "1",
                 UncountedTerminatedPods: &batch.UncountedTerminatedPods{},
             },
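Each wantStatus above gains Terminating: ptr.To[int32](0) because, with the gate now on by default, the Job controller reports a terminating-pod count in status. The field is a *int32, so nil can signal "not tracked" while ptr.To[int32](0) is an explicit zero. A small sketch of the ptr.To helper from k8s.io/utils/ptr:

package main

import (
    "fmt"

    "k8s.io/utils/ptr"
)

func main() {
    // ptr.To returns a pointer to a copy of its argument; the tests use
    // the same pattern to build the expected status.terminating value.
    terminating := ptr.To[int32](0)

    // A nil pointer means the field is not populated (gate disabled),
    // which is distinct from an explicit zero.
    var unset *int32
    fmt.Println(*terminating == 0, unset == nil) // true true
}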

pkg/controller/podgc/gc_controller_test.go

Lines changed: 4 additions & 0 deletions
@@ -295,6 +295,7 @@ func TestGCOrphaned(t *testing.T) {
             },
             itemsInQueue: 1,
             deletedPodNames: sets.NewString("a"),
+            patchedPodNames: sets.NewString("a"),
         },
         {
             name: "some nodes missing",
@@ -308,6 +309,7 @@ func TestGCOrphaned(t *testing.T) {
             },
             itemsInQueue: 1,
             deletedPodNames: sets.NewString("a", "c", "d"),
+            patchedPodNames: sets.NewString("d"),
         },
         {
             name: "node added to client after quarantine",
@@ -457,6 +459,7 @@ func TestGCUnscheduledTerminating(t *testing.T) {
                 {name: "c", phase: v1.PodRunning, deletionTimeStamp: &metav1.Time{}, nodeName: ""},
             },
             deletedPodNames: sets.NewString("a", "b", "c"),
+            patchedPodNames: sets.NewString("c"),
         },
         {
             name: "Scheduled pod in any phase must not be deleted",
@@ -607,6 +610,7 @@ func TestGCTerminating(t *testing.T) {
                 {name: "e6", phase: v1.PodUnknown, nodeName: "worker-5"},
             },
             deletedPodNames: sets.NewString("b1", "b4", "b5", "b6"),
+            patchedPodNames: sets.NewString("b1", "b4", "b5", "b6"),
         },
         {
             name: "pods deleted from node tained out-of-service; PodDisruptionConditions enabled",

pkg/features/kube_features.go

Lines changed: 2 additions & 1 deletion
@@ -390,6 +390,7 @@ const (
     // owner: @kannon92
     // kep: https://kep.k8s.io/3939
     // alpha: v1.28
+    // beta: v1.29
     //
     // Allow users to specify recreating pods of a job only when
     // pods have fully terminated.
@@ -991,7 +992,7 @@ var defaultKubernetesFeatureGates = map[featuregate.Feature]featuregate.FeatureSpec{

     JobPodFailurePolicy: {Default: true, PreRelease: featuregate.Beta},

-    JobPodReplacementPolicy: {Default: false, PreRelease: featuregate.Alpha},
+    JobPodReplacementPolicy: {Default: true, PreRelease: featuregate.Beta},

     JobReadyPods: {Default: true, PreRelease: featuregate.GA, LockToDefault: true}, // remove in 1.31

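Promoting the gate to beta flips its default to true, so code guarded by it now runs without any cluster configuration. A sketch of what such a guard typically looks like, using the real DefaultFeatureGate API; the helper name and exact logic are illustrative, not the Job controller's actual code:

package job

import (
    batchv1 "k8s.io/api/batch/v1"
    utilfeature "k8s.io/apiserver/pkg/util/feature"

    "k8s.io/kubernetes/pkg/features"
)

// onlyReplaceFailedPods (illustrative) reports whether replacement pods
// should wait for old pods to reach the Failed phase. With the gate now
// beta (default true), the first branch no longer short-circuits by default.
func onlyReplaceFailedPods(policy *batchv1.PodReplacementPolicy) bool {
    if !utilfeature.DefaultFeatureGate.Enabled(features.JobPodReplacementPolicy) {
        return false
    }
    return policy != nil && *policy == batchv1.Failed
}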
test/e2e/apps/job.go

Lines changed: 55 additions & 0 deletions
@@ -344,6 +344,61 @@ var _ = SIGDescribe("Job", func() {
         }
     })

+    ginkgo.It("should recreate pods only after they have failed if pod replacement policy is set to Failed", func(ctx context.Context) {
+        ginkgo.By("Creating a job")
+        job := e2ejob.NewTestJob("", "pod-recreate-failed", v1.RestartPolicyNever, 1, 1, nil, 1)
+        job.Spec.PodReplacementPolicy = ptr.To(batchv1.Failed)
+        job.Spec.Template.Spec.Containers[0].Command = []string{"/bin/sh", "-c", `_term(){
+    sleep 5
+    exit 143
+}
+trap _term SIGTERM
+while true; do
+    sleep 1
+done`}
+        job, err := e2ejob.CreateJob(ctx, f.ClientSet, f.Namespace.Name, job)
+        framework.ExpectNoError(err, "failed to create job in namespace: %s", f.Namespace.Name)
+
+        err = e2ejob.WaitForJobPodsRunning(ctx, f.ClientSet, f.Namespace.Name, job.Name, 1)
+        framework.ExpectNoError(err, "failed to wait for job pod to become running in namespace: %s", f.Namespace.Name)
+
+        ginkgo.By("Deleting job pod")
+        pods, err := e2ejob.GetJobPods(ctx, f.ClientSet, f.Namespace.Name, job.Name)
+        framework.ExpectNoError(err, "failed to get pod list for job %s in namespace: %s", job.Name, f.Namespace.Name)
+
+        framework.ExpectNoError(e2epod.DeletePodsWithGracePeriod(ctx, f.ClientSet, pods.Items, 30), "failed to delete pods in namespace: %s", f.Namespace.Name)
+
+        ginkgo.By("Ensuring pod does not get recreated while it is in terminating state")
+        err = e2ejob.WaitForJobState(ctx, f.ClientSet, f.Namespace.Name, job.Name, f.Timeouts.PodDelete, func(job *batchv1.Job) string {
+            if job.Status.Active == 0 && job.Status.Failed == 0 && *job.Status.Terminating == 1 {
+                return ""
+            } else {
+                return fmt.Sprintf(
+                    "expected job to have 0 active pods, 0 failed pods and 1 terminating pod, but got %d active pods, %d failed pods and %d terminating pods",
+                    job.Status.Active,
+                    job.Status.Failed,
+                    *job.Status.Terminating,
+                )
+            }
+        })
+        framework.ExpectNoError(err, "failed to ensure pod is not recreated while it is in terminating state")
+
+        ginkgo.By("Ensuring pod gets recreated after it has failed")
+        err = e2ejob.WaitForJobState(ctx, f.ClientSet, f.Namespace.Name, job.Name, f.Timeouts.PodDelete, func(job *batchv1.Job) string {
+            if job.Status.Active == 1 && job.Status.Failed == 1 && *job.Status.Terminating == 0 {
+                return ""
+            } else {
+                return fmt.Sprintf(
+                    "expected job to have 1 active pod, 1 failed pod and 0 terminating pods, but got %d active pods, %d failed pods and %d terminating pods",
+                    job.Status.Active,
+                    job.Status.Failed,
+                    *job.Status.Terminating,
+                )
+            }
+        })
+        framework.ExpectNoError(err, "failed to wait for pod to get recreated")
+    })
+
     /*
         Release: v1.24
         Testname: Ensure Pods of an Indexed Job get a unique index.
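A note on the container command in the added test: the shell traps SIGTERM, sleeps five seconds, and exits 143 (128 + 15, the conventional exit status for SIGTERM), so a deleted pod lingers in the terminating state long enough for the first WaitForJobState assertion to observe one terminating pod before the pod fails and is replaced.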
