@@ -26,6 +26,7 @@ import (
26
26
batchinternal "k8s.io/kubernetes/pkg/apis/batch"
27
27
"k8s.io/kubernetes/test/e2e/framework"
28
28
jobutil "k8s.io/kubernetes/test/e2e/framework/job"
29
+ e2enode "k8s.io/kubernetes/test/e2e/framework/node"
29
30
e2epod "k8s.io/kubernetes/test/e2e/framework/pod"
30
31
31
32
"github.com/onsi/ginkgo"
@@ -94,12 +95,10 @@ var _ = SIGDescribe("Job", func() {
94
95
framework .ConformanceIt ("should run a job to completion when tasks sometimes fail and are locally restarted" , func () {
95
96
ginkgo .By ("Creating a job" )
96
97
// One failure, then a success, local restarts.
97
- // We can't use the random failure approach used by the
98
- // non-local test below, because kubelet will throttle
99
- // frequently failing containers in a given pod, ramping
100
- // up to 5 minutes between restarts, making test timeouts
101
- // due to successive failures too likely with a reasonable
102
- // test timeout.
98
+ // We can't use the random failure approach, because kubelet will
99
+ // throttle frequently failing containers in a given pod, ramping
100
+ // up to 5 minutes between restarts, making test timeouts due to
101
+ // successive failures too likely with a reasonable test timeout.
103
102
job := jobutil .NewTestJob ("failOnce" , "fail-once-local" , v1 .RestartPolicyOnFailure , parallelism , completions , nil , backoffLimit )
104
103
job , err := jobutil .CreateJob (f .ClientSet , f .Namespace .Name , job )
105
104
framework .ExpectNoError (err , "failed to create job in namespace: %s" , f .Namespace .Name )
@@ -111,18 +110,20 @@ var _ = SIGDescribe("Job", func() {
111
110
112
111
// Pods sometimes fail, but eventually succeed, after pod restarts
113
112
ginkgo .It ("should run a job to completion when tasks sometimes fail and are not locally restarted" , func () {
113
+ // One failure, then a success, no local restarts.
114
+ // We can't use the random failure approach, because JobController
115
+ // will throttle frequently failing Pods of a given Job, ramping
116
+ // up to 6 minutes between restarts, making the test time out due to
117
+ // successive failures.
118
+ // Instead, we force the Job's Pods to be scheduled to a single Node
119
+ // and use a hostPath volume to persist data across new Pods.
120
+ ginkgo .By ("Looking for a node to schedule job pod" )
121
+ node , err := e2enode .GetRandomReadySchedulableNode (f .ClientSet )
122
+ framework .ExpectNoError (err )
123
+
114
124
ginkgo .By ("Creating a job" )
115
- // 50% chance of container success, local restarts.
116
- // Can't use the failOnce approach because that relies
117
- // on an emptyDir, which is not preserved across new pods.
118
- // Worst case analysis: 15 failures, each taking 1 minute to
119
- // run due to some slowness, 1 in 2^15 chance of happening,
120
- // causing test flake. Should be very rare.
121
- // With the introduction of backoff limit and high failure rate this
122
- // is hitting its timeout, the 3 is a reasonable that should make this
123
- // test less flaky, for now.
124
- job := jobutil .NewTestJob ("randomlySucceedOrFail" , "rand-non-local" , v1 .RestartPolicyNever , parallelism , 3 , nil , 999 )
125
- job , err := jobutil .CreateJob (f .ClientSet , f .Namespace .Name , job )
125
+ job := jobutil .NewTestJobOnNode ("failOnce" , "fail-once-non-local" , v1 .RestartPolicyNever , parallelism , completions , nil , backoffLimit , node .Name )
126
+ job , err = jobutil .CreateJob (f .ClientSet , f .Namespace .Name , job )
126
127
framework .ExpectNoError (err , "failed to create job in namespace: %s" , f .Namespace .Name )
127
128
128
129
ginkgo .By ("Ensuring job reaches completions" )
0 commit comments