@@ -45,6 +45,7 @@ import (
45
45
clientset "k8s.io/client-go/kubernetes"
46
46
typedv1 "k8s.io/client-go/kubernetes/typed/batch/v1"
47
47
restclient "k8s.io/client-go/rest"
48
+ cache "k8s.io/client-go/tools/cache"
48
49
"k8s.io/client-go/tools/record"
49
50
"k8s.io/client-go/util/retry"
50
51
featuregatetesting "k8s.io/component-base/featuregate/testing"
@@ -65,6 +66,7 @@ import (
65
66
66
67
// Shared timing values for the tests in this file.
const (
	// waitInterval is a fixed one-second duration.
	// NOTE(review): presumably the polling interval for wait helpers — confirm against callers.
	waitInterval = time.Second

	// fastPodFailureBackoff is a short (100ms) backoff that tests install in
	// place of jobcontroller.DefaultJobPodFailureBackOff.
	fastPodFailureBackoff = 100 * time.Millisecond

	// fastSyncJobBatchPeriod is a short (100ms) period that tests install in
	// place of jobcontroller.SyncJobBatchPeriod.
	fastSyncJobBatchPeriod = 100 * time.Millisecond
)
68
70
69
71
// Time duration used to account for controller latency in tests in which it is
70
72
// expected the Job controller does not make a change. In those cases we wait a
@@ -4067,6 +4069,145 @@ func TestNodeSelectorUpdate(t *testing.T) {
4067
4069
4068
4070
}
4069
4071
4072
+ // TestDelayedJobUpdateEvent tests that a Job only creates one Pod even when
4073
+ // the job events are delayed. This test verfies the finishedJobStore is working
4074
+ // correctly and preventing from job controller creating a new pod if the job success
4075
+ // or fail event is delayed.
4076
+ func TestDelayedJobUpdateEvent (t * testing.T ) {
4077
+ t .Cleanup (setDurationDuringTest (& jobcontroller .DefaultJobPodFailureBackOff , fastPodFailureBackoff ))
4078
+ t .Cleanup (setDurationDuringTest (& jobcontroller .SyncJobBatchPeriod , fastSyncJobBatchPeriod ))
4079
+ closeFn , restConfig , clientSet , ns := setup (t , "simple" )
4080
+ t .Cleanup (closeFn )
4081
+ // the transform is used to introduce a delay for the job events. Since all the object have to go through
4082
+ // transform func first before being added to the informer cache, this would serve as an indirect way to
4083
+ // introduce watch event delay.
4084
+ transformOpt := informers .WithTransform (cache .TransformFunc (func (obj interface {}) (interface {}, error ) {
4085
+ _ , ok := obj .(* batchv1.Job )
4086
+ if ok {
4087
+ // This will make sure pod events are processed before the job events occur.
4088
+ time .Sleep (2 * fastSyncJobBatchPeriod )
4089
+ }
4090
+ return obj , nil
4091
+ }))
4092
+
4093
+ type jobStatus struct {
4094
+ succeeded int
4095
+ failed int
4096
+ status batchv1.JobConditionType
4097
+ }
4098
+
4099
+ cases := map [string ]struct {
4100
+ podReplacementPolicyEnabled bool
4101
+ job * batchv1.Job
4102
+ podUpdate func (* v1.Pod ) bool
4103
+ wantStatus jobStatus
4104
+ }{
4105
+ "job succeeded event delayed" : {
4106
+ job : & batchv1.Job {},
4107
+ podUpdate : func (p * v1.Pod ) bool {
4108
+ p .Status .Phase = v1 .PodSucceeded
4109
+ p .Status .ContainerStatuses = []v1.ContainerStatus {
4110
+ {
4111
+ State : v1.ContainerState {
4112
+ Terminated : & v1.ContainerStateTerminated {
4113
+ FinishedAt : metav1 .Now (),
4114
+ },
4115
+ },
4116
+ },
4117
+ }
4118
+ return true
4119
+ },
4120
+ wantStatus : jobStatus {
4121
+ succeeded : 1 ,
4122
+ failed : 0 ,
4123
+ status : batchv1 .JobComplete ,
4124
+ },
4125
+ },
4126
+ "job failed event delayed" : {
4127
+ job : & batchv1.Job {
4128
+ Spec : batchv1.JobSpec {
4129
+ Template : v1.PodTemplateSpec {
4130
+ Spec : v1.PodSpec {
4131
+ Containers : []v1.Container {
4132
+ {
4133
+ Name : "main-container" ,
4134
+ Image : "foo" ,
4135
+ ImagePullPolicy : v1 .PullIfNotPresent ,
4136
+ TerminationMessagePolicy : v1 .TerminationMessageFallbackToLogsOnError ,
4137
+ },
4138
+ },
4139
+ },
4140
+ },
4141
+ BackoffLimit : ptr.To [int32 ](0 ),
4142
+ },
4143
+ },
4144
+ podUpdate : func (p * v1.Pod ) bool {
4145
+ p .Status = v1.PodStatus {
4146
+ Phase : v1 .PodFailed ,
4147
+ ContainerStatuses : []v1.ContainerStatus {
4148
+ {
4149
+ Name : "main-container" ,
4150
+ State : v1.ContainerState {
4151
+ Terminated : & v1.ContainerStateTerminated {
4152
+ ExitCode : 5 ,
4153
+ },
4154
+ },
4155
+ },
4156
+ },
4157
+ }
4158
+ return true
4159
+ },
4160
+ wantStatus : jobStatus {
4161
+ succeeded : 0 ,
4162
+ failed : 1 ,
4163
+ status : batchv1 .JobFailed ,
4164
+ },
4165
+ },
4166
+ }
4167
+
4168
+ for name , tc := range cases {
4169
+ tc := tc
4170
+ t .Run (name , func (t * testing.T ) {
4171
+ ctx , cancel := startJobControllerAndWaitForCaches (t , restConfig , transformOpt )
4172
+ t .Cleanup (cancel )
4173
+ resetMetrics ()
4174
+
4175
+ jobObj , err := createJobWithDefaults (ctx , clientSet , ns .Name , tc .job )
4176
+ if err != nil {
4177
+ t .Fatalf ("Failed to create Job: %v" , err )
4178
+ }
4179
+
4180
+ validateJobPodsStatus (ctx , t , clientSet , jobObj , podsByStatus {
4181
+ Active : 1 ,
4182
+ Ready : ptr.To [int32 ](0 ),
4183
+ Terminating : ptr.To [int32 ](0 ),
4184
+ })
4185
+
4186
+ if _ , err := updateJobPodsStatus (ctx , clientSet , jobObj , tc .podUpdate , 1 ); err != nil {
4187
+ t .Fatalf ("Error %q while updating pod status for Job: %v" , err , jobObj .Name )
4188
+ }
4189
+
4190
+ validateJobsPodsStatusOnly (ctx , t , clientSet , jobObj , podsByStatus {
4191
+ Failed : tc .wantStatus .failed ,
4192
+ Succeeded : tc .wantStatus .succeeded ,
4193
+ Ready : ptr.To [int32 ](0 ),
4194
+ Terminating : ptr.To [int32 ](0 ),
4195
+ })
4196
+
4197
+ validateJobCondition (ctx , t , clientSet , jobObj , tc .wantStatus .status )
4198
+
4199
+ jobPods , err := getJobPods (ctx , t , clientSet , jobObj , func (ps v1.PodStatus ) bool { return true })
4200
+ if err != nil {
4201
+ t .Fatalf ("Error %v getting the list of pods for job %q" , err , klog .KObj (jobObj ))
4202
+ }
4203
+ if len (jobPods ) != 1 {
4204
+ t .Errorf ("Found %d Pods for the job %q, want 1" , len (jobPods ), klog .KObj (jobObj ))
4205
+ }
4206
+ })
4207
+ }
4208
+
4209
+ }
4210
+
4070
4211
type podsByStatus struct {
4071
4212
Active int
4072
4213
Ready * int32
@@ -4488,9 +4629,9 @@ func setup(t testing.TB, nsBaseName string) (framework.TearDownFunc, *restclient
4488
4629
return closeFn , config , clientSet , ns
4489
4630
}
4490
4631
4491
- func startJobControllerAndWaitForCaches (tb testing.TB , restConfig * restclient.Config ) (context.Context , context.CancelFunc ) {
4632
+ func startJobControllerAndWaitForCaches (tb testing.TB , restConfig * restclient.Config , options ... informers. SharedInformerOption ) (context.Context , context.CancelFunc ) {
4492
4633
tb .Helper ()
4493
- informerSet := informers .NewSharedInformerFactory (clientset .NewForConfigOrDie (restclient .AddUserAgent (restConfig , "job-informers" )), 0 )
4634
+ informerSet := informers .NewSharedInformerFactoryWithOptions (clientset .NewForConfigOrDie (restclient .AddUserAgent (restConfig , "job-informers" )), 0 , options ... )
4494
4635
jc , ctx , cancel := createJobControllerWithSharedInformers (tb , restConfig , informerSet )
4495
4636
informerSet .Start (ctx .Done ())
4496
4637
go jc .Run (ctx , 1 )
0 commit comments