@@ -902,20 +902,31 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
902
902
903
903
var manageJobErr error
904
904
905
+ // This is the starting point for evaluating the end state of the Job.
906
+ // Note that we need to order evaluations since a Job could satisfy multiple criteria at the same time in some cases:
907
+ // 1. Evaluate the pre-existing SuccessCriteriaMet and FailureTarget to respect the previous reconcile results, then transform FailureTarget to Failed.
908
+ // 2. Evaluate failure scenarios.
909
+ // 3. Evaluate success scenarios.
910
+ // 4. Evaluate jobCtx.finishedCondition (see trackJobStatusAndRemoveFinalizers), then transform FailureTarget to Failed and SuccessCriteriaMet to Complete once the job is finished.
911
+
905
912
exceedsBackoffLimit := jobCtx .failed > * job .Spec .BackoffLimit
913
+ // Evaluate the pre-existing SuccessCriteriaMet.
906
914
jobCtx .finishedCondition = hasSuccessCriteriaMetCondition (& job )
907
915
908
916
// Given that the Job already has the SuccessCriteriaMet condition, the termination condition had already been confirmed in another cycle.
909
917
// So, the job-controller evaluates the podFailurePolicy only when the Job doesn't have the SuccessCriteriaMet condition.
910
918
if jobCtx .finishedCondition == nil {
919
+ // Evaluate the pre-existing FailureTarget.
911
920
failureTargetCondition := findConditionByType (job .Status .Conditions , batch .JobFailureTarget )
912
921
if failureTargetCondition != nil && failureTargetCondition .Status == v1 .ConditionTrue {
913
922
jobCtx .finishedCondition = newFailedConditionForFailureTarget (failureTargetCondition , jm .clock .Now ())
923
+ // Evaluate failure scenarios for PodFailurePolicy.
914
924
} else if failJobMessage := getFailJobMessage (& job , pods ); failJobMessage != nil {
915
925
// Prepare the interim FailureTarget condition to record the failure message before the finalizers (allowing removal of the pods) are removed.
916
926
jobCtx .finishedCondition = newCondition (batch .JobFailureTarget , v1 .ConditionTrue , batch .JobReasonPodFailurePolicy , * failJobMessage , jm .clock .Now ())
917
927
}
918
928
}
929
+ // Evaluate failure scenarios for BackoffLimit and ActiveDeadlineSeconds.
919
930
if jobCtx .finishedCondition == nil {
920
931
if exceedsBackoffLimit || pastBackoffLimitOnFailure (& job , pods ) {
921
932
// check if the number of pod restarts exceeds the backoff limit (for restart OnFailure only)
@@ -933,6 +944,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
933
944
if isIndexedJob (& job ) {
934
945
jobCtx .prevSucceededIndexes , jobCtx .succeededIndexes = calculateSucceededIndexes (logger , & job , pods )
935
946
jobCtx .succeeded = int32 (jobCtx .succeededIndexes .total ())
947
+ // Evaluate failure scenarios for BackoffLimitPerIndex.
936
948
if hasBackoffLimitPerIndex (& job ) {
937
949
jobCtx .failedIndexes = calculateFailedIndexes (logger , & job , pods )
938
950
if jobCtx .finishedCondition == nil {
@@ -944,6 +956,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
944
956
}
945
957
jobCtx .podsWithDelayedDeletionPerIndex = getPodsWithDelayedDeletionPerIndex (logger , jobCtx )
946
958
}
959
+ // Evaluate success scenarios for SuccessPolicy.
947
960
if jobCtx .finishedCondition == nil {
948
961
if msg , met := matchSuccessPolicy (logger , job .Spec .SuccessPolicy , * job .Spec .Completions , jobCtx .succeededIndexes ); met {
949
962
jobCtx .finishedCondition = newCondition (batch .JobSuccessCriteriaMet , v1 .ConditionTrue , batch .JobReasonSuccessPolicy , msg , jm .clock .Now ())
@@ -971,6 +984,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
971
984
active , action , manageJobErr = jm .manageJob (ctx , & job , jobCtx )
972
985
manageJobCalled = true
973
986
}
987
+ // Evaluate success scenarios for Completions.
974
988
complete := false
975
989
if job .Spec .Completions == nil {
976
990
// This type of job is complete when any pod exits with success.
@@ -1253,6 +1267,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
1253
1267
needsFlush = true
1254
1268
}
1255
1269
}
1270
+ // Evaluate jobCtx.finishedCondition and transform FailureTarget to Failed.
1256
1271
if jobCtx .finishedCondition != nil && jobCtx .finishedCondition .Type == batch .JobFailureTarget {
1257
1272
1258
1273
// Append the interim FailureTarget condition so that the job status is updated with it before finalizers are removed.
@@ -1263,6 +1278,7 @@ func (jm *Controller) trackJobStatusAndRemoveFinalizers(ctx context.Context, job
1263
1278
// It is also used in the enactJobFinished function for reporting.
1264
1279
jobCtx .finishedCondition = newFailedConditionForFailureTarget (jobCtx .finishedCondition , jm .clock .Now ())
1265
1280
}
1281
+ // Evaluate jobCtx.finishedCondition and transform SuccessCriteriaMet to Complete.
1266
1282
if isSuccessCriteriaMetCondition (jobCtx .finishedCondition ) {
1267
1283
// Append the interim SuccessCriteriaMet condition so that the job status is updated with it before finalizers are removed.
1268
1284
if hasSuccessCriteriaMetCondition (jobCtx .job ) == nil {
0 commit comments