@@ -811,18 +811,16 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
811
811
if err != nil {
812
812
return err
813
813
}
814
- var terminating * int32
815
- if feature .DefaultFeatureGate .Enabled (features .JobPodReplacementPolicy ) {
816
- terminating = ptr .To (controller .CountTerminatingPods (pods ))
817
- }
818
814
jobCtx := & syncJobCtx {
819
815
job : & job ,
820
816
pods : pods ,
821
817
activePods : controller .FilterActivePods (logger , pods ),
822
- terminating : terminating ,
823
818
uncounted : newUncountedTerminatedPods (* job .Status .UncountedTerminatedPods ),
824
819
expectedRmFinalizers : jm .finalizerExpectations .getExpectedUIDs (key ),
825
820
}
821
+ if trackTerminatingPods (& job ) {
822
+ jobCtx .terminating = ptr .To (controller .CountTerminatingPods (pods ))
823
+ }
826
824
active := int32 (len (jobCtx .activePods ))
827
825
newSucceededPods , newFailedPods := getNewFinishedPods (jobCtx )
828
826
jobCtx .succeeded = job .Status .Succeeded + int32 (len (newSucceededPods )) + int32 (len (jobCtx .uncounted .succeeded ))
@@ -896,7 +894,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
896
894
jobCtx .finishedCondition = nil
897
895
}
898
896
active -= deleted
899
- if feature . DefaultFeatureGate . Enabled ( features . JobPodReplacementPolicy ) {
897
+ if trackTerminatingPods ( jobCtx . job ) {
900
898
* jobCtx .terminating += deleted
901
899
}
902
900
manageJobErr = err
@@ -956,11 +954,15 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
956
954
}
957
955
}
958
956
957
+ var terminating * int32
958
+ if feature .DefaultFeatureGate .Enabled (features .JobPodReplacementPolicy ) {
959
+ terminating = jobCtx .terminating
960
+ }
959
961
needsStatusUpdate := suspendCondChanged || active != job .Status .Active || ! ptr .Equal (ready , job .Status .Ready )
960
- needsStatusUpdate = needsStatusUpdate || ! ptr .Equal (job .Status .Terminating , jobCtx . terminating )
962
+ needsStatusUpdate = needsStatusUpdate || ! ptr .Equal (job .Status .Terminating , terminating )
961
963
job .Status .Active = active
962
964
job .Status .Ready = ready
963
- job .Status .Terminating = jobCtx . terminating
965
+ job .Status .Terminating = terminating
964
966
err = jm .trackJobStatusAndRemoveFinalizers (ctx , jobCtx , needsStatusUpdate )
965
967
if err != nil {
966
968
return fmt .Errorf ("tracking status: %w" , err )
@@ -1507,23 +1509,12 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
1507
1509
jm .expectations .ExpectDeletions (logger , jobKey , len (podsToDelete ))
1508
1510
removed , err := jm .deleteJobPods (ctx , job , jobKey , podsToDelete )
1509
1511
active -= removed
1510
- if feature . DefaultFeatureGate . Enabled ( features . JobPodReplacementPolicy ) {
1512
+ if trackTerminatingPods ( job ) {
1511
1513
* jobCtx .terminating += removed
1512
1514
}
1513
1515
return active , metrics .JobSyncActionPodsDeleted , err
1514
1516
}
1515
1517
1516
- var terminating int32 = 0
1517
- if onlyReplaceFailedPods (jobCtx .job ) {
1518
- // For PodFailurePolicy specified but PodReplacementPolicy disabled
1519
- // we still need to count terminating pods for replica counts
1520
- // But we will not allow updates to status.
1521
- if jobCtx .terminating == nil {
1522
- terminating = controller .CountTerminatingPods (jobCtx .pods )
1523
- } else {
1524
- terminating = * jobCtx .terminating
1525
- }
1526
- }
1527
1518
wantActive := int32 (0 )
1528
1519
if job .Spec .Completions == nil {
1529
1520
// Job does not specify a number of completions. Therefore, number active
@@ -1559,7 +1550,7 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
1559
1550
logger .V (4 ).Info ("Too many pods running for job" , "job" , klog .KObj (job ), "deleted" , len (podsToDelete ), "target" , wantActive )
1560
1551
removed , err := jm .deleteJobPods (ctx , job , jobKey , podsToDelete )
1561
1552
active -= removed
1562
- if feature . DefaultFeatureGate . Enabled ( features . JobPodReplacementPolicy ) {
1553
+ if trackTerminatingPods ( job ) {
1563
1554
* jobCtx .terminating += removed
1564
1555
}
1565
1556
// While it is possible for a Job to require both pod creations and
@@ -1569,6 +1560,12 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
1569
1560
return active , metrics .JobSyncActionPodsDeleted , err
1570
1561
}
1571
1562
1563
+ var terminating int32 = 0
1564
+ if onlyReplaceFailedPods (jobCtx .job ) {
1565
+ // When onlyReplaceFailedPods returns true, trackTerminatingPods is also true,
1566
+ // so jobCtx.terminating has been set and can be dereferenced here.
1567
+ terminating = * jobCtx .terminating
1568
+ }
1572
1569
if diff := wantActive - terminating - active ; diff > 0 {
1573
1570
var remainingTime time.Duration
1574
1571
if ! hasBackoffLimitPerIndex (job ) {
@@ -1954,6 +1951,17 @@ func countReadyPods(pods []*v1.Pod) int32 {
1954
1951
return cnt
1955
1952
}
1956
1953
1954
+ // trackTerminatingPods reports whether the count of terminating pods is tracked for the given job.
1955
+ // The count is tracked when any of the following is true:
1956
+ // - the JobPodReplacementPolicy feature gate is enabled, so that the count can be returned in the status field,
1957
+ // - the JobPodFailurePolicy feature gate is enabled and the job specifies a pod failure policy, in which case only failed pods are replaced.
1958
+ func trackTerminatingPods (job * batch.Job ) bool {
1959
+ if feature .DefaultFeatureGate .Enabled (features .JobPodReplacementPolicy ) {
1960
+ return true
1961
+ }
1962
+ return feature .DefaultFeatureGate .Enabled (features .JobPodFailurePolicy ) && job .Spec .PodFailurePolicy != nil
1963
+ }
1964
+
1957
1965
// This checks if we should apply PodReplacementPolicy.
1958
1966
// PodReplacementPolicy controls when we recreate pods if they are marked as terminating
1959
1967
// Failed means that we recreate only once the pod has terminated.
0 commit comments