@@ -811,18 +811,16 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
811
811
if err != nil {
812
812
return err
813
813
}
814
- var terminating * int32
815
- if feature .DefaultFeatureGate .Enabled (features .JobPodReplacementPolicy ) {
816
- terminating = ptr .To (controller .CountTerminatingPods (pods ))
817
- }
818
814
jobCtx := & syncJobCtx {
819
815
job : & job ,
820
816
pods : pods ,
821
817
activePods : controller .FilterActivePods (logger , pods ),
822
- terminating : terminating ,
823
818
uncounted : newUncountedTerminatedPods (* job .Status .UncountedTerminatedPods ),
824
819
expectedRmFinalizers : jm .finalizerExpectations .getExpectedUIDs (key ),
825
820
}
821
+ if trackTerminatingPods (& job ) {
822
+ jobCtx .terminating = ptr .To (controller .CountTerminatingPods (pods ))
823
+ }
826
824
active := int32 (len (jobCtx .activePods ))
827
825
newSucceededPods , newFailedPods := getNewFinishedPods (jobCtx )
828
826
jobCtx .succeeded = job .Status .Succeeded + int32 (len (newSucceededPods )) + int32 (len (jobCtx .uncounted .succeeded ))
@@ -896,7 +894,7 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
896
894
jobCtx .finishedCondition = nil
897
895
}
898
896
active -= deleted
899
- if feature . DefaultFeatureGate . Enabled ( features . JobPodReplacementPolicy ) {
897
+ if trackTerminatingPods ( jobCtx . job ) {
900
898
* jobCtx .terminating += deleted
901
899
}
902
900
manageJobErr = err
@@ -956,11 +954,15 @@ func (jm *Controller) syncJob(ctx context.Context, key string) (rErr error) {
956
954
}
957
955
}
958
956
957
+ var terminating * int32
958
+ if feature .DefaultFeatureGate .Enabled (features .JobPodReplacementPolicy ) {
959
+ terminating = jobCtx .terminating
960
+ }
959
961
needsStatusUpdate := suspendCondChanged || active != job .Status .Active || ! ptr .Equal (ready , job .Status .Ready )
960
- needsStatusUpdate = needsStatusUpdate || ! ptr .Equal (job .Status .Terminating , jobCtx . terminating )
962
+ needsStatusUpdate = needsStatusUpdate || ! ptr .Equal (job .Status .Terminating , terminating )
961
963
job .Status .Active = active
962
964
job .Status .Ready = ready
963
- job .Status .Terminating = jobCtx . terminating
965
+ job .Status .Terminating = terminating
964
966
err = jm .trackJobStatusAndRemoveFinalizers (ctx , jobCtx , needsStatusUpdate )
965
967
if err != nil {
966
968
return fmt .Errorf ("tracking status: %w" , err )
@@ -1504,23 +1506,12 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
1504
1506
jm .expectations .ExpectDeletions (logger , jobKey , len (podsToDelete ))
1505
1507
removed , err := jm .deleteJobPods (ctx , job , jobKey , podsToDelete )
1506
1508
active -= removed
1507
- if feature . DefaultFeatureGate . Enabled ( features . JobPodReplacementPolicy ) {
1509
+ if trackTerminatingPods ( job ) {
1508
1510
* jobCtx .terminating += removed
1509
1511
}
1510
1512
return active , metrics .JobSyncActionPodsDeleted , err
1511
1513
}
1512
1514
1513
- var terminating int32 = 0
1514
- if onlyReplaceFailedPods (jobCtx .job ) {
1515
- // For PodFailurePolicy specified but PodReplacementPolicy disabled
1516
- // we still need to count terminating pods for replica counts
1517
- // But we will not allow updates to status.
1518
- if jobCtx .terminating == nil {
1519
- terminating = controller .CountTerminatingPods (jobCtx .pods )
1520
- } else {
1521
- terminating = * jobCtx .terminating
1522
- }
1523
- }
1524
1515
wantActive := int32 (0 )
1525
1516
if job .Spec .Completions == nil {
1526
1517
// Job does not specify a number of completions. Therefore, number active
@@ -1556,7 +1547,7 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
1556
1547
logger .V (4 ).Info ("Too many pods running for job" , "job" , klog .KObj (job ), "deleted" , len (podsToDelete ), "target" , wantActive )
1557
1548
removed , err := jm .deleteJobPods (ctx , job , jobKey , podsToDelete )
1558
1549
active -= removed
1559
- if feature . DefaultFeatureGate . Enabled ( features . JobPodReplacementPolicy ) {
1550
+ if trackTerminatingPods ( job ) {
1560
1551
* jobCtx .terminating += removed
1561
1552
}
1562
1553
// While it is possible for a Job to require both pod creations and
@@ -1566,6 +1557,12 @@ func (jm *Controller) manageJob(ctx context.Context, job *batch.Job, jobCtx *syn
1566
1557
return active , metrics .JobSyncActionPodsDeleted , err
1567
1558
}
1568
1559
1560
+ var terminating int32 = 0
1561
+ if onlyReplaceFailedPods (jobCtx .job ) {
1562
+ // When onlyReplaceFailedPods=true, then also trackTerminatingPods=true,
1563
+ // so jobCtx.terminating has been set and its value can be used here.
1564
+ terminating = * jobCtx .terminating
1565
+ }
1569
1566
if diff := wantActive - terminating - active ; diff > 0 {
1570
1567
var remainingTime time.Duration
1571
1568
if ! hasBackoffLimitPerIndex (job ) {
@@ -1951,6 +1948,17 @@ func countReadyPods(pods []*v1.Pod) int32 {
1951
1948
return cnt
1952
1949
}
1953
1950
1951
+ // trackTerminatingPods reports whether the count of terminating pods is tracked for the job.
1952
+ // The count is tracked when any of the following is true:
1953
+ // - the JobPodReplacementPolicy feature gate is enabled, so the count is returned in the status field,
1954
+ // - the JobPodFailurePolicy feature gate is enabled and the job specifies a pod failure policy, so only failed pods are replaced.
1955
+ func trackTerminatingPods (job * batch.Job ) bool {
1956
+ if feature .DefaultFeatureGate .Enabled (features .JobPodReplacementPolicy ) {
1957
+ return true
1958
+ }
1959
+ return feature .DefaultFeatureGate .Enabled (features .JobPodFailurePolicy ) && job .Spec .PodFailurePolicy != nil
1960
+ }
1961
+
1954
1962
// This checks if we should apply PodReplacementPolicy.
1955
1963
// PodReplacementPolicy controls when we recreate pods if they are marked as terminating
1956
1964
// Failed means that we recreate only once the pod has terminated.
0 commit comments