@@ -174,7 +174,8 @@ func (r *ReconcileOperationJob) operateTargets(
174174 if len (candidates ) == 0 {
175175 return nil
176176 }
177- return operator .OperateTargets (ctx , candidates , operationJob )
177+ errMap := operator .OperateTargets (ctx , candidates , operationJob )
178+ return ctrlutils .AggregateErrors (ojutils .ConvertErrMapToList (errMap ))
178179}
179180
180181func (r * ReconcileOperationJob ) getTargetsOpsStatus (
@@ -244,11 +245,10 @@ func (r *ReconcileOperationJob) getTargetsOpsStatus(
244245// ensureActiveDeadlineAndTTL calculate time to ActiveDeadlineSeconds and TTLSecondsAfterFinished and release targets
245246func (r * ReconcileOperationJob ) ensureActiveDeadlineAndTTL (ctx context.Context , operationJob * appsv1alpha1.OperationJob , candidates []* OpsCandidate , logger logr.Logger ) (bool , * time.Duration , error ) {
246247 if operationJob .Spec .ActiveDeadlineSeconds != nil {
247- var allowReleaseCandidates []* OpsCandidate
248248 for i := range candidates {
249249 candidate := candidates [i ]
250- // just skip if target operation already finished, or not started
251- if IsCandidateOpsFinished ( candidate ) || candidate .OpsStatus .StartTime == nil {
250+ // just skip if target not started
251+ if candidate .OpsStatus .StartTime == nil {
252252 continue
253253 }
254254 leftTime := time .Duration (* operationJob .Spec .ActiveDeadlineSeconds )* time .Second - time .Since (candidate .OpsStatus .StartTime .Time )
@@ -257,17 +257,10 @@ func (r *ReconcileOperationJob) ensureActiveDeadlineAndTTL(ctx context.Context,
257257 } else {
258258 logger .Info ("should end but still processing" )
259259 r .Recorder .Eventf (operationJob , corev1 .EventTypeNormal , "Timeout" , "Try to fail OperationJob for timeout..." )
260- // mark operationjob and targets failed and release targets
260+ // mark target failed if timeout
261261 MarkCandidateFailed (candidate )
262- allowReleaseCandidates = append (allowReleaseCandidates , candidate )
263262 }
264263 }
265- if len (allowReleaseCandidates ) > 0 {
266- releaseErr := r .releaseTargets (ctx , operationJob , allowReleaseCandidates , false )
267- operationJob .Status = r .calculateStatus (operationJob , candidates )
268- updateErr := r .updateStatus (ctx , operationJob )
269- return false , nil , controllerutils .AggregateErrors ([]error {releaseErr , updateErr })
270- }
271264 }
272265
273266 if operationJob .Spec .TTLSecondsAfterFinished != nil {
@@ -286,26 +279,55 @@ func (r *ReconcileOperationJob) ensureActiveDeadlineAndTTL(ctx context.Context,
286279 return false , nil , nil
287280}
288281
282+ // ensureFailedTargetsReleased select failed but unreleased targets and call releaseTargets
283+ func (r * ReconcileOperationJob ) ensureFailedTargetsReleased (ctx context.Context , operationJob * appsv1alpha1.OperationJob , candidates []* OpsCandidate ) error {
284+ var allowReleaseCandidates []* OpsCandidate
285+ for i := range candidates {
286+ if IsCandidateOpsFailed (candidates [i ]) && ! IsCandidateOpsReleased (candidates [i ]) {
287+ allowReleaseCandidates = append (allowReleaseCandidates , candidates [i ])
288+ }
289+ }
290+ if len (allowReleaseCandidates ) > 0 {
291+ releaseErr := r .releaseTargets (ctx , operationJob , allowReleaseCandidates , false )
292+ operationJob .Status = r .calculateStatus (operationJob , candidates )
293+ updateErr := r .updateStatus (ctx , operationJob )
294+ return controllerutils .AggregateErrors ([]error {releaseErr , updateErr })
295+ }
296+ return nil
297+ }
298+
289299// releaseTargets try to release the targets from operation when the operationJob is deleted
290300func (r * ReconcileOperationJob ) releaseTargets (ctx context.Context , operationJob * appsv1alpha1.OperationJob , candidates []* OpsCandidate , needUpdateStatus bool ) error {
291301 actionHandler , enablePodOpsLifecycle , err := r .getActionHandler (operationJob )
292302 if err != nil {
293303 return err
294304 }
295- releaseErr := actionHandler .ReleaseTargets (ctx , candidates , operationJob )
305+
306+ // start to release targets
307+ releaseErrMap := actionHandler .ReleaseTargets (ctx , candidates , operationJob )
296308 _ , _ = controllerutils .SlowStartBatch (len (candidates ), controllerutils .SlowStartInitialBatchSize , false , func (i int , _ error ) error {
297309 candidate := candidates [i ]
298310 // cancel lifecycle if necessary
299311 if enablePodOpsLifecycle {
300312 err = r .cleanCandidateOpsLifecycle (ctx , true , candidate , operationJob )
301- releaseErr = controllerutils .AggregateErrors ([]error {releaseErr , err })
313+ releaseErrMap [ candidate . PodName ] = controllerutils .AggregateErrors ([]error {releaseErrMap [ candidate . PodName ] , err })
302314 }
303315 // mark candidate as failed if not finished
304316 if ! IsCandidateOpsFinished (candidate ) {
305317 candidate .OpsStatus .Progress = appsv1alpha1 .OperationProgressFailed
306318 }
307319 return nil
308320 })
321+
322+ // mark target as released if error not occurred
323+ for _ , candidate := range candidates {
324+ if releaseErrMap [candidate .PodName ] == nil {
325+ MarkCandidateReleased (candidate )
326+ }
327+ }
328+ releaseErr := ctrlutils .AggregateErrors (ojutils .ConvertErrMapToList (releaseErrMap ))
329+
330+ // update candidates status to job status
309331 if ! needUpdateStatus {
310332 return releaseErr
311333 }
0 commit comments