@@ -166,13 +166,13 @@ func (r *Reconciler) executeUpdatingStage(
166166 for i := 0 ; i < len (updatingStageStatus .Clusters ) && clusterUpdatingCount < maxConcurrency ; i ++ {
167167 clusterStatus := & updatingStageStatus .Clusters [i ]
168168 clusterUpdateSucceededCond := meta .FindStatusCondition (clusterStatus .Conditions , string (placementv1beta1 .ClusterUpdatingConditionSucceeded ))
169- if condition .IsConditionStatusTrueIgnoreGeneration (clusterUpdateSucceededCond ) {
169+ if condition .IsConditionStatusTrueIgnoreGeneration (clusterUpdateSucceededCond ) { // Ignoring generation here as once succeeded in a previous Run state, it won't change.
170170 // The cluster has been updated successfully.
171171 finishedClusterCount ++
172172 continue
173173 }
174174 clusterUpdatingCount ++
175- if condition .IsConditionStatusFalseIgnoreGeneration (clusterUpdateSucceededCond ) {
175+ if condition .IsConditionStatusFalseIgnoreGeneration (clusterUpdateSucceededCond ) { // Ignoring generation here as once failed, it won't change as the update run is aborted on failure.
176176 // The cluster is marked as failed to update, this cluster is counted as updating cluster since it's not finished to avoid processing more clusters than maxConcurrency in this round.
177177 failedErr := fmt .Errorf ("the cluster `%s` in the stage %s has failed" , clusterStatus .ClusterName , updatingStageStatus .StageName )
178178 klog .ErrorS (failedErr , "The cluster has failed to be updated" , "updateRun" , updateRunRef )
@@ -182,7 +182,7 @@ func (r *Reconciler) executeUpdatingStage(
182182 // The cluster needs to be processed.
183183 clusterStartedCond := meta .FindStatusCondition (clusterStatus .Conditions , string (placementv1beta1 .ClusterUpdatingConditionStarted ))
184184 binding := toBeUpdatedBindingsMap [clusterStatus .ClusterName ]
185- if ! condition .IsConditionStatusTrueIgnoreGeneration (clusterStartedCond ) {
185+ if ! condition .IsConditionStatusTrueIgnoreGeneration (clusterStartedCond ) { // Ignoring generation here as once started in a previous Run state, it won't change.
186186 // The cluster has not started updating yet.
187187 if ! isBindingSyncedWithClusterStatus (resourceSnapshotName , updateRun , binding , clusterStatus ) {
188188 klog .V (2 ).InfoS ("Found the first cluster that needs to be updated" , "cluster" , clusterStatus .ClusterName , "stage" , updatingStageStatus .StageName , "updateRun" , updateRunRef )
@@ -231,9 +231,15 @@ func (r *Reconciler) executeUpdatingStage(
231231 }
232232 }
233233 }
234- markClusterUpdatingStarted (clusterStatus , updateRun .GetGeneration ())
234+ // The cluster is starting to update. Only mark the cluster as started if not already marked or previously false.
235+ // Ignoring generation here as once started in a previous Run state, it won't change.
236+ if ! condition .IsConditionStatusTrueIgnoreGeneration (clusterStartedCond ) {
237+ markClusterUpdatingStarted (clusterStatus , updateRun .GetGeneration ())
238+ }
235239 stageUpdatingProgressCond := meta .FindStatusCondition (updatingStageStatus .Conditions , string (placementv1beta1 .StageUpdatingConditionProgressing ))
236- if finishedClusterCount == 0 && (! condition .IsConditionStatusTrueIgnoreGeneration (stageUpdatingProgressCond )) {
240+ // Mark the stage as started if not already marked or previously false (restarted after update run has stopped or waiting).
241+ // Ignoring generation here as the progressing generation is updated based on the state of the update run.
242+ if finishedClusterCount == 0 && ! condition .IsConditionStatusTrueIgnoreGeneration (stageUpdatingProgressCond ) {
237243 markStageUpdatingStarted (updatingStageStatus , updateRun .GetGeneration ())
238244 }
239245 // Need to continue as we need to process at most maxConcurrency number of clusters in parallel.
@@ -338,8 +344,10 @@ func (r *Reconciler) executeDeleteStage(
338344 for i := range existingDeleteStageStatus .Clusters {
339345 existingDeleteStageClusterMap [existingDeleteStageStatus .Clusters [i ].ClusterName ] = & existingDeleteStageStatus .Clusters [i ]
340346 }
341- // Mark the delete stage as started in case it's not.
342- markStageUpdatingStarted (updateRunStatus .DeletionStageStatus , updateRun .GetGeneration ())
347+ // Mark the delete stage as started in case it's not. Ignoring generation here as progressing generation is updated based on the state of the update run and could have started in previous Run state.
348+ if ! condition .IsConditionStatusTrueIgnoreGeneration (meta .FindStatusCondition (existingDeleteStageStatus .Conditions , string (placementv1beta1 .StageUpdatingConditionProgressing ))) {
349+ markStageUpdatingStarted (updateRunStatus .DeletionStageStatus , updateRun .GetGeneration ())
350+ }
343351 for _ , binding := range toBeDeletedBindings {
344352 bindingSpec := binding .GetBindingSpec ()
345353 curCluster , exist := existingDeleteStageClusterMap [bindingSpec .TargetCluster ]
@@ -352,11 +360,13 @@ func (r *Reconciler) executeDeleteStage(
352360 // In validation, we already check the binding must exist in the status.
353361 delete (existingDeleteStageClusterMap , bindingSpec .TargetCluster )
354362 // Make sure the cluster is not marked as deleted as the binding is still there.
363+ // Ignoring generation here, it won't change as the update run is aborted.
355364 if condition .IsConditionStatusTrueIgnoreGeneration (meta .FindStatusCondition (curCluster .Conditions , string (placementv1beta1 .ClusterUpdatingConditionSucceeded ))) {
356365 unexpectedErr := controller .NewUnexpectedBehaviorError (fmt .Errorf ("the deleted cluster `%s` in the deleting stage still has a binding" , bindingSpec .TargetCluster ))
357366 klog .ErrorS (unexpectedErr , "The cluster in the deleting stage is not removed yet but marked as deleted" , "cluster" , curCluster .ClusterName , "updateRun" , updateRunRef )
358367 return false , fmt .Errorf ("%w: %s" , errStagedUpdatedAborted , unexpectedErr .Error ())
359368 }
369+ // Ignoring generation here as once started in a previous Run state, it won't change.
360370 if condition .IsConditionStatusTrueIgnoreGeneration (meta .FindStatusCondition (curCluster .Conditions , string (placementv1beta1 .ClusterUpdatingConditionStarted ))) {
361371 // The cluster status is marked as being deleted.
362372 if binding .GetDeletionTimestamp ().IsZero () {
@@ -373,15 +383,24 @@ func (r *Reconciler) executeDeleteStage(
373383 return false , controller .NewAPIServerError (false , err )
374384 }
375385 klog .V (2 ).InfoS ("Deleted a binding pointing to a to be deleted cluster" , "binding" , klog .KObj (binding ), "cluster" , curCluster .ClusterName , "updateRun" , updateRunRef )
376- markClusterUpdatingStarted (curCluster , updateRun .GetGeneration ())
386+ // Mark the cluster as deleting. Only mark the cluster as started if not already marked or previously false.
387+ // Ignoring generation here as once started in a previous Run state, it won't change.
388+ if ! condition .IsConditionStatusTrueIgnoreGeneration (meta .FindStatusCondition (curCluster .Conditions , string (placementv1beta1 .ClusterUpdatingConditionStarted ))) {
389+ markClusterUpdatingStarted (curCluster , updateRun .GetGeneration ())
390+ }
377391 }
378392 // The rest of the clusters in the stage are not in the toBeDeletedBindings so it should be marked as delete succeeded.
379393 for _ , clusterStatus := range existingDeleteStageClusterMap {
380394 // Make sure the cluster is marked as deleted.
395+ // Ignoring generation here as once cluster started in a previous Run state, it won't change.
381396 if ! condition .IsConditionStatusTrueIgnoreGeneration (meta .FindStatusCondition (clusterStatus .Conditions , string (placementv1beta1 .ClusterUpdatingConditionStarted ))) {
382397 markClusterUpdatingStarted (clusterStatus , updateRun .GetGeneration ())
383398 }
384- markClusterUpdatingSucceeded (clusterStatus , updateRun .GetGeneration ())
399+ // Make sure the cluster is marked as delete succeeded.
400+ // Ignoring generation here as once cluster succeeded in a previous Run state, it won't change.
401+ if ! condition .IsConditionStatusTrueIgnoreGeneration (meta .FindStatusCondition (clusterStatus .Conditions , string (placementv1beta1 .ClusterUpdatingConditionSucceeded ))) {
402+ markClusterUpdatingSucceeded (clusterStatus , updateRun .GetGeneration ())
403+ }
385404 }
386405 klog .InfoS ("The delete stage is progressing" , "numberOfDeletingClusters" , len (toBeDeletedBindings ), "updateRun" , updateRunRef )
387406 if len (toBeDeletedBindings ) == 0 {
@@ -445,6 +464,7 @@ func (r *Reconciler) handleStageApprovalTask(
445464) (bool , error ) {
446465 updateRunRef := klog .KObj (updateRun )
447466
467+ // Ignoring generation for checking request approved conditions as they could be set in previous Run state.
448468 stageTaskApproved := condition .IsConditionStatusTrueIgnoreGeneration (meta .FindStatusCondition (stageTaskStatus .Conditions , string (placementv1beta1 .StageTaskConditionApprovalRequestApproved )))
449469 if stageTaskApproved {
450470 // The stageTask has been approved.
@@ -456,8 +476,7 @@ func (r *Reconciler) handleStageApprovalTask(
456476 requestRef := klog .KObj (approvalRequest )
457477 if err := r .Client .Create (ctx , approvalRequest ); err != nil {
458478 if apierrors .IsAlreadyExists (err ) {
459- // The approval task already exists.
460- markStageTaskRequestCreated (stageTaskStatus , updateRun .GetGeneration ())
479+ // The approval task already exists. No need to update request created condition again.
461480 if err = r .Client .Get (ctx , client .ObjectKeyFromObject (approvalRequest ), approvalRequest ); err != nil {
462481 klog .ErrorS (err , "Failed to get the already existing approval request" , "approvalRequest" , requestRef , "stage" , updatingStage .Name , "updateRun" , updateRunRef )
463482 return false , controller .NewAPIServerError (true , err )
@@ -488,14 +507,15 @@ func (r *Reconciler) handleStageApprovalTask(
488507 // Approved state should not change once the approval is accepted.
489508 klog .V (2 ).InfoS ("The approval request has been approval-accepted, ignoring changing back to unapproved" , "approvalRequestTask" , requestRef , "stage" , updatingStage .Name , "updateRun" , updateRunRef )
490509 }
510+ // Mark the stage task as approved as previously not approved. Will catch the very first check if approved in the beginning of this function.
491511 markStageTaskRequestApproved (stageTaskStatus , updateRun .GetGeneration ())
492512 } else {
493513 // retriable error
494514 klog .ErrorS (err , "Failed to create the approval request" , "approvalRequest" , requestRef , "stage" , updatingStage .Name , "updateRun" , updateRunRef )
495515 return false , controller .NewAPIServerError (false , err )
496516 }
497517 } else {
498- // The approval request has been created for the first time.
518+ // The approval request has been created for the first time. Mark the stage task as request created.
499519 klog .V (2 ).InfoS ("The approval request has been created" , "approvalRequestTask" , requestRef , "stage" , updatingStage .Name , "updateRun" , updateRunRef )
500520 markStageTaskRequestCreated (stageTaskStatus , updateRun .GetGeneration ())
501521 return false , nil
@@ -677,6 +697,7 @@ func markUpdateRunProgressing(updateRun placementv1beta1.UpdateRunObj) {
677697func markUpdateRunProgressingIfNotWaitingOrStuck (updateRun placementv1beta1.UpdateRunObj ) {
678698 updateRunStatus := updateRun .GetUpdateRunStatus ()
679699 progressingCond := meta .FindStatusCondition (updateRunStatus .Conditions , string (placementv1beta1 .StagedUpdateRunConditionProgressing ))
700+ // Not ignoring generation here as we want to check the latest condition while executing.
680701 if condition .IsConditionStatusFalse (progressingCond , updateRun .GetGeneration ()) &&
681702 (progressingCond .Reason == condition .UpdateRunWaitingReason || progressingCond .Reason == condition .UpdateRunStuckReason ) {
682703 // The updateRun is waiting or stuck, no need to mark it as started.
0 commit comments