Skip to content

Commit d10bcca

Browse files
committed
implement stagedUpdateRun execution
1 parent cb0c5f7 commit d10bcca

File tree

9 files changed

+1482
-90
lines changed

9 files changed

+1482
-90
lines changed

pkg/controllers/updaterun/controller.go

Lines changed: 72 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"time"
1414

1515
"k8s.io/apimachinery/pkg/api/meta"
16+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1617
"k8s.io/apimachinery/pkg/types"
1718
"k8s.io/client-go/tools/record"
1819
"k8s.io/client-go/util/workqueue"
@@ -40,10 +41,6 @@ var (
4041
// errInitializedFailed is the error when the ClusterStagedUpdateRun fails to initialize.
4142
// It is a wrapped error of errStagedUpdatedAborted, because some initialization functions are reused in the validation step.
4243
errInitializedFailed = fmt.Errorf("%w: failed to initialize the clusterStagedUpdateRun", errStagedUpdatedAborted)
43-
44-
// stageUpdatingWaitTime is the time to wait before rechecking the stage update status.
45-
// Put it as a variable for convenient testing.
46-
stageUpdatingWaitTime = 60 * time.Second
4744
)
4845

4946
// Reconciler reconciles a ClusterStagedUpdateRun object.
@@ -127,10 +124,35 @@ func (r *Reconciler) Reconcile(ctx context.Context, req runtime.Request) (runtim
127124
klog.V(2).InfoS("The clusterStagedUpdateRun is validated", "clusterStagedUpdateRun", runObjRef)
128125
}
129126

130-
// TODO(wantjian): execute the clusterStagedUpdateRun and fix the requeue time.
131-
klog.V(2).InfoS("Executing the clusterStagedUpdateRun", "clusterStagedUpdateRun", runObjRef, "updatingStageIndex", updatingStageIndex,
132-
"toBeUpdatedBindings count", len(toBeUpdatedBindings), "toBeDeletedBindings count", len(toBeDeletedBindings))
133-
return runtime.Result{RequeueAfter: stageUpdatingWaitTime}, nil
127+
// The previous run is completed but the update to the status failed.
128+
if updatingStageIndex == -1 {
129+
klog.V(2).InfoS("The clusterStagedUpdateRun is completed", "clusterStagedUpdateRun", runObjRef)
130+
return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, &updateRun)
131+
}
132+
133+
// Execute the updateRun.
134+
klog.V(2).InfoS("Continue to execute the clusterStagedUpdateRun", "updatingStageIndex", updatingStageIndex, "clusterStagedUpdateRun", runObjRef)
135+
finished, waitTime, execErr := r.execute(ctx, &updateRun, updatingStageIndex, toBeUpdatedBindings, toBeDeletedBindings)
136+
if execErr != nil && errors.Is(execErr, errStagedUpdatedAborted) {
137+
// errStagedUpdatedAborted cannot be retried.
138+
return runtime.Result{}, r.recordUpdateRunFailed(ctx, &updateRun, execErr.Error())
139+
}
140+
141+
if finished {
142+
klog.V(2).InfoS("The clusterStagedUpdateRun is completed", "clusterStagedUpdateRun", runObjRef)
143+
return runtime.Result{}, r.recordUpdateRunSucceeded(ctx, &updateRun)
144+
}
145+
146+
// The execution is not finished yet or it encounters a retriable error.
147+
// We need to record the status and requeue.
148+
if updateErr := r.recordUpdateRunStatus(ctx, &updateRun); updateErr != nil {
149+
return runtime.Result{}, updateErr
150+
}
151+
klog.V(2).InfoS("The clusterStagedUpdateRun is not finished yet", "requeueWaitTime", waitTime, "execErr", execErr, "clusterStagedUpdateRun", runObjRef)
152+
if execErr != nil {
153+
return runtime.Result{}, execErr
154+
}
155+
return runtime.Result{Requeue: true, RequeueAfter: waitTime}, nil
134156
}
135157

136158
// handleDelete handles the deletion of the clusterStagedUpdateRun object.
@@ -162,6 +184,48 @@ func (r *Reconciler) ensureFinalizer(ctx context.Context, updateRun *placementv1
162184
return r.Update(ctx, updateRun, client.FieldOwner(utils.UpdateRunControllerFieldManagerName))
163185
}
164186

187+
// recordUpdateRunSucceeded records the succeeded condition in the ClusterStagedUpdateRun status.
188+
func (r *Reconciler) recordUpdateRunSucceeded(ctx context.Context, updateRun *placementv1alpha1.ClusterStagedUpdateRun) error {
189+
meta.SetStatusCondition(&updateRun.Status.Conditions, metav1.Condition{
190+
Type: string(placementv1alpha1.StagedUpdateRunConditionSucceeded),
191+
Status: metav1.ConditionTrue,
192+
ObservedGeneration: updateRun.Generation,
193+
Reason: condition.UpdateRunSucceededReason,
194+
})
195+
if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil {
196+
klog.ErrorS(updateErr, "Failed to update the ClusterStagedUpdateRun status as succeeded", "clusterStagedUpdateRun", klog.KObj(updateRun))
197+
// updateErr can be retried.
198+
return controller.NewUpdateIgnoreConflictError(updateErr)
199+
}
200+
return nil
201+
}
202+
203+
// recordUpdateRunFailed records the failed condition in the ClusterStagedUpdateRun status.
204+
func (r *Reconciler) recordUpdateRunFailed(ctx context.Context, updateRun *placementv1alpha1.ClusterStagedUpdateRun, message string) error {
205+
meta.SetStatusCondition(&updateRun.Status.Conditions, metav1.Condition{
206+
Type: string(placementv1alpha1.StagedUpdateRunConditionSucceeded),
207+
Status: metav1.ConditionFalse,
208+
ObservedGeneration: updateRun.Generation,
209+
Reason: condition.UpdateRunFailedReason,
210+
Message: message,
211+
})
212+
if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil {
213+
klog.ErrorS(updateErr, "Failed to update the ClusterStagedUpdateRun status as failed", "clusterStagedUpdateRun", klog.KObj(updateRun))
214+
// updateErr can be retried.
215+
return controller.NewUpdateIgnoreConflictError(updateErr)
216+
}
217+
return nil
218+
}
219+
220+
// recordUpdateRunStatus records the ClusterStagedUpdateRun status.
221+
func (r *Reconciler) recordUpdateRunStatus(ctx context.Context, updateRun *placementv1alpha1.ClusterStagedUpdateRun) error {
222+
if updateErr := r.Client.Status().Update(ctx, updateRun); updateErr != nil {
223+
klog.ErrorS(updateErr, "Failed to update the ClusterStagedUpdateRun status", "clusterStagedUpdateRun", klog.KObj(updateRun))
224+
return controller.NewUpdateIgnoreConflictError(updateErr)
225+
}
226+
return nil
227+
}
228+
165229
// SetupWithManager sets up the controller with the Manager.
166230
func (r *Reconciler) SetupWithManager(mgr runtime.Manager) error {
167231
r.recorder = mgr.GetEventRecorderFor("clusterresource-stagedupdaterun-controller")

pkg/controllers/updaterun/controller_integration_test.go

Lines changed: 34 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ import (
2121
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2222
"k8s.io/apimachinery/pkg/runtime"
2323
"k8s.io/apimachinery/pkg/types"
24+
"sigs.k8s.io/controller-runtime/pkg/client"
2425
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
2526

2627
clusterv1beta1 "go.goms.io/fleet/apis/cluster/v1beta1"
@@ -327,7 +328,7 @@ func generateTestClusterStagedUpdateStrategy() *placementv1alpha1.ClusterStagedU
327328
{
328329
Type: placementv1alpha1.AfterStageTaskTypeTimedWait,
329330
WaitTime: metav1.Duration{
330-
Duration: time.Minute * 10,
331+
Duration: time.Second * 4,
331332
},
332333
},
333334
},
@@ -469,7 +470,7 @@ func validateApprovalRequestCount(ctx context.Context, count int) {
469470
}, timeout, interval).Should(Equal(count), "approval requests count mismatch")
470471
}
471472

472-
func generateTrueCondition(updateRun *placementv1alpha1.ClusterStagedUpdateRun, condType any) metav1.Condition {
473+
func generateTrueCondition(obj client.Object, condType any) metav1.Condition {
473474
reason, typeStr := "", ""
474475
switch cond := condType.(type) {
475476
case placementv1alpha1.StagedUpdateRunConditionType:
@@ -498,16 +499,38 @@ func generateTrueCondition(updateRun *placementv1alpha1.ClusterStagedUpdateRun,
498499
reason = condition.ClusterUpdatingSucceededReason
499500
}
500501
typeStr = string(cond)
502+
case placementv1alpha1.AfterStageTaskConditionType:
503+
switch cond {
504+
case placementv1alpha1.AfterStageTaskConditionWaitTimeElapsed:
505+
reason = condition.AfterStageTaskWaitTimeElapsedReason
506+
case placementv1alpha1.AfterStageTaskConditionApprovalRequestCreated:
507+
reason = condition.AfterStageTaskApprovalRequestCreatedReason
508+
case placementv1alpha1.AfterStageTaskConditionApprovalRequestApproved:
509+
reason = condition.AfterStageTaskApprovalRequestApprovedReason
510+
}
511+
typeStr = string(cond)
512+
case placementv1alpha1.ApprovalRequestConditionType:
513+
switch cond {
514+
case placementv1alpha1.ApprovalRequestConditionApproved:
515+
reason = "LGTM"
516+
}
517+
typeStr = string(cond)
518+
case placementv1beta1.ResourceBindingConditionType:
519+
switch cond {
520+
case placementv1beta1.ResourceBindingAvailable:
521+
reason = condition.AvailableReason
522+
}
523+
typeStr = string(cond)
501524
}
502525
return metav1.Condition{
503526
Status: metav1.ConditionTrue,
504527
Type: typeStr,
505-
ObservedGeneration: updateRun.Generation,
528+
ObservedGeneration: obj.GetGeneration(),
506529
Reason: reason,
507530
}
508531
}
509532

510-
func generateFalseCondition(updateRun *placementv1alpha1.ClusterStagedUpdateRun, condType any) metav1.Condition {
533+
func generateFalseCondition(obj client.Object, condType any) metav1.Condition {
511534
reason, typeStr := "", ""
512535
switch cond := condType.(type) {
513536
case placementv1alpha1.StagedUpdateRunConditionType:
@@ -530,11 +553,17 @@ func generateFalseCondition(updateRun *placementv1alpha1.ClusterStagedUpdateRun,
530553
reason = condition.ClusterUpdatingFailedReason
531554
}
532555
typeStr = string(cond)
556+
case placementv1beta1.ResourceBindingConditionType:
557+
switch cond {
558+
case placementv1beta1.ResourceBindingApplied:
559+
reason = condition.ApplyFailedReason
560+
}
561+
typeStr = string(cond)
533562
}
534563
return metav1.Condition{
535564
Status: metav1.ConditionFalse,
536565
Type: typeStr,
537-
ObservedGeneration: updateRun.Generation,
566+
ObservedGeneration: obj.GetGeneration(),
538567
Reason: reason,
539568
}
540569
}

0 commit comments

Comments
 (0)