Skip to content

Commit aeab9da

Browse files
committed
chore: Eliminate unnecessary reconciliations after progressive rollout failure (#976)
Signed-off-by: jvogelman <julie_vogelman@intuit.com>
1 parent 5fa965d commit aeab9da

File tree

5 files changed

+165
-138
lines changed

5 files changed

+165
-138
lines changed

internal/controller/isbservicerollout/isbservicerollout_controller.go

Lines changed: 32 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ func (r *ISBServiceRolloutReconciler) reconcile(ctx context.Context, isbServiceR
313313
}
314314

315315
// if we still have interstepbufferservices that need deleting, or if we're in the middle of an upgrade strategy, then requeue
316-
if !allDeleted || inProgressStrategy != apiv1.UpgradeStrategyNoOp {
316+
if !allDeleted || inProgressStrategy == apiv1.UpgradeStrategyPPND {
317317
if requeueDelay == 0 {
318318
requeueDelay = common.DefaultRequeueDelay
319319
} else {
@@ -454,33 +454,39 @@ func (r *ISBServiceRolloutReconciler) processExistingISBService(ctx context.Cont
454454
case apiv1.UpgradeStrategyProgressive:
455455
numaLogger.Debug("processing InterstepBufferService with Progressive")
456456

457-
done, progressiveRequeueDelay, err := progressive.ProcessResource(ctx, isbServiceRollout, existingISBServiceDef, needsUpdate, r, r.client)
457+
assessmentComplete, failed, progressiveRequeueDelay, err := progressive.ProcessResource(ctx, isbServiceRollout, existingISBServiceDef, needsUpdate, r, r.client)
458458
if err != nil {
459459
return 0, fmt.Errorf("error processing isbsvc with progressive: %s", err.Error())
460460
}
461-
if done {
462-
// update the list of riders in the Status based on our child which was just promoted
463-
promotedISBService, err := ctlrcommon.FindMostCurrentChildOfUpgradeState(ctx, isbServiceRollout, common.LabelValueUpgradePromoted, nil, true, r.client)
464-
if err != nil {
465-
return 0, err
466-
}
467-
468-
currentRiderList, err := r.GetDesiredRiders(isbServiceRollout, promotedISBService.GetName(), promotedISBService)
469-
if err != nil {
470-
return 0, fmt.Errorf("error getting desired Riders for pipeline %s: %s", newISBServiceDef.GetName(), err)
471-
}
472-
r.SetCurrentRiderList(ctx, isbServiceRollout, currentRiderList)
473-
474-
r.inProgressStrategyMgr.UnsetStrategy(ctx, isbServiceRollout)
461+
requeueDelay := time.Duration(0)
462+
if progressiveRequeueDelay > 0 {
463+
requeueDelay = progressiveRequeueDelay
464+
}
465+
if assessmentComplete {
466+
if !failed {
467+
// update the list of riders in the Status based on our child which was just promoted
468+
promotedISBService, err := ctlrcommon.FindMostCurrentChildOfUpgradeState(ctx, isbServiceRollout, common.LabelValueUpgradePromoted, nil, true, r.client)
469+
if err != nil {
470+
return 0, err
471+
}
475472

476-
// Update metrics for progressive rollout
477-
if isbServiceRollout.GetUpgradingChildStatus() != nil {
478-
// assessmentResult value indicates that the progressive rollout is completed, so we can generate the metrics for the same
479-
assessmentResult := metrics.EvaluateSuccessStatusForMetrics(isbServiceRollout.GetUpgradingChildStatus().AssessmentResult)
480-
if assessmentResult != "" {
481-
r.customMetrics.IncISBServiceProgressiveCompleted(isbServiceRollout.GetRolloutObjectMeta().GetNamespace(), isbServiceRollout.GetRolloutObjectMeta().GetName(),
482-
isbServiceRollout.GetUpgradingChildStatus().Name, metrics.EvaluateSuccessStatusForMetrics(isbServiceRollout.GetUpgradingChildStatus().BasicAssessmentResult),
483-
assessmentResult, isbServiceRollout.GetUpgradingChildStatus().ForcedSuccess)
473+
currentRiderList, err := r.GetDesiredRiders(isbServiceRollout, promotedISBService.GetName(), promotedISBService)
474+
if err != nil {
475+
return 0, fmt.Errorf("error getting desired Riders for pipeline %s: %s", newISBServiceDef.GetName(), err)
476+
}
477+
r.SetCurrentRiderList(ctx, isbServiceRollout, currentRiderList)
478+
479+
r.inProgressStrategyMgr.UnsetStrategy(ctx, isbServiceRollout)
480+
481+
// Update metrics for progressive rollout
482+
if isbServiceRollout.GetUpgradingChildStatus() != nil {
483+
// assessmentResult value indicates that the progressive rollout is completed, so we can generate the metrics for the same
484+
assessmentResult := metrics.EvaluateSuccessStatusForMetrics(isbServiceRollout.GetUpgradingChildStatus().AssessmentResult)
485+
if assessmentResult != "" {
486+
r.customMetrics.IncISBServiceProgressiveCompleted(isbServiceRollout.GetRolloutObjectMeta().GetNamespace(), isbServiceRollout.GetRolloutObjectMeta().GetName(),
487+
isbServiceRollout.GetUpgradingChildStatus().Name, metrics.EvaluateSuccessStatusForMetrics(isbServiceRollout.GetUpgradingChildStatus().BasicAssessmentResult),
488+
assessmentResult, isbServiceRollout.GetUpgradingChildStatus().ForcedSuccess)
489+
}
484490
}
485491
}
486492
} else {
@@ -498,8 +504,9 @@ func (r *ISBServiceRolloutReconciler) processExistingISBService(ctx context.Cont
498504
}
499505

500506
// requeue using the provided delay
501-
return progressiveRequeueDelay, nil
507+
requeueDelay = progressiveRequeueDelay
502508
}
509+
return requeueDelay, nil
503510
case apiv1.UpgradeStrategyApply:
504511
// update ISBService
505512
err = r.updateISBService(ctx, isbServiceRollout, newISBServiceDef, needsRecreate)

internal/controller/monovertexrollout/monovertexrollout_controller.go

Lines changed: 42 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -266,16 +266,14 @@ func (r *MonoVertexRolloutReconciler) reconcile(ctx context.Context, monoVertexR
266266
}
267267
}
268268

269-
inProgressStrategy := r.inProgressStrategyMgr.GetStrategy(ctx, monoVertexRollout)
270-
271269
// clean up recyclable monovertices
272270
allDeleted, err := ctlrcommon.GarbageCollectChildren(ctx, monoVertexRollout, r, r.client)
273271
if err != nil {
274272
return ctrl.Result{}, err
275273
}
276274

277275
// if we still have monovertices that need deleting, or if we're in the middle of an upgrade strategy, then requeue
278-
if !allDeleted || inProgressStrategy != apiv1.UpgradeStrategyNoOp {
276+
if !allDeleted {
279277
if requeueDelay == 0 {
280278
requeueDelay = common.DefaultRequeueDelay
281279
} else {
@@ -357,53 +355,57 @@ func (r *MonoVertexRolloutReconciler) processExistingMonoVertex(ctx context.Cont
357355
}
358356
}
359357

360-
done, progressiveRequeueDelay, err := progressive.ProcessResource(ctx, monoVertexRollout, existingMonoVertexDef, needsUpdate, r, r.client)
358+
assessmentComplete, failed, progressiveRequeueDelay, err := progressive.ProcessResource(ctx, monoVertexRollout, existingMonoVertexDef, needsUpdate, r, r.client)
361359
if err != nil {
362360
return 0, err
363361
}
364-
if done {
365-
366-
// update the list of riders in the Status based on our child which was just promoted
367-
promotedMonoVertex, err := ctlrcommon.FindMostCurrentChildOfUpgradeState(ctx, monoVertexRollout, common.LabelValueUpgradePromoted, nil, true, r.client)
368-
if err != nil {
369-
return 0, err
370-
}
362+
if progressiveRequeueDelay > 0 {
363+
requeueDelay = progressiveRequeueDelay
364+
}
365+
if assessmentComplete {
366+
if !failed {
367+
// update the list of riders in the Status based on our child which was just promoted
368+
promotedMonoVertex, err := ctlrcommon.FindMostCurrentChildOfUpgradeState(ctx, monoVertexRollout, common.LabelValueUpgradePromoted, nil, true, r.client)
369+
if err != nil {
370+
return 0, err
371+
}
371372

372-
// update promotedPodSelector immediately so VPA targets the newly promoted child's pods
373-
// without waiting for the next reconcile
374-
monoVertexRollout.Status.PromotedPodSelector = buildPromotedPodSelector(promotedMonoVertex.GetName())
373+
// update promotedPodSelector immediately so VPA targets the newly promoted child's pods
374+
// without waiting for the next reconcile
375+
monoVertexRollout.Status.PromotedPodSelector = buildPromotedPodSelector(promotedMonoVertex.GetName())
375376

376-
currentRiderList, err := r.GetDesiredRiders(monoVertexRollout, promotedMonoVertex.GetName(), promotedMonoVertex)
377-
if err != nil {
378-
return 0, fmt.Errorf("error getting desired Riders for MonoVertex %s: %s", newMonoVertexDef.GetName(), err)
379-
}
380-
r.SetCurrentRiderList(ctx, monoVertexRollout, currentRiderList)
377+
currentRiderList, err := r.GetDesiredRiders(monoVertexRollout, promotedMonoVertex.GetName(), promotedMonoVertex)
378+
if err != nil {
379+
return 0, fmt.Errorf("error getting desired Riders for MonoVertex %s: %s", newMonoVertexDef.GetName(), err)
380+
}
381+
r.SetCurrentRiderList(ctx, monoVertexRollout, currentRiderList)
381382

382-
// we need to prevent the possibility that we're done but we fail to update the Progressive Status
383-
// therefore, we publish Rollout.Status here, so if that fails, then we won't be "done" and so we'll come back in here to try again
384-
err = r.updateMonoVertexRolloutStatus(ctx, monoVertexRollout)
385-
if err != nil {
386-
return 0, err
387-
}
383+
// we need to prevent the possibility that we're done but we fail to update the Progressive Status
384+
// therefore, we publish Rollout.Status here, so if that fails, then we won't be "done" and so we'll come back in here to try again
385+
err = r.updateMonoVertexRolloutStatus(ctx, monoVertexRollout)
386+
if err != nil {
387+
return 0, err
388+
}
388389

389-
r.inProgressStrategyMgr.UnsetStrategy(ctx, monoVertexRollout)
390-
monoVertexRollout.Status.ProgressiveStatus.PromotedMonoVertexStatus = nil
391-
392-
// generate metrics for MonoVertex progressive rollout results
393-
if monoVertexRollout.GetUpgradingChildStatus() != nil {
394-
// assessmentResult value indicates that the progressive rollout is completed, so we can generate the metrics for the same
395-
assessmentResult := metrics.EvaluateSuccessStatusForMetrics(monoVertexRollout.GetUpgradingChildStatus().AssessmentResult)
396-
if assessmentResult != "" {
397-
r.customMetrics.IncMonovertexProgressiveCompleted(monoVertexRollout.GetRolloutObjectMeta().GetNamespace(), monoVertexRollout.GetRolloutObjectMeta().GetName(),
398-
monoVertexRollout.GetUpgradingChildStatus().Name, metrics.EvaluateSuccessStatusForMetrics(monoVertexRollout.GetUpgradingChildStatus().BasicAssessmentResult),
399-
assessmentResult, monoVertexRollout.GetUpgradingChildStatus().ForcedSuccess)
390+
r.inProgressStrategyMgr.UnsetStrategy(ctx, monoVertexRollout)
391+
monoVertexRollout.Status.ProgressiveStatus.PromotedMonoVertexStatus = nil
392+
393+
// generate metrics for MonoVertex progressive rollout results
394+
if monoVertexRollout.GetUpgradingChildStatus() != nil {
395+
// assessmentResult value indicates that the progressive rollout is completed, so we can generate the metrics for the same
396+
assessmentResult := metrics.EvaluateSuccessStatusForMetrics(monoVertexRollout.GetUpgradingChildStatus().AssessmentResult)
397+
if assessmentResult != "" {
398+
r.customMetrics.IncMonovertexProgressiveCompleted(monoVertexRollout.GetRolloutObjectMeta().GetNamespace(), monoVertexRollout.GetRolloutObjectMeta().GetName(),
399+
monoVertexRollout.GetUpgradingChildStatus().Name, metrics.EvaluateSuccessStatusForMetrics(monoVertexRollout.GetUpgradingChildStatus().BasicAssessmentResult),
400+
assessmentResult, monoVertexRollout.GetUpgradingChildStatus().ForcedSuccess)
401+
}
402+
}
403+
// we need to requeue one time to ensure that the newly promoted MonoVertex scales back to its rollout-defined scale
404+
if requeueDelay == 0 {
405+
requeueDelay = common.DefaultRequeueDelay
400406
}
401407
}
402-
// we need to requeue one time to ensure that the newly promoted MonoVertex scales back to its rollout-defined scale
403-
requeueDelay = common.DefaultRequeueDelay
404408

405-
} else {
406-
requeueDelay = progressiveRequeueDelay
407409
}
408410
default:
409411
if needsUpdate {

internal/controller/pipelinerollout/pipelinerollout_controller.go

Lines changed: 39 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -446,15 +446,14 @@ func (r *PipelineRolloutReconciler) reconcile(
446446

447447
// get current in progress strategy if there is one
448448
inProgressStrategy := r.inProgressStrategyMgr.GetStrategy(ctx, pipelineRollout)
449-
inProgressStrategySet := inProgressStrategy != apiv1.UpgradeStrategyNoOp
450449

451450
// clean up recyclable pipelines
452451
allDeleted, err := r.garbageCollectChildren(ctx, pipelineRollout)
453452
if err != nil {
454453
return 0, nil, err
455454
}
456455
// there are some cases that require re-queueing
457-
if !allDeleted || inProgressStrategySet {
456+
if !allDeleted || inProgressStrategy == apiv1.UpgradeStrategyPPND {
458457
if requeueDelay == 0 {
459458
requeueDelay = common.DefaultRequeueDelay
460459
} else {
@@ -609,45 +608,52 @@ func (r *PipelineRolloutReconciler) processExistingPipeline(ctx context.Context,
609608
case apiv1.UpgradeStrategyProgressive:
610609
numaLogger.Debug("processing pipeline with Progressive")
611610

612-
done, progressiveRequeueDelay, err := progressive.ProcessResource(ctx, pipelineRollout, existingPipelineDef, needsUpdate, r, r.client)
611+
assessmentComplete, failed, progressiveRequeueDelay, err := progressive.ProcessResource(ctx, pipelineRollout, existingPipelineDef, needsUpdate, r, r.client)
613612
if err != nil {
614613
return 0, err
615614
}
616-
if done {
617-
// update the list of riders in the Status based on our child which was just promoted
618-
promotedPipeline, err := ctlrcommon.FindMostCurrentChildOfUpgradeState(ctx, pipelineRollout, common.LabelValueUpgradePromoted, nil, true, r.client)
619-
if err != nil {
620-
return 0, err
621-
}
622-
currentRiderList, err := r.GetDesiredRiders(pipelineRollout, promotedPipeline.GetName(), promotedPipeline)
623-
if err != nil {
624-
return 0, fmt.Errorf("error getting desired Riders for pipeline %s: %s", newPipelineDef.GetName(), err)
625-
}
626-
r.SetCurrentRiderList(ctx, pipelineRollout, currentRiderList)
615+
if progressiveRequeueDelay > 0 {
616+
requeueDelay = progressiveRequeueDelay
617+
}
618+
if assessmentComplete {
619+
if !failed {
620+
// update the list of riders in the Status based on our child which was just promoted
621+
promotedPipeline, err := ctlrcommon.FindMostCurrentChildOfUpgradeState(ctx, pipelineRollout, common.LabelValueUpgradePromoted, nil, true, r.client)
622+
if err != nil {
623+
return 0, err
624+
}
625+
currentRiderList, err := r.GetDesiredRiders(pipelineRollout, promotedPipeline.GetName(), promotedPipeline)
626+
if err != nil {
627+
return 0, fmt.Errorf("error getting desired Riders for pipeline %s: %s", newPipelineDef.GetName(), err)
628+
}
629+
r.SetCurrentRiderList(ctx, pipelineRollout, currentRiderList)
627630

628-
pipelineRollout.Status.ProgressiveStatus.PromotedPipelineStatus = nil
631+
pipelineRollout.Status.ProgressiveStatus.PromotedPipelineStatus = nil
629632

630-
// we need to prevent the possibility that we're done, but we fail to update the Progressive Status
631-
// therefore, we publish Rollout.Status here, so if that fails, then we won't be "done" and so we'll come back in here to try again
632-
err = r.updatePipelineRolloutStatus(ctx, pipelineRollout)
633-
if err != nil {
634-
return 0, err
635-
}
636-
r.inProgressStrategyMgr.UnsetStrategy(ctx, pipelineRollout)
633+
// we need to prevent the possibility that we're done but we fail to update the Progressive Status
634+
// therefore, we publish Rollout.Status here, so if that fails, then we won't be "done" and so we'll come back in here to try again
635+
err = r.updatePipelineRolloutStatus(ctx, pipelineRollout)
636+
if err != nil {
637+
return 0, err
638+
}
639+
640+
r.inProgressStrategyMgr.UnsetStrategy(ctx, pipelineRollout)
637641

638-
if pipelineRollout.GetUpgradingChildStatus() != nil {
639-
// assessmentResult value indicates that the progressive rollout is completed, so we can generate the metrics for the same
640-
assessmentResult := metrics.EvaluateSuccessStatusForMetrics(pipelineRollout.GetUpgradingChildStatus().AssessmentResult)
641-
if assessmentResult != "" {
642-
r.customMetrics.IncPipelineProgressiveCompleted(pipelineRollout.GetRolloutObjectMeta().GetNamespace(), pipelineRollout.GetRolloutObjectMeta().GetName(),
643-
pipelineRollout.GetUpgradingChildStatus().Name, metrics.EvaluateSuccessStatusForMetrics(pipelineRollout.GetUpgradingChildStatus().BasicAssessmentResult),
644-
assessmentResult, pipelineRollout.GetUpgradingChildStatus().ForcedSuccess)
642+
if pipelineRollout.GetUpgradingChildStatus() != nil {
643+
// assessmentResult value indicates that the progressive rollout is completed, so we can generate the metrics for the same
644+
assessmentResult := metrics.EvaluateSuccessStatusForMetrics(pipelineRollout.GetUpgradingChildStatus().AssessmentResult)
645+
if assessmentResult != "" {
646+
r.customMetrics.IncPipelineProgressiveCompleted(pipelineRollout.GetRolloutObjectMeta().GetNamespace(), pipelineRollout.GetRolloutObjectMeta().GetName(),
647+
pipelineRollout.GetUpgradingChildStatus().Name, metrics.EvaluateSuccessStatusForMetrics(pipelineRollout.GetUpgradingChildStatus().BasicAssessmentResult),
648+
assessmentResult, pipelineRollout.GetUpgradingChildStatus().ForcedSuccess)
649+
}
650+
}
651+
// we need to requeue one time to ensure that the newly promoted Pipeline scales back to its rollout-defined scale
652+
if requeueDelay == 0 {
653+
requeueDelay = common.DefaultRequeueDelay
645654
}
646655
}
647-
// we need to requeue one time to ensure that the newly promoted Pipeline scales back to its rollout-defined scale
648-
requeueDelay = common.DefaultRequeueDelay
649-
} else {
650-
requeueDelay = progressiveRequeueDelay
656+
651657
}
652658
default:
653659
if needsUpdate && upgradeStrategyType == apiv1.UpgradeStrategyApply {

0 commit comments

Comments
 (0)