Skip to content

Commit 9cf6174

Browse files
author
Per Goncalves da Silva
committed
Surface revision retrying condition to ClusterExtension
Signed-off-by: Per Goncalves da Silva <[email protected]>
1 parent 200f918 commit 9cf6174

File tree

3 files changed

+137
-149
lines changed

3 files changed

+137
-149
lines changed

internal/operator-controller/applier/boxcutter.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -398,6 +398,9 @@ func (bc *Boxcutter) apply(ctx context.Context, contentFS fs.FS, ext *ocv1.Clust
398398
if progressingCondition == nil && availableCondition == nil && succeededCondition == nil {
399399
return false, "New revision created", nil
400400
} else if progressingCondition != nil && progressingCondition.Status == metav1.ConditionTrue {
401+
if progressingCondition.Reason == ocv1.ClusterExtensionRevisionReasonRetrying {
402+
return false, "", errors.New(progressingCondition.Message)
403+
}
401404
return false, progressingCondition.Message, nil
402405
} else if availableCondition != nil && availableCondition.Status != metav1.ConditionTrue {
403406
return false, "", errors.New(availableCondition.Message)

internal/operator-controller/controllers/clusterextensionrevision_controller.go

Lines changed: 44 additions & 95 deletions
Original file line numberDiff line numberDiff line change
@@ -118,13 +118,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
118118

119119
revision, opts, err := c.toBoxcutterRevision(ctx, rev)
120120
if err != nil {
121-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
122-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
123-
Status: metav1.ConditionFalse,
124-
Reason: ocv1.ClusterExtensionRevisionReasonReconciling,
125-
Message: err.Error(),
126-
ObservedGeneration: rev.Generation,
127-
})
121+
setRetryingConditions(rev, err.Error())
128122
return ctrl.Result{}, fmt.Errorf("converting to boxcutter revision: %v", err)
129123
}
130124

@@ -137,16 +131,15 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
137131
// Reconcile
138132
//
139133
if err := c.ensureFinalizer(ctx, rev, clusterExtensionRevisionTeardownFinalizer); err != nil {
134+
l.Error(err, "failed to ensure finalizer")
140135
return ctrl.Result{}, fmt.Errorf("error ensuring teardown finalizer: %v", err)
141136
}
142137

143138
if err := c.establishWatch(ctx, rev, revision); err != nil {
144139
werr := fmt.Errorf("establish watch: %v", err)
145-
// this error is very likely transient, so we should keep revision as progressing
146-
markAsProgressing(rev, ocv1.ClusterExtensionRevisionReasonRetrying, werr.Error())
140+
setRetryingConditions(rev, werr.Error())
147141
return ctrl.Result{}, werr
148142
}
149-
markAsProgressing(rev, ocv1.ClusterExtensionRevisionReasonRollingOut, fmt.Sprintf("Revision %s is being rolled out.", revVersion))
150143

151144
rres, err := c.RevisionEngine.Reconcile(ctx, *revision, opts...)
152145
if err != nil {
@@ -155,18 +148,7 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
155148
} else {
156149
l.Error(err, "revision reconcile failed")
157150
}
158-
markAsProgressing(rev, ocv1.ClusterExtensionRevisionReasonRetrying, err.Error())
159-
if meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) != nil {
160-
// it is a probably transient error, and we do not know if the revision is available or not
161-
// perhaps we should not report it at all, hoping that it is going to be mitigated in the next reconcile?
162-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
163-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
164-
Status: metav1.ConditionUnknown,
165-
Reason: ocv1.ClusterExtensionRevisionReasonReconciling,
166-
Message: err.Error(),
167-
ObservedGeneration: rev.Generation,
168-
})
169-
}
151+
setRetryingConditions(rev, err.Error())
170152
return ctrl.Result{}, fmt.Errorf("revision reconcile: %v", err)
171153
}
172154
// Log detailed reconcile reports only in debug mode (V(1)) to reduce verbosity.
@@ -176,37 +158,14 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
176158
// TODO: report status, backoff?
177159
if verr := rres.GetValidationError(); verr != nil {
178160
l.Error(fmt.Errorf("%w", verr), "preflight validation failed, retrying after 10s")
179-
markAsProgressing(rev, ocv1.ClusterExtensionRevisionReasonRetrying, fmt.Sprintf("revision validation error: %s", verr))
180-
if meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) != nil {
181-
// it is a probably transient error, and we do not know if the revision is available or not
182-
// perhaps we should not report it at all, hoping that it is going to be mitigated in the next reconcile?
183-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
184-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
185-
Status: metav1.ConditionUnknown,
186-
Reason: ocv1.ClusterExtensionRevisionReasonReconciling,
187-
Message: fmt.Sprintf("revision validation error: %s", verr),
188-
ObservedGeneration: rev.Generation,
189-
})
190-
}
161+
setRetryingConditions(rev, fmt.Sprintf("revision validation error: %s", verr))
191162
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
192163
}
193164

194165
for i, pres := range rres.GetPhases() {
195166
if verr := pres.GetValidationError(); verr != nil {
196167
l.Error(fmt.Errorf("%w", verr), "phase preflight validation failed, retrying after 10s", "phase", i)
197-
198-
markAsProgressing(rev, ocv1.ClusterExtensionRevisionReasonRetrying, fmt.Sprintf("phase %d validation error: %s", i, verr))
199-
if meta.FindStatusCondition(rev.Status.Conditions, ocv1.ClusterExtensionRevisionTypeAvailable) != nil {
200-
// it is a probably transient error, and we do not know if the revision is available or not
201-
// perhaps we should not report it at all, hoping that it is going to be mitigated in the next reconcile?
202-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
203-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
204-
Status: metav1.ConditionUnknown,
205-
Reason: ocv1.ClusterExtensionRevisionReasonReconciling,
206-
Message: fmt.Sprintf("phase %d validation error: %s", i, verr),
207-
ObservedGeneration: rev.Generation,
208-
})
209-
}
168+
setRetryingConditions(rev, fmt.Sprintf("phase %d validation error: %s", i, verr))
210169
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
211170
}
212171

@@ -219,18 +178,15 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
219178

220179
if len(collidingObjs) > 0 {
221180
l.Error(fmt.Errorf("object collision detected"), "object collision, retrying after 10s", "phase", i, "collisions", collidingObjs)
222-
// collisions are probably stickier than phase roll out probe failures - so we'd probably want to set
223-
// Progressing to false here due to the collision
224-
markAsNotProgressing(rev, ocv1.ClusterExtensionRevisionReasonRetrying, fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")))
225-
226-
// NOTE(pedjak): not sure if we want to retry here - collisions are probably not transient?
181+
setRetryingConditions(rev, fmt.Sprintf("revision object collisions in phase %d\n%s", i, strings.Join(collidingObjs, "\n\n")))
227182
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
228183
}
229184
}
230185

231186
if !rres.InTransistion() {
232-
// we have rolled out all objects in all phases, not interested in probes here
233-
markAsNotProgressing(rev, ocv1.ClusterExtensionRevisionReasonRolledOut, fmt.Sprintf("Revision %s is rolled out.", revVersion))
187+
markAsNotProgressing(rev, ocv1.ClusterExtensionRevisionReasonRolledOut, fmt.Sprintf("Revision %s has rolled out.", revVersion))
188+
} else {
189+
markAsProgressing(rev, ocv1.ClusterExtensionRevisionReasonRollingOut, fmt.Sprintf("Revision %s is rolling out.", revVersion))
234190
}
235191

236192
//nolint:nestif
@@ -246,12 +202,12 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
246202
// TODO: It feels like an error here needs to propagate to a status _somewhere_.
247203
// Not sure the current CER makes sense? But it also feels off to set the CE
248204
// status from outside the CE reconciler.
205+
l.Error(err, "failed to patch previous revision", "revision", a.GetName())
206+
setRetryingConditions(rev, fmt.Sprintf("error archiving previous revision: %s", err))
249207
return ctrl.Result{}, fmt.Errorf("archive previous Revision: %w", err)
250208
}
251209
}
252210

253-
// It would be good to understand from Nico how we can distinguish between progression and availability probes
254-
// and how to best check that all Availability probes are passing
255211
markAsAvailable(rev, ocv1.ClusterExtensionRevisionReasonProbesSucceeded, "Objects are available and pass all probes.")
256212

257213
// We'll probably only want to remove this once we are done updating the ClusterExtension conditions
@@ -290,10 +246,11 @@ func (c *ClusterExtensionRevisionReconciler) reconcile(ctx context.Context, rev
290246
break
291247
}
292248
}
249+
293250
if len(probeFailureMsgs) > 0 {
294251
markAsUnavailable(rev, ocv1.ClusterExtensionRevisionReasonProbeFailure, strings.Join(probeFailureMsgs, "\n"))
295252
} else {
296-
markAsUnavailable(rev, ocv1.ClusterExtensionRevisionReasonReconciling, fmt.Sprintf("Revision %s has not been rolled out completely.", revVersion))
253+
markAsUnavailable(rev, ocv1.ClusterExtensionRevisionReasonReconciling, fmt.Sprintf("Revision %s is rolling out.", revVersion))
297254
}
298255
}
299256

@@ -310,13 +267,7 @@ func (c *ClusterExtensionRevisionReconciler) teardown(ctx context.Context, rev *
310267
} else {
311268
l.Error(err, "revision teardown failed")
312269
}
313-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
314-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
315-
Status: metav1.ConditionFalse,
316-
Reason: ocv1.ClusterExtensionRevisionReasonReconciling,
317-
Message: err.Error(),
318-
ObservedGeneration: rev.Generation,
319-
})
270+
setRetryingConditions(rev, err.Error())
320271
return ctrl.Result{}, fmt.Errorf("revision teardown: %v", err)
321272
}
322273

@@ -330,36 +281,13 @@ func (c *ClusterExtensionRevisionReconciler) teardown(ctx context.Context, rev *
330281
}
331282

332283
if err := c.TrackingCache.Free(ctx, rev); err != nil {
333-
meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
334-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
335-
Status: metav1.ConditionFalse,
336-
Reason: ocv1.ClusterExtensionRevisionReasonReconciling,
337-
Message: err.Error(),
338-
ObservedGeneration: rev.Generation,
339-
})
284+
setRetryingConditions(rev, err.Error())
340285
return ctrl.Result{}, fmt.Errorf("error stopping informers: %v", err)
341286
}
342287

343288
// Ensure conditions are set before removing the finalizer when archiving
344-
if rev.Spec.LifecycleState == ocv1.ClusterExtensionRevisionLifecycleStateArchived {
345-
condUpdated := meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
346-
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
347-
Status: metav1.ConditionUnknown,
348-
Reason: ocv1.ClusterExtensionRevisionReasonArchived,
349-
Message: "revision is archived",
350-
ObservedGeneration: rev.Generation,
351-
})
352-
condUpdated = meta.SetStatusCondition(&rev.Status.Conditions, metav1.Condition{
353-
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
354-
Status: metav1.ConditionFalse,
355-
Reason: ocv1.ClusterExtensionRevisionReasonArchived,
356-
Message: "revision is archived",
357-
ObservedGeneration: rev.Generation,
358-
}) || condUpdated
359-
360-
if condUpdated {
361-
return ctrl.Result{}, nil
362-
}
289+
if rev.Spec.LifecycleState == ocv1.ClusterExtensionRevisionLifecycleStateArchived && setArchivedConditions(rev) {
290+
return ctrl.Result{}, nil
363291
}
364292

365293
if err := c.removeFinalizer(ctx, rev, clusterExtensionRevisionTeardownFinalizer); err != nil {
@@ -458,7 +386,7 @@ func (c *ClusterExtensionRevisionReconciler) listPreviousRevisions(ctx context.C
458386
}
459387

460388
revList := &ocv1.ClusterExtensionRevisionList{}
461-
if err := c.TrackingCache.List(ctx, revList, client.MatchingLabels{
389+
if err := c.Client.List(ctx, revList, client.MatchingLabels{
462390
ClusterExtensionRevisionOwnerLabel: ownerLabel,
463391
}); err != nil {
464392
return nil, fmt.Errorf("listing revisions: %w", err)
@@ -599,6 +527,11 @@ var (
599527
}
600528
)
601529

530+
func setRetryingConditions(cer *ocv1.ClusterExtensionRevision, message string) {
531+
markAsProgressing(cer, ocv1.ClusterExtensionRevisionReasonRetrying, message)
532+
markAsAvailableUnknown(cer, ocv1.ClusterExtensionRevisionReasonReconciling, message)
533+
}
534+
602535
func markAsProgressing(cer *ocv1.ClusterExtensionRevision, reason, message string) {
603536
meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{
604537
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
@@ -609,8 +542,8 @@ func markAsProgressing(cer *ocv1.ClusterExtensionRevision, reason, message strin
609542
})
610543
}
611544

612-
func markAsNotProgressing(cer *ocv1.ClusterExtensionRevision, reason, message string) {
613-
meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{
545+
func markAsNotProgressing(cer *ocv1.ClusterExtensionRevision, reason, message string) bool {
546+
return meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{
614547
Type: ocv1.ClusterExtensionRevisionTypeProgressing,
615548
Status: metav1.ConditionFalse,
616549
Reason: reason,
@@ -619,8 +552,8 @@ func markAsNotProgressing(cer *ocv1.ClusterExtensionRevision, reason, message st
619552
})
620553
}
621554

622-
func markAsAvailable(cer *ocv1.ClusterExtensionRevision, reason, message string) {
623-
meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{
555+
func markAsAvailable(cer *ocv1.ClusterExtensionRevision, reason, message string) bool {
556+
return meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{
624557
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
625558
Status: metav1.ConditionTrue,
626559
Reason: reason,
@@ -638,3 +571,19 @@ func markAsUnavailable(cer *ocv1.ClusterExtensionRevision, reason, message strin
638571
ObservedGeneration: cer.Generation,
639572
})
640573
}
574+
575+
func markAsAvailableUnknown(cer *ocv1.ClusterExtensionRevision, reason, message string) bool {
576+
return meta.SetStatusCondition(&cer.Status.Conditions, metav1.Condition{
577+
Type: ocv1.ClusterExtensionRevisionTypeAvailable,
578+
Status: metav1.ConditionUnknown,
579+
Reason: reason,
580+
Message: message,
581+
ObservedGeneration: cer.Generation,
582+
})
583+
}
584+
585+
func setArchivedConditions(cer *ocv1.ClusterExtensionRevision) bool {
586+
const msg = "revision is archived"
587+
updated := markAsNotProgressing(cer, ocv1.ClusterExtensionRevisionReasonArchived, msg)
588+
return markAsAvailableUnknown(cer, ocv1.ClusterExtensionRevisionReasonArchived, msg) || updated
589+
}

0 commit comments

Comments
 (0)