Skip to content

Commit 5915d37

Browse files
Merge pull request #1050 from Davoska/OCPBUGS-15200-filter-out-update-effect-none-errors
OCPBUGS-15200: Filter out shallowly `UpdateEffectNone` errors from a `MultipleErrors` message in the Failing condition
2 parents ce6c0b8 + 7676d46 commit 5915d37

File tree

3 files changed

+525
-19
lines changed

3 files changed

+525
-19
lines changed

pkg/cvo/cvo_scenarios_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3568,7 +3568,7 @@ func TestCVO_ParallelError(t *testing.T) {
35683568
},
35693569
"0000_20_a_file.yaml": nil,
35703570
"0000_20_b_file.yaml": &payload.UpdateError{
3571-
UpdateEffect: payload.UpdateEffectNone,
3571+
UpdateEffect: payload.UpdateEffectFail,
35723572
Message: "Failed to reconcile 20-b-file resource",
35733573
},
35743574
}}
@@ -3753,7 +3753,7 @@ func TestCVO_ParallelError(t *testing.T) {
37533753
{Type: DesiredReleaseAccepted, Status: configv1.ConditionTrue, Reason: "PayloadLoaded",
37543754
Message: "Payload loaded version=\"1.0.0-abc\" image=\"image/image:1\" architecture=\"" + architecture + "\""},
37553755
{Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse},
3756-
{Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Reason: "MultipleErrors", Message: "Multiple errors are preventing progress:\n* Failed to reconcile 10-a-file resource\n* Failed to reconcile 20-b-file resource"},
3756+
{Type: ClusterStatusFailing, Status: configv1.ConditionTrue, Message: "Failed to reconcile 20-b-file resource"},
37573757
{Type: configv1.OperatorProgressing, Status: configv1.ConditionTrue, Reason: "MultipleErrors", Message: "Unable to apply 1.0.0-abc: an unknown error has occurred: MultipleErrors"},
37583758
{Type: configv1.RetrievedUpdates, Status: configv1.ConditionFalse},
37593759
},

pkg/cvo/status.go

Lines changed: 75 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,37 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
293293
})
294294
}
295295

296-
progressReason, progressMessage, skipFailure := convertErrorToProgressing(cvStatus.History, now.Time, status)
296+
failure := status.Failure
297+
failingReason, failingMessage := getReasonMessageFromError(failure)
298+
var skipFailure bool
299+
var progressReason, progressMessage string
300+
if !status.Reconciling && len(cvStatus.History) != 0 {
301+
progressReason, progressMessage, skipFailure = convertErrorToProgressing(now.Time, failure)
302+
failure = filterErrorForFailingCondition(failure, payload.UpdateEffectNone)
303+
filteredFailingReason, filteredFailingMessage := getReasonMessageFromError(failure)
304+
if failingReason != filteredFailingReason {
305+
klog.Infof("Filtered failure reason changed from '%s' to '%s'", failingReason, filteredFailingReason)
306+
}
307+
if failingMessage != filteredFailingMessage {
308+
klog.Infof("Filtered failure message changed from '%s' to '%s'", failingMessage, filteredFailingMessage)
309+
}
310+
failingReason, failingMessage = filteredFailingReason, filteredFailingMessage
311+
}
312+
313+
// set the failing condition
314+
failingCondition := configv1.ClusterOperatorStatusCondition{
315+
Type: ClusterStatusFailing,
316+
Status: configv1.ConditionFalse,
317+
LastTransitionTime: now,
318+
}
319+
if failure != nil && !skipFailure {
320+
failingCondition.Status = configv1.ConditionTrue
321+
failingCondition.Reason = failingReason
322+
failingCondition.Message = failingMessage
323+
}
324+
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, failingCondition)
297325

326+
// update progressing
298327
if err := status.Failure; err != nil && !skipFailure {
299328
var reason string
300329
msg := progressMessage
@@ -307,16 +336,6 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
307336
msg = "an error occurred"
308337
}
309338

310-
// set the failing condition
311-
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{
312-
Type: ClusterStatusFailing,
313-
Status: configv1.ConditionTrue,
314-
Reason: reason,
315-
Message: err.Error(),
316-
LastTransitionTime: now,
317-
})
318-
319-
// update progressing
320339
if status.Reconciling {
321340
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{
322341
Type: configv1.OperatorProgressing,
@@ -336,9 +355,6 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
336355
}
337356

338357
} else {
339-
// clear the failure condition
340-
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{Type: ClusterStatusFailing, Status: configv1.ConditionFalse, LastTransitionTime: now})
341-
342358
// update progressing
343359
if status.Reconciling {
344360
message := fmt.Sprintf("Cluster version is %s", version)
@@ -413,6 +429,48 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
413429
}
414430
}
415431

432+
// getReasonMessageFromError returns the reason and the message from an error.
433+
// If the reason or message is not available, an empty string is returned.
434+
func getReasonMessageFromError(err error) (reason, message string) {
435+
if uErr, ok := err.(*payload.UpdateError); ok {
436+
reason = uErr.Reason
437+
}
438+
if err != nil {
439+
message = err.Error()
440+
}
441+
return reason, message
442+
}
443+
444+
// filterErrorForFailingCondition filters out update errors based on the given
445+
// updateEffect from a MultipleError error. If the err has the reason
446+
// MultipleErrors, its immediate nested errors are filtered out and the error
447+
// is recreated. If all nested errors are filtered out, nil is returned.
448+
func filterErrorForFailingCondition(err error, updateEffect payload.UpdateEffectType) error {
449+
if err == nil {
450+
return nil
451+
}
452+
if uErr, ok := err.(*payload.UpdateError); ok && uErr.Reason == "MultipleErrors" {
453+
if nested, ok := uErr.Nested.(interface{ Errors() []error }); ok {
454+
filtered := nested.Errors()
455+
filtered = filterOutUpdateErrors(filtered, updateEffect)
456+
return newMultipleError(filtered)
457+
}
458+
}
459+
return err
460+
}
461+
462+
// filterOutUpdateErrors filters out update errors of the given effect.
463+
func filterOutUpdateErrors(errs []error, updateEffect payload.UpdateEffectType) []error {
464+
filtered := make([]error, 0, len(errs))
465+
for _, err := range errs {
466+
if uErr, ok := err.(*payload.UpdateError); ok && uErr.UpdateEffect == updateEffect {
467+
continue
468+
}
469+
filtered = append(filtered, err)
470+
}
471+
return filtered
472+
}
473+
416474
func setImplicitlyEnabledCapabilitiesCondition(cvStatus *configv1.ClusterVersionStatus, implicitlyEnabled []configv1.ClusterVersionCapability,
417475
now metav1.Time) {
418476

@@ -479,11 +537,11 @@ func setDesiredReleaseAcceptedCondition(cvStatus *configv1.ClusterVersionStatus,
479537
// how an update error is interpreted. An error may simply need to be reported but does not indicate the update is
480538
// failing. An error may indicate the update is failing or that if the error continues for a defined interval the
481539
// update is failing.
482-
func convertErrorToProgressing(history []configv1.UpdateHistory, now time.Time, status *SyncWorkerStatus) (reason string, message string, ok bool) {
483-
if len(history) == 0 || status.Failure == nil || status.Reconciling {
540+
func convertErrorToProgressing(now time.Time, statusFailure error) (reason string, message string, ok bool) {
541+
if statusFailure == nil {
484542
return "", "", false
485543
}
486-
uErr, ok := status.Failure.(*payload.UpdateError)
544+
uErr, ok := statusFailure.(*payload.UpdateError)
487545
if !ok {
488546
return "", "", false
489547
}

0 commit comments

Comments
 (0)