Skip to content

Commit ac72abe

Browse files
committed
pkg/cvo/status: Filter out immediate UpdateEffectNone errors nested in MultipleErrors in Failing condition
Various errors get propagated to users, such as the summarized task graph error. For example, in the form of the message in the Failing condition. However, update errors set with the update effect of UpdateEffectNone can confuse users, as these primarily informing messages get displayed together with valid update errors that heavily impact the update. This can result in a message such as: { "lastTransitionTime": "2023-06-20T13:40:12Z", "message": "Multiple errors are preventing progress:\n* Cluster operator authentication is updating versions\n* Could not update customresourcedefinition \"alertingrules.monitoring.openshift.io\" (512 of 993): the object is invalid, possibly due to local cluster configuration", "reason": "MultipleErrors", "status": "True", "type": "Failing" } The Failing condition is not true because of the UpdateEffectNone error ("Cluster operator authentication is updating versions"), but its message still gets displayed. This commit makes sure that update errors that do not heavily affect the update will be removed from the MultipleErrors error in the Failing condition message to an extent. The filtered out errors from the message will still be displayed in the logs and in other places, such as the ReconciliationIssues condition. The original code handles correctly situations where the status failure is an UpdateEffectNone error. The new changes leave such errors be. In case the MultipleErrors error contains only UpdateEffectNone errors, the error is unchanged to keep the original logic unchanged and keep the commit simple. The goal of this commit is to remove unimportant messages from MultipleErrors errors that contain valid messages in the Failing condition. The current code contains an override to set the Failing condition when history is empty or the CVO is reconciling. This commit will keep this logic functional. This means the filtering is only applied when history is not empty and the CVO is not reconciling the payload.
1 parent dc830db commit ac72abe

File tree

2 files changed

+267
-17
lines changed

2 files changed

+267
-17
lines changed

pkg/cvo/status.go

Lines changed: 75 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -293,8 +293,37 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
293293
})
294294
}
295295

296-
progressReason, progressMessage, skipFailure := convertErrorToProgressing(cvStatus.History, now.Time, status)
296+
failure := status.Failure
297+
failingReason, failingMessage := getReasonMessageFromError(failure)
298+
var skipFailure bool
299+
var progressReason, progressMessage string
300+
if !status.Reconciling && len(cvStatus.History) != 0 {
301+
progressReason, progressMessage, skipFailure = convertErrorToProgressing(now.Time, failure)
302+
failure = filterErrorForFailingCondition(failure, payload.UpdateEffectNone)
303+
filteredFailingReason, filteredFailingMessage := getReasonMessageFromError(failure)
304+
if failingReason != filteredFailingReason {
305+
klog.Infof("Filtered failure reason changed from '%s' to '%s'", failingReason, filteredFailingReason)
306+
}
307+
if failingMessage != filteredFailingMessage {
308+
klog.Infof("Filtered failure message changed from '%s' to '%s'", failingMessage, filteredFailingMessage)
309+
}
310+
failingReason, failingMessage = filteredFailingReason, filteredFailingMessage
311+
}
312+
313+
// set the failing condition
314+
failingCondition := configv1.ClusterOperatorStatusCondition{
315+
Type: ClusterStatusFailing,
316+
Status: configv1.ConditionFalse,
317+
LastTransitionTime: now,
318+
}
319+
if failure != nil && !skipFailure {
320+
failingCondition.Status = configv1.ConditionTrue
321+
failingCondition.Reason = failingReason
322+
failingCondition.Message = failingMessage
323+
}
324+
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, failingCondition)
297325

326+
// update progressing
298327
if err := status.Failure; err != nil && !skipFailure {
299328
var reason string
300329
msg := progressMessage
@@ -307,16 +336,6 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
307336
msg = "an error occurred"
308337
}
309338

310-
// set the failing condition
311-
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{
312-
Type: ClusterStatusFailing,
313-
Status: configv1.ConditionTrue,
314-
Reason: reason,
315-
Message: err.Error(),
316-
LastTransitionTime: now,
317-
})
318-
319-
// update progressing
320339
if status.Reconciling {
321340
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{
322341
Type: configv1.OperatorProgressing,
@@ -336,9 +355,6 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
336355
}
337356

338357
} else {
339-
// clear the failure condition
340-
resourcemerge.SetOperatorStatusCondition(&cvStatus.Conditions, configv1.ClusterOperatorStatusCondition{Type: ClusterStatusFailing, Status: configv1.ConditionFalse, LastTransitionTime: now})
341-
342358
// update progressing
343359
if status.Reconciling {
344360
message := fmt.Sprintf("Cluster version is %s", version)
@@ -413,6 +429,48 @@ func updateClusterVersionStatus(cvStatus *configv1.ClusterVersionStatus, status
413429
}
414430
}
415431

432+
// getReasonMessageFromError returns the reason and the message from an error.
433+
// If the reason or message is not available, an empty string is returned.
434+
func getReasonMessageFromError(err error) (reason, message string) {
435+
if uErr, ok := err.(*payload.UpdateError); ok {
436+
reason = uErr.Reason
437+
}
438+
if err != nil {
439+
message = err.Error()
440+
}
441+
return reason, message
442+
}
443+
444+
// filterErrorForFailingCondition filters out update errors based on the given
445+
// updateEffect from a MultipleError error. If the err has the reason
446+
// MultipleErrors, its immediate nested errors are filtered out and the error
447+
// is recreated. If all nested errors are filtered out, nil is returned.
448+
func filterErrorForFailingCondition(err error, updateEffect payload.UpdateEffectType) error {
449+
if err == nil {
450+
return nil
451+
}
452+
if uErr, ok := err.(*payload.UpdateError); ok && uErr.Reason == "MultipleErrors" {
453+
if nested, ok := uErr.Nested.(interface{ Errors() []error }); ok {
454+
filtered := nested.Errors()
455+
filtered = filterOutUpdateErrors(filtered, updateEffect)
456+
return newMultipleError(filtered)
457+
}
458+
}
459+
return err
460+
}
461+
462+
// filterOutUpdateErrors filters out update errors of the given effect.
463+
func filterOutUpdateErrors(errs []error, updateEffect payload.UpdateEffectType) []error {
464+
filtered := make([]error, 0, len(errs))
465+
for _, err := range errs {
466+
if uErr, ok := err.(*payload.UpdateError); ok && uErr.UpdateEffect == updateEffect {
467+
continue
468+
}
469+
filtered = append(filtered, err)
470+
}
471+
return filtered
472+
}
473+
416474
func setImplicitlyEnabledCapabilitiesCondition(cvStatus *configv1.ClusterVersionStatus, implicitlyEnabled []configv1.ClusterVersionCapability,
417475
now metav1.Time) {
418476

@@ -479,11 +537,11 @@ func setDesiredReleaseAcceptedCondition(cvStatus *configv1.ClusterVersionStatus,
479537
// how an update error is interpreted. An error may simply need to be reported but does not indicate the update is
480538
// failing. An error may indicate the update is failing or that if the error continues for a defined interval the
481539
// update is failing.
482-
func convertErrorToProgressing(history []configv1.UpdateHistory, now time.Time, status *SyncWorkerStatus) (reason string, message string, ok bool) {
483-
if len(history) == 0 || status.Failure == nil || status.Reconciling {
540+
func convertErrorToProgressing(now time.Time, statusFailure error) (reason string, message string, ok bool) {
541+
if statusFailure == nil {
484542
return "", "", false
485543
}
486-
uErr, ok := status.Failure.(*payload.UpdateError)
544+
uErr, ok := statusFailure.(*payload.UpdateError)
487545
if !ok {
488546
return "", "", false
489547
}

pkg/cvo/status_test.go

Lines changed: 192 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,13 @@ func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t
497497
Reason: "MultipleErrors",
498498
Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is updating versions",
499499
},
500+
shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true,
501+
expectedConditionModified: &configv1.ClusterOperatorStatusCondition{
502+
Type: ClusterStatusFailing,
503+
Status: configv1.ConditionTrue,
504+
Reason: "ClusterOperatorNotAvailable",
505+
Message: "Cluster operator A is not available",
506+
},
500507
},
501508
{
502509
name: "MultipleErrors of UpdateEffectNone and UpdateEffectNone",
@@ -527,6 +534,11 @@ func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t
527534
Reason: "MultipleErrors",
528535
Message: "Multiple errors are preventing progress:\n* Cluster operator A is updating versions\n* Cluster operator B is getting conscious",
529536
},
537+
shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true,
538+
expectedConditionModified: &configv1.ClusterOperatorStatusCondition{
539+
Type: ClusterStatusFailing,
540+
Status: configv1.ConditionFalse,
541+
},
530542
},
531543
{
532544
name: "MultipleErrors of UpdateEffectFail, UpdateEffectFailAfterInterval, and UpdateEffectNone",
@@ -562,6 +574,13 @@ func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t
562574
Reason: "MultipleErrors",
563575
Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator B is updating versions\n* Cluster operator C is degraded",
564576
},
577+
shouldModifyWhenNotReconcilingAndHistoryNotEmpty: true,
578+
expectedConditionModified: &configv1.ClusterOperatorStatusCondition{
579+
Type: ClusterStatusFailing,
580+
Status: configv1.ConditionTrue,
581+
Reason: "MultipleErrors",
582+
Message: "Multiple errors are preventing progress:\n* Cluster operator A is not available\n* Cluster operator C is degraded",
583+
},
565584
},
566585
}
567586
for _, tc := range tests {
@@ -601,3 +620,176 @@ func TestUpdateClusterVersionStatus_FilteringMultipleErrorsForFailingCondition(t
601620
})
602621
}
603622
}
623+
624+
func Test_filterOutUpdateErrors(t *testing.T) {
625+
type args struct {
626+
errs []error
627+
updateEffectType payload.UpdateEffectType
628+
}
629+
tests := []struct {
630+
name string
631+
args args
632+
want []error
633+
}{
634+
{
635+
name: "empty errors",
636+
args: args{
637+
errs: []error{},
638+
updateEffectType: payload.UpdateEffectNone,
639+
},
640+
want: []error{},
641+
},
642+
{
643+
name: "single update error of the specified value",
644+
args: args{
645+
errs: []error{
646+
&payload.UpdateError{
647+
Name: "None",
648+
UpdateEffect: payload.UpdateEffectNone,
649+
},
650+
},
651+
updateEffectType: payload.UpdateEffectNone,
652+
},
653+
want: []error{},
654+
},
655+
{
656+
name: "errors do not contain update errors of the specified value",
657+
args: args{
658+
errs: []error{
659+
&payload.UpdateError{
660+
Name: "Fail",
661+
UpdateEffect: payload.UpdateEffectFail,
662+
},
663+
&payload.UpdateError{
664+
Name: "Report",
665+
UpdateEffect: payload.UpdateEffectReport,
666+
},
667+
&payload.UpdateError{
668+
Name: "Fail After Interval",
669+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
670+
},
671+
},
672+
updateEffectType: payload.UpdateEffectNone,
673+
},
674+
want: []error{
675+
&payload.UpdateError{
676+
Name: "Fail",
677+
UpdateEffect: payload.UpdateEffectFail,
678+
},
679+
&payload.UpdateError{
680+
Name: "Report",
681+
UpdateEffect: payload.UpdateEffectReport,
682+
},
683+
&payload.UpdateError{
684+
Name: "Fail After Interval",
685+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
686+
},
687+
},
688+
},
689+
{
690+
name: "errors contain update errors of the specified value UpdateEffectNone",
691+
args: args{
692+
errs: []error{
693+
&payload.UpdateError{
694+
Name: "Fail After Interval",
695+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
696+
},
697+
&payload.UpdateError{
698+
Name: "None #1",
699+
UpdateEffect: payload.UpdateEffectNone,
700+
},
701+
&payload.UpdateError{
702+
Name: "Report",
703+
UpdateEffect: payload.UpdateEffectReport,
704+
},
705+
&payload.UpdateError{
706+
Name: "None #2",
707+
UpdateEffect: payload.UpdateEffectNone,
708+
},
709+
},
710+
updateEffectType: payload.UpdateEffectNone,
711+
},
712+
want: []error{
713+
&payload.UpdateError{
714+
Name: "Fail After Interval",
715+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
716+
},
717+
&payload.UpdateError{
718+
Name: "Report",
719+
UpdateEffect: payload.UpdateEffectReport,
720+
},
721+
},
722+
},
723+
{
724+
name: "errors contain update errors of the specified value UpdateEffectReport",
725+
args: args{
726+
errs: []error{
727+
&payload.UpdateError{
728+
Name: "Fail After Interval",
729+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
730+
},
731+
&payload.UpdateError{
732+
Name: "None #1",
733+
UpdateEffect: payload.UpdateEffectNone,
734+
},
735+
&payload.UpdateError{
736+
Name: "Report",
737+
UpdateEffect: payload.UpdateEffectReport,
738+
},
739+
&payload.UpdateError{
740+
Name: "None #2",
741+
UpdateEffect: payload.UpdateEffectNone,
742+
},
743+
},
744+
updateEffectType: payload.UpdateEffectReport,
745+
},
746+
want: []error{
747+
&payload.UpdateError{
748+
Name: "Fail After Interval",
749+
UpdateEffect: payload.UpdateEffectFailAfterInterval,
750+
},
751+
&payload.UpdateError{
752+
Name: "None #1",
753+
UpdateEffect: payload.UpdateEffectNone,
754+
},
755+
&payload.UpdateError{
756+
Name: "None #2",
757+
UpdateEffect: payload.UpdateEffectNone,
758+
},
759+
},
760+
},
761+
{
762+
name: "errors contain only update errors of the specified value UpdateEffectNone",
763+
args: args{
764+
errs: []error{
765+
&payload.UpdateError{
766+
Name: "None #1",
767+
UpdateEffect: payload.UpdateEffectNone,
768+
},
769+
&payload.UpdateError{
770+
Name: "None #2",
771+
UpdateEffect: payload.UpdateEffectNone,
772+
},
773+
&payload.UpdateError{
774+
Name: "None #3",
775+
UpdateEffect: payload.UpdateEffectNone,
776+
},
777+
&payload.UpdateError{
778+
Name: "None #4",
779+
UpdateEffect: payload.UpdateEffectNone,
780+
},
781+
},
782+
updateEffectType: payload.UpdateEffectNone,
783+
},
784+
want: []error{},
785+
},
786+
}
787+
for _, tt := range tests {
788+
t.Run(tt.name, func(t *testing.T) {
789+
filtered := filterOutUpdateErrors(tt.args.errs, tt.args.updateEffectType)
790+
if difference := cmp.Diff(filtered, tt.want); difference != "" {
791+
t.Errorf("got errors differ from expected:\n%s", difference)
792+
}
793+
})
794+
}
795+
}

0 commit comments

Comments
 (0)