diff --git a/pkg/cvo/cvo_scenarios_test.go b/pkg/cvo/cvo_scenarios_test.go
index 4ff4f03da..a8286af94 100644
--- a/pkg/cvo/cvo_scenarios_test.go
+++ b/pkg/cvo/cvo_scenarios_test.go
@@ -2563,13 +2563,15 @@ func TestCVO_ParallelError(t *testing.T) {
 	worker := o.configSync.(*SyncWorker)
 	b := &errorResourceBuilder{errors: map[string]error{
 		"0000_10_a_file.yaml": &payload.UpdateError{
-			Reason: "ClusterOperatorNotAvailable",
-			Name:   "operator-1",
+			Reason:       "ClusterOperatorNotAvailable",
+			UpdateEffect: payload.UpdateEffectNone,
+			Name:         "operator-1",
 		},
 		"0000_20_a_file.yaml": nil,
 		"0000_20_b_file.yaml": &payload.UpdateError{
-			Reason: "ClusterOperatorNotAvailable",
-			Name:   "operator-2",
+			Reason:       "ClusterOperatorNotAvailable",
+			UpdateEffect: payload.UpdateEffectNone,
+			Name:         "operator-2",
 		},
 	}}
 	worker.builder = b
diff --git a/pkg/cvo/internal/operatorstatus.go b/pkg/cvo/internal/operatorstatus.go
index 83731cfd0..004c76c20 100644
--- a/pkg/cvo/internal/operatorstatus.go
+++ b/pkg/cvo/internal/operatorstatus.go
@@ -96,6 +96,10 @@ func (b *clusterOperatorBuilder) WithModifier(f resourcebuilder.MetaV1ObjectModi
 
 func (b *clusterOperatorBuilder) Do(ctx context.Context) error {
 	os := readClusterOperatorV1OrDie(b.raw)
+
+	// add cluster operator's start time if not already there
+	payload.COUpdateStartTimesEnsureName(os.Name)
+
 	if b.modifier != nil {
 		b.modifier(os)
 	}
@@ -128,10 +132,11 @@ func waitForOperatorStatusToBeDone(ctx context.Context, interval time.Duration,
 		actual, err := client.Get(ctx, expected.Name)
 		if err != nil {
 			lastErr = &payload.UpdateError{
-				Nested:  err,
-				Reason:  "ClusterOperatorNotAvailable",
-				Message: fmt.Sprintf("Cluster operator %s has not yet reported success", expected.Name),
-				Name:    expected.Name,
+				Nested:       err,
+				UpdateEffect: payload.UpdateEffectNone,
+				Reason:       "ClusterOperatorNotAvailable",
+				Message:      fmt.Sprintf("Cluster operator %s has not yet reported success", expected.Name),
+				Name:         expected.Name,
 			}
 			return false, nil
 		}
@@ -160,10 +165,11 @@ func waitForOperatorStatusToBeDone(ctx context.Context, interval time.Duration,
 
 			message := fmt.Sprintf("Cluster operator %s is still updating", actual.Name)
 			lastErr = &payload.UpdateError{
-				Nested:  errors.New(lowerFirst(message)),
-				Reason:  "ClusterOperatorNotAvailable",
-				Message: message,
-				Name:    actual.Name,
+				Nested:       errors.New(lowerFirst(message)),
+				UpdateEffect: payload.UpdateEffectNone,
+				Reason:       "ClusterOperatorNotAvailable",
+				Message:      message,
+				Name:         actual.Name,
 			}
 			return false, nil
 		}
@@ -210,31 +216,44 @@ func waitForOperatorStatusToBeDone(ctx context.Context, interval time.Duration,
 			}
 		}
 
+		nestedMessage := fmt.Errorf("cluster operator %s conditions: available=%v, progressing=%v, degraded=%v",
+			actual.Name, available, progressing, degraded)
+
+		if !available {
+			lastErr = &payload.UpdateError{
+				Nested:       nestedMessage,
+				UpdateEffect: payload.UpdateEffectFail,
+				Reason:       "ClusterOperatorNotAvailable",
+				Message:      fmt.Sprintf("Cluster operator %s is not available", actual.Name),
+				Name:         actual.Name,
+			}
+			return false, nil
+		}
+
 		condition := failingCondition
 		if degradedCondition != nil {
 			condition = degradedCondition
 		}
 		if condition != nil && condition.Status == configv1.ConditionTrue {
-			message := fmt.Sprintf("Cluster operator %s is reporting a failure", actual.Name)
 			if len(condition.Message) > 0 {
-				message = fmt.Sprintf("Cluster operator %s is reporting a failure: %s", actual.Name, condition.Message)
+				nestedMessage = fmt.Errorf("cluster operator %s is reporting a message: %s", actual.Name, condition.Message)
 			}
 			lastErr = &payload.UpdateError{
-				Nested:  errors.New(lowerFirst(message)),
-				Reason:  "ClusterOperatorDegraded",
-				Message: message,
-				Name:    actual.Name,
+				Nested:       nestedMessage,
+				UpdateEffect: payload.UpdateEffectFailAfterInterval,
+				Reason:       "ClusterOperatorDegraded",
+				Message:      fmt.Sprintf("Cluster operator %s is degraded", actual.Name),
+				Name:         actual.Name,
 			}
 			return false, nil
 		}
 
 		lastErr = &payload.UpdateError{
-			Nested: fmt.Errorf("cluster operator %s is not done; it is available=%v, progressing=%v, degraded=%v",
-				actual.Name, available, progressing, degraded,
-			),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: fmt.Sprintf("Cluster operator %s has not yet reported success", actual.Name),
-			Name:    actual.Name,
+			Nested:       nestedMessage,
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      fmt.Sprintf("Cluster operator %s is updating versions", actual.Name),
+			Name:         actual.Name,
 		}
 		return false, nil
 	}, ctx.Done())
diff --git a/pkg/cvo/internal/operatorstatus_test.go b/pkg/cvo/internal/operatorstatus_test.go
index 4ccbc90ac..aeefe6776 100644
--- a/pkg/cvo/internal/operatorstatus_test.go
+++ b/pkg/cvo/internal/operatorstatus_test.go
@@ -11,7 +11,6 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/runtime/schema"
-	"k8s.io/apimachinery/pkg/util/diff"
 	clientgotesting "k8s.io/client-go/testing"
 
 	configv1 "github.com/openshift/api/config/v1"
@@ -43,10 +42,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  apierrors.NewNotFound(schema.GroupResource{"", "clusteroperator"}, "test-co"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co has not yet reported success",
-			Name:    "test-co",
+			Nested:       apierrors.NewNotFound(schema.GroupResource{"", "clusteroperator"}, "test-co"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co has not yet reported success",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting no versions with no operands",
@@ -62,10 +62,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is still updating"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co is still updating",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co is still updating"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is still updating",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting no versions",
@@ -83,10 +84,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is still updating"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co is still updating",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co is still updating"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is still updating",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting no versions for operand",
@@ -109,10 +111,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is still updating"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co is still updating",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co is still updating"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is still updating",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting old versions",
@@ -137,10 +140,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is still updating"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co is still updating",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co is still updating"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is still updating",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting mix of desired and old versions",
@@ -165,10 +169,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is still updating"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co is still updating",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co is still updating"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is still updating",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting desired operator and old versions for 2 operands",
@@ -197,10 +202,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is still updating"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co is still updating",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co is still updating"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is still updating",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting desired operator and mix of old and desired versions for 2 operands",
@@ -229,10 +235,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is still updating"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co is still updating",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co is still updating"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is still updating",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting desired versions and no conditions",
@@ -257,10 +264,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is not done; it is available=false, progressing=true, degraded=true"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co has not yet reported success",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co conditions: available=false, progressing=true, degraded=true"),
+			UpdateEffect: payload.UpdateEffectFail,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is not available",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting progressing=true",
@@ -286,13 +294,14 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is not done; it is available=false, progressing=true, degraded=true"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co has not yet reported success",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co conditions: available=false, progressing=true, degraded=true"),
+			UpdateEffect: payload.UpdateEffectFail,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is not available",
+			Name:         "test-co",
 		},
 	}, {
-		name: "cluster operator reporting degraded=true",
+		name: "cluster operator reporting available=false degraded=true",
 		actual: &configv1.ClusterOperator{
 			ObjectMeta: metav1.ObjectMeta{Name: "test-co"},
 			Status: configv1.ClusterOperatorStatus{
@@ -301,7 +310,7 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 				}, {
 					Name: "operand-1", Version: "v1",
 				}},
-				Conditions: []configv1.ClusterOperatorStatusCondition{{Type: configv1.OperatorDegraded, Status: configv1.ConditionTrue, Message: "random error"}},
+				Conditions: []configv1.ClusterOperatorStatusCondition{{Type: configv1.OperatorAvailable, Status: configv1.ConditionFalse}, {Type: configv1.OperatorDegraded, Status: configv1.ConditionTrue}},
 			},
 		},
 		exp: &configv1.ClusterOperator{
@@ -315,10 +324,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is reporting a failure: random error"),
-			Reason:  "ClusterOperatorDegraded",
-			Message: "Cluster operator test-co is reporting a failure: random error",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co conditions: available=false, progressing=true, degraded=true"),
+			UpdateEffect: payload.UpdateEffectFail,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is not available",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting available=true progressing=true",
@@ -344,10 +354,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is not done; it is available=true, progressing=true, degraded=true"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co has not yet reported success",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co conditions: available=true, progressing=true, degraded=true"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is updating versions",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting available=true degraded=true",
@@ -373,10 +384,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is reporting a failure: random error"),
-			Reason:  "ClusterOperatorDegraded",
-			Message: "Cluster operator test-co is reporting a failure: random error",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co is reporting a message: random error"),
+			UpdateEffect: payload.UpdateEffectFailAfterInterval,
+			Reason:       "ClusterOperatorDegraded",
+			Message:      "Cluster operator test-co is degraded",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting available=true progressing=true degraded=true",
@@ -402,10 +414,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is reporting a failure: random error"),
-			Reason:  "ClusterOperatorDegraded",
-			Message: "Cluster operator test-co is reporting a failure: random error",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co is reporting a message: random error"),
+			UpdateEffect: payload.UpdateEffectFailAfterInterval,
+			Reason:       "ClusterOperatorDegraded",
+			Message:      "Cluster operator test-co is degraded",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting available=true no progressing or degraded",
@@ -431,10 +444,11 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 			},
 		},
 		expErr: &payload.UpdateError{
-			Nested:  fmt.Errorf("cluster operator test-co is not done; it is available=true, progressing=true, degraded=true"),
-			Reason:  "ClusterOperatorNotAvailable",
-			Message: "Cluster operator test-co has not yet reported success",
-			Name:    "test-co",
+			Nested:       fmt.Errorf("cluster operator test-co conditions: available=true, progressing=true, degraded=true"),
+			UpdateEffect: payload.UpdateEffectNone,
+			Reason:       "ClusterOperatorNotAvailable",
+			Message:      "Cluster operator test-co is updating versions",
+			Name:         "test-co",
 		},
 	}, {
 		name: "cluster operator reporting available=true progressing=false degraded=false",
@@ -484,7 +498,7 @@ func Test_waitForOperatorStatusToBeDone(t *testing.T) {
 				t.Fatalf("unexpected error: %v", err)
 			}
 			if !reflect.DeepEqual(test.expErr, err) {
-				t.Fatalf("unexpected: %s", diff.ObjectReflectDiff(test.expErr, err))
+				t.Fatalf("Incorrect value returned -\nexpected: %#v\nreturned: %#v", test.expErr, err)
 			}
 		})
 	}
diff --git a/pkg/cvo/status.go b/pkg/cvo/status.go
index 5cad64d28..e2b24dd9e 100644
--- a/pkg/cvo/status.go
+++ b/pkg/cvo/status.go
@@ -6,6 +6,7 @@ import (
 	"encoding/json"
 	"fmt"
 	"math"
+	"strings"
 	"time"
 
 	"k8s.io/klog/v2"
@@ -240,14 +241,18 @@ func (optr *Operator) syncStatus(ctx context.Context, original, config *configv1
 		})
 	}
 
-	progressReason, progressShortMessage, skipFailure := convertErrorToProgressing(config.Status.History, now.Time, status)
+	progressReason, progressMessage, skipFailure := convertErrorToProgressing(config.Status.History, now.Time, status)
 
 	if err := status.Failure; err != nil && !skipFailure {
 		var reason string
-		msg := "an error occurred"
+		msg := progressMessage
 		if uErr, ok := err.(*payload.UpdateError); ok {
 			reason = uErr.Reason
-			msg = payload.SummaryForReason(reason, uErr.Name)
+			if msg == "" {
+				msg = payload.SummaryForReason(reason, uErr.Name)
+			}
+		} else if msg == "" {
+			msg = "an error occurred"
 		}
 
 		// set the failing condition
@@ -304,7 +309,7 @@ func (optr *Operator) syncStatus(ctx context.Context, original, config *configv1
 		case fractionComplete > 0 && skipFailure:
 			reason = progressReason
 			message = fmt.Sprintf("Working towards %s: %d of %d done (%.0f%% complete), %s", version,
-				status.Done, status.Total, math.Trunc(float64(fractionComplete*100)), progressShortMessage)
+				status.Done, status.Total, math.Trunc(float64(fractionComplete*100)), progressMessage)
 		case fractionComplete > 0:
 			message = fmt.Sprintf("Working towards %s: %d of %d done (%.0f%% complete)", version,
 				status.Done, status.Total, math.Trunc(float64(fractionComplete*100)))
@@ -315,7 +320,7 @@ func (optr *Operator) syncStatus(ctx context.Context, original, config *configv1
 			message = fmt.Sprintf("Working towards %s: downloading update", version)
 		case skipFailure:
 			reason = progressReason
-			message = fmt.Sprintf("Working towards %s: %s", version, progressShortMessage)
+			message = fmt.Sprintf("Working towards %s: %s", version, progressMessage)
 		default:
 			message = fmt.Sprintf("Working towards %s", version)
 		}
@@ -348,21 +353,35 @@ func (optr *Operator) syncStatus(ctx context.Context, original, config *configv1
 
 // convertErrorToProgressing returns true if the provided status indicates a failure condition can be interpreted as
 // still making internal progress. The general error we try to suppress is an operator or operators still being
-// unavailable AND the general payload task making progress towards its goal. An operator is given 40 minutes since
-// its last update to go ready, or an hour has elapsed since the update began, before the condition is ignored.
+// unavailable AND the general payload task making progress towards its goal. The error's UpdateEffect determines
+// whether an error should be considered a failure and, if so, whether the operator should be given up to 40 minutes
+// to recover from the error.
 func convertErrorToProgressing(history []configv1.UpdateHistory, now time.Time, status *SyncWorkerStatus) (reason string, message string, ok bool) {
-	if len(history) == 0 || status.Failure == nil || status.Reconciling || status.LastProgress.IsZero() {
-		return "", "", false
-	}
-	if now.Sub(status.LastProgress) > 40*time.Minute || now.Sub(history[0].StartedTime.Time) > time.Hour {
+	if len(history) == 0 || status.Failure == nil || status.Reconciling {
 		return "", "", false
 	}
 	uErr, ok := status.Failure.(*payload.UpdateError)
 	if !ok {
 		return "", "", false
 	}
-	if uErr.Reason == "ClusterOperatorNotAvailable" || uErr.Reason == "ClusterOperatorsNotAvailable" {
+	switch uErr.UpdateEffect {
+	case payload.UpdateEffectNone:
 		return uErr.Reason, fmt.Sprintf("waiting on %s", uErr.Name), true
+	case payload.UpdateEffectFail:
+		return "", "", false
+	case payload.UpdateEffectFailAfterInterval:
+		var exceeded []string
+		threshold := now.Add(-(40 * time.Minute))
+		for _, name := range strings.Split(uErr.Name, ", ") {
+			if payload.COUpdateStartTimesGet(name).Before(threshold) {
+				exceeded = append(exceeded, name)
+			}
+		}
+		if len(exceeded) > 0 {
+			return uErr.Reason, fmt.Sprintf("wait has exceeded 40 minutes for these operators: %s", strings.Join(exceeded, ", ")), false
+		} else {
+			return uErr.Reason, fmt.Sprintf("waiting up to 40 minutes on %s", uErr.Name), true
+		}
 	}
 	return "", "", false
 }
diff --git a/pkg/cvo/sync_worker.go b/pkg/cvo/sync_worker.go
index b527a982d..c44df7e17 100644
--- a/pkg/cvo/sync_worker.go
+++ b/pkg/cvo/sync_worker.go
@@ -548,6 +548,10 @@ func (w *SyncWorker) syncOnce(ctx context.Context, work *SyncWork, maxWorkers in
 	}
 	klog.V(4).Infof("Running sync %s (force=%t) on generation %d in state %s at attempt %d", versionString(desired), work.Desired.Force, work.Generation, work.State, work.Attempt)
 
+	if work.Attempt == 0 {
+		payload.InitCOUpdateStartTimes()
+	}
+
 	// cache the payload until the release image changes
 	validPayload := w.payload
 	if validPayload != nil && validPayload.Release.Image == desired.Image {
@@ -985,6 +989,7 @@ func isClusterOperatorNotAvailable(err error) bool {
 // newClusterOperatorsNotAvailable unifies multiple ClusterOperatorNotAvailable errors into
 // a single error. It returns nil if the provided errors are not of the same type.
 func newClusterOperatorsNotAvailable(errs []error) error {
+	updateEffect := payload.UpdateEffectNone
 	names := make([]string, 0, len(errs))
 	for _, err := range errs {
 		uErr, ok := err.(*payload.UpdateError)
@@ -994,6 +999,15 @@ func newClusterOperatorsNotAvailable(errs []error) error {
 		if len(uErr.Name) > 0 {
 			names = append(names, uErr.Name)
 		}
+		switch uErr.UpdateEffect {
+		case payload.UpdateEffectNone:
+		case payload.UpdateEffectFail:
+			updateEffect = payload.UpdateEffectFail
+		case payload.UpdateEffectFailAfterInterval:
+			if updateEffect != payload.UpdateEffectFail {
+				updateEffect = payload.UpdateEffectFailAfterInterval
+			}
+		}
 	}
 	if len(names) == 0 {
 		return nil
 	}
@@ -1006,10 +1020,11 @@ func newClusterOperatorsNotAvailable(errs []error) error {
 
 	sort.Strings(names)
 	name := strings.Join(names, ", ")
 	return &payload.UpdateError{
-		Nested:  errors.NewAggregate(errs),
-		Reason:  "ClusterOperatorsNotAvailable",
-		Message: fmt.Sprintf("Some cluster operators are still updating: %s", name),
-		Name:    name,
+		Nested:       errors.NewAggregate(errs),
+		UpdateEffect: updateEffect,
+		Reason:       "ClusterOperatorsNotAvailable",
+		Message:      fmt.Sprintf("Some cluster operators are still updating: %s", name),
+		Name:         name,
 	}
 }
diff --git a/pkg/payload/task.go b/pkg/payload/task.go
index aa90600cf..7a1832811 100644
--- a/pkg/payload/task.go
+++ b/pkg/payload/task.go
@@ -4,6 +4,7 @@ import (
 	"context"
 	"fmt"
 	"strings"
+	"sync"
 	"time"
 
 	"github.com/pkg/errors"
@@ -22,6 +23,11 @@ var (
 		Name: "cluster_operator_payload_errors",
 		Help: "Report the number of errors encountered applying the payload.",
 	}, []string{"version"})
+
+	clusterOperatorUpdateStartTimes = struct {
+		lock sync.RWMutex
+		m    map[string]time.Time
+	}{m: make(map[string]time.Time)}
 )
 
 func init() {
@@ -30,6 +36,31 @@ func init() {
 	)
 }
 
+// InitCOUpdateStartTimes re-creates the clusterOperatorUpdateStartTimes map, thereby resetting
+// it to an empty map.
+func InitCOUpdateStartTimes() {
+	clusterOperatorUpdateStartTimes.lock.Lock()
+	clusterOperatorUpdateStartTimes.m = make(map[string]time.Time)
+	clusterOperatorUpdateStartTimes.lock.Unlock()
+}
+
+// COUpdateStartTimesEnsureName adds name to the clusterOperatorUpdateStartTimes map, set to the
+// current time, if name does not already exist in the map.
+func COUpdateStartTimesEnsureName(name string) {
+	clusterOperatorUpdateStartTimes.lock.Lock()
+	if _, ok := clusterOperatorUpdateStartTimes.m[name]; !ok {
+		clusterOperatorUpdateStartTimes.m[name] = time.Now()
+	}
+	clusterOperatorUpdateStartTimes.lock.Unlock()
+}
+
+// COUpdateStartTimesGet returns name's value from the clusterOperatorUpdateStartTimes map.
+func COUpdateStartTimesGet(name string) time.Time {
+	clusterOperatorUpdateStartTimes.lock.Lock()
+	defer clusterOperatorUpdateStartTimes.lock.Unlock()
+	return clusterOperatorUpdateStartTimes.m[name]
+}
+
 // ResourceBuilder abstracts how a manifest is created on the server. Introduced for testing.
 type ResourceBuilder interface {
 	Apply(context.Context, *manifest.Manifest, State) error
@@ -112,12 +143,28 @@ func (st *Task) Run(ctx context.Context, version string, builder ResourceBuilder
 	}
 }
 
+// UpdateEffectType defines the effect an update error has on the overall update state.
+type UpdateEffectType string
+
+const (
+	// "None" defines an error as having no effect on the update state.
+	UpdateEffectNone UpdateEffectType = "None"
+
+	// "Fail" defines an error as indicating the update is failing.
+	UpdateEffectFail UpdateEffectType = "Fail"
+
+	// "FailAfterInterval" defines an error as one which indicates the update is failing
+	// if the error continues for a defined interval.
+	UpdateEffectFailAfterInterval UpdateEffectType = "FailAfterInterval"
+)
+
 // UpdateError is a wrapper for errors that occur during a payload sync.
 type UpdateError struct {
-	Nested  error
-	Reason  string
-	Message string
-	Name    string
+	Nested       error
+	UpdateEffect UpdateEffectType
+	Reason       string
+	Message      string
+	Name         string
 
 	Task *Task
 }
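
Note (reviewer sketch, not part of the diff above): the core behavior this change introduces is that an UpdateError's UpdateEffect decides whether the error surfaces as a failure immediately, never, or only after the affected operator has been updating for more than 40 minutes, as handled in convertErrorToProgressing. The following self-contained Go program is a minimal illustration of that decision under those assumptions; shouldFail, main, and the sample timestamps are hypothetical and only mirror the UpdateEffectType values added in pkg/payload/task.go.

// effect_sketch.go - minimal sketch of how an error's UpdateEffect maps to a
// hard failure, assuming the same 40-minute threshold used by the CVO.
package main

import (
	"fmt"
	"time"
)

type UpdateEffectType string

const (
	UpdateEffectNone              UpdateEffectType = "None"
	UpdateEffectFail              UpdateEffectType = "Fail"
	UpdateEffectFailAfterInterval UpdateEffectType = "FailAfterInterval"
)

// shouldFail reports whether an error with the given effect, for an operator
// whose update started at start, should be surfaced as a failure at time now.
func shouldFail(effect UpdateEffectType, start, now time.Time) bool {
	switch effect {
	case UpdateEffectFail:
		// "Fail" errors are treated as failures immediately.
		return true
	case UpdateEffectFailAfterInterval:
		// "FailAfterInterval" errors only become failures once the operator
		// has been working on the update for more than 40 minutes.
		return now.Sub(start) > 40*time.Minute
	default:
		// "None" errors never fail the update; they are reported as progress.
		return false
	}
}

func main() {
	now := time.Now()
	recent := now.Add(-10 * time.Minute)
	old := now.Add(-50 * time.Minute)

	fmt.Println(shouldFail(UpdateEffectFailAfterInterval, recent, now)) // false: still within the grace period
	fmt.Println(shouldFail(UpdateEffectFailAfterInterval, old, now))    // true: grace period exceeded
	fmt.Println(shouldFail(UpdateEffectNone, old, now))                 // false: never treated as a failure
}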