Skip to content

Commit 06c5b04

Browse files
Merge pull request #1098 from wking/preserve-cache-on-update-service-error
OCPBUGS-44018: pkg/cvo/availableupdates: Preserve update advice on update-service failures
2 parents c4b8362 + d2b092a commit 06c5b04

File tree

2 files changed

+29
-10
lines changed

2 files changed

+29
-10
lines changed

pkg/cvo/availableupdates.go

Lines changed: 24 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,8 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1
5757
// updates are only checked at most once per minimumUpdateCheckInterval or if the generation changes
5858
optrAvailableUpdates := optr.getAvailableUpdates()
5959
needFreshFetch := true
60+
preserveCacheOnFailure := false
61+
maximumCacheInterval := 24 * time.Hour
6062
if optrAvailableUpdates == nil {
6163
klog.V(2).Info("First attempt to retrieve available updates")
6264
optrAvailableUpdates = &availableUpdates{}
@@ -66,14 +68,18 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1
6668
for i := range config.Status.ConditionalUpdates {
6769
optrAvailableUpdates.ConditionalUpdates = append(optrAvailableUpdates.ConditionalUpdates, *config.Status.ConditionalUpdates[i].DeepCopy())
6870
}
69-
} else if !optrAvailableUpdates.RecentlyChanged(optr.minimumUpdateCheckInterval) {
70-
klog.V(2).Infof("Retrieving available updates again, because more than %s has elapsed since %s", optr.minimumUpdateCheckInterval, optrAvailableUpdates.LastAttempt.Format(time.RFC3339))
7171
} else if channel != optrAvailableUpdates.Channel {
7272
klog.V(2).Infof("Retrieving available updates again, because the channel has changed from %q to %q", optrAvailableUpdates.Channel, channel)
7373
} else if desiredArch != optrAvailableUpdates.Architecture {
7474
klog.V(2).Infof("Retrieving available updates again, because the architecture has changed from %q to %q", optrAvailableUpdates.Architecture, desiredArch)
75+
} else if !optrAvailableUpdates.RecentlyChanged(maximumCacheInterval) {
76+
klog.V(2).Infof("Retrieving available updates again, because more than %s has elapsed since last change at %s. Will clear the cache if this fails.", maximumCacheInterval, optrAvailableUpdates.LastAttempt.Format(time.RFC3339))
77+
} else if !optrAvailableUpdates.RecentlyAttempted(optr.minimumUpdateCheckInterval) {
78+
klog.V(2).Infof("Retrieving available updates again, because more than %s has elapsed since last attempt at %s", optr.minimumUpdateCheckInterval, optrAvailableUpdates.LastAttempt.Format(time.RFC3339))
79+
preserveCacheOnFailure = true
7580
} else if updateService == optrAvailableUpdates.UpdateService || (updateService == defaultUpdateService && optrAvailableUpdates.UpdateService == "") {
7681
needsConditionalUpdateEval := false
82+
preserveCacheOnFailure = true
7783
for _, conditionalUpdate := range optrAvailableUpdates.ConditionalUpdates {
7884
if recommended := findRecommendedCondition(conditionalUpdate.Conditions); recommended == nil {
7985
needsConditionalUpdateEval = true
@@ -126,11 +132,19 @@ func (optr *Operator) syncAvailableUpdates(ctx context.Context, config *configv1
126132
optrAvailableUpdates.UpdateService = updateService
127133
optrAvailableUpdates.Channel = channel
128134
optrAvailableUpdates.Architecture = desiredArch
129-
optrAvailableUpdates.Current = current
130-
optrAvailableUpdates.Updates = updates
131-
optrAvailableUpdates.ConditionalUpdates = conditionalUpdates
132135
optrAvailableUpdates.ConditionRegistry = optr.conditionRegistry
133136
optrAvailableUpdates.Condition = condition
137+
138+
responseFailed := (condition.Type == configv1.RetrievedUpdates &&
139+
condition.Status == configv1.ConditionFalse &&
140+
(condition.Reason == "RemoteFailed" ||
141+
condition.Reason == "ResponseFailed" ||
142+
condition.Reason == "ResponseInvalid"))
143+
if !responseFailed || (responseFailed && !preserveCacheOnFailure) {
144+
optrAvailableUpdates.Current = current
145+
optrAvailableUpdates.Updates = updates
146+
optrAvailableUpdates.ConditionalUpdates = conditionalUpdates
147+
}
134148
}
135149

136150
optrAvailableUpdates.evaluateConditionalUpdates(ctx)
@@ -183,10 +197,14 @@ type availableUpdates struct {
183197
Condition configv1.ClusterOperatorStatusCondition
184198
}
185199

186-
func (u *availableUpdates) RecentlyChanged(interval time.Duration) bool {
200+
func (u *availableUpdates) RecentlyAttempted(interval time.Duration) bool {
187201
return u.LastAttempt.After(time.Now().Add(-interval))
188202
}
189203

204+
func (u *availableUpdates) RecentlyChanged(interval time.Duration) bool {
205+
return u.LastSyncOrConfigChange.After(time.Now().Add(-interval))
206+
}
207+
190208
func (u *availableUpdates) NeedsUpdate(original *configv1.ClusterVersion) *configv1.ClusterVersion {
191209
if u == nil {
192210
return nil

pkg/cvo/cvo_test.go

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2675,17 +2675,18 @@ func TestOperator_availableUpdatesSync(t *testing.T) {
26752675
},
26762676
},
26772677
{
2678-
name: "if last check time was too recent, do nothing",
2678+
name: "if last successful check time was too recent, do nothing",
26792679
handler: func(w http.ResponseWriter, req *http.Request) {
26802680
http.Error(w, "bad things", http.StatusInternalServerError)
26812681
},
26822682
optr: &Operator{
26832683
updateService: "http://localhost:8080/graph",
26842684
minimumUpdateCheckInterval: 1 * time.Minute,
26852685
availableUpdates: &availableUpdates{
2686-
UpdateService: "http://localhost:8080/graph",
2687-
Channel: "fast",
2688-
LastAttempt: time.Now(),
2686+
UpdateService: "http://localhost:8080/graph",
2687+
Channel: "fast",
2688+
LastAttempt: time.Now(),
2689+
LastSyncOrConfigChange: time.Now(),
26892690
},
26902691
release: configv1.Release{
26912692
Version: "4.0.1",

0 commit comments

Comments
 (0)