Skip to content

Commit 1009d0f

Browse files
committed
Add an exception for fast CO updates
The added exception is for the case where * a CO does not report Progressing, and * CVO waits for the CO's update less than 2 minutes
1 parent 1527393 commit 1009d0f

File tree

2 files changed

+182
-1
lines changed

2 files changed

+182
-1
lines changed

pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -618,10 +618,12 @@ func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals)
618618
var eventsInUpgradeWindows monitorapi.Intervals
619619

620620
var start, stop time.Time
621+
COWaiting := map[string]monitorapi.Intervals{}
621622
for _, event := range events {
622623
if !isInUpgradeWindow(upgradeWindows, event) {
623624
continue
624625
}
626+
updateCOWaiting(event, COWaiting)
625627
eventsInUpgradeWindows = append(eventsInUpgradeWindows, event)
626628
if start.IsZero() || event.From.Before(start) {
627629
start = event.From
@@ -651,6 +653,16 @@ func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals)
651653
}
652654

653655
except := func(co string, _ string) string {
656+
intervals, ok := COWaiting[co]
657+
if !ok {
658+
// CO have not shown up in CVO Progressing message
659+
return fmt.Sprintf("%s completing its update so fast that CVO did not recogize any waiting", co)
660+
}
661+
from, to := fromAndTo(intervals)
662+
if d := to.Sub(from); d < 2*time.Minute {
663+
// CO showed up in CVO Progressing message but the total duration is less than two minutes
664+
return fmt.Sprintf("%s completing its update within less than two minutes: %s", co, d.String())
665+
}
654666
return ""
655667
}
656668

@@ -668,6 +680,9 @@ func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals)
668680
exception = except(operatorName, "")
669681
if exception != "" {
670682
output = fmt.Sprintf("%s which is expected up to %s", output, exception)
683+
} else {
684+
from, to := fromAndTo(COWaiting[operatorName])
685+
output = fmt.Sprintf("%s and CVO waited for it over 2 minutes from %s to %s", output, from.Format(time.RFC3339), to.Format(time.RFC3339))
671686
}
672687
mcTestCase.FailureOutput = &junitapi.FailureOutput{
673688
Output: output,
@@ -835,6 +850,67 @@ func testUpgradeOperatorProgressingStateTransitions(events monitorapi.Intervals)
835850
return ret
836851
}
837852

853+
func fromAndTo(intervals monitorapi.Intervals) (time.Time, time.Time) {
854+
var from, to time.Time
855+
for _, interval := range intervals {
856+
if from.IsZero() || interval.From.Before(from) {
857+
from = interval.From
858+
}
859+
if to.IsZero() || interval.To.After(to) {
860+
to = interval.To
861+
}
862+
}
863+
return from, to
864+
}
865+
866+
func updateCOWaiting(interval monitorapi.Interval, waiting map[string]monitorapi.Intervals) {
867+
if waiting == nil {
868+
return
869+
}
870+
if interval.Source != monitorapi.SourceOperatorState ||
871+
interval.Locator.Type != monitorapi.LocatorTypeClusterVersion ||
872+
interval.Locator.Keys[monitorapi.LocatorClusterVersionKey] != "version" {
873+
return
874+
}
875+
876+
c, ok := interval.Message.Annotations[monitorapi.AnnotationCondition]
877+
if !ok {
878+
return
879+
}
880+
if t := configv1.ClusterStatusConditionType(c); t != configv1.OperatorProgressing {
881+
return
882+
}
883+
884+
status, ok := interval.Message.Annotations[monitorapi.AnnotationStatus]
885+
if !ok {
886+
return
887+
}
888+
s := configv1.ConditionStatus(status)
889+
if s != configv1.ConditionTrue {
890+
return
891+
}
892+
operators := getOperatorsFromProgressingMessage(interval.Message.HumanMessage)
893+
for o := range operators {
894+
waiting[o] = append(waiting[o], interval)
895+
}
896+
return
897+
}
898+
899+
const ProgressingWaitingCOsKey = "waiting on "
900+
901+
func getOperatorsFromProgressingMessage(message string) sets.Set[string] {
902+
ret := sets.New[string]()
903+
// If CVO changes the message, we have to change here accordingly
904+
// https://github.com/openshift/cluster-version-operator/blob/a26c85e0fc1651645b009ee8c84b50e629fcc299/pkg/cvo/status.go#L593
905+
if i := strings.LastIndex(message, ProgressingWaitingCOsKey); i == -1 {
906+
return nil
907+
} else {
908+
ret.Insert(strings.Split(message[i+len(ProgressingWaitingCOsKey):], ", ")...)
909+
910+
}
911+
return ret
912+
}
913+
838914
type startedStaged struct {
839915
// OSUpdateStarted is the event Reason emitted by the machine config operator when a node begins extracting
840916
// it's OS content.

pkg/monitortests/clusterversionoperator/legacycvomonitortests/operators_test.go

Lines changed: 106 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,8 +4,12 @@ import (
44
"testing"
55
"time"
66

7-
"github.com/openshift/origin/pkg/monitor/monitorapi"
7+
configv1 "github.com/openshift/api/config/v1"
88
"github.com/stretchr/testify/assert"
9+
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
10+
"k8s.io/apimachinery/pkg/util/sets"
11+
12+
"github.com/openshift/origin/pkg/monitor/monitorapi"
913
)
1014

1115
// buildUpgradeInterval creates a standard upgrade interval using the given reason and eventTime.
@@ -192,3 +196,104 @@ func Test_isInUpgradeWindow(t *testing.T) {
192196
})
193197
}
194198
}
199+
200+
func Test_getOperatorsFromProgressingMessage(t *testing.T) {
201+
tests := []struct {
202+
name string
203+
message string
204+
expect sets.Set[string]
205+
}{
206+
{
207+
name: "unknown message ",
208+
message: "bar foo",
209+
},
210+
{
211+
name: "single CO",
212+
message: "working towards ${VERSION}: 106 of 841 done (12% complete), waiting on single",
213+
expect: sets.New[string]("single"),
214+
},
215+
{
216+
name: "multiple COs",
217+
message: "working towards ${VERSION}: 106 of 841 done (12% complete), waiting on etcd, kube-apiserver",
218+
expect: sets.New[string]("kube-apiserver", "etcd"),
219+
},
220+
}
221+
for _, tt := range tests {
222+
t.Run(tt.name, func(t *testing.T) {
223+
actual := getOperatorsFromProgressingMessage(tt.message)
224+
assert.Equal(t, tt.expect, actual)
225+
})
226+
}
227+
}
228+
229+
func Test_updateCOWaiting(t *testing.T) {
230+
now := time.Now()
231+
next := 3 * time.Hour
232+
interval := func(m string, start time.Time, d time.Duration) monitorapi.Interval {
233+
return monitorapi.NewInterval(monitorapi.SourceOperatorState, monitorapi.Warning).
234+
Locator(monitorapi.NewLocator().ClusterVersion(&configv1.ClusterVersion{
235+
ObjectMeta: metav1.ObjectMeta{Name: "version"}})).
236+
Message(monitorapi.NewMessage().Reason("reason").
237+
HumanMessage(m).
238+
WithAnnotation(monitorapi.AnnotationCondition, string(configv1.OperatorProgressing)).
239+
WithAnnotation(monitorapi.AnnotationStatus, string(configv1.ConditionTrue))).
240+
Display().
241+
Build(start, start.Add(d))
242+
}
243+
244+
tests := []struct {
245+
name string
246+
message string
247+
d time.Duration
248+
waiting map[string]monitorapi.Intervals
249+
expect map[string]time.Duration
250+
}{
251+
{
252+
name: "happy case",
253+
d: time.Hour,
254+
message: "working towards ${VERSION}: 106 of 841 done (12% complete), waiting on etcd, kube-apiserver",
255+
waiting: map[string]monitorapi.Intervals{},
256+
expect: map[string]time.Duration{"etcd": time.Hour, "kube-apiserver": time.Hour},
257+
},
258+
{
259+
name: "incremental one",
260+
d: 2 * time.Minute,
261+
message: "working towards ${VERSION}: 106 of 841 done (12% complete), waiting on etcd, kube-apiserver",
262+
waiting: map[string]monitorapi.Intervals{"etcd": {interval("some", now, 3*time.Minute)}},
263+
expect: map[string]time.Duration{"etcd": next + 2*time.Minute, "kube-apiserver": 2 * time.Minute},
264+
},
265+
{
266+
name: "incremental all",
267+
d: 2 * time.Minute,
268+
message: "working towards ${VERSION}: 106 of 841 done (12% complete), waiting on etcd, kube-apiserver",
269+
waiting: map[string]monitorapi.Intervals{"etcd": {interval("some", now, 3*time.Minute)}, "kube-apiserver": {interval("some", now, 6*time.Minute)}},
270+
expect: map[string]time.Duration{"etcd": next + 2*time.Minute, "kube-apiserver": next + 2*time.Minute},
271+
},
272+
{
273+
name: "unknown message",
274+
message: "unknown message",
275+
waiting: map[string]monitorapi.Intervals{},
276+
expect: map[string]time.Duration{},
277+
},
278+
}
279+
for _, tt := range tests {
280+
i := monitorapi.NewInterval(monitorapi.SourceOperatorState, monitorapi.Warning).
281+
Locator(monitorapi.NewLocator().ClusterVersion(&configv1.ClusterVersion{
282+
ObjectMeta: metav1.ObjectMeta{Name: "version"}})).
283+
Message(monitorapi.NewMessage().Reason("reason").
284+
HumanMessage(tt.message).
285+
WithAnnotation(monitorapi.AnnotationCondition, string(configv1.OperatorProgressing)).
286+
WithAnnotation(monitorapi.AnnotationStatus, string(configv1.ConditionTrue))).
287+
Display().
288+
Build(now.Add(next), now.Add(next+tt.d))
289+
t.Run(tt.name, func(t *testing.T) {
290+
updateCOWaiting(i, tt.waiting)
291+
actual := map[string]time.Duration{}
292+
for co, intervals := range tt.waiting {
293+
from, to := fromAndTo(intervals)
294+
actual[co] = to.Sub(from)
295+
}
296+
assert.Equal(t, tt.expect, actual)
297+
})
298+
}
299+
}

0 commit comments

Comments
 (0)