Skip to content

Commit c0e26a4

Browse files
committed
OTA-1418: Allow forcing health insights on CV
When a `ClusterVersion` is annotated with `usc.openshift.io/force-health-insight` (the annotation value is ignored; only its presence matters), make USC emit a health insight, suitable for testing the health insight lifecycle. Note that, for now, such a health insight never goes away; this will be addressed by follow-up PRs. Health insight UIDs are `usc-` followed by the unpadded base64 encoding of an MD5 hash of the insight's impact summary and the group, resource, namespace and name of each resource in its scope. This will become problematic if we ever want to do aggregation, but for now it suits the purpose.
1 parent 5a1a40f commit c0e26a4

File tree

2 files changed

+245
-18
lines changed

2 files changed

+245
-18
lines changed

pkg/updatestatus/controlplaneinformer.go

Lines changed: 87 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@ package updatestatus
22

33
import (
44
"context"
5+
"crypto/md5"
6+
"encoding/base64"
57
"errors"
68
"fmt"
79
"strings"
@@ -102,7 +104,7 @@ func (c *controlPlaneInformerController) sync(ctx context.Context, syncCtx facto
102104
return fmt.Errorf("failed to parse queue key: %w", err)
103105
}
104106

105-
var msg informerMsg
107+
var msgs []informerMsg
106108
switch t {
107109
case clusterVersionKindName:
108110
clusterVersion, err := c.clusterVersions.Get(name)
@@ -115,8 +117,11 @@ func (c *controlPlaneInformerController) sync(ctx context.Context, syncCtx facto
115117
}
116118

117119
now := c.now()
118-
insight := assessClusterVersion(clusterVersion, now)
119-
msg = makeInsightMsgForClusterVersion(insight, now)
120+
cvInsight, healthInsights := assessClusterVersion(clusterVersion, now)
121+
msgs = append(msgs, makeInsightMsgForClusterVersion(cvInsight, now))
122+
for item := range healthInsights {
123+
msgs = append(msgs, makeInsightMsgForHealthInsight(healthInsights[item], now))
124+
}
120125

121126
case clusterOperatorKindName:
122127
clusterVersion, err := c.clusterVersions.Get("version")
@@ -139,16 +144,19 @@ func (c *controlPlaneInformerController) sync(ctx context.Context, syncCtx facto
139144
if err != nil {
140145
return fmt.Errorf("failed to assess cluster operator %s: %w", name, err)
141146
}
142-
msg = makeInsightMsgForClusterOperator(insight, now)
147+
msgs = append(msgs, makeInsightMsgForClusterOperator(insight, now))
143148
default:
144149
return fmt.Errorf("invalid queue key %s with unexpected type %s", queueKey, t)
145150
}
146-
var msgForLog string
147-
if klog.V(4).Enabled() {
148-
msgForLog = fmt.Sprintf(" | msg=%s", string(msg.insight))
151+
152+
for _, msg := range msgs {
153+
var msgForLog string
154+
if klog.V(4).Enabled() {
155+
msgForLog = fmt.Sprintf(" | msg=%s", string(msg.insight))
156+
}
157+
klog.V(2).Infof("CPI :: Syncing %s %s%s", t, name, msgForLog)
158+
c.sendInsight(msg)
149159
}
150-
klog.V(2).Infof("CPI :: Syncing %s %s%s", t, name, msgForLog)
151-
c.sendInsight(msg)
152160

153161
return nil
154162
}
@@ -310,11 +318,47 @@ func makeInsightMsgForClusterVersion(cvInsight *ClusterVersionStatusInsight, acq
310318
}
311319
}
312320

321+
func uidForHealthInsight(healthInsight *HealthInsight) string {
322+
hasher := md5.New()
323+
hasher.Write([]byte(healthInsight.Impact.Summary))
324+
for i := range healthInsight.Scope.Resources {
325+
hasher.Write([]byte(healthInsight.Scope.Resources[i].Group))
326+
hasher.Write([]byte(healthInsight.Scope.Resources[i].Resource))
327+
hasher.Write([]byte(healthInsight.Scope.Resources[i].Namespace))
328+
hasher.Write([]byte(healthInsight.Scope.Resources[i].Name))
329+
}
330+
331+
sum := hasher.Sum(nil)
332+
encoded := base64.StdEncoding.EncodeToString(sum)
333+
encoded = strings.TrimRight(encoded, "=")
334+
335+
return fmt.Sprintf("usc-%s", encoded)
336+
}
337+
338+
func makeInsightMsgForHealthInsight(healthInsight *HealthInsight, acquiredAt metav1.Time) informerMsg {
339+
uid := uidForHealthInsight(healthInsight)
340+
insight := ControlPlaneInsight{
341+
UID: uid,
342+
AcquiredAt: acquiredAt,
343+
ControlPlaneInsightUnion: ControlPlaneInsightUnion{
344+
Type: HealthInsightType,
345+
HealthInsight: healthInsight,
346+
},
347+
}
348+
349+
// Should handle errors, but ultimately we will have a proper API and won’t need to serialize ourselves
350+
rawInsight, _ := yaml.Marshal(insight)
351+
return informerMsg{
352+
uid: uid,
353+
insight: rawInsight,
354+
}
355+
}
356+
313357
// assessClusterVersion produces a ClusterVersion status insight from the current state of the ClusterVersion resource.
314358
// It does not take previous status insight into account. Many fields of the status insights (such as completion) cannot
315359
// be properly calculated without also watching and processing ClusterOperators, so that functionality will need to be
316360
// added later.
317-
func assessClusterVersion(cv *configv1.ClusterVersion, now metav1.Time) *ClusterVersionStatusInsight {
361+
func assessClusterVersion(cv *configv1.ClusterVersion, now metav1.Time) (*ClusterVersionStatusInsight, []*HealthInsight) {
318362

319363
var lastHistoryItem *configv1.UpdateHistory
320364
if len(cv.Status.History) > 0 {
@@ -364,7 +408,39 @@ func assessClusterVersion(cv *configv1.ClusterVersion, now metav1.Time) *Cluster
364408
insight.EstimatedCompletedAt = &metav1.Time{Time: est}
365409
}
366410

367-
return insight
411+
var healthInsights []*HealthInsight
412+
if forcedHealthInsight := forcedHealthInsight(cv, now); forcedHealthInsight != nil {
413+
healthInsights = append(healthInsights, forcedHealthInsight)
414+
}
415+
416+
return insight, healthInsights
417+
}
418+
419+
const (
420+
uscForceHealthInsightAnnotation = "usc.openshift.io/force-health-insight"
421+
)
422+
423+
func forcedHealthInsight(cv *configv1.ClusterVersion, now metav1.Time) *HealthInsight {
424+
if _, ok := cv.Annotations[uscForceHealthInsightAnnotation]; !ok {
425+
return nil
426+
}
427+
428+
return &HealthInsight{
429+
StartedAt: now,
430+
Scope: InsightScope{
431+
Type: ControlPlaneScope,
432+
Resources: []ResourceRef{{Resource: "clusterversions", Group: configv1.GroupName, Name: cv.Name}},
433+
},
434+
Impact: InsightImpact{
435+
Level: InfoImpactLevel,
436+
Type: NoneImpactType,
437+
Summary: fmt.Sprintf("Forced health insight for ClusterVersion %s", cv.Name),
438+
Description: fmt.Sprintf("The resource has a %q annotation which forces USC to generate this health insight for testing purposes.", uscForceHealthInsightAnnotation),
439+
},
440+
Remediation: InsightRemediation{
441+
Reference: "https://issues.redhat.com/browse/OTA-1418",
442+
},
443+
}
368444
}
369445

370446
// estimateCompletion returns a time.Time that is 60 minutes after the given time. Proper estimation needs to be added

pkg/updatestatus/controlplaneinformer_test.go

Lines changed: 158 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -86,6 +86,7 @@ func Test_sync_with_cv(t *testing.T) {
8686
name string
8787
cvProgressing *configv1.ClusterOperatorStatusCondition
8888
cvHistory []configv1.UpdateHistory
89+
cvAnnotations map[string]string
8990

9091
expectedMsgs map[string]ControlPlaneInsight
9192
}{
@@ -193,11 +194,72 @@ func Test_sync_with_cv(t *testing.T) {
193194
},
194195
},
195196
},
197+
{
198+
name: "Cluster during a standard update with forced health insight",
199+
cvProgressing: &progressingTrue,
200+
cvHistory: []configv1.UpdateHistory{inProgress419, completed418},
201+
cvAnnotations: map[string]string{
202+
uscForceHealthInsightAnnotation: "value-does-not-matter",
203+
},
204+
expectedMsgs: map[string]ControlPlaneInsight{
205+
"usc-0kmuaUQRUJDOAIAF1KWTmg": {
206+
UID: "usc-0kmuaUQRUJDOAIAF1KWTmg",
207+
AcquiredAt: now,
208+
ControlPlaneInsightUnion: ControlPlaneInsightUnion{
209+
Type: HealthInsightType,
210+
HealthInsight: &HealthInsight{
211+
StartedAt: now,
212+
Scope: InsightScope{
213+
Type: ControlPlaneScope,
214+
Resources: []ResourceRef{
215+
cvRef,
216+
},
217+
},
218+
Impact: InsightImpact{
219+
Level: InfoImpactLevel,
220+
Type: NoneImpactType,
221+
Summary: "Forced health insight for ClusterVersion version",
222+
Description: "The resource has a \"usc.openshift.io/force-health-insight\" annotation which forces USC to generate this health insight for testing purposes.",
223+
},
224+
Remediation: InsightRemediation{
225+
Reference: "https://issues.redhat.com/browse/OTA-1418",
226+
},
227+
},
228+
},
229+
},
230+
"usc-cv-version": {
231+
UID: "usc-cv-version",
232+
AcquiredAt: now,
233+
ControlPlaneInsightUnion: ControlPlaneInsightUnion{
234+
Type: ClusterVersionStatusInsightType,
235+
ClusterVersionStatusInsight: &ClusterVersionStatusInsight{
236+
Resource: cvRef,
237+
Assessment: ControlPlaneAssessmentProgressing,
238+
Versions: ControlPlaneUpdateVersions{
239+
Target: Version{Version: "4.19.0"},
240+
Previous: Version{Version: "4.18.0"},
241+
},
242+
Completion: 0,
243+
StartedAt: minutesAgo[60],
244+
EstimatedCompletedAt: &now,
245+
Conditions: []metav1.Condition{
246+
newClusterVersionStatusInsightUpdating(
247+
metav1.ConditionTrue,
248+
ClusterVersionProgressing,
249+
"ClusterVersion has Progressing=True(Reason=ProgressingTrue) | Message='Cluster is progressing'",
250+
now,
251+
),
252+
},
253+
},
254+
},
255+
},
256+
},
257+
},
196258
}
197259

198260
for _, tc := range testCases {
199261
t.Run(tc.name, func(t *testing.T) {
200-
cv := makeTestClusterVersion(tc.cvProgressing, tc.cvHistory)
262+
cv := makeTestClusterVersion(tc.cvProgressing, tc.cvHistory, tc.cvAnnotations)
201263

202264
cvIndexer := cache.NewIndexer(cache.MetaNamespaceKeyFunc, cache.Indexers{})
203265
if err := cvIndexer.Add(cv); err != nil {
@@ -235,7 +297,10 @@ func Test_sync_with_cv(t *testing.T) {
235297
})
236298
}
237299

238-
if diff := cmp.Diff(expectedMsgs, actualMsgs, cmp.AllowUnexported(informerMsg{})); diff != "" {
300+
ignoreOrder := cmpopts.SortSlices(func(a, b informerMsg) bool {
301+
return a.uid < b.uid
302+
})
303+
if diff := cmp.Diff(expectedMsgs, actualMsgs, ignoreOrder, cmp.AllowUnexported(informerMsg{})); diff != "" {
239304
t.Errorf("Sync messages differ from expected:\n%s", diff)
240305
}
241306
})
@@ -531,9 +596,16 @@ func Test_sync_with_co(t *testing.T) {
531596
}
532597
}
533598

534-
func makeTestClusterVersion(progressing *configv1.ClusterOperatorStatusCondition, history []configv1.UpdateHistory) *configv1.ClusterVersion {
599+
func makeTestClusterVersion(
600+
progressing *configv1.ClusterOperatorStatusCondition,
601+
history []configv1.UpdateHistory,
602+
annotations map[string]string,
603+
) *configv1.ClusterVersion {
535604
cv := &configv1.ClusterVersion{
536-
ObjectMeta: metav1.ObjectMeta{Name: "version"},
605+
ObjectMeta: metav1.ObjectMeta{
606+
Name: "version",
607+
Annotations: annotations,
608+
},
537609
Status: configv1.ClusterVersionStatus{
538610
Conditions: []configv1.ClusterOperatorStatusCondition{},
539611
History: []configv1.UpdateHistory{},
@@ -554,6 +626,7 @@ type testSyncContext struct {
554626
queue workqueue.TypedRateLimitingInterface[any]
555627
}
556628

629+
//goland:noinspection GoDeprecation
557630
func (c testSyncContext) Queue() workqueue.RateLimitingInterface { //nolint:staticcheck
558631
return c.queue
559632
}
@@ -933,7 +1006,7 @@ func Test_assessClusterOperator(t *testing.T) {
9331006
}
9341007
}
9351008

936-
func Test_assessClusterVersion(t *testing.T) {
1009+
func Test_assessClusterVersion_cvStatusInsight(t *testing.T) {
9371010
now := metav1.Now()
9381011
var minutesAgo [120]metav1.Time
9391012
for i := range minutesAgo {
@@ -1244,10 +1317,88 @@ func Test_assessClusterVersion(t *testing.T) {
12441317
Conditions: []configv1.ClusterOperatorStatusCondition{tc.cvProgressing},
12451318
},
12461319
}
1247-
actual := assessClusterVersion(cv, now)
1248-
if diff := cmp.Diff(tc.expected, actual, cmpopts.IgnoreFields(metav1.Condition{}, "LastTransitionTime")); diff != "" {
1320+
actualCvStatusInsight, healthInsights := assessClusterVersion(cv, now)
1321+
if diff := cmp.Diff(tc.expected, actualCvStatusInsight); diff != "" {
12491322
t.Errorf("CV Status Insight differs from expected:\n%s", diff)
12501323
}
1324+
1325+
if diff := cmp.Diff([]*HealthInsight(nil), healthInsights); diff != "" {
1326+
t.Errorf("Unexpected health insights:\n%s", diff)
1327+
}
1328+
})
1329+
}
1330+
}
1331+
1332+
func Test_assessClusterVersion_testingHealthInsight(t *testing.T) {
1333+
now := metav1.Now()
1334+
var minutesAgo [30]metav1.Time
1335+
for i := range minutesAgo {
1336+
minutesAgo[i] = metav1.NewTime(now.Add(-time.Duration(i) * time.Minute))
1337+
}
1338+
1339+
cvReference := ResourceRef{
1340+
Resource: "clusterversions",
1341+
Group: "config.openshift.io",
1342+
Name: "version",
1343+
}
1344+
1345+
testCases := []struct {
1346+
name string
1347+
1348+
cvAnnotations map[string]string
1349+
1350+
expected []*HealthInsight
1351+
}{
1352+
{
1353+
name: "no annotations -> no insight",
1354+
cvAnnotations: nil,
1355+
expected: nil,
1356+
},
1357+
{
1358+
name: "unrelated annotations -> no insight",
1359+
cvAnnotations: map[string]string{
1360+
"foo": "bar",
1361+
},
1362+
expected: nil,
1363+
},
1364+
{
1365+
name: "usc.openshift.io/force-health-insight=true -> health insight",
1366+
cvAnnotations: map[string]string{
1367+
"usc.openshift.io/force-health-insight": "value-does-not-matter",
1368+
},
1369+
expected: []*HealthInsight{
1370+
{
1371+
StartedAt: now,
1372+
Scope: InsightScope{
1373+
Type: ControlPlaneScope,
1374+
Resources: []ResourceRef{cvReference},
1375+
},
1376+
Impact: InsightImpact{
1377+
Level: InfoImpactLevel,
1378+
Type: NoneImpactType,
1379+
Summary: "Forced health insight for ClusterVersion version",
1380+
Description: "The resource has a \"usc.openshift.io/force-health-insight\" annotation which forces USC to generate this health insight for testing purposes.",
1381+
},
1382+
Remediation: InsightRemediation{
1383+
Reference: "https://issues.redhat.com/browse/OTA-1418",
1384+
},
1385+
},
1386+
},
1387+
},
1388+
}
1389+
for _, tc := range testCases {
1390+
t.Run(tc.name, func(t *testing.T) {
1391+
cv := &configv1.ClusterVersion{
1392+
ObjectMeta: metav1.ObjectMeta{
1393+
Name: "version",
1394+
Annotations: tc.cvAnnotations,
1395+
},
1396+
}
1397+
_, healthInsights := assessClusterVersion(cv, now)
1398+
1399+
if diff := cmp.Diff(tc.expected, healthInsights); diff != "" {
1400+
t.Errorf("Health insights differ from expected:\n%s", diff)
1401+
}
12511402
})
12521403
}
12531404
}

0 commit comments

Comments
 (0)