Skip to content

Commit e369104

Browse files
committed
Extract the cluster operator name from Down/Degraded Alerts
The operator name will now appear in the locator, which helps when analyzing which ClusterOperator is down or degraded. It may also prevent the overlap problems we could be experiencing when more than one operator is down at the same time.
1 parent ffb7a7a commit e369104

File tree

2 files changed

+21
-13
lines changed

2 files changed

+21
-13
lines changed

pkg/monitor/monitorapi/construction.go

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@ import (
55
"strings"
66
"time"
77

8+
"github.com/prometheus/common/model"
89
corev1 "k8s.io/api/core/v1"
910
"k8s.io/kube-openapi/pkg/util/sets"
1011
)
@@ -92,23 +93,39 @@ func (b *LocatorBuilder) NodeFromName(nodeName string) Locator {
9293
Build()
9394
}
9495

95-
func (b *LocatorBuilder) AlertFromNames(alertName, node, namespace, pod, container string) Locator {
96+
func (b *LocatorBuilder) AlertFromPromSampleStream(alert *model.SampleStream) Locator {
9697
b.targetType = LocatorTypeAlert
98+
99+
alertName := string(alert.Metric[model.AlertNameLabel])
97100
if len(alertName) > 0 {
98101
b.annotations[LocatorAlertKey] = alertName
99102
}
103+
node := string(alert.Metric["instance"])
100104
if len(node) > 0 {
101105
b.annotations[LocatorNodeKey] = node
102106
}
107+
namespace := string(alert.Metric["namespace"])
103108
if len(namespace) > 0 {
104109
b.annotations[LocatorNamespaceKey] = namespace
105110
}
111+
pod := string(alert.Metric["pod"])
106112
if len(pod) > 0 {
107113
b.annotations[LocatorPodKey] = pod
108114
}
115+
container := string(alert.Metric["container"])
109116
if len(container) > 0 {
110117
b.annotations[LocatorContainerKey] = container
111118
}
119+
120+
// Some alerts include a very useful name field. ClusterOperator[Down|Degraded], for example,
121+
// always comes from the namespace openshift-cluster-version, but its name field is the actual
122+
// name of the operator that was detected to be down. This is very useful for locators and
123+
// analysis.
124+
additionalName := string(alert.Metric["name"])
125+
if len(additionalName) > 0 {
126+
b.annotations[LocatorNameKey] = additionalName
127+
}
128+
112129
return b.Build()
113130
}
114131

pkg/monitortests/testframework/alertanalyzer/alerts.go

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ import (
1313
"github.com/openshift/origin/pkg/monitor/monitorapi"
1414
prometheusv1 "github.com/prometheus/client_golang/api/prometheus/v1"
1515
prometheustypes "github.com/prometheus/common/model"
16+
"github.com/sirupsen/logrus"
1617
apierrors "k8s.io/apimachinery/pkg/api/errors"
1718
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1819
"k8s.io/apimachinery/pkg/util/wait"
@@ -296,15 +297,8 @@ func createEventIntervalsForAlerts(ctx context.Context, alerts prometheustypes.V
296297
case alerts.Type() == prometheustypes.ValMatrix:
297298
matrixAlert := alerts.(prometheustypes.Matrix)
298299
for _, alert := range matrixAlert {
299-
alertName := alert.Metric[prometheustypes.AlertNameLabel]
300300

301-
lb := monitorapi.NewLocator().AlertFromNames(
302-
string(alertName),
303-
string(alert.Metric["instance"]),
304-
string(alert.Metric["namespace"]),
305-
string(alert.Metric["pod"]),
306-
string(alert.Metric["container"]),
307-
)
301+
lb := monitorapi.NewLocator().AlertFromPromSampleStream(alert)
308302

309303
var level monitorapi.IntervalLevel
310304
switch {
@@ -364,10 +358,7 @@ func createEventIntervalsForAlerts(ctx context.Context, alerts prometheustypes.V
364358
}
365359

366360
default:
367-
ret = append(ret, monitorapi.NewInterval(monitorapi.SourceAlert, monitorapi.Error).
368-
Locator(monitorapi.NewLocator().AlertFromNames("all", "", "", "", "")).
369-
Message(monitorapi.NewMessage().HumanMessagef("unhandled type: %v", alerts.Type())).
370-
Build(startTime, time.Now()))
361+
logrus.WithField("type", alerts.Type()).Warning("unhandled prometheus alert type received in alert monitor")
371362
}
372363

373364
return ret, nil

0 commit comments

Comments
 (0)