Skip to content

Commit b87c0b9

Browse files
emanlodovicejustinjung04
authored andcommitted
Add alert lifecycle observer
Signed-off-by: Emmanuel Lodovice <[email protected]>
1 parent eacba0f commit b87c0b9

File tree

11 files changed

+514
-63
lines changed

11 files changed

+514
-63
lines changed

alertobserver/alertobserver.go

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
// Copyright 2023 Prometheus Team
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package alertobserver
15+
16+
import (
17+
"github.com/prometheus/alertmanager/types"
18+
)
19+
20+
const (
21+
EventAlertReceived string = "received"
22+
EventAlertRejected string = "rejected"
23+
EventAlertAddedToAggrGroup string = "addedAggrGroup"
24+
EventAlertFailedAddToAggrGroup string = "failedAddAggrGroup"
25+
EventAlertPipelineStart string = "pipelineStart"
26+
EventAlertPipelinePassStage string = "pipelinePassStage"
27+
EventAlertMuted string = "muted"
28+
EventAlertSent string = "sent"
29+
EventAlertSendFailed string = "sendFailed"
30+
)
31+
32+
type AlertEventMeta map[string]interface{}
33+
34+
type LifeCycleObserver interface {
35+
Observe(event string, alerts []*types.Alert, meta AlertEventMeta)
36+
}

alertobserver/testing.go

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
// Copyright 2023 Prometheus Team
2+
// Licensed under the Apache License, Version 2.0 (the "License");
3+
// you may not use this file except in compliance with the License.
4+
// You may obtain a copy of the License at
5+
//
6+
// http://www.apache.org/licenses/LICENSE-2.0
7+
//
8+
// Unless required by applicable law or agreed to in writing, software
9+
// distributed under the License is distributed on an "AS IS" BASIS,
10+
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11+
// See the License for the specific language governing permissions and
12+
// limitations under the License.
13+
14+
package alertobserver
15+
16+
import (
17+
"sync"
18+
19+
"github.com/prometheus/alertmanager/types"
20+
)
21+
22+
type FakeLifeCycleObserver struct {
23+
AlertsPerEvent map[string][]*types.Alert
24+
PipelineStageAlerts map[string][]*types.Alert
25+
MetaPerEvent map[string][]AlertEventMeta
26+
Mtx sync.RWMutex
27+
}
28+
29+
func (o *FakeLifeCycleObserver) Observe(event string, alerts []*types.Alert, meta AlertEventMeta) {
30+
o.Mtx.Lock()
31+
defer o.Mtx.Unlock()
32+
if event == EventAlertPipelinePassStage {
33+
o.PipelineStageAlerts[meta["stageName"].(string)] = append(o.PipelineStageAlerts[meta["stageName"].(string)], alerts...)
34+
} else {
35+
o.AlertsPerEvent[event] = append(o.AlertsPerEvent[event], alerts...)
36+
}
37+
o.MetaPerEvent[event] = append(o.MetaPerEvent[event], meta)
38+
}
39+
40+
func NewFakeLifeCycleObserver() *FakeLifeCycleObserver {
41+
return &FakeLifeCycleObserver{
42+
PipelineStageAlerts: map[string][]*types.Alert{},
43+
AlertsPerEvent: map[string][]*types.Alert{},
44+
MetaPerEvent: map[string][]AlertEventMeta{},
45+
}
46+
}

api/api.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ import (
2626
"github.com/prometheus/common/promslog"
2727
"github.com/prometheus/common/route"
2828

29+
"github.com/prometheus/alertmanager/alertobserver"
2930
apiv2 "github.com/prometheus/alertmanager/api/v2"
3031
"github.com/prometheus/alertmanager/cluster"
3132
"github.com/prometheus/alertmanager/config"
@@ -85,6 +86,9 @@ type Options struct {
8586
GroupInfoFunc func(func(*dispatch.Route) bool) dispatch.AlertGroupInfos
8687
// APICallback define the callback function that each api call will perform before returned.
8788
APICallback callback.Callback
89+
// AlertLCObserver is used to add hooks to the different alert life cycle events.
90+
// If nil then no observer methods will be invoked in the life cycle events.
91+
AlertLCObserver alertobserver.LifeCycleObserver
8892
}
8993

9094
func (o Options) validate() error {
@@ -135,6 +139,7 @@ func New(opts Options) (*API, error) {
135139
opts.Peer,
136140
l.With("version", "v2"),
137141
opts.Registry,
142+
opts.AlertLCObserver,
138143
)
139144
if err != nil {
140145
return nil, err

api/v2/api.go

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535

3636
alertgroupinfolist_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/alertgroupinfolist"
3737

38+
"github.com/prometheus/alertmanager/alertobserver"
3839
"github.com/prometheus/alertmanager/api/metrics"
3940
open_api_models "github.com/prometheus/alertmanager/api/v2/models"
4041
"github.com/prometheus/alertmanager/api/v2/restapi"
@@ -77,8 +78,9 @@ type API struct {
7778
route *dispatch.Route
7879
setAlertStatus setAlertStatusFn
7980

80-
logger *slog.Logger
81-
m *metrics.Alerts
81+
logger *slog.Logger
82+
m *metrics.Alerts
83+
alertLCObserver alertobserver.LifeCycleObserver
8284

8385
Handler http.Handler
8486
}
@@ -103,6 +105,7 @@ func NewAPI(
103105
peer cluster.ClusterPeer,
104106
l *slog.Logger,
105107
r prometheus.Registerer,
108+
o alertobserver.LifeCycleObserver,
106109
) (*API, error) {
107110
if apiCallback == nil {
108111
apiCallback = callback.NoopAPICallback{}
@@ -119,6 +122,7 @@ func NewAPI(
119122
logger: l,
120123
m: metrics.NewAlerts(r),
121124
uptime: time.Now(),
125+
alertLCObserver: o,
122126
}
123127

124128
// Load embedded swagger file.
@@ -406,19 +410,30 @@ func (api *API) postAlertsHandler(params alert_ops.PostAlertsParams) middleware.
406410
if err := a.Validate(); err != nil {
407411
validationErrs.Add(err)
408412
api.m.Invalid().Inc()
413+
if api.alertLCObserver != nil {
414+
m := alertobserver.AlertEventMeta{"msg": err.Error()}
415+
api.alertLCObserver.Observe(alertobserver.EventAlertRejected, []*types.Alert{a}, m)
416+
}
409417
continue
410418
}
411419
validAlerts = append(validAlerts, a)
412420
}
413421
if err := api.alerts.Put(validAlerts...); err != nil {
414422
logger.Error("Failed to create alerts", "err", err)
423+
if api.alertLCObserver != nil {
424+
m := alertobserver.AlertEventMeta{"msg": err.Error()}
425+
api.alertLCObserver.Observe(alertobserver.EventAlertRejected, validAlerts, m)
426+
}
415427
return alert_ops.NewPostAlertsInternalServerError().WithPayload(err.Error())
416428
}
417429

418430
if validationErrs.Len() > 0 {
419431
logger.Error("Failed to validate alerts", "err", validationErrs.Error())
420432
return alert_ops.NewPostAlertsBadRequest().WithPayload(validationErrs.Error())
421433
}
434+
if api.alertLCObserver != nil {
435+
api.alertLCObserver.Observe(alertobserver.EventAlertReceived, validAlerts, alertobserver.AlertEventMeta{})
436+
}
422437

423438
return alert_ops.NewPostAlertsOK()
424439
}

api/v2/api_test.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@ import (
1717
"bytes"
1818
"encoding/json"
1919
"fmt"
20+
"github.com/prometheus/client_golang/prometheus"
21+
2022
"io"
2123
"net/http"
2224
"net/http/httptest"
@@ -31,6 +33,8 @@ import (
3133
"github.com/prometheus/common/promslog"
3234
"github.com/stretchr/testify/require"
3335

36+
"github.com/prometheus/alertmanager/alertobserver"
37+
"github.com/prometheus/alertmanager/api/metrics"
3438
alert_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/alert"
3539
alertgroup_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/alertgroup"
3640
alertgroupinfolist_ops "github.com/prometheus/alertmanager/api/v2/restapi/operations/alertgroupinfolist"
@@ -1196,6 +1200,67 @@ func TestListAlertInfosHandler(t *testing.T) {
11961200
}
11971201
}
11981202

1203+
func TestPostAlertHandler(t *testing.T) {
1204+
now := time.Now()
1205+
for i, tc := range []struct {
1206+
start, end time.Time
1207+
err bool
1208+
code int
1209+
}{
1210+
{time.Time{}, time.Time{}, false, 200},
1211+
{now, time.Time{}, false, 200},
1212+
{time.Time{}, now.Add(time.Duration(-1) * time.Second), false, 200},
1213+
{time.Time{}, now, false, 200},
1214+
{time.Time{}, now.Add(time.Duration(1) * time.Second), false, 200},
1215+
{now.Add(time.Duration(-2) * time.Second), now.Add(time.Duration(-1) * time.Second), false, 200},
1216+
{now.Add(time.Duration(1) * time.Second), now.Add(time.Duration(2) * time.Second), false, 200},
1217+
{now.Add(time.Duration(1) * time.Second), now, false, 400},
1218+
} {
1219+
alerts, alertsBytes := createAlert(t, tc.start, tc.end)
1220+
api := API{
1221+
uptime: time.Now(),
1222+
alerts: newFakeAlerts([]*types.Alert{}),
1223+
logger: log.NewNopLogger(),
1224+
m: metrics.NewAlerts(prometheus.NewRegistry()),
1225+
}
1226+
api.Update(&config.Config{
1227+
Global: &config.GlobalConfig{
1228+
ResolveTimeout: model.Duration(5),
1229+
},
1230+
Route: &config.Route{},
1231+
}, nil)
1232+
1233+
r, err := http.NewRequest("POST", "/api/v2/alerts", bytes.NewReader(alertsBytes))
1234+
require.NoError(t, err)
1235+
1236+
w := httptest.NewRecorder()
1237+
p := runtime.TextProducer()
1238+
responder := api.postAlertsHandler(alert_ops.PostAlertsParams{
1239+
HTTPRequest: r,
1240+
Alerts: alerts,
1241+
})
1242+
responder.WriteResponse(w, p)
1243+
body, _ := io.ReadAll(w.Result().Body)
1244+
1245+
require.Equal(t, tc.code, w.Code, fmt.Sprintf("test case: %d, response: %s", i, string(body)))
1246+
1247+
observer := alertobserver.NewFakeLifeCycleObserver()
1248+
api.alertLCObserver = observer
1249+
r, err = http.NewRequest("POST", "/api/v2/alerts", bytes.NewReader(alertsBytes))
1250+
require.NoError(t, err)
1251+
api.postAlertsHandler(alert_ops.PostAlertsParams{
1252+
HTTPRequest: r,
1253+
Alerts: alerts,
1254+
})
1255+
amAlert := OpenAPIAlertsToAlerts(alerts)
1256+
if tc.code == 200 {
1257+
require.Equal(t, observer.AlertsPerEvent[alertobserver.EventAlertReceived][0].Fingerprint(), amAlert[0].Fingerprint())
1258+
} else {
1259+
require.Equal(t, observer.AlertsPerEvent[alertobserver.EventAlertRejected][0].Fingerprint(), amAlert[0].Fingerprint())
1260+
}
1261+
}
1262+
}
1263+
11991264
type limitNumberOfAlertsReturnedCallback struct {
12001265
limit int
12011266
}

api/v2/testing.go

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -132,3 +132,23 @@ func newGetAlertStatus(f *fakeAlerts) func(model.Fingerprint) types.AlertStatus
132132
return status
133133
}
134134
}
135+
136+
func createAlert(t *testing.T, start, ends time.Time) (open_api_models.PostableAlerts, []byte) {
137+
startsAt := strfmt.DateTime(start)
138+
endsAt := strfmt.DateTime(ends)
139+
140+
alert := open_api_models.PostableAlert{
141+
StartsAt: startsAt,
142+
EndsAt: endsAt,
143+
Annotations: open_api_models.LabelSet{"annotation1": "some text"},
144+
Alert: open_api_models.Alert{
145+
Labels: open_api_models.LabelSet{"label1": "test1"},
146+
GeneratorURL: "http://localhost:3000",
147+
},
148+
}
149+
alerts := open_api_models.PostableAlerts{}
150+
alerts = append(alerts, &alert)
151+
b, err := json.Marshal(alerts)
152+
require.NoError(t, err)
153+
return alerts, b
154+
}

cmd/alertmanager/main.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -481,6 +481,7 @@ func run() int {
481481
marker,
482482
notificationLog,
483483
pipelinePeer,
484+
nil,
484485
)
485486

486487
configuredReceivers.Set(float64(len(activeReceivers)))
@@ -492,7 +493,7 @@ func run() int {
492493
silencer.Mutes(labels)
493494
})
494495

495-
disp = dispatch.NewDispatcher(alerts, routes, pipeline, marker, timeoutFunc, nil, logger, dispMetrics)
496+
disp = dispatch.NewDispatcher(alerts, routes, pipeline, marker, timeoutFunc, nil, logger, dispMetrics, nil)
496497
routes.Walk(func(r *dispatch.Route) {
497498
if r.RouteOpts.RepeatInterval > *retention {
498499
configLogger.Warn(

0 commit comments

Comments
 (0)