Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ https://sloth.dev
Release the Sloth!

```bash
sloth generate -i ./examples/getting-started.yml
sloth generate --slo-plugins-path=./examples/plugins -i ./examples/getting-started.yml
```

```yaml
Expand All @@ -64,6 +64,9 @@ labels:
owner: "myteam"
repo: "myorg/myservice"
tier: "2"
slo_plugins:
chain:
- id: "sloth.dev/contrib/alert_for/v1"
slos:
# We allow failing (5xx and 429) 1 request every 1000 requests (99.9%).
- name: "requests-availability"
Expand All @@ -83,10 +86,14 @@ slos:
# Overwrite default Sloth SLO alert summary on ticket and page alerts.
summary: "High error rate on 'myservice' requests responses"
page_alert:
# Requires `sloth.dev/contrib/alert_for/v1` from `--slo-plugins-path`.
for: 5m
labels:
severity: "pageteam"
routing_key: "myteam"
ticket_alert:
# Requires `sloth.dev/contrib/alert_for/v1` from `--slo-plugins-path`.
for: 10m
labels:
severity: "slack"
slack_channel: "#alerts-myteam"
Expand Down
5 changes: 5 additions & 0 deletions examples/getting-started.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@ labels:
owner: "myteam"
repo: "myorg/myservice"
tier: "2"
slo_plugins:
chain:
- id: "sloth.dev/contrib/alert_for/v1"
slos:
# We allow failing (5xx and 429) 1 request every 1000 requests (99.9%).
- name: "requests-availability"
Expand All @@ -21,10 +24,12 @@ slos:
# Overwrite default Sloth SLO alert summary on ticket and page alerts.
summary: "High error rate on 'myservice' requests responses"
page_alert:
for: 5m
labels:
severity: pageteam
routing_key: myteam
ticket_alert:
for: 10m
labels:
severity: "slack"
slack_channel: "#alerts-myteam"
31 changes: 31 additions & 0 deletions internal/plugin/slo/contrib/alert_for_v1/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# sloth.dev/contrib/alert_for/v1

This plugin sets Prometheus alert `for` durations from the Sloth `prometheus/v1` YAML spec fields:

- `slos[].alerting.page_alert.for`
- `slos[].alerting.ticket_alert.for`

This plugin is required because the core plugins ignore `for` and always generate alerts without a pending time.

## Example

```yaml
version: "prometheus/v1"
service: "myservice"
slo_plugins:
chain:
- id: "sloth.dev/contrib/alert_for/v1"
slos:
- name: "requests-availability"
objective: 99.9
sli:
events:
error_query: sum(rate(http_requests_total{code=~"5.."}[{{.window}}]))
total_query: sum(rate(http_requests_total[{{.window}}]))
alerting:
name: MyServiceHighErrorRate
page_alert:
for: 5m
ticket_alert:
for: 10m
```
70 changes: 70 additions & 0 deletions internal/plugin/slo/contrib/alert_for_v1/plugin.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package plugin

import (
"context"
"encoding/json"

prommodel "github.com/prometheus/common/model"

"github.com/slok/sloth/pkg/common/conventions"
pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1"
)

const (
// PluginVersion is the plugin API version string declared by this plugin.
// NOTE(review): presumably read by the plugin loader to select the SLO plugin API; confirm against the loader.
PluginVersion = "prometheus/slo/v1"
// PluginID is the identifier of this plugin; it is the value used in the
// `slo_plugins.chain[].id` field of the example specs.
PluginID = "sloth.dev/contrib/alert_for/v1"
)

// NewPlugin is the plugin factory. The raw JSON configuration and the app
// utilities are both ignored: the plugin has no settings, so a zero-value
// plugin and a nil error are always returned.
func NewPlugin(_ json.RawMessage, _ pluginslov1.AppUtils) (pluginslov1.Plugin, error) {
	var p plugin
	return p, nil
}

// plugin is the SLO plugin implementation returned by NewPlugin. It is an
// empty struct: it holds no configuration or state.
type plugin struct{}

// ProcessSLO copies the `for` durations declared on the page and ticket alerts
// of the original Sloth v1 spec onto the alert rules already present in result.
//
// The spec SLO is the first one whose name equals request.SLO.Name. Alert rules
// are matched through their severity label, compared against the severities of
// the request's quick page and quick ticket alerts; the page severity is checked
// first. Nothing is changed when the request has no Sloth v1 source, when no spec
// SLO matches, or when both durations are zero. A zero duration never overwrites
// a rule, and rules with nil labels are skipped.
//
// It always returns nil.
func (p plugin) ProcessSLO(_ context.Context, request *pluginslov1.Request, result *pluginslov1.Result) error {
	spec := request.OriginalSource.SlothV1
	if spec == nil {
		return nil
	}

	// Locate the spec entry that produced the SLO being processed.
	matched := -1
	for idx := range spec.SLOs {
		if spec.SLOs[idx].Name == request.SLO.Name {
			matched = idx
			break
		}
	}
	if matched < 0 {
		return nil
	}

	alerting := spec.SLOs[matched].Alerting
	forPage, forTicket := alerting.PageAlert.For, alerting.TicketAlert.For
	if forPage == 0 && forTicket == 0 {
		// Neither alert declares a pending time, nothing to apply.
		return nil
	}

	severityPage := request.MWMBAlertGroup.PageQuick.Severity.String()
	severityTicket := request.MWMBAlertGroup.TicketQuick.Severity.String()

	// The slice shares its backing array with result, so writes land in place.
	rules := result.SLORules.AlertRules.Rules
	for idx := range rules {
		if rules[idx].Labels == nil {
			continue
		}

		severity := rules[idx].Labels[conventions.PromSLOSeverityLabelName]

		// Page takes precedence: a rule matching the page severity is never
		// treated as a ticket rule, even when the page duration is unset.
		if severity == severityPage {
			if forPage != 0 {
				rules[idx].For = forPage
			}
			continue
		}

		if severity == severityTicket && forTicket != 0 {
			rules[idx].For = forTicket
		}
	}

	return nil
}
143 changes: 143 additions & 0 deletions internal/plugin/slo/contrib/alert_for_v1/plugin_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,143 @@
package plugin_test

import (
"testing"
"time"

prommodel "github.com/prometheus/common/model"
"github.com/prometheus/prometheus/model/rulefmt"
"github.com/stretchr/testify/assert"

plugin "github.com/slok/sloth/internal/plugin/slo/contrib/alert_for_v1"
"github.com/slok/sloth/pkg/common/conventions"
"github.com/slok/sloth/pkg/common/model"
prometheusv1 "github.com/slok/sloth/pkg/prometheus/api/v1"
pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1"
pluginslov1testing "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1/testing"
)

func TestPlugin(t *testing.T) {
tests := map[string]struct {
pluginFactory func(t *testing.T) (pluginslov1.Plugin, error)
req pluginslov1.Request
res pluginslov1.Result
expRes pluginslov1.Result
}{
"Using the plugin as embedded yaegi plugin, it should set page and ticket `for` durations.": {
pluginFactory: func(t *testing.T) (pluginslov1.Plugin, error) {
return pluginslov1testing.NewTestPlugin(t.Context(), pluginslov1testing.TestPluginConfig{})
},
req: pluginslov1.Request{
SLO: model.PromSLO{Name: "requests-availability"},
MWMBAlertGroup: model.MWMBAlertGroup{
PageQuick: model.MWMBAlert{Severity: model.PageAlertSeverity},
TicketQuick: model.MWMBAlert{Severity: model.TicketAlertSeverity},
},
OriginalSource: model.PromSLOGroupSource{
SlothV1: &prometheusv1.Spec{
Version: prometheusv1.Version,
Service: "myservice",
SLOs: []prometheusv1.SLO{
{
Name: "requests-availability",
Objective: 99.9,
SLI: prometheusv1.SLI{Raw: &prometheusv1.SLIRaw{ErrorRatioQuery: "1"}},
Alerting: prometheusv1.Alerting{
Name: "MyServiceHighErrorRate",
PageAlert: prometheusv1.Alert{For: prommodel.Duration(5 * time.Minute)},
TicketAlert: prometheusv1.Alert{For: prommodel.Duration(10 * time.Minute)},
},
},
},
},
},
},
res: pluginslov1.Result{
SLORules: model.PromSLORules{
AlertRules: model.PromRuleGroup{
Rules: []rulefmt.Rule{
{Alert: "MyServiceHighErrorRate", Labels: map[string]string{conventions.PromSLOSeverityLabelName: "page"}},
{Alert: "MyServiceHighErrorRate", Labels: map[string]string{conventions.PromSLOSeverityLabelName: "ticket"}},
},
},
},
},
expRes: pluginslov1.Result{
SLORules: model.PromSLORules{
AlertRules: model.PromRuleGroup{
Rules: []rulefmt.Rule{
{Alert: "MyServiceHighErrorRate", For: prommodel.Duration(5 * time.Minute), Labels: map[string]string{conventions.PromSLOSeverityLabelName: "page"}},
{Alert: "MyServiceHighErrorRate", For: prommodel.Duration(10 * time.Minute), Labels: map[string]string{conventions.PromSLOSeverityLabelName: "ticket"}},
},
},
},
},
},

"Using the plugin as compiled Go plugin, it should set page and ticket `for` durations.": {
pluginFactory: func(t *testing.T) (pluginslov1.Plugin, error) {
return plugin.NewPlugin(nil, pluginslov1.AppUtils{})
},
req: pluginslov1.Request{
SLO: model.PromSLO{Name: "requests-availability"},
MWMBAlertGroup: model.MWMBAlertGroup{
PageQuick: model.MWMBAlert{Severity: model.PageAlertSeverity},
TicketQuick: model.MWMBAlert{Severity: model.TicketAlertSeverity},
},
OriginalSource: model.PromSLOGroupSource{
SlothV1: &prometheusv1.Spec{
Version: prometheusv1.Version,
Service: "myservice",
SLOs: []prometheusv1.SLO{
{
Name: "requests-availability",
Objective: 99.9,
SLI: prometheusv1.SLI{Raw: &prometheusv1.SLIRaw{ErrorRatioQuery: "1"}},
Alerting: prometheusv1.Alerting{
Name: "MyServiceHighErrorRate",
PageAlert: prometheusv1.Alert{For: prommodel.Duration(5 * time.Minute)},
TicketAlert: prometheusv1.Alert{For: prommodel.Duration(10 * time.Minute)},
},
},
},
},
},
},
res: pluginslov1.Result{
SLORules: model.PromSLORules{
AlertRules: model.PromRuleGroup{
Rules: []rulefmt.Rule{
{Alert: "MyServiceHighErrorRate", Labels: map[string]string{conventions.PromSLOSeverityLabelName: "page"}},
{Alert: "MyServiceHighErrorRate", Labels: map[string]string{conventions.PromSLOSeverityLabelName: "ticket"}},
},
},
},
},
expRes: pluginslov1.Result{
SLORules: model.PromSLORules{
AlertRules: model.PromRuleGroup{
Rules: []rulefmt.Rule{
{Alert: "MyServiceHighErrorRate", For: prommodel.Duration(5 * time.Minute), Labels: map[string]string{conventions.PromSLOSeverityLabelName: "page"}},
{Alert: "MyServiceHighErrorRate", For: prommodel.Duration(10 * time.Minute), Labels: map[string]string{conventions.PromSLOSeverityLabelName: "ticket"}},
},
},
},
},
},
}

for name, test := range tests {
t.Run(name, func(t *testing.T) {
assert := assert.New(t)

p, err := test.pluginFactory(t)
assert.NoError(err)

res := test.res
err = p.ProcessSLO(t.Context(), &test.req, &res)
if assert.NoError(err) {
assert.Equal(test.expRes, res)
}
})
}
}
Loading