diff --git a/README.md b/README.md
index 2a73355b..c65b33d1 100644
--- a/README.md
+++ b/README.md
@@ -54,7 +54,7 @@ https://sloth.dev
Release the Sloth!
```bash
-sloth generate -i ./examples/getting-started.yml
+sloth generate --slo-plugins-path=./examples/plugins -i ./examples/getting-started.yml
```
```yaml
@@ -64,6 +64,9 @@ labels:
owner: "myteam"
repo: "myorg/myservice"
tier: "2"
+slo_plugins:
+ chain:
+ - id: "sloth.dev/contrib/alert_for/v1"
slos:
# We allow failing (5xx and 429) 1 request every 1000 requests (99.9%).
- name: "requests-availability"
@@ -83,10 +86,14 @@ slos:
# Overwrite default Sloth SLO alert summmary on ticket and page alerts.
summary: "High error rate on 'myservice' requests responses"
page_alert:
+ # Requires `sloth.dev/contrib/alert_for/v1` from `--slo-plugins-path`.
+ for: 5m
labels:
severity: "pageteam"
routing_key: "myteam"
ticket_alert:
+ # Requires `sloth.dev/contrib/alert_for/v1` from `--slo-plugins-path`.
+ for: 10m
labels:
severity: "slack"
slack_channel: "#alerts-myteam"
diff --git a/examples/getting-started.yml b/examples/getting-started.yml
index c1ba5e78..eaf2e8dc 100644
--- a/examples/getting-started.yml
+++ b/examples/getting-started.yml
@@ -4,6 +4,9 @@ labels:
owner: "myteam"
repo: "myorg/myservice"
tier: "2"
+slo_plugins:
+ chain:
+ - id: "sloth.dev/contrib/alert_for/v1"
slos:
# We allow failing (5xx and 429) 1 request every 1000 requests (99.9%).
- name: "requests-availability"
@@ -21,10 +24,12 @@ slos:
# Overwrite default Sloth SLO alert summmary on ticket and page alerts.
summary: "High error rate on 'myservice' requests responses"
page_alert:
+ for: 5m
labels:
severity: pageteam
routing_key: myteam
ticket_alert:
+ for: 10m
labels:
severity: "slack"
slack_channel: "#alerts-myteam"
diff --git a/internal/plugin/slo/contrib/alert_for_v1/README.md b/internal/plugin/slo/contrib/alert_for_v1/README.md
new file mode 100644
index 00000000..b0043cbf
--- /dev/null
+++ b/internal/plugin/slo/contrib/alert_for_v1/README.md
@@ -0,0 +1,31 @@
+# sloth.dev/contrib/alert_for/v1
+
+This plugin sets Prometheus alert `for` durations from the Sloth `prometheus/v1` YAML spec fields:
+
+- `slos[].alerting.page_alert.for`
+- `slos[].alerting.ticket_alert.for`
+
+This plugin is required because the core plugins ignore `for` and always generate alerts without a pending time.
+
+## Example
+
+```yaml
+version: "prometheus/v1"
+service: "myservice"
+slo_plugins:
+ chain:
+ - id: "sloth.dev/contrib/alert_for/v1"
+slos:
+ - name: "requests-availability"
+ objective: 99.9
+ sli:
+ events:
+ error_query: sum(rate(http_requests_total{code=~"5.."}[{{.window}}]))
+ total_query: sum(rate(http_requests_total[{{.window}}]))
+ alerting:
+ name: MyServiceHighErrorRate
+ page_alert:
+ for: 5m
+ ticket_alert:
+ for: 10m
+```
diff --git a/internal/plugin/slo/contrib/alert_for_v1/plugin.go b/internal/plugin/slo/contrib/alert_for_v1/plugin.go
new file mode 100644
index 00000000..a3f4d977
--- /dev/null
+++ b/internal/plugin/slo/contrib/alert_for_v1/plugin.go
@@ -0,0 +1,70 @@
+package plugin
+
+import (
+ "context"
+ "encoding/json"
+
+ prommodel "github.com/prometheus/common/model"
+
+ "github.com/slok/sloth/pkg/common/conventions"
+ pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1"
+)
+
+const (
+	// PluginVersion declares the plugin API version (`prometheus/slo/v1`) this plugin targets.
+	PluginVersion = "prometheus/slo/v1"
+	// PluginID is the ID used to reference this plugin from a spec `slo_plugins` chain.
+	PluginID = "sloth.dev/contrib/alert_for/v1"
+)
+
+// NewPlugin returns the alert `for` plugin; its configuration and app utils arguments are ignored.
+func NewPlugin(_ json.RawMessage, _ pluginslov1.AppUtils) (pluginslov1.Plugin, error) {
+	return plugin{}, nil
+}
+
+// plugin applies the spec alert `for` durations to the generated alert rules.
+type plugin struct{}
+
+// ProcessSLO copies the page and ticket alert `for` durations that the original Sloth v1
+// spec declares for the SLO onto the generated alert rules, matching rules by severity label.
+// It changes nothing when there is no Sloth v1 source, no SLO with that name, or no duration.
+func (p plugin) ProcessSLO(_ context.Context, request *pluginslov1.Request, result *pluginslov1.Result) error {
+	spec := request.OriginalSource.SlothV1
+	if spec == nil {
+		return nil
+	}
+
+	var pageFor, ticketFor prommodel.Duration
+	for i := range spec.SLOs {
+		if spec.SLOs[i].Name == request.SLO.Name {
+			pageFor = spec.SLOs[i].Alerting.PageAlert.For
+			ticketFor = spec.SLOs[i].Alerting.TicketAlert.For
+			break
+		}
+	}
+
+	// Zero means unset, which is also what remains when no spec SLO matched by name.
+	if pageFor == 0 && ticketFor == 0 {
+		return nil
+	}
+
+	pageSeverity := request.MWMBAlertGroup.PageQuick.Severity.String()
+	ticketSeverity := request.MWMBAlertGroup.TicketQuick.Severity.String()
+	rules := result.SLORules.AlertRules.Rules
+	for i := range rules {
+		if rules[i].Labels == nil {
+			continue
+		}
+		// Page is checked first, so it wins if both severities share the same label value.
+		var alertFor prommodel.Duration
+		if severity := rules[i].Labels[conventions.PromSLOSeverityLabelName]; severity == pageSeverity {
+			alertFor = pageFor
+		} else if severity == ticketSeverity {
+			alertFor = ticketFor
+		}
+		if alertFor != 0 {
+			rules[i].For = alertFor
+		}
+	}
+	return nil
+}
diff --git a/internal/plugin/slo/contrib/alert_for_v1/plugin_test.go b/internal/plugin/slo/contrib/alert_for_v1/plugin_test.go
new file mode 100644
index 00000000..c7a8716d
--- /dev/null
+++ b/internal/plugin/slo/contrib/alert_for_v1/plugin_test.go
@@ -0,0 +1,143 @@
+package plugin_test
+
+import (
+ "testing"
+ "time"
+
+ prommodel "github.com/prometheus/common/model"
+ "github.com/prometheus/prometheus/model/rulefmt"
+ "github.com/stretchr/testify/assert"
+
+ plugin "github.com/slok/sloth/internal/plugin/slo/contrib/alert_for_v1"
+ "github.com/slok/sloth/pkg/common/conventions"
+ "github.com/slok/sloth/pkg/common/model"
+ prometheusv1 "github.com/slok/sloth/pkg/prometheus/api/v1"
+ pluginslov1 "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1"
+ pluginslov1testing "github.com/slok/sloth/pkg/prometheus/plugin/slo/v1/testing"
+)
+
+// TestPlugin checks that the spec page and ticket `for` durations are set on the alert rules by both plugin factories.
+func TestPlugin(t *testing.T) {
+	tests := map[string]struct {
+		pluginFactory func(t *testing.T) (pluginslov1.Plugin, error)
+		req           pluginslov1.Request
+		res           pluginslov1.Result
+		expRes        pluginslov1.Result
+	}{
+		"Using the plugin as embedded yaegi plugin, it should set page and ticket `for` durations.": {
+			pluginFactory: func(t *testing.T) (pluginslov1.Plugin, error) {
+				return pluginslov1testing.NewTestPlugin(t.Context(), pluginslov1testing.TestPluginConfig{})
+			},
+			req: pluginslov1.Request{
+				SLO: model.PromSLO{Name: "requests-availability"},
+				MWMBAlertGroup: model.MWMBAlertGroup{
+					PageQuick:   model.MWMBAlert{Severity: model.PageAlertSeverity},
+					TicketQuick: model.MWMBAlert{Severity: model.TicketAlertSeverity},
+				},
+				OriginalSource: model.PromSLOGroupSource{
+					SlothV1: &prometheusv1.Spec{
+						Version: prometheusv1.Version,
+						Service: "myservice",
+						SLOs: []prometheusv1.SLO{
+							{
+								Name:      "requests-availability",
+								Objective: 99.9,
+								SLI:       prometheusv1.SLI{Raw: &prometheusv1.SLIRaw{ErrorRatioQuery: "1"}},
+								Alerting: prometheusv1.Alerting{
+									Name:        "MyServiceHighErrorRate",
+									PageAlert:   prometheusv1.Alert{For: prommodel.Duration(5 * time.Minute)},
+									TicketAlert: prometheusv1.Alert{For: prommodel.Duration(10 * time.Minute)},
+								},
+							},
+						},
+					},
+				},
+			},
+			res: pluginslov1.Result{
+				SLORules: model.PromSLORules{
+					AlertRules: model.PromRuleGroup{
+						Rules: []rulefmt.Rule{
+							{Alert: "MyServiceHighErrorRate", Labels: map[string]string{conventions.PromSLOSeverityLabelName: "page"}},
+							{Alert: "MyServiceHighErrorRate", Labels: map[string]string{conventions.PromSLOSeverityLabelName: "ticket"}},
+						},
+					},
+				},
+			},
+			expRes: pluginslov1.Result{
+				SLORules: model.PromSLORules{
+					AlertRules: model.PromRuleGroup{
+						Rules: []rulefmt.Rule{
+							{Alert: "MyServiceHighErrorRate", For: prommodel.Duration(5 * time.Minute), Labels: map[string]string{conventions.PromSLOSeverityLabelName: "page"}},
+							{Alert: "MyServiceHighErrorRate", For: prommodel.Duration(10 * time.Minute), Labels: map[string]string{conventions.PromSLOSeverityLabelName: "ticket"}},
+						},
+					},
+				},
+			},
+		},
+		"Using the plugin as compiled Go plugin, it should set page and ticket `for` durations.": {
+			pluginFactory: func(t *testing.T) (pluginslov1.Plugin, error) {
+				return plugin.NewPlugin(nil, pluginslov1.AppUtils{})
+			},
+			req: pluginslov1.Request{
+				SLO: model.PromSLO{Name: "requests-availability"},
+				MWMBAlertGroup: model.MWMBAlertGroup{
+					PageQuick:   model.MWMBAlert{Severity: model.PageAlertSeverity},
+					TicketQuick: model.MWMBAlert{Severity: model.TicketAlertSeverity},
+				},
+				OriginalSource: model.PromSLOGroupSource{
+					SlothV1: &prometheusv1.Spec{
+						Version: prometheusv1.Version,
+						Service: "myservice",
+						SLOs: []prometheusv1.SLO{
+							{
+								Name:      "requests-availability",
+								Objective: 99.9,
+								SLI:       prometheusv1.SLI{Raw: &prometheusv1.SLIRaw{ErrorRatioQuery: "1"}},
+								Alerting: prometheusv1.Alerting{
+									Name:        "MyServiceHighErrorRate",
+									PageAlert:   prometheusv1.Alert{For: prommodel.Duration(5 * time.Minute)},
+									TicketAlert: prometheusv1.Alert{For: prommodel.Duration(10 * time.Minute)},
+								},
+							},
+						},
+					},
+				},
+			},
+			res: pluginslov1.Result{
+				SLORules: model.PromSLORules{
+					AlertRules: model.PromRuleGroup{
+						Rules: []rulefmt.Rule{
+							{Alert: "MyServiceHighErrorRate", Labels: map[string]string{conventions.PromSLOSeverityLabelName: "page"}},
+							{Alert: "MyServiceHighErrorRate", Labels: map[string]string{conventions.PromSLOSeverityLabelName: "ticket"}},
+						},
+					},
+				},
+			},
+			expRes: pluginslov1.Result{
+				SLORules: model.PromSLORules{
+					AlertRules: model.PromRuleGroup{
+						Rules: []rulefmt.Rule{
+							{Alert: "MyServiceHighErrorRate", For: prommodel.Duration(5 * time.Minute), Labels: map[string]string{conventions.PromSLOSeverityLabelName: "page"}},
+							{Alert: "MyServiceHighErrorRate", For: prommodel.Duration(10 * time.Minute), Labels: map[string]string{conventions.PromSLOSeverityLabelName: "ticket"}},
+						},
+					},
+				},
+			},
+		},
+	}
+
+	for name, test := range tests {
+		t.Run(name, func(t *testing.T) {
+			assert := assert.New(t)
+			p, err := test.pluginFactory(t)
+			if !assert.NoError(err) {
+				return // A nil plugin would panic on the ProcessSLO call below and hide the real failure.
+			}
+			res := test.res
+			err = p.ProcessSLO(t.Context(), &test.req, &res)
+			if assert.NoError(err) {
+				assert.Equal(test.expRes, res)
+			}
+		})
+	}
+}
diff --git a/pkg/prometheus/api/v1/README.md b/pkg/prometheus/api/v1/README.md
index 9c9d239a..542ef8bc 100755
--- a/pkg/prometheus/api/v1/README.md
+++ b/pkg/prometheus/api/v1/README.md
@@ -87,7 +87,7 @@ const Version = "prometheus/v1"
```
-## type [Alert]()
+## type [Alert]()
Alert configures specific SLO alert.
@@ -96,6 +96,12 @@ type Alert struct {
// Disable disables the alert and makes Sloth not generating this alert. This
// can be helpful for example to disable ticket(warning) alerts.
Disable bool `json:"disable,omitempty"`
+ // For is the alerting time window that a rule must be active before firing.
+ // Default is 0m, which means no pending time.
+ //
+    // Sloth core plugins ignore this field. Use an SLO plugin that applies it to the
+    // generated Prometheus rules, like the `sloth.dev/contrib/alert_for/v1` contrib plugin.
+ For prommodel.Duration `json:"for,omitempty"`
// Labels are the Prometheus labels for the specific alert. For example can be
// useful to route the Page alert to specific Slack channel.
Labels map[string]string `json:"labels,omitempty"`
@@ -105,7 +111,7 @@ type Alert struct {
```
-## type [Alerting]()
+## type [Alerting]()
Alerting wraps all the configuration required by the SLO alerts.
@@ -126,7 +132,7 @@ type Alerting struct {
```
-## type [SLI]()
+## type [SLI]()
SLI will tell what is good or bad for the SLO. All SLIs will be get based on time windows, that's why Sloth needs the queries to use \`\{\{.window\}\}\` template variable.
@@ -144,7 +150,7 @@ type SLI struct {
```
-## type [SLIEvents]()
+## type [SLIEvents]()
SLIEvents is an SLI that is calculated as the division of bad events and total events, giving a ratio SLI. Normally this is the most common ratio type.
@@ -162,7 +168,7 @@ type SLIEvents struct {
```
-## type [SLIPlugin]()
+## type [SLIPlugin]()
SLIPlugin will use the SLI returned by the SLI plugin selected along with the options.
@@ -176,7 +182,7 @@ type SLIPlugin struct {
```
-## type [SLIRaw]()
+## type [SLIRaw]()
SLIRaw is a error ratio SLI already calculated. Normally this will be used when the SLI is already calculated by other recording rule, system...
@@ -188,7 +194,7 @@ type SLIRaw struct {
```
-## type [SLO]()
+## type [SLO]()
SLO is the configuration/declaration of the service level objective of a service.
@@ -216,7 +222,7 @@ type SLO struct {
```
-## type [SLOPlugin]()
+## type [SLOPlugin]()
SLOPlugin is a plugin that will be used on the chain of plugins for the SLO generation.
@@ -237,7 +243,7 @@ type SLOPlugin struct {
```
-## type [SLOPlugins]()
+## type [SLOPlugins]()
SLOPlugins are the list plugins that will be used on the process of SLOs for the rules generation.
@@ -255,7 +261,7 @@ type SLOPlugins struct {
```
-## type [Spec]()
+## type [Spec]()
Spec represents the root type of the SLOs declaration specification.
diff --git a/pkg/prometheus/api/v1/v1.go b/pkg/prometheus/api/v1/v1.go
index e18b4b9f..3b7e639f 100644
--- a/pkg/prometheus/api/v1/v1.go
+++ b/pkg/prometheus/api/v1/v1.go
@@ -54,7 +54,11 @@
// disable: true
package v1
-import "encoding/json"
+import (
+ "encoding/json"
+
+ prommodel "github.com/prometheus/common/model"
+)
const Version = "prometheus/v1"
@@ -160,6 +164,12 @@ type Alert struct {
// Disable disables the alert and makes Sloth not generating this alert. This
// can be helpful for example to disable ticket(warning) alerts.
Disable bool `json:"disable,omitempty"`
+ // For is the alerting time window that a rule must be active before firing.
+ // Default is 0m, which means no pending time.
+ //
+	// Sloth core plugins ignore this field. Use an SLO plugin that applies it to the
+	// generated Prometheus rules, like the `sloth.dev/contrib/alert_for/v1` contrib plugin.
+ For prommodel.Duration `json:"for,omitempty"`
// Labels are the Prometheus labels for the specific alert. For example can be
// useful to route the Page alert to specific Slack channel.
Labels map[string]string `json:"labels,omitempty"`