Skip to content

Commit 31a53bb

Browse files
authored
feat: Support for PromQL Alerts (#108) (#129)
1 parent e4747a5 commit 31a53bb

File tree

5 files changed

+286
-0
lines changed

5 files changed

+286
-0
lines changed

sysdig/provider.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,7 @@ func Provider() *schema.Provider {
7878
"sysdig_monitor_alert_event": resourceSysdigMonitorAlertEvent(),
7979
"sysdig_monitor_alert_anomaly": resourceSysdigMonitorAlertAnomaly(),
8080
"sysdig_monitor_alert_group_outlier": resourceSysdigMonitorAlertGroupOutlier(),
81+
"sysdig_monitor_alert_promql": resourceSysdigMonitorAlertPromql(),
8182
"sysdig_monitor_dashboard": resourceSysdigMonitorDashboard(),
8283
"sysdig_monitor_notification_channel_email": resourceSysdigMonitorNotificationChannelEmail(),
8384
"sysdig_monitor_notification_channel_opsgenie": resourceSysdigMonitorNotificationChannelOpsGenie(),
Lines changed: 151 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,151 @@
1+
package sysdig
2+
3+
import (
4+
"context"
5+
"strconv"
6+
"time"
7+
8+
"github.com/hashicorp/terraform-plugin-sdk/v2/diag"
9+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
10+
11+
"github.com/draios/terraform-provider-sysdig/sysdig/internal/client/monitor"
12+
)
13+
14+
func resourceSysdigMonitorAlertPromql() *schema.Resource {
15+
timeout := 5 * time.Minute
16+
17+
return &schema.Resource{
18+
CreateContext: resourceSysdigAlertPromqlCreate,
19+
UpdateContext: resourceSysdigAlertPromqlUpdate,
20+
ReadContext: resourceSysdigAlertPromqlRead,
21+
DeleteContext: resourceSysdigAlertPromqlDelete,
22+
Importer: &schema.ResourceImporter{
23+
StateContext: schema.ImportStatePassthroughContext,
24+
},
25+
26+
Timeouts: &schema.ResourceTimeout{
27+
Create: schema.DefaultTimeout(timeout),
28+
Update: schema.DefaultTimeout(timeout),
29+
Read: schema.DefaultTimeout(timeout),
30+
Delete: schema.DefaultTimeout(timeout),
31+
},
32+
33+
Schema: createAlertSchema(map[string]*schema.Schema{
34+
"promql": {
35+
Type: schema.TypeString,
36+
Required: true,
37+
},
38+
}),
39+
}
40+
}
41+
42+
func resourceSysdigAlertPromqlCreate(ctx context.Context, data *schema.ResourceData, i interface{}) diag.Diagnostics {
43+
client, err := i.(SysdigClients).sysdigMonitorClient()
44+
if err != nil {
45+
return diag.FromErr(err)
46+
}
47+
48+
alert, err := promqlAlertFromResourceData(data)
49+
if err != nil {
50+
return diag.FromErr(err)
51+
}
52+
53+
alertCreated, err := client.CreateAlert(ctx, *alert)
54+
if err != nil {
55+
return diag.FromErr(err)
56+
}
57+
58+
data.SetId(strconv.Itoa(alertCreated.ID))
59+
data.Set("version", alertCreated.Version)
60+
return nil
61+
}
62+
63+
func resourceSysdigAlertPromqlUpdate(ctx context.Context, data *schema.ResourceData, i interface{}) diag.Diagnostics {
64+
client, err := i.(SysdigClients).sysdigMonitorClient()
65+
if err != nil {
66+
return diag.FromErr(err)
67+
}
68+
69+
alert, err := promqlAlertFromResourceData(data)
70+
if err != nil {
71+
return diag.FromErr(err)
72+
}
73+
74+
alert.ID, _ = strconv.Atoi(data.Id())
75+
76+
_, err = client.UpdateAlert(ctx, *alert)
77+
if err != nil {
78+
return diag.FromErr(err)
79+
}
80+
81+
return nil
82+
}
83+
84+
func resourceSysdigAlertPromqlRead(ctx context.Context, data *schema.ResourceData, i interface{}) diag.Diagnostics {
85+
client, err := i.(SysdigClients).sysdigMonitorClient()
86+
if err != nil {
87+
return diag.FromErr(err)
88+
}
89+
90+
id, err := strconv.Atoi(data.Id())
91+
if err != nil {
92+
return diag.FromErr(err)
93+
}
94+
95+
alert, err := client.GetAlertById(ctx, id)
96+
97+
if err != nil {
98+
data.SetId("")
99+
return nil
100+
}
101+
102+
err = promqlAlertToResourceData(&alert, data)
103+
if err != nil {
104+
return diag.FromErr(err)
105+
}
106+
107+
return nil
108+
}
109+
110+
func resourceSysdigAlertPromqlDelete(ctx context.Context, data *schema.ResourceData, i interface{}) diag.Diagnostics {
111+
client, err := i.(SysdigClients).sysdigMonitorClient()
112+
if err != nil {
113+
return diag.FromErr(err)
114+
}
115+
116+
id, err := strconv.Atoi(data.Id())
117+
if err != nil {
118+
return diag.FromErr(err)
119+
}
120+
121+
err = client.DeleteAlert(ctx, id)
122+
if err != nil {
123+
return diag.FromErr(err)
124+
}
125+
126+
return nil
127+
}
128+
129+
func promqlAlertFromResourceData(data *schema.ResourceData) (alert *monitor.Alert, err error) {
130+
alert, err = alertFromResourceData(data)
131+
if err != nil {
132+
return
133+
}
134+
135+
alert.Type = "PROMETHEUS"
136+
137+
alert.Condition = data.Get("promql").(string)
138+
139+
return
140+
}
141+
142+
func promqlAlertToResourceData(alert *monitor.Alert, data *schema.ResourceData) (err error) {
143+
err = alertToResourceData(alert, data)
144+
if err != nil {
145+
return
146+
}
147+
148+
data.Set("promql", alert.Condition)
149+
150+
return
151+
}
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
package sysdig_test
2+
3+
import (
4+
"fmt"
5+
"os"
6+
"testing"
7+
8+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/acctest"
9+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/resource"
10+
"github.com/hashicorp/terraform-plugin-sdk/v2/helper/schema"
11+
12+
"github.com/draios/terraform-provider-sysdig/sysdig"
13+
)
14+
15+
func TestAccAlertPromql(t *testing.T) {
16+
rText := func() string { return acctest.RandStringFromCharSet(10, acctest.CharSetAlphaNum) }
17+
18+
resource.ParallelTest(t, resource.TestCase{
19+
PreCheck: func() {
20+
if v := os.Getenv("SYSDIG_MONITOR_API_TOKEN"); v == "" {
21+
t.Fatal("SYSDIG_MONITOR_API_TOKEN must be set for acceptance tests")
22+
}
23+
},
24+
ProviderFactories: map[string]func() (*schema.Provider, error){
25+
"sysdig": func() (*schema.Provider, error) {
26+
return sysdig.Provider(), nil
27+
},
28+
},
29+
Steps: []resource.TestStep{
30+
{
31+
Config: alertPromqlWithName(rText()),
32+
},
33+
{
34+
ResourceName: "sysdig_monitor_alert_promql.sample",
35+
ImportState: true,
36+
ImportStateVerify: true,
37+
},
38+
},
39+
})
40+
}
41+
42+
// alertPromqlWithName renders the Terraform configuration for a disabled
// sample PromQL alert, inserting name into both the alert name and its
// description.
func alertPromqlWithName(name string) string {
	const configTemplate = `
resource "sysdig_monitor_alert_promql" "sample" {
	name = "TERRAFORM TEST - PROMQL %s"
	description = "TERRAFORM TEST - PROMQL %s"
	severity = 3

	promql = "(elasticsearch_jvm_memory_used_bytes{area=\"heap\"} / elasticsearch_jvm_memory_max_bytes{area=\"heap\"}) * 100 > 80"

	trigger_after_minutes = 10

	enabled = false
}
`
	return fmt.Sprintf(configTemplate, name, name)
}
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
---
2+
layout: "sysdig"
3+
page_title: "Sysdig: sysdig_monitor_alert_promql"
4+
sidebar_current: "docs-sysdig-monitor-alert-promql"
5+
description: |-
6+
Creates a Sysdig Monitor PromQL Alert.
7+
---
8+
9+
# sysdig\_monitor\_alert\_promql
10+
11+
Creates a Sysdig Monitor PromQL Alert. Monitors Prometheus metrics and alerts if they violate a user-defined PromQL-based metric expression.
12+
13+
~> **Note:** Sysdig Terraform Provider is under rapid development at this point. If you experience any issue or discrepancy while using it, please make sure you have the latest version. If the issue persists, or you have a Feature Request to support an additional set of resources, please open a [new issue](https://github.com/sysdiglabs/terraform-provider-sysdig/issues/new) in the GitHub repository.
14+
15+
## Example usage
16+
17+
```hcl
18+
resource "sysdig_monitor_alert_promql" "sample" {
19+
name = "Elasticsearch JVM heap usage"
20+
description = "Elasticsearch JVM heap usage is above 80%"
21+
severity = 6
22+
23+
promql = "(elasticsearch_jvm_memory_used_bytes{area=\"heap\"} / elasticsearch_jvm_memory_max_bytes{area=\"heap\"}) * 100 > 80"
24+
trigger_after_minutes = 10
25+
}
26+
```
27+
28+
## Argument Reference
29+
30+
### Common alert arguments
31+
32+
These arguments are common to all alerts in Sysdig Monitor.
33+
34+
* `name` - (Required) The name of the Monitor alert. It must be unique.
35+
* `description` - (Optional) The description of the Monitor alert.
36+
* `severity` - (Optional) Severity of the Monitor alert. It must be a value between 0 and 7,
37+
with 0 being the most critical and 7 the least critical. Defaults to 4.
38+
* `trigger_after_minutes` - (Required) Threshold of time for the status to stabilize until the alert is fired.
39+
* `enabled` - (Optional) Boolean that defines if the alert is enabled or not. Defaults to true.
40+
* `notification_channels` - (Optional) List of notification channel IDs where an alert must be sent to once fired.
41+
* `renotification_minutes` - (Optional) Number of minutes for the alert to re-notify until the status is solved.
42+
* `custom_notification` - (Optional) Allows defining a custom notification title, as well as text to prepend and append to the notification.
43+
44+
### `custom_notification`
45+
46+
By defining this field, the user can modify the title and the body of the message sent when the alert
47+
is fired.
48+
49+
* `title` - (Required) Sets the title of the alert. It is commonly defined as `{{__alert_name__}} is {{__alert_status__}}`.
50+
* `prepend` - (Optional) Text to add before the alert template.
51+
* `append` - (Optional) Text to add after the alert template.
52+
53+
### PromQL alert arguments
54+
55+
* `promql` - (Required) PromQL-based metric expression to alert on. Example: `histogram_quantile(0.99, rate(etcd_http_successful_duration_seconds_bucket[5m])) > 0.15` or `predict_linear(sysdig_fs_free_bytes{fstype!~"tmpfs"}[1h], 24*3600) < 10000000000`.
56+
57+
## Attributes Reference
58+
59+
### Common alert attributes
60+
61+
In addition to all arguments above, the following attributes are exported, which are common to all the
62+
alerts in Sysdig Monitor:
63+
64+
* `id` - ID of the alert created.
65+
* `version` - Current version of the resource in Sysdig Monitor.
66+
* `team` - Team ID that owns the alert.
67+
68+
69+
## Import
70+
71+
PromQL Monitor alerts can be imported using the alert ID, e.g.
72+
73+
```
74+
$ terraform import sysdig_monitor_alert_promql.example 12345
75+
```

website/sysdig.erb

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,6 +113,9 @@
113113
<li<%= sidebar_current("docs-sysdig-monitor-alert-metric") %>>
114114
<a href="/docs/providers/sysdig/r/sysdig_monitor_alert_metric.html">sysdig_monitor_alert_metric</a>
115115
</li>
116+
<li<%= sidebar_current("docs-sysdig-monitor-alert-promql") %>>
117+
<a href="/docs/providers/sysdig/r/sysdig_monitor_alert_promql.html">sysdig_monitor_alert_promql</a>
118+
</li>
116119
<li<%= sidebar_current("docs-sysdig-monitor-dashboard") %>>
117120
<a href="/docs/providers/sysdig/r/sysdig_monitor_dashboard.html">sysdig_monitor_dashboard</a>
118121
</li>

0 commit comments

Comments
 (0)