Skip to content

Commit a8fc213

Browse files
anaivanov and yduartep authored
csp-mixin(gcp compute engine): Add alert for CPU (#1345)
* csp-mixin(gcp compute engine): Add alert for CPU
* split alerts in multiple files
* importRules directly on mixin
* use config prometheus for each provider
* fix lint
* fix alerts
* remove alerts libsonnet file
---------
Co-authored-by: yduartep <[email protected]>
1 parent b4993cb commit a8fc213

File tree

7 files changed

+68
-53
lines changed

7 files changed

+68
-53
lines changed

csp-mixin/.lint

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,3 +71,9 @@ exclusions:
7171
panel-title-description-rule:
7272
entries:
7373
- dashboard: Azure Service Bus
74+
- dashboard: GCP Compute Engine
75+
- dashboard: Azure Virtual Machines
76+
target-rate-interval-rule:
77+
entries:
78+
- dashboard: GCP Compute Engine
79+

csp-mixin/alerts.libsonnet

Lines changed: 0 additions & 52 deletions
This file was deleted.

csp-mixin/alerts/azure-alerts.yml

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
groups:
2+
- name: azure
3+
rules:
4+
- alert: AzureVMHighCpuUtilization
5+
expr: |
6+
avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_compute_virtualmachines_percentage_cpu_average_percent{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) > 85
7+
for: 5m
8+
keep_firing_for: 10m
9+
labels:
10+
severity: critical
11+
service: 'Azure Virtual Machines'
12+
namespace: cloud-provider-azure
13+
annotations:
14+
summary: 'CPU utilization is too high.'
15+
description: 'The VM {{ $labels.resourceName }} is under heavy load and may become unresponsive.'
16+
dashboard_uid: '58f33c50e66c911b0ad8a25aa438a96e'
17+
18+
- alert: AzureVMUnavailable
19+
expr: |
20+
avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_compute_virtualmachines_vmavailabilitymetric_average_count{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) != 1
21+
for: 5m
22+
keep_firing_for: 10m
23+
labels:
24+
severity: critical
25+
service: 'Azure Virtual Machines'
26+
namespace: cloud-provider-azure
27+
annotations:
28+
summary: 'VM unavailable.'
29+
description: 'The VM {{ $labels.resourceName }} is not functioning or crashed, which may require immediate action.'
30+
dashboard_uid: '58f33c50e66c911b0ad8a25aa438a96e'

csp-mixin/alerts/gcp-alerts.yml

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
groups:
2+
- name: gcp
3+
rules:
4+
- alert: GcpCEHighCpuUtilization
5+
expr: |
6+
100 * avg by (job,project_id,instance_name) (stackdriver_gce_instance_compute_googleapis_com_instance_cpu_utilization{job=~".+",project_id=~".+",instance_name=~".+"}) > 85
7+
for: 5m
8+
keep_firing_for: 10m
9+
labels:
10+
severity: critical
11+
service: 'Compute Engine'
12+
namespace: cloud-provider-gcp
13+
annotations:
14+
summary: 'CPU utilization is too high.'
15+
description: 'The VM {{ $labels.instance_name }} is under heavy load and may become unresponsive.'
16+
dashboard_uid: 'f115fe73641347c43415535d77e2dc0f'

csp-mixin/azureconfig.libsonnet

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,13 @@
1919
groupLabels: ['job', 'resourceGroup', 'subscriptionName'],
2020
instanceLabels: ['resourceName'],
2121
metricsSource: 'azuremonitor',
22+
23+
local importRules(rules) = {
24+
groups+: std.parseYaml(rules).groups,
25+
},
26+
27+
prometheus: {
28+
alerts: importRules(importstr 'alerts/azure-alerts.yml'),
29+
},
2230
},
2331
}

csp-mixin/gcpconfig.libsonnet

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,5 +19,12 @@
1919
groupLabels: ['job'],
2020
instanceLabels: ['bucket_name'],
2121
metricsSource: 'stackdriver',
22+
local importRules(rules) = {
23+
groups+: std.parseYaml(rules).groups,
24+
},
25+
26+
prometheus: {
27+
alerts: importRules(importstr 'alerts/gcp-alerts.yml'),
28+
},
2229
},
2330
}

csp-mixin/main.libsonnet

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ local commonlib = import 'common-lib/common/main.libsonnet';
1515
dashboards: (import './dashboards.libsonnet').new(this),
1616
},
1717
prometheus: {
18-
alerts: (import './alerts.libsonnet').new(this),
18+
alerts: this.config.prometheus.alerts,
1919
recordingRules: {},
2020
},
2121
asMonitoringMixin(): {

0 commit comments

Comments
 (0)