Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions csp-mixin/.lint
Original file line number Diff line number Diff line change
Expand Up @@ -71,3 +71,9 @@ exclusions:
panel-title-description-rule:
entries:
- dashboard: Azure Service Bus
- dashboard: GCP Compute Engine
- dashboard: Azure Virtual Machines
target-rate-interval-rule:
entries:
- dashboard: GCP Compute Engine

52 changes: 0 additions & 52 deletions csp-mixin/alerts.libsonnet

This file was deleted.

30 changes: 30 additions & 0 deletions csp-mixin/alerts/azure-alerts.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Prometheus alerting rules for Azure Virtual Machines metrics.
# Both rules aggregate per (job, resourceGroup, subscriptionName, resourceName)
# and only select series where each of those labels is non-empty (`=~".+"`).
groups:
  - name: azure
    rules:
      # Fires when the averaged CPU percentage of a VM stays above 85
      # for 5 minutes.
      - alert: AzureVMHighCpuUtilization
        expr: |
          avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_compute_virtualmachines_percentage_cpu_average_percent{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) > 85
        for: 5m
        # Keep the alert firing for 10 minutes after the condition clears.
        keep_firing_for: 10m
        labels:
          severity: critical
          service: 'Azure Virtual Machines'
          namespace: cloud-provider-azure
        annotations:
          summary: 'CPU utilization is too high.'
          description: 'The VM {{ $labels.resourceName }} is under heavy load and may become unresponsive.'
          dashboard_uid: '58f33c50e66c911b0ad8a25aa438a96e'

      # Fires when the averaged VM availability metric differs from 1
      # for 5 minutes.
      - alert: AzureVMUnavailable
        expr: |
          avg by (job,resourceGroup,subscriptionName,resourceName) (azure_microsoft_compute_virtualmachines_vmavailabilitymetric_average_count{job=~".+",resourceGroup=~".+",subscriptionName=~".+",resourceName=~".+"}) != 1
        for: 5m
        # Keep the alert firing for 10 minutes after the condition clears.
        keep_firing_for: 10m
        labels:
          severity: critical
          # Must match the `service` value of AzureVMHighCpuUtilization exactly
          # (no trailing period) so both alerts group under the same service.
          service: 'Azure Virtual Machines'
          namespace: cloud-provider-azure
        annotations:
          summary: 'VM unavailable.'
          description: 'The VM {{ $labels.resourceName }} is not functioning or crashed, which may require immediate action.'
          dashboard_uid: '58f33c50e66c911b0ad8a25aa438a96e'
16 changes: 16 additions & 0 deletions csp-mixin/alerts/gcp-alerts.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# Prometheus alerting rules for GCP Compute Engine metrics.
groups:
  - name: gcp
    rules:
      # Fires when the instance CPU utilization, averaged per
      # (job, project_id, instance_name) and multiplied by 100, stays above 85
      # for 5 minutes. Only series with non-empty values for those labels are
      # selected (`=~".+"`).
      # NOTE(review): the `100 *` factor suggests the metric is a 0-1 fraction - confirm.
      - alert: GcpCEHighCpuUtilization
        expr: |
          100 * avg by (job,project_id,instance_name) (stackdriver_gce_instance_compute_googleapis_com_instance_cpu_utilization{job=~".+",project_id=~".+",instance_name=~".+"}) > 85
        for: 5m
        # Keep the alert firing for 10 minutes after the condition clears.
        keep_firing_for: 10m
        labels:
          severity: critical
          service: 'Compute Engine'
          namespace: cloud-provider-gcp
        annotations:
          summary: 'CPU utilization is too high.'
          description: 'The VM {{ $labels.instance_name }} is under heavy load and may become unresponsive.'
          dashboard_uid: 'f115fe73641347c43415535d77e2dc0f'
8 changes: 8 additions & 0 deletions csp-mixin/azureconfig.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,13 @@
groupLabels: ['job', 'resourceGroup', 'subscriptionName'],
instanceLabels: ['resourceName'],
metricsSource: 'azuremonitor',

// Builds an object from a YAML rules document given as a string: the document
// is parsed with std.parseYaml and its top-level `groups` value is exposed
// under `groups` (`+:` merges with an inherited `groups` field, if any).
local importRules(rules) = {
groups+: std.parseYaml(rules).groups,
},

// Alert rule groups for this config, read from the Azure rules YAML file.
// `importstr` pulls in the file contents as a raw string for importRules.
prometheus: {
alerts: importRules(importstr 'alerts/azure-alerts.yml'),
},
},
}
7 changes: 7 additions & 0 deletions csp-mixin/gcpconfig.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -19,5 +19,12 @@
groupLabels: ['job'],
instanceLabels: ['bucket_name'],
metricsSource: 'stackdriver',
// Builds an object from a YAML rules document given as a string: the document
// is parsed with std.parseYaml and its top-level `groups` value is exposed
// under `groups` (`+:` merges with an inherited `groups` field, if any).
local importRules(rules) = {
groups+: std.parseYaml(rules).groups,
},

// Alert rule groups for this config, read from the GCP rules YAML file.
// `importstr` pulls in the file contents as a raw string for importRules.
prometheus: {
alerts: importRules(importstr 'alerts/gcp-alerts.yml'),
},
},
}
2 changes: 1 addition & 1 deletion csp-mixin/main.libsonnet
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ local commonlib = import 'common-lib/common/main.libsonnet';
dashboards: (import './dashboards.libsonnet').new(this),
},
prometheus: {
alerts: (import './alerts.libsonnet').new(this),
alerts: this.config.prometheus.alerts,
recordingRules: {},
},
asMonitoringMixin(): {
Expand Down
Loading