Skip to content

Commit b8901a9

Browse files
authored
Merge pull request grafana#411 from aallawala/aja_alertmanager
use alertmanager jobname for alertmanager dashboard panels
2 parents b9d0544 + d804539 commit b8901a9

File tree

3 files changed

+29
-27
lines changed

3 files changed

+29
-27
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@
6969
* [ENHANCEMENT] Added `CortexKVStoreFailure` alert. #406
7070
* [ENHANCEMENT] Use configured `ruler` jobname for ruler dashboard panels. #409
7171
* [ENHANCEMENT] Add ability to override `datasource` for generated dashboards. #407
72+
* [ENHANCEMENT] Use configured `alertmanager` jobname for alertmanager dashboard panels. #411
7273
* [BUGFIX] Fixed `CortexIngesterHasNotShippedBlocks` alert false positive in case an ingester instance had ingested samples in the past, then no traffic was received for a long period and then it started receiving samples again. #308
7374
* [BUGFIX] Alertmanager: fixed `--alertmanager.cluster.peers` CLI flag passed to alertmanager when HA is enabled. #329
7475
* [BUGFIX] Fixed `CortexInconsistentRuntimeConfig` metric. #335

cortex-mixin/config.libsonnet

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@
3737
store_gateway: '(store-gateway|cortex$)',
3838
gateway: '(gateway|cortex-gw|cortex-gw-internal)',
3939
compactor: 'compactor.*', // Match also custom compactor deployments.
40+
alertmanager: 'alertmanager',
4041
},
4142

4243
// Grouping labels, to uniquely identify and group by {jobs, clusters}

cortex-mixin/dashboards/alertmanager.libsonnet

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@ local utils = import 'mixin-utils/utils.libsonnet';
1111
})
1212
.addPanel(
1313
$.panel('Total Alerts') +
14-
$.statPanel('sum(cluster_job_%s:cortex_alertmanager_alerts:sum{%s})' % [$._config.per_instance_label, $.jobMatcher('alertmanager')], format='short')
14+
$.statPanel('sum(cluster_job_%s:cortex_alertmanager_alerts:sum{%s})' % [$._config.per_instance_label, $.jobMatcher($._config.job_names.alertmanager)], format='short')
1515
)
1616
.addPanel(
1717
$.panel('Total Silences') +
18-
$.statPanel('sum(cluster_job_%s:cortex_alertmanager_silences:sum{%s})' % [$._config.per_instance_label, $.jobMatcher('alertmanager')], format='short')
18+
$.statPanel('sum(cluster_job_%s:cortex_alertmanager_silences:sum{%s})' % [$._config.per_instance_label, $.jobMatcher($._config.job_names.alertmanager)], format='short')
1919
)
2020
.addPanel(
2121
$.panel('Tenants') +
22-
$.statPanel('max(cortex_alertmanager_tenants_discovered{%s})' % $.jobMatcher('alertmanager'), format='short')
22+
$.statPanel('max(cortex_alertmanager_tenants_discovered{%s})' % $.jobMatcher($._config.job_names.alertmanager), format='short')
2323
)
2424
)
2525
.addRow(
@@ -32,8 +32,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
3232
sum(cluster_job:cortex_alertmanager_alerts_received_total:rate5m{%s})
3333
-
3434
sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{%s})
35-
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
36-
'sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{%s})' % $.jobMatcher('alertmanager'),
35+
||| % [$.jobMatcher($._config.job_names.alertmanager), $.jobMatcher($._config.job_names.alertmanager)],
36+
'sum(cluster_job:cortex_alertmanager_alerts_invalid_total:rate5m{%s})' % $.jobMatcher($._config.job_names.alertmanager),
3737
],
3838
['success', 'failed']
3939
)
@@ -49,8 +49,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
4949
sum(cluster_job_integration:cortex_alertmanager_notifications_total:rate5m{%s})
5050
-
5151
sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{%s})
52-
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
53-
'sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{%s})' % $.jobMatcher('alertmanager'),
52+
||| % [$.jobMatcher($._config.job_names.alertmanager), $.jobMatcher($._config.job_names.alertmanager)],
53+
'sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{%s})' % $.jobMatcher($._config.job_names.alertmanager),
5454
],
5555
['success', 'failed']
5656
)
@@ -66,15 +66,15 @@ local utils = import 'mixin-utils/utils.libsonnet';
6666
sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{%s}) by(integration)
6767
) > 0
6868
or on () vector(0)
69-
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
70-
'sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{%s}) by(integration)' % $.jobMatcher('alertmanager'),
69+
||| % [$.jobMatcher($._config.job_names.alertmanager), $.jobMatcher($._config.job_names.alertmanager)],
70+
'sum(cluster_job_integration:cortex_alertmanager_notifications_failed_total:rate5m{%s}) by(integration)' % $.jobMatcher($._config.job_names.alertmanager),
7171
],
7272
['success - {{ integration }}', 'failed - {{ integration }}']
7373
)
7474
)
7575
.addPanel(
7676
$.panel('Latency') +
77-
$.latencyPanel('cortex_alertmanager_notification_latency_seconds', '{%s}' % $.jobMatcher('alertmanager'))
77+
$.latencyPanel('cortex_alertmanager_notification_latency_seconds', '{%s}' % $.jobMatcher($._config.job_names.alertmanager))
7878
)
7979
)
8080
.addRow(
@@ -96,23 +96,23 @@ local utils = import 'mixin-utils/utils.libsonnet';
9696
.addPanel(
9797
$.panel('Per %s Tenants' % $._config.per_instance_label) +
9898
$.queryPanel(
99-
'max by(%s) (cortex_alertmanager_tenants_owned{%s})' % [$._config.per_instance_label, $.jobMatcher('alertmanager')],
99+
'max by(%s) (cortex_alertmanager_tenants_owned{%s})' % [$._config.per_instance_label, $.jobMatcher($._config.job_names.alertmanager)],
100100
'{{%s}}' % $._config.per_instance_label
101101
) +
102102
$.stack
103103
)
104104
.addPanel(
105105
$.panel('Per %s Alerts' % $._config.per_instance_label) +
106106
$.queryPanel(
107-
'sum by(%s) (cluster_job_%s:cortex_alertmanager_alerts:sum{%s})' % [$._config.per_instance_label, $._config.per_instance_label, $.jobMatcher('alertmanager')],
107+
'sum by(%s) (cluster_job_%s:cortex_alertmanager_alerts:sum{%s})' % [$._config.per_instance_label, $._config.per_instance_label, $.jobMatcher($._config.job_names.alertmanager)],
108108
'{{%s}}' % $._config.per_instance_label
109109
) +
110110
$.stack
111111
)
112112
.addPanel(
113113
$.panel('Per %s Silences' % $._config.per_instance_label) +
114114
$.queryPanel(
115-
'sum by(%s) (cluster_job_%s:cortex_alertmanager_silences:sum{%s})' % [$._config.per_instance_label, $._config.per_instance_label, $.jobMatcher('alertmanager')],
115+
'sum by(%s) (cluster_job_%s:cortex_alertmanager_silences:sum{%s})' % [$._config.per_instance_label, $._config.per_instance_label, $.jobMatcher($._config.job_names.alertmanager)],
116116
'{{%s}}' % $._config.per_instance_label
117117
) +
118118
$.stack
@@ -128,23 +128,23 @@ local utils = import 'mixin-utils/utils.libsonnet';
128128
sum(rate(cortex_alertmanager_sync_configs_total{%s}[$__rate_interval]))
129129
-
130130
sum(rate(cortex_alertmanager_sync_configs_failed_total{%s}[$__rate_interval]))
131-
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
132-
'sum(rate(cortex_alertmanager_sync_configs_failed_total{%s}[$__rate_interval]))' % $.jobMatcher('alertmanager'),
131+
||| % [$.jobMatcher($._config.job_names.alertmanager), $.jobMatcher($._config.job_names.alertmanager)],
132+
'sum(rate(cortex_alertmanager_sync_configs_failed_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.alertmanager),
133133
],
134134
['success', 'failed']
135135
)
136136
)
137137
.addPanel(
138138
$.panel('Syncs/sec (By Reason)') +
139139
$.queryPanel(
140-
'sum by(reason) (rate(cortex_alertmanager_sync_configs_total{%s}[$__rate_interval]))' % $.jobMatcher('alertmanager'),
140+
'sum by(reason) (rate(cortex_alertmanager_sync_configs_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.alertmanager),
141141
'{{reason}}'
142142
)
143143
)
144144
.addPanel(
145145
$.panel('Ring Check Errors/sec') +
146146
$.queryPanel(
147-
'sum (rate(cortex_alertmanager_ring_check_errors_total{%s}[$__rate_interval]))' % $.jobMatcher('alertmanager'),
147+
'sum (rate(cortex_alertmanager_ring_check_errors_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.alertmanager),
148148
'errors'
149149
)
150150
)
@@ -154,7 +154,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
154154
.addPanel(
155155
$.panel('Initial syncs /sec') +
156156
$.queryPanel(
157-
'sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{%s}[$__rate_interval]))' % $.jobMatcher('alertmanager'),
157+
'sum by(outcome) (rate(cortex_alertmanager_state_initial_sync_completed_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.alertmanager),
158158
'{{outcome}}'
159159
) + {
160160
targets: [
@@ -167,7 +167,7 @@ local utils = import 'mixin-utils/utils.libsonnet';
167167
)
168168
.addPanel(
169169
$.panel('Initial sync duration') +
170-
$.latencyPanel('cortex_alertmanager_state_initial_sync_duration_seconds', '{%s}' % $.jobMatcher('alertmanager')) + {
170+
$.latencyPanel('cortex_alertmanager_state_initial_sync_duration_seconds', '{%s}' % $.jobMatcher($._config.job_names.alertmanager)) + {
171171
targets: [
172172
target {
173173
interval: '1m',
@@ -184,8 +184,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
184184
sum(rate(cortex_alertmanager_state_fetch_replica_state_total{%s}[$__rate_interval]))
185185
-
186186
sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{%s}[$__rate_interval]))
187-
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
188-
'sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{%s}[$__rate_interval]))' % $.jobMatcher('alertmanager'),
187+
||| % [$.jobMatcher($._config.job_names.alertmanager), $.jobMatcher($._config.job_names.alertmanager)],
188+
'sum(rate(cortex_alertmanager_state_fetch_replica_state_failed_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.alertmanager),
189189
],
190190
['success', 'failed']
191191
) + {
@@ -208,8 +208,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
208208
sum(cluster_job:cortex_alertmanager_state_replication_total:rate5m{%s})
209209
-
210210
sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{%s})
211-
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
212-
'sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{%s})' % $.jobMatcher('alertmanager'),
211+
||| % [$.jobMatcher($._config.job_names.alertmanager), $.jobMatcher($._config.job_names.alertmanager)],
212+
'sum(cluster_job:cortex_alertmanager_state_replication_failed_total:rate5m{%s})' % $.jobMatcher($._config.job_names.alertmanager),
213213
],
214214
['success', 'failed']
215215
)
@@ -222,8 +222,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
222222
sum(cluster_job:cortex_alertmanager_partial_state_merges_total:rate5m{%s})
223223
-
224224
sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{%s})
225-
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
226-
'sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{%s})' % $.jobMatcher('alertmanager'),
225+
||| % [$.jobMatcher($._config.job_names.alertmanager), $.jobMatcher($._config.job_names.alertmanager)],
226+
'sum(cluster_job:cortex_alertmanager_partial_state_merges_failed_total:rate5m{%s})' % $.jobMatcher($._config.job_names.alertmanager),
227227
],
228228
['success', 'failed']
229229
)
@@ -236,8 +236,8 @@ local utils = import 'mixin-utils/utils.libsonnet';
236236
sum(rate(cortex_alertmanager_state_persist_total{%s}[$__rate_interval]))
237237
-
238238
sum(rate(cortex_alertmanager_state_persist_failed_total{%s}[$__rate_interval]))
239-
||| % [$.jobMatcher('alertmanager'), $.jobMatcher('alertmanager')],
240-
'sum(rate(cortex_alertmanager_state_persist_failed_total{%s}[$__rate_interval]))' % $.jobMatcher('alertmanager'),
239+
||| % [$.jobMatcher($._config.job_names.alertmanager), $.jobMatcher($._config.job_names.alertmanager)],
240+
'sum(rate(cortex_alertmanager_state_persist_failed_total{%s}[$__rate_interval]))' % $.jobMatcher($._config.job_names.alertmanager),
241241
],
242242
['success', 'failed']
243243
)

0 commit comments

Comments
 (0)