|
1 | | -local grafana = import 'github.com/grafana/grafonnet-lib/grafonnet/grafana.libsonnet'; |
| 1 | +local grafana = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonnet'; |
2 | 2 | local dashboard = grafana.dashboard; |
3 | | -local row = grafana.row; |
4 | | -local prometheus = grafana.prometheus; |
5 | | -local template = grafana.template; |
6 | | -local graphPanel = grafana.graphPanel; |
| 3 | +local prometheus = grafana.query.prometheus; |
| 4 | +local variable = dashboard.variable; |
| 5 | +local panel = grafana.panel; |
| 6 | +local row = panel.row; |
7 | 7 |
|
8 | 8 | { |
9 | 9 | grafanaDashboards+:: { |
10 | | - |
11 | 10 | local amQuerySelector = std.join(',', ['%s=~"$%s"' % [label, label] for label in std.split($._config.alertmanagerClusterLabels, ',')]), // PromQL matchers of the form LABEL=~"$LABEL", one per comma-separated cluster label, joined with ',' |
12 | 11 | local amNameDashboardLegend = std.join('/', ['{{%s}}' % [label] for label in std.split($._config.alertmanagerNameLabels, ',')]), // legend template: one '{{LABEL}}' per comma-separated name label, joined with '/' |
13 | 12 |
|
14 | | - local alertmanagerClusterSelectorTemplates = |
| 13 | + local datasource = // dashboard variable named 'datasource'; panels and queries in this file reference it as '$datasource' |
| 14 | + variable.datasource.new('datasource', 'prometheus') // NOTE(review): second argument presumably is the datasource type (the removed template used query: 'prometheus') - confirm against grafonnet docs |
| 15 | + + variable.datasource.generalOptions.withLabel('Data Source') |
| 16 | + + variable.datasource.generalOptions.withCurrent('Prometheus') // the removed hand-written template had current text/value 'Prometheus' |
| 17 | + + variable.datasource.generalOptions.showOnDashboard.withLabelAndValue(), // NOTE(review): presumably the equivalent of hide: 0 in the removed template - confirm |
| 18 | + |
| 19 | + local alertmanagerClusterSelectorVariables = // one query variable per comma-separated entry of $._config.alertmanagerClusterLabels |
15 | 20 | [ |
16 | | - template.new( |
17 | | - name=label, |
18 | | - label=label, |
19 | | - datasource='$datasource', |
20 | | - query='label_values(alertmanager_alerts, %s)' % label, |
21 | | - current='', |
22 | | - refresh=2, |
23 | | - includeAll=false, |
24 | | - sort=1 |
25 | | - ) |
| 21 | + variable.query.new(label) |
| 22 | + + variable.query.generalOptions.withLabel(label) |
| 23 | + + variable.query.withDatasourceFromVariable(datasource) // binds the query to the 'datasource' variable defined in this file |
| 24 | + + variable.query.queryTypes.withLabelValues(label, metric='alertmanager_alerts') // replaces the removed query 'label_values(alertmanager_alerts, LABEL)' |
| 25 | + + variable.query.generalOptions.withCurrent('') |
| 26 | + + variable.query.refresh.onTime() // replaces refresh=2 - NOTE(review): confirm onTime() maps to the same refresh setting |
| 27 | + + variable.query.selectionOptions.withIncludeAll(false) |
| 28 | + + variable.query.withSort(type='alphabetical') // replaces sort=1 - NOTE(review): confirm the default sort direction matches |
26 | 29 | for label in std.split($._config.alertmanagerClusterLabels, ',') |
27 | 30 | ], |
28 | 31 |
|
29 | | - local integrationTemplate = |
30 | | - template.new( |
31 | | - name='integration', |
32 | | - datasource='$datasource', |
33 | | - query='label_values(alertmanager_notifications_total{integration=~"%s"}, integration)' % $._config.alertmanagerCriticalIntegrationsRegEx, |
34 | | - current='all', |
35 | | - hide='2', // Always hide |
36 | | - refresh=2, |
37 | | - includeAll=true, |
38 | | - sort=1 |
39 | | - ), |
| 32 | + local integrationVariable = // hidden 'integration' query variable; the notification panels repeat over it |
| 33 | + variable.query.new('integration') |
| 34 | + + variable.query.withDatasourceFromVariable(datasource) |
| 35 | + + variable.query.queryTypes.withLabelValues('integration', metric='alertmanager_notifications_total{integration=~"%s"}' % $._config.alertmanagerCriticalIntegrationsRegEx) // only integrations matching the configured critical-integrations regex |
| 36 | + + variable.query.generalOptions.withCurrent('$__all') |
| 37 | + + variable.query.generalOptions.showOnDashboard.withNothing() // Always hide; uses the query-variable helper rather than the datasource-variable one, matching the rest of this block |
| 38 | + + variable.query.refresh.onTime() |
| 39 | + + variable.query.selectionOptions.withIncludeAll(true) |
| 40 | + + variable.query.withSort(type='alphabetical'), |
| 41 | + |
| 42 | + local panelTimeSeriesStdOptions = // options mixed into every time series panel defined in this file |
| 43 | + {} |
| 44 | + + panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('normal') // stacks series; a panel that must not stack has to override this after mixing it in (the removed 'Notification Duration' graphPanel used stack=false) |
| 45 | + + panel.timeSeries.fieldConfig.defaults.custom.withFillOpacity(10) |
| 46 | + + panel.timeSeries.fieldConfig.defaults.custom.withShowPoints('never') |
| 47 | + + panel.timeSeries.options.legend.withShowLegend(false) // replaces legend_show=false of the removed graphPanels |
| 48 | + + panel.timeSeries.options.tooltip.withMode('multi') |
| 49 | + + panel.timeSeries.queryOptions.withDatasource('prometheus', '$datasource'), // panels use whatever the 'datasource' dashboard variable selects |
40 | 50 |
|
41 | 51 | 'alertmanager-overview.json': |
42 | 52 | local alerts = // panel charting alertmanager_alerts summed by the configured cluster and name labels |
43 | | - graphPanel.new( |
44 | | - 'Alerts', |
45 | | - description='current set of alerts stored in the Alertmanager', |
46 | | - datasource='$datasource', |
47 | | - span=6, |
48 | | - format='none', |
49 | | - stack=true, |
50 | | - fill=1, |
51 | | - legend_show=false, |
52 | | - ) |
53 | | - .addTarget(prometheus.target('sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s' % $._config { amNameDashboardLegend: amNameDashboardLegend })); |
| 53 | + panel.timeSeries.new('Alerts') |
| 54 | + + panel.timeSeries.panelOptions.withDescription('current set of alerts stored in the Alertmanager') |
| 55 | + + panel.timeSeries.standardOptions.withUnit('none') // replaces format='none' |
| 56 | + + panelTimeSeriesStdOptions // shared stacking, fill, legend, tooltip and datasource options |
| 57 | + + panel.timeSeries.queryOptions.withTargets([ |
| 58 | + prometheus.new( |
| 59 | + '$datasource', |
| 60 | + 'sum(alertmanager_alerts{%(amQuerySelector)s}) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, // $._config is extended with amQuerySelector so every %(...)s placeholder resolves |
| 61 | + ) |
| 62 | + + prometheus.withIntervalFactor(2) // NOTE(review): presumably preserves the default of the removed prometheus.target - confirm |
| 63 | + + prometheus.withLegendFormat('%(amNameDashboardLegend)s' % $._config { amNameDashboardLegend: amNameDashboardLegend }), |
| 64 | + ]); |
54 | 65 |
|
55 | 66 | local alertsRate = // panel with two series: rate of received alerts and rate of invalid alerts |
56 | | - graphPanel.new( |
57 | | - 'Alerts receive rate', |
58 | | - description='rate of successful and invalid alerts received by the Alertmanager', |
59 | | - datasource='$datasource', |
60 | | - span=6, |
61 | | - format='ops', |
62 | | - stack=true, |
63 | | - fill=1, |
64 | | - legend_show=false, |
65 | | - ) |
66 | | - .addTarget(prometheus.target('sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Received' % $._config { amNameDashboardLegend: amNameDashboardLegend })) |
67 | | - .addTarget(prometheus.target('sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Invalid' % $._config { amNameDashboardLegend: amNameDashboardLegend })); |
| 67 | + panel.timeSeries.new('Alerts receive rate') |
| 68 | + + panel.timeSeries.panelOptions.withDescription('rate of successful and invalid alerts received by the Alertmanager') |
| 69 | + + panel.timeSeries.standardOptions.withUnit('ops') // replaces format='ops' |
| 70 | + + panelTimeSeriesStdOptions // shared stacking, fill, legend, tooltip and datasource options |
| 71 | + + panel.timeSeries.queryOptions.withTargets([ |
| 72 | + prometheus.new( // series 1: received alerts |
| 73 | + '$datasource', |
| 74 | + 'sum(rate(alertmanager_alerts_received_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, |
| 75 | + ) |
| 76 | + + prometheus.withIntervalFactor(2) |
| 77 | + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Received' % $._config { amNameDashboardLegend: amNameDashboardLegend }), |
| 78 | + prometheus.new( // series 2: invalid alerts |
| 79 | + '$datasource', |
| 80 | + 'sum(rate(alertmanager_alerts_invalid_total{%(amQuerySelector)s}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, |
| 81 | + ) |
| 82 | + + prometheus.withIntervalFactor(2) |
| 83 | + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Invalid' % $._config { amNameDashboardLegend: amNameDashboardLegend }), |
| 84 | + ]); |
68 | 85 |
|
69 | 86 | local notifications = // panel repeated per value of the 'integration' variable: total and failed notification rates |
70 | | - graphPanel.new( |
71 | | - '$integration: Notifications Send Rate', |
72 | | - description='rate of successful and invalid notifications sent by the Alertmanager', |
73 | | - datasource='$datasource', |
74 | | - format='ops', |
75 | | - stack=true, |
76 | | - fill=1, |
77 | | - legend_show=false, |
78 | | - repeat='integration' |
79 | | - ) |
80 | | - .addTarget(prometheus.target('sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Total' % $._config { amNameDashboardLegend: amNameDashboardLegend })) |
81 | | - .addTarget(prometheus.target('sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Failed' % $._config { amNameDashboardLegend: amNameDashboardLegend })); |
| 87 | + panel.timeSeries.new('$integration: Notifications Send Rate') |
| 88 | + + panel.timeSeries.panelOptions.withDescription('rate of successful and invalid notifications sent by the Alertmanager') |
| 89 | + + panel.timeSeries.standardOptions.withUnit('ops') // replaces format='ops' |
| 90 | + + panelTimeSeriesStdOptions // shared stacking, fill, legend, tooltip and datasource options |
| 91 | + + panel.timeSeries.panelOptions.withRepeat('integration') // replaces repeat='integration' |
| 92 | + + panel.timeSeries.queryOptions.withTargets([ |
| 93 | + prometheus.new( // series 1: all notifications for the current $integration |
| 94 | + '$datasource', |
| 95 | + 'sum(rate(alertmanager_notifications_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, |
| 96 | + ) |
| 97 | + + prometheus.withIntervalFactor(2) |
| 98 | + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Total' % $._config { amNameDashboardLegend: amNameDashboardLegend }), |
| 99 | + prometheus.new( // series 2: failed notifications for the current $integration |
| 100 | + '$datasource', |
| 101 | + 'sum(rate(alertmanager_notifications_failed_total{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (integration,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s)' % $._config { amQuerySelector: amQuerySelector }, |
| 102 | + ) |
| 103 | + + prometheus.withIntervalFactor(2) |
| 104 | + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Failed' % $._config { amNameDashboardLegend: amNameDashboardLegend }), |
| 105 | + ]); |
82 | 106 |
|
83 | 107 | local notificationDuration = // panel repeated per value of the 'integration' variable: p99, median and average notification latency |
84 | | - graphPanel.new( |
85 | | - '$integration: Notification Duration', |
86 | | - description='latency of notifications sent by the Alertmanager', |
87 | | - datasource='$datasource', |
88 | | - format='s', |
89 | | - stack=false, |
90 | | - fill=1, |
91 | | - legend_show=false, |
92 | | - repeat='integration' |
93 | | - ) |
94 | | - .addTarget(prometheus.target( |
95 | | - ||| |
96 | | - histogram_quantile(0.99, |
97 | | - sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) |
98 | | - ) |
99 | | - ||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend } |
100 | | - )) |
101 | | - .addTarget(prometheus.target( |
102 | | - ||| |
103 | | - histogram_quantile(0.50, |
104 | | - sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) |
105 | | - ) |
106 | | - ||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend } |
107 | | - )) |
108 | | - .addTarget(prometheus.target( |
109 | | - ||| |
110 | | - sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) |
111 | | - / |
112 | | - sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) |
113 | | - ||| % $._config { amQuerySelector: amQuerySelector }, legendFormat='%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend } |
114 | | - )) |
| 108 | + panel.timeSeries.new('$integration: Notification Duration') |
| 109 | + + panel.timeSeries.panelOptions.withDescription('latency of notifications sent by the Alertmanager') |
| 110 | + + panel.timeSeries.standardOptions.withUnit('s') // replaces format='s' |
| 111 | + + panelTimeSeriesStdOptions + panel.timeSeries.fieldConfig.defaults.custom.stacking.withMode('none') // the shared options stack series; latency quantiles and the average must not be summed visually, so restore the removed stack=false |
| 112 | + + panel.timeSeries.panelOptions.withRepeat('integration') // replaces repeat='integration' |
| 113 | + + panel.timeSeries.queryOptions.withTargets([ |
| 114 | + prometheus.new( // series 1: 99th percentile latency |
| 115 | + '$datasource', |
| 116 | + ||| |
| 117 | + histogram_quantile(0.99, |
| 118 | + sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) |
| 119 | + ) |
| 120 | + ||| % $._config { amQuerySelector: amQuerySelector }, |
| 121 | + ) |
| 122 | + + prometheus.withIntervalFactor(2) |
| 123 | + + prometheus.withLegendFormat('%(amNameDashboardLegend)s 99th Percentile' % $._config { amNameDashboardLegend: amNameDashboardLegend }), |
| 124 | + prometheus.new( // series 2: median latency |
| 125 | + '$datasource', |
| 126 | + ||| |
| 127 | + histogram_quantile(0.50, |
| 128 | + sum(rate(alertmanager_notification_latency_seconds_bucket{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (le,%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) |
| 129 | + ) |
| 130 | + ||| % $._config { amQuerySelector: amQuerySelector }, |
| 131 | + ) |
| 132 | + + prometheus.withIntervalFactor(2) |
| 133 | + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Median' % $._config { amNameDashboardLegend: amNameDashboardLegend }), |
| 134 | + prometheus.new( // series 3: average latency (rate of _sum divided by rate of _count) |
| 135 | + '$datasource', |
| 136 | + ||| |
| 137 | + sum(rate(alertmanager_notification_latency_seconds_sum{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) |
| 138 | + / |
| 139 | + sum(rate(alertmanager_notification_latency_seconds_count{%(amQuerySelector)s, integration="$integration"}[$__rate_interval])) by (%(alertmanagerClusterLabels)s,%(alertmanagerNameLabels)s) |
| 140 | + ||| % $._config { amQuerySelector: amQuerySelector }, |
| 141 | + ) |
| 142 | + + prometheus.withIntervalFactor(2) |
| 143 | + + prometheus.withLegendFormat('%(amNameDashboardLegend)s Average' % $._config { amNameDashboardLegend: amNameDashboardLegend }), |
| 144 | + ]); |
115 | 145 |
|
116 | | - dashboard.new( |
117 | | - '%sOverview' % $._config.dashboardNamePrefix, |
118 | | - time_from='now-1h', |
119 | | - tags=($._config.dashboardTags), |
120 | | - timezone='utc', |
121 | | - refresh='30s', |
122 | | - graphTooltip='shared_crosshair', |
123 | | - uid='alertmanager-overview' |
124 | | - ) |
125 | | - .addTemplate( |
126 | | - { |
127 | | - current: { |
128 | | - text: 'Prometheus', |
129 | | - value: 'Prometheus', |
130 | | - }, |
131 | | - hide: 0, |
132 | | - label: 'Data Source', |
133 | | - name: 'datasource', |
134 | | - options: [], |
135 | | - query: 'prometheus', |
136 | | - refresh: 1, |
137 | | - regex: '', |
138 | | - type: 'datasource', |
139 | | - }, |
140 | | - ) |
141 | | - .addTemplates(alertmanagerClusterSelectorTemplates) |
142 | | - .addTemplate(integrationTemplate) |
143 | | - .addRow( |
144 | | - row.new('Alerts') |
145 | | - .addPanel(alerts) |
146 | | - .addPanel(alertsRate) |
147 | | - ) |
148 | | - .addRow( |
149 | | - row.new('Notifications') |
150 | | - .addPanel(notifications) |
151 | | - .addPanel(notificationDuration) |
152 | | - ), |
| 146 | + dashboard.new('%sOverview' % $._config.dashboardNamePrefix) // dashboard object emitted as 'alertmanager-overview.json' |
| 147 | + + dashboard.time.withFrom('now-1h') |
| 148 | + + dashboard.withTags($._config.dashboardTags) |
| 149 | + + dashboard.withTimezone('utc') |
| 150 | + + dashboard.withRefresh('30s') // dashboard auto-refresh, as refresh='30s' was; timepicker.withRefreshIntervals would only change the picker's list of selectable intervals |
| 151 | + + dashboard.graphTooltip.withSharedCrosshair() |
| 152 | + + dashboard.withUid('alertmanager-overview') |
| 153 | + + dashboard.withVariables( |
| 154 | + [datasource] |
| 155 | + + alertmanagerClusterSelectorVariables |
| 156 | + + [integrationVariable] |
| 157 | + ) |
| 158 | + + dashboard.withPanels( |
| 159 | + grafana.util.grid.makeGrid([ |
| 160 | + row.new('Alerts') |
| 161 | + + row.withPanels([ |
| 162 | + alerts, |
| 163 | + alertsRate |
| 164 | + ]), |
| 165 | + row.new('Notifications') |
| 166 | + + row.withPanels([ |
| 167 | + notifications, |
| 168 | + notificationDuration |
| 169 | + ]) |
| 170 | + ], panelWidth=12, panelHeight=7) // NOTE(review): panelWidth=12 presumably yields two panels per row on a 24-column grid, like the removed span=6 - confirm |
| 171 | + ) |
153 | 172 | }, |
154 | 173 | } |
0 commit comments