Skip to content

Commit aab3e7a

Browse files
authored
Merge pull request ceph#63353 from VallariAg/wip-vallari-nvmeof-ceph-exporter
monitoring: fix "Total gateway" and "Ceph Health NVMeoF WARNING" grafana graphs
2 parents 8ee13d7 + 16d46ae commit aab3e7a

File tree

2 files changed

+54
-110
lines changed

2 files changed

+54
-110
lines changed

monitoring/ceph-mixin/dashboards/ceph-nvmeof.libsonnet

Lines changed: 32 additions & 58 deletions
Original file line numberDiff line numberDiff line change
@@ -133,14 +133,14 @@ local g = import 'grafonnet/grafana.libsonnet';
133133
interval='1m',
134134
color={ mode: 'thresholds' },
135135
thresholdsMode='',
136-
noValue=null,
136+
noValue='0',
137137
).addThresholds([
138138
{ color: '#808080', value: null },
139139
{ color: 'red', value: 1.0003 },
140140
])
141141
.addTarget(
142142
$.addTargetSchema(
143-
expr="count(ceph_nvmeof_gateway_info) + sum(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
143+
expr='count(ceph_cephadm_daemon_status{service_type="nvmeof"})',
144144
format='time_series',
145145
instant=true,
146146
legendFormat='Total',
@@ -150,17 +150,17 @@ local g = import 'grafonnet/grafana.libsonnet';
150150
)
151151
.addTarget(
152152
$.addTargetSchema(
153-
expr='count(ceph_nvmeof_gateway_info)',
153+
expr='count(ceph_cephadm_daemon_status{service_type="nvmeof"}==1 or ceph_cephadm_daemon_status{service_type="nvmeof"}==2)',
154154
format='time_series',
155-
instant=false,
155+
instant=true,
156156
legendFormat='Available',
157-
range=true,
157+
range=false,
158158
datasource='$datasource',
159159
)
160160
)
161161
.addTarget(
162162
$.addTargetSchema(
163-
expr="(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
163+
expr='count(ceph_cephadm_daemon_status{service_type="nvmeof"}==0 or ceph_cephadm_daemon_status{service_type="nvmeof"}==-1 or ceph_cephadm_daemon_status{service_type="nvmeof"} == -2)',
164164
format='time_series',
165165
instant=true,
166166
legendFormat='Down',
@@ -208,8 +208,8 @@ local g = import 'grafonnet/grafana.libsonnet';
208208
]),
209209

210210
$.timeSeriesPanel(
211-
title='Ceph Health NVMeoF WARNING',
212-
description='Ceph healthchecks NVMeoF WARNINGs',
211+
title='Unhealthy Gateway Trend',
212+
description='Gateways in error states',
213213
gridPosition={ x: 8, y: 1, w: 7, h: 8 },
214214
lineInterpolation='linear',
215215
lineWidth=1,
@@ -221,89 +221,63 @@ local g = import 'grafonnet/grafana.libsonnet';
221221
showPoints='auto',
222222
unit='none',
223223
displayMode='list',
224-
showLegend=true,
224+
showLegend=false,
225225
placement='bottom',
226-
tooltip={ mode: 'multi', sort: 'desc' },
226+
tooltip={ hideZeros: true, mode: 'multi', sort: 'desc' },
227227
stackingMode='normal',
228228
spanNulls=false,
229229
decimals=0,
230230
thresholdsMode='absolute',
231-
noValue='0',
231+
noValue=0,
232232
).addThresholds([
233233
{ color: 'green', value: null },
234234
])
235235
.addTarget(
236236
$.addTargetSchema(
237-
expr="sum(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
237+
expr='group by (daemon_name) (ceph_cephadm_daemon_status{service_type="nvmeof"} == 0)',
238238
format='',
239239
instant=false,
240-
legendFormat='NVMEOF_GATEWAY_DOWN',
240+
legendFormat='stopped - {{ daemon_name }} ',
241241
range=true,
242242
datasource='$datasource',
243243
)
244244
)
245245
.addTarget(
246246
$.addTargetSchema(
247-
expr="sum(ceph_health_detail{name='NVMEOF_GATEWAY_DELETING'})",
247+
expr='group by (daemon_name) (ceph_cephadm_daemon_status{service_type="nvmeof"} == -1)',
248248
format='',
249249
instant=false,
250-
legendFormat='NVMEOF_GATEWAY_DELETING',
250+
legendFormat='error - {{ daemon_name }}',
251251
range=true,
252252
datasource='$datasource',
253253
)
254254
)
255255
.addTarget(
256256
$.addTargetSchema(
257-
expr="sum(ceph_health_detail{name='NVMEOF_SINGLE_GATEWAY'})",
257+
expr='group by (daemon_name) (ceph_cephadm_daemon_status{service_type="nvmeof"} == -2)',
258258
format='',
259259
instant=false,
260-
legendFormat='NVMEOF_SINGLE_GATEWAY',
260+
legendFormat='unknown_state - {{ daemon_name }}',
261261
range=true,
262262
datasource='$datasource',
263263
)
264264
)
265-
.addOverrides([
266-
{
267-
matcher: { id: 'byName', options: 'NVMEOF_GATEWAY_DOWN' },
268-
properties: [
269-
{
270-
id: 'color',
271-
value: {
272-
fixedColor: 'red',
273-
mode: 'fixed',
274-
},
275-
},
276-
],
277-
},
278-
{
279-
matcher: { id: 'byName', options: 'NVMEOF_GATEWAY_DELETING' },
280-
properties: [
281-
{
282-
id: 'color',
283-
value: {
284-
fixedColor: 'dark-purple',
285-
mode: 'fixed',
286-
},
287-
},
288-
],
289-
},
290-
{
291-
matcher: { id: 'byName', options: 'NVMEOF_SINGLE_GATEWAY' },
292-
properties: [
293-
{
294-
id: 'custom.lineWidth',
295-
value: 1,
296-
},
297-
{
298-
id: 'color',
299-
value: {
300-
fixedColor: 'super-light-orange',
301-
mode: 'shades',
265+
.addOverrides(
266+
[
267+
{
268+
matcher: { id: 'byType', options: 'number' },
269+
properties: [
270+
{
271+
id: 'color',
272+
value: {
273+
fixedColor: 'orange',
274+
mode: 'shades',
275+
},
302276
},
303-
},
304-
],
305-
},
306-
]),
277+
],
278+
},
279+
]
280+
),
307281

308282
$.addAlertListPanel(
309283
title='Active Alerts',

monitoring/ceph-mixin/dashboards_out/ceph-nvmeof.json

Lines changed: 22 additions & 52 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,7 @@
138138
"decimals": 0,
139139
"links": [ ],
140140
"mappings": [ ],
141+
"noValue": "0",
141142
"thresholds": {
142143
"mode": "",
143144
"steps": [
@@ -228,7 +229,7 @@
228229
"targets": [
229230
{
230231
"datasource": "$datasource",
231-
"expr": "count(ceph_nvmeof_gateway_info) + sum(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
232+
"expr": "count(ceph_cephadm_daemon_status{service_type=\"nvmeof\"})",
232233
"format": "time_series",
233234
"instant": true,
234235
"intervalFactor": 1,
@@ -238,17 +239,17 @@
238239
},
239240
{
240241
"datasource": "$datasource",
241-
"expr": "count(ceph_nvmeof_gateway_info)",
242+
"expr": "count(ceph_cephadm_daemon_status{service_type=\"nvmeof\"}==1 or ceph_cephadm_daemon_status{service_type=\"nvmeof\"}==2)",
242243
"format": "time_series",
243-
"instant": false,
244+
"instant": true,
244245
"intervalFactor": 1,
245246
"legendFormat": "Available",
246-
"range": true,
247+
"range": false,
247248
"refId": "B"
248249
},
249250
{
250251
"datasource": "$datasource",
251-
"expr": "(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
252+
"expr": "count(ceph_cephadm_daemon_status{service_type=\"nvmeof\"}==0 or ceph_cephadm_daemon_status{service_type=\"nvmeof\"}==-1 or ceph_cephadm_daemon_status{service_type=\"nvmeof\"} == -2)",
252253
"format": "time_series",
253254
"instant": true,
254255
"intervalFactor": 1,
@@ -263,7 +264,7 @@
263264
},
264265
{
265266
"datasource": "$datasource",
266-
"description": "Ceph healthchecks NVMeoF WARNINGs",
267+
"description": "Gateways in error states",
267268
"fieldConfig": {
268269
"defaults": {
269270
"color": {
@@ -300,7 +301,8 @@
300301
}
301302
},
302303
"decimals": 0,
303-
"noValue": "0",
304+
"mappings": [ ],
305+
"noValue": 0,
304306
"thresholds": {
305307
"mode": "absolute",
306308
"steps": [
@@ -315,48 +317,14 @@
315317
"overrides": [
316318
{
317319
"matcher": {
318-
"id": "byName",
319-
"options": "NVMEOF_GATEWAY_DOWN"
320-
},
321-
"properties": [
322-
{
323-
"id": "color",
324-
"value": {
325-
"fixedColor": "red",
326-
"mode": "fixed"
327-
}
328-
}
329-
]
330-
},
331-
{
332-
"matcher": {
333-
"id": "byName",
334-
"options": "NVMEOF_GATEWAY_DELETING"
335-
},
336-
"properties": [
337-
{
338-
"id": "color",
339-
"value": {
340-
"fixedColor": "dark-purple",
341-
"mode": "fixed"
342-
}
343-
}
344-
]
345-
},
346-
{
347-
"matcher": {
348-
"id": "byName",
349-
"options": "NVMEOF_SINGLE_GATEWAY"
320+
"id": "byType",
321+
"options": "number"
350322
},
351323
"properties": [
352-
{
353-
"id": "custom.lineWidth",
354-
"value": 1
355-
},
356324
{
357325
"id": "color",
358326
"value": {
359-
"fixedColor": "super-light-orange",
327+
"fixedColor": "orange",
360328
"mode": "shades"
361329
}
362330
}
@@ -376,9 +344,10 @@
376344
"calcs": [ ],
377345
"displayMode": "list",
378346
"placement": "bottom",
379-
"showLegend": true
347+
"showLegend": false
380348
},
381349
"tooltip": {
350+
"hideZeros": true,
382351
"mode": "multi",
383352
"sort": "desc"
384353
}
@@ -388,36 +357,36 @@
388357
"targets": [
389358
{
390359
"datasource": "$datasource",
391-
"expr": "sum(ceph_health_detail{name='NVMEOF_GATEWAY_DOWN'})",
360+
"expr": "group by (daemon_name) (ceph_cephadm_daemon_status{service_type=\"nvmeof\"} == 0)",
392361
"format": "",
393362
"instant": false,
394363
"intervalFactor": 1,
395-
"legendFormat": "NVMEOF_GATEWAY_DOWN",
364+
"legendFormat": "stopped - {{ daemon_name }} ",
396365
"range": true,
397366
"refId": "A"
398367
},
399368
{
400369
"datasource": "$datasource",
401-
"expr": "sum(ceph_health_detail{name='NVMEOF_GATEWAY_DELETING'})",
370+
"expr": "group by (daemon_name) (ceph_cephadm_daemon_status{service_type=\"nvmeof\"} == -1)",
402371
"format": "",
403372
"instant": false,
404373
"intervalFactor": 1,
405-
"legendFormat": "NVMEOF_GATEWAY_DELETING",
374+
"legendFormat": "error - {{ daemon_name }}",
406375
"range": true,
407376
"refId": "B"
408377
},
409378
{
410379
"datasource": "$datasource",
411-
"expr": "sum(ceph_health_detail{name='NVMEOF_SINGLE_GATEWAY'})",
380+
"expr": "group by (daemon_name) (ceph_cephadm_daemon_status{service_type=\"nvmeof\"} == -2)",
412381
"format": "",
413382
"instant": false,
414383
"intervalFactor": 1,
415-
"legendFormat": "NVMEOF_SINGLE_GATEWAY",
384+
"legendFormat": "unknown_state - {{ daemon_name }}",
416385
"range": true,
417386
"refId": "C"
418387
}
419388
],
420-
"title": "Ceph Health NVMeoF WARNING",
389+
"title": "Unhealthy Gateway Trend",
421390
"type": "timeseries"
422391
},
423392
{
@@ -1048,6 +1017,7 @@
10481017
}
10491018
},
10501019
"decimals": 0,
1020+
"mappings": [ ],
10511021
"noValue": "0",
10521022
"thresholds": {
10531023
"mode": "absolute",

0 commit comments

Comments
 (0)