Skip to content

Commit c47ace9

Browse files
authored
Merge pull request ceph#43707 from BenoitKnecht/ceph-mgr-service-id
mgr: Fix ceph_daemon label in ceph_rgw_* metrics

Reviewed-by: Aashish Sharma <aasharma@redhat.com>
Reviewed-by: Ernesto Puerta <epuertat@redhat.com>
Reviewed-by: Pere Diaz Bou <pdiazbou@redhat.com>
2 parents 8505861 + 2daaa05 commit c47ace9

File tree

9 files changed: +254 additions, -64 deletions.

monitoring/grafana/dashboards/hosts-overview.json

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -796,7 +796,7 @@
796796
"multi": false,
797797
"name": "rgw_hosts",
798798
"options": [ ],
799-
"query": "label_values(ceph_rgw_qlen, ceph_daemon)",
799+
"query": "label_values(ceph_rgw_metadata, ceph_daemon)",
800800
"refresh": 1,
801801
"regex": "rgw.(.*)",
802802
"sort": 1,

monitoring/grafana/dashboards/jsonnet/grafana_dashboards.jsonnet

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
7070
addTemplateSchema('mds_hosts', '$datasource', 'label_values(ceph_mds_inodes, ceph_daemon)', 1, true, 1, null, 'mds.(.*)')
7171
)
7272
.addTemplate(
73-
addTemplateSchema('rgw_hosts', '$datasource', 'label_values(ceph_rgw_qlen, ceph_daemon)', 1, true, 1, null, 'rgw.(.*)')
73+
addTemplateSchema('rgw_hosts', '$datasource', 'label_values(ceph_rgw_metadata, ceph_daemon)', 1, true, 1, null, 'rgw.(.*)')
7474
)
7575
.addPanels([
7676
HostsOverviewSingleStatPanel(
@@ -450,7 +450,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
450450
type='panel', id='graph', name='Graph', version='5.0.0'
451451
)
452452
.addTemplate(
453-
addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_req, ceph_daemon)', 1, true, 1, '', '')
453+
addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_metadata, ceph_daemon)', 1, true, 1, '', '')
454454
)
455455
.addTemplate(
456456
addTemplateSchema('code', '$datasource', 'label_values(haproxy_server_http_responses_total{instance=~"$ingress_service"}, code)', 1, true, 1, 'HTTP Code', '')
@@ -468,14 +468,14 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
468468
'',
469469
's',
470470
'short',
471-
'rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])',
471+
'rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata',
472472
'GET AVG',
473473
0, 1, 8, 7
474474
)
475475
.addTargets(
476476
[
477477
addTargetSchema(
478-
'rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])',
478+
'rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata',
479479
1,
480480
'time_series',
481481
'PUT AVG'
@@ -485,7 +485,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
485485
'',
486486
'none',
487487
'short',
488-
'sum by(rgw_host) (label_replace(rate(ceph_rgw_req[30s]), "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"))',
488+
'sum by (rgw_host) (label_replace(rate(ceph_rgw_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, "rgw_host", "$1", "ceph_daemon", "rgw.(.*)"))',
489489
'{{rgw_host}}',
490490
8, 1, 7, 7
491491
),
@@ -494,7 +494,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
494494
'Latencies are shown stacked, without a yaxis to provide a visual indication of GET latency imbalance across RGW hosts',
495495
's',
496496
'short',
497-
'label_replace(rate(ceph_rgw_get_initial_lat_sum[30s]),"rgw_host","$1","ceph_daemon","rgw.(.*)") / \nlabel_replace(rate(ceph_rgw_get_initial_lat_count[30s]),"rgw_host","$1","ceph_daemon","rgw.(.*)")',
497+
'label_replace(\n rate(ceph_rgw_get_initial_lat_sum[30s]) /\n rate(ceph_rgw_get_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n"rgw_host", "$1", "ceph_daemon", "rgw.(.*)")',
498498
'{{rgw_host}}',
499499
15, 1, 6, 7
500500
),
@@ -520,7 +520,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
520520
'Total bytes transferred in/out through get/put operations, by radosgw instance',
521521
'bytes',
522522
'short',
523-
'sum by(rgw_host) (\n (label_replace(rate(ceph_rgw_get_b[30s]), "rgw_host","$1","ceph_daemon","rgw.(.*)")) + \n (label_replace(rate(ceph_rgw_put_b[30s]), "rgw_host","$1","ceph_daemon","rgw.(.*)"))\n)',
523+
'label_replace(sum by (instance_id) (\n rate(ceph_rgw_get_b[30s]) + \n rate(ceph_rgw_put_b[30s])\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata, "rgw_host", "$1", "ceph_daemon", "rgw.(.*)")',
524524
'{{rgw_host}}',
525525
8, 8, 7, 6
526526
),
@@ -529,7 +529,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
529529
'Latencies are shown stacked, without a yaxis to provide a visual indication of PUT latency imbalance across RGW hosts',
530530
's',
531531
'short',
532-
'label_replace(rate(ceph_rgw_put_initial_lat_sum[30s]),"rgw_host","$1","ceph_daemon","rgw.(.*)") / \nlabel_replace(rate(ceph_rgw_put_initial_lat_count[30s]),"rgw_host","$1","ceph_daemon","rgw.(.*)")',
532+
'label_replace(\n rate(ceph_rgw_put_initial_lat_sum[30s]) /\n rate(ceph_rgw_put_initial_lat_count[30s]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata,\n"rgw_host", "$1", "ceph_daemon", "rgw.(.*)")',
533533
'{{rgw_host}}',
534534
15, 8, 6, 6
535535
),
@@ -659,7 +659,7 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
659659
g.template.datasource('datasource', 'prometheus', 'default', label='Data Source')
660660
)
661661
.addTemplate(
662-
addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_req, ceph_daemon)', 1, true, 1, '', '')
662+
addTemplateSchema('rgw_servers', '$datasource', 'label_values(ceph_rgw_metadata, ceph_daemon)', 1, true, 1, '', '')
663663
)
664664
.addPanels([
665665
addRowSchema(false, true, 'RGW Host Detail : $rgw_servers') + {gridPos: {x: 0, y: 0, w: 24, h: 1}},
@@ -669,8 +669,8 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
669669
'',
670670
's',
671671
'short',
672-
'sum by (ceph_daemon) (rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~"($rgw_servers)"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~"($rgw_servers)"}[30s]))',
673-
'sum by (ceph_daemon)(rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~"($rgw_servers)"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~"($rgw_servers)"}[30s]))',
672+
'sum by (instance_id) (rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
673+
'sum by (instance_id) (rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
674674
'GET {{ceph_daemon}}',
675675
'PUT {{ceph_daemon}}',
676676
0, 1, 6, 8
@@ -681,8 +681,8 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
681681
'',
682682
'bytes',
683683
'short',
684-
'rate(ceph_rgw_get_b{ceph_daemon=~"$rgw_servers"}[30s])',
685-
'rate(ceph_rgw_put_b{ceph_daemon=~"$rgw_servers"}[30s])',
684+
'rate(ceph_rgw_get_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
685+
'rate(ceph_rgw_put_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
686686
'GETs {{ceph_daemon}}',
687687
'PUTs {{ceph_daemon}}',
688688
6, 1, 7, 8
@@ -693,22 +693,22 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
693693
'',
694694
'short',
695695
'short',
696-
'rate(ceph_rgw_failed_req{ceph_daemon=~"$rgw_servers"}[30s])',
697-
'rate(ceph_rgw_get{ceph_daemon=~"$rgw_servers"}[30s])',
696+
'rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
697+
'rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
698698
'Requests Failed {{ceph_daemon}}',
699699
'GETs {{ceph_daemon}}',
700700
13, 1, 7, 8
701701
)
702702
.addTargets(
703703
[
704704
addTargetSchema(
705-
'rate(ceph_rgw_put{ceph_daemon=~"$rgw_servers"}[30s])',
705+
'rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
706706
1,
707707
'time_series',
708708
'PUTs {{ceph_daemon}}'
709709
),
710710
addTargetSchema(
711-
'rate(ceph_rgw_req{ceph_daemon=~"$rgw_servers"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~"$rgw_servers"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~"$rgw_servers"}[30s]))',
711+
'(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}',
712712
1,
713713
'time_series',
714714
'Other {{ceph_daemon}}'
@@ -722,10 +722,10 @@ local addStyle(alias, colorMode, colors, dateFormat, decimals, mappingType, patt
722722
'Workload Breakdown',
723723
'current'
724724
)
725-
.addTarget(addTargetSchema('rate(ceph_rgw_failed_req{ceph_daemon=~"$rgw_servers"}[30s])', 1, 'time_series', 'Failures {{ceph_daemon}}'))
726-
.addTarget(addTargetSchema('rate(ceph_rgw_get{ceph_daemon=~"$rgw_servers"}[30s])', 1, 'time_series', 'GETs {{ceph_daemon}}'))
727-
.addTarget(addTargetSchema('rate(ceph_rgw_put{ceph_daemon=~"$rgw_servers"}[30s])', 1, 'time_series', 'PUTs {{ceph_daemon}}'))
728-
.addTarget(addTargetSchema('rate(ceph_rgw_req{ceph_daemon=~"$rgw_servers"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~"$rgw_servers"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~"$rgw_servers"}[30s]))', 1, 'time_series', 'Other (DELETE,LIST) {{ceph_daemon}}')) + {gridPos: {x: 20, y: 1, w: 4, h: 8}}
725+
.addTarget(addTargetSchema('rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', 1, 'time_series', 'Failures {{ceph_daemon}}'))
726+
.addTarget(addTargetSchema('rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', 1, 'time_series', 'GETs {{ceph_daemon}}'))
727+
.addTarget(addTargetSchema('rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', 1, 'time_series', 'PUTs {{ceph_daemon}}'))
728+
.addTarget(addTargetSchema('(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers"}', 1, 'time_series', 'Other (DELETE,LIST) {{ceph_daemon}}')) + {gridPos: {x: 20, y: 1, w: 4, h: 8}}
729729
])
730730
}
731731
{

monitoring/grafana/dashboards/radosgw-detail.json

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -104,14 +104,14 @@
104104
"steppedLine": false,
105105
"targets": [
106106
{
107-
"expr": "sum by (ceph_daemon) (rate(ceph_rgw_get_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_get_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))",
107+
"expr": "sum by (instance_id) (rate(ceph_rgw_get_initial_lat_sum[30s]) / rate(ceph_rgw_get_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
108108
"format": "time_series",
109109
"intervalFactor": 1,
110110
"legendFormat": "GET {{ceph_daemon}}",
111111
"refId": "A"
112112
},
113113
{
114-
"expr": "sum by (ceph_daemon)(rate(ceph_rgw_put_initial_lat_sum{ceph_daemon=~\"($rgw_servers)\"}[30s]) / rate(ceph_rgw_put_initial_lat_count{ceph_daemon=~\"($rgw_servers)\"}[30s]))",
114+
"expr": "sum by (instance_id) (rate(ceph_rgw_put_initial_lat_sum[30s]) / rate(ceph_rgw_put_initial_lat_count[30s])) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
115115
"format": "time_series",
116116
"intervalFactor": 1,
117117
"legendFormat": "PUT {{ceph_daemon}}",
@@ -196,14 +196,14 @@
196196
"steppedLine": false,
197197
"targets": [
198198
{
199-
"expr": "rate(ceph_rgw_get_b{ceph_daemon=~\"$rgw_servers\"}[30s])",
199+
"expr": "rate(ceph_rgw_get_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
200200
"format": "time_series",
201201
"intervalFactor": 1,
202202
"legendFormat": "GETs {{ceph_daemon}}",
203203
"refId": "A"
204204
},
205205
{
206-
"expr": "rate(ceph_rgw_put_b{ceph_daemon=~\"$rgw_servers\"}[30s])",
206+
"expr": "rate(ceph_rgw_put_b[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
207207
"format": "time_series",
208208
"intervalFactor": 1,
209209
"legendFormat": "PUTs {{ceph_daemon}}",
@@ -294,28 +294,28 @@
294294
"steppedLine": false,
295295
"targets": [
296296
{
297-
"expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])",
297+
"expr": "rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
298298
"format": "time_series",
299299
"intervalFactor": 1,
300300
"legendFormat": "Requests Failed {{ceph_daemon}}",
301301
"refId": "A"
302302
},
303303
{
304-
"expr": "rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])",
304+
"expr": "rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
305305
"format": "time_series",
306306
"intervalFactor": 1,
307307
"legendFormat": "GETs {{ceph_daemon}}",
308308
"refId": "B"
309309
},
310310
{
311-
"expr": "rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])",
311+
"expr": "rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
312312
"format": "time_series",
313313
"intervalFactor": 1,
314314
"legendFormat": "PUTs {{ceph_daemon}}",
315315
"refId": "C"
316316
},
317317
{
318-
"expr": "rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))",
318+
"expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
319319
"format": "time_series",
320320
"intervalFactor": 1,
321321
"legendFormat": "Other {{ceph_daemon}}",
@@ -384,28 +384,28 @@
384384
"pieType": "pie",
385385
"targets": [
386386
{
387-
"expr": "rate(ceph_rgw_failed_req{ceph_daemon=~\"$rgw_servers\"}[30s])",
387+
"expr": "rate(ceph_rgw_failed_req[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
388388
"format": "time_series",
389389
"intervalFactor": 1,
390390
"legendFormat": "Failures {{ceph_daemon}}",
391391
"refId": "A"
392392
},
393393
{
394-
"expr": "rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s])",
394+
"expr": "rate(ceph_rgw_get[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
395395
"format": "time_series",
396396
"intervalFactor": 1,
397397
"legendFormat": "GETs {{ceph_daemon}}",
398398
"refId": "B"
399399
},
400400
{
401-
"expr": "rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s])",
401+
"expr": "rate(ceph_rgw_put[30s]) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
402402
"format": "time_series",
403403
"intervalFactor": 1,
404404
"legendFormat": "PUTs {{ceph_daemon}}",
405405
"refId": "C"
406406
},
407407
{
408-
"expr": "rate(ceph_rgw_req{ceph_daemon=~\"$rgw_servers\"}[30s]) -\n (rate(ceph_rgw_get{ceph_daemon=~\"$rgw_servers\"}[30s]) +\n rate(ceph_rgw_put{ceph_daemon=~\"$rgw_servers\"}[30s]))",
408+
"expr": "(\n rate(ceph_rgw_req[30s]) -\n (rate(ceph_rgw_get[30s]) + rate(ceph_rgw_put[30s]))\n) * on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\"}",
409409
"format": "time_series",
410410
"intervalFactor": 1,
411411
"legendFormat": "Other (DELETE,LIST) {{ceph_daemon}}",
@@ -450,7 +450,7 @@
450450
"multi": false,
451451
"name": "rgw_servers",
452452
"options": [ ],
453-
"query": "label_values(ceph_rgw_req, ceph_daemon)",
453+
"query": "label_values(ceph_rgw_metadata, ceph_daemon)",
454454
"refresh": 1,
455455
"regex": "",
456456
"sort": 1,

0 commit comments

Comments (0)