Skip to content

Commit 610e1a2

Browse files
authored
Merge pull request ceph#60873 from rhcs-dashboard/fix-69074-main
mgr/dashboard: Add ceph_daemon filter to rgw overview grafana panel queries
Reviewed-by: Afreen Misbah <[email protected]>
2 parents 1c0e9ab + 666f8fa commit 610e1a2

File tree

3 files changed

+18
-12
lines changed

3 files changed

+18
-12
lines changed

monitoring/ceph-mixin/dashboards/rgw.libsonnet

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -298,7 +298,7 @@ local g = import 'grafonnet/grafana.libsonnet';
298298
label_replace(
299299
rate(ceph_rgw_op_get_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
300300
rate(ceph_rgw_op_get_obj_lat_count{%(matchers)s}[$__rate_interval]) *
301-
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
301+
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
302302
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
303303
)
304304
||| % $.matchers(),
@@ -314,7 +314,7 @@ local g = import 'grafonnet/grafana.libsonnet';
314314
label_replace(
315315
rate(ceph_rgw_op_put_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
316316
rate(ceph_rgw_op_put_obj_lat_count{%(matchers)s}[$__rate_interval]) *
317-
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
317+
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
318318
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
319319
)
320320
||| % $.matchers(),
@@ -331,7 +331,7 @@ local g = import 'grafonnet/grafana.libsonnet';
331331
sum by (rgw_host) (
332332
label_replace(
333333
rate(ceph_rgw_req{%(matchers)s}[$__rate_interval]) *
334-
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
334+
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
335335
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
336336
)
337337
)
@@ -351,7 +351,7 @@ local g = import 'grafonnet/grafana.libsonnet';
351351
label_replace(
352352
rate(ceph_rgw_op_get_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
353353
rate(ceph_rgw_op_get_obj_lat_count{%(matchers)s}[$__rate_interval]) *
354-
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
354+
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
355355
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
356356
)
357357
||| % $.matchers(),
@@ -385,7 +385,7 @@ local g = import 'grafonnet/grafana.libsonnet';
385385
label_replace(sum by (instance_id) (
386386
rate(ceph_rgw_op_get_obj_bytes{%(matchers)s}[$__rate_interval]) +
387387
rate(ceph_rgw_op_put_obj_bytes{%(matchers)s}[$__rate_interval])) *
388-
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
388+
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
389389
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
390390
)
391391
||| % $.matchers(),
@@ -404,7 +404,7 @@ local g = import 'grafonnet/grafana.libsonnet';
404404
label_replace(
405405
rate(ceph_rgw_op_put_obj_lat_sum{%(matchers)s}[$__rate_interval]) /
406406
rate(ceph_rgw_op_put_obj_lat_count{%(matchers)s}[$__rate_interval]) *
407-
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{%(matchers)s},
407+
on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~"$rgw_servers", %(matchers)s},
408408
"rgw_host", "$1", "ceph_daemon", "rgw.(.*)"
409409
)
410410
||| % $.matchers(),

monitoring/ceph-mixin/dashboards_out/radosgw-overview.json

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -108,14 +108,14 @@
108108
"steppedLine": false,
109109
"targets": [
110110
{
111-
"expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
111+
"expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
112112
"format": "time_series",
113113
"intervalFactor": 1,
114114
"legendFormat": "GET {{rgw_host}}",
115115
"refId": "A"
116116
},
117117
{
118-
"expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
118+
"expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
119119
"format": "time_series",
120120
"intervalFactor": 1,
121121
"legendFormat": "PUT {{rgw_host}}",
@@ -210,7 +210,7 @@
210210
"steppedLine": false,
211211
"targets": [
212212
{
213-
"expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
213+
"expr": "sum by (rgw_host) (\n label_replace(\n rate(ceph_rgw_req{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n )\n)\n",
214214
"format": "time_series",
215215
"intervalFactor": 1,
216216
"legendFormat": "{{rgw_host}}",
@@ -305,7 +305,7 @@
305305
"steppedLine": false,
306306
"targets": [
307307
{
308-
"expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
308+
"expr": "label_replace(\n rate(ceph_rgw_op_get_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_get_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
309309
"format": "time_series",
310310
"intervalFactor": 1,
311311
"legendFormat": "{{rgw_host}}",
@@ -502,7 +502,7 @@
502502
"steppedLine": false,
503503
"targets": [
504504
{
505-
"expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
505+
"expr": "label_replace(sum by (instance_id) (\n rate(ceph_rgw_op_get_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval]) +\n rate(ceph_rgw_op_put_obj_bytes{cluster=~\"$cluster\", }[$__rate_interval])) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
506506
"format": "time_series",
507507
"intervalFactor": 1,
508508
"legendFormat": "{{rgw_host}}",
@@ -597,7 +597,7 @@
597597
"steppedLine": false,
598598
"targets": [
599599
{
600-
"expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
600+
"expr": "label_replace(\n rate(ceph_rgw_op_put_obj_lat_sum{cluster=~\"$cluster\", }[$__rate_interval]) /\n rate(ceph_rgw_op_put_obj_lat_count{cluster=~\"$cluster\", }[$__rate_interval]) *\n on (instance_id) group_left (ceph_daemon) ceph_rgw_metadata{ceph_daemon=~\"$rgw_servers\", cluster=~\"$cluster\", },\n \"rgw_host\", \"$1\", \"ceph_daemon\", \"rgw.(.*)\"\n)\n",
601601
"format": "time_series",
602602
"intervalFactor": 1,
603603
"legendFormat": "{{rgw_host}}",

monitoring/ceph-mixin/tests_dashboards/features/radosgw_overview.feature

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,7 @@ Scenario: "Test Average GET Latencies"
77
| ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 |
88
| ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
99
When interval is `30s`
10+
And variable `rgw_servers` is `rgw.foo`
1011
Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `GET {{rgw_host}}` shows:
1112
| metrics | values |
1213
| {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1.5 |
@@ -18,6 +19,7 @@ Scenario: "Test Average PUT Latencies"
1819
| ceph_rgw_op_put_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 10 30 50 |
1920
| ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
2021
When interval is `30s`
22+
And variable `rgw_servers` is `rgw.foo`
2123
Then Grafana panel `Average GET/PUT Latencies by RGW Instance` with legend `PUT {{rgw_host}}` shows:
2224
| metrics | values |
2325
| {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1 |
@@ -28,6 +30,7 @@ Scenario: "Test Total Requests/sec by RGW Instance"
2830
| ceph_rgw_req{instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 10 50 100 |
2931
| ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 |
3032
When interval is `30s`
33+
And variable `rgw_servers` is `rgw.1`
3134
Then Grafana panel `Total Requests/sec by RGW Instance` with legend `{{rgw_host}}` shows:
3235
| metrics | values |
3336
| {rgw_host="1"} | 1.5 |
@@ -39,6 +42,7 @@ Scenario: "Test GET Latencies by RGW Instance"
3942
| ceph_rgw_op_get_obj_lat_count{instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 20 60 80 |
4043
| ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
4144
When interval is `30s`
45+
And variable `rgw_servers` is `rgw.foo`
4246
Then Grafana panel `GET Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
4347
| metrics | values |
4448
| {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1.5 |
@@ -71,6 +75,7 @@ Scenario: "Test Bandwidth by RGW Instance"
7175
| ceph_rgw_metadata{ceph_daemon="rgw.1", hostname="localhost", instance="127.0.0.1", instance_id="92806566", job="ceph", cluster="mycluster"} | 1 1 1 |
7276
When evaluation time is `1m`
7377
And interval is `30s`
78+
And variable `rgw_servers` is `rgw.1`
7479
Then Grafana panel `Bandwidth by RGW Instance` with legend `{{rgw_host}}` shows:
7580
| metrics | values |
7681
| {ceph_daemon="rgw.1", instance_id="92806566", rgw_host="1"} | 2.25 |
@@ -83,6 +88,7 @@ Scenario: "Test PUT Latencies by RGW Instance"
8388
| ceph_rgw_metadata{ceph_daemon="rgw.foo", hostname="localhost", instance="127.0.0.1", instance_id="58892247", job="ceph", cluster="mycluster"} | 1 1 1 |
8489
When evaluation time is `1m`
8590
And interval is `30s`
91+
And variable `rgw_servers` is `rgw.foo`
8692
Then Grafana panel `PUT Latencies by RGW Instance` with legend `{{rgw_host}}` shows:
8793
| metrics | values |
8894
| {ceph_daemon="rgw.foo", instance="127.0.0.1", instance_id="58892247", job="ceph", rgw_host="foo", cluster="mycluster"} | 1 |

0 commit comments

Comments
 (0)