Skip to content
This repository was archived by the owner on Apr 28, 2025. It is now read-only.

Commit 2527cf8

Browse files
authored
Merge pull request #158 from grafana/per-instance-metrics
Per Instance Read/Write Panels
2 parents b0997cd + a69330e commit 2527cf8

File tree

4 files changed

+78
-1
lines changed

4 files changed

+78
-1
lines changed

cortex-mixin/config.libsonnet

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -42,5 +42,8 @@
4242

4343
// Whether resources dashboards are enabled (based on cAdvisor metrics).
4444
resources_dashboards_enabled: false,
45+
46+
// Used on panels that show metrics per instance, e.g. 'pod' in a Kubernetes install.
47+
per_instance_label: 'pod',
4548
},
4649
}

cortex-mixin/dashboards/dashboard-utils.libsonnet

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,13 +38,21 @@ local utils = import 'mixin-utils/utils.libsonnet';
3838
.addMultiTemplate('namespace', 'cortex_build_info', 'namespace'),
3939
},
4040

41-
// The ,ixin allow specialism of the job selector depending on if its a single binary
41+
// The mixin allows specialisation of the job selector depending on whether it's a single binary
4242
// deployment or a namespaced one.
4343
jobMatcher(job)::
4444
if $._config.singleBinary
4545
then 'job=~"$job"'
4646
else 'cluster=~"$cluster", job=~"($namespace)/%s"' % job,
4747

48+
// jobMatcherEquality performs exact matches on cluster and namespace. Should be used on
49+
// panels that are expected to return too many series to be useful when multiple
50+
// namespaces or clusters are selected.
51+
jobMatcherEquality(job)::
52+
if $._config.singleBinary
53+
then 'job=~"$job"'
54+
else 'cluster="$cluster", namespace="$namespace", job=~"($namespace)/%s"' % job,
55+
4856
namespaceMatcher()::
4957
if $._config.singleBinary
5058
then 'job=~"$job"'
@@ -65,6 +73,16 @@ local utils = import 'mixin-utils/utils.libsonnet';
6573
],
6674
},
6775

76+
// hiddenLegendQueryPanel is a standard query panel designed to handle a large number of series. It hides the legend, doesn't fill the series, and
77+
// sorts the tooltip descending
78+
hiddenLegendQueryPanel(queries, legends, legendLink=null)::
79+
$.queryPanel(queries, legends, legendLink) +
80+
{
81+
legend: { show: false },
82+
fill: 0,
83+
tooltip: { sort: 2 },
84+
},
85+
6886
qpsPanel(selector)::
6987
super.qpsPanel(selector) + {
7088
targets: [

cortex-mixin/dashboards/reads.libsonnet

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
1414
$.panel('Latency') +
1515
utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.gateway) + [utils.selector.re('route', 'api_prom_api_v1_.+')])
1616
)
17+
.addPanel(
18+
$.panel('Per %s p99 Latency' % $._config.per_instance_label) +
19+
$.hiddenLegendQueryPanel(
20+
'histogram_quantile(0.99, sum by(le, %s) (rate(cortex_request_duration_seconds_bucket{%s, route=~"api_prom_api_v1_.+"}[$__interval])))' % [$._config.per_instance_label, $.jobMatcherEquality($._config.job_names.gateway)], ''
21+
) +
22+
{ yaxes: $.yaxes('s') }
23+
)
1724
)
1825
.addRow(
1926
$.row('Query Frontend')
@@ -25,6 +32,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
2532
$.panel('Latency') +
2633
utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.query_frontend) + [utils.selector.re('route', 'api_prom_api_v1_.+')])
2734
)
35+
.addPanel(
36+
$.panel('Per %s p99 Latency' % $._config.per_instance_label) +
37+
$.hiddenLegendQueryPanel(
38+
'histogram_quantile(0.99, sum by(le, %s) (rate(cortex_request_duration_seconds_bucket{%s, route=~"api_prom_api_v1_.+"}[$__interval])))' % [$._config.per_instance_label, $.jobMatcherEquality($._config.job_names.query_frontend)], ''
39+
) +
40+
{ yaxes: $.yaxes('s') }
41+
)
2842
)
2943
.addRow(
3044
$.row('Cache - Query Results')
@@ -47,6 +61,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
4761
$.panel('Latency') +
4862
utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.querier) + [utils.selector.re('route', 'api_prom_api_v1_.+')])
4963
)
64+
.addPanel(
65+
$.panel('Per %s p99 Latency' % $._config.per_instance_label) +
66+
$.hiddenLegendQueryPanel(
67+
'histogram_quantile(0.99, sum by(le, %s) (rate(cortex_request_duration_seconds_bucket{%s, route=~"api_prom_api_v1_.+"}[$__interval])))' % [$._config.per_instance_label, $.jobMatcherEquality($._config.job_names.querier)], ''
68+
) +
69+
{ yaxes: $.yaxes('s') }
70+
)
5071
)
5172
.addRow(
5273
$.row('Ingester')
@@ -58,6 +79,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
5879
$.panel('Latency') +
5980
utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.ingester) + [utils.selector.re('route', '/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata')])
6081
)
82+
.addPanel(
83+
$.panel('Per %s p99 Latency' % $._config.per_instance_label) +
84+
$.hiddenLegendQueryPanel(
85+
'histogram_quantile(0.99, sum by(le, %s) (rate(cortex_request_duration_seconds_bucket{%s, route=~"/cortex.Ingester/Query(Stream)?|/cortex.Ingester/MetricsForLabelMatchers|/cortex.Ingester/LabelValues|/cortex.Ingester/MetricsMetadata"}[$__interval])))' % [$._config.per_instance_label, $.jobMatcherEquality($._config.job_names.ingester)], ''
86+
) +
87+
{ yaxes: $.yaxes('s') }
88+
)
6189
)
6290
.addRowIf(
6391
std.member($._config.storage_engine, 'blocks'),
@@ -70,6 +98,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
7098
$.panel('Latency') +
7199
utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.store_gateway) + [utils.selector.re('route', '/gatewaypb.StoreGateway/.*')])
72100
)
101+
.addPanel(
102+
$.panel('Per %s p99 Latency' % $._config.per_instance_label) +
103+
$.hiddenLegendQueryPanel(
104+
'histogram_quantile(0.99, sum by(le, %s) (rate(cortex_request_duration_seconds_bucket{%s, route=~"/gatewaypb.StoreGateway/.*"}[$__interval])))' % [$._config.per_instance_label, $.jobMatcherEquality($._config.job_names.store_gateway)], ''
105+
) +
106+
{ yaxes: $.yaxes('s') }
107+
)
73108
)
74109
.addRowIf(
75110
std.member($._config.storage_engine, 'chunks'),

cortex-mixin/dashboards/writes.libsonnet

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
4040
$.panel('Latency') +
4141
utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.gateway) + [utils.selector.eq('route', 'api_prom_push')])
4242
)
43+
.addPanel(
44+
$.panel('Per %s p99 Latency' % $._config.per_instance_label) +
45+
$.hiddenLegendQueryPanel(
46+
'histogram_quantile(0.99, sum by(le, %s) (rate(cortex_request_duration_seconds_bucket{%s, route="api_prom_push"}[$__interval])))' % [$._config.per_instance_label, $.jobMatcherEquality($._config.job_names.gateway)], ''
47+
) +
48+
{ yaxes: $.yaxes('s') }
49+
)
4350
)
4451
.addRow(
4552
$.row('Distributor')
@@ -51,6 +58,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
5158
$.panel('Latency') +
5259
utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.distributor) + [utils.selector.re('route', '/httpgrpc.*|api_prom_push')])
5360
)
61+
.addPanel(
62+
$.panel('Per %s p99 Latency' % $._config.per_instance_label) +
63+
$.hiddenLegendQueryPanel(
64+
'histogram_quantile(0.99, sum by(le, %s) (rate(cortex_request_duration_seconds_bucket{%s, route=~"/httpgrpc.*|api_prom_push"}[$__interval])))' % [$._config.per_instance_label, $.jobMatcherEquality($._config.job_names.distributor)], ''
65+
) +
66+
{ yaxes: $.yaxes('s') }
67+
)
5468
)
5569
.addRow(
5670
$.row('KV Store (HA Dedupe)')
@@ -73,6 +87,13 @@ local utils = import 'mixin-utils/utils.libsonnet';
7387
$.panel('Latency') +
7488
utils.latencyRecordingRulePanel('cortex_request_duration_seconds', $.jobSelector($._config.job_names.ingester) + [utils.selector.eq('route', '/cortex.Ingester/Push')])
7589
)
90+
.addPanel(
91+
$.panel('Per %s p99 Latency' % $._config.per_instance_label) +
92+
$.hiddenLegendQueryPanel(
93+
'histogram_quantile(0.99, sum by(le, %s) (rate(cortex_request_duration_seconds_bucket{%s, route="/cortex.Ingester/Push"}[$__interval])))' % [$._config.per_instance_label, $.jobMatcherEquality($._config.job_names.ingester)], ''
94+
) +
95+
{ yaxes: $.yaxes('s') }
96+
)
7697
)
7798
.addRow(
7899
$.row('KV Store (Ring)')

0 commit comments

Comments
 (0)