Skip to content

Commit 461d09c

Browse files
authored
Merge pull request #259 from grafana/open-source-envoy-mixin
Open-source Envoy mixin
2 parents fddff10 + da36878 commit 461d09c

File tree

2 files changed

+129
-0
lines changed

2 files changed

+129
-0
lines changed

envoy-mixin/dashboards.libsonnet

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
local g = import 'grafana-builder/grafana.libsonnet';
2+
local template = import 'grafonnet/template.libsonnet';
3+
4+
{
5+
// Manually define the "instance" template variable so that its "refresh" setting
6+
// and its "All" value can be customised. It is appended to the dashboard's templating list further down.
7+
local instanceTemplate =
8+
template.new(
9+
name='instance',
10+
datasource='$datasource',
11+
query='label_values(envoy_server_uptime{job="$job"}, instance)',
12+
allValues='.*', // With "All" selected the variable expands to '.*', so the instance=~"$instance" matchers include every instance.
13+
current='',
14+
hide='',
15+
refresh=2, // Refresh on time range change.
16+
includeAll=true,
// NOTE(review): sort=1 is presumably Grafana's alphabetical (ascending) ordering - confirm.
17+
sort=1
18+
),
19+
20+
// Dashboards built from Envoy metrics. Metric reference:
21+
// - HTTP: https://www.envoyproxy.io/docs/envoy/latest/configuration/http/http_filters/router_filter#statistics
22+
grafanaDashboards+:: {
23+
'envoy-overview.json':
24+
g.dashboard('Envoy Overview')
25+
.addTemplate('job', 'envoy_server_uptime', 'job')
26+
27+
// Hidden variables used to repeat a row for each upstream cluster / downstream listener filter.
// NOTE(review): the trailing 2 is presumably the "hide" setting of the variable - confirm against grafana-builder.
28+
.addMultiTemplate('envoy_cluster', 'envoy_cluster_version{job=~"$job",instance=~"$instance",envoy_cluster_name!="envoy-admin"}', 'envoy_cluster_name', 2)
29+
.addMultiTemplate('envoy_listener_filter', 'envoy_http_downstream_rq_total{job=~"$job",instance=~"$instance",envoy_http_conn_manager_prefix!~"admin|metrics",}', 'envoy_http_conn_manager_prefix', 2)
30+
31+
.addRow(
32+
// Connection and request rates summed over the selected job and instances.
g.row('Traffic')
33+
.addPanel(
34+
g.panel('Connections / sec') +
35+
g.queryPanel('sum(rate(envoy_listener_downstream_cx_total{job=~"$job",instance=~"$instance"}[$__interval]))', 'Downstream / Ingress') +
36+
g.queryPanel('sum(rate(envoy_cluster_upstream_cx_total{job=~"$job",instance=~"$instance"}[$__interval]))', 'Upstream / Egress') +
37+
{ yaxes: g.yaxes('cps') }
38+
)
39+
.addPanel(
40+
g.panel('QPS') +
41+
g.queryPanel('sum(rate(envoy_http_downstream_rq_total{job=~"$job",instance=~"$instance"}[$__interval]))', 'Downstream / Ingress') +
42+
g.queryPanel('sum(rate(envoy_cluster_upstream_rq_total{job=~"$job",instance=~"$instance"}[$__interval]))', 'Upstream / Egress') +
43+
{ yaxes: g.yaxes('rps') }
44+
)
45+
)
46+
47+
.addRow(
48+
g.row('Upstream / Egress: $envoy_cluster')
49+
.addPanel(
50+
g.panel('QPS') +
51+
$.envoyQpsPanel('envoy_cluster_upstream_rq_xx{envoy_cluster_name="$envoy_cluster",job=~"$job",instance=~"$instance"}')
52+
)
53+
.addPanel(
54+
g.panel('Latency') +
55+
// This metric is already in ms, so a multiplier of 1 is passed.
56+
g.latencyPanel('envoy_cluster_upstream_rq_time', '{envoy_cluster_name="$envoy_cluster",job=~"$job",instance=~"$instance"}', '1')
57+
)
58+
.addPanel(
59+
g.panel('Timeouts / sec') +
60+
g.queryPanel('sum(rate(envoy_cluster_upstream_rq_timeout{envoy_cluster_name="$envoy_cluster",job=~"$job",instance=~"$instance"}[$__interval]))', 'Timeouts') +
61+
{ yaxes: g.yaxes('rps') }
62+
)
63+
.addPanel(
64+
g.panel('Active') +
65+
g.queryPanel('sum(envoy_cluster_upstream_rq_active{envoy_cluster_name="$envoy_cluster",job=~"$job",instance=~"$instance"})', 'Requests') +
66+
g.queryPanel('sum(envoy_cluster_upstream_cx_active{envoy_cluster_name="$envoy_cluster",job=~"$job",instance=~"$instance"})', 'Connections')
67+
) +
68+
69+
// Repeat this row for each Envoy upstream cluster (driven by the "envoy_cluster" variable).
70+
{ repeat: 'envoy_cluster' },
71+
)
72+
73+
.addRow(
74+
g.row('Downstream / Ingress: $envoy_listener_filter')
75+
.addPanel(
76+
g.panel('QPS') +
77+
$.envoyQpsPanel('envoy_http_downstream_rq_xx{envoy_http_conn_manager_prefix="$envoy_listener_filter",job=~"$job",instance=~"$instance"}')
78+
)
79+
.addPanel(
80+
g.panel('Latency') +
81+
// This metric is already in ms, so a multiplier of 1 is passed.
82+
g.latencyPanel('envoy_http_downstream_rq_time', '{envoy_http_conn_manager_prefix="$envoy_listener_filter",job=~"$job",instance=~"$instance"}', '1')
83+
)
84+
.addPanel(
85+
g.panel('Timeouts / sec') +
86+
g.queryPanel('sum(rate(envoy_http_downstream_rq_timeout{envoy_http_conn_manager_prefix="$envoy_listener_filter",job=~"$job",instance=~"$instance"}[$__interval]))', 'Timeouts') +
87+
{ yaxes: g.yaxes('rps') }
88+
)
89+
.addPanel(
90+
g.panel('Active') +
91+
g.queryPanel('sum(envoy_http_downstream_rq_active{envoy_http_conn_manager_prefix="$envoy_listener_filter",job=~"$job",instance=~"$instance"})', 'Requests') +
92+
g.queryPanel('sum(envoy_http_downstream_cx_active{envoy_http_conn_manager_prefix="$envoy_listener_filter",job=~"$job",instance=~"$instance"})', 'Connections')
93+
) +
94+
95+
// Repeat this row for each Envoy downstream filter (driven by the "envoy_listener_filter" variable).
96+
{ repeat: 'envoy_listener_filter' },
97+
) + {
98+
// Append the manually defined "instance" template to the templates added above.
templating+: {
99+
list+: [instanceTemplate],
100+
},
101+
},
102+
},
103+
104+
// Custom panel fragment that displays QPS broken down by response status class, taken
104+
// from the Envoy label "envoy_response_code_class".
//
// selector: metric name plus label matchers, without a range; it is wrapped in
// rate(...[$__interval]) when the query expression is built.
//
// The expression copies the class into a "status" label with an "xx" suffix (a class of
// "2" becomes "2xx") and sums by that label; aliasColors assigns a colour per status
// class. The result is merged with g.stack.
105+
envoyQpsPanel(selector):: {
106+
aliasColors: {
107+
'1xx': '#EAB839',
108+
'2xx': '#7EB26D',
109+
'3xx': '#6ED0E0',
110+
'4xx': '#EF843C',
111+
'5xx': '#E24D42',
112+
},
113+
targets: [
114+
{
115+
expr: 'sum by (status) (label_replace(rate(' + selector + '[$__interval]), "status", "${1}xx", "envoy_response_code_class", "(.*)"))',
116+
format: 'time_series',
117+
intervalFactor: 2,
118+
legendFormat: '{{status}}',
119+
refId: 'A',
120+
step: 10,
121+
},
122+
],
123+
} + g.stack,
125+
}

envoy-mixin/mixin.libsonnet

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
// Mixin entry point: sets the grafanaDashboardFolder field to 'Envoy' and merges in
// everything defined in dashboards.libsonnet.
{
2+
grafanaDashboardFolder: 'Envoy',
3+
} +
4+
(import 'dashboards.libsonnet')

0 commit comments

Comments
 (0)