Skip to content

Commit b13e96d

Browse files
committed
Changes to fix #116
Many quality-of-life improvements, including:
* Many snake_case labels in API Priority & Fairness panels
* Verb and resource filters on more request panels
* Updated obsolete time series
* Unit and legend updates
* Panels resized and organized
1 parent 1d8d83e commit b13e96d

File tree

5 files changed

+48
-57
lines changed

5 files changed

+48
-57
lines changed

assets/api-performance-overview/panels.libsonnet

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ local g = import 'github.com/grafana/grafonnet/gen/grafonnet-latest/main.libsonn
3131
+ options.tooltip.withSort('desc')
3232
+ timeSeries.queryOptions.withTimeFrom(null)
3333
+ timeSeries.queryOptions.withTimeShift(null)
34+
+ options.legend.withSortBy('Max')
3435
+ options.legend.withSortDesc(true),
3536

3637
legendRightPlacement(title, unit, targets, gridPos):

assets/api-performance-overview/queries.libsonnet

Lines changed: 18 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@ local prometheus = g.query.prometheus;
55
{
66
request_duration_99th_quantile: {
77
query():
8-
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[$interval])) by(verb,le))')
8+
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",subresource!="log",verb=~"$verb",verb!~"WATCH|WATCHLIST|PROXY"}[$interval])) by(verb,le))')
99
+ prometheus.withFormat('time_series')
1010
+ prometheus.withIntervalFactor(2)
1111
+ prometheus.withLegendFormat('{{verb}}')
@@ -23,7 +23,7 @@ local prometheus = g.query.prometheus;
2323

2424
requestDuarationByResource: {
2525
query():
26-
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",subresource!="log",verb!~"WATCH|WATCHLIST|PROXY"}[$interval])) by(resource,le))')
26+
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_request_duration_seconds_bucket{apiserver=~"$apiserver",instance=~"$instance",resource=~"$resource",subresource!="log",verb=~"$verb",verb!~"WATCH|WATCHLIST|PROXY"}[$interval])) by(resource,le))')
2727
+ prometheus.withFormat('time_series')
2828
+ prometheus.withIntervalFactor(2)
2929
+ prometheus.withLegendFormat('{{resource}}')
@@ -75,7 +75,7 @@ local prometheus = g.query.prometheus;
7575

7676
requestRateDropped: {
7777
query():
78-
prometheus.withExpr('sum(rate(apiserver_dropped_requests_total{instance=~"$instance"}[$interval])) by (requestKind)')
78+
prometheus.withExpr('sum(rate(apiserver_request_terminations_total{instance=~"$instance"}[$interval])) by (verb)')
7979
+ prometheus.withFormat('time_series')
8080
+ prometheus.withIntervalFactor(2)
8181
+ prometheus.withLegendFormat('')
@@ -102,7 +102,7 @@ local prometheus = g.query.prometheus;
102102

103103
requestsLongRunning: {
104104
query():
105-
prometheus.withExpr('sum(apiserver_longrunning_gauge{instance=~"$instance",resource=~"$resource",verb=~"$verb"}) by(instance)')
105+
prometheus.withExpr('sum(apiserver_longrunning_requests{instance=~"$instance",resource=~"$resource",verb=~"$verb"}) by(instance)')
106106
+ prometheus.withFormat('time_series')
107107
+ prometheus.withIntervalFactor(2)
108108
+ prometheus.withLegendFormat('{{instance}}')
@@ -111,19 +111,10 @@ local prometheus = g.query.prometheus;
111111

112112
requestInFlight: {
113113
query():
114-
prometheus.withExpr('sum(apiserver_current_inflight_requests{instance=~"$instance"}) by (instance,requestKind)')
114+
prometheus.withExpr('sum(apiserver_current_inflight_requests{instance=~"$instance"}) by (instance,request_kind)')
115115
+ prometheus.withFormat('time_series')
116116
+ prometheus.withIntervalFactor(2)
117-
+ prometheus.withLegendFormat('{{requestKind}}-{{instance}}')
118-
+ prometheus.withDatasource('$Datasource'),
119-
},
120-
121-
requestRejectPandF: {
122-
query():
123-
prometheus.withExpr('sum(rate(apiserver_flowcontrol_rejected_requests_total{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}[$interval])) by (reason)')
124-
+ prometheus.withFormat('time_series')
125-
+ prometheus.withIntervalFactor(2)
126-
+ prometheus.withLegendFormat('')
117+
+ prometheus.withLegendFormat('{{request_kind}}-{{instance}}')
127118
+ prometheus.withDatasource('$Datasource'),
128119
},
129120

@@ -138,55 +129,55 @@ local prometheus = g.query.prometheus;
138129

139130
requestQueueLengthPandF: {
140131
query():
141-
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_queue_length_after_enqueue_bucket{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}[$interval])) by(flowSchema, priorityLevel, le))')
132+
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_queue_length_after_enqueue_bucket{instance=~"$instance",flow_schema=~"$flow_schema",priority_level=~"$priority_level"}[$interval])) by(flow_schema, priority_level, le))')
142133
+ prometheus.withFormat('time_series')
143134
+ prometheus.withIntervalFactor(2)
144-
+ prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}')
135+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
145136
+ prometheus.withDatasource('$Datasource'),
146137
},
147138

148139
requestWaitDuration99QuatilePandF: {
149140
query():
150-
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{instance=~"$instance"}[5m])) by(flow_schema, priority_level, le))')
141+
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{instance=~"$instance",flow_schema=~"$flow_schema",priority_level=~"$priority_level"}[5m])) by(flow_schema, priority_level, le))')
151142
+ prometheus.withFormat('time_series')
152143
+ prometheus.withIntervalFactor(2)
153-
+ prometheus.withLegendFormat('')
144+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
154145
+ prometheus.withDatasource('$Datasource'),
155146
},
156147

157148
requestDispatchRatePandF: {
158149
query():
159-
prometheus.withExpr('sum(rate(apiserver_flowcontrol_dispatched_requests_total{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}[$interval])) by(flowSchema,priorityLevel)')
150+
prometheus.withExpr('sum(rate(apiserver_flowcontrol_dispatched_requests_total{instance=~"$instance",flow_schema=~"$flow_schema",priority_level=~"$priority_level"}[$interval])) by(flow_schema,priority_level)')
160151
+ prometheus.withFormat('time_series')
161152
+ prometheus.withIntervalFactor(2)
162-
+ prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}')
153+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
163154
+ prometheus.withDatasource('$Datasource'),
164155
},
165156

166157
requestExecutionDurationPandF: {
167158
query():
168-
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}[$interval])) by(flowSchema, priorityLevel, le))')
159+
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{instance=~"$instance",flow_schema=~"$flow_schema",priority_level=~"$priority_level"}[$interval])) by(flow_schema, priority_level, le))')
169160
+ prometheus.withFormat('time_series')
170161
+ prometheus.withIntervalFactor(2)
171-
+ prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}')
162+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
172163
+ prometheus.withDatasource('$Datasource'),
173164
},
174165

175166
pendingInQueuePandF: {
176167
query():
177-
prometheus.withExpr('sum(apiserver_flowcontrol_current_inqueue_requests{instance=~"$instance",flowSchema=~"$flowSchema",priorityLevel=~"$priorityLevel"}) by (flowSchema,priorityLevel)')
168+
prometheus.withExpr('sum(apiserver_flowcontrol_current_inqueue_requests{instance=~"$instance",flow_schema=~"$flow_schema",priority_level=~"$priority_level"}) by (flow_schema,priority_level)')
178169
+ prometheus.withFormat('time_series')
179170
+ prometheus.withIntervalFactor(2)
180-
+ prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}')
171+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
181172
+ prometheus.withDatasource('$Datasource'),
182173
},
183174

184175
concurrencyLimitByKubeapiserverPandF: {
185176
query():
186-
prometheus.withExpr('sum(apiserver_flowcontrol_request_concurrency_limit{instance=~".*:6443",priorityLevel=~"$priorityLevel"}) by (instance,priorityLevel)')
177+
prometheus.withExpr('sum(apiserver_flowcontrol_request_concurrency_in_use{instance=~".*:6443",priority_level=~"$priority_level"}) by (instance,flow_schema)')
187178
+ prometheus.withFormat('time_series')
188179
+ prometheus.withIntervalFactor(2)
189-
+ prometheus.withLegendFormat('')
180+
+ prometheus.withLegendFormat('{{instance}}:{{flow_schema}}')
190181
+ prometheus.withDatasource('$Datasource'),
191182
},
192183
}

assets/api-performance-overview/variables.libsonnet

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@ local var = g.dashboard.variable;
1212
+ var.query.withDatasourceFromVariable(self.Datasource)
1313
+ var.query.selectionOptions.withMulti(false)
1414
+ var.query.selectionOptions.withIncludeAll(true)
15-
+ var.query.generalOptions.withLabel('apisever')
15+
+ var.query.generalOptions.withLabel('apiserver')
1616
+ var.query.withRefresh(2),
1717

1818
instance:
@@ -47,16 +47,16 @@ local var = g.dashboard.variable;
4747
+ var.query.generalOptions.withLabel('verb')
4848
+ var.query.withRefresh(2),
4949

50-
flowSchema:
51-
var.query.new('flowSchema', 'label_values(flowSchema)')
50+
flow_schema:
51+
var.query.new('flow_schema', 'label_values(flow_schema)')
5252
+ var.query.withDatasourceFromVariable(self.Datasource)
5353
+ var.query.selectionOptions.withMulti(false)
5454
+ var.query.selectionOptions.withIncludeAll(true)
5555
+ var.query.generalOptions.withLabel('flow-schema')
5656
+ var.query.withRefresh(2),
5757

58-
priorityLevel:
59-
var.query.new('priorityLevel', 'label_values(priorityLevel)')
58+
priority_level:
59+
var.query.new('priority_level', 'label_values(priority_level)')
6060
+ var.query.withDatasourceFromVariable(self.Datasource)
6161
+ var.query.selectionOptions.withMulti(false)
6262
+ var.query.selectionOptions.withIncludeAll(true)

assets/hypershift-perf-dashboard/queries.libsonnet

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -708,7 +708,7 @@ local prometheus = g.query.prometheus;
708708

709709
requests_dropped_rate: {
710710
query():
711-
prometheus.withExpr('sum(rate(apiserver_dropped_requests_total{namespace=~"$namespace"}[2m])) by (requestKind)')
711+
prometheus.withExpr('sum(rate(apiserver_dropped_requests_total{namespace=~"$namespace"}[2m])) by (request_kind)')
712712
+ prometheus.withFormat('time_series')
713713
+ prometheus.withIntervalFactor(2)
714714
+ prometheus.withDatasource('P1BA917A37525EDF3'),
@@ -742,9 +742,9 @@ local prometheus = g.query.prometheus;
742742

743743
request_in_flight: {
744744
query():
745-
prometheus.withExpr('sum(apiserver_current_inflight_requests{namespace=~"$namespace"}) by (instance,requestKind)')
745+
prometheus.withExpr('sum(apiserver_current_inflight_requests{namespace=~"$namespace"}) by (instance,request_kind)')
746746
+ prometheus.withFormat('time_series')
747-
+ prometheus.withLegendFormat('{{requestKind}}-{{instance}}')
747+
+ prometheus.withLegendFormat('{{request_kind}}-{{instance}}')
748748
+ prometheus.withIntervalFactor(2)
749749
+ prometheus.withDatasource('P1BA917A37525EDF3'),
750750
},
@@ -768,54 +768,54 @@ local prometheus = g.query.prometheus;
768768

769769
pf_request_queue_length: {
770770
query():
771-
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_queue_length_after_enqueue_bucket{namespace=~"$namespace"}[2m])) by(flowSchema, priorityLevel, le))')
771+
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_queue_length_after_enqueue_bucket{namespace=~"$namespace"}[2m])) by(flow_schema, priority_level, le))')
772772
+ prometheus.withFormat('time_series')
773-
+ prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}')
773+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
774774
+ prometheus.withIntervalFactor(2)
775775
+ prometheus.withDatasource('P1BA917A37525EDF3'),
776776
},
777777

778778
pf_request_wait_duration_99th_quartile: {
779779
query():
780-
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{namespace=~"$namespace"}[2m])) by(flowSchema, priorityLevel, le))')
780+
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_wait_duration_seconds_bucket{namespace=~"$namespace"}[2m])) by(flow_schema, priority_level, le))')
781781
+ prometheus.withFormat('time_series')
782-
+ prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}')
782+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
783783
+ prometheus.withIntervalFactor(2)
784784
+ prometheus.withDatasource('P1BA917A37525EDF3'),
785785
},
786786

787787
pf_request_execution_duration: {
788788
query():
789-
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{namespace=~"$namespace"}[2m])) by(flowSchema, priorityLevel, le))')
789+
prometheus.withExpr('histogram_quantile(0.99, sum(rate(apiserver_flowcontrol_request_execution_seconds_bucket{namespace=~"$namespace"}[2m])) by(flow_schema, priority_level, le))')
790790
+ prometheus.withFormat('time_series')
791-
+ prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}')
791+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
792792
+ prometheus.withIntervalFactor(2)
793793
+ prometheus.withDatasource('P1BA917A37525EDF3'),
794794
},
795795

796796
pf_request_dispatch_rate: {
797797
query():
798-
prometheus.withExpr('sum(rate(apiserver_flowcontrol_dispatched_requests_total{namespace=~"$namespace"}[2m])) by(flowSchema,priorityLevel)')
798+
prometheus.withExpr('sum(rate(apiserver_flowcontrol_dispatched_requests_total{namespace=~"$namespace"}[2m])) by(flow_schema,priority_level)')
799799
+ prometheus.withFormat('time_series')
800-
+ prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}')
800+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
801801
+ prometheus.withIntervalFactor(2)
802802
+ prometheus.withDatasource('P1BA917A37525EDF3'),
803803
},
804804

805805
pf_concurrency_limit: {
806806
query():
807-
prometheus.withExpr('sum(apiserver_flowcontrol_request_concurrency_limit{namespace=~"$namespace"}) by (priorityLevel)')
807+
prometheus.withExpr('sum(apiserver_flowcontrol_request_concurrency_limit{namespace=~"$namespace"}) by (priority_level)')
808808
+ prometheus.withFormat('time_series')
809-
+ prometheus.withLegendFormat('{{priorityLevel}}')
809+
+ prometheus.withLegendFormat('{{priority_level}}')
810810
+ prometheus.withIntervalFactor(2)
811811
+ prometheus.withDatasource('P1BA917A37525EDF3'),
812812
},
813813

814814
pf_pending_in_queue: {
815815
query():
816-
prometheus.withExpr('sum(apiserver_flowcontrol_current_inqueue_requests{namespace=~"$namespace"}) by (flowSchema,priorityLevel)')
816+
prometheus.withExpr('sum(apiserver_flowcontrol_current_inqueue_requests{namespace=~"$namespace"}) by (flow_schema,priority_level)')
817817
+ prometheus.withFormat('time_series')
818-
+ prometheus.withLegendFormat('{{flowSchema}}:{{priorityLevel}}')
818+
+ prometheus.withLegendFormat('{{flow_schema}}:{{priority_level}}')
819819
+ prometheus.withIntervalFactor(2)
820820
+ prometheus.withDatasource('P1BA917A37525EDF3'),
821821
},

templates/General/api-performance-overview.jsonnet

Lines changed: 9 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -23,28 +23,27 @@ g.dashboard.new('API Performance Dashboard')
2323
variables.resource,
2424
variables.code,
2525
variables.verb,
26-
variables.flowSchema,
27-
variables.priorityLevel,
26+
variables.flow_schema,
27+
variables.priority_level,
2828
variables.interval,
2929
])
3030
+ g.dashboard.withPanels([
31-
panels.timeSeries.legendRightPlacement('request duration - 99th quantile', 'short', queries.request_duration_99th_quantile.query(), { x: 0, y: 0, w: 12, h: 8 }),
31+
panels.timeSeries.legendRightPlacement('request duration - 99th quantile', 's', queries.request_duration_99th_quantile.query(), { x: 0, y: 0, w: 12, h: 8 }),
3232
panels.timeSeries.legendRightPlacement('request rate - by instance', 'short', queries.requestRateByInstance.query(), { x: 12, y: 0, w: 12, h: 8 }),
33-
panels.timeSeries.legendRightPlacement('request duration - 99th quantile - by resource', 'short', queries.requestDuarationByResource.query(), { x: 0, y: 8, w: 12, h: 8 }),
33+
panels.timeSeries.legendRightPlacement('request duration - 99th quantile - by resource', 's', queries.requestDuarationByResource.query(), { x: 0, y: 8, w: 12, h: 8 }),
3434
panels.timeSeries.legendRightPlacement('request rate - by resource', 'short', queries.requestRateByResource.query(), { x: 12, y: 8, w: 12, h: 8 }),
35-
panels.timeSeries.legendBottomPlacement('request duration - read vs write', 'short', queries.requestDurationReadWrite.query(), { x: 0, y: 16, w: 12, h: 8 }),
35+
panels.timeSeries.legendBottomPlacement('request duration - read vs write', 's', queries.requestDurationReadWrite.query(), { x: 0, y: 16, w: 12, h: 8 }),
3636
panels.timeSeries.legendBottomPlacement('request rate - read vs write', 'short', queries.requestRateReadWrite.query(), { x: 12, y: 16, w: 12, h: 8 }),
3737
panels.timeSeries.legendBottomPlacement('requests dropped rate', 'short', queries.requestRateDropped.query(), { x: 0, y: 24, w: 12, h: 8 }),
3838
panels.timeSeries.legendBottomPlacement('requests terminated rate', 'short', queries.requestRateTerminated.query(), { x: 12, y: 24, w: 12, h: 8 }),
3939
panels.timeSeries.legendRightPlacement('requests status rate', 'short', queries.requestRateStatus.query(), { x: 0, y: 32, w: 12, h: 8 }),
4040
panels.timeSeries.legendRightPlacement('long running requests', 'short', queries.requestsLongRunning.query(), { x: 12, y: 32, w: 12, h: 8 }),
4141
panels.timeSeries.legendRightPlacement('request in flight', 'short', queries.requestInFlight.query(), { x: 0, y: 40, w: 12, h: 8 }),
42-
panels.timeSeries.legendRightPlacement('p&f - requests rejected', 'short', queries.requestRejectPandF.query(), { x: 12, y: 40, w: 12, h: 8 }),
43-
panels.timeSeries.legendRightPlacement('response size - 99th quantile', 'short', queries.responseSize99Quatile.query(), { x: 0, y: 48, w: 12, h: 8 }),
44-
panels.timeSeries.legendRightPlacement('p&f - request queue length', 'short', queries.requestQueueLengthPandF.query(), { x: 12, y: 48, w: 12, h: 8 }),
45-
panels.timeSeries.withRequestWaitDurationAggregations('p&f - request wait duration - 99th quantile', 'short', queries.requestWaitDuration99QuatilePandF.query(), { x: 0, y: 56, w: 24, h: 8 }),
42+
panels.timeSeries.legendRightPlacement('response size - 99th quantile', 'bytes', queries.responseSize99Quatile.query(), { x: 12, y: 40, w: 12, h: 8 }),
43+
panels.timeSeries.legendRightPlacement('p&f - request queue length', 'short', queries.requestQueueLengthPandF.query(), { x: 0, y: 48, w: 12, h: 8 }),
44+
panels.timeSeries.withRequestWaitDurationAggregations('p&f - request wait duration - 99th quantile', 's', queries.requestWaitDuration99QuatilePandF.query(), { x: 12, y: 48, w: 12, h: 8 }),
4645
panels.timeSeries.legendRightPlacement('p&f - request dispatch rate', 'short', queries.requestDispatchRatePandF.query(), { x: 0, y: 64, w: 12, h: 8 }),
47-
panels.timeSeries.legendRightPlacement('p&f - request execution duration', 'short', queries.requestExecutionDurationPandF.query(), { x: 12, y: 64, w: 12, h: 8 }),
46+
panels.timeSeries.legendRightPlacement('p&f - request execution duration', 's', queries.requestExecutionDurationPandF.query(), { x: 12, y: 64, w: 12, h: 8 }),
4847
panels.timeSeries.legendRightPlacement('p&f - pending in queue', 'short', queries.pendingInQueuePandF.query(), { x: 0, y: 72, w: 12, h: 8 }),
4948
panels.timeSeries.legendRightPlacement('p&f - concurrency limit by kube-apiserver', 'short', queries.concurrencyLimitByKubeapiserverPandF.query(), { x: 12, y: 72, w: 12, h: 8 }),
5049
])

0 commit comments

Comments
 (0)