Skip to content

Commit 60e2609

Browse files
committed
Add SLOs for kubeControllerManager and kubeProxy
1 parent 19b222e commit 60e2609

File tree

2 files changed

+139
-3
lines changed

2 files changed

+139
-3
lines changed

example.jsonnet

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,9 @@ local kp =
1212
common+: {
1313
namespace: 'monitoring',
1414
},
15+
kubernetesControlPlane+: {
16+
kubeProxy:true,
17+
},
1518
},
1619
};
1720

jsonnet/kube-prometheus/components/k8s-control-plane.libsonnet

Lines changed: 136 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,14 @@ local defaults = {
1919
kubeSchedulerSelector: 'job="kube-scheduler"',
2020
kubeControllerManagerSelector: 'job="kube-controller-manager"',
2121
kubeApiserverSelector: 'job="apiserver"',
22+
kubeProxySelector: 'job="kube-proxy"',
2223
coreDNSSelector: 'job="coredns"',
2324
podLabel: 'pod',
2425
runbookURLPattern: 'https://runbooks.prometheus-operator.dev/runbooks/kubernetes/%s',
2526
diskDeviceSelector: 'device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"',
2627
hostNetworkInterfaceSelector: 'device!~"veth.+"',
2728
},
2829
},
29-
kubeProxy:: false,
3030
kubelet: {
3131
slos: {
3232
requestErrors: {
@@ -39,6 +39,28 @@ local defaults = {
3939
},
4040
},
4141
},
42+
kubeControllerManager: {
43+
slos: {
44+
requestErrors: {
45+
target: '99',
46+
window: '2w',
47+
},
48+
},
49+
},
50+
kubeProxy: false,
51+
kubeProxyConfig: {  // different name for backwards compatibility
52+
slos: {
53+
syncRulesLatency: {
54+
target: '90',
55+
latency: '0.512', // must exist as le label
56+
window: '2w',
57+
},
58+
requestErrors: {
59+
target: '90', // kube-proxy makes very few requests
60+
window: '2w',
61+
},
62+
},
63+
},
4264
coredns: {
4365
name: 'coredns',
4466
slos: {
@@ -234,7 +256,7 @@ function(params) {
234256
indicator: {
235257
ratio: {
236258
errors: {
237-
metric: 'rest_client_requests_total{%s,code=~"5.."}' % [
259+
metric: 'rest_client_requests_total{%s,code=~"5..|<error>"}' % [
238260
k8s._config.mixin._config.kubeletSelector,
239261
],
240262
},
@@ -284,7 +306,7 @@ function(params) {
284306
},
285307
},
286308

287-
serviceMonitorKubeControllerManager: {
309+
kubeControllerManagerServiceMonitor: {
288310
apiVersion: 'monitoring.coreos.com/v1',
289311
kind: 'ServiceMonitor',
290312
metadata: k8s._metadata {
@@ -318,6 +340,43 @@ function(params) {
318340
},
319341
},
320342

343+
kubeControllerManagerSLORequestErrors: {
344+
apiVersion: 'pyrra.dev/v1alpha1',
345+
kind: 'ServiceLevelObjective',
346+
metadata: k8s._metadata {
347+
name: 'kube-controller-manager-request-errors',
348+
labels+: {
349+
'app.kubernetes.io/name': 'kube-controller-manager',
350+
prometheus: 'k8s', //TODO
351+
role: 'alert-rules',
352+
'pyrra.dev/component': 'kube-controller-manager',
353+
},
354+
},
355+
spec: {
356+
target: k8s._config.kubeControllerManager.slos.requestErrors.target,
357+
window: k8s._config.kubeControllerManager.slos.requestErrors.window,
358+
description: |||
359+
The Kubernetes controller manager is a daemon that embeds the core control loops shipped with Kubernetes.
360+
In applications of robotics and automation, a control loop is a non-terminating loop that regulates the state of the system.
361+
In Kubernetes, a controller is a control loop that watches the shared state of the cluster through the apiserver and makes changes attempting to move the current state towards the desired state. Examples of controllers that ship with Kubernetes today are the replication controller, endpoints controller, namespace controller, and serviceaccounts controller.
362+
|||,
363+
indicator: {
364+
ratio: {
365+
errors: {
366+
metric: 'rest_client_requests_total{%s,code=~"5..|<error>"}' % [
367+
k8s._config.mixin._config.kubeControllerManagerSelector,
368+
],
369+
},
370+
total: {
371+
metric: 'rest_client_requests_total{%s}' % [
372+
k8s._config.mixin._config.kubeControllerManagerSelector,
373+
],
374+
},
375+
},
376+
},
377+
},
378+
},
379+
321380
serviceMonitorApiserver: {
322381
apiVersion: 'monitoring.coreos.com/v1',
323382
kind: 'ServiceMonitor',
@@ -412,6 +471,80 @@ function(params) {
412471
},
413472
},
414473

474+
[if (defaults + params).kubeProxy then 'kubeProxySLOSyncRulesLatency']: {
475+
apiVersion: 'pyrra.dev/v1alpha1',
476+
kind: 'ServiceLevelObjective',
477+
metadata: k8s._metadata {
478+
name: 'kube-proxy-sync-rules-latency',
479+
labels+: {
480+
'app.kubernetes.io/name': 'kube-proxy',
481+
'app.kubernetes.io/component': 'controller', //TODO
482+
prometheus: 'k8s', // TODO
483+
'pyrra.dev/component': 'kube-proxy',
484+
role: 'alert-rules',
485+
},
486+
},
487+
spec: {
488+
target: k8s._config.kubeProxyConfig.slos.syncRulesLatency.target,
489+
window: k8s._config.kubeProxyConfig.slos.syncRulesLatency.window,
490+
description: |||
491+
The Kubernetes network proxy runs on each node.
492+
This reflects services as defined in the Kubernetes API on each node and can do simple TCP, UDP
493+
stream forwarding or round robin TCP,UDP forwarding across a set of backends.
494+
495+
If this is firing the networks might not be synchronized fast enough and services might be unable to reach the containers they want to reach.
496+
|||,
497+
indicator: {
498+
latency: {
499+
success: {
500+
metric: 'kubeproxy_sync_proxy_rules_duration_seconds_bucket{%s,le="%s"}' % [
501+
k8s._config.mixin._config.kubeProxySelector,
502+
k8s._config.kubeProxyConfig.slos.syncRulesLatency.latency,
503+
],
504+
},
505+
total: {
506+
metric: 'kubeproxy_sync_proxy_rules_duration_seconds_count{%s}' % [
507+
k8s._config.mixin._config.kubeProxySelector,
508+
],
509+
},
510+
},
511+
},
512+
},
513+
},
514+
515+
// Pyrra ServiceLevelObjective for kube-proxy's API client requests: the ratio
// of requests that ended in a 5xx response code or a client-side "<error>"
// against all requests matched by the kube-proxy selector.
// The field is only emitted when kubeProxy is enabled, using the same gate as
// kubeProxySLOSyncRulesLatency, so a setup with kubeProxy left at its default
// (false) does not get a kube-proxy SLO object.
[if (defaults + params).kubeProxy then 'kubeProxySLORequestErrors']: {
  apiVersion: 'pyrra.dev/v1alpha1',
  kind: 'ServiceLevelObjective',
  metadata: k8s._metadata {
    name: 'kube-proxy-request-errors',
    labels+: {
      'app.kubernetes.io/name': 'kube-proxy',
      'app.kubernetes.io/component': 'controller',  //TODO
      prometheus: 'k8s',  // TODO
      'pyrra.dev/component': 'kube-proxy',
      role: 'alert-rules',
    },
  },
  spec: {
    // Target and window come from kubeProxyConfig (not kubeProxy, which is the
    // boolean enable switch).
    target: k8s._config.kubeProxyConfig.slos.requestErrors.target,
    window: k8s._config.kubeProxyConfig.slos.requestErrors.window,
    description: '',
    indicator: {
      ratio: {
        // Numerator: requests with a 5xx code or the literal "<error>" code value.
        errors: {
          metric: 'rest_client_requests_total{%s,code=~"5..|<error>"}' % [
            k8s._config.mixin._config.kubeProxySelector,
          ],
        },
        // Denominator: every request matched by the kube-proxy selector.
        total: {
          metric: 'rest_client_requests_total{%s}' % [
            k8s._config.mixin._config.kubeProxySelector,
          ],
        },
      },
    },
  },
},
415548

416549
'coredns-ServiceMonitor': {
417550
apiVersion: 'monitoring.coreos.com/v1',

0 commit comments

Comments
 (0)