@@ -19,14 +19,14 @@ local defaults = {
1919 kubeSchedulerSelector: 'job="kube-scheduler"' ,
2020 kubeControllerManagerSelector: 'job="kube-controller-manager"' ,
2121 kubeApiserverSelector: 'job="apiserver"' ,
22+ kubeProxySelector: 'job="kube-proxy"' ,
2223 coreDNSSelector: 'job="coredns"' ,
2324 podLabel: 'pod' ,
2425 runbookURLPattern: 'https://runbooks.prometheus-operator.dev/runbooks/kubernetes/%s' ,
2526 diskDeviceSelector: 'device=~"(/dev/)?(mmcblk.p.+|nvme.+|rbd.+|sd.+|vd.+|xvd.+|dm-.+|md.+|dasd.+)"' ,
2627 hostNetworkInterfaceSelector: 'device!~"veth.+"' ,
2728 },
2829 },
29- kubeProxy:: false ,
3030 kubelet: {
3131 slos: {
3232 requestErrors: {
@@ -39,6 +39,28 @@ local defaults = {
3939 },
4040 },
4141 },
// Default objective for kube-controller-manager; these values are read by the
// kubeControllerManagerSLORequestErrors object (spec.target / spec.window).
kubeControllerManager: {
  slos: {
    requestErrors: { target: '99', window: '2w' },
  },
},
// Boolean switch consulted by the conditional kube-proxy SLO field(s).
kubeProxy: false,
// The kube-proxy objectives are kept under a different key (`kubeProxyConfig`)
// for backwards compatibility.
kubeProxyConfig: {
  slos: {
    syncRulesLatency: {
      target: '90',
      // This value is used as the `le` label, so it must exist as a bucket.
      latency: '0.512',
      window: '2w',
    },
    // kube-proxy makes very few requests.
    requestErrors: { target: '90', window: '2w' },
  },
},
4264 coredns: {
4365 name: 'coredns' ,
4466 slos: {
@@ -234,7 +256,7 @@ function(params) {
234256 indicator: {
235257 ratio: {
236258 errors: {
237- metric: 'rest_client_requests_total{%s,code=~"5.."}' % [
259+ metric: 'rest_client_requests_total{%s,code=~"5..|<error> "}' % [
238260 k8s._config.mixin._config.kubeletSelector,
239261 ],
240262 },
@@ -284,7 +306,7 @@ function(params) {
284306 },
285307 },
286308
287- serviceMonitorKubeControllerManager : {
309+ kubeControllerManagerServiceMonitor : {
288310 apiVersion: 'monitoring.coreos.com/v1' ,
289311 kind: 'ServiceMonitor' ,
290312 metadata: k8s._metadata {
@@ -318,6 +340,43 @@ function(params) {
318340 },
319341 },
320342
// Pyrra ServiceLevelObjective: ratio of kube-controller-manager REST client
// requests whose `code` label is 5xx or "<error>" over all of its requests.
kubeControllerManagerSLORequestErrors:
  local objective = k8s._config.kubeControllerManager.slos.requestErrors;
  local selector = k8s._config.mixin._config.kubeControllerManagerSelector;
  {
    apiVersion: 'pyrra.dev/v1alpha1',
    kind: 'ServiceLevelObjective',
    metadata: k8s._metadata {
      name: 'kube-controller-manager-request-errors',
      labels+: {
        'app.kubernetes.io/name': 'kube-controller-manager',
        'pyrra.dev/component': 'kube-controller-manager',
        prometheus: 'k8s',  // TODO
        role: 'alert-rules',
      },
    },
    spec: {
      target: objective.target,
      window: objective.window,
      description: |||
        The Kubernetes controller manager is a daemon that embeds the core control loops shipped with Kubernetes.
        In applications of robotics and automation, a control loop is a non-terminating loop that regulates the state of the system.
        In Kubernetes, a controller is a control loop that watches the shared state of the cluster through the apiserver and makes changes attempting to move the current state towards the desired state. Examples of controllers that ship with Kubernetes today are the replication controller, endpoints controller, namespace controller, and serviceaccounts controller.
      |||,
      indicator: {
        ratio: {
          // Both series share the same job selector; only `errors` filters on `code`.
          errors: {
            metric: std.format('rest_client_requests_total{%s,code=~"5..|<error>"}', [selector]),
          },
          total: {
            metric: std.format('rest_client_requests_total{%s}', [selector]),
          },
        },
      },
    },
  },
379+
321380 serviceMonitorApiserver: {
322381 apiVersion: 'monitoring.coreos.com/v1' ,
323382 kind: 'ServiceMonitor' ,
@@ -412,6 +471,80 @@ function(params) {
412471 },
413472 },
414473
// Pyrra ServiceLevelObjective on kube-proxy rule-sync latency. The field only
// exists when the merged configuration has `kubeProxy` set to a true value.
[if (defaults + params).kubeProxy then 'kubeProxySLOSyncRulesLatency']:
  local objective = k8s._config.kubeProxyConfig.slos.syncRulesLatency;
  local selector = k8s._config.mixin._config.kubeProxySelector;
  {
    apiVersion: 'pyrra.dev/v1alpha1',
    kind: 'ServiceLevelObjective',
    metadata: k8s._metadata {
      name: 'kube-proxy-sync-rules-latency',
      labels+: {
        'app.kubernetes.io/name': 'kube-proxy',
        'app.kubernetes.io/component': 'controller',  // TODO
        'pyrra.dev/component': 'kube-proxy',
        prometheus: 'k8s',  // TODO
        role: 'alert-rules',
      },
    },
    spec: {
      target: objective.target,
      window: objective.window,
      description: |||
        The Kubernetes network proxy runs on each node.
        This reflects services as defined in the Kubernetes API on each node and can do simple TCP, UDP
        stream forwarding or round robin TCP,UDP forwarding across a set of backends.

        If this is firing the networks might not be synchronized fast enough and services might be unable to reach the containers they want to reach.
      |||,
      indicator: {
        latency: {
          // Bucket selected by the configured `latency` value as its `le` label.
          success: {
            metric: std.format(
              'kubeproxy_sync_proxy_rules_duration_seconds_bucket{%s,le="%s"}',
              [selector, objective.latency]
            ),
          },
          total: {
            metric: std.format('kubeproxy_sync_proxy_rules_duration_seconds_count{%s}', [selector]),
          },
        },
      },
    },
  },
514+
// Pyrra ServiceLevelObjective: ratio of kube-proxy REST client requests whose
// `code` label is 5xx or "<error>" over all of its requests.
//
// Gated on the `kubeProxy` flag, exactly like kubeProxySLOSyncRulesLatency, so
// that no kube-proxy SLO is generated while kube-proxy support is disabled
// (the default). Previously this object was emitted unconditionally.
[if (defaults + params).kubeProxy then 'kubeProxySLORequestErrors']: {
  apiVersion: 'pyrra.dev/v1alpha1',
  kind: 'ServiceLevelObjective',
  metadata: k8s._metadata {
    name: 'kube-proxy-request-errors',
    labels+: {
      'app.kubernetes.io/name': 'kube-proxy',
      'app.kubernetes.io/component': 'controller',  // TODO
      prometheus: 'k8s',  // TODO
      'pyrra.dev/component': 'kube-proxy',
      role: 'alert-rules',
    },
  },
  spec: {
    target: k8s._config.kubeProxyConfig.slos.requestErrors.target,
    window: k8s._config.kubeProxyConfig.slos.requestErrors.window,
    // TODO: no description has been written for this objective yet.
    description: '',
    indicator: {
      ratio: {
        // Both series share the kube-proxy job selector; only `errors` filters on `code`.
        errors: {
          metric: 'rest_client_requests_total{%s,code=~"5..|<error>"}' % [
            k8s._config.mixin._config.kubeProxySelector,
          ],
        },
        total: {
          metric: 'rest_client_requests_total{%s}' % [
            k8s._config.mixin._config.kubeProxySelector,
          ],
        },
      },
    },
  },
},
415548
416549 'coredns-ServiceMonitor' : {
417550 apiVersion: 'monitoring.coreos.com/v1' ,
0 commit comments