Skip to content

Commit 611196f

Browse files
committed
Run Pyrra with generic rules
This should allow Grafana dashboards to access the SLO time series.
1 parent 60e2609 commit 611196f

31 files changed

+756
-6
lines changed

jsonnet/kube-prometheus/components/pyrra.libsonnet

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -167,6 +167,7 @@ function(params) {
167167
image: pyrra._config.image,
168168
args: [
169169
'kubernetes',
170+
'--generic-rules',
170171
],
171172
// resources: pyrra._config.resources,
172173
ports: [{ containerPort: pyrra._config.port }],

jsonnet/kube-prometheus/main.libsonnet

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -133,8 +133,8 @@ local utils = import './lib/utils.libsonnet';
133133
},
134134
pyrra: {
135135
namespace: $.values.common.namespace,
136-
version: $.values.common.versions.nodeExporter,
137-
image: $.values.common.images.nodeExporter,
136+
version: $.values.common.versions.pyrra,
137+
image: $.values.common.images.pyrra,
138138
},
139139
},
140140

kustomization.yaml

Lines changed: 28 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,18 @@ resources:
3636
- ./manifests/kubeStateMetrics-service.yaml
3737
- ./manifests/kubeStateMetrics-serviceAccount.yaml
3838
- ./manifests/kubeStateMetrics-serviceMonitor.yaml
39+
- ./manifests/kubernetesControlPlane-coredns-ServiceMonitor.yaml
40+
- ./manifests/kubernetesControlPlane-coredns-slo-response-errors.yaml
41+
- ./manifests/kubernetesControlPlane-coredns-slo-response-latency.yaml
42+
- ./manifests/kubernetesControlPlane-kubeControllerManagerSLORequestErrors.yaml
43+
- ./manifests/kubernetesControlPlane-kubeControllerManagerServiceMonitor.yaml
44+
- ./manifests/kubernetesControlPlane-kubeProxySLORequestErrors.yaml
45+
- ./manifests/kubernetesControlPlane-kubelet-slo-request-errors.yaml
46+
- ./manifests/kubernetesControlPlane-kubelet-slo-runtime-errors.yaml
47+
- ./manifests/kubernetesControlPlane-kubeletServiceMonitor.yaml
3948
- ./manifests/kubernetesControlPlane-prometheusRule.yaml
4049
- ./manifests/kubernetesControlPlane-serviceMonitorApiserver.yaml
41-
- ./manifests/kubernetesControlPlane-serviceMonitorCoreDNS.yaml
42-
- ./manifests/kubernetesControlPlane-serviceMonitorKubeControllerManager.yaml
4350
- ./manifests/kubernetesControlPlane-serviceMonitorKubeScheduler.yaml
44-
- ./manifests/kubernetesControlPlane-serviceMonitorKubelet.yaml
4551
- ./manifests/nodeExporter-clusterRole.yaml
4652
- ./manifests/nodeExporter-clusterRoleBinding.yaml
4753
- ./manifests/nodeExporter-daemonset.yaml
@@ -85,6 +91,24 @@ resources:
8591
- ./manifests/prometheusOperator-service.yaml
8692
- ./manifests/prometheusOperator-serviceAccount.yaml
8793
- ./manifests/prometheusOperator-serviceMonitor.yaml
94+
- ./manifests/prometheusOperator-sloHTTPErrors.yaml
95+
- ./manifests/prometheusOperator-sloReconcileErrors.yaml
96+
- ./manifests/pyrra-apiDeployment.yaml
97+
- ./manifests/pyrra-apiService.yaml
98+
- ./manifests/pyrra-kubernetesClusterRole.yaml
99+
- ./manifests/pyrra-kubernetesClusterRoleBinding.yaml
100+
- ./manifests/pyrra-kubernetesDeployment.yaml
101+
- ./manifests/pyrra-kubernetesService.yaml
102+
- ./manifests/pyrra-kubernetesServiceAccount.yaml
103+
- ./manifests/pyrra-slo-apiserver-read-cluster-latency.yaml
104+
- ./manifests/pyrra-slo-apiserver-read-namespace-latency.yaml
105+
- ./manifests/pyrra-slo-apiserver-read-resource-latency.yaml
106+
- ./manifests/pyrra-slo-apiserver-read-response-errors.yaml
107+
- ./manifests/pyrra-slo-apiserver-write-response-errors.yaml
108+
- ./manifests/pyrra-slo-prometheus-notification-errors.yaml
109+
- ./manifests/pyrra-slo-prometheus-query-errors.yaml
110+
- ./manifests/pyrra-slo-prometheus-rule-evaluation-failures.yaml
111+
- ./manifests/pyrra-slo-prometheus-sd-kubernetes-errors.yaml
88112
- ./manifests/setup/0alertmanagerConfigCustomResourceDefinition.yaml
89113
- ./manifests/setup/0alertmanagerCustomResourceDefinition.yaml
90114
- ./manifests/setup/0podmonitorCustomResourceDefinition.yaml
@@ -95,4 +119,5 @@ resources:
95119
- ./manifests/setup/0scrapeconfigCustomResourceDefinition.yaml
96120
- ./manifests/setup/0servicemonitorCustomResourceDefinition.yaml
97121
- ./manifests/setup/0thanosrulerCustomResourceDefinition.yaml
122+
- ./manifests/setup/crd.yaml
98123
- ./manifests/setup/namespace.yaml

manifests/kubernetesControlPlane-serviceMonitorCoreDNS.yaml renamed to manifests/kubernetesControlPlane-coredns-ServiceMonitor.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -22,4 +22,4 @@ spec:
2222
- kube-system
2323
selector:
2424
matchLabels:
25-
k8s-app: kube-dns
25+
k8s-app: coredns
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
apiVersion: pyrra.dev/v1alpha1
2+
kind: ServiceLevelObjective
3+
metadata:
4+
labels:
5+
app.kubernetes.io/component: controller
6+
app.kubernetes.io/name: coredns
7+
app.kubernetes.io/part-of: kube-prometheus
8+
prometheus: k8s
9+
pyrra.dev/component: coredns
10+
role: alert-rules
11+
name: coredns-response-errors
12+
namespace: monitoring
13+
spec:
14+
description: |
15+
CoreDNS runs within a Kubernetes cluster and resolves internal requests and forwards external requests.
16+
If CoreDNS fails to answer requests, applications might be unable to make requests.
17+
indicator:
18+
ratio:
19+
errors:
20+
metric: coredns_dns_responses_total{job="coredns",rcode="SERVFAIL"}
21+
total:
22+
metric: coredns_dns_responses_total{job="coredns"}
23+
target: "99.99"
24+
window: 2w
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
apiVersion: pyrra.dev/v1alpha1
2+
kind: ServiceLevelObjective
3+
metadata:
4+
labels:
5+
app.kubernetes.io/component: controller
6+
app.kubernetes.io/name: coredns
7+
app.kubernetes.io/part-of: kube-prometheus
8+
prometheus: k8s
9+
pyrra.dev/component: coredns
10+
role: alert-rules
11+
name: coredns-response-latency
12+
namespace: monitoring
13+
spec:
14+
description: |
15+
CoreDNS runs within a Kubernetes cluster and resolves internal requests and forwards external requests.
16+
If CoreDNS gets too slow it might have an impact on the latency of other applications in this cluster.
17+
indicator:
18+
latency:
19+
success:
20+
metric: coredns_dns_request_duration_seconds_bucket{job="coredns",le="0.032"}
21+
total:
22+
metric: coredns_dns_request_duration_seconds_count{job="coredns"}
23+
target: "99"
24+
window: 2w
Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
apiVersion: pyrra.dev/v1alpha1
2+
kind: ServiceLevelObjective
3+
metadata:
4+
labels:
5+
app.kubernetes.io/name: kube-controller-manager
6+
app.kubernetes.io/part-of: kube-prometheus
7+
prometheus: k8s
8+
pyrra.dev/component: kube-controller-manager
9+
role: alert-rules
10+
name: kube-controller-manager-request-errors
11+
namespace: monitoring
12+
spec:
13+
description: "The Kubernetes controller manager is a daemon that embeds the core control loops shipped with Kubernetes. \nIn applications of robotics and automation, a control loop is a non-terminating loop that regulates the state of the system. \nIn Kubernetes, a controller is a control loop that watches the shared state of the cluster through the apiserver and makes changes attempting to move the current state towards the desired state. Examples of controllers that ship with Kubernetes today are the replication controller, endpoints controller, namespace controller, and serviceaccounts controller.\n"
14+
indicator:
15+
ratio:
16+
errors:
17+
metric: rest_client_requests_total{job="kube-controller-manager",code=~"5..|<error>"}
18+
total:
19+
metric: rest_client_requests_total{job="kube-controller-manager"}
20+
target: "99"
21+
window: 2w
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
apiVersion: pyrra.dev/v1alpha1
2+
kind: ServiceLevelObjective
3+
metadata:
4+
labels:
5+
app.kubernetes.io/component: controller
6+
app.kubernetes.io/name: kube-proxy
7+
app.kubernetes.io/part-of: kube-prometheus
8+
prometheus: k8s
9+
pyrra.dev/component: kube-proxy
10+
role: alert-rules
11+
name: kube-proxy-request-errors
12+
namespace: monitoring
13+
spec:
14+
description: ""
15+
indicator:
16+
ratio:
17+
errors:
18+
metric: rest_client_requests_total{job="kube-proxy",code=~"5..|<error>"}
19+
total:
20+
metric: rest_client_requests_total{job="kube-proxy"}
21+
target: "90"
22+
window: 2w
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
apiVersion: pyrra.dev/v1alpha1
2+
kind: ServiceLevelObjective
3+
metadata:
4+
labels:
5+
app.kubernetes.io/name: kubelet
6+
app.kubernetes.io/part-of: kube-prometheus
7+
prometheus: k8s
8+
pyrra.dev/component: kubelet
9+
role: alert-rules
10+
name: kubelet-request-errors
11+
namespace: monitoring
12+
spec:
13+
description: |
14+
The kubelet is the primary “node agent” that runs on each node.
15+
The kubelet ensures that the containers are running and healthy.
16+
If these requests are failing, the kubelet might not know what to run exactly.
17+
indicator:
18+
ratio:
19+
errors:
20+
metric: rest_client_requests_total{job="kubelet", metrics_path="/metrics",code=~"5..|<error>"}
21+
total:
22+
metric: rest_client_requests_total{job="kubelet", metrics_path="/metrics"}
23+
target: "99"
24+
window: 2w

0 commit comments

Comments
 (0)