|
25 | 25 | { |
26 | 26 | alert: 'KubeCPUOvercommit', |
27 | 27 | expr: ||| |
28 | | - sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 |
| 28 | + sum(namespace_cpu:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0 |
29 | 29 | and |
30 | | - (sum(kube_node_status_allocatable{resource="cpu"}) - max(kube_node_status_allocatable{resource="cpu"})) > 0 |
| 30 | + (sum(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="cpu"}) by (%(clusterLabel)s)) > 0 |
31 | 31 | ||| % $._config, |
32 | 32 | labels: { |
33 | 33 | severity: 'warning', |
34 | 34 | }, |
35 | 35 | annotations: { |
36 | | - description: 'Cluster has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.', |
| 36 | + description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted CPU resource requests for Pods by {{ $value }} CPU shares and cannot tolerate node failure.' % $._config,  // label name is templated so it matches the clusterLabel grouping used in expr |
37 | 37 | summary: 'Cluster has overcommitted CPU resource requests.', |
38 | 38 | }, |
39 | 39 | 'for': '10m', |
40 | 40 | }, |
41 | 41 | { |
42 | 42 | alert: 'KubeMemoryOvercommit', |
43 | 43 | expr: ||| |
44 | | - sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 |
| 44 | + sum(namespace_memory:kube_pod_container_resource_requests:sum{%(ignoringOverprovisionedWorkloadSelector)s}) by (%(clusterLabel)s) - (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0 |
45 | 45 | and |
46 | | - (sum(kube_node_status_allocatable{resource="memory"}) - max(kube_node_status_allocatable{resource="memory"})) > 0 |
| 46 | + (sum(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s) - max(kube_node_status_allocatable{resource="memory"}) by (%(clusterLabel)s)) > 0 |
47 | 47 | ||| % $._config, |
48 | 48 | labels: { |
49 | 49 | severity: 'warning', |
50 | 50 | }, |
51 | 51 | annotations: { |
52 | | - description: 'Cluster has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.', |
| 52 | + description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted memory resource requests for Pods by {{ $value | humanize }} bytes and cannot tolerate node failure.' % $._config,  // label name is templated so it matches the clusterLabel grouping used in expr |
53 | 53 | summary: 'Cluster has overcommitted memory resource requests.', |
54 | 54 | }, |
55 | 55 | 'for': '10m', |
56 | 56 | }, |
57 | 57 | { |
58 | 58 | alert: 'KubeCPUQuotaOvercommit', |
59 | 59 | expr: ||| |
60 | | - sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(cpu|requests.cpu)"})) |
| 60 | + sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(cpu|requests.cpu)"})) by (%(clusterLabel)s) |
61 | 61 | / |
62 | | - sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) |
| 62 | + sum(kube_node_status_allocatable{resource="cpu", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) |
63 | 63 | > %(namespaceOvercommitFactor)s |
64 | 64 | ||| % $._config, |
65 | 65 | labels: { |
66 | 66 | severity: 'warning', |
67 | 67 | }, |
68 | 68 | annotations: { |
69 | | - description: 'Cluster has overcommitted CPU resource requests for Namespaces.', |
| 69 | + description: 'Cluster {{ $labels.%(clusterLabel)s }} has overcommitted CPU resource requests for Namespaces.' % $._config,  // label name is templated so it matches the clusterLabel grouping used in expr |
70 | 70 | summary: 'Cluster has overcommitted CPU resource requests.', |
71 | 71 | }, |
72 | 72 | 'for': '5m', |
73 | 73 | }, |
74 | 74 | { |
75 | 75 | alert: 'KubeMemoryQuotaOvercommit', |
76 | 76 | expr: ||| |
77 | | - sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(memory|requests.memory)"})) |
| 77 | + sum(min without(resource) (kube_resourcequota{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, type="hard", resource=~"(memory|requests.memory)"})) by (%(clusterLabel)s) |
78 | 78 | / |
79 | | - sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) |
| 79 | + sum(kube_node_status_allocatable{resource="memory", %(kubeStateMetricsSelector)s}) by (%(clusterLabel)s) |
80 | 80 | > %(namespaceOvercommitFactor)s |
81 | 81 | ||| % $._config, |
82 | 82 | labels: { |
83 | 83 | severity: 'warning', |
84 | 84 | }, |
85 | 85 | annotations: { |
86 | | - description: 'Cluster has overcommitted memory resource requests for Namespaces.', |
| 86 | + description: 'Cluster {{ $labels.cluster }} has overcommitted memory resource requests for Namespaces.', |
87 | 87 | summary: 'Cluster has overcommitted memory resource requests.', |
88 | 88 | }, |
89 | 89 | 'for': '5m', |
|
0 commit comments