|
27 | 27 | },
|
28 | 28 | {
|
29 | 29 | // We wrap kube_pod_owner with the topk() aggregator to ensure that
|
30 |
| - // every (namespace, pod) tuple is unique even if the "owner_kind" |
| 30 | + // every (namespace, pod, cluster label) tuple is unique even if the "owner_kind" |
31 | 31 | // label has 2 different values for the same pod. This avoids "many-to-many matching
|
32 | 32 | // not allowed" errors when joining with kube_pod_status_phase.
|
33 | 33 | expr: |||
|
34 |
| - sum by (namespace, pod) ( |
35 |
| - max by(namespace, pod) ( |
| 34 | + sum by (namespace, pod, %(clusterLabel)s) ( |
| 35 | + max by(namespace, pod, %(clusterLabel)s) ( |
36 | 36 | kube_pod_status_phase{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s, phase=~"Pending|Unknown"}
|
37 |
| - ) * on(namespace, pod) group_left(owner_kind) topk by(namespace, pod) ( |
38 |
| - 1, max by(namespace, pod, owner_kind) (kube_pod_owner{owner_kind!="Job"}) |
| 37 | + ) * on(namespace, pod, %(clusterLabel)s) group_left(owner_kind) topk by(namespace, pod, %(clusterLabel)s) ( |
| 38 | + 1, max by(namespace, pod, owner_kind, %(clusterLabel)s) (kube_pod_owner{owner_kind!="Job"}) |
39 | 39 | )
|
40 | 40 | ) > 0
|
41 | 41 | ||| % $._config,
|
|
193 | 193 | },
|
194 | 194 | {
|
195 | 195 | expr: |||
|
196 |
| - sum by (namespace, pod, container) (kube_pod_container_status_waiting_reason{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}) > 0 |
| 196 | + sum by (namespace, pod, container, %(clusterLabel)s) (kube_pod_container_status_waiting_reason{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s}) > 0 |
197 | 197 | ||| % $._config,
|
198 | 198 | labels: {
|
199 | 199 | severity: 'warning',
|
|
238 | 238 | {
|
239 | 239 | alert: 'KubeJobNotCompleted',
|
240 | 240 | expr: |||
|
241 |
| - time() - max by(namespace, job_name) (kube_job_status_start_time{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} |
| 241 | + time() - max by(namespace, job_name, %(clusterLabel)s) (kube_job_status_start_time{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} |
242 | 242 | and
|
243 | 243 | kube_job_status_active{%(prefixedNamespaceSelector)s%(kubeStateMetricsSelector)s} > 0) > %(kubeJobTimeoutDuration)s
|
244 | 244 | ||| % $._config,
|
|
0 commit comments