@@ -1424,3 +1424,106 @@ tests:
14241424 runbook_url : " https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubestatefulsetreplicasmismatch"
14251425 summary : " StatefulSet has not matched the expected number of replicas."
14261426
1427+ - name : KubeCPUOvercommit alert (single-node)
1428+ interval : 1m # same test group as `name`; spacing between consecutive input_series samples
1429+ input_series :
1430+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1431+ values : ' 1x10' # constant request of 1 in this namespace
1432+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1433+ values : ' 1x10' # constant request of 1 in this namespace (2 in total across namespaces)
1434+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}'
1435+ values : ' 1.9x10' # This value was seen on a 2x vCPU node
1436+ - series : ' kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
1437+ values : ' 1x10'
1438+ alert_rule_test :
1439+ - eval_time : 9m # no exp_alerts listed: the alert must not be firing yet
1440+ alertname : KubeCPUOvercommit
1441+ - eval_time : 10m # the alert is expected to be firing with exactly the labels/annotations below
1442+ alertname : KubeCPUOvercommit
1443+ exp_alerts :
1444+ - exp_labels :
1445+ severity : warning
1446+ exp_annotations :
1447+ description : Cluster has overcommitted CPU resource requests for Pods by 0.385 CPU shares and cannot tolerate node failure.
1448+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
1449+ summary : Cluster has overcommitted CPU resource requests.
1450+
1451+ - name : KubeCPUOvercommit alert (multi-node)
1452+ interval : 1m # same test group as `name`; spacing between consecutive input_series samples
1453+ input_series :
1454+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1455+ values : ' 2x10' # constant request of 2 in this namespace
1456+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1457+ values : ' 2x10' # constant request of 2 in this namespace (4 in total across namespaces)
1458+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}'
1459+ values : ' 1.9x10' # This value was seen on a 2x vCPU node
1460+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n2", resource="cpu", job="kube-state-metrics"}'
1461+ values : ' 1.9x10' # second node, same allocatable CPU as n1
1462+ - series : ' kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
1463+ values : ' 1x10'
1464+ - series : ' kube_node_info{cluster="kubernetes", node="n2", job="kube-state-metrics"}'
1465+ values : ' 1x10'
1466+ alert_rule_test :
1467+ - eval_time : 9m # no exp_alerts listed: the alert must not be firing yet
1468+ alertname : KubeCPUOvercommit
1469+ - eval_time : 10m # the alert is expected to be firing with exactly the labels/annotations below
1470+ alertname : KubeCPUOvercommit
1471+ exp_alerts :
1472+ - exp_labels :
1473+ severity : warning
1474+ exp_annotations :
1475+ description : Cluster has overcommitted CPU resource requests for Pods by 2.1 CPU shares and cannot tolerate node failure.
1476+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
1477+ summary : Cluster has overcommitted CPU resource requests.
1478+
1479+ - name : KubeMemoryOvercommit alert (single-node)
1480+ interval : 1m # same test group as `name`; spacing between consecutive input_series samples
1481+ input_series :
1482+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1483+ values : ' 1000000000x10' # 1 GB
1484+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1485+ values : ' 1000000000x10' # 1 GB (2 GB requested in total across namespaces)
1486+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}'
1487+ values : ' 1000000000x10' # 1 GB allocatable on the only node
1488+ - series : ' kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
1489+ values : ' 1x10'
1490+ alert_rule_test :
1491+ - eval_time : 9m # no exp_alerts listed: the alert must not be firing yet
1492+ alertname : KubeMemoryOvercommit
1493+ - eval_time : 10m # the alert is expected to be firing with exactly the labels/annotations below
1494+ alertname : KubeMemoryOvercommit
1495+ exp_alerts :
1496+ - exp_labels :
1497+ severity : warning
1498+ exp_annotations :
1499+ description : Cluster has overcommitted memory resource requests for Pods by 1.15G bytes and cannot tolerate node failure.
1500+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
1501+ summary : Cluster has overcommitted memory resource requests.
1502+
1503+ - name : KubeMemoryOvercommit alert (multi-node)
1504+ interval : 1m # same test group as `name`; spacing between consecutive input_series samples
1505+ input_series :
1506+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1507+ values : ' 2000000000x10' # 2 GB
1508+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1509+ values : ' 2000000000x10' # 2 GB (4 GB requested in total across namespaces)
1510+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}'
1511+ values : ' 1000000000x10' # 1 GB allocatable on n1
1512+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n2", resource="memory", job="kube-state-metrics"}'
1513+ values : ' 1000000000x10' # 1 GB allocatable on n2
1514+ - series : ' kube_node_info{cluster="kubernetes", node="n1", job="kube-state-metrics"}'
1515+ values : ' 1x10'
1516+ - series : ' kube_node_info{cluster="kubernetes", node="n2", job="kube-state-metrics"}'
1517+ values : ' 1x10'
1518+ alert_rule_test :
1519+ - eval_time : 9m # no exp_alerts listed: the alert must not be firing yet
1520+ alertname : KubeMemoryOvercommit
1521+ - eval_time : 10m # the alert is expected to be firing with exactly the labels/annotations below
1522+ alertname : KubeMemoryOvercommit
1523+ exp_alerts :
1524+ - exp_labels :
1525+ severity : warning
1526+ exp_annotations :
1527+ description : Cluster has overcommitted memory resource requests for Pods by 3G bytes and cannot tolerate node failure.
1528+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
1529+ summary : Cluster has overcommitted memory resource requests.
0 commit comments