@@ -1448,7 +1448,7 @@ tests:
14481448 runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
14491449 summary : Cluster has overcommitted CPU resource requests.
14501450
1451- - name : KubeCPUOvercommit alert (multi-node)
1451+ - name : KubeCPUOvercommit alert (multi-node; non-HA)
14521452 interval : 1m
14531453 input_series :
14541454 - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
@@ -1459,9 +1459,9 @@ tests:
14591459 values : ' 1.9x10' # This value was seen on a 2x vCPU node
14601460 - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n2", resource="cpu", job="kube-state-metrics"}'
14611461 values : ' 1.9x10'
1462- - series : ' kube_node_role{ cluster="kubernetes", node="n1", role="control-plane"}'
1462+ - series : ' kube_node_role{cluster="kubernetes", node="n1", role="control-plane", job="kube-state-metrics"}'
14631463 values : ' 1x10'
1464- - series : ' kube_node_role{ cluster="kubernetes", node="n2", role="control-plane"}'
1464+ - series : ' kube_node_role{cluster="kubernetes", node="n2", role="control-plane", job="kube-state-metrics"}'
14651465 values : ' 1x10'
14661466 alert_rule_test :
14671467 - eval_time : 9m
@@ -1472,10 +1472,42 @@ tests:
14721472 - exp_labels :
14731473 severity : warning
14741474 exp_annotations :
1475- description : Cluster has overcommitted CPU resource requests for Pods by 2.1 CPU shares and cannot tolerate node failure.
1475+ description : Cluster has overcommitted CPU resource requests for Pods by 0.20000000000000018 CPU shares and cannot tolerate node failure.
14761476 runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
14771477 summary : Cluster has overcommitted CPU resource requests.
14781478
1479+ - name : KubeCPUOvercommit alert (multi-node; HA)
1480+ interval : 1m
1481+ input_series :
1482+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1483+ values : ' 2x10'
1484+ - series : ' namespace_cpu:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1485+ values : ' 2x10'
1486+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="cpu", job="kube-state-metrics"}'
1487+ values : ' 1.9x10' # This value was seen on a 2x vCPU node
1488+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n2", resource="cpu", job="kube-state-metrics"}'
1489+ values : ' 1.9x10'
1490+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n3", resource="cpu", job="kube-state-metrics"}'
1491+ values : ' 1.9x10'
1492+ - series : ' kube_node_role{cluster="kubernetes", node="n1", role="control-plane", job="kube-state-metrics"}'
1493+ values : ' 1x10'
1494+ - series : ' kube_node_role{cluster="kubernetes", node="n2", role="control-plane", job="kube-state-metrics"}'
1495+ values : ' 1x10'
1496+ - series : ' kube_node_role{cluster="kubernetes", node="n3", role="control-plane", job="kube-state-metrics"}'
1497+ values : ' 1x10'
1498+ alert_rule_test :
1499+ - eval_time : 9m
1500+ alertname : KubeCPUOvercommit
1501+ - eval_time : 10m
1502+ alertname : KubeCPUOvercommit
1503+ exp_alerts :
1504+ - exp_labels :
1505+ severity : warning
1506+ exp_annotations :
1507+ description : Cluster has overcommitted CPU resource requests for Pods by 0.20000000000000062 CPU shares and cannot tolerate node failure.
1508+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubecpuovercommit
1509+ summary : Cluster has overcommitted CPU resource requests.
1510+
14791511- name : KubeMemoryOvercommit alert (single-node)
14801512 interval : 1m
14811513 input_series :
@@ -1500,7 +1532,7 @@ tests:
15001532 runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
15011533 summary : Cluster has overcommitted memory resource requests.
15021534
1503- - name : KubeMemoryOvercommit alert (multi-node)
1535+ - name : KubeMemoryOvercommit alert (multi-node; non-HA)
15041536 interval : 1m
15051537 input_series :
15061538 - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
@@ -1527,3 +1559,35 @@ tests:
15271559 description : Cluster has overcommitted memory resource requests for Pods by 2G bytes and cannot tolerate node failure.
15281560 runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
15291561 summary : Cluster has overcommitted memory resource requests.
1562+
1563+ - name : KubeMemoryOvercommit alert (multi-node; HA)
1564+ interval : 1m
1565+ input_series :
1566+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="default"}'
1567+ values : ' 2000000000x10' # 2 GB
1568+ - series : ' namespace_memory:kube_pod_container_resource_requests:sum{cluster="kubernetes", namespace="kube-system"}'
1569+ values : ' 2000000000x10'
1570+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n1", resource="memory", job="kube-state-metrics"}'
1571+ values : ' 1000000000x10'
1572+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n2", resource="memory", job="kube-state-metrics"}'
1573+ values : ' 1000000000x10'
1574+ - series : ' kube_node_status_allocatable{cluster="kubernetes", node="n3", resource="memory", job="kube-state-metrics"}'
1575+ values : ' 1000000000x10'
1576+ - series : ' kube_node_role{cluster="kubernetes", node="n1", role="control-plane", job="kube-state-metrics"}'
1577+ values : ' 1x10'
1578+ - series : ' kube_node_role{cluster="kubernetes", node="n2", role="control-plane", job="kube-state-metrics"}'
1579+ values : ' 1x10'
1580+ - series : ' kube_node_role{cluster="kubernetes", node="n3", role="control-plane", job="kube-state-metrics"}'
1581+ values : ' 1x10'
1582+ alert_rule_test :
1583+ - eval_time : 9m
1584+ alertname : KubeMemoryOvercommit
1585+ - eval_time : 10m
1586+ alertname : KubeMemoryOvercommit
1587+ exp_alerts :
1588+ - exp_labels :
1589+ severity : warning
1590+ exp_annotations :
1591+ description : Cluster has overcommitted memory resource requests for Pods by 2G bytes and cannot tolerate node failure.
1592+ runbook_url : https://github.com/kubernetes-monitoring/kubernetes-mixin/tree/master/runbook.md#alert-name-kubememoryovercommit
1593+ summary : Cluster has overcommitted memory resource requests.
0 commit comments