Skip to content

Commit b7fe018

Browse files
committed
eks: Revert back to awscni_total_ip_addresses-based alert
1 parent b9c73c7 commit b7fe018

File tree

1 file changed

+25
-15
lines changed

1 file changed

+25
-15
lines changed

jsonnet/kube-prometheus/platforms/eks.libsonnet

Lines changed: 25 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,13 @@
11
(import '../addons/managed-cluster.libsonnet') + {
22
values+:: {
3-
eks: {
4-
minimumAvailableIPs: 10,
5-
minimumAvailableIPsTime: '10m',
3+
awsVpcCni: {
4+
// `minimumWarmIPs` should be less than or equal to `WARM_IP_TARGET`.
5+
//
6+
// References:
7+
// https://github.com/aws/amazon-vpc-cni-k8s/blob/v1.9.0/docs/eni-and-ip-target.md
8+
// https://github.com/aws/amazon-vpc-cni-k8s/blob/v1.9.0/pkg/ipamd/ipamd.go#L61-L71
9+
minimumWarmIPs: 10,
10+
minimumWarmIPsTime: '10m',
611
},
712
},
813
kubernetesControlPlane+: {
@@ -17,7 +22,8 @@
1722
],
1823
},
1924
},
20-
AwsEksCniMetricService: {
25+
26+
serviceAwsVpcCniMetrics: {
2127
apiVersion: 'v1',
2228
kind: 'Service',
2329
metadata: {
@@ -38,14 +44,14 @@
3844
},
3945
},
4046

41-
serviceMonitorAwsEksCNI: {
47+
serviceMonitorAwsVpcCni: {
4248
apiVersion: 'monitoring.coreos.com/v1',
4349
kind: 'ServiceMonitor',
4450
metadata: {
45-
name: 'awsekscni',
51+
name: 'aws-node',
4652
namespace: $.values.common.namespace,
4753
labels: {
48-
'app.kubernetes.io/name': 'eks-cni',
54+
'app.kubernetes.io/name': 'aws-node',
4955
},
5056
},
5157
spec: {
@@ -78,30 +84,34 @@
7884
],
7985
},
8086
},
81-
prometheusRuleEksCNI: {
87+
88+
prometheusRuleAwsVpcCni: {
8289
apiVersion: 'monitoring.coreos.com/v1',
8390
kind: 'PrometheusRule',
8491
metadata: {
8592
labels: $.prometheus._config.commonLabels + $.prometheus._config.mixin.ruleLabels,
86-
name: 'eks-rules',
93+
name: 'aws-vpc-cni-rules',
8794
namespace: $.prometheus._config.namespace,
8895
},
8996
spec: {
9097
groups: [
9198
{
92-
name: 'kube-prometheus-eks.rules',
99+
name: 'kube-prometheus-aws-vpc-cni.rules',
93100
rules: [
94101
{
95-
expr: 'sum by(instance) (awscni_ip_max) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $.values.eks.minimumAvailableIPs,
102+
expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $.values.awsVpcCni.minimumWarmIPs,
96103
labels: {
97104
severity: 'critical',
98105
},
99106
annotations: {
100-
summary: 'EKS CNI is running low on available IPs',
101-
description: 'Instance {{ $labels.instance }} has only {{ $value }} IPs available which is lower than set threshold of %s' % $.values.eks.minimumAvailableIPs,
107+
summary: 'AWS VPC CNI has a low warm IP pool',
108+
description: |||
109+
Instance {{ $labels.instance }} has only {{ $value }} warm IPs which is lower than set threshold of %s.
110+
It could mean the current subnet is out of available IP addresses or the CNI is unable to request them from the EC2 API.
111+
||| % $.values.awsVpcCni.minimumWarmIPs,
102112
},
103-
'for': $.values.eks.minimumAvailableIPsTime,
104-
alert: 'EksCNILowAvailableIPs',
113+
'for': $.values.awsVpcCni.minimumWarmIPsTime,
114+
alert: 'AwsVpcCniWarmIPsLow',
105115
},
106116
],
107117
},

0 commit comments

Comments
 (0)