|
1 | 1 | (import '../addons/managed-cluster.libsonnet') + { |
2 | 2 | values+:: { |
3 | | - eks: { |
4 | | - minimumAvailableIPs: 10, |
5 | | - minimumAvailableIPsTime: '10m', |
| 3 | + awsVpcCni: { |
| 4 | + // `minimumWarmIPs` should be less than or equal to `WARM_IP_TARGET`. |
| 5 | + // |
| 6 | + // References: |
| 7 | + // https://github.com/aws/amazon-vpc-cni-k8s/blob/v1.9.0/docs/eni-and-ip-target.md |
| 8 | + // https://github.com/aws/amazon-vpc-cni-k8s/blob/v1.9.0/pkg/ipamd/ipamd.go#L61-L71 |
| 9 | + minimumWarmIPs: 10, |
| 10 | + minimumWarmIPsTime: '10m', |
6 | 11 | }, |
7 | 12 | }, |
8 | 13 | kubernetesControlPlane+: { |
|
17 | 22 | ], |
18 | 23 | }, |
19 | 24 | }, |
20 | | - AwsEksCniMetricService: { |
| 25 | + |
| 26 | + serviceAwsVpcCniMetrics: { |
21 | 27 | apiVersion: 'v1', |
22 | 28 | kind: 'Service', |
23 | 29 | metadata: { |
|
38 | 44 | }, |
39 | 45 | }, |
40 | 46 |
|
41 | | - serviceMonitorAwsEksCNI: { |
| 47 | + serviceMonitorAwsVpcCni: { |
42 | 48 | apiVersion: 'monitoring.coreos.com/v1', |
43 | 49 | kind: 'ServiceMonitor', |
44 | 50 | metadata: { |
45 | | - name: 'awsekscni', |
| 51 | + name: 'aws-node', |
46 | 52 | namespace: $.values.common.namespace, |
47 | 53 | labels: { |
48 | | - 'app.kubernetes.io/name': 'eks-cni', |
| 54 | + 'app.kubernetes.io/name': 'aws-node', |
49 | 55 | }, |
50 | 56 | }, |
51 | 57 | spec: { |
|
78 | 84 | ], |
79 | 85 | }, |
80 | 86 | }, |
81 | | - prometheusRuleEksCNI: { |
| 87 | + |
| 88 | + prometheusRuleAwsVpcCni: { |
82 | 89 | apiVersion: 'monitoring.coreos.com/v1', |
83 | 90 | kind: 'PrometheusRule', |
84 | 91 | metadata: { |
85 | 92 | labels: $.prometheus._config.commonLabels + $.prometheus._config.mixin.ruleLabels, |
86 | | - name: 'eks-rules', |
| 93 | + name: 'aws-vpc-cni-rules', |
87 | 94 | namespace: $.prometheus._config.namespace, |
88 | 95 | }, |
89 | 96 | spec: { |
90 | 97 | groups: [ |
91 | 98 | { |
92 | | - name: 'kube-prometheus-eks.rules', |
| 99 | + name: 'kube-prometheus-aws-vpc-cni.rules', |
93 | 100 | rules: [ |
94 | 101 | { |
95 | | - expr: 'sum by(instance) (awscni_ip_max) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $.values.eks.minimumAvailableIPs, |
| 102 | + expr: 'sum by(instance) (awscni_total_ip_addresses) - sum by(instance) (awscni_assigned_ip_addresses) < %s' % $.values.awsVpcCni.minimumWarmIPs, |
96 | 103 | labels: { |
97 | 104 | severity: 'critical', |
98 | 105 | }, |
99 | 106 | annotations: { |
100 | | - summary: 'EKS CNI is running low on available IPs', |
101 | | - description: 'Instance {{ $labels.instance }} has only {{ $value }} IPs available which is lower than set threshold of %s' % $.values.eks.minimumAvailableIPs, |
| 107 | + summary: 'AWS VPC CNI has a low warm IP pool', |
| 108 | + description: ||| |
| 109 | + Instance {{ $labels.instance }} has only {{ $value }} warm IPs which is lower than set threshold of %s. |
| 110 | + It could mean the current subnet is out of available IP addresses or the CNI is unable to request them from the EC2 API. |
| 111 | + ||| % $.values.awsVpcCni.minimumWarmIPs, |
102 | 112 | }, |
103 | | - 'for': $.values.eks.minimumAvailableIPsTime, |
104 | | - alert: 'EksCNILowAvailableIPs', |
| 113 | + 'for': $.values.awsVpcCni.minimumWarmIPsTime, |
| 114 | + alert: 'AwsVpcCniWarmIPsLow', |
105 | 115 | }, |
106 | 116 | ], |
107 | 117 | }, |
|
0 commit comments