@@ -16,10 +16,10 @@ variable "capacity_reservation_id" {
1616
1717module "eks" {
1818 source = " terraform-aws-modules/eks/aws"
19- version = " ~> 20.17 "
19+ version = " ~> 20.26 "
2020
2121 cluster_name = local. name
22- cluster_version = " 1.30 "
22+ cluster_version = " 1.31 "
2323
2424 # Give the Terraform identity admin access to the cluster
2525 # which will allow it to deploy resources into the cluster
@@ -30,7 +30,9 @@ module "eks" {
3030 coredns = {}
3131 eks-pod-identity-agent = {}
3232 kube-proxy = {}
33- vpc-cni = {}
33+ vpc-cni = {
34+ most_recent = true
35+ }
3436 }
3537
3638 # Add security group rules on the node group security group to
@@ -42,16 +44,27 @@ module "eks" {
4244
4345 eks_managed_node_groups = {
4446 cbr = {
45- # The EKS AL2 GPU AMI provides all of the necessary components
47+ # The EKS AL2023 NVIDIA AMI provides all of the necessary components
4648 # for accelerated workloads w/ EFA
47- ami_type = " AL2_x86_64_GPU"
48- instance_types = [" p5.48xlarge" ]
49-
50- pre_bootstrap_user_data = <<- EOT
51- # Mount instance store volumes in RAID-0 for kubelet and containerd
52- # https://github.com/awslabs/amazon-eks-ami/blob/master/doc/USER_GUIDE.md#raid-0-for-kubelet-and-containerd-raid0
53- /bin/setup-local-disks raid0
54- EOT
49+ ami_type = " AL2023_x86_64_NVIDIA"
50+ instance_types = [" p5e.48xlarge" ]
51+
52+ # Mount instance store volumes in RAID-0 for kubelet and containerd
53+ # https://github.com/awslabs/amazon-eks-ami/blob/master/doc/USER_GUIDE.md#raid-0-for-kubelet-and-containerd-raid0
54+ cloudinit_pre_nodeadm = [
55+ {
56+ content_type = " application/node.eks.aws"
57+ content = <<- EOT
58+ ---
59+ apiVersion: node.eks.aws/v1alpha1
60+ kind: NodeConfig
61+ spec:
62+ instance:
63+ localStorage:
64+ strategy: RAID0
65+ EOT
66+ }
67+ ]
5568
5669 min_size = 2
5770 max_size = 2
@@ -97,7 +110,7 @@ module "eks" {
97110 default = {
98111 instance_types = [" m5.large" ]
99112
100- min_size = 1
113+ min_size = 2
101114 max_size = 2
102115 desired_size = 2
103116 }
@@ -109,21 +122,31 @@ module "eks" {
109122 # the one that works for their use case.
110123 self_managed_node_groups = {
111124 cbr2 = {
112- # The EKS AL2 GPU AMI provides all of the necessary components
125+ # The EKS AL2023 NVIDIA AMI provides all of the necessary components
113126 # for accelerated workloads w/ EFA
114- ami_type = " AL2_x86_64_GPU"
115- instance_type = " p5.48xlarge"
116-
117- pre_bootstrap_user_data = <<- EOT
118- # Mount instance store volumes in RAID-0 for kubelet and containerd
119- # https://github.com/awslabs/amazon-eks-ami/blob/master/doc/USER_GUIDE.md#raid-0-for-kubelet-and-containerd-raid0
120- /bin/setup-local-disks raid0
121-
122- # Ensure only GPU workloads are scheduled on this node group
123- export KUBELET_EXTRA_ARGS='--node-labels=vpc.amazonaws.com/efa.present=true,nvidia.com/gpu.present=true \
124- --register-with-taints=nvidia.com/gpu=true:NoSchedule'
125-
126- EOT
127+ ami_type = " AL2023_x86_64_NVIDIA"
128+ instance_type = " p5e.48xlarge"
129+
130+ # Mount instance store volumes in RAID-0 for kubelet and containerd
131+ # https://github.com/awslabs/amazon-eks-ami/blob/master/doc/USER_GUIDE.md#raid-0-for-kubelet-and-containerd-raid0
132+ cloudinit_pre_nodeadm = [
133+ {
134+ content_type = " application/node.eks.aws"
135+ content = <<- EOT
136+ ---
137+ apiVersion: node.eks.aws/v1alpha1
138+ kind: NodeConfig
139+ spec:
140+ instance:
141+ localStorage:
142+ strategy: RAID0
143+ kubelet:
144+ flags:
145+ - --node-labels=vpc.amazonaws.com/efa.present=true,nvidia.com/gpu.present=true
146+ - --register-with-taints=nvidia.com/gpu=true:NoSchedule
147+ EOT
148+ }
149+ ]
127150
128151 min_size = 2
129152 max_size = 2
0 commit comments