Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/publish-docs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ jobs:
- name: Install dependencies
run: |
python -m pip install --upgrade pip
python -m pip install mkdocs-material==9.5.21 \
mkdocs-include-markdown-plugin==6.0.6 \
mkdocs-awesome-pages-plugin==2.9.2
python -m pip install mkdocs-material==9.6.19 \
mkdocs-include-markdown-plugin==7.1.7 \
mkdocs-awesome-pages-plugin==2.10.1

- name: git config
run: |
Expand Down
8 changes: 4 additions & 4 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,16 +1,16 @@
repos:
- repo: https://github.com/streetsidesoftware/cspell-cli
rev: v9.0.1
rev: v9.2.0
hooks:
- id: cspell
args: [--exclude, 'ADOPTERS.md', --exclude, '.pre-commit-config.yaml', --exclude, '.gitignore', --exclude, '*.drawio', --exclude, 'mkdocs.yml', --exclude, '.helmignore', --exclude, '.github/workflows/*', --exclude, 'patterns/istio-multi-cluster/*', --exclude, 'patterns/blue-green-upgrade/*', --exclude, '/patterns/vpc-lattice/cross-cluster-pod-communication/*', --exclude, 'patterns/bottlerocket/*', --exclude, 'patterns/nvidia-gpu-efa/generate-efa-nccl-test.sh']
- repo: https://github.com/macisamuele/language-formatters-pre-commit-hooks
rev: v2.14.0
rev: v2.15.0
hooks:
- id: pretty-format-yaml
args: [--autofix, --indent, '2', --offset, '2', --preserve-quotes]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v5.0.0
rev: v6.0.0
hooks:
- id: trailing-whitespace
- id: end-of-file-fixer
Expand All @@ -19,7 +19,7 @@ repos:
- id: detect-aws-credentials
args: [--allow-missing-credentials]
- repo: https://github.com/antonbabenko/pre-commit-terraform
rev: v1.99.4
rev: v1.100.0
hooks:
- id: terraform_fmt
- id: terraform_docs
Expand Down
1 change: 1 addition & 0 deletions mkdocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ markdown_extensions:
- codehilite
- footnotes
- md_in_html
- pymdownx.blocks.caption
- pymdownx.critic
- pymdownx.details
- pymdownx.highlight:
Expand Down
4 changes: 2 additions & 2 deletions patterns/aws-neuron-efa/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,13 +19,13 @@ The following components are demonstrated in this pattern:

### Cluster

```terraform hl_lines="35-37 49-95"
```terraform hl_lines="34-85"
{% include "../../patterns/aws-neuron-efa/eks.tf" %}
```

### Device Plugins

```terraform hl_lines="9-50"
```terraform hl_lines="31-68"
{% include "../../patterns/aws-neuron-efa/helm.tf" %}
```

Expand Down
30 changes: 10 additions & 20 deletions patterns/aws-neuron-efa/eks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -4,22 +4,17 @@

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 20.34"
version = "~> 21.0"

cluster_name = local.name
cluster_version = "1.32"
name = local.name
kubernetes_version = "1.33"

# Give the Terraform identity admin access to the cluster
# which will allow it to deploy resources into the cluster
enable_cluster_creator_admin_permissions = true
cluster_endpoint_public_access = true
endpoint_public_access = true

# These will become the default in the next major version of the module
bootstrap_self_managed_addons = false
enable_irsa = false
enable_security_groups_for_pods = false

cluster_addons = {
addons = {
coredns = {}
eks-node-monitoring-agent = {}
eks-pod-identity-agent = {
Expand All @@ -32,19 +27,9 @@ module "eks" {
}
}

# Add security group rules on the node group security group to
# allow EFA traffic
enable_efa_support = true

vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets

eks_managed_node_group_defaults = {
node_repair_config = {
enabled = true
}
}

eks_managed_node_groups = {
neuron-efa = {
# The EKS AL2023 Neuron AMI provides all of the necessary components
Expand All @@ -69,6 +54,10 @@ module "eks" {
}
]

node_repair_config = {
enabled = true
}

min_size = 2
max_size = 2
desired_size = 2
Expand All @@ -77,6 +66,7 @@ module "eks" {
# 1. Create a placement group to place the instances close to one another
# 2. Ignore subnets that reside in AZs that do not support the instance type
# 3. Expose all of the available EFA interfaces on the launch template
# 4. Add a security group with rules to the node group to allow EFA traffic
enable_efa_support = true

labels = {
Expand Down
32 changes: 25 additions & 7 deletions patterns/aws-neuron-efa/helm.tf
Original file line number Diff line number Diff line change
@@ -1,5 +1,27 @@
data "aws_ecrpublic_authorization_token" "token" {
provider = aws.ecr
region = "us-east-1"
}

# Helm provider connected to the EKS cluster created by module.eks.
provider "helm" {
  # Connection settings for the cluster's Kubernetes API server
  kubernetes = {
    host                   = module.eks.cluster_endpoint
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

    # Authenticate by running `aws eks get-token` for this cluster
    exec = {
      api_version = "client.authentication.k8s.io/v1beta1"
      command     = "aws"
      # This requires the awscli to be installed locally where Terraform is executed
      args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
    }
  }

  # `registries` is a provider-level argument and must be a sibling of
  # `kubernetes`, not nested inside it.
  # Credentials for the public ECR OCI registry, taken from the
  # aws_ecrpublic_authorization_token data source.
  registries = [
    {
      url      = "oci://public.ecr.aws/neuron"
      username = data.aws_ecrpublic_authorization_token.token.user_name
      password = data.aws_ecrpublic_authorization_token.token.password
    }
  ]
}

################################################################################
Expand All @@ -10,15 +32,11 @@ resource "helm_release" "neuron" {
name = "neuron"
repository = "oci://public.ecr.aws/neuron"
chart = "neuron-helm-chart"
version = "1.1.1"
version = "1.2.0"
namespace = "neuron"
create_namespace = true
wait = false

# Public ECR
repository_username = data.aws_ecrpublic_authorization_token.token.user_name
repository_password = data.aws_ecrpublic_authorization_token.token.password

values = [
<<-EOT
nodeSelector:
Expand All @@ -33,7 +51,7 @@ resource "helm_release" "aws_efa_device_plugin" {
name = "aws-efa-k8s-device-plugin"
repository = "https://aws.github.io/eks-charts"
chart = "aws-efa-k8s-device-plugin"
version = "v0.5.7"
version = "v0.5.17"
namespace = "kube-system"
wait = false

Expand Down
29 changes: 4 additions & 25 deletions patterns/aws-neuron-efa/main.tf
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
terraform {
required_version = ">= 1.3"
required_version = ">= 1.5.7"

required_providers {
aws = {
source = "hashicorp/aws"
version = ">= 5.34, < 6.0"
version = ">= 6.9"
}
helm = {
source = "hashicorp/helm"
version = ">= 2.9, < 3.0"
version = ">= 3.0"
}
}

Expand All @@ -24,27 +24,6 @@ provider "aws" {
region = local.region
}

# This provider is required for Public ECR. Public ECR is only available in us-east-1
# If your region is same as us-east-1 then you can just use one aws provider
provider "aws" {
alias = "ecr"
region = "us-east-1"
}

provider "helm" {
kubernetes {
host = module.eks.cluster_endpoint
cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)

exec {
api_version = "client.authentication.k8s.io/v1beta1"
command = "aws"
# This requires the awscli to be installed locally where Terraform is executed
args = ["eks", "get-token", "--cluster-name", module.eks.cluster_name]
}
}
}

################################################################################
# Common data/locals
################################################################################
Expand Down Expand Up @@ -85,7 +64,7 @@ output "configure_kubectl" {

module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "~> 5.0"
version = "~> 6.0"

name = local.name
cidr = local.vpc_cidr
Expand Down
2 changes: 1 addition & 1 deletion patterns/ipv6-eks-cluster/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -97,4 +97,4 @@ module "vpc" {
}

tags = local.tags
}
}
2 changes: 1 addition & 1 deletion patterns/ml-capacity-block/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ This pattern demonstrates how to consume/utilize ML capacity block reservations

## Code

```terraform hl_lines="5-11 108-122"
```terraform hl_lines="5-11 98-112"
{% include "../../patterns/ml-capacity-block/eks.tf" %}
```

Expand Down
30 changes: 10 additions & 20 deletions patterns/ml-capacity-block/eks.tf
Original file line number Diff line number Diff line change
Expand Up @@ -16,22 +16,17 @@ variable "capacity_reservation_id" {

module "eks" {
source = "terraform-aws-modules/eks/aws"
version = "~> 20.34"
version = "~> 21.0"

cluster_name = local.name
cluster_version = "1.32"
name = local.name
kubernetes_version = "1.33"

# Give the Terraform identity admin access to the cluster
# which will allow it to deploy resources into the cluster
enable_cluster_creator_admin_permissions = true
cluster_endpoint_public_access = true
endpoint_public_access = true

# These will become the default in the next major version of the module
bootstrap_self_managed_addons = false
enable_irsa = false
enable_security_groups_for_pods = false

cluster_addons = {
addons = {
coredns = {}
eks-node-monitoring-agent = {}
eks-pod-identity-agent = {
Expand All @@ -44,19 +39,9 @@ module "eks" {
}
}

# Add security group rules on the node group security group to
# allow EFA traffic
enable_efa_support = true

vpc_id = module.vpc.vpc_id
subnet_ids = module.vpc.private_subnets

eks_managed_node_group_defaults = {
node_repair_config = {
enabled = true
}
}

eks_managed_node_groups = {
cbr = {
# The EKS AL2023 NVIDIA AMI provides all of the necessary components
Expand All @@ -81,6 +66,10 @@ module "eks" {
}
]

node_repair_config = {
enabled = true
}

min_size = 2
max_size = 2
desired_size = 2
Expand All @@ -89,6 +78,7 @@ module "eks" {
# 1. Create a placement group to place the instances close to one another
# 2. Ignore subnets that reside in AZs that do not support the instance type
# 3. Expose all of the available EFA interfaces on the launch template
# 4. Add a security group with rules to the node group to allow EFA traffic
enable_efa_support = true

labels = {
Expand Down
18 changes: 16 additions & 2 deletions patterns/ml-capacity-block/helm.tf
Original file line number Diff line number Diff line change
@@ -1,3 +1,17 @@
# Helm provider pointed at the EKS cluster provisioned by module.eks.
provider "helm" {
  kubernetes = {
    cluster_ca_certificate = base64decode(module.eks.cluster_certificate_authority_data)
    host                   = module.eks.cluster_endpoint

    # Credentials come from executing `aws eks get-token` for this cluster.
    # This requires the awscli to be installed locally where Terraform is executed
    exec = {
      command     = "aws"
      api_version = "client.authentication.k8s.io/v1beta1"
      args = [
        "eks",
        "get-token",
        "--cluster-name",
        module.eks.cluster_name,
      ]
    }
  }
}

################################################################################
# Helm charts
################################################################################
Expand All @@ -6,7 +20,7 @@ resource "helm_release" "nvidia_device_plugin" {
name = "nvidia-device-plugin"
repository = "https://nvidia.github.io/k8s-device-plugin"
chart = "nvidia-device-plugin"
version = "0.17.1"
version = "0.17.4"
namespace = "nvidia-device-plugin"
create_namespace = true
wait = false
Expand All @@ -16,7 +30,7 @@ resource "helm_release" "aws_efa_device_plugin" {
name = "aws-efa-k8s-device-plugin"
repository = "https://aws.github.io/eks-charts"
chart = "aws-efa-k8s-device-plugin"
version = "v0.5.7"
version = "v0.5.17"
namespace = "kube-system"
wait = false

Expand Down
Loading
Loading