diff --git a/README.md b/README.md index a7ccdea..0e46960 100644 --- a/README.md +++ b/README.md @@ -327,7 +327,71 @@ Module usage with two unmanaged worker groups: > you're using. This practice ensures the stability of your infrastructure. Additionally, we recommend implementing a systematic > approach for updating versions to avoid unexpected changes. +## EKS Auto Mode +This module supports [EKS Auto Mode](https://docs.aws.amazon.com/eks/latest/userguide/automode.html) (GA December 2024), +which delegates compute, networking, and storage management to AWS. Enable it using the `auto_mode_compute_config`, +`auto_mode_storage_config`, and `auto_mode_elastic_load_balancing` variables. + +### Enabling Auto Mode + +```hcl +module "eks_cluster" { + source = "cloudposse/eks-cluster/aws" + # version = "..." + + auto_mode_compute_config = { + enabled = true + node_pools = ["general-purpose", "system"] + node_role_arn = aws_iam_role.auto_mode_node.arn + } + + auto_mode_storage_config = { + block_storage = { + enabled = true + } + } + + auto_mode_elastic_load_balancing = { + enabled = true + } + + # ... 
other configuration +} +``` + +When Auto Mode is enabled, this module automatically: +- Sets `bootstrap_self_managed_addons = false` (unless explicitly overridden) +- Adds `sts:TagSession` to the cluster IAM role trust policy +- Attaches 4 additional IAM policies to the cluster role: `AmazonEKSComputePolicy`, `AmazonEKSBlockStoragePolicy`, + `AmazonEKSLoadBalancingPolicy`, and `AmazonEKSNetworkingPolicy` + +### Auto Mode Managed Add-ons + +When Auto Mode is enabled, AWS manages the following add-ons automatically: + +| Add-on | Variable | What AWS Manages | +|--------|----------|-----------------| +| **Compute** | `auto_mode_compute_config` | Node provisioning via managed Karpenter | +| **Storage** | `auto_mode_storage_config` | EBS volumes via `ebs.csi.eks.amazonaws.com` | +| **Networking** | `auto_mode_elastic_load_balancing` | ALB/NLB for Services and Ingress | + +### Important Notes + +- Requires AWS provider `>= 6.25.0` (the minimum this module pins in `versions.tf`) and Kubernetes `>= 1.29` +- Auto Mode manages `vpc-cni`, `kube-proxy`, `coredns`, and `aws-ebs-csi-driver` add-ons automatically. + Do not include these in the `addons` variable when Auto Mode is enabled. +- Auto Mode nodes are Bottlerocket-only, immutable, with no SSH/IMDS access +- Nodes have a 21-day maximum lifetime and are automatically rotated +- The `node_role_arn` in `auto_mode_compute_config` must be an IAM role with + `AmazonEKSWorkerNodeMinimalPolicy` and `AmazonEC2ContainerRegistryPullOnly` attached + +### Cluster Version Upgrades + +With Auto Mode, Kubernetes version upgrades are simplified: +1. Bump `kubernetes_version` and apply -- control plane upgrades in place +2. Managed Karpenter detects version drift and automatically replaces nodes +3. Auto Mode-managed add-ons are automatically upgraded to compatible versions diff --git a/README.yaml b/README.yaml index 975e4b9..e336ab9 100644 --- a/README.yaml +++ b/README.yaml @@ -328,5 +328,71 @@ usage: |- > many issues you may read about that had affected prior versions. 
See the version 2 README and release notes > for more information on the challenges and workarounds that were required prior to v3. + ## EKS Auto Mode + + This module supports [EKS Auto Mode](https://docs.aws.amazon.com/eks/latest/userguide/automode.html) (GA December 2024), + which delegates compute, networking, and storage management to AWS. Enable it using the `auto_mode_compute_config`, + `auto_mode_storage_config`, and `auto_mode_elastic_load_balancing` variables. + + ### Enabling Auto Mode + + ```hcl + module "eks_cluster" { + source = "cloudposse/eks-cluster/aws" + # version = "..." + + auto_mode_compute_config = { + enabled = true + node_pools = ["general-purpose", "system"] + node_role_arn = aws_iam_role.auto_mode_node.arn + } + + auto_mode_storage_config = { + block_storage = { + enabled = true + } + } + + auto_mode_elastic_load_balancing = { + enabled = true + } + + # ... other configuration + } + ``` + + When Auto Mode is enabled, this module automatically: + - Sets `bootstrap_self_managed_addons = false` (unless explicitly overridden) + - Adds `sts:TagSession` to the cluster IAM role trust policy + - Attaches 4 additional IAM policies to the cluster role: `AmazonEKSComputePolicy`, `AmazonEKSBlockStoragePolicy`, + `AmazonEKSLoadBalancingPolicy`, and `AmazonEKSNetworkingPolicy` + + ### Auto Mode Managed Add-ons + + When Auto Mode is enabled, AWS manages the following add-ons automatically: + + | Add-on | Variable | What AWS Manages | + |--------|----------|-----------------| + | **Compute** | `auto_mode_compute_config` | Node provisioning via managed Karpenter | + | **Storage** | `auto_mode_storage_config` | EBS volumes via `ebs.csi.eks.amazonaws.com` | + | **Networking** | `auto_mode_elastic_load_balancing` | ALB/NLB for Services and Ingress | + + ### Important Notes + + - Requires AWS provider `>= 6.25.0` (the minimum this module pins in `versions.tf`) and Kubernetes `>= 1.29` + - Auto Mode manages `vpc-cni`, `kube-proxy`, `coredns`, and `aws-ebs-csi-driver` add-ons automatically. 
+ Do not include these in the `addons` variable when Auto Mode is enabled. + - Auto Mode nodes are Bottlerocket-only, immutable, with no SSH/IMDS access + - Nodes have a 21-day maximum lifetime and are automatically rotated + - The `node_role_arn` in `auto_mode_compute_config` must be an IAM role with + `AmazonEKSWorkerNodeMinimalPolicy` and `AmazonEC2ContainerRegistryPullOnly` attached + + ### Cluster Version Upgrades + + With Auto Mode, Kubernetes version upgrades are simplified: + 1. Bump `kubernetes_version` and apply -- control plane upgrades in place + 2. Managed Karpenter detects version drift and automatically replaces nodes + 3. Auto Mode-managed add-ons are automatically upgraded to compatible versions + include: [] contributors: [] diff --git a/capabilities.tf b/capabilities.tf new file mode 100644 index 0000000..70118e4 --- /dev/null +++ b/capabilities.tf @@ -0,0 +1,126 @@ +# EKS Capabilities: Argo CD, ACK, KRO +# https://docs.aws.amazon.com/eks/latest/userguide/capabilities.html + +locals { + # Use toset of keys to ensure for_each keys are always known at plan time. + # The map keys come from var.capabilities which is a static configuration. + enabled_capability_keys = toset([ + for k, v in var.capabilities : k if local.enabled && v.enabled + ]) + + # Keys of capabilities that need auto-created IAM roles. + # Uses create_iam_role (a static bool) instead of role_arn == null + # to ensure for_each keys are always known at plan time. + capability_keys_needing_roles = toset([ + for k, v in var.capabilities : k if local.enabled && v.enabled && v.create_iam_role + ]) + + # Final role ARN map: auto-created or user-provided + capability_role_arns = { + for k in local.enabled_capability_keys : k => ( + var.capabilities[k].create_iam_role ? 
aws_iam_role.capability[k].arn : var.capabilities[k].role_arn + ) + } +} + +# IAM roles for capabilities that don't provide their own +module "capability_label" { + for_each = local.capability_keys_needing_roles + + source = "cloudposse/label/null" + version = "0.25.0" + + attributes = ["capability", each.key] + context = module.this.context +} + +data "aws_iam_policy_document" "capability_assume_role" { + count = length(local.capability_keys_needing_roles) > 0 ? 1 : 0 + + statement { + effect = "Allow" + actions = ["sts:AssumeRole", "sts:TagSession"] + + principals { + type = "Service" + identifiers = ["capabilities.eks.amazonaws.com"] + } + } +} + +resource "aws_iam_role" "capability" { + for_each = local.capability_keys_needing_roles + + name = module.capability_label[each.key].id + assume_role_policy = one(data.aws_iam_policy_document.capability_assume_role[*].json) + tags = module.capability_label[each.key].tags + permissions_boundary = var.permissions_boundary +} + +resource "aws_eks_capability" "default" { + for_each = local.enabled_capability_keys + + cluster_name = local.eks_cluster_id + capability_name = each.value + type = var.capabilities[each.value].type + role_arn = local.capability_role_arns[each.value] + delete_propagation_policy = var.capabilities[each.value].delete_propagation_policy + tags = module.label.tags + + dynamic "configuration" { + # The AWS API requires configuration with argo_cd and aws_idc for ARGOCD capabilities. + # Skip the entire configuration block if aws_idc is not provided -- the capability + # cannot be created without it. Provide aws_idc in your stack config to enable. + for_each = ( + var.capabilities[each.value].type == "ARGOCD" && + var.capabilities[each.value].configuration != null && + try(var.capabilities[each.value].configuration.argo_cd.aws_idc, null) != null + ) ? [var.capabilities[each.value].configuration] : [] + content { + dynamic "argo_cd" { + for_each = configuration.value.argo_cd != null ? 
[configuration.value.argo_cd] : [] + content { + namespace = argo_cd.value.namespace + + aws_idc { + idc_instance_arn = argo_cd.value.aws_idc.idc_instance_arn + idc_region = argo_cd.value.aws_idc.idc_region + } + + dynamic "network_access" { + for_each = argo_cd.value.network_access != null ? [argo_cd.value.network_access] : [] + content { + vpce_ids = network_access.value.vpce_ids + } + } + + dynamic "rbac_role_mapping" { + for_each = argo_cd.value.rbac_role_mapping + content { + role = rbac_role_mapping.value.role + + dynamic "identity" { + for_each = rbac_role_mapping.value.identity + content { + id = identity.value.id + type = identity.value.type + } + } + } + } + } + } + } + } + + timeouts { + create = var.capabilities[each.value].create_timeout + update = var.capabilities[each.value].update_timeout + delete = var.capabilities[each.value].delete_timeout + } + + depends_on = [ + aws_eks_cluster.default, + aws_iam_role.capability, + ] +} diff --git a/examples/complete/main.tf b/examples/complete/main.tf index 817a5df..aee8624 100644 --- a/examples/complete/main.tf +++ b/examples/complete/main.tf @@ -2,6 +2,8 @@ provider "aws" { region = var.region } +data "aws_partition" "current" {} + module "label" { source = "cloudposse/label/null" version = "0.25.0" @@ -111,11 +113,28 @@ module "eks_cluster" { cluster_encryption_config_resources = var.cluster_encryption_config_resources addons = local.addons - addons_depends_on = [module.eks_node_group] + addons_depends_on = var.auto_mode_enabled ? null : [module.eks_node_group] bootstrap_self_managed_addons_enabled = var.bootstrap_self_managed_addons_enabled upgrade_policy = var.upgrade_policy zonal_shift_config = var.zonal_shift_config + # EKS Auto Mode + auto_mode_compute_config = { + enabled = var.auto_mode_enabled + node_pools = var.auto_mode_enabled ? ["general-purpose", "system"] : [] + node_role_arn = var.auto_mode_enabled ? 
one(aws_iam_role.auto_mode_node[*].arn) : null + } + + auto_mode_storage_config = { + block_storage = { + enabled = var.auto_mode_enabled + } + } + + auto_mode_elastic_load_balancing = { + enabled = var.auto_mode_enabled + } + access_entry_map = local.access_entry_map access_config = { authentication_mode = "API" @@ -136,10 +155,49 @@ module "eks_cluster" { cluster_depends_on = [module.subnets] } +# Auto Mode node role (only when auto_mode_enabled = true) +data "aws_iam_policy_document" "auto_mode_node_assume_role" { + count = local.enabled && var.auto_mode_enabled ? 1 : 0 + + statement { + effect = "Allow" + actions = ["sts:AssumeRole"] + + principals { + type = "Service" + identifiers = ["ec2.amazonaws.com"] + } + } +} + +resource "aws_iam_role" "auto_mode_node" { + count = local.enabled && var.auto_mode_enabled ? 1 : 0 + + name = "${module.label.id}-auto-mode-node" + assume_role_policy = one(data.aws_iam_policy_document.auto_mode_node_assume_role[*].json) + tags = module.label.tags +} + +resource "aws_iam_role_policy_attachment" "auto_mode_node_minimal" { + count = local.enabled && var.auto_mode_enabled ? 1 : 0 + + role = one(aws_iam_role.auto_mode_node[*].name) + policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonEKSWorkerNodeMinimalPolicy" +} + +resource "aws_iam_role_policy_attachment" "auto_mode_node_ecr" { + count = local.enabled && var.auto_mode_enabled ? 1 : 0 + + role = one(aws_iam_role.auto_mode_node[*].name) + policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonEC2ContainerRegistryPullOnly" +} + module "eks_node_group" { source = "cloudposse/eks-node-group/aws" version = "3.2.0" + enabled = local.enabled && !var.auto_mode_enabled + # node group <= 3.2 requires a non-empty list of subnet_ids, even when disabled subnet_ids = local.enabled ? 
module.subnets.public_subnet_ids : ["filler_string_for_enabled_is_false"] cluster_name = module.eks_cluster.eks_cluster_id diff --git a/examples/complete/variables.tf b/examples/complete/variables.tf index be3f603..74e6080 100644 --- a/examples/complete/variables.tf +++ b/examples/complete/variables.tf @@ -142,6 +142,12 @@ variable "private_ipv6_enabled" { description = "Whether to use IPv6 addresses for the pods in the node group" } +variable "auto_mode_enabled" { + type = bool + default = false + description = "Set to true to enable EKS Auto Mode" +} + variable "remote_network_config" { description = "Configuration block for the cluster remote network configuration" type = object({ diff --git a/examples/complete/vpc-cni.tf b/examples/complete/vpc-cni.tf index ef67b5e..759ca81 100644 --- a/examples/complete/vpc-cni.tf +++ b/examples/complete/vpc-cni.tf @@ -42,7 +42,7 @@ resource "aws_iam_role_policy_attachment" "vpc_cni" { count = local.vpc_cni_sa_needed ? 1 : 0 role = module.vpc_cni_eks_iam_role.service_account_role_name - policy_arn = "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy" + policy_arn = "arn:${data.aws_partition.current.partition}:iam::aws:policy/AmazonEKS_CNI_Policy" } module "vpc_cni_eks_iam_role" { diff --git a/iam.tf b/iam.tf index af894e4..f0d5788 100644 --- a/iam.tf +++ b/iam.tf @@ -8,8 +8,11 @@ data "aws_iam_policy_document" "assume_role" { count = local.create_eks_service_role ? 1 : 0 statement { - effect = "Allow" - actions = ["sts:AssumeRole"] + effect = "Allow" + actions = compact(concat( + ["sts:AssumeRole"], + local.auto_mode_all_enabled ? 
["sts:TagSession"] : [] + )) principals { type = "Service" @@ -88,3 +91,20 @@ resource "aws_iam_role_policy_attachment" "cluster_elb_service_role" { policy_arn = one(aws_iam_policy.cluster_elb_service_role[*].arn) role = one(aws_iam_role.default[*].name) } + +# EKS Auto Mode requires additional cluster-level IAM policies +locals { + auto_mode_cluster_policies = local.create_eks_service_role && local.auto_mode_all_enabled ? toset([ + "arn:${one(data.aws_partition.current[*].partition)}:iam::aws:policy/AmazonEKSComputePolicy", + "arn:${one(data.aws_partition.current[*].partition)}:iam::aws:policy/AmazonEKSBlockStoragePolicy", + "arn:${one(data.aws_partition.current[*].partition)}:iam::aws:policy/AmazonEKSLoadBalancingPolicy", + "arn:${one(data.aws_partition.current[*].partition)}:iam::aws:policy/AmazonEKSNetworkingPolicy", + ]) : toset([]) +} + +resource "aws_iam_role_policy_attachment" "auto_mode" { + for_each = local.auto_mode_cluster_policies + + policy_arn = each.value + role = one(aws_iam_role.default[*].name) +} diff --git a/main.tf b/main.tf index 075b9bf..f00b425 100644 --- a/main.tf +++ b/main.tf @@ -14,6 +14,28 @@ locals { } cloudwatch_log_group_name = "/aws/eks/${module.label.id}/cluster" + + # EKS Auto Mode + auto_mode_flags = [ + var.auto_mode_compute_config.enabled, + var.auto_mode_storage_config.block_storage.enabled, + var.auto_mode_elastic_load_balancing.enabled, + ] + auto_mode_all_enabled = alltrue(local.auto_mode_flags) + auto_mode_all_disabled = !anytrue(local.auto_mode_flags) + + # Auto-set bootstrap_self_managed_addons to false when Auto Mode is enabled, unless overridden + # When user provides an explicit value, use it; otherwise default to false for Auto Mode, null otherwise + effective_bootstrap_self_managed_addons = var.bootstrap_self_managed_addons_enabled != null ? var.bootstrap_self_managed_addons_enabled : ( + local.auto_mode_all_enabled ? 
false : null + ) +} + +check "auto_mode_consistency" { + assert { + condition = local.auto_mode_all_enabled || local.auto_mode_all_disabled + error_message = "compute_config.enabled, storage_config.block_storage.enabled, and elastic_load_balancing.enabled must all be true or all be false." + } } module "label" { @@ -62,7 +84,7 @@ resource "aws_eks_cluster" "default" { role_arn = local.eks_service_role_arn version = var.kubernetes_version enabled_cluster_log_types = var.enabled_cluster_log_types - bootstrap_self_managed_addons = var.bootstrap_self_managed_addons_enabled + bootstrap_self_managed_addons = local.effective_bootstrap_self_managed_addons access_config { authentication_mode = var.access_config.authentication_mode @@ -96,17 +118,33 @@ resource "aws_eks_cluster" "default" { public_access_cidrs = var.public_access_cidrs } + # IPv4 kubernetes_network_config: render when service_ipv4_cidr is set or ELB is enabled (and not IPv6) dynamic "kubernetes_network_config" { - for_each = local.use_ipv6 ? [] : compact([var.service_ipv4_cidr]) + for_each = !local.use_ipv6 && (var.service_ipv4_cidr != null || var.auto_mode_elastic_load_balancing.enabled) ? [true] : [] content { - service_ipv4_cidr = kubernetes_network_config.value + service_ipv4_cidr = var.service_ipv4_cidr + + dynamic "elastic_load_balancing" { + for_each = var.auto_mode_elastic_load_balancing.enabled ? [true] : [] + content { + enabled = true + } + } } } + # IPv6 kubernetes_network_config dynamic "kubernetes_network_config" { for_each = local.use_ipv6 ? [true] : [] content { ip_family = "ipv6" + + dynamic "elastic_load_balancing" { + for_each = var.auto_mode_elastic_load_balancing.enabled ? [true] : [] + content { + enabled = true + } + } } } @@ -146,6 +184,25 @@ resource "aws_eks_cluster" "default" { } } + # EKS Auto Mode configuration + dynamic "compute_config" { + for_each = var.auto_mode_compute_config.enabled ? 
[var.auto_mode_compute_config] : [] + content { + enabled = true + node_pools = compute_config.value.node_pools + node_role_arn = compute_config.value.node_role_arn + } + } + + dynamic "storage_config" { + for_each = var.auto_mode_storage_config.block_storage.enabled ? [var.auto_mode_storage_config] : [] + content { + block_storage { + enabled = true + } + } + } + depends_on = [ aws_iam_role.default, aws_iam_role_policy_attachment.cluster_elb_service_role, diff --git a/outputs.tf b/outputs.tf index a9b8038..9c776c8 100644 --- a/outputs.tf +++ b/outputs.tf @@ -70,6 +70,29 @@ output "eks_addons_versions" { } : {} } +output "auto_mode_enabled" { + description = "Whether EKS Auto Mode is enabled (all three capabilities: compute, storage, networking)" + value = local.auto_mode_all_enabled +} + +output "capabilities" { + description = "Map of enabled EKS Capabilities with their ARNs and types" + value = { + for k, v in aws_eks_capability.default : k => { + arn = v.arn + type = v.type + version = v.version + } + } +} + +output "capability_role_arns" { + description = "Map of auto-created capability IAM role ARNs" + value = { + for k, v in aws_iam_role.capability : k => v.arn + } +} + output "cluster_encryption_config_enabled" { description = "If true, Cluster Encryption Configuration is enabled" value = var.cluster_encryption_config_enabled diff --git a/variables.tf b/variables.tf index 0ba3a03..2161aa7 100644 --- a/variables.tf +++ b/variables.tf @@ -201,11 +201,120 @@ variable "addons_depends_on" { } variable "bootstrap_self_managed_addons_enabled" { - description = "Manages bootstrap of default networking addons after cluster has been created" + description = "Manages bootstrap of default networking addons after cluster has been created. Must be false when Auto Mode is enabled. Changing this forces cluster recreation." type = bool default = null } +variable "auto_mode_compute_config" { + description = <<-EOT + EKS Auto Mode compute configuration. 
When enabled, AWS manages node + provisioning via managed Karpenter. + EOT + type = object({ + enabled = optional(bool, false) + node_pools = optional(set(string), ["general-purpose", "system"]) + node_role_arn = optional(string, null) + }) + default = {} + nullable = false +} + +variable "auto_mode_storage_config" { + description = <<-EOT + EKS Auto Mode storage configuration. When block_storage is enabled, + AWS manages EBS volumes via the ebs.csi.eks.amazonaws.com provisioner. + EOT + type = object({ + block_storage = optional(object({ + enabled = optional(bool, false) + }), {}) + }) + default = {} + nullable = false +} + +variable "auto_mode_elastic_load_balancing" { + description = <<-EOT + EKS Auto Mode elastic load balancing configuration. When enabled, + AWS manages ALB/NLB creation for Services and Ingress resources. + EOT + type = object({ + enabled = optional(bool, false) + }) + default = {} + nullable = false +} + +variable "capabilities" { + description = <<-EOT + Map of EKS Capabilities to enable on the cluster. Each key is the capability + name (must be unique within the cluster). Supported types: ACK, ARGOCD, KRO. + + When `create_iam_role` is true (default), an IAM role with a trust policy + for `capabilities.eks.amazonaws.com` is automatically created and any + `role_arn` value is ignored. Set `create_iam_role = false` and provide `role_arn` + when the calling module creates its own IAM roles (avoids plan-time unknowns). + + The `configuration` block is only applicable to ARGOCD capabilities. + ACK and KRO do not currently support configuration. 
+ EOT + type = map(object({ + enabled = optional(bool, true) + type = string # ACK, ARGOCD, KRO + create_iam_role = optional(bool, true) + role_arn = optional(string, null) + delete_propagation_policy = optional(string, "RETAIN") + configuration = optional(object({ + argo_cd = optional(object({ + namespace = optional(string, "argocd") + aws_idc = optional(object({ + idc_instance_arn = string + idc_region = optional(string, null) + }), null) + network_access = optional(object({ + vpce_ids = optional(list(string), []) + }), null) + rbac_role_mapping = optional(list(object({ + role = string # ADMIN, EDITOR, VIEWER + identity = list(object({ + id = string + type = string # SSO_USER, SSO_GROUP + })) + })), []) + }), null) + }), null) + create_timeout = optional(string, null) + update_timeout = optional(string, null) + delete_timeout = optional(string, null) + })) + default = {} + nullable = false + + validation { + condition = alltrue([ + for k, v in var.capabilities : contains(["ACK", "ARGOCD", "KRO"], v.type) + ]) + error_message = "Each capability type must be one of: ACK, ARGOCD, KRO." + } + + validation { + condition = alltrue([ + for k, v in var.capabilities : + v.configuration == null || v.type == "ARGOCD" + ]) + error_message = "The configuration block is only supported for ARGOCD capabilities." + } + + validation { + condition = alltrue([ + for k, v in var.capabilities : + v.type != "ARGOCD" || !v.enabled || try(v.configuration.argo_cd.aws_idc.idc_instance_arn, null) != null + ]) + error_message = "ARGOCD capabilities require configuration.argo_cd.aws_idc.idc_instance_arn. The AWS API requires AWS Identity Center configuration for Argo CD capabilities." 
+ } +} + variable "upgrade_policy" { type = object({ support_type = optional(string, null) diff --git a/versions.tf b/versions.tf index 0379bd7..11c5df9 100644 --- a/versions.tf +++ b/versions.tf @@ -4,7 +4,7 @@ terraform { required_providers { aws = { source = "hashicorp/aws" - version = ">= 5.75.1" + version = ">= 6.25.0" } tls = { source = "hashicorp/tls"