diff --git a/aws_auth.tf b/aws_auth.tf index 487763b68e..d3007c2fa7 100644 --- a/aws_auth.tf +++ b/aws_auth.tf @@ -1,47 +1,6 @@ data "aws_caller_identity" "current" { } -data "template_file" "launch_template_worker_role_arns" { - count = var.create_eks ? local.worker_group_launch_template_count : 0 - template = file("${path.module}/templates/worker-role.tpl") - - vars = { - worker_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${element( - coalescelist( - aws_iam_instance_profile.workers_launch_template.*.role, - data.aws_iam_instance_profile.custom_worker_group_launch_template_iam_instance_profile.*.role_name, - ), - count.index, - )}" - platform = lookup( - var.worker_groups_launch_template[count.index], - "platform", - local.workers_group_defaults["platform"] - ) - } -} - -data "template_file" "worker_role_arns" { - count = var.create_eks ? local.worker_group_count : 0 - template = file("${path.module}/templates/worker-role.tpl") - - vars = { - worker_role_arn = "arn:aws:iam::${data.aws_caller_identity.current.account_id}:role/${element( - coalescelist( - aws_iam_instance_profile.workers.*.role, - data.aws_iam_instance_profile.custom_worker_group_iam_instance_profile.*.role_name, - [""] - ), - count.index, - )}" - platform = lookup( - var.worker_groups[count.index], - "platform", - local.workers_group_defaults["platform"] - ) - } -} - data "template_file" "node_group_arns" { count = var.create_eks ? length(module.node_groups.aws_auth_roles) : 0 template = file("${path.module}/templates/worker-role.tpl") @@ -52,19 +11,34 @@ data "template_file" "node_group_arns" { resource "kubernetes_config_map" "aws_auth" { count = var.create_eks && var.manage_aws_auth ? 1 : 0 depends_on = [null_resource.wait_for_cluster[0]] + #depends_on = [null_resource.wait_for_cluster[0], aws_iam_instance_profile.karpenter_node_instance_profile] metadata { name = "aws-auth" namespace = "kube-system" } +# data = { +# mapRoles = <= 1.14 ? var.cluster_version : 1.14}-*" - ) } data "aws_iam_policy_document" "workers_assume_role_policy" { @@ -33,23 +28,6 @@ data "aws_ami" "eks_worker" { owners = [var.worker_ami_owner_id] } -data "aws_ami" "eks_worker_windows" { - filter { - name = "name" - values = [local.worker_ami_name_filter_windows] - } - - filter { - name = "platform" - values = ["windows"] - } - - most_recent = true - - owners = [var.worker_ami_owner_id_windows] -} - - data "aws_iam_policy_document" "cluster_assume_role_policy" { statement { sid = "EKSClusterAssumeRole" @@ -103,121 +81,11 @@ EOF vars = { value = values(var.kubeconfig_aws_authenticator_env_variables)[count.index] - key = keys(var.kubeconfig_aws_authenticator_env_variables)[count.index] + key = keys(var.kubeconfig_aws_authenticator_env_variables)[count.index] } } -data "template_file" "userdata" { - count = var.create_eks ? local.worker_group_count : 0 - template = lookup( - var.worker_groups[count.index], - "userdata_template_file", - file( - lookup(var.worker_groups[count.index], "platform", local.workers_group_defaults["platform"]) == "windows" - ? 
"${path.module}/templates/userdata_windows.tpl" - : "${path.module}/templates/userdata.sh.tpl" - ) - ) - - vars = merge({ - platform = lookup(var.worker_groups[count.index], "platform", local.workers_group_defaults["platform"]) - cluster_name = aws_eks_cluster.this[0].name - endpoint = aws_eks_cluster.this[0].endpoint - cluster_auth_base64 = aws_eks_cluster.this[0].certificate_authority[0].data - pre_userdata = lookup( - var.worker_groups[count.index], - "pre_userdata", - local.workers_group_defaults["pre_userdata"], - ) - additional_userdata = lookup( - var.worker_groups[count.index], - "additional_userdata", - local.workers_group_defaults["additional_userdata"], - ) - bootstrap_extra_args = lookup( - var.worker_groups[count.index], - "bootstrap_extra_args", - local.workers_group_defaults["bootstrap_extra_args"], - ) - kubelet_extra_args = lookup( - var.worker_groups[count.index], - "kubelet_extra_args", - local.workers_group_defaults["kubelet_extra_args"], - ) - }, - lookup( - var.worker_groups[count.index], - "userdata_template_extra_args", - local.workers_group_defaults["userdata_template_extra_args"] - ) - ) -} - -data "template_file" "launch_template_userdata" { - count = var.create_eks ? local.worker_group_launch_template_count : 0 - template = lookup( - var.worker_groups_launch_template[count.index], - "userdata_template_file", - file( - lookup(var.worker_groups_launch_template[count.index], "platform", local.workers_group_defaults["platform"]) == "windows" - ? "${path.module}/templates/userdata_windows.tpl" - : "${path.module}/templates/userdata.sh.tpl" - ) - ) - - vars = merge({ - platform = lookup(var.worker_groups_launch_template[count.index], "platform", local.workers_group_defaults["platform"]) - cluster_name = aws_eks_cluster.this[0].name - endpoint = aws_eks_cluster.this[0].endpoint - cluster_auth_base64 = aws_eks_cluster.this[0].certificate_authority[0].data - pre_userdata = lookup( - var.worker_groups_launch_template[count.index], - "pre_userdata", - local.workers_group_defaults["pre_userdata"], - ) - additional_userdata = lookup( - var.worker_groups_launch_template[count.index], - "additional_userdata", - local.workers_group_defaults["additional_userdata"], - ) - bootstrap_extra_args = lookup( - var.worker_groups_launch_template[count.index], - "bootstrap_extra_args", - local.workers_group_defaults["bootstrap_extra_args"], - ) - kubelet_extra_args = lookup( - var.worker_groups_launch_template[count.index], - "kubelet_extra_args", - local.workers_group_defaults["kubelet_extra_args"], - ) - }, - lookup( - var.worker_groups_launch_template[count.index], - "userdata_template_extra_args", - local.workers_group_defaults["userdata_template_extra_args"] - ) - ) -} - data "aws_iam_role" "custom_cluster_iam_role" { count = var.manage_cluster_iam_resources ? 0 : 1 - name = var.cluster_iam_role_name -} - -data "aws_iam_instance_profile" "custom_worker_group_iam_instance_profile" { - count = var.manage_worker_iam_resources ? 0 : local.worker_group_count - name = lookup( - var.worker_groups[count.index], - "iam_instance_profile_name", - local.workers_group_defaults["iam_instance_profile_name"], - ) -} - -data "aws_iam_instance_profile" "custom_worker_group_launch_template_iam_instance_profile" { - count = var.manage_worker_iam_resources ? 
0 : local.worker_group_launch_template_count - name = lookup( - var.worker_groups_launch_template[count.index], - "iam_instance_profile_name", - local.workers_group_defaults["iam_instance_profile_name"], - ) + name = var.cluster_iam_role_name } diff --git a/fsx_irsa.tf b/fsx_irsa.tf new file mode 100644 index 0000000000..9492f80021 --- /dev/null +++ b/fsx_irsa.tf @@ -0,0 +1,217 @@ +# FSx CSI Driver IAM Role for Service Account (IRSA) +module "fsx_csi_irsa" { + source = "./modules/iam-service-account" + + count = var.create_eks && var.enable_aws_fsx_csi_driver_addon ? 1 : 0 + + role_name = "fsx-csi-driver-${var.cluster_name}" + role_description = "IAM role for FSx CSI driver" + provider_url = replace(aws_eks_cluster.this[0].identity[0].oidc[0].issuer, "https://", "") + role_policy_document = data.aws_iam_policy_document.fsx_csi_driver[0].json + oidc_fully_qualified_subjects = ["system:serviceaccount:kube-system:fsx-csi-controller-sa"] + + tags = var.tags +} + +# Reference AWS managed policy for FSx CSI driver +# AWS Documentation: https://docs.aws.amazon.com/eks/latest/userguide/fsx-csi-create.html +# Policy Reference: https://docs.aws.amazon.com/aws-managed-policy/latest/reference/AmazonFSxFullAccess.html +# +# AWS RECOMMENDATION: AWS officially recommends using the AmazonFSxFullAccess managed policy +# for the FSx CSI driver IAM role. From the EKS documentation: +# "Create an IAM role and attach the AWS managed policy with the following command" +# eksctl create iamserviceaccount --attach-policy-arn arn:aws:iam::aws:policy/AmazonFSxFullAccess +# +# This implementation uses source_policy_documents to reference the AWS managed policy, +# providing the same permissions while maintaining flexibility for future customizations. +# +# Current AmazonFSxFullAccess policy content (as of 2025-09-09): +# { +# "Version": "2012-10-17", +# "Statement": [ +# { +# "Sid": "ViewAWSDSDirectories", +# "Effect": "Allow", +# "Action": [ +# "ds:DescribeDirectories" +# ], +# "Resource": "*" +# }, +# { +# "Sid": "FullAccessToFSx", +# "Effect": "Allow", +# "Action": [ +# "fsx:AssociateFileGateway", +# "fsx:AssociateFileSystemAliases", +# "fsx:CancelDataRepositoryTask", +# "fsx:CopyBackup", +# "fsx:CopySnapshotAndUpdateVolume", +# "fsx:CreateAndAttachS3AccessPoint", +# "fsx:CreateBackup", +# "fsx:CreateDataRepositoryAssociation", +# "fsx:CreateDataRepositoryTask", +# "fsx:CreateFileCache", +# "fsx:CreateFileSystem", +# "fsx:CreateFileSystemFromBackup", +# "fsx:CreateSnapshot", +# "fsx:CreateStorageVirtualMachine", +# "fsx:CreateVolume", +# "fsx:CreateVolumeFromBackup", +# "fsx:DetachAndDeleteS3AccessPoint", +# "fsx:DeleteBackup", +# "fsx:DeleteDataRepositoryAssociation", +# "fsx:DeleteFileCache", +# "fsx:DeleteFileSystem", +# "fsx:DeleteSnapshot", +# "fsx:DeleteStorageVirtualMachine", +# "fsx:DeleteVolume", +# "fsx:DescribeAssociatedFileGateways", +# "fsx:DescribeBackups", +# "fsx:DescribeDataRepositoryAssociations", +# "fsx:DescribeDataRepositoryTasks", +# "fsx:DescribeFileCaches", +# "fsx:DescribeFileSystemAliases", +# "fsx:DescribeFileSystems", +# "fsx:DescribeS3AccessPointAttachments", +# "fsx:DescribeSharedVpcConfiguration", +# "fsx:DescribeSnapshots", +# "fsx:DescribeStorageVirtualMachines", +# "fsx:DescribeVolumes", +# "fsx:DisassociateFileGateway", +# "fsx:DisassociateFileSystemAliases", +# "fsx:ListTagsForResource", +# "fsx:ManageBackupPrincipalAssociations", +# "fsx:ReleaseFileSystemNfsV3Locks", +# "fsx:RestoreVolumeFromSnapshot", +# "fsx:TagResource", +# "fsx:UntagResource", +# 
"fsx:UpdateDataRepositoryAssociation", +# "fsx:UpdateFileCache", +# "fsx:UpdateFileSystem", +# "fsx:UpdateSharedVpcConfiguration", +# "fsx:UpdateSnapshot", +# "fsx:UpdateStorageVirtualMachine", +# "fsx:UpdateVolume" +# ], +# "Resource": "*" +# }, +# { +# "Sid": "CreateSLRForFSx", +# "Effect": "Allow", +# "Action": "iam:CreateServiceLinkedRole", +# "Resource": "*", +# "Condition": { +# "StringEquals": { +# "iam:AWSServiceName": [ +# "fsx.amazonaws.com" +# ] +# } +# } +# }, +# { +# "Sid": "CreateSLRForLustreS3Integration", +# "Effect": "Allow", +# "Action": "iam:CreateServiceLinkedRole", +# "Resource": "*", +# "Condition": { +# "StringEquals": { +# "iam:AWSServiceName": [ +# "s3.data-source.lustre.fsx.amazonaws.com" +# ] +# } +# } +# }, +# { +# "Sid": "CreateLogsForFSxWindowsAuditLogs", +# "Effect": "Allow", +# "Action": [ +# "logs:CreateLogGroup", +# "logs:CreateLogStream", +# "logs:PutLogEvents" +# ], +# "Resource": [ +# "arn:aws:logs:*:*:log-group:/aws/fsx/*" +# ] +# }, +# { +# "Sid": "WriteToAmazonKinesisDataFirehose", +# "Effect": "Allow", +# "Action": [ +# "firehose:PutRecord" +# ], +# "Resource": [ +# "arn:aws:firehose:*:*:deliverystream/aws-fsx-*" +# ] +# }, +# { +# "Sid": "CreateTags", +# "Effect": "Allow", +# "Action": [ +# "ec2:CreateTags" +# ], +# "Resource": [ +# "arn:aws:ec2:*:*:route-table/*" +# ], +# "Condition": { +# "StringEquals": { +# "aws:RequestTag/AmazonFSx": "ManagedByAmazonFSx" +# }, +# "ForAnyValue:StringEquals": { +# "aws:CalledVia": [ +# "fsx.amazonaws.com" +# ] +# } +# } +# }, +# { +# "Sid": "DescribeEC2VpcResources", +# "Effect": "Allow", +# "Action": [ +# "ec2:DescribeSecurityGroups", +# "ec2:GetSecurityGroupsForVpc", +# "ec2:DescribeSubnets", +# "ec2:DescribeVpcs", +# "ec2:DescribeRouteTables" +# ], +# "Resource": "*", +# "Condition": { +# "ForAnyValue:StringEquals": { +# "aws:CalledVia": [ +# "fsx.amazonaws.com" +# ] +# } +# } +# }, +# { +# "Sid": "ManageCrossAccountDataReplication", +# "Effect": "Allow", +# "Action": [ +# "fsx:PutResourcePolicy", +# "fsx:GetResourcePolicy", +# "fsx:DeleteResourcePolicy" +# ], +# "Resource": "*", +# "Condition": { +# "ForAnyValue:StringEquals": { +# "aws:CalledVia": [ +# "ram.amazonaws.com" +# ] +# } +# } +# } +# ] +# } +data "aws_iam_policy_document" "fsx_csi_driver" { + count = var.create_eks && var.enable_aws_fsx_csi_driver_addon ? 1 : 0 + + # Reference the AWS managed policy + source_policy_documents = [ + data.aws_iam_policy.amazon_fsx_full_access[0].policy + ] +} + +# Get the AWS managed AmazonFSxFullAccess policy +data "aws_iam_policy" "amazon_fsx_full_access" { + count = var.create_eks && var.enable_aws_fsx_csi_driver_addon ? 
1 : 0 + arn = "arn:aws:iam::aws:policy/AmazonFSxFullAccess" +} diff --git a/irsa.tf b/irsa.tf index 08096d8f08..a6c166fc13 100644 --- a/irsa.tf +++ b/irsa.tf @@ -12,4 +12,5 @@ resource "aws_iam_openid_connect_provider" "oidc_provider" { client_id_list = ["sts.amazonaws.com"] thumbprint_list = [var.eks_oidc_root_ca_thumbprint] url = flatten(concat(aws_eks_cluster.this[*].identity[*].oidc.0.issuer, [""]))[0] -} + +} \ No newline at end of file diff --git a/karpenter.tf b/karpenter.tf new file mode 100644 index 0000000000..9419e560b4 --- /dev/null +++ b/karpenter.tf @@ -0,0 +1,83 @@ +locals { + worker_policy_list = [ + "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy", + "arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy", + "arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly", + "arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore" + ] +} + +resource "aws_iam_role_policy_attachment" "karpenter_policy_attachments" { + count = var.manage_worker_iam_resources && var.create_eks ? length(local.worker_policy_list) : 0 + policy_arn = local.worker_policy_list[count.index] + role = aws_iam_role.karpenter_role[0].name +} + +# Karpenter requires a node instance profile created to be passed to the helmfile +resource "aws_iam_role" "karpenter_role" { + count = var.manage_worker_iam_resources && var.create_eks ? 1 : 0 + name = "karpenter_node_role_${var.logging_stage}" + permissions_boundary = var.permissions_boundary + path = var.iam_path + force_detach_policies = true + tags = var.tags + assume_role_policy = data.aws_iam_policy_document.workers_assume_role_policy.json +} + +resource "aws_iam_instance_profile" "karpenter_node_instance_profile" { + name = "karpenter_node_instance_profile_${var.logging_stage}" + role = aws_iam_role.karpenter_role[0].name +} + +data "aws_kms_alias" "ebs" { + name = "alias/aws/ebs" +} +resource "aws_iam_policy" "kms_key_policy" { + name = "${var.logging_stage}-kms-key-policy" + description = "Policy for kms key used by EBS volumes" + + + policy = jsonencode({ + Version : "2012-10-17" + Statement : [ + { + "Action" : [ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey" + ], + "Effect" : "Allow", + "Resource" : data.aws_kms_alias.ebs.arn, + "Sid" : "KmsKey" + }, + { + Action = [ + "route53:ListHostedZones", + "route53:ChangeResourceRecordSets", + "route53:ListResourceRecordSets" + ] + Effect = "Allow" + Resource = "*" + }, + { + Action = [ + "ec2:DescribeLaunchTemplateVersions", + "autoscaling:DescribeTags", + "autoscaling:DescribeLaunchConfigurations", + "autoscaling:DescribeAutoScalingInstances", + "autoscaling:DescribeAutoScalingGroups" + ] + Effect = "Allow" + Resource = "*" + Sid = "eksWorkerAutoscalingAll" + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "kms_key_role_policy_attachment" { + role = aws_iam_role.karpenter_role[0].name + policy_arn = aws_iam_policy.kms_key_policy.arn +} diff --git a/kms.tf b/kms.tf new file mode 100644 index 0000000000..549d92ae97 --- /dev/null +++ b/kms.tf @@ -0,0 +1,79 @@ +# kms.tf +resource "aws_kms_key" "eks_secrets" { + count = var.encryption ? 
1 : 0 + description = "KMS key for encrypting EKS Kubernetes Secrets at rest" + deletion_window_in_days = 10 + enable_key_rotation = true + + policy = data.aws_iam_policy_document.kms_key_policy.json +} + +data "aws_iam_policy_document" "kms_key_policy" { + statement { + sid = "Enable IAM User Permissions" + effect = "Allow" + + principals { + type = "AWS" + identifiers = ["arn:aws:iam::${data.aws_caller_identity.current.account_id}:root"] + } + + actions = [ + "kms:*" + ] + + resources = ["*"] + } + + statement { + sid = "Allow EKS to use the key" + effect = "Allow" + + principals { + type = "Service" + identifiers = ["eks.amazonaws.com"] + } + + actions = [ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey" + ] + + resources = ["*"] + } +} + + +# iam_policies.tf +resource "aws_iam_policy" "eks_secrets_encryption" { + count = var.encryption ? 1 : 0 + name = "eks-${var.cluster_name}-secrets-encryption-policy" + description = "Allows EKS to use KMS key for encrypting Kubernetes Secrets at rest" + + + policy = jsonencode({ + Version = "2012-10-17", + Statement = [ + { + Effect = "Allow", + Action = [ + "kms:Encrypt", + "kms:Decrypt", + "kms:ReEncrypt*", + "kms:GenerateDataKey*", + "kms:DescribeKey" + ], + Resource = aws_kms_key.eks_secrets[0].arn + } + ] + }) +} + +resource "aws_iam_role_policy_attachment" "eks_cluster_kms_policy" { + count = var.encryption ? 1 : 0 + policy_arn = aws_iam_policy.eks_secrets_encryption[0].arn + role = local.cluster_iam_role_name +} diff --git a/local.tf b/local.tf index 2487de3009..5bd1b21799 100644 --- a/local.tf +++ b/local.tf @@ -1,11 +1,11 @@ locals { asg_tags = [ for item in keys(var.tags) : - map( - "key", item, - "value", element(values(var.tags), index(keys(var.tags), item)), - "propagate_at_launch", "true" - ) + tomap({ + "key" = item, + "value" = element(values(var.tags), index(keys(var.tags), item)), + "propagate_at_launch" = "true" + }) ] cluster_security_group_id = var.cluster_security_group_id == "" ? join("", aws_security_group.cluster.*.id) : var.cluster_security_group_id @@ -16,73 +16,61 @@ locals { default_iam_role_id = concat(aws_iam_role.workers.*.id, [""])[0] kubeconfig_name = var.kubeconfig_name == "" ? "eks_${var.cluster_name}" : var.kubeconfig_name - worker_group_count = length(var.worker_groups) - worker_group_launch_template_count = length(var.worker_groups_launch_template) + default_ami_id_linux = data.aws_ami.eks_worker.id - default_ami_id_linux = data.aws_ami.eks_worker.id - default_ami_id_windows = data.aws_ami.eks_worker_windows.id + # workers_group_defaults = { + # name = "count.index" # Name of the worker group. Literal count.index will never be used but if name is not set, the count.index interpolation will be used. + # tags = [] # A list of map defining extra tags to be applied to the worker group autoscaling group. + # ami_id = "" # AMI ID for the eks workers. If none is provided, Terraform will search for the latest version of their EKS optimized worker AMI based on platform. + # asg_desired_capacity = "1" # Desired worker capacity in the autoscaling group and changing its value will not affect the autoscaling group's desired capacity because the cluster-autoscaler manages up and down scaling of the nodes. Cluster-autoscaler add nodes when pods are in pending state and remove the nodes when they are not required by modifying the desirec_capacity of the autoscaling group. 
Although an issue exists in which if the value of the asg_min_size is changed it modifies the value of asg_desired_capacity. + # asg_max_size = "3" # Maximum worker capacity in the autoscaling group. + # asg_min_size = "1" # Minimum worker capacity in the autoscaling group. NOTE: Change in this paramater will affect the asg_desired_capacity, like changing its value to 2 will change asg_desired_capacity value to 2 but bringing back it to 1 will not affect the asg_desired_capacity. + # asg_force_delete = false # Enable forced deletion for the autoscaling group. + # asg_initial_lifecycle_hooks = [] # Initital lifecycle hook for the autoscaling group. + # instance_type = "" # Size of the workers (non-managed). + # instance_types = [] # Size of the spot node group instances. + # capacity_type = null # Capacity type of node group; Can be SPOT, ONDEMAND (default is null, which creates ONDEMAND) + # spot_price = "" # Cost of spot instance. + # placement_tenancy = "" # The tenancy of the instance. Valid values are "default" or "dedicated". + # node_disk_size = "" # root volume size of nodes. + # root_volume_size = "20" # root volume size of workers instances. + # root_volume_type = "gp3" # root volume type of workers instances, can be 'standard', 'gp2', 'gp3', or 'io1' + # root_iops = "0" # The amount of provisioned IOPS. This must be set with a volume_type of "io1". + # key_name = "" # The key name that should be used for the instances in the autoscaling group + # pre_userdata = "" # userdata to pre-append to the default userdata. + # userdata_template_file = "" # alternate template to use for userdata + # userdata_template_extra_args = {} # Additional arguments to use when expanding the userdata template file + # bootstrap_extra_args = "" # Extra arguments passed to the bootstrap.sh script from the EKS AMI (Amazon Machine Image). + # additional_userdata = "" # userdata to append to the default userdata. + # ebs_optimized = true # sets whether to use ebs optimization on supported types. + # enable_monitoring = true # Enables/disables detailed monitoring. + # public_ip = false # Associate a public ip address with a worker + # kubelet_extra_args = "--pod-max-pids=${var.pod_max_pids}" # This string is passed directly to kubelet if set. Useful for adding labels or taints. + # subnets = var.subnets # A list of subnets to place the worker nodes in. i.e. ["subnet-123", "subnet-456", "subnet-789"] + # additional_security_group_ids = [] # A list of additional security group ids to include in worker launch config + # protect_from_scale_in = false # Prevent AWS from scaling in, so that cluster-autoscaler is solely responsible. + # iam_instance_profile_name = "" # A custom IAM instance profile name. Used when manage_worker_iam_resources is set to false. Incompatible with iam_role_id. + # iam_role_id = "local.default_iam_role_id" # A custom IAM role id. Incompatible with iam_instance_profile_name. Literal local.default_iam_role_id will never be used but if iam_role_id is not set, the local.default_iam_role_id interpolation will be used. + # suspended_processes = ["AZRebalance"] # A list of processes to suspend. i.e. ["AZRebalance", "HealthCheck", "ReplaceUnhealthy"] + # target_group_arns = null # A list of Application LoadBalancer (ALB) target group ARNs to be associated to the autoscaling group + # enabled_metrics = [] # A list of metrics to be collected i.e. 
["GroupMinSize", "GroupMaxSize", "GroupDesiredCapacity"] + # placement_group = "" # The name of the placement group into which to launch the instances, if any. + # service_linked_role_arn = "" # Arn of custom service linked role that Auto Scaling group will use. Useful when you have encrypted EBS + # termination_policies = [] # A list of policies to decide how the instances in the auto scale group should be terminated. + # platform = "linux" # Platform of workers. either "linux" or "windows" + # # Settings for launch templates + # root_block_device_name = data.aws_ami.eks_worker.root_device_name # Root device name for workers. If non is provided, will assume default AMI was used. + # root_kms_key_id = "" # The KMS key to use when encrypting the root storage device + # launch_template_id = "" # The ID of the launch template to use in the autoscaling group/node group + # launch_template_version = "$Latest" # The lastest version of the launch template to use in the autoscaling group + # root_encrypted = "" # Whether the volume should be encrypted or not + # eni_delete = true # Delete the Elastic Network Interface (ENI) on termination (if set to false you will have to manually delete before destroying) + # } - workers_group_defaults_defaults = { - name = "count.index" # Name of the worker group. Literal count.index will never be used but if name is not set, the count.index interpolation will be used. - tags = [] # A list of map defining extra tags to be applied to the worker group autoscaling group. - ami_id = "" # AMI ID for the eks workers. If none is provided, Terraform will search for the latest version of their EKS optimized worker AMI based on platform. - asg_desired_capacity = "1" # Desired worker capacity in the autoscaling group and changing its value will not affect the autoscaling group's desired capacity because the cluster-autoscaler manages up and down scaling of the nodes. Cluster-autoscaler add nodes when pods are in pending state and remove the nodes when they are not required by modifying the desirec_capacity of the autoscaling group. Although an issue exists in which if the value of the asg_min_size is changed it modifies the value of asg_desired_capacity. - asg_max_size = "3" # Maximum worker capacity in the autoscaling group. - asg_min_size = "1" # Minimum worker capacity in the autoscaling group. NOTE: Change in this paramater will affect the asg_desired_capacity, like changing its value to 2 will change asg_desired_capacity value to 2 but bringing back it to 1 will not affect the asg_desired_capacity. - asg_force_delete = false # Enable forced deletion for the autoscaling group. - asg_initial_lifecycle_hooks = [] # Initital lifecycle hook for the autoscaling group. - asg_recreate_on_change = false # Recreate the autoscaling group when the Launch Template or Launch Configuration change. - instance_type = "m4.large" # Size of the workers instances. - spot_price = "" # Cost of spot instance. - placement_tenancy = "" # The tenancy of the instance. Valid values are "default" or "dedicated". - root_volume_size = "100" # root volume size of workers instances. - root_volume_type = "gp2" # root volume type of workers instances, can be 'standard', 'gp2', or 'io1' - root_iops = "0" # The amount of provisioned IOPS. This must be set with a volume_type of "io1". - key_name = "" # The key name that should be used for the instances in the autoscaling group - pre_userdata = "" # userdata to pre-append to the default userdata. 
- userdata_template_file = "" # alternate template to use for userdata - userdata_template_extra_args = {} # Additional arguments to use when expanding the userdata template file - bootstrap_extra_args = "" # Extra arguments passed to the bootstrap.sh script from the EKS AMI (Amazon Machine Image). - additional_userdata = "" # userdata to append to the default userdata. - ebs_optimized = true # sets whether to use ebs optimization on supported types. - enable_monitoring = true # Enables/disables detailed monitoring. - public_ip = false # Associate a public ip address with a worker - kubelet_extra_args = "" # This string is passed directly to kubelet if set. Useful for adding labels or taints. - subnets = var.subnets # A list of subnets to place the worker nodes in. i.e. ["subnet-123", "subnet-456", "subnet-789"] - additional_security_group_ids = [] # A list of additional security group ids to include in worker launch config - protect_from_scale_in = false # Prevent AWS from scaling in, so that cluster-autoscaler is solely responsible. - iam_instance_profile_name = "" # A custom IAM instance profile name. Used when manage_worker_iam_resources is set to false. Incompatible with iam_role_id. - iam_role_id = "local.default_iam_role_id" # A custom IAM role id. Incompatible with iam_instance_profile_name. Literal local.default_iam_role_id will never be used but if iam_role_id is not set, the local.default_iam_role_id interpolation will be used. - suspended_processes = ["AZRebalance"] # A list of processes to suspend. i.e. ["AZRebalance", "HealthCheck", "ReplaceUnhealthy"] - target_group_arns = null # A list of Application LoadBalancer (ALB) target group ARNs to be associated to the autoscaling group - enabled_metrics = [] # A list of metrics to be collected i.e. ["GroupMinSize", "GroupMaxSize", "GroupDesiredCapacity"] - placement_group = "" # The name of the placement group into which to launch the instances, if any. - service_linked_role_arn = "" # Arn of custom service linked role that Auto Scaling group will use. Useful when you have encrypted EBS - termination_policies = [] # A list of policies to decide how the instances in the auto scale group should be terminated. - platform = "linux" # Platform of workers. either "linux" or "windows" - # Settings for launch templates - root_block_device_name = data.aws_ami.eks_worker.root_device_name # Root device name for workers. If non is provided, will assume default AMI was used. - root_kms_key_id = "" # The KMS key to use when encrypting the root storage device - launch_template_version = "$Latest" # The lastest version of the launch template to use in the autoscaling group - launch_template_placement_tenancy = "default" # The placement tenancy for instances - launch_template_placement_group = "" # The name of the placement group into which to launch the instances, if any. - root_encrypted = "" # Whether the volume should be encrypted or not - eni_delete = true # Delete the Elastic Network Interface (ENI) on termination (if set to false you will have to manually delete before destroying) - cpu_credits = "standard" # T2/T3 unlimited mode, can be 'standard' or 'unlimited'. Used 'standard' mode as default to avoid paying higher costs - market_type = null - # Settings for launch templates with mixed instances policy - override_instance_types = ["m5.large", "m5a.large", "m5d.large", "m5ad.large"] # A list of override instance types for mixed instances policy - on_demand_allocation_strategy = null # Strategy to use when launching on-demand instances. 
Valid values: prioritized.
-    on_demand_base_capacity                  = "0"            # Absolute minimum amount of desired capacity that must be fulfilled by on-demand instances
-    on_demand_percentage_above_base_capacity = "0"            # Percentage split between on-demand and Spot instances above the base on-demand capacity
-    spot_allocation_strategy                 = "lowest-price" # Valid options are 'lowest-price' and 'capacity-optimized'. If 'lowest-price', the Auto Scaling group launches instances using the Spot pools with the lowest price, and evenly allocates your instances across the number of Spot pools. If 'capacity-optimized', the Auto Scaling group launches instances using Spot pools that are optimally chosen based on the available Spot capacity.
-    spot_instance_pools                      = 10             # "Number of Spot pools per availability zone to allocate capacity. EC2 Auto Scaling selects the cheapest Spot pools and evenly allocates Spot capacity across the number of Spot pools that you specify."
-    spot_max_price                           = ""             # Maximum price per unit hour that the user is willing to pay for the Spot instances. Default is the on-demand price
-  }
-
-  workers_group_defaults = merge(
-    local.workers_group_defaults_defaults,
-    var.workers_group_defaults,
+  nodes_groups_defaults = merge(
+    { node_sg_group_id = aws_security_group.workers.*.id },
+    var.node_groups_defaults,
+    #{node_instance_profile = concat(aws_iam_instance_profile.node_group_instance_profile.*.arn, [null])[0]}
   )

   ebs_optimized_not_supported = [
diff --git a/modules/eks-cloudwatch-alarms/alarms.tf b/modules/eks-cloudwatch-alarms/alarms.tf
new file mode 100644
index 0000000000..180f3d9248
--- /dev/null
+++ b/modules/eks-cloudwatch-alarms/alarms.tf
@@ -0,0 +1,56 @@
+resource "aws_cloudwatch_metric_alarm" "eks_node_cpu_utilization_too_high" {
+  alarm_name          = "${var.alert_name_prefix}eks_node_cpu_utilization_too_high"
+  comparison_operator = "GreaterThanOrEqualToThreshold"
+  evaluation_periods  = "5"
+  datapoints_to_alarm = "3"
+  metric_name         = "node_cpu_utilization"
+  namespace           = "ContainerInsights"
+  period              = "60"
+  statistic           = "Average"
+  threshold           = var.cpu_threshold
+  alarm_description   = "eks cluster average cpu utilization greater than desired"
+  alarm_actions       = [aws_sns_topic.default.arn]
+  ok_actions          = [aws_sns_topic.default.arn]
+
+  dimensions = {
+    ClusterName = var.cluster_name
+  }
+}
+
+resource "aws_cloudwatch_metric_alarm" "eks_node_memory_utilization_too_high" {
+  alarm_name          = "${var.alert_name_prefix}eks_node_memory_utilization_too_high"
+  comparison_operator = "GreaterThanOrEqualToThreshold"
+  evaluation_periods  = "5"
+  datapoints_to_alarm = "3"
+  metric_name         = "node_memory_utilization"
+  namespace           = "ContainerInsights"
+  period              = "60"
+  statistic           = "Average"
+  threshold           = var.memory_threshold
+  alarm_description   = "eks cluster average memory utilization greater than desired"
+  alarm_actions       = [aws_sns_topic.default.arn]
+  ok_actions          = [aws_sns_topic.default.arn]
+
+  dimensions = {
+    ClusterName = var.cluster_name
+  }
+}
+
+resource "aws_cloudwatch_metric_alarm" "eks_cluster_failed_node_count" {
+  alarm_name          = "${var.alert_name_prefix}eks_cluster_failed_node_count"
+  comparison_operator = "GreaterThanThreshold"
+  evaluation_periods  = "5"
+  datapoints_to_alarm = "3"
+  metric_name         = "cluster_failed_node_count"
+  namespace           = "ContainerInsights"
+  period              = "60"
+  statistic           = "Average"
+  threshold           = var.max_failed_nodes
+  alarm_description   = "eks cluster more nodes failed than allowed"
+  alarm_actions       = [aws_sns_topic.default.arn]
+  ok_actions          = [aws_sns_topic.default.arn]
+
+  dimensions = {
ClusterName = var.cluster_name + } +} diff --git a/modules/eks-cloudwatch-alarms/main.tf b/modules/eks-cloudwatch-alarms/main.tf new file mode 100644 index 0000000000..e0c226ddec --- /dev/null +++ b/modules/eks-cloudwatch-alarms/main.tf @@ -0,0 +1,72 @@ +data "aws_caller_identity" "default" { +} + +# Make a topic +resource "aws_sns_topic" "default" { + name_prefix = "eks-alerts" +} + +resource "aws_sns_topic_policy" "default" { + arn = aws_sns_topic.default.arn + policy = data.aws_iam_policy_document.sns_topic_policy.json +} + +data "aws_iam_policy_document" "sns_topic_policy" { + policy_id = "__default_policy_ID" + + statement { + sid = "__default_statement_ID" + + actions = [ + "SNS:Subscribe", + "SNS:SetTopicAttributes", + "SNS:RemovePermission", + "SNS:Receive", + "SNS:Publish", + "SNS:ListSubscriptionsByTopic", + "SNS:GetTopicAttributes", + "SNS:DeleteTopic", + "SNS:AddPermission", + ] + + effect = "Allow" + resources = [aws_sns_topic.default.arn] + + principals { + type = "AWS" + identifiers = ["*"] + } + + condition { + test = "StringEquals" + variable = "AWS:SourceOwner" + + values = [ + data.aws_caller_identity.default.account_id, + ] + } + } + + statement { + sid = "Allow CloudwatchEvents" + actions = ["sns:Publish"] + resources = [aws_sns_topic.default.arn] + + principals { + type = "Service" + identifiers = ["events.amazonaws.com"] + } + } + + statement { + sid = "Allow RDS Event Notification" + actions = ["sns:Publish"] + resources = [aws_sns_topic.default.arn] + + principals { + type = "Service" + identifiers = ["rds.amazonaws.com"] + } + } +} + diff --git a/modules/eks-cloudwatch-alarms/outputs.tf b/modules/eks-cloudwatch-alarms/outputs.tf new file mode 100644 index 0000000000..eb4100cf6d --- /dev/null +++ b/modules/eks-cloudwatch-alarms/outputs.tf @@ -0,0 +1,9 @@ +output "sns_topic_arn" { + description = "The ARN of the SNS topic" + value = aws_sns_topic.default.arn +} + +output "sns_topic_name" { + description = "The Name of the SNS topic" + value = aws_sns_topic.default.name +} diff --git a/modules/eks-cloudwatch-alarms/variables.tf b/modules/eks-cloudwatch-alarms/variables.tf new file mode 100644 index 0000000000..9d9664024b --- /dev/null +++ b/modules/eks-cloudwatch-alarms/variables.tf @@ -0,0 +1,29 @@ +variable "alert_name_prefix" { + type = string + default = "" + description = "String to prefix CloudWatch alerts with to avoid naming collisions" +} + +variable "cpu_threshold" { + type = string + default = 90 + description = "cpu percentage threshold for the alerts" +} + +variable "memory_threshold" { + type = string + default = 90 + description = "memory percentage threshold for the alerts" +} + +variable "max_failed_nodes" { + type = string + default = 0 + description = "the number of nodes allowed to fail" +} + +variable "cluster_name" { + type = string + default = "eks" + description = "the eks cluster name" +} diff --git a/modules/iam-service-account/main.tf b/modules/iam-service-account/main.tf new file mode 100644 index 0000000000..7a1c3a1500 --- /dev/null +++ b/modules/iam-service-account/main.tf @@ -0,0 +1,51 @@ +locals { + aws_account_id = data.aws_caller_identity.current.account_id +} + +data "aws_caller_identity" "current" {} + +data "aws_iam_policy_document" "assume_role_with_oidc" { + + statement { + effect = "Allow" + + actions = ["sts:AssumeRoleWithWebIdentity"] + + principals { + type = "Federated" + + identifiers = [ + "arn:aws:iam::${local.aws_account_id}:oidc-provider/${var.provider_url}" + ] + } + + condition { + test = "StringEquals" + variable = 
"${var.provider_url}:sub" + values = var.oidc_fully_qualified_subjects + } + } +} + +resource "aws_iam_role" "this" { + + name = var.role_name + path = var.role_path + max_session_duration = var.max_session_duration + + permissions_boundary = var.role_permissions_boundary_arn + assume_role_policy = data.aws_iam_policy_document.assume_role_with_oidc.json + + tags = var.tags +} + +resource "aws_iam_role_policy_attachment" "custom" { + role = aws_iam_role.this.name + policy_arn = aws_iam_policy.incoming_policy.arn +} + +resource "aws_iam_policy" "incoming_policy" { + name_prefix = var.role_name + description = var.role_description + policy = var.role_policy_document +} diff --git a/modules/iam-service-account/outputs.tf b/modules/iam-service-account/outputs.tf new file mode 100644 index 0000000000..aefc13eeee --- /dev/null +++ b/modules/iam-service-account/outputs.tf @@ -0,0 +1,14 @@ +output "iam_role_arn" { + description = "ARN of IAM role" + value = aws_iam_role.this.arn +} + +output "iam_role_name" { + description = "Name of IAM role" + value = aws_iam_role.this.name +} + +output "iam_role_path" { + description = "Path of IAM role" + value = aws_iam_role.this.name +} diff --git a/modules/iam-service-account/variables.tf b/modules/iam-service-account/variables.tf new file mode 100644 index 0000000000..ca6229fe80 --- /dev/null +++ b/modules/iam-service-account/variables.tf @@ -0,0 +1,57 @@ +variable "provider_url" { + description = "URL of the OIDC Provider" + type = string +} + +variable "tags" { + description = "A map of tags to add to IAM role resources" + type = map(string) + default = {} +} + +variable "role_name" { + description = "IAM role name" + type = string + default = "" +} + +variable "role_description" { + description = "IAM role description" + type = string + default = "" +} + +variable "role_path" { + description = "Path of IAM role" + type = string + default = "/" +} + +variable "role_permissions_boundary_arn" { + description = "Permissions boundary ARN to use for IAM role" + type = string + default = "" +} + +variable "max_session_duration" { + description = "Maximum CLI/API session duration in seconds between 3600 and 43200" + type = number + default = 3600 +} + +variable "role_policy_document" { + description = "IAM Policy Document to attach" + type = string +} + +variable "oidc_fully_qualified_subjects" { + description = "The fully qualified OIDC subjects to be added to the role policy" + type = list(string) + default = [] +} + +variable "oidc_subjects_with_wildcards" { + description = "The OIDC subject using wildcards to be added to the role policy" + type = list(string) + default = [] +} diff --git a/modules/node_groups/launch_template.tf b/modules/node_groups/launch_template.tf new file mode 100644 index 0000000000..a34157deed --- /dev/null +++ b/modules/node_groups/launch_template.tf @@ -0,0 +1,115 @@ +data "cloudinit_config" "workers_userdata" { + for_each = nonsensitive(local.node_groups_expanded) + + gzip = false + base64_encode = true + boundary = "//" + + part { + content_type = "text/x-shellscript" + content = templatefile("${path.module}/templates/userdata.sh.tpl", + { + pre_userdata = each.value["pre_userdata"] + kubelet_extra_args = each.value["kubelet_extra_args"] + } + ) + } +} + +# This is based on the LT that EKS would create if no custom one is specified (aws ec2 describe-launch-template-versions --launch-template-id xxx) +# there are several more options one could set but you probably dont need to modify them +# you can take the default and add your 
custom AMI and/or custom tags +# +# Trivia: AWS transparently creates a copy of your LaunchTemplate and actually uses that copy then for the node group. If you DONT use a custom AMI, +# then the default user-data for bootstrapping a cluster is merged in the copy. +resource "aws_launch_template" "workers" { + for_each = nonsensitive(local.node_groups_expanded) + + name_prefix = local.node_groups_names[each.key] + description = format("EKS Managed Node Group custom LT for %s", local.node_groups_names[each.key]) + update_default_version = true + + block_device_mappings { + device_name = "/dev/xvda" + + ebs { + volume_size = lookup(each.value, "disk_size", null) + volume_type = lookup(each.value, "disk_type", null) + encrypted = lookup(each.value, "root_encrypted", null) + kms_key_id = lookup(each.value, "root_kms_key_id", null) + delete_on_termination = true + } + } + metadata_options { + http_put_response_hop_limit = 2 + } + + # iam_instance_profile { + # arn = each.value["node_instance_profile"] + # } + + monitoring { + enabled = lookup(each.value, "enable_monitoring", null) + } + + network_interfaces { + associate_public_ip_address = lookup(each.value, "public_ip", null) + delete_on_termination = lookup(each.value, "eni_delete", null) + security_groups = flatten([ + var.node_groups_defaults["node_sg_group_id"], + lookup(var.node_groups_defaults, "additional_security_group_ids", null), + lookup(each.value, "additional_security_group_ids", null) + ]) + } + + # if you want to use a custom AMI + # image_id = var.ami_id + + # If you use a custom AMI, you need to supply via user-data, the bootstrap script as EKS DOESNT merge its managed user-data then + # you can add more than the minimum code you see in the template, e.g. install SSM agent, see https://github.com/aws/containers-roadmap/issues/593#issuecomment-577181345 + # + # (optionally you can use https://registry.terraform.io/providers/hashicorp/cloudinit/latest/docs/data-sources/cloudinit_config to render the script, example: https://github.com/terraform-aws-modules/terraform-aws-eks/pull/997#issuecomment-705286151) + + user_data = data.cloudinit_config.workers_userdata[each.key].rendered + + key_name = lookup(each.value, "key_name", null) + + # Supplying custom tags to EKS instances is another use-case for LaunchTemplates + tag_specifications { + resource_type = "instance" + + tags = merge( + var.tags, + lookup(var.node_groups_defaults, "additional_tags", {}), + lookup(var.node_groups[each.key], "additional_tags", {}), + { + Name = "eks-${local.node_groups_names[each.key]}" + } + ) + } + + # Supplying custom tags to EKS instances root volumes is another use-case for LaunchTemplates. 
(doesnt add tags to dynamically provisioned volumes via PVC tho) + tag_specifications { + resource_type = "volume" + + tags = merge( + var.tags, + lookup(var.node_groups_defaults, "additional_tags", {}), + lookup(var.node_groups[each.key], "additional_tags", {}), + { + Name = "eks-${local.node_groups_names[each.key]}" + } + ) + } + + # Tag the LT itself + tags = merge( + var.tags, + lookup(var.node_groups_defaults, "additional_tags", {}), + lookup(var.node_groups[each.key], "additional_tags", {}), + ) + + lifecycle { + create_before_destroy = true + } +} diff --git a/modules/node_groups/locals.tf b/modules/node_groups/locals.tf index 43cf672ca0..864472e92b 100644 --- a/modules/node_groups/locals.tf +++ b/modules/node_groups/locals.tf @@ -2,15 +2,19 @@ locals { # Merge defaults and per-group values to make code cleaner node_groups_expanded = { for k, v in var.node_groups : k => merge( { - desired_capacity = var.workers_group_defaults["asg_desired_capacity"] - iam_role_arn = var.default_iam_role_arn - instance_type = var.workers_group_defaults["instance_type"] - key_name = var.workers_group_defaults["key_name"] - max_capacity = var.workers_group_defaults["asg_max_size"] - min_capacity = var.workers_group_defaults["asg_min_size"] - subnets = var.workers_group_defaults["subnets"] + iam_role_arn = var.default_iam_role_arn }, var.node_groups_defaults, v, ) if var.create_eks } + + node_groups_names = { for k, v in local.node_groups_expanded : k => lookup( + v, + "name", + lookup( + v, + "name_prefix", + join("-", [var.cluster_name, k]) + ) + ) } } diff --git a/modules/node_groups/node_groups.tf b/modules/node_groups/node_groups.tf index 62dc6bff98..fb394a9963 100644 --- a/modules/node_groups/node_groups.tf +++ b/modules/node_groups/node_groups.tf @@ -1,50 +1,61 @@ resource "aws_eks_node_group" "workers" { - for_each = local.node_groups_expanded + for_each = nonsensitive(local.node_groups_expanded) - node_group_name = lookup(each.value, "name", join("-", [var.cluster_name, each.key, random_pet.node_groups[each.key].id])) + # Calculate the prefix and ensure it does not exceed 37 characters + node_group_name_prefix = substr( + "${var.is_default ? "" : "${var.cluster_name}_"}${each.value["name"]}", + 0, + 37 + ) + version = lookup(each.value, "version", null) + capacity_type = each.value["capacity_type"] # SPOT or ON_DEMAND - cluster_name = var.cluster_name - node_role_arn = each.value["iam_role_arn"] - subnet_ids = each.value["subnets"] + force_update_version = var.force_update_version + cluster_name = var.cluster_name + node_role_arn = each.value["iam_role_arn"] + subnet_ids = each.value["subnets"] scaling_config { - desired_size = each.value["desired_capacity"] + desired_size = each.value["node_group_desired_capacity"] max_size = each.value["max_capacity"] min_size = each.value["min_capacity"] } + instance_types = each.value["instance_types"] + + # These shouldn't be needed as we specify the version ami_type = lookup(each.value, "ami_type", null) - disk_size = lookup(each.value, "disk_size", null) - instance_types = [each.value["instance_type"]] release_version = lookup(each.value, "ami_release_version", null) - - dynamic "remote_access" { - for_each = each.value["key_name"] != "" ? 
[{ - ec2_ssh_key = each.value["key_name"] - source_security_group_ids = lookup(each.value, "source_security_group_ids", []) - }] : [] - - content { - ec2_ssh_key = remote_access.value["ec2_ssh_key"] - source_security_group_ids = remote_access.value["source_security_group_ids"] - } + launch_template { + id = aws_launch_template.workers[each.key].id + version = aws_launch_template.workers[each.key].default_version } - version = lookup(each.value, "version", null) - labels = merge( lookup(var.node_groups_defaults, "k8s_labels", {}), lookup(var.node_groups[each.key], "k8s_labels", {}) ) - tags = merge( + { + Name = "${each.value["name"]}_node" + }, var.tags, lookup(var.node_groups_defaults, "additional_tags", {}), lookup(var.node_groups[each.key], "additional_tags", {}), ) + update_config { + max_unavailable_percentage =lookup(each.value, "max_unavailable_percentage", 25) + } lifecycle { create_before_destroy = true ignore_changes = [scaling_config.0.desired_size] } + + depends_on = [aws_launch_template.workers] + timeouts { + create = var.create_timeout + update = var.update_timeout + delete = var.delete_timeout + } } diff --git a/modules/node_groups/outputs.tf b/modules/node_groups/outputs.tf index ad148ea514..9ffa6306cd 100644 --- a/modules/node_groups/outputs.tf +++ b/modules/node_groups/outputs.tf @@ -3,8 +3,13 @@ output "node_groups" { value = aws_eks_node_group.workers } +output "launch_templates" { + value = aws_launch_template.workers +} + output "aws_auth_roles" { description = "Roles for use in aws-auth ConfigMap" + sensitive = true value = [ for k, v in local.node_groups_expanded : { worker_role_arn = lookup(v, "iam_role_arn", var.default_iam_role_arn) diff --git a/modules/node_groups/random.tf b/modules/node_groups/random.tf deleted file mode 100644 index 14e7ba2bce..0000000000 --- a/modules/node_groups/random.tf +++ /dev/null @@ -1,21 +0,0 @@ -resource "random_pet" "node_groups" { - for_each = local.node_groups_expanded - - separator = "-" - length = 2 - - keepers = { - ami_type = lookup(each.value, "ami_type", null) - disk_size = lookup(each.value, "disk_size", null) - instance_type = each.value["instance_type"] - iam_role_arn = each.value["iam_role_arn"] - - key_name = each.value["key_name"] - - source_security_group_ids = join("|", compact( - lookup(each.value, "source_security_group_ids", []) - )) - subnet_ids = join("|", each.value["subnets"]) - node_group_name = join("-", [var.cluster_name, each.key]) - } -} diff --git a/modules/node_groups/templates/userdata.sh.tpl b/modules/node_groups/templates/userdata.sh.tpl new file mode 100644 index 0000000000..ba2e5ac980 --- /dev/null +++ b/modules/node_groups/templates/userdata.sh.tpl @@ -0,0 +1,6 @@ +#!/bin/bash -e + +# Allow user supplied pre userdata code +${pre_userdata} + +sed -i '/^KUBELET_EXTRA_ARGS=/a KUBELET_EXTRA_ARGS+=" ${kubelet_extra_args}"' /etc/eks/bootstrap.sh \ No newline at end of file diff --git a/modules/node_groups/variables.tf b/modules/node_groups/variables.tf index c0eaa23d1e..5e75cb2fb8 100644 --- a/modules/node_groups/variables.tf +++ b/modules/node_groups/variables.tf @@ -14,11 +14,6 @@ variable "default_iam_role_arn" { type = string } -variable "workers_group_defaults" { - description = "Workers group defaults from parent" - type = any -} - variable "tags" { description = "A map of tags to add to all resources" type = map(string) @@ -34,3 +29,34 @@ variable "node_groups" { type = any default = {} } +variable "force_update_version" { + description = "force node group to update" + type = bool + default = 
false + +} + +variable "is_default" { + description = "is the default eks cluster" + type = bool + default = true +} + +variable "create_timeout" { + description = "creation time limit" + type = string + default = "60m" + +} +variable "update_timeout" { + description = "update time limit" + type = string + default = "3h" + +} +variable "delete_timeout" { + description = "deletion time limit" + type = string + default = "60m" + +} diff --git a/node_groups.tf b/node_groups.tf index 2c689c3818..894e539a8d 100644 --- a/node_groups.tf +++ b/node_groups.tf @@ -17,12 +17,16 @@ data "null_data_source" "node_groups" { } module "node_groups" { - source = "./modules/node_groups" - create_eks = var.create_eks - cluster_name = coalescelist(data.null_data_source.node_groups[*].outputs["cluster_name"], [""])[0] - default_iam_role_arn = coalescelist(aws_iam_role.workers[*].arn, [""])[0] - workers_group_defaults = local.workers_group_defaults - tags = var.tags - node_groups_defaults = var.node_groups_defaults - node_groups = var.node_groups + source = "./modules/node_groups" # git::https://github.com/cloudposse/terraform-aws-eks-node-group.git?ref=tags/0.24.0 + create_eks = var.create_eks + cluster_name = var.cluster_name + default_iam_role_arn = coalescelist(aws_iam_role.workers[*].arn, [""])[0] + tags = var.tags + node_groups_defaults = local.nodes_groups_defaults + node_groups = var.node_groups + force_update_version = var.force_update_version + is_default = var.is_default + create_timeout = var.node_groups_create_timeout + update_timeout = var.node_groups_update_timeout + delete_timeout = var.node_groups_delete_timeout } diff --git a/outputs.tf b/outputs.tf index b1f8c3c049..476e30aefc 100644 --- a/outputs.tf +++ b/outputs.tf @@ -1,26 +1,26 @@ output "cluster_id" { description = "The name/id of the EKS cluster." - value = element(concat(aws_eks_cluster.this.*.id, list("")), 0) + value = element(concat(aws_eks_cluster.this.*.id, tolist([""])), 0) } output "cluster_arn" { description = "The Amazon Resource Name (ARN) of the cluster." - value = element(concat(aws_eks_cluster.this.*.arn, list("")), 0) + value = element(concat(aws_eks_cluster.this.*.arn, tolist([""])), 0) } output "cluster_certificate_authority_data" { description = "Nested attribute containing certificate-authority-data for your cluster. This is the base64 encoded certificate data required to communicate with your cluster." - value = element(concat(aws_eks_cluster.this[*].certificate_authority[0].data, list("")), 0) + value = element(concat(aws_eks_cluster.this[*].certificate_authority[0].data, tolist([""])), 0) } output "cluster_endpoint" { description = "The endpoint for your EKS Kubernetes API." - value = element(concat(aws_eks_cluster.this.*.endpoint, list("")), 0) + value = element(concat(aws_eks_cluster.this.*.endpoint, tolist([""])), 0) } output "cluster_version" { description = "The Kubernetes server version for the EKS cluster." - value = element(concat(aws_eks_cluster.this[*].version, list("")), 0) + value = element(concat(aws_eks_cluster.this[*].version, tolist([""])), 0) } output "cluster_security_group_id" { @@ -67,93 +67,62 @@ output "oidc_provider_arn" { description = "The ARN of the OIDC Provider if `enable_irsa = true`." value = var.enable_irsa ? concat(aws_iam_openid_connect_provider.oidc_provider[*].arn, [""])[0] : null } - -output "workers_asg_arns" { - description = "IDs of the autoscaling groups containing workers." 
- value = concat( - aws_autoscaling_group.workers.*.arn, - aws_autoscaling_group.workers_launch_template.*.arn, - ) -} - -output "workers_asg_names" { - description = "Names of the autoscaling groups containing workers." - value = concat( - aws_autoscaling_group.workers.*.id, - aws_autoscaling_group.workers_launch_template.*.id, - ) -} - -output "workers_user_data" { - description = "User data of worker groups" - value = concat( - data.template_file.userdata.*.rendered, - data.template_file.launch_template_userdata.*.rendered, - ) -} - -output "workers_default_ami_id" { - description = "ID of the default worker group AMI" - value = data.aws_ami.eks_worker.id +output "worker_iam_role_name" { + description = "default IAM role name for EKS worker groups" + value = coalescelist( + aws_iam_role.workers.*.name, + [""] + )[0] } -output "workers_launch_template_ids" { - description = "IDs of the worker launch templates." - value = aws_launch_template.workers_launch_template.*.id +output "worker_iam_role_arn" { + description = "default IAM role ARN for EKS worker groups" + value = coalescelist( + aws_iam_role.workers.*.arn, + [""] + )[0] } -output "workers_launch_template_arns" { - description = "ARNs of the worker launch templates." - value = aws_launch_template.workers_launch_template.*.arn +output "node_groups" { + description = "Outputs from EKS node groups. Map of maps, keyed by var.node_groups keys" + value = module.node_groups.node_groups } -output "workers_launch_template_latest_versions" { - description = "Latest versions of the worker launch templates." - value = aws_launch_template.workers_launch_template.*.latest_version +output "node_group_LTs" { + value = module.node_groups.launch_templates } output "worker_security_group_id" { - description = "Security group ID attached to the EKS workers." 
+  description = "Security group ID attached to the EKS worker nodes"
   value       = local.worker_security_group_id
 }

-output "worker_iam_instance_profile_arns" {
-  description = "default IAM instance profile ARN for EKS worker groups"
-  value = concat(
-    aws_iam_instance_profile.workers.*.arn,
-    aws_iam_instance_profile.workers_launch_template.*.arn
-  )
+output "worker_iam_arn" {
+  description = "IAM Role ARN for worker groups"
+  value       = join("", aws_iam_role.workers.*.arn)
 }

-output "worker_iam_instance_profile_names" {
-  description = "default IAM instance profile name for EKS worker groups"
-  value = concat(
-    aws_iam_instance_profile.workers.*.name,
-    aws_iam_instance_profile.workers_launch_template.*.name
-  )
+output "worker_iam_name" {
+  description = "IAM Role name for worker groups"
+  value       = join("", aws_iam_role.workers.*.name)
 }

-output "worker_iam_role_name" {
-  description = "default IAM role name for EKS worker groups"
-  value = coalescelist(
-    aws_iam_role.workers.*.name,
-    data.aws_iam_instance_profile.custom_worker_group_iam_instance_profile.*.role_name,
-    data.aws_iam_instance_profile.custom_worker_group_launch_template_iam_instance_profile.*.role_name,
-    [""]
-  )[0]
+output "karpenter_node_instance_profile_name" {
+  description = "IAM instance profile name for the Karpenter node group"
+  value       = aws_iam_instance_profile.karpenter_node_instance_profile.name
 }

-output "worker_iam_role_arn" {
-  description = "default IAM role ARN for EKS worker groups"
-  value = coalescelist(
-    aws_iam_role.workers.*.arn,
-    data.aws_iam_instance_profile.custom_worker_group_iam_instance_profile.*.role_arn,
-    data.aws_iam_instance_profile.custom_worker_group_launch_template_iam_instance_profile.*.role_arn,
-    [""]
-  )[0]
+output "karpenter_node_role_arn" {
+  description = "IAM role ARN for the Karpenter node group"
+  value       = aws_iam_role.karpenter_role[0].arn
 }

-output "node_groups" {
-  description = "Outputs from EKS node groups. Map of maps, keyed by var.node_groups keys"
-  value       = module.node_groups.node_groups
+output "karpenter_iam_role_name" {
+  description = "IAM role name for the Karpenter node group"
+  value       = aws_iam_role.karpenter_role[0].name
+}
+
+output "fsx_csi_driver_role_arn" {
+  description = "IAM role ARN for FSx CSI driver"
+  value       = var.enable_aws_fsx_csi_driver_addon && length(module.fsx_csi_irsa) > 0 ? module.fsx_csi_irsa[0].iam_role_arn : ""
 }
diff --git a/variables.tf b/variables.tf
index b9fa6c4c0a..ab3b0b500b 100644
--- a/variables.tf
+++ b/variables.tf
@@ -90,16 +90,96 @@ variable "vpc_id" {
   type        = string
 }

-variable "worker_groups" {
-  description = "A list of maps defining worker group configurations to be defined using AWS Launch Configurations. See workers_group_defaults for valid keys."
-  type        = any
-  default     = []
+# Cluster addons vars
+variable "enable_vpc_cni_addon" {
+  description = "Whether or not to install the vpc-cni addon in the cluster"
+  type        = bool
+  default     = false
 }

-variable "workers_group_defaults" {
-  description = "Override default values for target groups. See workers_group_defaults_defaults in local.tf for valid keys."
-  type        = any
-  default     = {}
+variable "vpc_cni_version" {
+  description = "Version of the vpc-cni container to install"
+  type        = string
+}
+
+variable "vpc_cni_resolve_conflicts" {
+  description = "Whether or not to force overwrite of the config.
diff --git a/variables.tf b/variables.tf
index b9fa6c4c0a..ab3b0b500b 100644
--- a/variables.tf
+++ b/variables.tf
@@ -90,16 +90,96 @@ variable "vpc_id" {
   type        = string
 }

-variable "worker_groups" {
-  description = "A list of maps defining worker group configurations to be defined using AWS Launch Configurations. See workers_group_defaults for valid keys."
-  type        = any
-  default     = []
+# Cluster addons vars
+variable "enable_vpc_cni_addon" {
+  description = "Whether or not to install the vpc-cni addon in the cluster"
+  type        = bool
+  default     = false
 }

-variable "workers_group_defaults" {
-  description = "Override default values for target groups. See workers_group_defaults_defaults in local.tf for valid keys."
-  type        = any
-  default     = {}
+variable "vpc_cni_version" {
+  description = "Version of the vpc-cni addon to install"
+  type        = string
+}
+
+variable "vpc_cni_resolve_conflicts" {
+  description = "Whether or not to force overwrite of the config. Options: NONE, OVERWRITE"
+  type        = string
+  default     = "NONE"
+}
+
+variable "enable_coredns_addon" {
+  description = "Whether or not to install the coredns addon in the cluster"
+  type        = bool
+  default     = false
+}
+
+variable "coredns_version" {
+  description = "Version of the coredns addon to install"
+  type        = string
+}
+
+variable "coredns_resolve_conflicts" {
+  description = "Whether or not to force overwrite of the config. Options: NONE, OVERWRITE"
+  type        = string
+  default     = "NONE"
+}
+
+variable "coredns_scaling_enabled" {
+  description = "Whether or not to enable auto-scaling of coredns pods"
+  type        = bool
+  default     = false
+}
+
+variable "coredns_minreplicas" {
+  description = "Minimum number of coredns pods (if autoscaling enabled)"
+  type        = number
+  default     = 2
+}
+
+variable "coredns_maxreplicas" {
+  description = "Maximum number of coredns pods (if autoscaling enabled)"
+  type        = number
+  default     = 10
+}
+
+variable "enable_kube_proxy_addon" {
+  description = "Whether or not to install the kube-proxy addon in the cluster"
+  type        = bool
+  default     = false
+}
+
+variable "kube_proxy_version" {
+  description = "Version of the kube-proxy addon to install"
+  type        = string
+}
+
+variable "kube_proxy_resolve_conflicts" {
+  description = "Whether or not to force overwrite of the config. Options: NONE, OVERWRITE"
+  type        = string
+  default     = "NONE"
+}
+
+variable "enable_aws_ebs_csi_driver_addon" {
+  description = "Whether or not to install the ebs driver addon in the cluster"
+  type        = bool
+  default     = true
+}
+
+variable "aws_ebs_csi_driver_version" {
+  description = "Version of the ebs csi driver addon to install"
+  type        = string
+}
+
+variable "aws_ebs_csi_driver_resolve_conflicts" {
+  description = "Whether or not to force overwrite of the config. Options: NONE, OVERWRITE"
+  type        = string
+  default     = "NONE"
+}
+
+variable "ebs_csi_driver_role_arn" {
+  description = "IAM role ARN used by the EBS CSI driver service account (needed for on-demand volume resizing)"
+  type        = string
 }

 variable "worker_groups_launch_template" {
@@ -120,12 +200,6 @@ variable "worker_ami_name_filter" {
   default     = ""
 }

-variable "worker_ami_name_filter_windows" {
-  description = "Name filter for AWS EKS Windows worker AMI. If not provided, the latest official AMI for the specified 'cluster_version' is used."
-  type        = string
-  default     = ""
-}
-
 variable "worker_ami_owner_id" {
   description = "The ID of the owner for the AMI to use for the AWS EKS workers. Valid values are an AWS account ID, 'self' (the current account), or an AWS owner alias (e.g. 'amazon', 'aws-marketplace', 'microsoft')."
   type        = string
@@ -192,6 +266,12 @@ variable "cluster_create_timeout" {
   default     = "15m"
 }

+variable "cluster_update_timeout" {
+  description = "Timeout value when updating the EKS cluster."
+  type        = string
+  default     = "15m"
+}
+
 variable "cluster_delete_timeout" {
   description = "Timeout value when deleting the EKS cluster."
   type        = string
@@ -299,3 +379,108 @@ variable "eks_oidc_root_ca_thumbprint" {
   description = "Thumbprint of Root CA for EKS OIDC, Valid until 2037"
   default     = "9e99a48a9960b14926bb7f3b02e22da2b0ab7280"
 }
+
+variable "force_update_version" {
+  description = "Whether to force node groups to update to the configured version"
+  type        = bool
+  default     = false
+}
+
+variable "is_default" {
+  description = "Whether this is the default EKS cluster"
+  type        = bool
+  default     = true
+}
+
+variable "node_groups_create_timeout" {
+  description = "Timeout value when creating EKS node groups."
+  type        = string
+  default     = "60m"
+}
+
+variable "node_groups_update_timeout" {
+  description = "Timeout value when updating EKS node groups."
+  type        = string
+  default     = "3h"
+}
+
+variable "node_groups_delete_timeout" {
+  description = "Timeout value when deleting EKS node groups."
+  type        = string
+  default     = "60m"
+}
+
+variable "allow_all_egress" {
+  description = "Whether to allow all egress traffic from worker nodes. When false, the egress_* variables below define a more restrictive rule set."
+  type        = bool
+  default     = true
+}
+
+variable "egress_ports_allowed" {
+  description = "TCP ports on which egress to the Internet is allowed when allow_all_egress is false"
+  type        = list(any)
+  default     = []
+}
+
+variable "egress_cidr_blocks_allowed" {
+  description = "CIDR blocks to which all egress traffic is allowed when allow_all_egress is false"
+  type        = list(any)
+  default     = []
+}
+
+variable "egress_custom_allowed" {
+  description = "Custom egress rules (objects with cidr_blocks, from_port and to_port) applied when allow_all_egress is false"
+  type        = list(any)
+  default     = []
+}
+
+variable "logging_stage" {
+  type        = string
+  description = "Stage (e.g. `prod`, `dev`, `staging`)"
+}
+
+# EFS CSI driver variables
+variable "enable_aws_efs_csi_driver_addon" {
+  description = "Whether or not to install the efs driver addon in the cluster"
+  type        = bool
+  default     = true
+}
+
+variable "aws_efs_csi_driver_version" {
+  description = "Version of the efs csi driver addon to install"
+  type        = string
+}
+
+variable "aws_efs_csi_driver_resolve_conflicts" {
+  description = "Whether or not to force overwrite of the config. Options: NONE, OVERWRITE"
+  type        = string
+  default     = "NONE"
+}
+
+variable "efs_csi_driver_role_arn" {
+  description = "IAM role ARN used by the EFS CSI driver service account"
+  type        = string
+}
+
+# FSx CSI driver variables
+variable "enable_aws_fsx_csi_driver_addon" {
+  description = "Whether or not to install the fsx driver addon in the cluster"
+  type        = bool
+  default     = false
+}
+
+variable "aws_fsx_csi_driver_version" {
+  description = "Version of the fsx csi driver addon to install"
+  type        = string
+  default     = ""
+}
+
+variable "aws_fsx_csi_driver_resolve_conflicts" {
+  description = "Whether or not to force overwrite of the config. Options: NONE, OVERWRITE"
+  type        = string
+  default     = "NONE"
+}
+
+variable "encryption" {
+  description = "Whether to enable encryption"
+  type        = bool
+  default     = false
+}
diff --git a/versions.tf b/versions.tf
index 95fb1ef19e..440e885e23 100644
--- a/versions.tf
+++ b/versions.tf
@@ -2,7 +2,7 @@ terraform {
   required_version = ">= 0.12.9"

   required_providers {
-    aws      = ">= 2.44.0"
+    aws      = ">= 5.13.0"
     local    = ">= 1.2"
     null     = ">= 2.1"
     template = ">= 2.1"
diff --git a/workers.tf b/workers.tf
index 839452ed43..dcbb5dc4f2 100644
--- a/workers.tf
+++ b/workers.tf
@@ -1,223 +1,4 @@
-# Worker Groups using Launch Configurations
-resource "aws_autoscaling_group" "workers" {
-  count = var.create_eks ?
local.worker_group_count : 0 - name_prefix = join( - "-", - compact( - [ - aws_eks_cluster.this[0].name, - lookup(var.worker_groups[count.index], "name", count.index), - lookup(var.worker_groups[count.index], "asg_recreate_on_change", local.workers_group_defaults["asg_recreate_on_change"]) ? random_pet.workers[count.index].id : "" - ] - ) - ) - desired_capacity = lookup( - var.worker_groups[count.index], - "asg_desired_capacity", - local.workers_group_defaults["asg_desired_capacity"], - ) - max_size = lookup( - var.worker_groups[count.index], - "asg_max_size", - local.workers_group_defaults["asg_max_size"], - ) - min_size = lookup( - var.worker_groups[count.index], - "asg_min_size", - local.workers_group_defaults["asg_min_size"], - ) - force_delete = lookup( - var.worker_groups[count.index], - "asg_force_delete", - local.workers_group_defaults["asg_force_delete"], - ) - target_group_arns = lookup( - var.worker_groups[count.index], - "target_group_arns", - local.workers_group_defaults["target_group_arns"] - ) - service_linked_role_arn = lookup( - var.worker_groups[count.index], - "service_linked_role_arn", - local.workers_group_defaults["service_linked_role_arn"], - ) - launch_configuration = aws_launch_configuration.workers.*.id[count.index] - vpc_zone_identifier = lookup( - var.worker_groups[count.index], - "subnets", - local.workers_group_defaults["subnets"] - ) - protect_from_scale_in = lookup( - var.worker_groups[count.index], - "protect_from_scale_in", - local.workers_group_defaults["protect_from_scale_in"], - ) - suspended_processes = lookup( - var.worker_groups[count.index], - "suspended_processes", - local.workers_group_defaults["suspended_processes"] - ) - enabled_metrics = lookup( - var.worker_groups[count.index], - "enabled_metrics", - local.workers_group_defaults["enabled_metrics"] - ) - placement_group = lookup( - var.worker_groups[count.index], - "placement_group", - local.workers_group_defaults["placement_group"], - ) - termination_policies = lookup( - var.worker_groups[count.index], - "termination_policies", - local.workers_group_defaults["termination_policies"] - ) - - dynamic "initial_lifecycle_hook" { - for_each = var.worker_create_initial_lifecycle_hooks ? 
lookup(var.worker_groups[count.index], "asg_initial_lifecycle_hooks", local.workers_group_defaults["asg_initial_lifecycle_hooks"]) : [] - content { - name = initial_lifecycle_hook.value["name"] - lifecycle_transition = initial_lifecycle_hook.value["lifecycle_transition"] - notification_metadata = lookup(initial_lifecycle_hook.value, "notification_metadata", null) - heartbeat_timeout = lookup(initial_lifecycle_hook.value, "heartbeat_timeout", null) - notification_target_arn = lookup(initial_lifecycle_hook.value, "notification_target_arn", null) - role_arn = lookup(initial_lifecycle_hook.value, "role_arn", null) - default_result = lookup(initial_lifecycle_hook.value, "default_result", null) - } - } - - tags = concat( - [ - { - "key" = "Name" - "value" = "${aws_eks_cluster.this[0].name}-${lookup(var.worker_groups[count.index], "name", count.index)}-eks_asg" - "propagate_at_launch" = true - }, - { - "key" = "kubernetes.io/cluster/${aws_eks_cluster.this[0].name}" - "value" = "owned" - "propagate_at_launch" = true - }, - { - "key" = "k8s.io/cluster/${aws_eks_cluster.this[0].name}" - "value" = "owned" - "propagate_at_launch" = true - }, - ], - local.asg_tags, - lookup( - var.worker_groups[count.index], - "tags", - local.workers_group_defaults["tags"] - ) - ) - - lifecycle { - create_before_destroy = true - ignore_changes = [desired_capacity] - } -} - -resource "aws_launch_configuration" "workers" { - count = var.create_eks ? local.worker_group_count : 0 - name_prefix = "${aws_eks_cluster.this[0].name}-${lookup(var.worker_groups[count.index], "name", count.index)}" - associate_public_ip_address = lookup( - var.worker_groups[count.index], - "public_ip", - local.workers_group_defaults["public_ip"], - ) - security_groups = flatten([ - local.worker_security_group_id, - var.worker_additional_security_group_ids, - lookup( - var.worker_groups[count.index], - "additional_security_group_ids", - local.workers_group_defaults["additional_security_group_ids"] - ) - ]) - iam_instance_profile = coalescelist( - aws_iam_instance_profile.workers.*.id, - data.aws_iam_instance_profile.custom_worker_group_iam_instance_profile.*.name, - )[count.index] - image_id = lookup( - var.worker_groups[count.index], - "ami_id", - lookup(var.worker_groups[count.index], "platform", local.workers_group_defaults["platform"]) == "windows" ? local.default_ami_id_windows : local.default_ami_id_linux, - ) - instance_type = lookup( - var.worker_groups[count.index], - "instance_type", - local.workers_group_defaults["instance_type"], - ) - key_name = lookup( - var.worker_groups[count.index], - "key_name", - local.workers_group_defaults["key_name"], - ) - user_data_base64 = base64encode(data.template_file.userdata.*.rendered[count.index]) - ebs_optimized = lookup( - var.worker_groups[count.index], - "ebs_optimized", - ! 
contains( - local.ebs_optimized_not_supported, - lookup( - var.worker_groups[count.index], - "instance_type", - local.workers_group_defaults["instance_type"] - ) - ) - ) - enable_monitoring = lookup( - var.worker_groups[count.index], - "enable_monitoring", - local.workers_group_defaults["enable_monitoring"], - ) - spot_price = lookup( - var.worker_groups[count.index], - "spot_price", - local.workers_group_defaults["spot_price"], - ) - placement_tenancy = lookup( - var.worker_groups[count.index], - "placement_tenancy", - local.workers_group_defaults["placement_tenancy"], - ) - - root_block_device { - volume_size = lookup( - var.worker_groups[count.index], - "root_volume_size", - local.workers_group_defaults["root_volume_size"], - ) - volume_type = lookup( - var.worker_groups[count.index], - "root_volume_type", - local.workers_group_defaults["root_volume_type"], - ) - iops = lookup( - var.worker_groups[count.index], - "root_iops", - local.workers_group_defaults["root_iops"], - ) - delete_on_termination = true - } - - lifecycle { - create_before_destroy = true - } -} - -resource "random_pet" "workers" { - count = var.create_eks ? local.worker_group_count : 0 - - separator = "-" - length = 2 - - keepers = { - lc_name = aws_launch_configuration.workers[count.index].name - } -} resource "aws_security_group" "workers" { count = var.worker_security_group_id == "" && var.create_eks ? 1 : 0 @@ -229,12 +10,13 @@ resource "aws_security_group" "workers" { { "Name" = "${aws_eks_cluster.this[0].name}-eks_worker_sg" "kubernetes.io/cluster/${aws_eks_cluster.this[0].name}" = "owned" + "karpenter.sh/discovery" = aws_eks_cluster.this[0].name }, ) } -resource "aws_security_group_rule" "workers_egress_internet" { - count = var.worker_security_group_id == "" && var.create_eks ? 1 : 0 +resource "aws_security_group_rule" "workers_egress_whole_internet" { + count = var.worker_security_group_id == "" && var.create_eks && var.allow_all_egress ? 1 : 0 description = "Allow nodes all egress to the Internet." protocol = "-1" security_group_id = local.worker_security_group_id @@ -244,6 +26,39 @@ resource "aws_security_group_rule" "workers_egress_internet" { type = "egress" } +resource "aws_security_group_rule" "workers_egress_cidr_blocks_internet" { + count = var.worker_security_group_id == "" && var.create_eks && !var.allow_all_egress ? 1 : 0 + description = "Allow nodes all egress to these cidr blocks." + protocol = "-1" + security_group_id = local.worker_security_group_id + cidr_blocks = var.egress_cidr_blocks_allowed + from_port = 0 + to_port = 0 + type = "egress" +} + +resource "aws_security_group_rule" "workers_egress_internet_ports" { + count = var.worker_security_group_id == "" && var.create_eks && !var.allow_all_egress ? length(var.egress_ports_allowed) : 0 + description = "Allow nodes all egress to the Internet on these ports." + protocol = "tcp" + security_group_id = local.worker_security_group_id + cidr_blocks = ["0.0.0.0/0"] + from_port = var.egress_ports_allowed[count.index] + to_port = var.egress_ports_allowed[count.index] + type = "egress" +} + +resource "aws_security_group_rule" "workers_egress_custom_rules" { + count = var.worker_security_group_id == "" && var.create_eks && !var.allow_all_egress ? length(var.egress_custom_allowed) : 0 + description = "Allow nodes all egress to these custom blocks and ports." 
+ protocol = "tcp" + security_group_id = local.worker_security_group_id + cidr_blocks = var.egress_custom_allowed[count.index].cidr_blocks + from_port = var.egress_custom_allowed[count.index].from_port + to_port = var.egress_custom_allowed[count.index].to_port + type = "egress" +} + resource "aws_security_group_rule" "workers_ingress_self" { count = var.worker_security_group_id == "" && var.create_eks ? 1 : 0 description = "Allow node to communicate with each other." @@ -299,18 +114,6 @@ resource "aws_iam_role" "workers" { tags = var.tags } -resource "aws_iam_instance_profile" "workers" { - count = var.manage_worker_iam_resources && var.create_eks ? local.worker_group_count : 0 - name_prefix = aws_eks_cluster.this[0].name - role = lookup( - var.worker_groups[count.index], - "iam_role_id", - local.default_iam_role_id, - ) - - path = var.iam_path -} - resource "aws_iam_role_policy_attachment" "workers_AmazonEKSWorkerNodePolicy" { count = var.manage_worker_iam_resources && var.create_eks ? 1 : 0 policy_arn = "arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy" diff --git a/workers_launch_template.tf b/workers_launch_template.tf deleted file mode 100644 index ba51baaf9d..0000000000 --- a/workers_launch_template.tf +++ /dev/null @@ -1,389 +0,0 @@ -# Worker Groups using Launch Templates - -resource "aws_autoscaling_group" "workers_launch_template" { - count = var.create_eks ? local.worker_group_launch_template_count : 0 - name_prefix = join( - "-", - compact( - [ - aws_eks_cluster.this[0].name, - lookup(var.worker_groups_launch_template[count.index], "name", count.index), - lookup(var.worker_groups_launch_template[count.index], "asg_recreate_on_change", local.workers_group_defaults["asg_recreate_on_change"]) ? random_pet.workers_launch_template[count.index].id : "" - ] - ) - ) - desired_capacity = lookup( - var.worker_groups_launch_template[count.index], - "asg_desired_capacity", - local.workers_group_defaults["asg_desired_capacity"], - ) - max_size = lookup( - var.worker_groups_launch_template[count.index], - "asg_max_size", - local.workers_group_defaults["asg_max_size"], - ) - min_size = lookup( - var.worker_groups_launch_template[count.index], - "asg_min_size", - local.workers_group_defaults["asg_min_size"], - ) - force_delete = lookup( - var.worker_groups_launch_template[count.index], - "asg_force_delete", - local.workers_group_defaults["asg_force_delete"], - ) - target_group_arns = lookup( - var.worker_groups_launch_template[count.index], - "target_group_arns", - local.workers_group_defaults["target_group_arns"] - ) - service_linked_role_arn = lookup( - var.worker_groups_launch_template[count.index], - "service_linked_role_arn", - local.workers_group_defaults["service_linked_role_arn"], - ) - vpc_zone_identifier = lookup( - var.worker_groups_launch_template[count.index], - "subnets", - local.workers_group_defaults["subnets"] - ) - protect_from_scale_in = lookup( - var.worker_groups_launch_template[count.index], - "protect_from_scale_in", - local.workers_group_defaults["protect_from_scale_in"], - ) - suspended_processes = lookup( - var.worker_groups_launch_template[count.index], - "suspended_processes", - local.workers_group_defaults["suspended_processes"] - ) - enabled_metrics = lookup( - var.worker_groups_launch_template[count.index], - "enabled_metrics", - local.workers_group_defaults["enabled_metrics"] - ) - placement_group = lookup( - var.worker_groups_launch_template[count.index], - "placement_group", - local.workers_group_defaults["placement_group"], - ) - termination_policies = 
lookup( - var.worker_groups_launch_template[count.index], - "termination_policies", - local.workers_group_defaults["termination_policies"] - ) - - dynamic mixed_instances_policy { - iterator = item - for_each = (lookup(var.worker_groups_launch_template[count.index], "override_instance_types", null) != null) || (lookup(var.worker_groups_launch_template[count.index], "on_demand_allocation_strategy", null) != null) ? list(var.worker_groups_launch_template[count.index]) : [] - - content { - instances_distribution { - on_demand_allocation_strategy = lookup( - item.value, - "on_demand_allocation_strategy", - "prioritized", - ) - on_demand_base_capacity = lookup( - item.value, - "on_demand_base_capacity", - local.workers_group_defaults["on_demand_base_capacity"], - ) - on_demand_percentage_above_base_capacity = lookup( - item.value, - "on_demand_percentage_above_base_capacity", - local.workers_group_defaults["on_demand_percentage_above_base_capacity"], - ) - spot_allocation_strategy = lookup( - item.value, - "spot_allocation_strategy", - local.workers_group_defaults["spot_allocation_strategy"], - ) - spot_instance_pools = lookup( - item.value, - "spot_instance_pools", - local.workers_group_defaults["spot_instance_pools"], - ) - spot_max_price = lookup( - item.value, - "spot_max_price", - local.workers_group_defaults["spot_max_price"], - ) - } - - launch_template { - launch_template_specification { - launch_template_id = aws_launch_template.workers_launch_template.*.id[count.index] - version = lookup( - var.worker_groups_launch_template[count.index], - "launch_template_version", - local.workers_group_defaults["launch_template_version"], - ) - } - - dynamic "override" { - for_each = lookup( - var.worker_groups_launch_template[count.index], - "override_instance_types", - local.workers_group_defaults["override_instance_types"] - ) - - content { - instance_type = override.value - } - } - - } - } - } - dynamic launch_template { - iterator = item - for_each = (lookup(var.worker_groups_launch_template[count.index], "override_instance_types", null) != null) || (lookup(var.worker_groups_launch_template[count.index], "on_demand_allocation_strategy", null) != null) ? [] : list(var.worker_groups_launch_template[count.index]) - - content { - id = aws_launch_template.workers_launch_template.*.id[count.index] - version = lookup( - var.worker_groups_launch_template[count.index], - "launch_template_version", - local.workers_group_defaults["launch_template_version"], - ) - } - } - - dynamic "initial_lifecycle_hook" { - for_each = var.worker_create_initial_lifecycle_hooks ? 
lookup(var.worker_groups_launch_template[count.index], "asg_initial_lifecycle_hooks", local.workers_group_defaults["asg_initial_lifecycle_hooks"]) : [] - content { - name = initial_lifecycle_hook.value["name"] - lifecycle_transition = initial_lifecycle_hook.value["lifecycle_transition"] - notification_metadata = lookup(initial_lifecycle_hook.value, "notification_metadata", null) - heartbeat_timeout = lookup(initial_lifecycle_hook.value, "heartbeat_timeout", null) - notification_target_arn = lookup(initial_lifecycle_hook.value, "notification_target_arn", null) - role_arn = lookup(initial_lifecycle_hook.value, "role_arn", null) - default_result = lookup(initial_lifecycle_hook.value, "default_result", null) - } - } - - tags = concat( - [ - { - "key" = "Name" - "value" = "${aws_eks_cluster.this[0].name}-${lookup( - var.worker_groups_launch_template[count.index], - "name", - count.index, - )}-eks_asg" - "propagate_at_launch" = true - }, - { - "key" = "kubernetes.io/cluster/${aws_eks_cluster.this[0].name}" - "value" = "owned" - "propagate_at_launch" = true - }, - ], - local.asg_tags, - lookup( - var.worker_groups_launch_template[count.index], - "tags", - local.workers_group_defaults["tags"] - ) - ) - - lifecycle { - create_before_destroy = true - ignore_changes = [desired_capacity] - } -} - -resource "aws_launch_template" "workers_launch_template" { - count = var.create_eks ? (local.worker_group_launch_template_count) : 0 - name_prefix = "${aws_eks_cluster.this[0].name}-${lookup( - var.worker_groups_launch_template[count.index], - "name", - count.index, - )}" - - network_interfaces { - associate_public_ip_address = lookup( - var.worker_groups_launch_template[count.index], - "public_ip", - local.workers_group_defaults["public_ip"], - ) - delete_on_termination = lookup( - var.worker_groups_launch_template[count.index], - "eni_delete", - local.workers_group_defaults["eni_delete"], - ) - security_groups = flatten([ - local.worker_security_group_id, - var.worker_additional_security_group_ids, - lookup( - var.worker_groups_launch_template[count.index], - "additional_security_group_ids", - local.workers_group_defaults["additional_security_group_ids"], - ), - ]) - } - - iam_instance_profile { - name = coalescelist( - aws_iam_instance_profile.workers_launch_template.*.name, - data.aws_iam_instance_profile.custom_worker_group_launch_template_iam_instance_profile.*.name, - )[count.index] - } - - image_id = lookup( - var.worker_groups_launch_template[count.index], - "ami_id", - lookup(var.worker_groups_launch_template[count.index], "platform", local.workers_group_defaults["platform"]) == "windows" ? local.default_ami_id_windows : local.default_ami_id_linux, - ) - instance_type = lookup( - var.worker_groups_launch_template[count.index], - "instance_type", - local.workers_group_defaults["instance_type"], - ) - key_name = lookup( - var.worker_groups_launch_template[count.index], - "key_name", - local.workers_group_defaults["key_name"], - ) - user_data = base64encode( - data.template_file.launch_template_userdata.*.rendered[count.index], - ) - - ebs_optimized = lookup( - var.worker_groups_launch_template[count.index], - "ebs_optimized", - ! 
contains( - local.ebs_optimized_not_supported, - lookup( - var.worker_groups_launch_template[count.index], - "instance_type", - local.workers_group_defaults["instance_type"], - ) - ) - ) - - credit_specification { - cpu_credits = lookup( - var.worker_groups_launch_template[count.index], - "cpu_credits", - local.workers_group_defaults["cpu_credits"] - ) - } - - monitoring { - enabled = lookup( - var.worker_groups_launch_template[count.index], - "enable_monitoring", - local.workers_group_defaults["enable_monitoring"], - ) - } - - placement { - tenancy = lookup( - var.worker_groups_launch_template[count.index], - "launch_template_placement_tenancy", - local.workers_group_defaults["launch_template_placement_tenancy"], - ) - group_name = lookup( - var.worker_groups_launch_template[count.index], - "launch_template_placement_group", - local.workers_group_defaults["launch_template_placement_group"], - ) - } - - dynamic instance_market_options { - for_each = lookup(var.worker_groups_launch_template[count.index], "market_type", null) == null ? [] : list(lookup(var.worker_groups_launch_template[count.index], "market_type", null)) - content { - market_type = instance_market_options.value - } - } - - block_device_mappings { - device_name = lookup( - var.worker_groups_launch_template[count.index], - "root_block_device_name", - local.workers_group_defaults["root_block_device_name"], - ) - - ebs { - volume_size = lookup( - var.worker_groups_launch_template[count.index], - "root_volume_size", - local.workers_group_defaults["root_volume_size"], - ) - volume_type = lookup( - var.worker_groups_launch_template[count.index], - "root_volume_type", - local.workers_group_defaults["root_volume_type"], - ) - iops = lookup( - var.worker_groups_launch_template[count.index], - "root_iops", - local.workers_group_defaults["root_iops"], - ) - encrypted = lookup( - var.worker_groups_launch_template[count.index], - "root_encrypted", - local.workers_group_defaults["root_encrypted"], - ) - kms_key_id = lookup( - var.worker_groups_launch_template[count.index], - "root_kms_key_id", - local.workers_group_defaults["root_kms_key_id"], - ) - delete_on_termination = true - } - } - - tag_specifications { - resource_type = "volume" - - tags = merge( - { - "Name" = "${aws_eks_cluster.this[0].name}-${lookup( - var.worker_groups_launch_template[count.index], - "name", - count.index, - )}-eks_asg" - }, - var.tags, - ) - } - - tags = var.tags - - lifecycle { - create_before_destroy = true - } -} - -resource "random_pet" "workers_launch_template" { - count = var.create_eks ? local.worker_group_launch_template_count : 0 - - separator = "-" - length = 2 - - keepers = { - lt_name = join( - "-", - compact( - [ - aws_launch_template.workers_launch_template[count.index].name, - aws_launch_template.workers_launch_template[count.index].latest_version - ] - ) - ) - } -} - -resource "aws_iam_instance_profile" "workers_launch_template" { - count = var.manage_worker_iam_resources && var.create_eks ? local.worker_group_launch_template_count : 0 - name_prefix = aws_eks_cluster.this[0].name - role = lookup( - var.worker_groups_launch_template[count.index], - "iam_role_id", - local.default_iam_role_id, - ) - path = var.iam_path -}
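For reference, a minimal usage sketch (not part of the diff) showing how a root module might supply the addon and egress variables introduced above. The module source path, addon version strings, ARNs and CIDR values are illustrative assumptions, not values defined by this change.

# Illustrative root-module call; adjust source, versions and ARNs to your environment.
module "eks" {
  source = "../.." # hypothetical path to this module

  cluster_name    = "example"
  cluster_version = "1.27"
  vpc_id          = "vpc-0123456789abcdef0"
  subnets         = ["subnet-aaaa", "subnet-bbbb"]

  # Managed addons introduced by this change
  enable_vpc_cni_addon      = true
  vpc_cni_version           = "v1.15.0-eksbuild.2" # illustrative
  vpc_cni_resolve_conflicts = "OVERWRITE"

  enable_coredns_addon    = true
  coredns_version         = "v1.10.1-eksbuild.4" # illustrative
  coredns_scaling_enabled = true
  coredns_minreplicas     = 2
  coredns_maxreplicas     = 10

  enable_kube_proxy_addon = true
  kube_proxy_version      = "v1.27.4-eksbuild.2" # illustrative

  enable_aws_ebs_csi_driver_addon = true
  aws_ebs_csi_driver_version      = "v1.21.0-eksbuild.1"                          # illustrative
  ebs_csi_driver_role_arn         = "arn:aws:iam::111122223333:role/ebs-csi-irsa" # example ARN

  enable_aws_efs_csi_driver_addon = true
  aws_efs_csi_driver_version      = "v1.5.8-eksbuild.1"                           # illustrative
  efs_csi_driver_role_arn         = "arn:aws:iam::111122223333:role/efs-csi-irsa" # example ARN

  # Restrict worker egress instead of allowing everything
  allow_all_egress           = false
  egress_ports_allowed       = [443]
  egress_cidr_blocks_allowed = ["10.0.0.0/8"]
  egress_custom_allowed = [
    {
      cidr_blocks = ["192.0.2.0/24"]
      from_port   = 8443
      to_port     = 8443
    },
  ]

  logging_stage = "dev"
}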