Commit ae2f8e5

Adding new mixed type of worker group with instance overrides and mixed instances policy (#371)
* Adding new mixed type of worker group with instance overrides and mixed instances policy
* Moving all count and lifecycle rule parameters to top/bottom
* Adding custom IAM parts
* Updating doc with new options
* Fixes for spot instances
1 parent 2439c25 commit ae2f8e5

Showing 13 changed files with 289 additions and 121 deletions.

CHANGELOG.md

Lines changed: 2 additions & 1 deletion
@@ -13,9 +13,10 @@ project adheres to [Semantic Versioning](http://semver.org/).
 
 - Added support for custom service linked role for Auto Scaling group (by @voanhduy1512)
 - Added support for custom IAM roles for cluster and workers (by @erks)
-- Add cluster arn to outputs (by @alexsn)
+- Added cluster ARN to outputs (by @alexsn)
 - Added outputs for `workers_user_data` and `workers_default_ami_id` (by @max-rocket-internet)
 - Added doc about spot instances (by @max-rocket-internet)
+- Added new worker group option with a mixed instances policy (by @max-rocket-internet)
 
 ### Changed
 

README.md

Lines changed: 2 additions & 0 deletions
@@ -148,6 +148,8 @@ MIT Licensed. See [LICENSE](https://github.com/terraform-aws-modules/terraform-a
 | worker\_create\_security\_group | Whether to create a security group for the workers or attach the workers to `worker_security_group_id`. | string | `"true"` | no |
 | worker\_group\_count | The number of maps contained within the worker_groups list. | string | `"1"` | no |
 | worker\_group\_launch\_template\_count | The number of maps contained within the worker_groups_launch_template list. | string | `"0"` | no |
+| worker\_group\_launch\_template\_mixed | A list of maps defining worker group configurations to be defined using AWS Launch Templates. See workers_group_defaults for valid keys. | list | `[ { "name": "default" } ]` | no |
+| worker\_group\_launch\_template\_mixed\_count | The number of maps contained within the worker_group_launch_template_mixed list. | string | `"0"` | no |
 | worker\_group\_tags | A map defining extra tags to be applied to the worker group ASG. | map | `{ "default": [] }` | no |
 | worker\_groups | A list of maps defining worker group configurations to be defined using AWS Launch Configurations. See workers_group_defaults for valid keys. | list | `[ { "name": "default" } ]` | no |
 | worker\_groups\_launch\_template | A list of maps defining worker group configurations to be defined using AWS Launch Templates. See workers_group_defaults for valid keys. | list | `[ { "name": "default" } ]` | no |
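
The two new inputs are passed to the module like any other worker group list. A minimal usage sketch, assuming an otherwise configured module block (the module source, cluster name, and network IDs below are illustrative placeholders, not values from this commit; the per-group keys come from the workers_group_defaults documented in local.tf further down):

```hcl
module "eks" {
  source       = "terraform-aws-modules/eks/aws" # illustrative; pin the version you actually use
  cluster_name = "example"
  subnets      = ["subnet-aaaa1111", "subnet-bbbb2222"] # placeholder subnet IDs
  vpc_id       = "vpc-cccc3333"                         # placeholder VPC ID

  # New inputs introduced by this commit
  worker_group_launch_template_mixed_count = 1

  worker_group_launch_template_mixed = [
    {
      name                     = "spot-mixed"
      override_instance_type_1 = "m5.large"
      override_instance_type_2 = "c5.large"
      spot_instance_pools      = 2
      asg_max_size             = 5
    }
  ]
}
```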

aws_auth.tf

Lines changed: 2 additions & 3 deletions
@@ -1,10 +1,11 @@
 resource "local_file" "config_map_aws_auth" {
+  count    = "${var.write_aws_auth_config ? 1 : 0}"
   content  = "${data.template_file.config_map_aws_auth.rendered}"
   filename = "${var.config_output_path}config-map-aws-auth_${var.cluster_name}.yaml"
-  count    = "${var.write_aws_auth_config ? 1 : 0}"
 }
 
 resource "null_resource" "update_config_map_aws_auth" {
+  count      = "${var.manage_aws_auth ? 1 : 0}"
   depends_on = ["aws_eks_cluster.this"]
 
   provisioner "local-exec" {
@@ -28,8 +29,6 @@ EOS
     config_map_rendered = "${data.template_file.config_map_aws_auth.rendered}"
     endpoint            = "${aws_eks_cluster.this.endpoint}"
   }
-
-  count = "${var.manage_aws_auth ? 1 : 0}"
 }
 
 data "aws_caller_identity" "current" {}

cluster.tf

Lines changed: 6 additions & 6 deletions
@@ -23,52 +23,52 @@ resource "aws_eks_cluster" "this" {
 }
 
 resource "aws_security_group" "cluster" {
+  count       = "${var.cluster_create_security_group ? 1 : 0}"
   name_prefix = "${var.cluster_name}"
   description = "EKS cluster security group."
   vpc_id      = "${var.vpc_id}"
   tags        = "${merge(var.tags, map("Name", "${var.cluster_name}-eks_cluster_sg"))}"
-  count       = "${var.cluster_create_security_group ? 1 : 0}"
 }
 
 resource "aws_security_group_rule" "cluster_egress_internet" {
+  count             = "${var.cluster_create_security_group ? 1 : 0}"
   description       = "Allow cluster egress access to the Internet."
   protocol          = "-1"
   security_group_id = "${aws_security_group.cluster.id}"
   cidr_blocks       = ["0.0.0.0/0"]
   from_port         = 0
   to_port           = 0
   type              = "egress"
-  count             = "${var.cluster_create_security_group ? 1 : 0}"
 }
 
 resource "aws_security_group_rule" "cluster_https_worker_ingress" {
+  count                    = "${var.cluster_create_security_group ? 1 : 0}"
   description              = "Allow pods to communicate with the EKS cluster API."
   protocol                 = "tcp"
   security_group_id        = "${aws_security_group.cluster.id}"
   source_security_group_id = "${local.worker_security_group_id}"
   from_port                = 443
   to_port                  = 443
   type                     = "ingress"
-  count                    = "${var.cluster_create_security_group ? 1 : 0}"
 }
 
 resource "aws_iam_role" "cluster" {
+  count                 = "${var.manage_cluster_iam_resources ? 1 : 0}"
   name_prefix           = "${var.cluster_name}"
   assume_role_policy    = "${data.aws_iam_policy_document.cluster_assume_role_policy.json}"
   permissions_boundary  = "${var.permissions_boundary}"
   path                  = "${var.iam_path}"
   force_detach_policies = true
-  count                 = "${var.manage_cluster_iam_resources ? 1 : 0}"
 }
 
 resource "aws_iam_role_policy_attachment" "cluster_AmazonEKSClusterPolicy" {
+  count      = "${var.manage_cluster_iam_resources ? 1 : 0}"
   policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
   role       = "${aws_iam_role.cluster.name}"
-  count      = "${var.manage_cluster_iam_resources ? 1 : 0}"
 }
 
 resource "aws_iam_role_policy_attachment" "cluster_AmazonEKSServicePolicy" {
+  count      = "${var.manage_cluster_iam_resources ? 1 : 0}"
   policy_arn = "arn:aws:iam::aws:policy/AmazonEKSServicePolicy"
   role       = "${aws_iam_role.cluster.name}"
-  count      = "${var.manage_cluster_iam_resources ? 1 : 0}"
 }
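
With `manage_cluster_iam_resources` disabled, the module looks up an existing role by name instead of creating one (see `data "aws_iam_role" "custom_cluster_iam_role"` in the data.tf changes below). A minimal sketch of the corresponding module inputs; the role name is illustrative and only the two relevant arguments are shown:

```hcl
module "eks" {
  # Skip creation of the cluster IAM role and reuse a pre-existing one instead.
  manage_cluster_iam_resources = false
  cluster_iam_role_name        = "my-existing-eks-cluster-role" # illustrative role name
}
```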

data.tf

Lines changed: 27 additions & 7 deletions
@@ -58,22 +58,22 @@ data "template_file" "kubeconfig" {
 }
 
 data "template_file" "aws_authenticator_env_variables" {
+  count = "${length(var.kubeconfig_aws_authenticator_env_variables)}"
+
   template = <<EOF
 - name: $${key}
   value: $${value}
 EOF
 
-  count = "${length(var.kubeconfig_aws_authenticator_env_variables)}"
-
   vars {
     value = "${element(values(var.kubeconfig_aws_authenticator_env_variables), count.index)}"
     key   = "${element(keys(var.kubeconfig_aws_authenticator_env_variables), count.index)}"
   }
 }
 
 data "template_file" "userdata" {
-  template = "${file("${path.module}/templates/userdata.sh.tpl")}"
   count = "${var.worker_group_count}"
+  template = "${file("${path.module}/templates/userdata.sh.tpl")}"
 
   vars {
     cluster_name = "${aws_eks_cluster.this.name}"
@@ -87,8 +87,23 @@ data "template_file" "userdata" {
 }
 
 data "template_file" "launch_template_userdata" {
-  template = "${file("${path.module}/templates/userdata.sh.tpl")}"
   count = "${var.worker_group_launch_template_count}"
+  template = "${file("${path.module}/templates/userdata.sh.tpl")}"
+
+  vars {
+    cluster_name         = "${aws_eks_cluster.this.name}"
+    endpoint             = "${aws_eks_cluster.this.endpoint}"
+    cluster_auth_base64  = "${aws_eks_cluster.this.certificate_authority.0.data}"
+    pre_userdata         = "${lookup(var.worker_groups_launch_template[count.index], "pre_userdata", local.workers_group_defaults["pre_userdata"])}"
+    additional_userdata  = "${lookup(var.worker_groups_launch_template[count.index], "additional_userdata", local.workers_group_defaults["additional_userdata"])}"
+    bootstrap_extra_args = "${lookup(var.worker_groups_launch_template[count.index], "bootstrap_extra_args", local.workers_group_defaults["bootstrap_extra_args"])}"
+    kubelet_extra_args   = "${lookup(var.worker_groups_launch_template[count.index], "kubelet_extra_args", local.workers_group_defaults["kubelet_extra_args"])}"
+  }
+}
+
+data "template_file" "workers_launch_template_mixed" {
+  count    = "${var.worker_group_launch_template_mixed_count}"
+  template = "${file("${path.module}/templates/userdata.sh.tpl")}"
 
   vars {
     cluster_name = "${aws_eks_cluster.this.name}"
@@ -102,16 +117,21 @@ data "template_file" "launch_template_userdata" {
 }
 
 data "aws_iam_role" "custom_cluster_iam_role" {
-  name = "${var.cluster_iam_role_name}"
   count = "${var.manage_cluster_iam_resources ? 0 : 1}"
+  name  = "${var.cluster_iam_role_name}"
 }
 
 data "aws_iam_instance_profile" "custom_worker_group_iam_instance_profile" {
-  name = "${lookup(var.worker_groups[count.index], "iam_instance_profile_name", local.workers_group_defaults["iam_instance_profile_name"])}"
   count = "${var.manage_worker_iam_resources ? 0 : var.worker_group_count}"
+  name  = "${lookup(var.worker_groups[count.index], "iam_instance_profile_name", local.workers_group_defaults["iam_instance_profile_name"])}"
 }
 
 data "aws_iam_instance_profile" "custom_worker_group_launch_template_iam_instance_profile" {
-  name = "${lookup(var.worker_groups_launch_template[count.index], "iam_instance_profile_name", local.workers_group_defaults["iam_instance_profile_name"])}"
   count = "${var.manage_worker_iam_resources ? 0 : var.worker_group_launch_template_count}"
+  name  = "${lookup(var.worker_groups_launch_template[count.index], "iam_instance_profile_name", local.workers_group_defaults["iam_instance_profile_name"])}"
+}
+
+data "aws_iam_instance_profile" "custom_worker_group_launch_template_mixed_iam_instance_profile" {
+  count = "${var.manage_worker_iam_resources ? 0 : var.worker_group_launch_template_mixed_count}"
+  name  = "${lookup(var.worker_group_launch_template_mixed[count.index], "iam_instance_profile_name", local.workers_group_defaults["iam_instance_profile_name"])}"
 }
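
Every per-group setting in these data sources resolves through the same `lookup()` fallback: the group's own key wins, otherwise the module-wide default applies. A standalone sketch of that pattern (the names below are illustrative, not module code):

```hcl
locals {
  # Illustrative defaults map, standing in for local.workers_group_defaults.
  defaults = {
    kubelet_extra_args = ""
  }

  # Illustrative worker group map, standing in for one entry of var.worker_group_launch_template_mixed.
  group = {
    name               = "spot-mixed"
    kubelet_extra_args = "--node-labels=kubernetes.io/lifecycle=spot"
  }

  # lookup() returns the group's value if the key exists, otherwise the supplied default.
  kubelet_extra_args = "${lookup(local.group, "kubelet_extra_args", local.defaults["kubelet_extra_args"])}"
}

output "kubelet_extra_args" {
  value = "${local.kubelet_extra_args}"
}
```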

docs/spot-instances.md

Lines changed: 64 additions & 67 deletions
@@ -1,101 +1,98 @@
 # Using spot instances
 
-Spot instances usually cost around 30-70% less than an on-demand instance. So using them for your EKS workloads can save a lot of money but requires some special considerations as they will be terminated with only 2 minutes warning.
+Spot instances usually cost around 30-70% less than an on-demand instance. So using them for your EKS workloads can save a lot of money but requires some special considerations as they could be terminated with only 2 minutes warning.
 
 You need to install a daemonset to catch the 2 minute warning before termination. This will ensure the node is gracefully drained before termination. You can install the [k8s-spot-termination-handler](https://github.com/kube-aws/kube-spot-termination-notice-handler) for this. There's a [Helm chart](https://github.com/helm/charts/tree/master/stable/k8s-spot-termination-handler):
 
 ```
 helm install stable/k8s-spot-termination-handler --namespace kube-system
 ```
 
-In the following examples at least 1 worker group that uses on-demand instances is included. This worker group has an added node label that can be used in scheduling. This could be used to schedule any workload but is important for the [cluster-autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) as it might end up unscheduled when spot instances are terminated. You can add this to the values of the [cluster-autoscaler helm chart](https://github.com/helm/charts/tree/master/stable/cluster-autoscaler):
+In the following examples at least 1 worker group that uses on-demand instances is included. This worker group has an added node label that can be used in scheduling. This could be used to schedule any workload not suitable for spot instances but is important for the [cluster-autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) as it might end up unscheduled when spot instances are terminated. You can add this to the values of the [cluster-autoscaler helm chart](https://github.com/helm/charts/tree/master/stable/cluster-autoscaler):
 
 ```yaml
 nodeSelector:
-  spot: "false"
+  kubernetes.io/lifecycle: spot
 ```
 
 Notes:
 
 - The `spot_price` is set to the on-demand price so that the spot instances will run as long as they are cheaper.
 - It's best to have a broad range of instance types to ensure there's always some instances to run when prices fluctuate.
-- Using an [AWS Spot Fleet](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-fleet-requests.html) is the best option but is not supported by this module yet.
 - There is an AWS blog article about this [here](https://aws.amazon.com/blogs/compute/run-your-kubernetes-workloads-on-amazon-ec2-spot-instances-with-amazon-eks/).
 - Consider using [k8s-spot-rescheduler](https://github.com/pusher/k8s-spot-rescheduler) to move pods from on-demand to spot instances.
 
 ## Using Launch Configuration
 
-Example Terraform worker group configuration that uses an ASG with launch configuration:
+Example worker group configuration that uses an ASG with a launch configuration for each worker group:
 
 ```hcl
-worker_group_count = 3
-
-worker_groups = [
-  {
-    name                = "on-demand-1"
-    instance_type       = "m4.xlarge"
-    asg_max_size        = 1
-    autoscaling_enabled = true
-    kubelet_extra_args  = "--node-labels=spot=false"
-    suspended_processes = "AZRebalance"
-  },
-  {
-    name                = "spot-1"
-    spot_price          = "0.39"
-    instance_type       = "c4.2xlarge"
-    asg_max_size        = 20
-    autoscaling_enabled = true
-    kubelet_extra_args  = "--node-labels=spot=true"
-    suspended_processes = "AZRebalance"
-  },
-  {
-    name                = "spot-2"
-    spot_price          = "0.40"
-    instance_type       = "m4.2xlarge"
-    asg_max_size        = 20
-    autoscaling_enabled = true
-    kubelet_extra_args  = "--node-labels=spot=true"
-    suspended_processes = "AZRebalance"
-  }
-]
+worker_group_count = 3
+
+worker_groups = [
+  {
+    name                = "on-demand-1"
+    instance_type       = "m4.xlarge"
+    asg_max_size        = 1
+    autoscaling_enabled = true
+    kubelet_extra_args  = "--node-labels=kubernetes.io/lifecycle=normal"
+    suspended_processes = "AZRebalance"
+  },
+  {
+    name                = "spot-1"
+    spot_price          = "0.199"
+    instance_type       = "c4.xlarge"
+    asg_max_size        = 20
+    autoscaling_enabled = true
+    kubelet_extra_args  = "--node-labels=kubernetes.io/lifecycle=spot"
+    suspended_processes = "AZRebalance"
+  },
+  {
+    name                = "spot-2"
+    spot_price          = "0.20"
+    instance_type       = "m4.xlarge"
+    asg_max_size        = 20
+    autoscaling_enabled = true
+    kubelet_extra_args  = "--node-labels=kubernetes.io/lifecycle=spot"
+    suspended_processes = "AZRebalance"
+  }
+]
 ```
 
 ## Using Launch Templates
 
-Launch Template support is a recent addition to both AWS and this module. It might not be as tried and tested.
-
-Example Terraform worker group configuration that uses an ASG with a launch template:
+Launch Template support is a recent addition to both AWS and this module. It might not be as tried and tested, but it's more suitable for spot instances as it allows multiple instance types in the same worker group:
 
 ```hcl
-
-worker_group_count = 1
-
-worker_groups = [
-  {
-    name                = "on-demand-1"
-    instance_type       = "m4.xlarge"
-    asg_max_size        = 10
-    autoscaling_enabled = true
-    kubelet_extra_args  = "--node-labels=spot=false"
-    suspended_processes = "AZRebalance"
-  }
-]
-
-worker_group_launch_template_count = 1
-
-worker_groups_launch_template = [
-  {
-    name                                     = "spot-1"
-    instance_type                            = "m5.xlarge"
-    override_instance_type                   = "m4.xlarge"
-    spot_instance_pools                      = 2
-    on_demand_percentage_above_base_capacity = 0
-    spot_max_price                           = "0.384"
-    asg_max_size                             = 10
-    autoscaling_enabled                      = true
-    kubelet_extra_args                       = "--node-labels=spot=true"
-  }
-]
+worker_group_count = 1
+
+worker_groups = [
+  {
+    name                = "on-demand-1"
+    instance_type       = "m4.xlarge"
+    asg_max_size        = 10
+    autoscaling_enabled = true
+    kubelet_extra_args  = "--node-labels=spot=false"
+    suspended_processes = "AZRebalance"
+  }
+]
+
+worker_group_launch_template_mixed_count = 1
+
+worker_group_launch_template_mixed = [
+  {
+    name                     = "spot-1"
+    override_instance_type_1 = "m5.large"
+    override_instance_type_2 = "c5.large"
+    override_instance_type_3 = "t3.large"
+    override_instance_type_4 = "r5.large"
+    spot_instance_pools      = 3
+    asg_max_size             = 5
+    asg_desired_size         = 5
+    autoscaling_enabled      = true
+    kubelet_extra_args       = "--node-labels=kubernetes.io/lifecycle=spot"
+  }
+]
 ```
 
 ## Important issues

kubectl.tf

Lines changed: 1 addition & 1 deletion
@@ -1,5 +1,5 @@
 resource "local_file" "kubeconfig" {
+  count    = "${var.write_kubeconfig ? 1 : 0}"
   content  = "${data.template_file.kubeconfig.rendered}"
   filename = "${var.config_output_path}kubeconfig_${var.cluster_name}"
-  count    = "${var.write_kubeconfig ? 1 : 0}"
 }

local.tf

Lines changed: 12 additions & 0 deletions
@@ -53,6 +53,18 @@ locals {
     launch_template_placement_group = ""   # The name of the placement group into which to launch the instances, if any.
     root_encrypted                  = ""   # Whether the volume should be encrypted or not
     eni_delete                      = true # Delete the ENI on termination (if set to false you will have to manually delete before destroying)
+
+    # Settings for launch templates with mixed instances policy
+    override_instance_type_1                 = "m5.large"     # Override instance type 1 for mixed instances policy
+    override_instance_type_2                 = "c5.large"     # Override instance type 2 for mixed instances policy
+    override_instance_type_3                 = "t3.large"     # Override instance type 3 for mixed instances policy
+    override_instance_type_4                 = "r5.large"     # Override instance type 4 for mixed instances policy
+    on_demand_allocation_strategy            = "prioritized"  # Strategy to use when launching on-demand instances. Valid values: prioritized.
+    on_demand_base_capacity                  = "0"            # Absolute minimum amount of desired capacity that must be fulfilled by on-demand instances
+    on_demand_percentage_above_base_capacity = "0"            # Percentage split between on-demand and Spot instances above the base on-demand capacity
+    spot_allocation_strategy                 = "lowest-price" # The only valid value is lowest-price, which is also the default value. The Auto Scaling group selects the cheapest Spot pools and evenly allocates your Spot capacity across the number of Spot pools that you specify.
+    spot_instance_pools                      = 10             # Number of Spot pools per availability zone to allocate capacity. EC2 Auto Scaling selects the cheapest Spot pools and evenly allocates Spot capacity across the number of Spot pools that you specify.
+    spot_max_price                           = ""             # Maximum price per unit hour that the user is willing to pay for the Spot instances. Default is the on-demand price
   }
 
   workers_group_defaults = "${merge(local.workers_group_defaults_defaults, var.workers_group_defaults)}"
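
These defaults line up with the `mixed_instances_policy` block of `aws_autoscaling_group`. The worker ASG definition that consumes them is in a file not shown in the hunks above, so the resources below are only an illustrative sketch of that mapping, with placeholder names, AMI, and subnet IDs:

```hcl
# Illustrative sketch: how the new defaults feed an ASG with a mixed instances policy.
resource "aws_launch_template" "workers_example" {
  name_prefix   = "example-workers-"
  image_id      = "ami-0123456789abcdef0" # placeholder EKS-optimized AMI ID
  instance_type = "m5.large"
}

resource "aws_autoscaling_group" "workers_mixed_example" {
  name_prefix         = "example-spot-mixed-"
  min_size            = 1
  max_size            = 5
  desired_capacity    = 5
  vpc_zone_identifier = ["subnet-aaaa1111"] # placeholder subnet ID

  mixed_instances_policy {
    instances_distribution {
      on_demand_allocation_strategy            = "prioritized"
      on_demand_base_capacity                  = 0
      on_demand_percentage_above_base_capacity = 0
      spot_allocation_strategy                 = "lowest-price"
      spot_instance_pools                      = 10
      spot_max_price                           = "" # empty string caps bids at the on-demand price
    }

    launch_template {
      launch_template_specification {
        launch_template_id = "${aws_launch_template.workers_example.id}"
        version            = "$Latest"
      }

      # One override block per override_instance_type_N default above.
      override {
        instance_type = "m5.large"
      }

      override {
        instance_type = "c5.large"
      }

      override {
        instance_type = "t3.large"
      }

      override {
        instance_type = "r5.large"
      }
    }
  }
}
```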
