Skip to content

Commit 87065b4

Browse files
Swap support.
Swap support. This requires an updated helm chart, which isn't released yet, and an updated environmentd version. I tested it by pointing at my local main branch of the materialize repo. Adds a new `swap_enabled` variable, which defaults to false. Also bumps openebs and disables all its features we don't use. The previous version pulled an ancient kubectl image that was no longer available for a `Job`, which blocked its deployment.
1 parent 8b8cd9f commit 87065b4

File tree

10 files changed

+298
-1
lines changed

10 files changed

+298
-1
lines changed

examples/simple/main.tf

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,8 @@ module "materialize_infrastructure" {
5858
node_group_capacity_type = "ON_DEMAND"
5959
enable_cluster_creator_admin_permissions = true
6060

61+
swap_enabled = var.swap_enabled
62+
6163
# Storage Configuration
6264
bucket_force_destroy = true
6365

examples/simple/variables.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
# Opt-in switch for swap support in this example. The value is handed
# straight to the `materialize_infrastructure` module's `swap_enabled` input.
variable "swap_enabled" {
  type        = bool
  default     = false
  description = "Enable swap for Materialize. When enabled, this configures swap on a new nodepool, and adds it to the clusterd node selectors."
}

main.tf

Lines changed: 34 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,38 @@ module "eks" {
4747
]
4848
}
4949

50+
# Dedicated node group for swap-enabled Materialize workloads.
# Instantiated only when var.swap_enabled is true (count is 0 otherwise).
module "swap_node_group" {
  count  = var.swap_enabled ? 1 : 0
  source = "./modules/eks-node-group"

  # Identity and placement.
  cluster_name    = module.eks.cluster_name
  node_group_name = "${local.name_prefix}-mz-swap"
  subnet_ids      = local.network_private_subnet_ids

  # Instance types and sizing come from the root-level node_group_* variables.
  instance_types = var.node_group_instance_types
  desired_size   = var.node_group_desired_size
  min_size       = var.node_group_min_size
  max_size       = var.node_group_max_size

  # This node group exists solely to provide swap, so it is always on here.
  swap_enabled = true

  cluster_service_cidr = module.eks.cluster_service_cidr
  # NOTE(review): the *node* security group is supplied where the input name
  # says "cluster primary security group" - confirm this is intentional.
  cluster_primary_security_group_id = module.eks.node_security_group_id

  labels = {
    "workload"               = "materialize-instance"
    "materialize.cloud/swap" = "true"
  }

  tags = merge(local.common_tags, { Swap = "true" })

  depends_on = [module.eks]
}
81+
5082
module "aws_lbc" {
5183
source = "./modules/aws-lbc"
5284
count = var.install_aws_load_balancer_controller ? 1 : 0
@@ -136,6 +168,7 @@ module "operator" {
136168

137169
depends_on = [
138170
module.eks,
171+
module.swap_node_group,
139172
module.database,
140173
module.storage,
141174
module.networking,
@@ -199,7 +232,7 @@ locals {
199232
}
200233
operator = {
201234
clusters = {
202-
swap_enabled = false
235+
swap_enabled = var.swap_enabled
203236
}
204237
image = var.orchestratord_version == null ? {} : {
205238
tag = var.orchestratord_version

modules/eks-node-group/README.md

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
## Requirements
2+
3+
| Name | Version |
4+
|------|---------|
5+
| <a name="requirement_terraform"></a> [terraform](#requirement\_terraform) | >= 1.0 |
6+
| <a name="requirement_aws"></a> [aws](#requirement\_aws) | ~> 5.0 |
7+
| <a name="requirement_kubernetes"></a> [kubernetes](#requirement\_kubernetes) | ~> 2.0 |
8+
9+
## Providers
10+
11+
No providers.
12+
13+
## Modules
14+
15+
| Name | Source | Version |
16+
|------|--------|---------|
17+
| <a name="module_node_group"></a> [node\_group](#module\_node\_group) | terraform-aws-modules/eks/aws//modules/eks-managed-node-group | ~> 20.0 |
18+
19+
## Resources
20+
21+
No resources.
22+
23+
## Inputs
24+
25+
| Name | Description | Type | Default | Required |
26+
|------|-------------|------|---------|:--------:|
27+
| <a name="input_ami_type"></a> [ami\_type](#input\_ami\_type) | AMI type for the node group. | `string` | `"BOTTLEROCKET_ARM_64"` | no |
28+
| <a name="input_capacity_type"></a> [capacity\_type](#input\_capacity\_type) | Capacity type for worker nodes (ON\_DEMAND or SPOT). | `string` | `"ON_DEMAND"` | no |
29+
| <a name="input_cluster_name"></a> [cluster\_name](#input\_cluster\_name) | Name of the EKS cluster to attach the node group to. | `string` | n/a | yes |
30+
| <a name="input_cluster_primary_security_group_id"></a> [cluster\_primary\_security\_group\_id](#input\_cluster\_primary\_security\_group\_id) | The ID of the primary security group for the cluster | `string` | n/a | yes |
31+
| <a name="input_cluster_service_cidr"></a> [cluster\_service\_cidr](#input\_cluster\_service\_cidr) | The CIDR block for the cluster service | `string` | n/a | yes |
32+
| <a name="input_desired_size"></a> [desired\_size](#input\_desired\_size) | Desired number of worker nodes. | `number` | `1` | no |
33+
| <a name="input_disk_setup_image"></a> [disk\_setup\_image](#input\_disk\_setup\_image) | Docker image for the disk setup script | `string` | `"docker.io/materialize/ephemeral-storage-setup-image:v0.4.0"` | no |
34+
| <a name="input_iam_role_use_name_prefix"></a> [iam\_role\_use\_name\_prefix](#input\_iam\_role\_use\_name\_prefix) | Use name prefix for IAM roles | `bool` | `true` | no |
35+
| <a name="input_instance_types"></a> [instance\_types](#input\_instance\_types) | Instance types for worker nodes.<br/><br/>Recommended Configuration:<br/>- For other workloads: `r7g`, `r6g` families (ARM-based Graviton, without local disks)<br/>- For materialize instance workloads: `r6gd`, `r7gd` families (ARM-based Graviton, with local NVMe disks)<br/>- Enable disk setup when using instance types with local storage | `list(string)` | n/a | yes |
36+
| <a name="input_labels"></a> [labels](#input\_labels) | Labels to apply to the node group. | `map(string)` | `{}` | no |
37+
| <a name="input_max_size"></a> [max\_size](#input\_max\_size) | Maximum number of worker nodes. | `number` | `4` | no |
38+
| <a name="input_min_size"></a> [min\_size](#input\_min\_size) | Minimum number of worker nodes. | `number` | `1` | no |
39+
| <a name="input_node_group_name"></a> [node\_group\_name](#input\_node\_group\_name) | Name of the node group. | `string` | n/a | yes |
40+
| <a name="input_node_taints"></a> [node\_taints](#input\_node\_taints) | Taints to apply to the node group. | <pre>list(object({<br/> key = string<br/> value = string<br/> effect = string<br/> }))</pre> | `[]` | no |
41+
| <a name="input_subnet_ids"></a> [subnet\_ids](#input\_subnet\_ids) | List of subnet IDs for the node group. | `list(string)` | n/a | yes |
42+
| <a name="input_swap_enabled"></a> [swap\_enabled](#input\_swap\_enabled) | Whether to enable swap on the local NVMe disks. | `bool` | `true` | no |
43+
| <a name="input_tags"></a> [tags](#input\_tags) | Tags to apply to all resources | `map(string)` | `{}` | no |
44+
45+
## Outputs
46+
47+
| Name | Description |
48+
|------|-------------|
49+
| <a name="output_node_group_arn"></a> [node\_group\_arn](#output\_node\_group\_arn) | ARN of the EKS managed node group. |
50+
| <a name="output_node_group_id"></a> [node\_group\_id](#output\_node\_group\_id) | ID of the EKS managed node group. |

modules/eks-node-group/main.tf

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
locals {
  # Labels added on top of the caller's labels when swap is enabled.
  swap_labels = {
    "materialize.cloud/swap"                 = "true"
    "materialize.cloud/disk-config-required" = "true"
  }

  # Final label set for the node group: caller-supplied labels, plus the swap
  # labels when var.swap_enabled is true.
  node_labels = merge(var.labels, var.swap_enabled ? local.swap_labels : {})

  # Argument list for the disk setup bootstrap container, JSON-encoded and
  # then base64-encoded for the container's user-data setting.
  diskstrap_user_data = base64encode(jsonencode(["swap", "--cloud-provider", "aws", "--bottlerocket-enable-swap"]))

  # TOML settings passed as extra bootstrap arguments when swap is enabled:
  # a bootstrap container running the disk setup image, plus kernel sysctls.
  swap_bootstrap_args = <<-EOF
    [settings.bootstrap-containers.diskstrap]
    source = "${var.disk_setup_image}"
    mode = "always"
    essential = true
    user-data = "${local.diskstrap_user_data}"

    [settings.kernel.sysctl]
    "vm.swappiness" = "100"
    "vm.min_free_kbytes" = "1048576"
    "vm.watermark_scale_factor" = "100"
  EOF
}
23+
24+
# Wraps the upstream EKS managed node group submodule, wiring this module's
# variables through and adding swap labels and bootstrap settings on request.
module "node_group" {
  source  = "terraform-aws-modules/eks/aws//modules/eks-managed-node-group"
  version = "~> 20.0"

  # Identity and placement.
  name         = var.node_group_name
  cluster_name = var.cluster_name
  subnet_ids   = var.subnet_ids

  # Cluster networking details required by the submodule.
  cluster_service_cidr              = var.cluster_service_cidr
  cluster_primary_security_group_id = var.cluster_primary_security_group_id

  # Instance selection.
  ami_type       = var.ami_type
  instance_types = var.instance_types
  capacity_type  = var.capacity_type

  # Scaling bounds.
  min_size     = var.min_size
  desired_size = var.desired_size
  max_size     = var.max_size

  # Scheduling metadata; node_labels already includes the swap labels
  # when swap is enabled.
  labels = local.node_labels
  taints = var.node_taints

  # The IAM role name prefix is expected to be 1 - 38 characters long; callers
  # can turn the prefix off when the generated prefix would exceed that limit.
  iam_role_use_name_prefix = var.iam_role_use_name_prefix

  # Swap bootstrap settings are passed only when swap is requested;
  # otherwise no extra bootstrap arguments are supplied.
  # NOTE(review): the settings are Bottlerocket-style TOML - presumably only
  # valid with a BOTTLEROCKET_* ami_type; confirm before overriding ami_type.
  bootstrap_extra_args = var.swap_enabled ? local.swap_bootstrap_args : ""

  tags = var.tags
}

modules/eks-node-group/outputs.tf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Re-exports the ARN reported by the wrapped node_group module.
output "node_group_arn" {
  value       = module.node_group.node_group_arn
  description = "ARN of the EKS managed node group."
}
5+
6+
# Re-exports the ID reported by the wrapped node_group module.
output "node_group_id" {
  value       = module.node_group.node_group_id
  description = "ID of the EKS managed node group."
}
modules/eks-node-group/variables.tf

Lines changed: 122 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,122 @@
1+
# Input variables for the eks-node-group module.
#
# Every variable sets `nullable = false` so that an explicit `null` from a
# caller is never passed through: variables with a default fall back to it,
# and required variables reject null outright.

# --- Required identity / placement ------------------------------------------

variable "cluster_name" {
  description = "Name of the EKS cluster to attach the node group to."
  type        = string
  nullable    = false
}

variable "subnet_ids" {
  description = "List of subnet IDs for the node group."
  type        = list(string)
  nullable    = false
}

variable "node_group_name" {
  description = "Name of the node group."
  type        = string
  nullable    = false
}

# --- Scaling ------------------------------------------------------------------

variable "desired_size" {
  description = "Desired number of worker nodes."
  type        = number
  default     = 1
  nullable    = false
}

variable "min_size" {
  description = "Minimum number of worker nodes."
  type        = number
  default     = 1
  nullable    = false
}

variable "max_size" {
  description = "Maximum number of worker nodes."
  type        = number
  default     = 4
  nullable    = false
}

# --- Instance selection -------------------------------------------------------

variable "instance_types" {
  # Non-indented heredoc: the description text is kept flush-left so the
  # rendered string carries no leading whitespace.
  description = <<EOF
Instance types for worker nodes.

Recommended Configuration:
- For other workloads: `r7g`, `r6g` families (ARM-based Graviton, without local disks)
- For materialize instance workloads: `r6gd`, `r7gd` families (ARM-based Graviton, with local NVMe disks)
- Enable disk setup when using instance types with local storage
EOF
  type        = list(string)
  nullable    = false
}

variable "capacity_type" {
  description = "Capacity type for worker nodes (ON_DEMAND or SPOT)."
  type        = string
  default     = "ON_DEMAND"
  nullable    = false

  validation {
    condition     = contains(["ON_DEMAND", "SPOT"], var.capacity_type)
    error_message = "Capacity type must be either ON_DEMAND or SPOT."
  }
}

variable "ami_type" {
  description = "AMI type for the node group."
  type        = string
  default     = "BOTTLEROCKET_ARM_64"
  nullable    = false
}

# --- Scheduling metadata ------------------------------------------------------

# Merged with the swap labels (when swap_enabled is true) before being applied.
variable "labels" {
  description = "Labels to apply to the node group."
  type        = map(string)
  default     = {}
  nullable    = false
}

variable "node_taints" {
  description = "Taints to apply to the node group."
  type = list(object({
    key    = string
    value  = string
    effect = string
  }))
  default  = []
  nullable = false
}

variable "tags" {
  description = "Tags to apply to all resources"
  type        = map(string)
  default     = {}
  nullable    = false
}

# --- Swap / disk setup --------------------------------------------------------

# When true, the module adds the swap node labels and passes the swap
# bootstrap settings to the node group.
variable "swap_enabled" {
  description = "Whether to enable swap on the local NVMe disks."
  type        = bool
  default     = true
  nullable    = false
}

# Used as the `source` of the diskstrap bootstrap container in the swap
# bootstrap settings.
variable "disk_setup_image" {
  description = "Docker image for the disk setup script"
  type        = string
  default     = "docker.io/materialize/ephemeral-storage-setup-image:v0.4.0"
  nullable    = false
}

# --- Cluster networking -------------------------------------------------------

variable "cluster_service_cidr" {
  description = "The CIDR block for the cluster service"
  type        = string
  nullable    = false
}

variable "cluster_primary_security_group_id" {
  description = "The ID of the primary security group for the cluster"
  type        = string
  nullable    = false
}

# --- IAM ------------------------------------------------------------------------

# Can be disabled when the generated prefix would exceed the name_prefix
# length limit.
variable "iam_role_use_name_prefix" {
  description = "Use name prefix for IAM roles"
  type        = bool
  default     = true
  nullable    = false
}

modules/eks-node-group/versions.tf

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
# Version constraints for the eks-node-group module.
terraform {
  # The module's variables use the `nullable` argument, which Terraform only
  # understands from v1.1 onward; 1.0.x would reject the configuration, so the
  # lower bound is 1.1 rather than 1.0. Regenerate the module README
  # (terraform-docs) after changing this constraint.
  required_version = ">= 1.1"

  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
    # NOTE(review): no kubernetes resources are visible in this module;
    # confirm whether this requirement is still needed.
    kubernetes = {
      source  = "hashicorp/kubernetes"
      version = "~> 2.0"
    }
  }
}

modules/eks/outputs.tf

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,11 @@ output "cluster_name" {
88
value = module.eks.cluster_name
99
}
1010

11+
# Re-exports the service CIDR from the wrapped eks module so that callers
# (e.g. separately defined node groups) can consume it.
output "cluster_service_cidr" {
  value       = module.eks.cluster_service_cidr
  description = "CIDR of kubernetes service IP addresses"
}
15+
1116
output "cluster_security_group_id" {
1217
description = "Security group ID attached to the EKS cluster"
1318
value = module.eks.cluster_security_group_id

variables.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,12 @@ variable "enable_disk_support" {
404404
default = true
405405
}
406406

407+
# Master switch for swap support. Off by default.
variable "swap_enabled" {
  type        = bool
  default     = false
  description = "Enable swap for Materialize. When enabled, this configures swap on a new nodepool, and adds it to the clusterd node selectors."
}
412+
407413
variable "disk_support_config" {
408414
description = "Advanced configuration for disk support (only used when enable_disk_support = true)"
409415
type = object({

0 commit comments

Comments
 (0)