Skip to content

Commit 783010b

Browse files
committed
Switched from ELB to NLB
Signed-off-by: Philip Schmid <phisch@cisco.com>
1 parent b45ec59 commit 783010b

File tree

17 files changed

+189
-117
lines changed

17 files changed

+189
-117
lines changed

.github/workflows/conformance-pr.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,7 +73,7 @@ jobs:
7373
run: |
7474
cd test/conformance
7575
# Wait until the apiserver LB is ready
76-
timeout 120 bash -c "until curl -sS https://$(terraform output -raw elb_dns_name):443 -k -m 3; do sleep 1 && echo 'waiting for apiserver'; done"
76+
timeout 120 bash -c "until curl -sS https://$(terraform output -raw lb_dns_name):443 -k -m 3; do sleep 1 && echo 'waiting for apiserver'; done"
7777
if [ $? -ne 0 ]; then
7878
echo "API Server LB failed to become available."
7979
exit 1

.github/workflows/conformance.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -153,7 +153,7 @@ jobs:
153153
- name: Install Cilium
154154
run: |
155155
cd test/conformance
156-
timeout 120 bash -c "until curl -sS https://$(terraform output -raw elb_dns_name):443 -k -m 3; do sleep 1 && echo 'waiting for apiserver'; done"
156+
timeout 120 bash -c "until curl -sS https://$(terraform output -raw lb_dns_name):443 -k -m 3; do sleep 1 && echo 'waiting for apiserver'; done"
157157
if [ $? -ne 0 ]; then
158158
echo "API Server LB failed to become available."
159159
exit 1

00-locals.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ locals {
3535
},
3636
apiServer = {
3737
certSANs = [
38-
module.elb_k8s_elb.elb_dns_name
38+
aws_lb.api.dns_name
3939
],
4040
extraArgs = {
4141
enable-admission-plugins = var.admission_plugins
@@ -65,7 +65,7 @@ locals {
6565
},
6666
machine = {
6767
certSANs = [
68-
module.elb_k8s_elb.elb_dns_name
68+
aws_lb.api.dns_name
6969
],
7070
kubelet = {
7171
extraArgs = {

00-outputs.tf

Lines changed: 17 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,24 @@ output "path_to_kubeconfig_file" {
88
value = local.path_to_kubeconfig_file
99
}
1010

11+
output "lb_dns_name" {
12+
description = "Public NLB DNS name."
13+
value = aws_lb.api.dns_name
14+
}
15+
1116
output "elb_dns_name" {
12-
description = "Public ELB DNS name."
13-
value = module.elb_k8s_elb.elb_dns_name
17+
description = "[DEPRECATED: Use lb_dns_name instead] Public load balancer DNS name."
18+
value = aws_lb.api.dns_name
19+
}
20+
21+
output "lb_zone_id" {
22+
description = "The zone_id of the NLB for Route53 alias records."
23+
value = aws_lb.api.zone_id
24+
}
25+
26+
output "lb_arn" {
27+
description = "The ARN of the Network Load Balancer."
28+
value = aws_lb.api.arn
1429
}
1530

1631
output "cluster_name" {

00-variables.tf

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -192,9 +192,3 @@ variable "external_cloud_provider_manifest" {
192192
description = "externalCloudProvider manifest to be applied if var.enable_external_cloud_provider is enabled. If you want to deploy it manually (e.g., via Helm chart), enable var.enable_external_cloud_provider but set this value to an empty string (\"\"). See https://kubernetes.io/docs/tasks/administer-cluster/running-cloud-controller/."
193193
type = string
194194
}
195-
196-
variable "use_private_ips_only" {
197-
description = "When true (default), Talos cluster nodes will NOT receive public IPv4 addresses. The Kubernetes/Talos API is still exposed via a public ELB (restricted by security groups via var.external_source_cidrs). When set to false, public IPv4 addresses are allocated and the ELB is internet-facing."
198-
type = bool
199-
default = true
200-
}

01-vpc.tf

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,6 @@ data "aws_subnets" "public" {
2525
}
2626

2727
// Used to list all private subnets in the VPC.
28-
// This is required when `var.use_private_ips_only` is enabled (default).
2928
data "aws_subnets" "private" {
3029
depends_on = [
3130
null_resource.wait_for_subnets,
@@ -45,10 +44,8 @@ data "aws_subnets" "private" {
4544
}
4645

4746
// Used to wait for at least one of the subnets to exist.
48-
// Unfortunately there doesn't seem to be a better way to do this in Terraform.
4947
resource "null_resource" "wait_for_subnets" {
5048
provisioner "local-exec" {
51-
command = "${path.module}/scripts/wait-for-subnets.sh -v ${data.aws_vpc.vpc.id} -r ${data.aws_region.current.name} -t ${var.use_private_ips_only ? "private" : "public"}"
49+
command = "${path.module}/scripts/wait-for-subnets.sh -v ${data.aws_vpc.vpc.id} -r ${data.aws_region.current.name} -t public"
5250
}
5351
}
54-

02-infra.tf

Lines changed: 103 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
# Public-facing security group for the external load balancer
2-
module "elb_sg" {
2+
module "nlb_sg" {
33
source = "terraform-aws-modules/security-group/aws"
44
version = "~> 5.3"
55

6-
name = "${var.cluster_name}-elb"
6+
name = "${var.cluster_name}-nlb"
77
description = "Public-facing LB for Kubernetes and Talos API"
88
vpc_id = var.vpc_id
99
tags = var.tags
@@ -34,66 +34,136 @@ module "cluster_sg" {
3434
version = "~> 5.3"
3535

3636
name = var.cluster_name
37-
description = "Intra-cluster & traffic from ELB"
37+
description = "Intra-cluster & traffic from NLB"
3838
vpc_id = var.vpc_id
3939
tags = var.tags
4040

4141
# Node-to-node communications
4242
ingress_with_self = [{ rule = "all-all" }]
4343

44-
# Allow API traffic coming *from* the ELB
44+
# Allow API traffic coming *from* the NLB (required for the health checks)
4545
ingress_with_source_security_group_id = [
4646
{
4747
from_port = 6443
4848
to_port = 6443
4949
protocol = "tcp"
50-
description = "Kubernetes API from ELB"
51-
source_security_group_id = module.elb_sg.security_group_id
50+
description = "Kubernetes API from NLB"
51+
source_security_group_id = module.nlb_sg.security_group_id
5252
},
5353
{
5454
from_port = 50000
5555
to_port = 50000
5656
protocol = "tcp"
57-
description = "Talos API from ELB"
58-
source_security_group_id = module.elb_sg.security_group_id
57+
description = "Talos API from NLB"
58+
source_security_group_id = module.nlb_sg.security_group_id
59+
}
60+
]
61+
62+
# Allow API traffic directly from external clients (with preserved IPs through NLB)
63+
ingress_cidr_blocks = var.external_source_cidrs
64+
ingress_with_cidr_blocks = [
65+
{
66+
from_port = 6443
67+
to_port = 6443
68+
protocol = "tcp"
69+
description = "Kubernetes API from external clients"
70+
},
71+
{
72+
from_port = 50000
73+
to_port = 50000
74+
protocol = "tcp"
75+
description = "Talos API from external clients"
5976
},
6077
]
6178

6279
egress_with_cidr_blocks = [{ rule = "all-all", cidr_blocks = "0.0.0.0/0" }]
6380
}
6481

65-
module "elb_k8s_elb" {
66-
source = "terraform-aws-modules/elb/aws"
67-
version = "~> 4.0"
82+
# Network Load Balancer for Kubernetes & Talos API
83+
resource "aws_lb" "api" {
84+
name = "${var.cluster_name}-api"
85+
86+
load_balancer_type = "network"
87+
internal = false
6888

69-
name = "${var.cluster_name}-k8s-api"
7089
subnets = data.aws_subnets.public.ids
71-
tags = merge(var.tags, local.cluster_required_tags)
72-
security_groups = [module.elb_sg.security_group_id]
90+
security_groups = [module.nlb_sg.security_group_id]
7391

74-
listener = [
75-
{
76-
lb_port = 443
77-
lb_protocol = "tcp"
78-
instance_port = 6443
79-
instance_protocol = "tcp"
80-
},
81-
{
82-
lb_port = 50000
83-
lb_protocol = "tcp"
84-
instance_port = 50000
85-
instance_protocol = "tcp"
86-
}
87-
]
92+
enable_cross_zone_load_balancing = true
93+
tags = merge(var.tags, local.cluster_required_tags)
94+
}
8895

89-
health_check = {
90-
target = "tcp:50000"
96+
# Target Group: Kubernetes API (6443)
97+
resource "aws_lb_target_group" "k8s" {
98+
name_prefix = "k8s-"
99+
port = 6443
100+
protocol = "TCP"
101+
preserve_client_ip = true
102+
vpc_id = data.aws_vpc.vpc.id
103+
health_check {
104+
healthy_threshold = 3
105+
unhealthy_threshold = 2
106+
timeout = 3
91107
interval = 5
108+
protocol = "TCP"
109+
port = 6443
110+
}
111+
target_type = "instance"
112+
tags = merge(var.tags, local.cluster_required_tags)
113+
}
114+
115+
# Target Group: Talos API (50000)
116+
resource "aws_lb_target_group" "talos" {
117+
name_prefix = "tal-"
118+
port = 50000
119+
protocol = "TCP"
120+
preserve_client_ip = true
121+
vpc_id = data.aws_vpc.vpc.id
122+
health_check {
92123
healthy_threshold = 3
93124
unhealthy_threshold = 2
94-
timeout = 2
125+
timeout = 3
126+
interval = 5
127+
protocol = "TCP"
128+
port = 50000
95129
}
130+
target_type = "instance"
131+
tags = merge(var.tags, local.cluster_required_tags)
132+
}
133+
134+
# Listener 443 -> TG k8s
135+
resource "aws_lb_listener" "https" {
136+
load_balancer_arn = aws_lb.api.arn
137+
port = 443
138+
protocol = "TCP"
139+
default_action {
140+
type = "forward"
141+
target_group_arn = aws_lb_target_group.k8s.arn
142+
}
143+
}
144+
145+
# Listener 50000 -> TG talos
146+
resource "aws_lb_listener" "talos" {
147+
load_balancer_arn = aws_lb.api.arn
148+
port = 50000
149+
protocol = "TCP"
150+
default_action {
151+
type = "forward"
152+
target_group_arn = aws_lb_target_group.talos.arn
153+
}
154+
}
155+
156+
# Attach control-plane instances to both target groups
157+
resource "aws_lb_target_group_attachment" "cp_k8s" {
158+
for_each = { for idx, id in module.talos_control_plane_nodes.*.id : idx => id }
159+
target_group_arn = aws_lb_target_group.k8s.arn
160+
target_id = each.value
161+
port = 6443
162+
}
96163

97-
number_of_instances = var.controlplane_count
98-
instances = module.talos_control_plane_nodes.*.id
164+
resource "aws_lb_target_group_attachment" "cp_talos" {
165+
for_each = { for idx, id in module.talos_control_plane_nodes.*.id : idx => id }
166+
target_group_arn = aws_lb_target_group.talos.arn
167+
target_id = each.value
168+
port = 50000
99169
}

03-talos.tf

Lines changed: 28 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -121,8 +121,8 @@ module "talos_control_plane_nodes" {
121121
name = "${var.cluster_name}-control-plane-${count.index}"
122122
ami = data.aws_ami.talos.id
123123
instance_type = var.control_plane.instance_type
124-
subnet_id = var.use_private_ips_only ? element(data.aws_subnets.private.ids, count.index) : element(data.aws_subnets.public.ids, count.index)
125-
associate_public_ip_address = !var.use_private_ips_only
124+
subnet_id = element(data.aws_subnets.public.ids, count.index)
125+
associate_public_ip_address = true
126126
tags = merge(var.tags, local.cluster_required_tags)
127127
metadata_options = var.metadata_options
128128
ignore_ami_changes = true
@@ -151,8 +151,8 @@ module "talos_worker_group" {
151151
name = "${var.cluster_name}-worker-group-${each.value.name}-${trimprefix(each.key, "${each.value.name}.")}"
152152
ami = data.aws_ami.talos.id
153153
instance_type = each.value.instance_type
154-
subnet_id = var.use_private_ips_only ? element(data.aws_subnets.private.ids, tonumber(trimprefix(each.key, "${each.value.name}."))) : element(data.aws_subnets.public.ids, tonumber(trimprefix(each.key, "${each.value.name}.")))
155-
associate_public_ip_address = !var.use_private_ips_only
154+
subnet_id = element(data.aws_subnets.public.ids, tonumber(trimprefix(each.key, "${each.value.name}.")))
155+
associate_public_ip_address = true
156156
tags = merge(each.value.tags, var.tags, local.cluster_required_tags)
157157
metadata_options = var.metadata_options
158158
ignore_ami_changes = true
@@ -178,7 +178,7 @@ resource "talos_machine_secrets" "this" {
178178

179179
data "talos_machine_configuration" "controlplane" {
180180
cluster_name = var.cluster_name
181-
cluster_endpoint = "https://${module.elb_k8s_elb.elb_dns_name}"
181+
cluster_endpoint = "https://${aws_lb.api.dns_name}"
182182
machine_type = "controlplane"
183183
machine_secrets = talos_machine_secrets.this.machine_secrets
184184
kubernetes_version = var.kubernetes_version
@@ -191,11 +191,27 @@ data "talos_machine_configuration" "controlplane" {
191191
)
192192
}
193193

194+
resource "talos_machine_configuration_apply" "controlplane" {
195+
for_each = { for index, instance in module.talos_control_plane_nodes : index => instance }
196+
client_configuration = talos_machine_secrets.this.client_configuration
197+
machine_configuration_input = data.talos_machine_configuration.controlplane.machine_configuration
198+
endpoint = module.talos_control_plane_nodes[each.key].public_ip
199+
node = module.talos_control_plane_nodes[each.key].private_ip
200+
}
201+
202+
resource "talos_machine_bootstrap" "this" {
203+
depends_on = [talos_machine_configuration_apply.controlplane]
204+
205+
client_configuration = talos_machine_secrets.this.client_configuration
206+
endpoint = module.talos_control_plane_nodes.0.public_ip
207+
node = module.talos_control_plane_nodes.0.private_ip
208+
}
209+
194210
data "talos_machine_configuration" "worker_group" {
195211
for_each = merge([for info in var.worker_groups : { for index in range(0, var.workers_count) : "${info.name}.${index}" => info }]...)
196212

197213
cluster_name = var.cluster_name
198-
cluster_endpoint = "https://${module.elb_k8s_elb.elb_dns_name}"
214+
cluster_endpoint = "https://${aws_lb.api.dns_name}"
199215
machine_type = "worker"
200216
machine_secrets = talos_machine_secrets.this.machine_secrets
201217
kubernetes_version = var.kubernetes_version
@@ -208,44 +224,20 @@ data "talos_machine_configuration" "worker_group" {
208224
)
209225
}
210226

211-
resource "talos_machine_configuration_apply" "controlplane" {
212-
for_each = { for index, instance in module.talos_control_plane_nodes : index => instance }
213-
client_configuration = talos_machine_secrets.this.client_configuration
214-
machine_configuration_input = data.talos_machine_configuration.controlplane.machine_configuration
215-
endpoint = module.elb_k8s_elb.elb_dns_name
216-
node = var.use_private_ips_only ? module.talos_control_plane_nodes[each.key].private_ip : module.talos_control_plane_nodes[each.key].public_ip
217-
}
218-
219-
# Wait until Talos APID has rotated its cert & ELB sees the node healthy
220-
resource "time_sleep" "wait_api_ready" {
221-
depends_on = [talos_machine_bootstrap.this]
222-
create_duration = "30s"
223-
}
224-
225227
resource "talos_machine_configuration_apply" "worker_group" {
226-
depends_on = [time_sleep.wait_api_ready]
227-
228228
for_each = merge([for info in var.worker_groups : { for index in range(0, var.workers_count) : "${info.name}.${index}" => info }]...)
229229

230230
client_configuration = talos_machine_secrets.this.client_configuration
231231
machine_configuration_input = data.talos_machine_configuration.worker_group[each.key].machine_configuration
232-
endpoint = module.elb_k8s_elb.elb_dns_name
233-
node = var.use_private_ips_only ? module.talos_worker_group[each.key].private_ip : module.talos_worker_group[each.key].public_ip
234-
}
235-
236-
resource "talos_machine_bootstrap" "this" {
237-
depends_on = [talos_machine_configuration_apply.controlplane]
238-
239-
client_configuration = talos_machine_secrets.this.client_configuration
240-
endpoint = module.elb_k8s_elb.elb_dns_name
241-
node = var.use_private_ips_only ? module.talos_control_plane_nodes.0.private_ip : module.talos_control_plane_nodes.0.public_ip
232+
endpoint = module.talos_worker_group[each.key].public_ip
233+
node = module.talos_worker_group[each.key].private_ip
242234
}
243235

244236
data "talos_client_configuration" "this" {
245237
cluster_name = var.cluster_name
246238
client_configuration = talos_machine_secrets.this.client_configuration
247-
endpoints = [module.elb_k8s_elb.elb_dns_name]
248-
nodes = var.use_private_ips_only ? module.talos_control_plane_nodes.*.private_ip : module.talos_control_plane_nodes.*.public_ip
239+
endpoints = [aws_lb.api.dns_name]
240+
nodes = flatten([module.talos_control_plane_nodes.*.private_ip, flatten([for node in module.talos_worker_group : node.private_ip])])
249241
}
250242

251243
resource "local_file" "talosconfig" {
@@ -257,8 +249,8 @@ resource "talos_cluster_kubeconfig" "this" {
257249
depends_on = [talos_machine_bootstrap.this]
258250

259251
client_configuration = talos_machine_secrets.this.client_configuration
260-
endpoint = module.elb_k8s_elb.elb_dns_name
261-
node = var.use_private_ips_only ? module.talos_control_plane_nodes.0.private_ip : module.talos_control_plane_nodes.0.public_ip
252+
endpoint = module.talos_control_plane_nodes.0.public_ip
253+
node = module.talos_control_plane_nodes.0.private_ip
262254
}
263255

264256
resource "local_file" "kubeconfig" {

0 commit comments

Comments
 (0)