Skip to content

Commit 53a4d2a

Browse files
authored
Merge branch 'main' into kubernetes-cluster
2 parents d126d5e + 68566b5 commit 53a4d2a

File tree

135 files changed

+2373
-368
lines changed

Some content is hidden

Large commits have some content hidden by default. Use the search box below for content that may be hidden.

135 files changed

+2373
-368
lines changed

cspell.config.yaml

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,7 +212,8 @@ words: [
212212
"astro",
213213
"shellsession",
214214
"downscaling",
215-
"mongodbatlas"
215+
"mongodbatlas",
216+
"PersistentVolumes",
216217
]
217218
languageSettings:
218219
- languageId: markdown,mdx

flake.lock

Lines changed: 4 additions & 4 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

flake.nix

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
tfUtilsPkgsSrc.url = "github:NixOS/nixpkgs/93dc9803a1ee435e590b02cde9589038d5cc3a4e";
1010
buildkitPkgsSrc.url = "github:NixOS/nixpkgs/226216574ada4c3ecefcbbec41f39ce4655f78ef";
1111
redisPkgsSrc.url = "github:NixOS/nixpkgs/226216574ada4c3ecefcbbec41f39ce4655f78ef";
12-
postgresPkgsSrc.url = "github:NixOS/nixpkgs/daf7bb95821b789db24fc1ac21f613db0c1bf2cb";
12+
postgresPkgsSrc.url = "github:NixOS/nixpkgs/16e046229f3b4f53257973a5532bcbb72457d2f2";
1313
vaultPkgsSrc.url = "github:NixOS/nixpkgs/325eb628b89b9a8183256f62d017bfb499b19bd9";
1414
linkerdPkgsSrc.url = "github:NixOS/nixpkgs/226216574ada4c3ecefcbbec41f39ce4655f78ef";
1515
kyvernoPkgsSrc.url = "github:NixOS/nixpkgs/226216574ada4c3ecefcbbec41f39ce4655f78ef";

packages/infrastructure/aws_eks/main.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -571,7 +571,7 @@ resource "aws_eks_node_group" "controllers" {
571571
# During bootstrapping, we should prevent disruptions as much as possible
572572
# but after Karpenter is running, we should make the EKS nodes spot as
573573
# they can already be disrupted at inconvenient times due to 'force_update_version = true'
574-
capacity_type = var.bootstrap_mode_enabled ? "ON_DEMAND" : "SPOT"
574+
capacity_type = var.bootstrap_mode_enabled || !var.spot_nodes_enabled ? "ON_DEMAND" : "SPOT"
575575

576576
tags = merge(local.instance_tags, {
577577
description = local.controller_nodes_description
@@ -586,7 +586,7 @@ resource "aws_eks_node_group" "controllers" {
586586
value = module.constants.cilium_taint.value
587587
}
588588
dynamic "taint" {
589-
for_each = var.bootstrap_mode_enabled ? toset(["arm64"]) : toset(["burstable", "spot", "arm64"])
589+
for_each = var.bootstrap_mode_enabled ? toset(["arm64"]) : (var.spot_nodes_enabled ? toset(["spot", "burstable", "arm64"]) : toset(["burstable", "arm64"]))
590590
content {
591591
effect = "NO_SCHEDULE"
592592
key = taint.key

packages/infrastructure/aws_eks/vars.tf

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,12 @@ variable "node_ami_name" {
118118
default = "bottlerocket-aws-k8s-1.30-aarch64-v1.28.0-0ab4fab4"
119119
}
120120

121+
variable "spot_nodes_enabled" {
122+
description = "Whether to create spot instances instead of on-demand instances"
123+
type = bool
124+
default = true
125+
}
126+
121127
################################################################################
122128
## Access Control
123129
################################################################################

packages/infrastructure/kube_argo/main.tf

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,8 +55,9 @@ module "util_controller" {
5555
az_spread_required = false // single instance
5656
panfactum_scheduler_enabled = var.panfactum_scheduler_enabled
5757
pull_through_cache_enabled = var.pull_through_cache_enabled
58-
burstable_nodes_enabled = true
59-
controller_nodes_enabled = true
58+
burstable_nodes_enabled = var.burstable_nodes_enabled
59+
controller_nodes_enabled = var.controller_nodes_enabled
60+
spot_nodes_enabled = var.spot_nodes_enabled
6061
extra_labels = data.pf_kube_labels.labels.labels
6162
}
6263

@@ -68,8 +69,9 @@ module "util_server" {
6869
az_spread_preferred = var.sla_target >= 2
6970
panfactum_scheduler_enabled = var.panfactum_scheduler_enabled
7071
pull_through_cache_enabled = var.pull_through_cache_enabled
71-
burstable_nodes_enabled = true
72-
controller_nodes_enabled = true
72+
burstable_nodes_enabled = var.burstable_nodes_enabled
73+
controller_nodes_enabled = var.controller_nodes_enabled
74+
spot_nodes_enabled = var.spot_nodes_enabled
7375
extra_labels = data.pf_kube_labels.labels.labels
7476
}
7577

@@ -82,8 +84,9 @@ module "util_events_controller" {
8284
instance_type_anti_affinity_required = false // single instance
8385
az_spread_preferred = false // single instance
8486
az_spread_required = false // single instance
85-
burstable_nodes_enabled = true
86-
controller_nodes_enabled = true
87+
burstable_nodes_enabled = var.burstable_nodes_enabled
88+
controller_nodes_enabled = var.controller_nodes_enabled
89+
spot_nodes_enabled = var.spot_nodes_enabled
8790
extra_labels = data.pf_kube_labels.labels.labels
8891
}
8992

@@ -95,8 +98,9 @@ module "util_webhook" {
9598
az_spread_preferred = var.sla_target >= 2
9699
panfactum_scheduler_enabled = var.panfactum_scheduler_enabled
97100
pull_through_cache_enabled = var.pull_through_cache_enabled
98-
burstable_nodes_enabled = true
99-
controller_nodes_enabled = true
101+
burstable_nodes_enabled = var.burstable_nodes_enabled
102+
controller_nodes_enabled = var.controller_nodes_enabled
103+
spot_nodes_enabled = var.spot_nodes_enabled
100104
extra_labels = data.pf_kube_labels.labels.labels
101105
}
102106

@@ -258,7 +262,9 @@ module "database" {
258262
pg_smart_shutdown_timeout = 2
259263
aws_iam_ip_allow_list = var.aws_iam_ip_allow_list
260264
pull_through_cache_enabled = var.pull_through_cache_enabled
261-
burstable_nodes_enabled = true
265+
burstable_nodes_enabled = var.burstable_nodes_enabled
266+
spot_nodes_enabled = var.spot_nodes_enabled
267+
controller_nodes_enabled = false // should not run on controller nodes which can cause disruptions
262268
backups_force_delete = true
263269
monitoring_enabled = var.monitoring_enabled
264270
panfactum_scheduler_enabled = var.panfactum_scheduler_enabled
@@ -274,6 +280,7 @@ module "database" {
274280
pgbouncer_minimum_memory_mb = var.pgbouncer_minimum_memory_mb
275281
pgbouncer_maximum_memory_mb = var.pgbouncer_maximum_memory_mb
276282

283+
pg_backup_directory = var.db_backup_directory
277284
pg_recovery_mode_enabled = var.db_recovery_mode_enabled
278285
pg_recovery_directory = var.db_recovery_directory
279286
pg_recovery_target_time = var.db_recovery_target_time

packages/infrastructure/kube_argo/outputs.tf

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,12 @@ output "artifact_bucket_name" {
88
value = module.artifact_bucket.bucket_name
99
}
1010

11-
output "db_recovery_directory" {
11+
output "db_backup_bucket" {
12+
description = "The name of the S3 bucket that contains the PostgreSQL backups and WAL archives"
13+
value = module.database.backup_bucket_name
14+
}
15+
16+
output "db_backup_directory" {
1217
description = "The name of the directory in the backup bucket that contains the PostgreSQL backups and WAL archives"
13-
value = module.database.recovery_directory
18+
value = module.database.backup_directory
1419
}

packages/infrastructure/kube_argo/vars.tf

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -104,6 +104,12 @@ variable "db_recovery_directory" {
104104
default = null
105105
}
106106

107+
variable "db_backup_directory" {
108+
description = "The name of the directory in the backup bucket containing the backup files for the database."
109+
type = string
110+
default = "initial"
111+
}
112+
107113
variable "db_recovery_target_time" {
108114
description = "If provided, will recover the PostgreSQL database to the indicated target time in RFC 3339 format rather than to the latest data."
109115
type = string
@@ -113,11 +119,11 @@ variable "db_recovery_target_time" {
113119
variable "pg_minimum_memory_mb" {
114120
description = "The minimum amount of memory to allocate to the postgres pods (in Mi)"
115121
type = number
116-
default = 400
122+
default = 500
117123

118124
validation {
119-
condition = var.pg_minimum_memory_mb >= 400
120-
error_message = "Must provide at least 400MB of memory"
125+
condition = var.pg_minimum_memory_mb >= 500
126+
error_message = "Must provide at least 500MB of memory"
121127
}
122128
}
123129

@@ -188,4 +194,22 @@ variable "wait" {
188194
description = "Wait for resources to be in a ready state before proceeding. Disabling this flag will allow upgrades to proceed faster but will disable automatic rollbacks. As a result, manual intervention may be required for deployment failures."
189195
type = bool
190196
default = true
197+
}
198+
199+
variable "spot_nodes_enabled" {
200+
description = "Whether to allow pods to schedule on spot nodes"
201+
type = bool
202+
default = true
203+
}
204+
205+
variable "burstable_nodes_enabled" {
206+
description = "Whether to allow pods to schedule on burstable nodes"
207+
type = bool
208+
default = true
209+
}
210+
211+
variable "controller_nodes_enabled" {
212+
description = "Whether to allow pods to schedule on EKS Node Group nodes (controller nodes)"
213+
type = bool
214+
default = true
191215
}

packages/infrastructure/kube_argo_event_source/main.tf

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,8 @@ module "util" {
4545
instance_type_anti_affinity_required = var.replicas > 1 && var.instance_type_anti_affinity_required
4646
az_spread_preferred = var.replicas > 1 && var.az_spread_preferred
4747

48-
burstable_nodes_enabled = true
49-
controller_nodes_enabled = true
48+
burstable_nodes_enabled = var.burstable_nodes_enabled
49+
controller_nodes_enabled = var.controller_nodes_enabled
5050
spot_nodes_enabled = var.spot_nodes_enabled
5151
panfactum_scheduler_enabled = var.panfactum_scheduler_enabled
5252
extra_labels = data.pf_kube_labels.labels.labels

packages/infrastructure/kube_argo_event_source/vars.tf

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,19 @@ variable "panfactum_scheduler_enabled" {
3838
}
3939

4040
variable "spot_nodes_enabled" {
41-
description = "Whether EventSource pods can be run on spot nodes"
41+
description = "Whether to allow pods to schedule on spot nodes"
42+
type = bool
43+
default = true
44+
}
45+
46+
variable "burstable_nodes_enabled" {
47+
description = "Whether to allow pods to schedule on burstable nodes"
48+
type = bool
49+
default = true
50+
}
51+
52+
variable "controller_nodes_enabled" {
53+
description = "Whether to allow pods to schedule on EKS Node Group nodes (controller nodes)"
4254
type = bool
4355
default = true
4456
}

0 commit comments

Comments
 (0)