Skip to content

Commit 5c0c3d8

Browse files
[CI] Refactor GKE cluster creation into a module
This patch refactors GKE cluster creation into a module. This enables reuse later on when we want to create a new cluster in a different region for an HA setup. Reviewers: gburgessiv, dschuff, lnihlen, Keenuts, cmtice. Reviewed By: lnihlen. Pull Request: #441
1 parent bf4cd44 commit 5c0c3d8

File tree

4 files changed

+142
-91
lines changed

4 files changed

+142
-91
lines changed

premerge/gke_cluster/main.tf

Lines changed: 92 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
resource "google_container_cluster" "llvm_premerge" { # GKE cluster that the node pools in this module attach to.
2+
name = var.cluster_name
3+
location = var.region # The node pools in this module use the same var.region as their location.
4+
5+
# We can't create a cluster with no node pool defined, but we want to only use
6+
# separately managed node pools. So we create the smallest possible default
7+
# node pool and immediately delete it.
8+
remove_default_node_pool = true
9+
initial_node_count = 1
10+
11+
# Set the networking mode to VPC Native to enable IP aliasing, which is required
12+
# for adding Windows nodes to the cluster.
13+
networking_mode = "VPC_NATIVE"
14+
ip_allocation_policy {} # Intentionally empty; presumably lets GKE choose the IP ranges - TODO confirm.
15+
}
16+
17+
resource "google_container_node_pool" "llvm_premerge_linux_service" { # Fixed-size pool; going by its name, presumably for Linux service workloads.
18+
name = "llvm-premerge-linux-service"
19+
location = var.region
20+
cluster = google_container_cluster.llvm_premerge.name
21+
node_count = 3 # Static size: unlike the other pools in this module there is no autoscaling block.
22+
23+
node_config {
24+
machine_type = "e2-highcpu-4" # No taint or labels are set on this pool.
25+
}
26+
}
27+
28+
resource "google_container_node_pool" "llvm_premerge_linux" { # Autoscaled pool (0 to 8 nodes) tainted and labelled premerge-platform=linux.
29+
name = "llvm-premerge-linux"
30+
location = var.region
31+
cluster = google_container_cluster.llvm_premerge.name
32+
initial_node_count = 0
33+
34+
autoscaling {
35+
total_min_node_count = 0
36+
total_max_node_count = 8
37+
}
38+
39+
node_config {
40+
machine_type = "n2-standard-64"
41+
taint { # Pods need a matching toleration for premerge-platform=linux to be scheduled here.
42+
key = "premerge-platform"
43+
value = "linux"
44+
effect = "NO_SCHEDULE"
45+
}
46+
labels = {
47+
"premerge-platform" : "linux"
48+
}
49+
disk_size_gb = 200
50+
# Terraform wants to recreate the node pool every time when running
51+
# terraform apply unless we explicitly set this.
52+
# TODO(boomanaiden154): Look into why Terraform is doing this so we do
53+
# not need this hack.
54+
resource_labels = {
55+
"goog-gke-node-pool-provisioning-model" = "on-demand"
56+
}
57+
}
58+
}
59+
60+
resource "google_container_node_pool" "llvm_premerge_windows" { # Autoscaled pool (0 to 16 nodes) labelled premerge-platform=windows.
61+
name = "llvm-premerge-windows"
62+
location = var.region
63+
cluster = google_container_cluster.llvm_premerge.name
64+
initial_node_count = 0
65+
66+
autoscaling {
67+
total_min_node_count = 0
68+
total_max_node_count = 16
69+
}
70+
71+
# We do not set a taint for the Windows nodes as Kubernetes by default sets
72+
# a node.kubernetes.io/os taint for Windows nodes.
73+
node_config {
74+
machine_type = "n2-standard-32"
75+
labels = {
76+
"premerge-platform" : "windows"
77+
}
78+
image_type = "WINDOWS_LTSC_CONTAINERD"
79+
# Add a script that runs on the initial boot to disable Windows Defender.
80+
# Windows Defender causes an increase in test times by approximately an
81+
# order of magnitude.
82+
metadata = {
83+
"sysprep-specialize-script-ps1" = "Set-MpPreference -DisableRealtimeMonitoring $true"
84+
# Terraform wants to recreate the node pool every time when running
85+
# terraform apply unless we explicitly set this.
86+
# TODO(boomanaiden154): Look into why Terraform is doing this so we do
87+
# not need this hack.
88+
"disable-legacy-endpoints" = "true"
89+
}
90+
disk_size_gb = 200
91+
}
92+
}

premerge/gke_cluster/outputs.tf

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
output "endpoint" { # Cluster endpoint; the root module uses it as the host for the helm and kubernetes providers.
2+
value = google_container_cluster.llvm_premerge.endpoint
3+
}
4+
5+
output "client_certificate" { # Exported as-is; the root module passes it through base64decode() for the helm provider.
6+
value = google_container_cluster.llvm_premerge.master_auth.0.client_certificate
7+
}
8+
9+
output "client_key" { # Exported as-is; the root module passes it through base64decode() for the helm provider.
10+
value = google_container_cluster.llvm_premerge.master_auth.0.client_key # NOTE(review): key material - consider marking this output `sensitive = true`; confirm against the provider schema.
11+
}
12+
13+
output "cluster_ca_certificate" { # Exported as-is; the root module base64decode()s it for both the helm and kubernetes providers.
14+
value = google_container_cluster.llvm_premerge.master_auth.0.cluster_ca_certificate
15+
}

premerge/gke_cluster/variables.tf

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
variable "cluster_name" { # Required (no default); used as the `name` of the google_container_cluster resource.
2+
description = "The name of the cluster"
3+
type = string
4+
}
5+
6+
variable "region" { # Required (no default); feeds the `location` argument of the cluster and every node pool in this module.
7+
description = "The region or zone to run the cluster in"
8+
type = string
9+
}

premerge/main.tf

Lines changed: 26 additions & 91 deletions
Original file line numberDiff line numberDiff line change
@@ -43,106 +43,41 @@ resource "local_file" "terraform_state" {
4343

4444
data "google_client_config" "current" {}
4545

46-
resource "google_container_cluster" "llvm_premerge" {
47-
name = var.cluster_name
48-
location = "us-central1-a"
49-
50-
# We can't create a cluster with no node pool defined, but we want to only use
51-
# separately managed node pools. So we create the smallest possible default
52-
# node pool and immediately delete it.
53-
remove_default_node_pool = true
54-
initial_node_count = 1
55-
56-
# Set the networking mode to VPC Native to enable IP aliasing, which is required
57-
# for adding windows nodes to the cluster.
58-
networking_mode = "VPC_NATIVE"
59-
ip_allocation_policy {}
60-
}
61-
62-
resource "google_container_node_pool" "llvm_premerge_linux_service" {
63-
name = "llvm-premerge-linux-service"
64-
location = "us-central1-a"
65-
cluster = google_container_cluster.llvm_premerge.name
66-
node_count = 3
67-
68-
node_config {
69-
machine_type = "e2-highcpu-4"
70-
}
46+
module "premerge_cluster" {
47+
source = "./gke_cluster"
48+
cluster_name = "llvm-premerge-prototype"
49+
region = "us-central1-a"
7150
}
7251

73-
resource "google_container_node_pool" "llvm_premerge_linux" {
74-
name = "llvm-premerge-linux"
75-
location = "us-central1-a"
76-
cluster = google_container_cluster.llvm_premerge.name
77-
initial_node_count = 0
78-
79-
autoscaling {
80-
total_min_node_count = 0
81-
total_max_node_count = 8
82-
}
83-
84-
node_config {
85-
machine_type = "n2-standard-64"
86-
taint {
87-
key = "premerge-platform"
88-
value = "linux"
89-
effect = "NO_SCHEDULE"
90-
}
91-
labels = {
92-
"premerge-platform" : "linux"
93-
}
94-
disk_size_gb = 200
95-
# Terraform wants to recreate the node pool everytime whe running
96-
# terraform apply unless we explicitly set this.
97-
# TODO(boomanaiden154): Look into why terraform is doing this so we do
98-
# not need this hack.
99-
resource_labels = {
100-
"goog-gke-node-pool-provisioning-model" = "on-demand"
101-
}
102-
}
52+
# TODO(boomanaiden154): Remove these moved blocks once we have finished
53+
# updating everything to use the new module.
54+
moved {
55+
from = google_container_cluster.llvm_premerge
56+
to = module.premerge_cluster.google_container_cluster.llvm_premerge
10357
}
10458

105-
resource "google_container_node_pool" "llvm_premerge_windows" {
106-
name = "llvm-premerge-windows"
107-
location = "us-central1-a"
108-
cluster = google_container_cluster.llvm_premerge.name
109-
initial_node_count = 0
59+
moved {
60+
from = google_container_node_pool.llvm_premerge_linux
61+
to = module.premerge_cluster.google_container_node_pool.llvm_premerge_linux
62+
}
11063

111-
autoscaling {
112-
total_min_node_count = 0
113-
total_max_node_count = 16
114-
}
64+
moved {
65+
from = google_container_node_pool.llvm_premerge_linux_service
66+
to = module.premerge_cluster.google_container_node_pool.llvm_premerge_linux_service
67+
}
11568

116-
# We do not set a taint for the windows nodes as kubernetes by default sets
117-
# a node.kubernetes.io/os taint for windows nodes.
118-
node_config {
119-
machine_type = "n2-standard-32"
120-
labels = {
121-
"premerge-platform" : "windows"
122-
}
123-
image_type = "WINDOWS_LTSC_CONTAINERD"
124-
# Add a script that runs on the initial boot to disable Windows Defender.
125-
# Windows Defender causes an increase in test times by approximately an
126-
# order of magnitude.
127-
metadata = {
128-
"sysprep-specialize-script-ps1" = "Set-MpPreference -DisableRealtimeMonitoring $true"
129-
# Terraform wants to recreate the node pool everytime whe running
130-
# terraform apply unless we explicitly set this.
131-
# TODO(boomanaiden154): Look into why terraform is doing this so we do
132-
# not need this hack.
133-
"disable-legacy-endpoints" = "true"
134-
}
135-
disk_size_gb = 200
136-
}
69+
moved {
70+
from = google_container_node_pool.llvm_premerge_windows
71+
to = module.premerge_cluster.google_container_node_pool.llvm_premerge_windows
13772
}
13873

13974
provider "helm" {
14075
kubernetes {
141-
host = google_container_cluster.llvm_premerge.endpoint
76+
host = module.premerge_cluster.endpoint
14277
token = data.google_client_config.current.access_token
143-
client_certificate = base64decode(google_container_cluster.llvm_premerge.master_auth.0.client_certificate)
144-
client_key = base64decode(google_container_cluster.llvm_premerge.master_auth.0.client_key)
145-
cluster_ca_certificate = base64decode(google_container_cluster.llvm_premerge.master_auth.0.cluster_ca_certificate)
78+
client_certificate = base64decode(module.premerge_cluster.client_certificate)
79+
client_key = base64decode(module.premerge_cluster.client_key)
80+
cluster_ca_certificate = base64decode(module.premerge_cluster.cluster_ca_certificate)
14681
}
14782
}
14883

@@ -163,10 +98,10 @@ data "google_secret_manager_secret_version" "grafana_token" {
16398
}
16499

165100
provider "kubernetes" {
166-
host = "https://${google_container_cluster.llvm_premerge.endpoint}"
101+
host = "https://${module.premerge_cluster.endpoint}"
167102
token = data.google_client_config.current.access_token
168103
cluster_ca_certificate = base64decode(
169-
google_container_cluster.llvm_premerge.master_auth[0].cluster_ca_certificate,
104+
module.premerge_cluster.cluster_ca_certificate
170105
)
171106
}
172107

0 commit comments

Comments
 (0)