Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
34 changes: 32 additions & 2 deletions autogen/main/cluster.tf.tmpl
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ resource "google_container_cluster" "primary" {
for_each = local.confidential_node_config
content {
enabled = confidential_nodes.value.enabled
{% if autopilot_cluster != true %}
confidential_instance_type = lookup(var.node_pools[0], "confidential_instance_type", null)
{% endif %}
}
}

Expand Down Expand Up @@ -594,13 +597,39 @@ resource "google_container_cluster" "primary" {
min_cpu_platform = lookup(var.node_pools[0], "min_cpu_platform", "")
enable_confidential_storage = lookup(var.node_pools[0], "enable_confidential_storage", false)
disk_type = lookup(var.node_pools[0], "disk_type", null)
preemptible = lookup(var.node_pools[0], "preemptible", false)
spot = lookup(var.node_pools[0], "spot", false)
# Emits a gcfs_config block only when the first entry of var.node_pools defines
# "enable_gcfs"; the configured value is passed through unchanged as `enabled`.
dynamic "gcfs_config" {
for_each = lookup(var.node_pools[0], "enable_gcfs", null) != null ? [var.node_pools[0].enable_gcfs] : []
content {
enabled = gcfs_config.value
}
}

# Emits a guest_accelerator block when the first entry of var.node_pools asks
# for a positive "accelerator_count". All settings are read from that same entry.
dynamic "guest_accelerator" {
for_each = lookup(var.node_pools[0], "accelerator_count", 0) > 0 ? [1] : []
content {
type = lookup(var.node_pools[0], "accelerator_type", "")
count = lookup(var.node_pools[0], "accelerator_count", 0)
gpu_partition_size = lookup(var.node_pools[0], "gpu_partition_size", null)

# Only emitted when "gpu_driver_version" is set to a non-empty string.
dynamic "gpu_driver_installation_config" {
for_each = lookup(var.node_pools[0], "gpu_driver_version", "") != "" ? [1] : []
content {
gpu_driver_version = lookup(var.node_pools[0], "gpu_driver_version", "")
}
}

# Only emitted when "gpu_sharing_strategy" is set to a non-empty string;
# max_shared_clients_per_gpu falls back to 2 when the pool does not set it.
dynamic "gpu_sharing_config" {
for_each = lookup(var.node_pools[0], "gpu_sharing_strategy", "") != "" ? [1] : []
content {
gpu_sharing_strategy = lookup(var.node_pools[0], "gpu_sharing_strategy", "")
max_shared_clients_per_gpu = lookup(var.node_pools[0], "max_shared_clients_per_gpu", 2)
}
}
}
}

dynamic "gvnic" {
for_each = lookup(var.node_pools[0], "enable_gvnic", false) ? [true] : []
content {
Expand Down Expand Up @@ -1300,9 +1329,10 @@ resource "google_container_node_pool" "windows_pools" {
}

# Emits a confidential_nodes block only when the pool defines
# "enable_confidential_nodes". The single list element is an object carrying
# both settings so the content block can read them by name.
dynamic "confidential_nodes" {
  for_each = lookup(each.value, "enable_confidential_nodes", null) != null ? [{ enabled = each.value.enable_confidential_nodes, confidential_instance_type = lookup(each.value, "confidential_instance_type", null) }] : []
  content {
    # A dynamic block's iterator exposes the current element under `.value`;
    # reading attributes directly off the iterator is an unsupported-attribute error.
    enabled                    = confidential_nodes.value.enabled
    confidential_instance_type = confidential_nodes.value.confidential_instance_type
  }
}

Expand Down
39 changes: 34 additions & 5 deletions cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,8 @@ resource "google_container_cluster" "primary" {
# Cluster-level confidential nodes settings: one block per element of
# local.confidential_node_config. The instance type is read from the first
# entry of var.node_pools and is null when that entry does not set it.
dynamic "confidential_nodes" {
  for_each = local.confidential_node_config
  content {
    # `enabled` must be defined exactly once; HCL rejects a redefined attribute.
    enabled                    = confidential_nodes.value.enabled
    confidential_instance_type = lookup(var.node_pools[0], "confidential_instance_type", null)
  }
}

Expand Down Expand Up @@ -454,13 +455,39 @@ resource "google_container_cluster" "primary" {
min_cpu_platform = lookup(var.node_pools[0], "min_cpu_platform", "")
enable_confidential_storage = lookup(var.node_pools[0], "enable_confidential_storage", false)
disk_type = lookup(var.node_pools[0], "disk_type", null)
preemptible = lookup(var.node_pools[0], "preemptible", false)
spot = lookup(var.node_pools[0], "spot", false)
# Emits a gcfs_config block only when the first entry of var.node_pools defines
# "enable_gcfs"; the configured value is passed through unchanged as `enabled`.
dynamic "gcfs_config" {
for_each = lookup(var.node_pools[0], "enable_gcfs", null) != null ? [var.node_pools[0].enable_gcfs] : []
content {
enabled = gcfs_config.value
}
}

# Emits a guest_accelerator block when the first entry of var.node_pools asks
# for a positive "accelerator_count". All settings are read from that same entry.
dynamic "guest_accelerator" {
for_each = lookup(var.node_pools[0], "accelerator_count", 0) > 0 ? [1] : []
content {
type = lookup(var.node_pools[0], "accelerator_type", "")
count = lookup(var.node_pools[0], "accelerator_count", 0)
gpu_partition_size = lookup(var.node_pools[0], "gpu_partition_size", null)

# Only emitted when "gpu_driver_version" is set to a non-empty string.
dynamic "gpu_driver_installation_config" {
for_each = lookup(var.node_pools[0], "gpu_driver_version", "") != "" ? [1] : []
content {
gpu_driver_version = lookup(var.node_pools[0], "gpu_driver_version", "")
}
}

# Only emitted when "gpu_sharing_strategy" is set to a non-empty string;
# max_shared_clients_per_gpu falls back to 2 when the pool does not set it.
dynamic "gpu_sharing_config" {
for_each = lookup(var.node_pools[0], "gpu_sharing_strategy", "") != "" ? [1] : []
content {
gpu_sharing_strategy = lookup(var.node_pools[0], "gpu_sharing_strategy", "")
max_shared_clients_per_gpu = lookup(var.node_pools[0], "max_shared_clients_per_gpu", 2)
}
}
}
}

dynamic "gvnic" {
for_each = lookup(var.node_pools[0], "enable_gvnic", false) ? [true] : []
content {
Expand Down Expand Up @@ -984,9 +1011,10 @@ resource "google_container_node_pool" "pools" {
}

# Emits a confidential_nodes block only when the pool defines
# "enable_confidential_nodes". The single list element is an object carrying
# both settings so the content block can read them by name.
dynamic "confidential_nodes" {
  for_each = lookup(each.value, "enable_confidential_nodes", null) != null ? [{ enabled = each.value.enable_confidential_nodes, confidential_instance_type = lookup(each.value, "confidential_instance_type", null) }] : []
  content {
    # A dynamic block's iterator exposes the current element under `.value`;
    # reading attributes directly off the iterator is an unsupported-attribute error.
    enabled                    = confidential_nodes.value.enabled
    confidential_instance_type = confidential_nodes.value.confidential_instance_type
  }
}

Expand Down Expand Up @@ -1317,9 +1345,10 @@ resource "google_container_node_pool" "windows_pools" {
}

# Emits a confidential_nodes block only when the pool defines
# "enable_confidential_nodes". The single list element is an object carrying
# both settings so the content block can read them by name.
dynamic "confidential_nodes" {
  for_each = lookup(each.value, "enable_confidential_nodes", null) != null ? [{ enabled = each.value.enable_confidential_nodes, confidential_instance_type = lookup(each.value, "confidential_instance_type", null) }] : []
  content {
    # A dynamic block's iterator exposes the current element under `.value`;
    # reading attributes directly off the iterator is an unsupported-attribute error.
    enabled                    = confidential_nodes.value.enabled
    confidential_instance_type = confidential_nodes.value.confidential_instance_type
  }
}

Expand Down
43 changes: 43 additions & 0 deletions examples/confidential_gpu_public/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
# Confidential GPU GKE Cluster

This example illustrates how to instantiate the Beta Public Cluster module
with confidential nodes enabled, the database encrypted with a KMS key,
and encrypted GPU workloads using NVIDIA Confidential Computing.
The example also installs the NVIDIA drivers on the GPU nodes, so the cluster is
ready to receive workloads.
See more: https://cloud.google.com/kubernetes-engine/docs/how-to/gpus-confidential-nodes.

<!-- BEGINNING OF PRE-COMMIT-TERRAFORM DOCS HOOK -->
## Inputs

| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| project\_id | The project ID to host the cluster in. | `string` | n/a | yes |
| region | The region to host the cluster in. | `string` | `"us-central1"` | no |
| zones | The zones to host the nodes in. The nodes must be in a zone that supports NVIDIA Confidential Computing. For more information, [view supported zones](https://cloud.google.com/confidential-computing/confidential-vm/docs/supported-configurations#nvidia-confidential-computing_1). | `list(string)` | <pre>[<br> "us-central1-a"<br>]</pre> | no |

## Outputs

| Name | Description |
|------|-------------|
| ca\_certificate | The cluster ca certificate (base64 encoded). |
| cluster\_name | Cluster name. |
| keyring | The name of the keyring. |
| kms\_key\_name | KMS Key Name. |
| kubernetes\_endpoint | The cluster endpoint. |
| location | n/a |
| master\_kubernetes\_version | Kubernetes version of the master. |
| network\_name | The name of the VPC being created. |
| project\_id | The project ID the cluster is in. |
| region | The region in which the cluster resides. |
| service\_account | The service account to default running nodes as if not overridden in `node_pools`. |
| subnet\_names | The names of the subnet being created. |
| zones | List of zones in which the cluster resides. |

<!-- END OF PRE-COMMIT-TERRAFORM DOCS HOOK -->

To provision this example, run the following from within this directory:
- `terraform init` to get the plugins
- `terraform plan` to see the infrastructure plan
- `terraform apply` to apply the infrastructure build
- `terraform destroy` to destroy the built infrastructure
41 changes: 41 additions & 0 deletions examples/confidential_gpu_public/kms.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

# Name of the KMS key used by this example; it carries the random suffix and
# is also the lookup key into module.kms.keys.
locals {
key_name = "gke-key-${random_string.suffix.result}"
}

# Creates a keyring in var.region containing the single key local.key_name.
module "kms" {
source = "terraform-google-modules/kms/google"
version = "~> 4.0"
project_id = var.project_id
location = var.region
keyring = "gke-keyring-${random_string.suffix.result}"
keys = [local.key_name]
# NOTE(review): presumably disabled so the example can be torn down with
# `terraform destroy` — confirm against the kms module documentation.
prevent_destroy = false
}

# Service identity of container.googleapis.com in the project, managed through
# the google-beta provider. Its `member` is granted access to the KMS key by
# google_kms_crypto_key_iam_member.sm_sa_encrypter_decrypter.
resource "google_project_service_identity" "container_identity" {
provider = google-beta
project = var.project_id
service = "container.googleapis.com"
}

# Grants the container.googleapis.com service identity the
# cryptoKeyEncrypterDecrypter role on the example's KMS key.
resource "google_kms_crypto_key_iam_member" "sm_sa_encrypter_decrypter" {
role = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
member = google_project_service_identity.container_identity.member
crypto_key_id = module.kms.keys[local.key_name]
}
110 changes: 110 additions & 0 deletions examples/confidential_gpu_public/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

# Names shared by the resources and modules of this example. The network and
# secondary range names carry the random suffix; the two subnet names are fixed.
locals {
  cluster_type           = "confidential-gpu"
  network_name           = "confidential-gpu-network-${random_string.suffix.result}"
  subnet_name            = "confidential-gpu-subnet"
  master_auth_subnetwork = "confidential-gpu-master-subnet"
  pods_range_name        = "ip-range-pods-${random_string.suffix.result}"
  svc_range_name         = "ip-range-svc-${random_string.suffix.result}"

  # Last "/"-separated segment of every subnet self link.
  subnet_names = [
    for link in module.gcp-network.subnets_self_links :
    reverse(split("/", link))[0]
  ]
}

# Four-character suffix without special or upper-case characters, appended to
# the resource names built in this example.
resource "random_string" "suffix" {
length = 4
special = false
upper = false
}

# Looks up the project so its number can be used to build the compute-system
# service account address in google_kms_crypto_key_iam_member.main.
data "google_project" "main" {
project_id = var.project_id
}

# Grants the project's compute-system service account the
# cryptoKeyEncrypterDecrypter role on the example's KMS key.
# NOTE(review): presumably required because the node pool sets
# boot_disk_kms_key to the same key — confirm.
resource "google_kms_crypto_key_iam_member" "main" {
crypto_key_id = module.kms.keys[local.key_name]
role = "roles/cloudkms.cryptoKeyEncrypterDecrypter"
member = "serviceAccount:service-${data.google_project.main.number}@compute-system.iam.gserviceaccount.com"
}

# Supplies the access token used by the kubernetes provider configuration.
data "google_client_config" "default" {}

# Points the kubernetes provider at the cluster created by module.gke, using
# the Google client access token and the base64-decoded cluster CA certificate.
provider "kubernetes" {
host = "https://${module.gke.endpoint}"
token = data.google_client_config.default.access_token
cluster_ca_certificate = base64decode(module.gke.ca_certificate)
}

# Beta public cluster with confidential nodes enabled, database encryption
# backed by the example's KMS key, and a single GPU node pool.
# NOTE(review): nothing here orders cluster creation after the KMS IAM grants
# (google_kms_crypto_key_iam_member.main / .sm_sa_encrypter_decrypter) — confirm
# whether an explicit dependency is needed.
module "gke" {
source = "terraform-google-modules/kubernetes-engine/google//modules/beta-public-cluster"
version = "~> 37.0"

project_id = var.project_id
name = "${local.cluster_type}-cluster-${random_string.suffix.result}"
region = var.region
zones = var.zones
network = module.gcp-network.network_name
# Resolve the subnet by its position in the network module's subnets_names output.
subnetwork = local.subnet_names[index(module.gcp-network.subnets_names, local.subnet_name)]
ip_range_pods = local.pods_range_name
ip_range_services = local.svc_range_name
create_service_account = false
initial_node_count = 1
remove_default_node_pool = true
disable_legacy_metadata_endpoints = false
deletion_protection = false
service_account = "default"
logging_variant = "MAX_THROUGHPUT"
dns_allow_external_traffic = true

enable_confidential_nodes = true

# Database encryption with the key created by module.kms.
database_encryption = [
{
"key_name" : module.kms.keys[local.key_name],
"state" : "ENCRYPTED"
}
]

# One spot node pool with a single nvidia-h100-80gb accelerator, TDX as the
# confidential instance type, and a boot disk encrypted with the example's key.
node_pools = [
{
name = "default"
machine_type = "a3-highgpu-1g"
confidential_instance_type = "TDX"
spot = true
disk_type = "hyperdisk-balanced"
boot_disk_kms_key = module.kms.keys[local.key_name]
enable_confidential_storage = true
accelerator_count = 1
accelerator_type = "nvidia-h100-80gb"
# Driver installation is disabled here; module.kubectl applies the
# driver-installer daemonset manifest instead.
gpu_driver_version = "INSTALLATION_DISABLED"
node_locations = join(",", var.zones)
local_ssd_ephemeral_storage_count = 2
},
]
}

# Applies the NVIDIA driver-installer daemonset manifest (daemonset-confidential.yaml)
# to the cluster on create and deletes it on destroy.
module "kubectl" {
source = "terraform-google-modules/gcloud/google//modules/kubectl-wrapper"
version = "~> 3.0"

project_id = var.project_id
cluster_name = module.gke.name
cluster_location = module.gke.location
# Ties this module to the cluster endpoint output of module.gke.
module_depends_on = [module.gke.endpoint]
kubectl_create_command = "kubectl apply -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/refs/heads/master/nvidia-driver-installer/cos/daemonset-confidential.yaml"
kubectl_destroy_command = "kubectl delete -f https://raw.githubusercontent.com/GoogleCloudPlatform/container-engine-accelerators/refs/heads/master/nvidia-driver-installer/cos/daemonset-confidential.yaml"
# NOTE(review): presumably relies on gcloud/kubectl already being installed
# where Terraform runs — confirm against the gcloud module documentation.
skip_download = true
}
52 changes: 52 additions & 0 deletions examples/confidential_gpu_public/network.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
/**
* Copyright 2025 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

# VPC for the example: two subnets in var.region, with the pod and service
# secondary ranges attached to the subnet named local.subnet_name.
module "gcp-network" {
source = "terraform-google-modules/network/google"
version = "~> 11.0"

project_id = var.project_id
network_name = local.network_name
routing_mode = "GLOBAL"

subnets = [
{
subnet_name = local.subnet_name
subnet_ip = "10.0.0.0/17"
subnet_region = var.region
subnet_private_access = true
},
{
subnet_name = local.master_auth_subnetwork
subnet_ip = "10.60.0.0/17"
subnet_region = var.region
subnet_private_access = true
},
]

# Range names match ip_range_pods / ip_range_services passed to module.gke.
secondary_ranges = {
(local.subnet_name) = [
{
range_name = local.pods_range_name
ip_cidr_range = "192.168.0.0/18"
},
{
range_name = local.svc_range_name
ip_cidr_range = "192.168.64.0/18"
},
]
}
}
Loading