diff --git a/.changelog/15474.txt b/.changelog/15474.txt new file mode 100644 index 00000000000..c01fbd36b36 --- /dev/null +++ b/.changelog/15474.txt @@ -0,0 +1,3 @@ +```release-note:enhancement +container: added `network_config.accelerator_network_profile` field to `google_container_cluster` and `google_container_node_pool` resources (beta) +``` diff --git a/google-beta/services/container/resource_container_cluster.go b/google-beta/services/container/resource_container_cluster.go index 32d5157cb4e..81136dfeea5 100644 --- a/google-beta/services/container/resource_container_cluster.go +++ b/google-beta/services/container/resource_container_cluster.go @@ -248,6 +248,8 @@ func ResourceContainerCluster() *schema.Resource { containerClusterEnableK8sBetaApisCustomizeDiff, containerClusterNodeVersionCustomizeDiff, tpgresource.SetDiffForLabelsWithCustomizedName("resource_labels"), + + clusterAcceleratorNetworkProfileCustomizeDiff, ), Timeouts: &schema.ResourceTimeout{ @@ -8417,3 +8419,156 @@ func containerClusterNodeVersionCustomizeDiffFunc(diff tpgresource.TerraformReso return nil } + +func clusterAcceleratorNetworkProfileCustomizeDiff(_ context.Context, diff *schema.ResourceDiff, meta any) error { + // 1. SKIP ON CREATE + if diff.Id() == "" { + return nil + } + + // 2. PREPARE TO UPDATE THE FULL LIST + oldNodePools := diff.Get("node_pool").([]interface{}) + newNodePools := make([]interface{}, len(oldNodePools)) + listChanged := false + + // We need Raw Config to check what the user actually wrote + rawConfig := diff.GetRawConfig() + rawNodePools := rawConfig.GetAttr("node_pool") + + // 3. ITERATE OVER ALL POOLS IN STATE + for i, np := range oldNodePools { + // Deep copy the node pool map + npMap := np.(map[string]interface{}) + newNpMap := make(map[string]interface{}) + for k, v := range npMap { + newNpMap[k] = v + } + + // Check if this specific node pool is actually defined in the Raw Config (Inline). + // If it is not in Raw Config, it is a Standalone resource (or API generated). + // We must not touch Standalone resources from the Cluster resource. 
+ isInline := false + currentName := npMap["name"].(string) + + // Iterate over Raw Config to find a match by name + if !rawNodePools.IsNull() && rawNodePools.Type().IsCollectionType() { + it := rawNodePools.ElementIterator() + for it.Next() { + _, val := it.Element() + rawNameVal := val.GetAttr("name") + if !rawNameVal.IsNull() && rawNameVal.AsString() == currentName { + isInline = true + break + } + } + } + + // If this is NOT an inline pool, copy it as-is and skip logic. + if !isInline { + newNodePools[i] = newNpMap + continue + } + + // A. DETECT USER CONFIG (Raw Config Check for this specific pool) + userHasAdditionalConfigs := false + + // Re-find the specific raw block for logic checking + if !rawNodePools.IsNull() { + it := rawNodePools.ElementIterator() + for it.Next() { + _, val := it.Element() + rawNameVal := val.GetAttr("name") + if !rawNameVal.IsNull() && rawNameVal.AsString() == currentName { + // We found the matching raw block, now check its network config + rawNc := val.GetAttr("network_config") + if !rawNc.IsNull() && rawNc.Type().IsCollectionType() { + ncIt := rawNc.ElementIterator() + for ncIt.Next() { + _, ncVal := ncIt.Element() + userConfig := ncVal.GetAttr("additional_node_network_configs") + if !userConfig.IsNull() && userConfig.LengthInt() > 0 { + userHasAdditionalConfigs = true + } + } + } + break + } + } + } + + // B. 
CHECK TRANSITION LOGIC + shouldClear := false + basePath := fmt.Sprintf("node_pool.%d", i) + networkConfigPath := basePath + ".network_config.0" + + oldProfile, newProfile := diff.GetChange(networkConfigPath + ".accelerator_network_profile") + + newProfileStr := "" + if newProfile != nil { + newProfileStr = newProfile.(string) + } + oldProfileStr := "" + if oldProfile != nil { + oldProfileStr = oldProfile.(string) + } + + anpIsActive := newProfileStr != "" + anpIsChanging := oldProfileStr != newProfileStr + + if !userHasAdditionalConfigs { + if anpIsActive && anpIsChanging { + shouldClear = true + } + if !anpIsActive { + shouldClear = true + } + } + + // Check if additional configs currently exist to avoid no-op + currentCount := 0 + if c, ok := diff.Get(networkConfigPath + ".additional_node_network_configs.#").(int); ok { + currentCount = c + } + if shouldClear && currentCount == 0 { + shouldClear = false + } + + // C. APPLY FIX TO THE MAP + if shouldClear { + log.Printf("[DEBUG] Cluster ANP CustomizeDiff: Clearing additional configs for INLINE pool %s", currentName) + + var newConfigMap map[string]interface{} + + if ncList, ok := newNpMap["network_config"].([]interface{}); ok && len(ncList) > 0 { + if existingMap, ok := ncList[0].(map[string]interface{}); ok { + newConfigMap = make(map[string]interface{}) + for k, v := range existingMap { + newConfigMap[k] = v + } + } + } + + if newConfigMap == nil { + newConfigMap = make(map[string]interface{}) + } + + newConfigMap["additional_node_network_configs"] = []interface{}{} + + if !anpIsActive { + newConfigMap["accelerator_network_profile"] = "" + } + + newNpMap["network_config"] = []interface{}{newConfigMap} + listChanged = true + } + + newNodePools[i] = newNpMap + } + + // 4. 
WRITE THE FULL LIST BACK + if listChanged { + return diff.SetNew("node_pool", newNodePools) + } + + return nil +} diff --git a/google-beta/services/container/resource_container_cluster_meta.yaml b/google-beta/services/container/resource_container_cluster_meta.yaml index 1399adcb9fe..76578c3bfc1 100644 --- a/google-beta/services/container/resource_container_cluster_meta.yaml +++ b/google-beta/services/container/resource_container_cluster_meta.yaml @@ -398,6 +398,8 @@ fields: api_field: 'nodePools.name' - field: 'node_pool.name_prefix' provider_only: true + - field: 'node_pool.network_config.accelerator_network_profile' + api_field: 'nodePools.networkConfig.acceleratorNetworkProfile' - field: 'node_pool.network_config.additional_node_network_configs.network' api_field: 'nodePools.networkConfig.additionalNodeNetworkConfigs.network' - field: 'node_pool.network_config.additional_node_network_configs.subnetwork' diff --git a/google-beta/services/container/resource_container_cluster_test.go b/google-beta/services/container/resource_container_cluster_test.go index 80487adabe9..f52f3896d88 100644 --- a/google-beta/services/container/resource_container_cluster_test.go +++ b/google-beta/services/container/resource_container_cluster_test.go @@ -16517,3 +16517,381 @@ resource "google_container_cluster" "with_kubelet_config" { } `, clusterName, networkName, subnetworkName, npName, npName) } + +func testAccContainerCluster_nodePool_acceleratorNetworkProfile(clusterName string) string { + return fmt.Sprintf(` +resource "google_container_cluster" "primary" { + name = "%s" + location = "us-central1-c" + + datapath_provider = "ADVANCED_DATAPATH" + ip_allocation_policy {} + deletion_protection = false + + node_pool { + name = "anp-pool" + initial_node_count = 0 + + // Flex start + queued_provisioning { + enabled = true + } + autoscaling { + min_node_count = 0 + max_node_count = 1 + } + node_config { + machine_type = "a3-highgpu-8g" + guest_accelerator { + type = "nvidia-h100-80gb" + 
count = 8 + gpu_driver_installation_config { + gpu_driver_version = "LATEST" + } + } + + // Disable Reservations for Flex Start + reservation_affinity { + consume_reservation_type = "NO_RESERVATION" + } + ephemeral_storage_local_ssd_config { + local_ssd_count = 16 + } + oauth_scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] + } + + network_config { + accelerator_network_profile = "auto" + } + } +} +`, clusterName) +} + +func TestAccContainerCluster_nodePool_acceleratorNetworkProfile(t *testing.T) { + t.Parallel() + + clusterName := fmt.Sprintf("tf-test-cluster-%s", acctest.RandString(t, 10)) + resourceName := "google_container_cluster.primary" + + acctest.VcrTest(t, resource.TestCase{ + PreCheck: func() { acctest.AccTestPreCheck(t) }, + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories(t), + CheckDestroy: testAccCheckContainerClusterDestroyProducer(t), + Steps: []resource.TestStep{ + { + Config: testAccContainerCluster_nodePool_acceleratorNetworkProfile(clusterName), + Check: resource.ComposeTestCheckFunc( + resource.TestCheckResourceAttr(resourceName, "name", clusterName), + resource.TestCheckResourceAttr(resourceName, "node_pool.#", "1"), + resource.TestCheckResourceAttr(resourceName, "node_pool.0.name", "anp-pool"), + resource.TestCheckResourceAttr(resourceName, "node_pool.0.node_config.0.machine_type", "a3-highgpu-8g"), + resource.TestCheckResourceAttr(resourceName, "node_pool.0.network_config.0.accelerator_network_profile", "auto"), + resource.TestCheckResourceAttrSet(resourceName, "node_pool.0.network_config.0.additional_node_network_configs.0.network"), + resource.TestCheckResourceAttrSet(resourceName, "node_pool.0.network_config.0.additional_node_network_configs.0.subnetwork"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: []string{"terraform_labels", "deletion_protection"}, + }, + }, + }) +} + +func 
testAccContainerCluster_nodePool_additionalNodeNetworkConfigs_manual(clusterName string) string { + return fmt.Sprintf(` +resource "google_compute_network" "main_net" { + name = "%[1]s-main-net" + auto_create_subnetworks = false +} + +resource "google_compute_subnetwork" "main_subnet" { + name = "%[1]s-main-subnet" + network = google_compute_network.main_net.name + ip_cidr_range = "10.0.0.0/24" + region = "us-central1" + private_ip_google_access = true +} + +// Secondary Network (Required for Manual Config) +resource "google_compute_network" "add_net" { + name = "%[1]s-add-net" + auto_create_subnetworks = false +} + +resource "google_compute_subnetwork" "add_subnet" { + name = "%[1]s-add-subnet" + network = google_compute_network.add_net.name + ip_cidr_range = "10.1.0.0/24" + region = "us-central1" + private_ip_google_access = true +} + +resource "google_container_cluster" "primary" { + name = "%[1]s" + location = "us-central1-c" + + network = google_compute_network.main_net.name + subnetwork = google_compute_subnetwork.main_subnet.name + + datapath_provider = "ADVANCED_DATAPATH" + ip_allocation_policy { + cluster_ipv4_cidr_block = "/16" + services_ipv4_cidr_block = "/22" + } + + deletion_protection = false + + node_pool { + name = "anp-pool" + initial_node_count = 0 + + // Enabling Flex Start + queued_provisioning { + enabled = true + } + autoscaling { + min_node_count = 0 + max_node_count = 1 + } + + node_config { + machine_type = "a3-highgpu-8g" + oauth_scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] + guest_accelerator { + type = "nvidia-h100-80gb" + count = 8 + gpu_driver_installation_config { + gpu_driver_version = "LATEST" + } + } + // Flex Start requirement + reservation_affinity { + consume_reservation_type = "NO_RESERVATION" + } + ephemeral_storage_local_ssd_config { + local_ssd_count = 16 + } + } + + network_config { + additional_node_network_configs { + network = google_compute_network.add_net.name + subnetwork = 
google_compute_subnetwork.add_subnet.name + } + } + } +} +`, clusterName) +} + +func testAccContainerCluster_nodePool_acceleratorNetworkProfile_basic(clusterName string) string { + return fmt.Sprintf(` +resource "google_compute_network" "main_net" { + name = "%[1]s-main-net" + auto_create_subnetworks = false +} + +resource "google_compute_subnetwork" "main_subnet" { + name = "%[1]s-main-subnet" + network = google_compute_network.main_net.name + ip_cidr_range = "10.0.0.0/24" + region = "us-central1" + private_ip_google_access = true +} + +resource "google_container_cluster" "primary" { + name = "%[1]s" + location = "us-central1-c" + + network = google_compute_network.main_net.name + subnetwork = google_compute_subnetwork.main_subnet.name + + datapath_provider = "ADVANCED_DATAPATH" + ip_allocation_policy { + cluster_ipv4_cidr_block = "/16" + services_ipv4_cidr_block = "/22" + } + + deletion_protection = false + + node_pool { + name = "anp-pool" + initial_node_count = 0 + + // Enabling Flex Start + queued_provisioning { + enabled = true + } + autoscaling { + min_node_count = 0 + max_node_count = 1 + } + + node_config { + machine_type = "a3-highgpu-8g" + oauth_scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] + guest_accelerator { + type = "nvidia-h100-80gb" + count = 8 + gpu_driver_installation_config { + gpu_driver_version = "LATEST" + } + } + // Flex Start requirement + reservation_affinity { + consume_reservation_type = "NO_RESERVATION" + } + ephemeral_storage_local_ssd_config { + local_ssd_count = 16 + } + } + + // TEST CHANGE: Removing network_config entirely + } +} +`, clusterName) +} + +func testAccContainerCluster_nodePool_acceleratorNetworkProfile_auto(clusterName string) string { + return fmt.Sprintf(` +resource "google_compute_network" "main_net" { + name = "%[1]s-main-net" + auto_create_subnetworks = false +} + +resource "google_compute_subnetwork" "main_subnet" { + name = "%[1]s-main-subnet" + network = google_compute_network.main_net.name + 
ip_cidr_range = "10.0.0.0/24" + region = "us-central1" + private_ip_google_access = true +} + +resource "google_container_cluster" "primary" { + name = "%[1]s" + location = "us-central1-c" + + network = google_compute_network.main_net.name + subnetwork = google_compute_subnetwork.main_subnet.name + + datapath_provider = "ADVANCED_DATAPATH" + ip_allocation_policy { + cluster_ipv4_cidr_block = "/16" + services_ipv4_cidr_block = "/22" + } + + deletion_protection = false + + node_pool { + name = "anp-pool" + initial_node_count = 0 + + // Enabling Flex Start + queued_provisioning { enabled = true } + autoscaling { + min_node_count = 0 + max_node_count = 1 + } + + node_config { + machine_type = "a3-highgpu-8g" + oauth_scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] + guest_accelerator { + type = "nvidia-h100-80gb" + count = 8 + gpu_driver_installation_config { + gpu_driver_version = "LATEST" + } + } + // Flex Start requirement + reservation_affinity { + consume_reservation_type = "NO_RESERVATION" + } + ephemeral_storage_local_ssd_config { + local_ssd_count = 16 + } + } + + // Setting ANP to AUTO + network_config { + accelerator_network_profile = "auto" + } + } +} +`, clusterName) +} + +func TestAccContainerCluster_nodePool_acceleratorNetworkProfile_Lifecycle(t *testing.T) { + t.Parallel() + + clusterName := fmt.Sprintf("tf-test-cluster-%s", acctest.RandString(t, 10)) + resourceName := "google_container_cluster.primary" + importIgnore := []string{"deletion_protection", "terraform_labels", "initial_node_count"} + + acctest.VcrTest(t, resource.TestCase{ + PreCheck: func() { acctest.AccTestPreCheck(t) }, + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories(t), + CheckDestroy: testAccCheckContainerClusterDestroyProducer(t), + Steps: []resource.TestStep{ + // Step 1: Create with Manual Config (ANP Off) + { + Config: testAccContainerCluster_nodePool_additionalNodeNetworkConfigs_manual(clusterName), + Check: resource.ComposeTestCheckFunc( + 
resource.TestCheckResourceAttr(resourceName, "name", clusterName), + // Verify manual config exists + resource.TestCheckResourceAttr(resourceName, "node_pool.0.network_config.0.additional_node_network_configs.#", "1"), + resource.TestCheckResourceAttr(resourceName, "node_pool.0.network_config.0.accelerator_network_profile", ""), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: importIgnore, + }, + // Step 2: Remove Manual Config (Expect Replacement) + { + Config: testAccContainerCluster_nodePool_acceleratorNetworkProfile_basic(clusterName), + ConfigPlanChecks: resource.ConfigPlanChecks{ + PreApply: []plancheck.PlanCheck{ + plancheck.ExpectResourceAction(resourceName, plancheck.ResourceActionReplace), + }, + }, + Check: resource.ComposeTestCheckFunc( + // Verify additional configs are gone + resource.TestCheckResourceAttr(resourceName, "node_pool.0.network_config.0.additional_node_network_configs.#", "0"), + resource.TestCheckResourceAttr(resourceName, "node_pool.0.network_config.0.accelerator_network_profile", ""), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: importIgnore, + }, + // Step 3: Enable ANP (Auto) (Expect Replacement Again) + { + Config: testAccContainerCluster_nodePool_acceleratorNetworkProfile_auto(clusterName), + ConfigPlanChecks: resource.ConfigPlanChecks{ + PreApply: []plancheck.PlanCheck{ + plancheck.ExpectResourceAction(resourceName, plancheck.ResourceActionReplace), + }, + }, + Check: resource.ComposeTestCheckFunc( + // Verify Auto is set + resource.TestCheckResourceAttr(resourceName, "node_pool.0.network_config.0.accelerator_network_profile", "auto"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: importIgnore, + }, + }, + }) +} diff --git a/google-beta/services/container/resource_container_node_pool.go 
b/google-beta/services/container/resource_container_node_pool.go index e00e6a786fb..65a88498346 100644 --- a/google-beta/services/container/resource_container_node_pool.go +++ b/google-beta/services/container/resource_container_node_pool.go @@ -212,6 +212,8 @@ func ResourceContainerNodePool() *schema.Resource { CustomizeDiff: customdiff.All( tpgresource.DefaultProviderProject, resourceNodeConfigEmptyGuestAccelerator, + + nodePoolAcceleratorNetworkProfileCustomizeDiff, ), UseJSONNumber: true, @@ -545,6 +547,13 @@ var schemaNodePool = map[string]*schema.Schema{ Description: `Networking configuration for this NodePool. If specified, it overrides the cluster-level defaults.`, Elem: &schema.Resource{ Schema: map[string]*schema.Schema{ + + "accelerator_network_profile": { + Type: schema.TypeString, + Description: "The accelerator network profile to use for this node pool.", + Optional: true, + ForceNew: true, + }, "create_pod_range": { Type: schema.TypeBool, Optional: true, @@ -575,6 +584,7 @@ var schemaNodePool = map[string]*schema.Schema{ "additional_node_network_configs": { Type: schema.TypeList, Optional: true, + Computed: true, ForceNew: true, Description: `We specify the additional node networks for this node pool using this list. 
Each node network corresponds to an additional interface`, Elem: &schema.Resource{ @@ -582,12 +592,14 @@ var schemaNodePool = map[string]*schema.Schema{ "network": { Type: schema.TypeString, Optional: true, + Computed: true, ForceNew: true, Description: `Name of the VPC where the additional interface belongs.`, }, "subnetwork": { Type: schema.TypeString, Optional: true, + Computed: true, ForceNew: true, Description: `Name of the subnetwork where the additional interface belongs.`, }, @@ -1486,6 +1498,8 @@ func flattenNodeNetworkConfig(c *container.NodeNetworkConfig, d *schema.Resource "additional_node_network_configs": flattenAdditionalNodeNetworkConfig(c.AdditionalNodeNetworkConfigs), "additional_pod_network_configs": flattenAdditionalPodNetworkConfig(c.AdditionalPodNetworkConfigs), "subnetwork": c.Subnetwork, + + "accelerator_network_profile": c.AcceleratorNetworkProfile, }) } return result @@ -1601,6 +1615,9 @@ func expandNodeNetworkConfig(v interface{}) *container.NodeNetworkConfig { } } + if v, ok := networkNodeConfig["accelerator_network_profile"]; ok { + nnc.AcceleratorNetworkProfile = v.(string) + } return nnc } @@ -2010,3 +2027,85 @@ func retryWhileIncompatibleOperation(timeout time.Duration, lockKey string, f fu return nil }) } + +func nodePoolAcceleratorNetworkProfileCustomizeDiff(_ context.Context, diff *schema.ResourceDiff, meta any) error { + log.Printf("[DEBUG] ANP CustomizeDiff: Running...") + + // 1. SKIP ON CREATE + if diff.Id() == "" { + return nil + } + + // 2. 
DETECT USER CONFIG + userHasAdditionalConfigs := false + rawConfig := diff.GetRawConfig() + rawNetworkConfig := rawConfig.GetAttr("network_config") + + if !rawNetworkConfig.IsNull() { + if rawNetworkConfig.Type().IsCollectionType() { + it := rawNetworkConfig.ElementIterator() + for it.Next() { + _, val := it.Element() + userConfig := val.GetAttr("additional_node_network_configs") + if !userConfig.IsNull() && userConfig.LengthInt() > 0 { + userHasAdditionalConfigs = true + break + } + } + } + } + + // 3. LOGIC CHECK + shouldClear := false + + oldProfile, newProfile := diff.GetChange("network_config.0.accelerator_network_profile") + anpIsActive := newProfile.(string) != "" + anpIsChanging := oldProfile.(string) != newProfile.(string) + + if !userHasAdditionalConfigs { + if anpIsActive && anpIsChanging { + shouldClear = true + } + if !anpIsActive { + shouldClear = true + } + } + + // Check the OLD state to avoid "Plan Not Empty" on defaults + currentCount := 0 + if c, ok := diff.Get("network_config.0.additional_node_network_configs.#").(int); ok { + currentCount = c + } + + if shouldClear && currentCount == 0 { + shouldClear = false + } + + // 4. 
EXECUTE THE FIX + if shouldClear { + var newConfigMap map[string]interface{} + existingConfigs := diff.Get("network_config").([]interface{}) + + if len(existingConfigs) > 0 && existingConfigs[0] != nil { + currentMap := existingConfigs[0].(map[string]interface{}) + newConfigMap = make(map[string]interface{}) + for k, v := range currentMap { + newConfigMap[k] = v + } + } else { + newConfigMap = make(map[string]interface{}) + } + + newConfigMap["additional_node_network_configs"] = []interface{}{} + if !anpIsActive { + newConfigMap["accelerator_network_profile"] = "" + } + + err := diff.SetNew("network_config", []interface{}{newConfigMap}) + if err != nil { + return fmt.Errorf("Error updating network_config: %s", err) + } + } + + return nil +} diff --git a/google-beta/services/container/resource_container_node_pool_meta.yaml b/google-beta/services/container/resource_container_node_pool_meta.yaml index 4f609bbe7e2..772e50d2eb0 100644 --- a/google-beta/services/container/resource_container_node_pool_meta.yaml +++ b/google-beta/services/container/resource_container_node_pool_meta.yaml @@ -22,6 +22,9 @@ fields: - api_field: 'name' - field: 'name_prefix' provider_only: true + + - field: 'network_config.accelerator_network_profile' + api_field: 'networkConfig.acceleratorNetworkProfile' - api_field: 'networkConfig.additionalNodeNetworkConfigs.network' - api_field: 'networkConfig.additionalNodeNetworkConfigs.subnetwork' - field: 'network_config.additional_pod_network_configs.max_pods_per_node' diff --git a/google-beta/services/container/resource_container_node_pool_test.go b/google-beta/services/container/resource_container_node_pool_test.go index 51ac3544f9c..dabcdb74abc 100644 --- a/google-beta/services/container/resource_container_node_pool_test.go +++ b/google-beta/services/container/resource_container_node_pool_test.go @@ -6490,3 +6490,402 @@ resource "google_container_node_pool" "np" { } `, cluster, np, networkName, subnetworkName, storagePoolResourceName, location) }
+ +func testAccContainerNodePool_acceleratorNetworkProfile(clusterName, npName string) string { + return fmt.Sprintf(` +resource "google_container_cluster" "primary" { + name = "%[1]s" + location = "us-central1-c" + + remove_default_node_pool = true + initial_node_count = 1 + + datapath_provider = "ADVANCED_DATAPATH" + ip_allocation_policy {} + deletion_protection = false +} + +resource "google_container_node_pool" "np" { + name = "%[2]s" + cluster = google_container_cluster.primary.id + location = "us-central1-c" + + initial_node_count = 0 + + // Flex start configuration + queued_provisioning { + enabled = true + } + + autoscaling { + min_node_count = 0 + max_node_count = 1 + } + + node_config { + machine_type = "a3-highgpu-8g" + + guest_accelerator { + type = "nvidia-h100-80gb" + count = 8 + gpu_driver_installation_config { + gpu_driver_version = "LATEST" + } + } + + // Disable Reservations for Flex Start + reservation_affinity { + consume_reservation_type = "NO_RESERVATION" + } + + ephemeral_storage_local_ssd_config { + local_ssd_count = 16 + } + + oauth_scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] + } + + network_config { + accelerator_network_profile = "auto" + } +} +`, clusterName, npName) +} + +func TestAccContainerNodePool_acceleratorNetworkProfile(t *testing.T) { + t.Parallel() + + clusterName := fmt.Sprintf("tf-test-cluster-%s", acctest.RandString(t, 10)) + npName := fmt.Sprintf("tf-test-nodepool-%s", acctest.RandString(t, 10)) + + resourceName := "google_container_node_pool.np" + + acctest.VcrTest(t, resource.TestCase{ + PreCheck: func() { acctest.AccTestPreCheck(t) }, + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories(t), + CheckDestroy: testAccCheckContainerNodePoolDestroyProducer(t), + Steps: []resource.TestStep{ + { + Config: testAccContainerNodePool_acceleratorNetworkProfile(clusterName, npName), + Check: resource.ComposeTestCheckFunc( + resource.TestCheckResourceAttr(resourceName, "name", npName), + 
resource.TestCheckResourceAttr(resourceName, "node_config.0.machine_type", "a3-highgpu-8g"), + resource.TestCheckResourceAttr(resourceName, "network_config.0.accelerator_network_profile", "auto"), + resource.TestCheckResourceAttrSet(resourceName, "network_config.0.additional_node_network_configs.0.network"), + resource.TestCheckResourceAttrSet(resourceName, "network_config.0.additional_node_network_configs.0.subnetwork"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: []string{"initial_node_count", "cluster", "terraform_labels", "deletion_protection"}, + }, + }, + }) +} + +func testAccContainerNodePool_acceleratorNetworkProfile_manual(clusterName, npName string) string { + return fmt.Sprintf(` +resource "google_compute_network" "main_net" { + name = "%[1]s-main-net" + auto_create_subnetworks = false +} + +resource "google_compute_subnetwork" "main_subnet" { + name = "%[1]s-main-subnet" + network = google_compute_network.main_net.name + ip_cidr_range = "10.0.0.0/24" + region = "us-central1" + private_ip_google_access = true +} + +// The Additional Network +resource "google_compute_network" "add_net" { + name = "%[1]s-add-net" + auto_create_subnetworks = false +} + +resource "google_compute_subnetwork" "add_subnet" { + name = "%[1]s-add-subnet" + network = google_compute_network.add_net.name + ip_cidr_range = "10.1.0.0/24" + region = "us-central1" +} + +resource "google_container_cluster" "cluster" { + name = "%[1]s" + location = "us-central1-c" + + network = google_compute_network.main_net.id + subnetwork = google_compute_subnetwork.main_subnet.id + + datapath_provider = "ADVANCED_DATAPATH" + + // Standard VPC-native setup + ip_allocation_policy { + cluster_ipv4_cidr_block = "/16" + services_ipv4_cidr_block = "/22" + } + + deletion_protection = false + initial_node_count = 1 +} + +resource "google_container_node_pool" "np" { + name = "%[2]s" + cluster = google_container_cluster.cluster.id + 
location = "us-central1-c" + initial_node_count = 0 + + // Enabling Flex Start + queued_provisioning { + enabled = true + } + autoscaling { + min_node_count = 0 + max_node_count = 1 + } + node_config { + machine_type = "a3-highgpu-8g" + oauth_scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] + guest_accelerator { + type = "nvidia-h100-80gb" + count = 8 + gpu_driver_installation_config { + gpu_driver_version = "LATEST" + } + } + // Flex Start requirement + reservation_affinity { + consume_reservation_type = "NO_RESERVATION" + } + ephemeral_storage_local_ssd_config { + local_ssd_count = 16 + } + } + + network_config { + additional_node_network_configs { + network = google_compute_network.add_net.name + subnetwork = google_compute_subnetwork.add_subnet.name + } + } +} +`, clusterName, npName) +} + +func testAccContainerNodePool_acceleratorNetworkProfile_basic(clusterName, npName string) string { + return fmt.Sprintf(` +resource "google_compute_network" "main_net" { + name = "%[1]s-main-net" + auto_create_subnetworks = false +} + +resource "google_compute_subnetwork" "main_subnet" { + name = "%[1]s-main-subnet" + network = google_compute_network.main_net.name + ip_cidr_range = "10.0.0.0/24" + region = "us-central1" + private_ip_google_access = true +} + +resource "google_container_cluster" "cluster" { + name = "%[1]s" + location = "us-central1-c" + + network = google_compute_network.main_net.id + subnetwork = google_compute_subnetwork.main_subnet.id + + datapath_provider = "ADVANCED_DATAPATH" + + ip_allocation_policy { + cluster_ipv4_cidr_block = "/16" + services_ipv4_cidr_block = "/22" + } + + deletion_protection = false + initial_node_count = 1 +} + +resource "google_container_node_pool" "np" { + name = "%[2]s" + cluster = google_container_cluster.cluster.id + location = "us-central1-c" + initial_node_count = 0 + + // Enabling Flex Start + queued_provisioning { + enabled = true + } + autoscaling { + min_node_count = 0 + max_node_count = 1 + } + 
node_config { + machine_type = "a3-highgpu-8g" + oauth_scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] + guest_accelerator { + type = "nvidia-h100-80gb" + count = 8 + gpu_driver_installation_config { + gpu_driver_version = "LATEST" + } + } + // Flex Start requirement + reservation_affinity { + consume_reservation_type = "NO_RESERVATION" + } + ephemeral_storage_local_ssd_config { + local_ssd_count = 16 + } + } + + // TEST CHANGE: Removing network_config entirely +} +`, clusterName, npName) +} + +func testAccContainerNodePool_acceleratorNetworkProfile_auto(clusterName, npName string) string { + return fmt.Sprintf(` +resource "google_compute_network" "main_net" { + name = "%[1]s-main-net" + auto_create_subnetworks = false +} + +resource "google_compute_subnetwork" "main_subnet" { + name = "%[1]s-main-subnet" + network = google_compute_network.main_net.name + ip_cidr_range = "10.0.0.0/24" + region = "us-central1" + private_ip_google_access = true +} + +resource "google_container_cluster" "cluster" { + name = "%[1]s" + location = "us-central1-c" + + network = google_compute_network.main_net.id + subnetwork = google_compute_subnetwork.main_subnet.id + + datapath_provider = "ADVANCED_DATAPATH" + + ip_allocation_policy { + cluster_ipv4_cidr_block = "/16" + services_ipv4_cidr_block = "/22" + } + + deletion_protection = false + initial_node_count = 1 +} + +resource "google_container_node_pool" "np" { + name = "%[2]s" + cluster = google_container_cluster.cluster.id + location = "us-central1-c" + initial_node_count = 0 + + // Enabling Flex Start + queued_provisioning { + enabled = true + } + autoscaling { + min_node_count = 0 + max_node_count = 1 + } + node_config { + machine_type = "a3-highgpu-8g" + oauth_scopes = [ "https://www.googleapis.com/auth/cloud-platform" ] + guest_accelerator { + type = "nvidia-h100-80gb" + count = 8 + gpu_driver_installation_config { + gpu_driver_version = "LATEST" + } + } + // Flex Start requirement + reservation_affinity { + 
consume_reservation_type = "NO_RESERVATION" + } + ephemeral_storage_local_ssd_config { + local_ssd_count = 16 + } + } + + // Setting ANP to AUTO + network_config { + accelerator_network_profile = "auto" + } +} +`, clusterName, npName) +} + +func TestAccContainerNodePool_acceleratorNetworkProfile_Lifecycle(t *testing.T) { + t.Parallel() + + clusterName := fmt.Sprintf("tf-test-cluster-%s", acctest.RandString(t, 10)) + npName := fmt.Sprintf("tf-test-nodepool-%s", acctest.RandString(t, 10)) + resourceName := "google_container_node_pool.np" + importIgnore := []string{"deletion_protection", "cluster", "initial_node_count"} + + acctest.VcrTest(t, resource.TestCase{ + PreCheck: func() { acctest.AccTestPreCheck(t) }, + ProtoV5ProviderFactories: acctest.ProtoV5ProviderFactories(t), + CheckDestroy: testAccCheckContainerNodePoolDestroyProducer(t), + Steps: []resource.TestStep{ + // Step 1: Create with Manual Config (ANP Off) + { + Config: testAccContainerNodePool_acceleratorNetworkProfile_manual(clusterName, npName), + Check: resource.ComposeTestCheckFunc( + resource.TestCheckResourceAttr(resourceName, "name", npName), + resource.TestCheckResourceAttr(resourceName, "node_config.0.machine_type", "a3-highgpu-8g"), + resource.TestCheckResourceAttr(resourceName, "network_config.0.additional_node_network_configs.#", "1"), + resource.TestCheckResourceAttr(resourceName, "network_config.0.accelerator_network_profile", ""), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: importIgnore, + }, + // Step 2: Remove Manual Config (Expect Replacement) + { + Config: testAccContainerNodePool_acceleratorNetworkProfile_basic(clusterName, npName), + ConfigPlanChecks: resource.ConfigPlanChecks{ + PreApply: []plancheck.PlanCheck{ + plancheck.ExpectResourceAction(resourceName, plancheck.ResourceActionReplace), + }, + }, + Check: resource.ComposeTestCheckFunc( + resource.TestCheckResourceAttr(resourceName, 
"network_config.0.additional_node_network_configs.#", "0"), + resource.TestCheckResourceAttr(resourceName, "network_config.0.accelerator_network_profile", ""), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: importIgnore, + }, + // Step 3: Enable ANP (Auto) (Expect Replacement Again) + { + Config: testAccContainerNodePool_acceleratorNetworkProfile_auto(clusterName, npName), + ConfigPlanChecks: resource.ConfigPlanChecks{ + PreApply: []plancheck.PlanCheck{ + plancheck.ExpectResourceAction(resourceName, plancheck.ResourceActionReplace), + }, + }, + Check: resource.ComposeTestCheckFunc( + resource.TestCheckResourceAttr(resourceName, "network_config.0.accelerator_network_profile", "auto"), + ), + }, + { + ResourceName: resourceName, + ImportState: true, + ImportStateVerify: true, + ImportStateVerifyIgnore: importIgnore, + }, + }, + }) +} diff --git a/website/docs/r/container_node_pool.html.markdown b/website/docs/r/container_node_pool.html.markdown index fc688534ac2..866bbdc47b5 100644 --- a/website/docs/r/container_node_pool.html.markdown +++ b/website/docs/r/container_node_pool.html.markdown @@ -239,6 +239,8 @@ cluster. * `subnetwork` - (Optional) The subnetwork path for the node pool. Format: `projects/{project}/regions/{region}/subnetworks/{subnetwork}`. If the cluster is associated with multiple subnetworks, the subnetwork for the node pool is picked based on the IP utilization during node pool creation and is immutable +* `accelerator_network_profile` (Optional, [Beta](https://terraform.io/docs/providers/google/guides/provider_versions.html)) - Specifies the accelerator network profile for nodes in this node pool. Setting to `"auto"` enables GKE to automatically configure high-performance networking settings for nodes with accelerators (like GPUs). GKE manages the underlying resources (like VPCs and subnets) for this configuration. 
+ The `additional_node_network_configs` block supports: * `network` - Name of the VPC where the additional interface belongs.