@@ -33,7 +33,7 @@ const (
3333 webhookFirewallRuleName = "allow-9443-port-for-webhook"
3434 internalFirewallRuleName = "allow-internal"
3535 defaultNodePool = "default-pool" // default node pool name for GKE clusters
36- subnetSuffix = "subnet-1" // suffix for dynamically created subnets
36+ subnetSuffix = "subnet" // suffix for dynamically created subnets
3737)
3838
3939// Helper functions to get network configuration from common.go
@@ -252,14 +252,60 @@ func (r *GcpRegion) TeardownInfra(t *testing.T) {
252252 computeService , err := createComputeServiceClient (ctx )
253253 require .NoError (t , err )
254254
255- // 1) Delete GKE clusters via gcloud (ensures proper cleanup of node pools, etc.)
255+ gkeService , err := createGKEServiceClient (ctx )
256+ require .NoError (t , err )
257+
258+ // 1) Delete GKE clusters
256259 for _ , cfg := range clusterConfigurations [:len (r .Clusters )] {
257260 t .Logf ("[%s] Deleting GKE cluster '%s'" , ProviderGCP , cfg .ClusterName )
261+
262+ // Check for ongoing operations
263+ clusterPath := fmt .Sprintf ("projects/%s/locations/%s/clusters/%s" , projectID , cfg .Region , cfg .ClusterName )
264+ cluster , err := gkeService .Projects .Locations .Clusters .Get (clusterPath ).Context (ctx ).Do ()
265+ if err != nil {
266+ if IsResourceNotFound (err ) {
267+ t .Logf ("[%s] Cluster %s already deleted" , ProviderGCP , cfg .ClusterName )
268+ continue
269+ }
270+ t .Logf ("[%s] Warning: error checking cluster %s status: %v" , ProviderGCP , cfg .ClusterName , err )
271+ continue
272+ }
273+
274+ // If cluster is in a transitioning state, wait for current operation to complete
275+ if cluster .Status != "RUNNING" && cluster .Status != "ERROR" {
276+ t .Logf ("[%s] Cluster %s is in %s state, waiting for operation to complete..." , ProviderGCP , cfg .ClusterName , cluster .Status )
277+ if cluster .CurrentMasterVersion != "" { // Check if there's an ongoing operation
278+ err = waitForGKEOperation (gkeService , cluster .CurrentMasterVersion , cfg .Region , "" )
279+ if err != nil {
280+ t .Logf ("[%s] Warning: error waiting for operation on cluster %s: %v" , ProviderGCP , cfg .ClusterName , err )
281+ }
282+ }
283+ }
284+
285+ // Now try to delete the cluster
258286 delCmd := exec .Command ("gcloud" , "container" , "clusters" , "delete" , cfg .ClusterName ,
259- "--region" , cfg .Region , "--project" , projectID , "--quiet" )
287+ "--region" , cfg .Region , "--project" , projectID , "--quiet" , "--async" )
260288 delCmd .Stdout = os .Stdout
261289 delCmd .Stderr = os .Stderr
262- _ = delCmd .Run ()
290+ if err := delCmd .Run (); err != nil {
291+ t .Logf ("[%s] Warning: error initiating deletion of cluster %s: %v" , ProviderGCP , cfg .ClusterName , err )
292+ }
293+ }
294+
295+ // Wait for all cluster deletions to complete
296+ for _ , cfg := range clusterConfigurations [:len (r .Clusters )] {
297+ clusterPath := fmt .Sprintf ("projects/%s/locations/%s/clusters/%s" , projectID , cfg .Region , cfg .ClusterName )
298+ for retries := 0 ; retries < 10 ; retries ++ {
299+ _ , err := gkeService .Projects .Locations .Clusters .Get (clusterPath ).Context (ctx ).Do ()
300+ if IsResourceNotFound (err ) {
301+ t .Logf ("[%s] Confirmed deletion of cluster %s" , ProviderGCP , cfg .ClusterName )
302+ break
303+ }
304+ if retries == 9 {
305+ t .Logf ("[%s] Warning: timed out waiting for cluster %s deletion" , ProviderGCP , cfg .ClusterName )
306+ }
307+ time .Sleep (30 * time .Second )
308+ }
263309 }
264310
265311 // 2) Delete static IPs (unreserve)
@@ -296,22 +342,23 @@ func (r *GcpRegion) TeardownInfra(t *testing.T) {
296342 t .Logf ("[%s] Infrastructure teardown completed" , ProviderGCP )
297343}
298344
345+ // ScaleNodePool is currently a no-op: the manual resize logic is kept below only as commented-out code, so location, nodeCount and index are unused.
299346func (r * GcpRegion ) ScaleNodePool (t * testing.T , location string , nodeCount , index int ) {
300- t .Logf ("[%s] Scaling node pool for cluster '%s' to %d nodes" , ProviderGCP , clusterConfigurations [index ].ClusterName , nodeCount )
301-
302- ctx := context .Background ()
303- gkeService , err := createGKEServiceClient (ctx )
304- require .NoError (t , err , "failed to create GKE client" )
305-
306- scaleOp , err := scaleNodePool (ctx , gkeService , projectID , location , clusterConfigurations [index ].ClusterName , defaultNodePool , int64 (nodeCount ))
307- require .NoError (t , err , "error initiating scaling for node pool" )
308-
309- err = waitForGKEOperation (gkeService , scaleOp .Name , location , "" )
310- if err != nil {
311- t .Logf ("[%s] Error during scaling operation for node pool '%s': %v" , ProviderGCP , defaultNodePool , err )
312- } else {
313- t .Logf ("[%s] Successfully scaled node pool '%s' to %d nodes" , ProviderGCP , defaultNodePool , nodeCount )
314- }
347+ // t.Logf("[%s] Scaling node pool for cluster '%s' to %d nodes", ProviderGCP, clusterConfigurations[index].ClusterName, nodeCount)
348+ //
349+ // ctx := context.Background()
350+ // gkeService, err := createGKEServiceClient(ctx)
351+ // require.NoError(t, err, "failed to create GKE client")
352+ // NOTE(review): the disabled call below passes int64(1) instead of int64(nodeCount), while its success log still reports nodeCount; reconcile before re-enabling.
353+ // scaleOp, err := scaleNodePool(ctx, gkeService, projectID, location, clusterConfigurations[index].ClusterName, defaultNodePool, int64(1 ))
354+ // require.NoError(t, err, "error initiating scaling for node pool")
355+ //
356+ // err = waitForGKEOperation(gkeService, scaleOp.Name, location, "")
357+ // if err != nil {
358+ // t.Logf("[%s] Error during scaling operation for node pool '%s': %v", ProviderGCP, defaultNodePool, err)
359+ // } else {
360+ // t.Logf("[%s] Successfully scaled node pool '%s' to %d nodes", ProviderGCP, defaultNodePool, nodeCount)
361+ // }
315362}
316363
317364// getServiceAccountKeyPath returns the path to the service account key file
@@ -490,7 +537,10 @@ func createGKERegionalCluster(ctx context.Context, client *container.Service, se
490537 "--tags" , strings .Join ([]string {defaultNodeTag }, "," ), // Join tags if there are multiple
491538 "--enable-master-authorized-networks" ,
492539 "--master-authorized-networks" , strings .Join ([]string {"0.0.0.0/0" }, "," ),
493- "--num-nodes" , fmt .Sprint (DefaultNodesPerZone ), // For regional, this is total nodes spread across 3 zones by default
540+ "--num-nodes" , fmt .Sprint (DefaultNodesPerZone ),
541+ "--min-nodes" , fmt .Sprint (DefaultNodesPerZone ),
542+ "--max-nodes" , fmt .Sprint (DefaultNodesPerZone + 1 ), // Needed for scaling cluster
543+ "--enable-autoscaling" , // Enable autoscaling
494544 "--autoprovisioning-network-tags" , strings .Join ([]string {defaultNodeTag }, "," ),
495545 "--machine-type" , GCPDefaultMachineType ,
496546 "--quiet" , // Suppress interactive prompts
0 commit comments