@@ -65,20 +65,49 @@ def get_cluster_status(cluster_name, region):
6565 except Exception as e :
6666 raise AssertionError (f"Failed to get cluster status: { e } " )
6767
68+
def wait_for_stack_complete(stack_name, region, timeout_minutes=15,
                            poll_interval_seconds=30, client=None):
    """Wait for a CloudFormation stack to reach CREATE_COMPLETE.

    Polls ``describe_stacks`` until the stack reports ``CREATE_COMPLETE``,
    reports a status from which creation can no longer succeed, or the
    timeout elapses.

    Args:
        stack_name: Name (or ID) of the stack to poll.
        region: AWS region used to build the CloudFormation client when
            ``client`` is not supplied.
        timeout_minutes: Maximum time to keep polling, in minutes.
        poll_interval_seconds: Delay between consecutive polls, in seconds.
        client: Optional pre-built CloudFormation client (any object exposing
            ``describe_stacks(StackName=...)``). When omitted, a boto3 client
            is created for ``region``.

    Returns:
        True once the stack status is ``CREATE_COMPLETE``.

    Raises:
        AssertionError: If the stack reports a failed/rolled-back/deleted
            status, or if it does not complete before the timeout.
    """
    if client is None:
        # boto3 is only needed when no client has been injected.
        import boto3
        client = boto3.client('cloudformation', region_name=region)

    # Statuses from which a stack can never reach CREATE_COMPLETE.
    failed_statuses = frozenset({
        'CREATE_FAILED',
        'ROLLBACK_IN_PROGRESS',
        'ROLLBACK_FAILED',
        'ROLLBACK_COMPLETE',
        'DELETE_IN_PROGRESS',
        'DELETE_FAILED',
        'DELETE_COMPLETE',
    })

    deadline = time.time() + (timeout_minutes * 60)
    while time.time() < deadline:
        try:
            # Only the API call and response parsing are guarded: errors here
            # (stack not created yet, throttling, ...) are logged and retried.
            response = client.describe_stacks(StackName=stack_name)
            status = response['Stacks'][0]['StackStatus']
        except Exception as e:
            if "does not exist" in str(e).lower():
                print(f"[STATUS] Stack '{stack_name}' not found yet, waiting for creation...")
            else:
                print(f"[ERROR] Error checking stack status: {e}")
        else:
            # Evaluated outside the ``try`` so the failure AssertionError
            # propagates to the caller instead of being swallowed by the
            # broad ``except`` above and retried until the timeout.
            if status == 'CREATE_COMPLETE':
                return True
            if status in failed_statuses:
                raise AssertionError(f"Stack creation failed with status: {status}")

        time.sleep(poll_interval_seconds)

    raise AssertionError(f"Stack did not complete after {timeout_minutes} minutes")
94+
95+
6896# --------- Test Configuration ---------
6997REGION = "us-east-2"
7098
7199# Global variables to share data between tests
72100STACK_NAME = None
73101CREATE_TIME = None
102+ UNIQUE_TIMESTAMP = int (time .time () * 1000 )
74103
@pytest.fixture(scope="module")
def runner():
    """Provide one ``CliRunner`` instance shared by all tests in this module."""
    cli_runner = CliRunner()
    return cli_runner
78107
@pytest.fixture(scope="module")
def cluster_name():
    """Return the cluster name used by this test module.

    The name embeds the module-level ``UNIQUE_TIMESTAMP`` so it differs
    between runs. It is built without any whitespace.
    """
    return f"hyperpod-{UNIQUE_TIMESTAMP}-cluster-integ-test"
82111
83112@pytest .fixture (scope = "module" )
84113def create_time ():
@@ -109,7 +138,8 @@ def test_configure_cluster(runner, cluster_name):
109138 # Configuration mapping for cleaner code
110139 config_options = {
111140 "stage" : "prod" ,
112- "resource-name-prefix" : f"hyperpod-cli-integ-test-{ int (time .time ())} " ,
141+ "resource-name-prefix" : f"hyperpod-cli-integ-test-{ UNIQUE_TIMESTAMP } " ,
142+ "hyperpod-cluster-name" : cluster_name ,
113143 "create-vpc-stack" : "true" ,
114144 "create-security-group-stack" : "true" ,
115145 "create-eks-cluster-stack" : "true" ,
@@ -236,7 +266,6 @@ def test_describe_cluster_via_cli(runner, cluster_name):
236266
237267
238268# --------- Extended Cluster Resource Verification Tests ---------
239-
240269@pytest .mark .dependency (name = "wait_for_cluster" , depends = ["verify_submission" ])
241270def test_wait_for_cluster_ready (runner , cluster_name ):
242271 """Wait for cluster to be ready by polling cluster status until InService.
@@ -271,9 +300,12 @@ def test_wait_for_cluster_ready(runner, cluster_name):
271300 assert False , f"Cluster creation failed with status: { status } "
272301
273302 except AssertionError as e :
274- if "AWS CLI not available" in str (e ) or "timed out" in str (e ):
303+ if "ResourceNotFound" in str (e ) or "not found" in str (e ):
304+ print (f"[STATUS] Cluster '{ cluster_name } ' not created yet, waiting..." )
305+ elif "AWS CLI not available" in str (e ) or "timed out" in str (e ):
275306 assert False , str (e )
276- print (f"[ERROR] Error during polling: { e } " )
307+ else :
308+ print (f"[ERROR] Error during polling: { e } " )
277309
278310 time .sleep (poll_interval )
279311 # Exponential backoff with cap
@@ -282,7 +314,19 @@ def test_wait_for_cluster_ready(runner, cluster_name):
282314 assert False , f"Timed out waiting for cluster '{ cluster_name } ' to be InService after { timeout_minutes } minutes"
283315
284316
285- @pytest .mark .dependency (name = "update_cluster" , depends = ["wait_for_cluster" ])
# Runs after the cluster is InService and before the update test.
@pytest.mark.dependency(name="wait_for_stack", depends=["wait_for_cluster"])
def test_wait_for_stack_completion(runner, cluster_name):
    """Block until the CloudFormation stack named by STACK_NAME is CREATE_COMPLETE.

    Reads the module-level ``STACK_NAME`` and fails immediately if it is
    unset. Otherwise delegates the polling to ``wait_for_stack_complete`` in
    ``REGION``.
    """
    # Reading a module global needs no ``global`` declaration; bind it locally.
    stack = STACK_NAME
    assert stack, "Stack name should be available"

    print(f"⏳ Waiting for CloudFormation stack {stack} to be CREATE_COMPLETE...")
    wait_for_stack_complete(stack, REGION)
    print(f"✅ Stack {stack} is now CREATE_COMPLETE")
327+
328+
329+ @pytest .mark .dependency (name = "update_cluster" , depends = ["wait_for_stack" ])
286330def test_cluster_update_workflow (runner , cluster_name ):
287331 """Test hyp update-cluster command by toggling node recovery setting."""
288332 global STACK_NAME
0 commit comments