Skip to content

Commit b8de113

Browse files
committed
Fixed cfn cleanup, wait for cfn completion before deletion
1 parent 12730ca commit b8de113

File tree

1 file changed

+46
-4
lines changed

1 file changed

+46
-4
lines changed

test/integration_tests/cluster_management/test_hp_cluster_creation.py

Lines changed: 46 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,34 @@ def get_cluster_status(cluster_name, region):
6565
except Exception as e:
6666
raise AssertionError(f"Failed to get cluster status: {e}")
6767

68+
69+
def wait_for_stack_complete(stack_name, region, timeout_minutes=45,
                            poll_interval_seconds=30, client=None):
    """Wait for CloudFormation stack to be CREATE_COMPLETE.

    Polls ``describe_stacks`` until the stack reaches ``CREATE_COMPLETE``,
    reaches a status from which creation can no longer succeed, or the
    timeout elapses.

    Args:
        stack_name: Name (or ID) of the CloudFormation stack to poll.
        region: AWS region used to build the CloudFormation client when
            ``client`` is not supplied.
        timeout_minutes: Maximum time to keep polling, in minutes.
        poll_interval_seconds: Pause between two polls, in seconds.
        client: Optional pre-built object exposing
            ``describe_stacks(StackName=...)``. When omitted, a boto3
            CloudFormation client is created for ``region``.

    Returns:
        True once the stack status is ``CREATE_COMPLETE``.

    Raises:
        AssertionError: If the stack reaches a failed/rolled-back/deleted
            status, or does not complete within ``timeout_minutes``.
    """
    if client is None:
        # Imported lazily so the module can be loaded without boto3.
        import boto3
        client = boto3.client('cloudformation', region_name=region)

    # Statuses from which a stack can never reach CREATE_COMPLETE.
    failed_statuses = {
        'CREATE_FAILED',
        'ROLLBACK_IN_PROGRESS',
        'ROLLBACK_FAILED',
        'ROLLBACK_COMPLETE',
        'DELETE_IN_PROGRESS',
        'DELETE_FAILED',
        'DELETE_COMPLETE',
    }

    deadline = time.time() + (timeout_minutes * 60)
    while time.time() < deadline:
        # Keep only the API call and response parsing inside the ``try``:
        # the failure AssertionError below must NOT be swallowed by the
        # broad ``except Exception`` (AssertionError is an Exception).
        try:
            response = client.describe_stacks(StackName=stack_name)
            status = response['Stacks'][0]['StackStatus']
        except Exception as e:
            if "does not exist" in str(e).lower():
                print(f"[STATUS] Stack '{stack_name}' not found yet, waiting for creation...")
            else:
                print(f"[ERROR] Error checking stack status: {e}")
        else:
            if status == 'CREATE_COMPLETE':
                return True
            if status in failed_statuses:
                raise AssertionError(f"Stack creation failed with status: {status}")

        time.sleep(poll_interval_seconds)

    raise AssertionError(f"Stack did not complete after {timeout_minutes} minutes")
94+
95+
6896
# --------- Test Configuration ---------
6997
# AWS region handed to the CloudFormation wait helper by the tests below.
REGION = "us-east-2"
7098

@@ -236,7 +264,6 @@ def test_describe_cluster_via_cli(runner, cluster_name):
236264

237265

238266
# --------- Extended Cluster Resource Verification Tests ---------
239-
240267
@pytest.mark.dependency(name="wait_for_cluster", depends=["verify_submission"])
241268
def test_wait_for_cluster_ready(runner, cluster_name):
242269
"""Wait for cluster to be ready by polling cluster status until InService.
@@ -271,9 +298,12 @@ def test_wait_for_cluster_ready(runner, cluster_name):
271298
assert False, f"Cluster creation failed with status: {status}"
272299

273300
except AssertionError as e:
274-
if "AWS CLI not available" in str(e) or "timed out" in str(e):
301+
if "ResourceNotFound" in str(e) or "not found" in str(e):
302+
print(f"[STATUS] Cluster '{cluster_name}' not created yet, waiting...")
303+
elif "AWS CLI not available" in str(e) or "timed out" in str(e):
275304
assert False, str(e)
276-
print(f"[ERROR] Error during polling: {e}")
305+
else:
306+
print(f"[ERROR] Error during polling: {e}")
277307

278308
time.sleep(poll_interval)
279309
# Exponential backoff with cap
@@ -282,7 +312,19 @@ def test_wait_for_cluster_ready(runner, cluster_name):
282312
assert False, f"Timed out waiting for cluster '{cluster_name}' to be InService after {timeout_minutes} minutes"
283313

284314

285-
@pytest.mark.dependency(name="update_cluster", depends=["wait_for_cluster"])
315+
# Runs once the cluster is InService and before the update test: block until the CloudFormation stack has finished creating.
316+
@pytest.mark.dependency(name="wait_for_stack", depends=["wait_for_cluster"])
def test_wait_for_stack_completion(runner, cluster_name):
    """Block until the CloudFormation stack reports CREATE_COMPLETE.

    Reads the module-level ``STACK_NAME`` and polls the stack in ``REGION``
    through ``wait_for_stack_complete``; the test fails if the name is empty
    or the helper raises.
    """
    # STACK_NAME is only read here, so no ``global`` declaration is needed.
    assert STACK_NAME, "Stack name should be available"

    print(f"⏳ Waiting for CloudFormation stack {STACK_NAME} to be CREATE_COMPLETE...")
    # Raises AssertionError on stack failure or timeout, failing this test.
    wait_for_stack_complete(STACK_NAME, REGION)
    print(f"✅ Stack {STACK_NAME} is now CREATE_COMPLETE")
325+
326+
327+
@pytest.mark.dependency(name="update_cluster", depends=["wait_for_stack"])
286328
def test_cluster_update_workflow(runner, cluster_name):
287329
"""Test hyp update-cluster command by toggling node recovery setting."""
288330
global STACK_NAME

0 commit comments

Comments
 (0)