@@ -294,11 +294,10 @@ def list_cluster_stacks(region, debug, status):
294294@click .command ("cluster-stack" )
295295@click .argument ("stack-name" , required = True )
296296@click .option ("--retain-resources" , help = "Comma-separated list of resources to retain during deletion" )
297- @click .option ("--force-with-retain" , is_flag = True , help = "Force deletion with retention of failed resources" )
298- @click .option ("--region" , help = "AWS region" )
297+ @click .option ("--region" , required = True , help = "AWS region (required)" )
299298@click .option ("--debug" , is_flag = True , help = "Enable debug logging" )
300299@_hyperpod_telemetry_emitter (Feature .HYPERPOD_CLI , "delete_cluster_stack_cli" )
301- def delete_cluster_stack (stack_name : str , retain_resources : str , force_with_retain : bool , region : str , debug : bool ) -> None :
300+ def delete_cluster_stack (stack_name : str , retain_resources : str , region : str , debug : bool ) -> None :
302301 """Delete a HyperPod cluster stack.
303302
304303 Removes the specified CloudFormation stack and all associated AWS resources.
@@ -310,13 +309,10 @@ def delete_cluster_stack(stack_name: str, retain_resources: str, force_with_reta
310309 .. code-block:: bash
311310
312311 # Delete a cluster stack
313- hyp delete hyp-cluster my-stack-name
314-
315- # Delete with retained resources
316- hyp delete hyp-cluster my-stack-name --retain-resources S3Bucket-TrainingData,EFSFileSystem-Models
312+ hyp delete cluster-stack my-stack-name --region us-west-2
317313
318- # Force deletion with retention
319- hyp delete hyp- cluster my-stack-name --retain-resources S3Bucket-TrainingData --force-with-retain
314+ # Delete with retained resources (only works on DELETE_FAILED stacks)
315+ hyp delete cluster-stack my-stack-name --retain-resources S3Bucket-TrainingData,EFSFileSystem-Models --region us-west-2
320316 """
321317 logger = setup_logging (logging .getLogger (__name__ ), debug )
322318
@@ -342,7 +338,29 @@ def delete_cluster_stack(stack_name: str, retain_resources: str, force_with_reta
342338 click .secho (f"❌ No resources found in stack '{ stack_name } '" , fg = 'red' )
343339 return
344340
345- # Categorize resources
341+ # Validate retain resources exist in stack
342+ if retain_list :
343+ existing_resource_names = {r .get ('LogicalResourceId' , '' ) for r in resources }
344+ valid_retain_resources = []
345+ invalid_retain_resources = []
346+
347+ for resource in retain_list :
348+ if resource in existing_resource_names :
349+ valid_retain_resources .append (resource )
350+ else :
351+ invalid_retain_resources .append (resource )
352+
353+ # Show warning for non-existent resources
354+ if invalid_retain_resources :
355+ click .secho (f"⚠️ Warning: The following { len (invalid_retain_resources )} resources don't exist in the stack:" , fg = 'yellow' )
356+ for resource in invalid_retain_resources :
357+ click .secho (f" - { resource } (not found)" , fg = 'yellow' )
358+ click .echo ()
359+
360+ # Update retain_list to only include valid resources
361+ retain_list = valid_retain_resources
362+
363+ # Categorize resources (excluding retained ones from deletion display)
346364 resource_categories = {
347365 'EC2 Instances' : [],
348366 'Networking' : [],
@@ -356,6 +374,10 @@ def delete_cluster_stack(stack_name: str, retain_resources: str, force_with_reta
356374 resource_name = resource .get ('LogicalResourceId' , '' )
357375 physical_id = resource .get ('PhysicalResourceId' , '' )
358376
377+ # Skip resources that will be retained
378+ if resource_name in retain_list :
379+ continue
380+
359381 if 'EC2::Instance' in resource_type :
360382 resource_categories ['EC2 Instances' ].append (f" - { resource_name } ({ physical_id } )" )
361383 elif any (net_type in resource_type for net_type in ['VPC' , 'SecurityGroup' , 'InternetGateway' , 'Subnet' , 'RouteTable' ]):
@@ -367,7 +389,7 @@ def delete_cluster_stack(stack_name: str, retain_resources: str, force_with_reta
367389 else :
368390 resource_categories ['Other' ].append (f" - { resource_name } " )
369391
370- # Count total resources
392+ # Count total resources (excluding retained ones)
371393 total_resources = sum (len (category ) for category in resource_categories .values ())
372394 retained_count = len (retain_list )
373395
@@ -388,11 +410,10 @@ def delete_cluster_stack(stack_name: str, retain_resources: str, force_with_reta
388410 click .secho (f" ✓ { resource } (retained)" , fg = 'green' )
389411 click .echo ()
390412
391- # Confirmation prompt (skip if force flag is used)
392- if not force_with_retain :
393- if not click .confirm ("Continue?" , default = False ):
394- click .echo ("Operation cancelled." )
395- return
413+ # Confirmation prompt
414+ if not click .confirm ("Continue?" , default = False ):
415+ click .echo ("Operation cancelled." )
416+ return
396417
397418 # Perform deletion
398419 delete_params = {'StackName' : stack_name }
@@ -404,26 +425,16 @@ def delete_cluster_stack(stack_name: str, retain_resources: str, force_with_reta
404425 try :
405426 cf_client .delete_stack (** delete_params )
406427
407- if force_with_retain :
408- click .secho ("✓ Force deletion completed" , fg = 'green' )
409- click .secho (f"✓ Deleted all possible resources ({ total_resources - retained_count } /{ total_resources } )" , fg = 'green' )
410-
411- if retain_list :
412- click .echo ()
413- click .secho (f"Retained due to user request ({ len (retain_list )} ):" , fg = 'green' )
414- for resource in retain_list :
415- click .secho (f" ✓ { resource } (user requested)" , fg = 'green' )
416-
428+ click .secho (f"✓ Stack '{ stack_name } ' deletion initiated successfully" , fg = 'green' )
429+
430+ if retain_list :
417431 click .echo ()
418- click .secho (f"✓ Stack '{ stack_name } ' deletion completed with retentions" , fg = 'green' )
419- else :
420- click .secho (f"✓ Stack '{ stack_name } ' deletion initiated successfully" , fg = 'green' )
421-
422- if retain_list :
423- click .echo ()
424- click .secho (f"Successfully retained as requested ({ len (retain_list )} ):" , fg = 'green' )
425- for resource in retain_list :
426- click .secho (f" ✓ { resource } (retained)" , fg = 'green' )
432+ click .secho (f"Successfully retained as requested ({ len (retain_list )} ):" , fg = 'green' )
433+ for resource in retain_list :
434+ click .secho (f" ✓ { resource } (retained)" , fg = 'green' )
435+ click .echo ()
436+ click .secho ("💡 Retained resources will remain as standalone AWS resources" , fg = 'cyan' )
437+ click .secho (" You can access them directly via AWS Console/CLI using their physical resource IDs" , fg = 'cyan' )
427438
428439 except Exception as delete_error :
429440 # Handle termination protection specifically
@@ -436,6 +447,21 @@ def delete_cluster_stack(stack_name: str, retain_resources: str, force_with_reta
436447 click .secho ("Then retry the delete command." , fg = 'yellow' )
437448 raise click .ClickException ("Termination protection must be disabled before deletion" )
438449
450+ # Handle CloudFormation retain-resources limitation
451+ if retain_list and "specify which resources to retain only when the stack is in the DELETE_FAILED state" in str (delete_error ):
452+ click .secho ("❌ CloudFormation limitation: --retain-resources only works on failed deletions" , fg = 'red' )
453+ click .echo ()
454+ click .secho ("💡 Recommended workflow:" , fg = 'yellow' )
455+ click .secho ("1. First try deleting without --retain-resources:" , fg = 'cyan' )
456+ click .secho (f" hyp delete cluster-stack { stack_name } --region { region or 'us-west-2' } " , fg = 'cyan' )
457+ click .echo ()
458+ click .secho ("2. If deletion fails, the stack will be in DELETE_FAILED state" , fg = 'cyan' )
459+ click .secho ("3. Then retry with --retain-resources to keep specific resources:" , fg = 'cyan' )
460+ click .secho (f" hyp delete cluster-stack { stack_name } --retain-resources { retain_resources } --region { region or 'us-west-2' } " , fg = 'cyan' )
461+ click .echo ()
462+ click .secho ("⚠️ Alternative: Delete without retention and manually preserve important data first" , fg = 'yellow' )
463+ return # Exit gracefully without raising exception
464+
439465 # Handle partial deletion failures
440466 click .secho ("✗ Stack deletion failed" , fg = 'red' )
441467
@@ -467,7 +493,8 @@ def delete_cluster_stack(stack_name: str, retain_resources: str, force_with_reta
467493 click .secho (f" ✓ { resource } (retained)" , fg = 'green' )
468494
469495 click .echo ()
470- click .secho ("Run with --force-with-retain to complete deletion of remaining resources" , fg = 'yellow' )
496+ click .secho ("💡 Note: Some resources may have dependencies preventing deletion" , fg = 'yellow' )
497+ click .secho (" Check the AWS CloudFormation console for detailed dependency information" , fg = 'cyan' )
471498
472499 except :
473500 # If we can't get current resources, show generic error
@@ -487,7 +514,9 @@ def delete_cluster_stack(stack_name: str, retain_resources: str, force_with_reta
487514 else :
488515 click .secho (f"❌ Error deleting stack: { e } " , fg = 'red' )
489516
490- raise click .ClickException (str (e ))
517+ # Only raise exception for truly unexpected errors, not user-facing ones
518+ if not any (msg in str (e ) for msg in ["does not exist" , "AccessDenied" ]):
519+ raise click .ClickException (str (e ))
491520
492521@click .command ("cluster" )
493522@click .option ("--cluster-name" , required = True , help = "The name of the cluster to update" )
0 commit comments