99 "fmt"
1010 "strings"
1111 "text/template"
12+ "time"
1213
1314 "github.com/go-logr/logr"
1415 "github.com/spf13/pflag"
@@ -41,6 +42,11 @@ const (
4142 cleanupStatusCompleted = "completed"
4243 cleanupStatusInProgress = "in-progress"
4344 cleanupStatusNotStarted = "not-started"
45+ cleanupStatusTimedOut = "timed-out"
46+
47+ // helmUninstallTimeout is the maximum time a HelmChartProxy may remain in deletion
48+ // before cleanup is reported as timed out and cluster deletion is blocked pending manual intervention
49+ helmUninstallTimeout = 5 * time .Minute
4450)
4551
4652type Config struct {
@@ -344,7 +350,7 @@ func (n *DefaultKonnectorAgent) BeforeClusterDelete(
344350 }
345351
346352 // Check if cleanup is already in progress or completed
347- cleanupStatus , err := n .checkCleanupStatus (ctx , cluster , log )
353+ cleanupStatus , statusMsg , err := n .checkCleanupStatus (ctx , cluster , log )
348354 if err != nil {
349355 log .Error (err , "Failed to check cleanup status" )
350356 resp .SetStatus (runtimehooksv1 .ResponseStatusFailure )
@@ -357,10 +363,32 @@ func (n *DefaultKonnectorAgent) BeforeClusterDelete(
357363 log .Info ("Konnector Agent cleanup already completed" )
358364 resp .SetStatus (runtimehooksv1 .ResponseStatusSuccess )
359365 return
366+ case cleanupStatusTimedOut :
367+ // Log the error prominently and block cluster deletion
368+ log .Error (
369+ fmt .Errorf ("konnector Agent helm uninstallation timed out" ),
370+ "ERROR: Konnector Agent cleanup timed out - blocking cluster deletion" ,
371+ "details" , statusMsg ,
372+ "action" , "Manual intervention required - check HelmChartProxy status and remove finalizers if needed" ,
373+ )
374+ resp .SetStatus (runtimehooksv1 .ResponseStatusFailure )
375+ resp .SetMessage (fmt .Sprintf (
376+ "Konnector Agent helm uninstallation timed out after %v. " +
377+ "The HelmChartProxy is stuck in deletion state. " +
378+ "Manual intervention required: Check HelmChartProxy status and remove finalizers if needed. " +
379+ "Details: %s" ,
380+ helmUninstallTimeout ,
381+ statusMsg ,
382+ ))
383+ return
360384 case cleanupStatusInProgress :
361- log .Info ("Konnector Agent cleanup in progress, requesting retry" )
385+ log .Info ("Konnector Agent cleanup in progress, requesting retry" , "details" , statusMsg )
362386 resp .SetStatus (runtimehooksv1 .ResponseStatusFailure )
363387 resp .SetRetryAfterSeconds (5 ) // Retry after 5 seconds
388+ resp .SetMessage (fmt .Sprintf (
389+ "Konnector Agent cleanup in progress. Waiting for HelmChartProxy deletion to complete. %s" ,
390+ statusMsg ,
391+ ))
364392 return
365393 case cleanupStatusNotStarted :
366394 log .Info ("Starting Konnector Agent cleanup" )
@@ -369,16 +397,17 @@ func (n *DefaultKonnectorAgent) BeforeClusterDelete(
369397
370398 err = n .deleteHelmChartProxy (ctx , cluster , log )
371399 if err != nil {
372- log .Error (err , "Failed to delete helm chart " )
400+ log .Error (err , "Failed to delete HelmChartProxy " )
373401 resp .SetStatus (runtimehooksv1 .ResponseStatusFailure )
374- resp .SetMessage (err . Error ( ))
402+ resp .SetMessage (fmt . Sprintf ( "Failed to delete Konnector Agent HelmChartProxy: %v" , err ))
375403 return
376404 }
377405
378406 // After initiating cleanup, request a retry to monitor completion
379407 log .Info ("Konnector Agent cleanup initiated, will monitor progress" )
380408 resp .SetStatus (runtimehooksv1 .ResponseStatusFailure )
381409 resp .SetRetryAfterSeconds (5 ) // Quick retry to start monitoring
410+ resp .SetMessage ("Konnector Agent cleanup initiated. Waiting for HelmChartProxy deletion to start." )
382411}
383412
384413func (n * DefaultKonnectorAgent ) deleteHelmChartProxy (
@@ -434,15 +463,15 @@ func (n *DefaultKonnectorAgent) deleteHelmChartProxy(
434463}
435464
436465// checkCleanupStatus checks the current status of Konnector Agent cleanup.
437- // Returns: "completed", "in-progress", or "not-started".
466+ // Returns: status ( "completed", "in-progress", "not-started", or "timed-out"), status message, and error .
438467func (n * DefaultKonnectorAgent ) checkCleanupStatus (
439468 ctx context.Context ,
440469 cluster * clusterv1.Cluster ,
441470 log logr.Logger ,
442- ) (string , error ) {
471+ ) (string , string , error ) {
443472 clusterUUID , ok := cluster .Annotations [v1alpha1 .ClusterUUIDAnnotationKey ]
444473 if ! ok {
445- return cleanupStatusCompleted , nil // If no UUID, assume no agent was installed
474+ return cleanupStatusCompleted , "No cluster UUID found, assuming no agent installed" , nil
446475 }
447476
448477 // Check if HelmChartProxy exists
@@ -457,18 +486,52 @@ func (n *DefaultKonnectorAgent) checkCleanupStatus(
457486 if err != nil {
458487 if apierrors .IsNotFound (err ) {
459488 log .Info ("HelmChartProxy not found, cleanup completed" , "name" , hcp .Name )
460- return cleanupStatusCompleted , nil
489+ return cleanupStatusCompleted , "HelmChartProxy successfully deleted" , nil
461490 }
462- return "" , fmt .Errorf ("failed to get HelmChartProxy %q: %w" , ctrlclient .ObjectKeyFromObject (hcp ), err )
491+ return "" , "" , fmt .Errorf ("failed to get HelmChartProxy %q: %w" , ctrlclient .ObjectKeyFromObject (hcp ), err )
463492 }
464493
465494 // HCP exists - check if it's being deleted
466495 if hcp .DeletionTimestamp != nil {
467- log .Info ("HelmChartProxy is being deleted, cleanup in progress" , "name" , hcp .Name )
468- return cleanupStatusInProgress , nil
496+ // Check if deletion has timed out
497+ deletionDuration := time .Since (hcp .DeletionTimestamp .Time )
498+ if deletionDuration > helmUninstallTimeout {
499+ statusMsg := fmt .Sprintf (
500+ "HelmChartProxy %q has been in deletion state for %v (timeout: %v). " +
501+ "Possible causes: stuck finalizers, helm uninstall failure, or workload cluster unreachable. " +
502+ "HelmChartProxy status: %+v" ,
503+ ctrlclient .ObjectKeyFromObject (hcp ),
504+ deletionDuration ,
505+ helmUninstallTimeout ,
506+ hcp .Status ,
507+ )
508+ log .Error (
509+ fmt .Errorf ("helm uninstall timeout exceeded" ),
510+ "HelmChartProxy deletion timed out" ,
511+ "name" , hcp .Name ,
512+ "deletionTimestamp" , hcp .DeletionTimestamp .Time ,
513+ "duration" , deletionDuration ,
514+ "timeout" , helmUninstallTimeout ,
515+ "finalizers" , hcp .Finalizers ,
516+ "status" , hcp .Status ,
517+ )
518+ return cleanupStatusTimedOut , statusMsg , nil
519+ }
520+
521+ statusMsg := fmt .Sprintf (
522+ "HelmChartProxy is being deleted (in progress for %v, timeout in %v)" ,
523+ deletionDuration ,
524+ helmUninstallTimeout - deletionDuration ,
525+ )
526+ log .Info ("HelmChartProxy is being deleted, cleanup in progress" ,
527+ "name" , hcp .Name ,
528+ "deletionDuration" , deletionDuration ,
529+ "remainingTime" , helmUninstallTimeout - deletionDuration ,
530+ )
531+ return cleanupStatusInProgress , statusMsg , nil
469532 }
470533
471534 // HCP exists and is not being deleted
472535 log .Info ("HelmChartProxy exists, cleanup not started" , "name" , hcp .Name )
473- return cleanupStatusNotStarted , nil
536+ return cleanupStatusNotStarted , "HelmChartProxy exists and needs to be deleted" , nil
474537}
0 commit comments