@@ -9,11 +9,14 @@ import (
99 "regexp"
1010 "slices"
1111 "strings"
12+ "time"
1213
1314 certificatesv1 "k8s.io/api/certificates/v1"
1415 corev1 "k8s.io/api/core/v1"
1516 kerrs "k8s.io/apimachinery/pkg/api/errors"
1617 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+ "k8s.io/apimachinery/pkg/util/wait"
19+ workapi "open-cluster-management.io/api/client/work/clientset/versioned"
1720 clusterv1 "open-cluster-management.io/api/cluster/v1"
1821 operatorv1 "open-cluster-management.io/api/operator/v1"
1922 workv1 "open-cluster-management.io/api/work/v1"
@@ -32,8 +35,10 @@ import (
3235var csrSuffixPattern = regexp .MustCompile (`-[a-zA-Z0-9]{5}$` )
3336
3437const (
35- amwExistsError = "you should manually clean them, uninstall kluster will cause those works out of control."
36- managedClusterAddOn = "ManagedClusterAddOn"
38+ amwExistsError = "you should manually clean them, uninstall kluster will cause those works out of control."
39+ managedClusterAddOn = "ManagedClusterAddOn"
40+ addonCleanupTimeout = 1 * time .Minute
41+ addonCleanupPollInterval = 2 * time .Second
3742)
3843
3944// handleSpokes manages Spoke cluster join and upgrade operations
@@ -575,6 +580,7 @@ func deregisterSpoke(ctx context.Context, kClient client.Client, hubKubeconfig [
575580 if err != nil {
576581 return err
577582 }
583+
578584 // skip clean up if the ManagedCluster resource is not found or if any manifestWorks exist
579585 managedCluster , err := clusterC .ClusterV1 ().ManagedClusters ().Get (ctx , spoke .Name , metav1.GetOptions {})
580586 if kerrs .IsNotFound (err ) {
@@ -599,16 +605,25 @@ func deregisterSpoke(ctx context.Context, kClient client.Client, hubKubeconfig [
599605 // remove addons only after confirming that the cluster can be unjoined - this avoids leaving dangling resources that may rely on the addon
600606 if err := handleAddonDisable (ctx , spoke .Name , spoke .EnabledAddons ); err != nil {
601607 fc .SetConditions (true , v1alpha1 .NewCondition (
602- "AddonsDisabled" , spoke .AddonDisableType (), metav1 .ConditionFalse , metav1 .ConditionTrue ,
608+ err . Error () , spoke .AddonDisableType (), metav1 .ConditionFalse , metav1 .ConditionTrue ,
603609 ))
604610 return err
605611 }
612+
606613 if len (spoke .EnabledAddons ) > 0 {
614+ // Wait for addon manifestWorks to be fully cleaned up before proceeding with unjoin
615+ if err := waitForAddonManifestWorksCleanup (ctx , workC , spoke .Name , addonCleanupTimeout ); err != nil {
616+ fc .SetConditions (true , v1alpha1 .NewCondition (
617+ err .Error (), spoke .AddonDisableType (), metav1 .ConditionFalse , metav1 .ConditionTrue ,
618+ ))
619+ return fmt .Errorf ("addon manifestWorks cleanup failed: %w" , err )
620+ }
607621 fc .SetConditions (true , v1alpha1 .NewCondition (
608622 "AddonsDisabled" , spoke .AddonDisableType (), metav1 .ConditionTrue , metav1 .ConditionTrue ,
609623 ))
610624 }
611- // unjoin spoke
625+
626+ // unjoin spoke - safe to proceed now that addon cleanup is confirmed
612627 if err := unjoinSpoke (ctx , kClient , fc , spoke ); err != nil {
613628 return err
614629 }
@@ -628,14 +643,14 @@ func deregisterSpoke(ctx context.Context, kClient client.Client, hubKubeconfig [
628643 }
629644
630645 // remove ManagedCluster
631- if err = clusterC .ClusterV1 ().ManagedClusters ().Delete (ctx , spoke .Name , metav1.DeleteOptions {}); err != nil && ! kerrs . IsNotFound ( err ) {
632- return err
646+ if err = clusterC .ClusterV1 ().ManagedClusters ().Delete (ctx , spoke .Name , metav1.DeleteOptions {}); err != nil {
647+ return client . IgnoreNotFound ( err )
633648 }
634649
635650 // remove Namespace
636651 ns := & corev1.Namespace {ObjectMeta : metav1.ObjectMeta {Name : spoke .Name }}
637- if err := kClient .Delete (ctx , ns ); err != nil && ! kerrs . IsNotFound ( err ) {
638- return err
652+ if err := kClient .Delete (ctx , ns ); err != nil {
653+ return client . IgnoreNotFound ( err )
639654 }
640655
641656 return nil
@@ -652,6 +667,41 @@ func allOwnersAddOns(mws []workv1.ManifestWork) bool {
652667 return true
653668}
654669
670+ // waitForAddonManifestWorksCleanup polls for addon-related manifestWorks to be removed
671+ // after addon disable operation to avoid race conditions during spoke unjoin
672+ func waitForAddonManifestWorksCleanup (ctx context.Context , workC * workapi.Clientset , spokeName string , timeout time.Duration ) error {
673+ logger := log .FromContext (ctx )
674+ logger .V (1 ).Info ("waiting for addon manifestWorks cleanup" , "spokeName" , spokeName , "timeout" , timeout )
675+
676+ err := wait .PollUntilContextTimeout (ctx , addonCleanupPollInterval , timeout , true , func (ctx context.Context ) (bool , error ) {
677+ manifestWorks , err := workC .WorkV1 ().ManifestWorks (spokeName ).List (ctx , metav1.ListOptions {})
678+ if err != nil {
679+ logger .V (3 ).Info ("failed to list manifestWorks during cleanup wait" , "error" , err )
680+ // Return false to continue polling on transient errors
681+ return false , nil
682+ }
683+
684+ // Success condition: no manifestWorks remaining
685+ if len (manifestWorks .Items ) == 0 {
686+ logger .V (1 ).Info ("addon manifestWorks cleanup completed" , "spokeName" , spokeName , "remainingManifestWorks" , len (manifestWorks .Items ))
687+ return true , nil
688+ }
689+
690+ logger .V (3 ).Info ("waiting for addon manifestWorks cleanup" ,
691+ "spokeName" , spokeName ,
692+ "addonManifestWorks" , len (manifestWorks .Items ))
693+
694+ // Continue polling
695+ return false , nil
696+ })
697+
698+ if err != nil {
699+ return fmt .Errorf ("timeout waiting for addon manifestWorks cleanup for spoke %s: %w" , spokeName , err )
700+ }
701+
702+ return nil
703+ }
704+
655705// prepareKlusterletValuesFile creates a temporary file with klusterlet values and returns
656706// args to append and a cleanup function. Returns empty slice if values are empty.
657707func prepareKlusterletValuesFile (values * v1alpha1.KlusterletChartConfig ) ([]string , func (), error ) {
0 commit comments