Skip to content

Commit 3e6de26

Browse files
authored
fix poll until addon manifestworks have been cleaned up (#47)
* fix: poll until addon manifestworks have been cleaned up Signed-off-by: Artur Shad Nik <[email protected]> * refactor: check existing mcao when deregistering spoke Signed-off-by: Artur Shad Nik <[email protected]> * refactor: revert 'check existing mcao when deregistering spoke' Signed-off-by: Artur Shad Nik <[email protected]> --------- Signed-off-by: Artur Shad Nik <[email protected]>
1 parent 53b12bf commit 3e6de26

File tree

3 files changed

+74
-10
lines changed

3 files changed

+74
-10
lines changed

.gitignore

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ coverage.*
2020
profile.cov
2121
*hub-bundle*.tar.gz
2222
*spoke-bundle*.tar.gz
23+
*fleetconfig-support-bundle/
2324

2425
# Dependency directories (remove the comment below to include it)
2526
# vendor/
@@ -40,6 +41,7 @@ go.work.sum
4041
venv/
4142
*__pycache__/
4243
requirements.txt
44+
.DS_Store
4345

4446
# Temp files
45-
tmp/
47+
tmp/

fleetconfig-controller/internal/controller/addon.go

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -470,7 +470,19 @@ func handleAddonDisable(ctx context.Context, spokeName string, addons []string)
470470
stdout, stderr, err := exec_utils.CmdWithLogs(ctx, cmd, "waiting for 'clusteradm addon disable' to complete...")
471471
if err != nil {
472472
out := append(stdout, stderr...)
473-
return fmt.Errorf("failed to disable addons: %v, output: %s", err, string(out))
473+
outStr := string(out)
474+
475+
// Check if the error is due to addon not being found or cluster not found - these are success cases
476+
if strings.Contains(outStr, "add-on not found") {
477+
logger.V(5).Info("addon already disabled (not found)", "managedcluster", spokeName, "addons", addons, "output", outStr)
478+
return nil
479+
}
480+
if strings.Contains(outStr, "managedclusters.cluster.open-cluster-management.io") && strings.Contains(outStr, "not found") {
481+
logger.V(5).Info("addon disable skipped (cluster not found)", "managedcluster", spokeName, "addons", addons, "output", outStr)
482+
return nil
483+
}
484+
485+
return fmt.Errorf("failed to disable addons: %v, output: %s", err, outStr)
474486
}
475487
logger.V(1).Info("disabled addons", "managedcluster", spokeName, "addons", addons, "output", string(stdout))
476488
return nil

fleetconfig-controller/internal/controller/spoke.go

Lines changed: 58 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,14 @@ import (
99
"regexp"
1010
"slices"
1111
"strings"
12+
"time"
1213

1314
certificatesv1 "k8s.io/api/certificates/v1"
1415
corev1 "k8s.io/api/core/v1"
1516
kerrs "k8s.io/apimachinery/pkg/api/errors"
1617
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
18+
"k8s.io/apimachinery/pkg/util/wait"
19+
workapi "open-cluster-management.io/api/client/work/clientset/versioned"
1720
clusterv1 "open-cluster-management.io/api/cluster/v1"
1821
operatorv1 "open-cluster-management.io/api/operator/v1"
1922
workv1 "open-cluster-management.io/api/work/v1"
@@ -32,8 +35,10 @@ import (
3235
var csrSuffixPattern = regexp.MustCompile(`-[a-zA-Z0-9]{5}$`)
3336

3437
const (
35-
amwExistsError = "you should manually clean them, uninstall kluster will cause those works out of control."
36-
managedClusterAddOn = "ManagedClusterAddOn"
38+
amwExistsError = "you should manually clean them, uninstall kluster will cause those works out of control."
39+
managedClusterAddOn = "ManagedClusterAddOn"
40+
addonCleanupTimeout = 1 * time.Minute
41+
addonCleanupPollInterval = 2 * time.Second
3742
)
3843

3944
// handleSpokes manages Spoke cluster join and upgrade operations
@@ -575,6 +580,7 @@ func deregisterSpoke(ctx context.Context, kClient client.Client, hubKubeconfig [
575580
if err != nil {
576581
return err
577582
}
583+
578584
// skip clean up if the ManagedCluster resource is not found or if any manifestWorks exist
579585
managedCluster, err := clusterC.ClusterV1().ManagedClusters().Get(ctx, spoke.Name, metav1.GetOptions{})
580586
if kerrs.IsNotFound(err) {
@@ -599,16 +605,25 @@ func deregisterSpoke(ctx context.Context, kClient client.Client, hubKubeconfig [
599605
// remove addons only after confirming that the cluster can be unjoined - this avoids leaving dangling resources that may rely on the addon
600606
if err := handleAddonDisable(ctx, spoke.Name, spoke.EnabledAddons); err != nil {
601607
fc.SetConditions(true, v1alpha1.NewCondition(
602-
"AddonsDisabled", spoke.AddonDisableType(), metav1.ConditionFalse, metav1.ConditionTrue,
608+
err.Error(), spoke.AddonDisableType(), metav1.ConditionFalse, metav1.ConditionTrue,
603609
))
604610
return err
605611
}
612+
606613
if len(spoke.EnabledAddons) > 0 {
614+
// Wait for addon manifestWorks to be fully cleaned up before proceeding with unjoin
615+
if err := waitForAddonManifestWorksCleanup(ctx, workC, spoke.Name, addonCleanupTimeout); err != nil {
616+
fc.SetConditions(true, v1alpha1.NewCondition(
617+
err.Error(), spoke.AddonDisableType(), metav1.ConditionFalse, metav1.ConditionTrue,
618+
))
619+
return fmt.Errorf("addon manifestWorks cleanup failed: %w", err)
620+
}
607621
fc.SetConditions(true, v1alpha1.NewCondition(
608622
"AddonsDisabled", spoke.AddonDisableType(), metav1.ConditionTrue, metav1.ConditionTrue,
609623
))
610624
}
611-
// unjoin spoke
625+
626+
// unjoin spoke - safe to proceed now that addon cleanup is confirmed
612627
if err := unjoinSpoke(ctx, kClient, fc, spoke); err != nil {
613628
return err
614629
}
@@ -628,14 +643,14 @@ func deregisterSpoke(ctx context.Context, kClient client.Client, hubKubeconfig [
628643
}
629644

630645
// remove ManagedCluster
631-
if err = clusterC.ClusterV1().ManagedClusters().Delete(ctx, spoke.Name, metav1.DeleteOptions{}); err != nil && !kerrs.IsNotFound(err) {
632-
return err
646+
if err = clusterC.ClusterV1().ManagedClusters().Delete(ctx, spoke.Name, metav1.DeleteOptions{}); err != nil {
647+
return client.IgnoreNotFound(err)
633648
}
634649

635650
// remove Namespace
636651
ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: spoke.Name}}
637-
if err := kClient.Delete(ctx, ns); err != nil && !kerrs.IsNotFound(err) {
638-
return err
652+
if err := kClient.Delete(ctx, ns); err != nil {
653+
return client.IgnoreNotFound(err)
639654
}
640655

641656
return nil
@@ -652,6 +667,41 @@ func allOwnersAddOns(mws []workv1.ManifestWork) bool {
652667
return true
653668
}
654669

670+
// waitForAddonManifestWorksCleanup polls for addon-related manifestWorks to be removed
671+
// after addon disable operation to avoid race conditions during spoke unjoin
672+
func waitForAddonManifestWorksCleanup(ctx context.Context, workC *workapi.Clientset, spokeName string, timeout time.Duration) error {
673+
logger := log.FromContext(ctx)
674+
logger.V(1).Info("waiting for addon manifestWorks cleanup", "spokeName", spokeName, "timeout", timeout)
675+
676+
err := wait.PollUntilContextTimeout(ctx, addonCleanupPollInterval, timeout, true, func(ctx context.Context) (bool, error) {
677+
manifestWorks, err := workC.WorkV1().ManifestWorks(spokeName).List(ctx, metav1.ListOptions{})
678+
if err != nil {
679+
logger.V(3).Info("failed to list manifestWorks during cleanup wait", "error", err)
680+
// Return false to continue polling on transient errors
681+
return false, nil
682+
}
683+
684+
// Success condition: no manifestWorks remaining
685+
if len(manifestWorks.Items) == 0 {
686+
logger.V(1).Info("addon manifestWorks cleanup completed", "spokeName", spokeName, "remainingManifestWorks", len(manifestWorks.Items))
687+
return true, nil
688+
}
689+
690+
logger.V(3).Info("waiting for addon manifestWorks cleanup",
691+
"spokeName", spokeName,
692+
"addonManifestWorks", len(manifestWorks.Items))
693+
694+
// Continue polling
695+
return false, nil
696+
})
697+
698+
if err != nil {
699+
return fmt.Errorf("timeout waiting for addon manifestWorks cleanup for spoke %s: %w", spokeName, err)
700+
}
701+
702+
return nil
703+
}
704+
655705
// prepareKlusterletValuesFile creates a temporary file with klusterlet values and returns
656706
// args to append and a cleanup function. Returns empty slice if values are empty.
657707
func prepareKlusterletValuesFile(values *v1alpha1.KlusterletChartConfig) ([]string, func(), error) {

0 commit comments

Comments
 (0)