diff --git a/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml b/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml
index 2c1c65830..d47da5cc3 100644
--- a/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml
+++ b/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml
@@ -33,10 +33,10 @@ socketLB:
 envoy:
   image:
     useDigest: false
-k8sServiceHost: auto
 {{- with .ControlPlane }}
 {{- range $key, $val := .metadata.annotations }}
 {{- if eq $key "controlplane.cluster.x-k8s.io/skip-kube-proxy" }}
+k8sServiceHost: auto
 kubeProxyReplacement: true{{ break }}
 {{- end }}
 {{- end }}
diff --git a/common/pkg/capi/utils/annotations.go b/common/pkg/capi/utils/annotations.go
new file mode 100644
index 000000000..bfe87966d
--- /dev/null
+++ b/common/pkg/capi/utils/annotations.go
@@ -0,0 +1,18 @@
+// Copyright 2025 Nutanix. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package utils
+
+import (
+    clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+    controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
+)
+
+// ShouldSkipKubeProxy returns true if the cluster is configured to skip kube proxy installation.
+func ShouldSkipKubeProxy(cluster *clusterv1.Cluster) bool {
+    if cluster.Spec.Topology != nil {
+        _, isSkipKubeProxy := cluster.Spec.Topology.ControlPlane.Metadata.Annotations[controlplanev1.SkipKubeProxyAnnotation]
+        return isSkipKubeProxy
+    }
+    return false
+}
diff --git a/pkg/handlers/generic/lifecycle/addons/helmaddon.go b/pkg/handlers/generic/lifecycle/addons/helmaddon.go
index 46b025f4b..d920dd901 100644
--- a/pkg/handlers/generic/lifecycle/addons/helmaddon.go
+++ b/pkg/handlers/generic/lifecycle/addons/helmaddon.go
@@ -231,7 +231,7 @@ func waitToBeReady(
             if obj.Generation != obj.Status.ObservedGeneration {
                 return false, nil
             }
-            return conditions.IsTrue(obj, caaphv1.HelmReleaseProxiesReadyCondition), nil
+            return conditions.IsTrue(obj, clusterv1.ReadyCondition), nil
         },
         Interval: 5 * time.Second,
         Timeout:  30 * time.Second,
diff --git a/pkg/handlers/generic/lifecycle/addons/test.go b/pkg/handlers/generic/lifecycle/addons/test.go
new file mode 100644
index 000000000..eb5da9206
--- /dev/null
+++ b/pkg/handlers/generic/lifecycle/addons/test.go
@@ -0,0 +1,28 @@
+// Copyright 2025 Nutanix. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package addons
+
+import (
+    "context"
+
+    "github.com/go-logr/logr"
+    clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+)
+
+type TestStrategy struct {
+    err error
+}
+
+func NewTestStrategy(err error) *TestStrategy {
+    return &TestStrategy{err: err}
+}
+
+func (s TestStrategy) Apply(
+    ctx context.Context,
+    cluster *clusterv1.Cluster,
+    defaultsNamespace string,
+    log logr.Logger,
+) error {
+    return s.err
+}
diff --git a/pkg/handlers/generic/lifecycle/cni/cilium/handler.go b/pkg/handlers/generic/lifecycle/cni/cilium/handler.go
index ff9d14cc2..50473c2f5 100644
--- a/pkg/handlers/generic/lifecycle/cni/cilium/handler.go
+++ b/pkg/handlers/generic/lifecycle/cni/cilium/handler.go
@@ -6,11 +6,15 @@ package cilium
 import (
     "context"
     "fmt"
+    "time"
 
+    "github.com/go-logr/logr"
     "github.com/spf13/pflag"
+    appsv1 "k8s.io/api/apps/v1"
     corev1 "k8s.io/api/core/v1"
     metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
     clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+    "sigs.k8s.io/cluster-api/controllers/remote"
     runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
     ctrl "sigs.k8s.io/controller-runtime"
     ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
@@ -19,10 +23,12 @@ import (
     commonhandlers "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/clustertopology/handlers"
     "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/clustertopology/handlers/lifecycle"
     "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/clustertopology/variables"
+    capiutils "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/utils"
     "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/generic/lifecycle/addons"
     "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/generic/lifecycle/config"
     "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/options"
     handlersutils "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/utils"
+    "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/wait"
 )
 
 type CNIConfig struct {
@@ -221,7 +227,8 @@ func (c *CiliumCNI) apply(
             ),
             c.client,
             helmChart,
-        )
+        ).
+            WithDefaultWaiter()
     case "":
         resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
         resp.SetMessage("strategy not specified for Cilium CNI addon")
@@ -231,7 +238,7 @@
         return
     }
 
-    if err := strategy.Apply(ctx, cluster, targetNamespace, log); err != nil {
+    if err := runApply(ctx, c.client, cluster, strategy, targetNamespace, log); err != nil {
         resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
         resp.SetMessage(err.Error())
         return
@@ -239,3 +246,121 @@
 
     resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
 }
+
+func runApply(
+    ctx context.Context,
+    client ctrlclient.Client,
+    cluster *clusterv1.Cluster,
+    strategy addons.Applier,
+    targetNamespace string,
+    log logr.Logger,
+) error {
+    if err := strategy.Apply(ctx, cluster, targetNamespace, log); err != nil {
+        return err
+    }
+
+    // If skip kube-proxy is not set, return early.
+    // Otherwise, wait for Cilium to be rolled out and then clean up kube-proxy if it is installed.
+    if !capiutils.ShouldSkipKubeProxy(cluster) {
+        return nil
+    }
+
+    log.Info(
+        fmt.Sprintf("Waiting for Cilium to be ready for cluster %s", ctrlclient.ObjectKeyFromObject(cluster)),
+    )
+    if err := waitForCiliumToBeReady(ctx, client, cluster); err != nil {
+        return fmt.Errorf("failed to wait for Cilium to be ready: %w", err)
+    }
+
+    log.Info(
+        fmt.Sprintf("Cleaning up kube-proxy for cluster %s", ctrlclient.ObjectKeyFromObject(cluster)),
+    )
+    if err := cleanupKubeProxy(ctx, client, cluster); err != nil {
+        return fmt.Errorf("failed to clean up kube-proxy: %w", err)
+    }
+
+    return nil
+}
+
+const (
+    kubeProxyName      = "kube-proxy"
+    kubeProxyNamespace = "kube-system"
+)
+
+func waitForCiliumToBeReady(
+    ctx context.Context,
+    c ctrlclient.Client,
+    cluster *clusterv1.Cluster,
+) error {
+    remoteClient, err := remote.NewClusterClient(
+        ctx,
+        "",
+        c,
+        ctrlclient.ObjectKeyFromObject(cluster),
+    )
+    if err != nil {
+        return fmt.Errorf("error creating remote cluster client: %w", err)
+    }
+
+    ds := &appsv1.DaemonSet{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      defaultCiliumReleaseName,
+            Namespace: defaultCiliumNamespace,
+        },
+    }
+    if err := wait.ForObject(
+        ctx,
+        wait.ForObjectInput[*appsv1.DaemonSet]{
+            Reader: remoteClient,
+            Target: ds.DeepCopy(),
+            Check: func(_ context.Context, obj *appsv1.DaemonSet) (bool, error) {
+                return obj.Status.NumberAvailable == obj.Status.DesiredNumberScheduled && obj.Status.NumberUnavailable == 0, nil
+            },
+            Interval: 1 * time.Second,
+            Timeout:  30 * time.Second,
+        },
+    ); err != nil {
+        return fmt.Errorf(
+            "failed to wait for DaemonSet %s to be Ready: %w",
+            ctrlclient.ObjectKeyFromObject(ds),
+            err,
+        )
+    }
+
+    return nil
+}
+
+// cleanupKubeProxy cleans up the kube-proxy DaemonSet and ConfigMap on the remote cluster when kube-proxy is disabled.
+func cleanupKubeProxy(ctx context.Context, c ctrlclient.Client, cluster *clusterv1.Cluster) error {
+    remoteClient, err := remote.NewClusterClient(
+        ctx,
+        "",
+        c,
+        ctrlclient.ObjectKeyFromObject(cluster),
+    )
+    if err != nil {
+        return fmt.Errorf("error creating remote cluster client: %w", err)
+    }
+
+    objs := []ctrlclient.Object{
+        &appsv1.DaemonSet{
+            ObjectMeta: metav1.ObjectMeta{
+                Name:      kubeProxyName,
+                Namespace: kubeProxyNamespace,
+            },
+        },
+        &corev1.ConfigMap{
+            ObjectMeta: metav1.ObjectMeta{
+                Name:      kubeProxyName,
+                Namespace: kubeProxyNamespace,
+            },
+        },
+    }
+    for _, obj := range objs {
+        if err := ctrlclient.IgnoreNotFound(remoteClient.Delete(ctx, obj)); err != nil {
+            return fmt.Errorf("failed to delete %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
+        }
+    }
+
+    return nil
+}
diff --git a/pkg/handlers/generic/lifecycle/cni/cilium/handler_integration_test.go b/pkg/handlers/generic/lifecycle/cni/cilium/handler_integration_test.go
new file mode 100644
index 000000000..75ff16bf2
--- /dev/null
+++ b/pkg/handlers/generic/lifecycle/cni/cilium/handler_integration_test.go
@@ -0,0 +1,197 @@
+// Copyright 2025 Nutanix. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package cilium
+
+import (
+    "fmt"
+
+    "github.com/go-logr/logr"
+    . "github.com/onsi/ginkgo/v2"
+    . "github.com/onsi/gomega"
"github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/runtime" + utilruntime "k8s.io/apimachinery/pkg/util/runtime" + clientgoscheme "k8s.io/client-go/kubernetes/scheme" + clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1" + "sigs.k8s.io/cluster-api/controllers/remote" + controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1" + ctrlclient "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/generic/lifecycle/addons" + "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/test/helpers" +) + +var _ = Describe("Test runApply", func() { + clientScheme := runtime.NewScheme() + utilruntime.Must(clientgoscheme.AddToScheme(clientScheme)) + utilruntime.Must(clusterv1.AddToScheme(clientScheme)) + + It("should clean kube-proxy when neccesery", func(ctx SpecContext) { + c, err := helpers.TestEnv.GetK8sClientWithScheme(clientScheme) + Expect(err).To(BeNil()) + + cluster, remoteClient := setupTestCluster(ctx, c) + strategy := addons.NewTestStrategy(nil) + + By("Should not delete kube-proxy when skip kube-proxy is not set") + err = runApply(ctx, c, cluster, strategy, cluster.Namespace, logr.Discard()) + Expect(err).To(BeNil()) + + // Verify that the kube-proxy DaemonSet and ConfigMap are not deleted + daemonSet := &appsv1.DaemonSet{} + err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, daemonSet) + Expect(err).To(BeNil()) + Expect(daemonSet).ToNot(BeNil()) + configMap := &corev1.ConfigMap{} + err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, configMap) + Expect(err).To(BeNil()) + Expect(configMap).ToNot(BeNil()) + + By("Should not delete when the addon is not applied") + err = runApply( + ctx, + c, + cluster, + addons.NewTestStrategy(fmt.Errorf("test error")), + cluster.Namespace, + logr.Discard(), + ) + Expect(err).ToNot(BeNil()) + + // Verify that the kube-proxy DaemonSet and ConfigMap are not deleted when the addon upgrade errors + err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, daemonSet) + Expect(err).To(BeNil()) + Expect(daemonSet).ToNot(BeNil()) + err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, configMap) + Expect(err).To(BeNil()) + Expect(configMap).ToNot(BeNil()) + + By("Should delete kube-proxy when skip kube-proxy is set") + cluster.Spec.Topology.ControlPlane.Metadata.Annotations = map[string]string{ + controlplanev1.SkipKubeProxyAnnotation: "", + } + + err = runApply(ctx, c, cluster, strategy, cluster.Namespace, logr.Discard()) + Expect(err).To(BeNil()) + + // Verify that the kube-proxy DaemonSet and ConfigMap are deleted. 
+        err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, daemonSet)
+        Expect(err).ToNot(BeNil())
+        Expect(apierrors.IsNotFound(err)).To(BeTrue())
+        err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, configMap)
+        Expect(err).ToNot(BeNil())
+        Expect(apierrors.IsNotFound(err)).To(BeTrue())
+
+        By("Should not fail when kube-proxy is not already installed")
+        err = runApply(ctx, c, cluster, strategy, cluster.Namespace, logr.Discard())
+        Expect(err).To(BeNil())
+    })
+})
+
+func setupTestCluster(
+    ctx SpecContext,
+    c ctrlclient.Client,
+) (*clusterv1.Cluster, ctrlclient.Client) {
+    cluster := &clusterv1.Cluster{
+        ObjectMeta: metav1.ObjectMeta{
+            GenerateName: "test-cluster-",
+            Namespace:    corev1.NamespaceDefault,
+        },
+        Spec: clusterv1.ClusterSpec{
+            Topology: &clusterv1.Topology{
+                Class:   "dummy-class",
+                Version: "dummy-version",
+            },
+        },
+    }
+    Expect(c.Create(ctx, cluster)).To(Succeed())
+
+    Expect(helpers.TestEnv.WithFakeRemoteClusterClient(cluster)).To(Succeed())
+    remoteClient, err := remote.NewClusterClient(ctx, "", c, ctrlclient.ObjectKeyFromObject(cluster))
+    Expect(err).To(BeNil())
+
+    // Create kube-proxy DaemonSet and ConfigMap
+    daemonSet := &appsv1.DaemonSet{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      kubeProxyName,
+            Namespace: kubeProxyNamespace,
+            Labels: map[string]string{
+                "app": kubeProxyName,
+            },
+        },
+        Spec: appsv1.DaemonSetSpec{
+            Selector: &metav1.LabelSelector{
+                MatchLabels: map[string]string{
+                    "app": kubeProxyName,
+                },
+            },
+            Template: corev1.PodTemplateSpec{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      kubeProxyName,
+                    Namespace: kubeProxyNamespace,
+                    Labels: map[string]string{
+                        "app": kubeProxyName,
+                    },
+                },
+                Spec: corev1.PodSpec{
+                    Containers: []corev1.Container{
+                        {
+                            Name:  kubeProxyName,
+                            Image: kubeProxyName,
+                        },
+                    },
+                },
+            },
+        },
+    }
+    Expect(remoteClient.Create(ctx, daemonSet)).To(Succeed())
+
+    configMap := &corev1.ConfigMap{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      kubeProxyName,
+            Namespace: kubeProxyNamespace,
+        },
+    }
+    Expect(remoteClient.Create(ctx, configMap)).To(Succeed())
+
+    ciliumDaemonSet := &appsv1.DaemonSet{
+        ObjectMeta: metav1.ObjectMeta{
+            Name:      defaultCiliumReleaseName,
+            Namespace: defaultCiliumNamespace,
+            Labels: map[string]string{
+                "app": defaultCiliumReleaseName,
+            },
+        },
+        Spec: appsv1.DaemonSetSpec{
+            Selector: &metav1.LabelSelector{
+                MatchLabels: map[string]string{
+                    "app": defaultCiliumReleaseName,
+                },
+            },
+            Template: corev1.PodTemplateSpec{
+                ObjectMeta: metav1.ObjectMeta{
+                    Name:      defaultCiliumReleaseName,
+                    Namespace: defaultCiliumNamespace,
+                    Labels: map[string]string{
+                        "app": defaultCiliumReleaseName,
+                    },
+                },
+                Spec: corev1.PodSpec{
+                    Containers: []corev1.Container{
+                        {
+                            Name:  defaultCiliumReleaseName,
+                            Image: defaultCiliumReleaseName,
+                        },
+                    },
+                },
+            },
+        },
+    }
+    Expect(remoteClient.Create(ctx, ciliumDaemonSet)).To(Succeed())
+
+    return cluster, remoteClient
+}
diff --git a/pkg/handlers/generic/lifecycle/cni/cilium/utils_suite_test.go b/pkg/handlers/generic/lifecycle/cni/cilium/utils_suite_test.go
new file mode 100644
index 000000000..cd07f5992
--- /dev/null
+++ b/pkg/handlers/generic/lifecycle/cni/cilium/utils_suite_test.go
@@ -0,0 +1,17 @@
+// Copyright 2025 Nutanix. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package cilium
+
+import (
+    "testing"
+
+    . "github.com/onsi/ginkgo/v2"
+    . "github.com/onsi/gomega"
+)
+
+// TestCiliumHandler is the entrypoint for integration (envtest) tests.
+func TestCiliumHandler(t *testing.T) {
+    RegisterFailHandler(Fail)
+    RunSpecs(t, "Cilium")
+}
diff --git a/pkg/handlers/generic/mutation/kubeproxymode/inject.go b/pkg/handlers/generic/mutation/kubeproxymode/inject.go
index e87150cef..19e318757 100644
--- a/pkg/handlers/generic/mutation/kubeproxymode/inject.go
+++ b/pkg/handlers/generic/mutation/kubeproxymode/inject.go
@@ -26,6 +26,7 @@ import (
     "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/clustertopology/patches"
     "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/clustertopology/patches/selectors"
     "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/clustertopology/variables"
+    capiutils "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/utils"
 )
 
 const (
@@ -89,11 +90,6 @@ func (h *kubeProxyMode) Mutate(
         return fmt.Errorf("failed to get cluster for kube proxy mode mutation: %w", err)
     }
 
-    isSkipProxy := false
-    if cluster.Spec.Topology != nil {
-        _, isSkipProxy = cluster.Spec.Topology.ControlPlane.Metadata.Annotations[controlplanev1.SkipKubeProxyAnnotation]
-    }
-
     kubeProxyMode, err := variables.Get[v1alpha1.KubeProxyMode](
         vars,
         h.variableName,
@@ -112,6 +108,8 @@ func (h *kubeProxyMode) Mutate(
         kubeProxyMode,
     )
 
+    isSkipProxy := capiutils.ShouldSkipKubeProxy(cluster)
+
     if kubeProxyMode == "" && !isSkipProxy {
         log.V(5).Info("kube proxy mode is not set or skipped, skipping mutation")
         return nil
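
Illustration (for review, not part of the patch): a minimal sketch of the behavior the new capiutils.ShouldSkipKubeProxy helper provides. It assumes controlplanev1.SkipKubeProxyAnnotation resolves to the "controlplane.cluster.x-k8s.io/skip-kube-proxy" key matched in the Cilium values template above; only the presence of the annotation matters, not its value.

    package main

    import (
        "fmt"

        clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"

        capiutils "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/utils"
    )

    func main() {
        // A cluster without a managed topology never skips kube-proxy.
        fmt.Println(capiutils.ShouldSkipKubeProxy(&clusterv1.Cluster{})) // false

        // A topology-managed cluster skips kube-proxy only when the control
        // plane metadata carries the skip-kube-proxy annotation; the value
        // (empty here, as in the integration test) is ignored.
        cluster := &clusterv1.Cluster{
            Spec: clusterv1.ClusterSpec{
                Topology: &clusterv1.Topology{
                    ControlPlane: clusterv1.ControlPlaneTopology{
                        Metadata: clusterv1.ObjectMeta{
                            Annotations: map[string]string{
                                "controlplane.cluster.x-k8s.io/skip-kube-proxy": "",
                            },
                        },
                    },
                },
            },
        }
        fmt.Println(capiutils.ShouldSkipKubeProxy(cluster)) // true
    }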
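
A second sketch of how the new addons.TestStrategy relates to the addons.Applier type accepted by runApply. Applier's definition is not shown in this diff; the sketch assumes its method set is exactly the Apply signature TestStrategy implements, as the strategy.Apply(ctx, cluster, targetNamespace, log) call site suggests.

    package addons_test

    import (
        "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/generic/lifecycle/addons"
    )

    // Compile-time checks: TestStrategy declares Apply on a value receiver,
    // so both the value and the *TestStrategy returned by NewTestStrategy
    // should satisfy the assumed Applier interface. Passing a non-nil error
    // to NewTestStrategy makes Apply fail, which is how the integration test
    // exercises runApply's "addon fails to apply" path.
    var (
        _ addons.Applier = addons.TestStrategy{}
        _ addons.Applier = addons.NewTestStrategy(nil)
    )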