Commit 6fe03f8

feat: automatically enable Cilium kube-proxy replacement feature (#1286)
**What problem does this PR solve?**: This PR enables Cilium's kube-proxy replacement feature automatically when clusters disable kube-proxy installation during upgrades. The Cilium handler applies the new configuration, waits for the Cilium DaemonSet to be rolled out, and then deletes the kube-proxy DaemonSet and its ConfigMap.

**Which issue(s) this PR fixes**: Fixes #

**How Has This Been Tested?**:

**Special notes for your reviewer**: Stacked on #1288
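For context, clusters opt in by annotating the control plane in the Cluster topology; the new ShouldSkipKubeProxy helper introduced below only checks for the key's presence. A minimal sketch, with the cluster name, class, version, and empty annotation value as illustrative placeholders:

apiVersion: cluster.x-k8s.io/v1beta1
kind: Cluster
metadata:
  name: example-cluster            # placeholder
spec:
  topology:
    class: example-clusterclass    # placeholder
    version: v1.30.0               # placeholder
    controlPlane:
      metadata:
        annotations:
          # Presence of this key skips kube-proxy installation and, with this
          # change, turns on Cilium's kube-proxy replacement.
          controlplane.cluster.x-k8s.io/skip-kube-proxy: ""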
1 parent 3ac9bab · commit 6fe03f8

File tree: 8 files changed (+392, -9 lines)


charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml

Lines changed: 1 addition & 1 deletion
@@ -33,10 +33,10 @@ socketLB:
 envoy:
   image:
     useDigest: false
-k8sServiceHost: auto
 {{- with .ControlPlane }}
 {{- range $key, $val := .metadata.annotations }}
 {{- if eq $key "controlplane.cluster.x-k8s.io/skip-kube-proxy" }}
+k8sServiceHost: auto
 kubeProxyReplacement: true{{ break }}
 {{- end }}
 {{- end }}
Lines changed: 18 additions & 0 deletions
@@ -0,0 +1,18 @@
+// Copyright 2025 Nutanix. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package utils
+
+import (
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	controlplanev1 "sigs.k8s.io/cluster-api/controlplane/kubeadm/api/v1beta1"
+)
+
+// ShouldSkipKubeProxy returns true if the cluster is configured to skip kube-proxy installation.
+func ShouldSkipKubeProxy(cluster *clusterv1.Cluster) bool {
+	if cluster.Spec.Topology != nil {
+		_, isSkipKubeProxy := cluster.Spec.Topology.ControlPlane.Metadata.Annotations[controlplanev1.SkipKubeProxyAnnotation]
+		return isSkipKubeProxy
+	}
+	return false
+}

pkg/handlers/generic/lifecycle/addons/helmaddon.go

Lines changed: 1 addition & 1 deletion
@@ -231,7 +231,7 @@ func waitToBeReady(
 			if obj.Generation != obj.Status.ObservedGeneration {
 				return false, nil
 			}
-			return conditions.IsTrue(obj, caaphv1.HelmReleaseProxiesReadyCondition), nil
+			return conditions.IsTrue(obj, clusterv1.ReadyCondition), nil
 		},
 		Interval: 5 * time.Second,
 		Timeout:  30 * time.Second,
Lines changed: 28 additions & 0 deletions
@@ -0,0 +1,28 @@
+// Copyright 2025 Nutanix. All rights reserved.
+// SPDX-License-Identifier: Apache-2.0
+
+package addons
+
+import (
+	"context"
+
+	"github.com/go-logr/logr"
+	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+)
+
+type TestStrategy struct {
+	err error
+}
+
+func NewTestStrategy(err error) *TestStrategy {
+	return &TestStrategy{err: err}
+}
+
+func (s TestStrategy) Apply(
+	ctx context.Context,
+	cluster *clusterv1.Cluster,
+	defaultsNamespace string,
+	log logr.Logger,
+) error {
+	return s.err
+}

pkg/handlers/generic/lifecycle/cni/cilium/handler.go

Lines changed: 127 additions & 2 deletions
@@ -6,11 +6,15 @@ package cilium
 import (
 	"context"
 	"fmt"
+	"time"

+	"github.com/go-logr/logr"
 	"github.com/spf13/pflag"
+	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
+	"sigs.k8s.io/cluster-api/controllers/remote"
 	runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
 	ctrl "sigs.k8s.io/controller-runtime"
 	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
@@ -19,10 +23,12 @@ import (
 	commonhandlers "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/clustertopology/handlers"
 	"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/clustertopology/handlers/lifecycle"
 	"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/clustertopology/variables"
+	capiutils "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/common/pkg/capi/utils"
 	"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/generic/lifecycle/addons"
 	"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/generic/lifecycle/config"
 	"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/options"
 	handlersutils "github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/handlers/utils"
+	"github.com/nutanix-cloud-native/cluster-api-runtime-extensions-nutanix/pkg/wait"
 )

 type CNIConfig struct {
@@ -221,7 +227,8 @@ func (c *CiliumCNI) apply(
 			),
 			c.client,
 			helmChart,
-		)
+		).
+			WithDefaultWaiter()
 	case "":
 		resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
 		resp.SetMessage("strategy not specified for Cilium CNI addon")
@@ -231,11 +238,129 @@
 		return
 	}

-	if err := strategy.Apply(ctx, cluster, targetNamespace, log); err != nil {
+	if err := runApply(ctx, c.client, cluster, strategy, targetNamespace, log); err != nil {
 		resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
 		resp.SetMessage(err.Error())
 		return
 	}

 	resp.SetStatus(runtimehooksv1.ResponseStatusSuccess)
 }
+
+func runApply(
+	ctx context.Context,
+	client ctrlclient.Client,
+	cluster *clusterv1.Cluster,
+	strategy addons.Applier,
+	targetNamespace string,
+	log logr.Logger,
+) error {
+	if err := strategy.Apply(ctx, cluster, targetNamespace, log); err != nil {
+		return err
+	}
+
+	// If skip kube-proxy is not set, return early.
+	// Otherwise, wait for Cilium to be rolled out and then cleanup kube-proxy if installed.
+	if !capiutils.ShouldSkipKubeProxy(cluster) {
+		return nil
+	}
+
+	log.Info(
+		fmt.Sprintf("Waiting for Cilium to be ready for cluster %s", ctrlclient.ObjectKeyFromObject(cluster)),
+	)
+	if err := waitForCiliumToBeReady(ctx, client, cluster); err != nil {
+		return fmt.Errorf("failed to wait for Cilium to be ready: %w", err)
+	}
+
+	log.Info(
+		fmt.Sprintf("Cleaning up kube-proxy for cluster %s", ctrlclient.ObjectKeyFromObject(cluster)),
+	)
+	if err := cleanupKubeProxy(ctx, client, cluster); err != nil {
+		return fmt.Errorf("failed to cleanup kube-proxy: %w", err)
+	}
+
+	return nil
+}
+
+const (
+	kubeProxyName      = "kube-proxy"
+	kubeProxyNamespace = "kube-system"
+)
+
+func waitForCiliumToBeReady(
+	ctx context.Context,
+	c ctrlclient.Client,
+	cluster *clusterv1.Cluster,
+) error {
+	remoteClient, err := remote.NewClusterClient(
+		ctx,
+		"",
+		c,
+		ctrlclient.ObjectKeyFromObject(cluster),
+	)
+	if err != nil {
+		return fmt.Errorf("error creating remote cluster client: %w", err)
+	}
+
+	ds := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      defaultCiliumReleaseName,
+			Namespace: defaultCiliumNamespace,
+		},
+	}
+	if err := wait.ForObject(
+		ctx,
+		wait.ForObjectInput[*appsv1.DaemonSet]{
+			Reader: remoteClient,
+			Target: ds.DeepCopy(),
+			Check: func(_ context.Context, obj *appsv1.DaemonSet) (bool, error) {
+				return obj.Status.NumberAvailable == obj.Status.DesiredNumberScheduled && obj.Status.NumberUnavailable == 0, nil
+			},
+			Interval: 1 * time.Second,
+			Timeout:  30 * time.Second,
+		},
+	); err != nil {
+		return fmt.Errorf(
+			"failed to wait for DaemonSet %s to be Ready: %w",
+			ctrlclient.ObjectKeyFromObject(ds),
+			err,
+		)
+	}
+
+	return nil
+}
+
+// cleanupKubeProxy cleans up kube-proxy DaemonSet and ConfigMap on the remote cluster when kube-proxy is disabled.
+func cleanupKubeProxy(ctx context.Context, c ctrlclient.Client, cluster *clusterv1.Cluster) error {
+	remoteClient, err := remote.NewClusterClient(
+		ctx,
+		"",
+		c,
+		ctrlclient.ObjectKeyFromObject(cluster),
+	)
+	if err != nil {
+		return fmt.Errorf("error creating remote cluster client: %w", err)
+	}
+
+	objs := []ctrlclient.Object{
+		&appsv1.DaemonSet{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      kubeProxyName,
+				Namespace: kubeProxyNamespace,
+			},
+		},
+		&corev1.ConfigMap{
+			ObjectMeta: metav1.ObjectMeta{
+				Name:      kubeProxyName,
+				Namespace: kubeProxyNamespace,
+			},
+		},
+	}
+	for _, obj := range objs {
+		if err := ctrlclient.IgnoreNotFound(remoteClient.Delete(ctx, obj)); err != nil {
+			return fmt.Errorf("failed to delete %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
+		}
+	}
+
+	return nil
+}
