
Commit 6b71df2

dkoshkin and jimmidyson authored
fix: Cilium's kube-proxy replacement rollout and wait (#1307)
**What problem does this PR solve?**:

Make the migration from kube-proxy to Cilium's kube-proxy replacement more resilient. Just setting `kubeProxyReplacement: true` is not enough for the Cilium operator to restart the DaemonSet Pods and pick up the new configuration. Instead of relying on `k8sServiceHost` to cause a rollout, this change forces a rollout during the migration process. It also fixes a potential race where the Cilium DaemonSet wait returned early and deleted kube-proxy before all the Pods were restarted. Finally, the whole migration process is now safer and runs only once, while kube-proxy is installed.

Pulled out from #1295

**Which issue(s) this PR fixes**:

Fixes #

**How Has This Been Tested?**:

**Special notes for your reviewer**:

---------

Co-authored-by: Jimmi Dyson <[email protected]>
1 parent 7b3ba01 · commit 6b71df2
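For context on the rollout fix: updating anything under a DaemonSet's Pod template, such as adding an annotation, causes the controller to roll all of its Pods; this is the same mechanism `kubectl rollout restart` uses. Below is a minimal sketch of the technique with an illustrative annotation key and helper name (the actual implementation is `forceCiliumRollout` in `handler.go` further down):

```go
package example

import (
	"context"
	"time"

	appsv1 "k8s.io/api/apps/v1"
	ctrlclient "sigs.k8s.io/controller-runtime/pkg/client"
)

// restartDaemonSet forces a rollout by bumping a Pod-template annotation.
// Changing only DaemonSet metadata would NOT restart Pods; the change must
// live inside spec.template for the controller to create a new revision.
func restartDaemonSet(ctx context.Context, c ctrlclient.Client, key ctrlclient.ObjectKey) error {
	ds := &appsv1.DaemonSet{}
	if err := c.Get(ctx, key, ds); err != nil {
		return err
	}
	if ds.Spec.Template.Annotations == nil {
		ds.Spec.Template.Annotations = map[string]string{}
	}
	// Illustrative key; the handler uses caren.nutanix.com/restartedAt.
	ds.Spec.Template.Annotations["example.com/restartedAt"] = time.Now().UTC().Format(time.RFC3339)
	return c.Update(ctx, ds)
}
```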

File tree

7 files changed: +243 −50 lines changed

charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml

Lines changed: 2 additions & 6 deletions
```diff
@@ -33,11 +33,7 @@ socketLB:
 envoy:
   image:
     useDigest: false
-{{- with .ControlPlane }}
-{{- range $key, $val := .metadata.annotations }}
-{{- if eq $key "controlplane.cluster.x-k8s.io/skip-kube-proxy" }}
 k8sServiceHost: auto
-kubeProxyReplacement: true{{ break }}
-{{- end }}
-{{- end }}
+{{- if .EnableKubeProxyReplacement }}
+kubeProxyReplacement: true
 {{- end }}
```
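gomplate evaluates Go `text/template` syntax, so the simplified gate above can be exercised in isolation. A small self-contained sketch (the snippet is condensed from the template above; the context value is supplied by the handler, or set to `false` by `hack/addons/update-cilium-manifests.sh`):

```go
package main

import (
	"os"
	"text/template"
)

// Condensed copy of the template gate above.
const snippet = `k8sServiceHost: auto
{{- if .EnableKubeProxyReplacement }}
kubeProxyReplacement: true
{{- end }}
`

func main() {
	tmpl := template.Must(template.New("values").Parse(snippet))
	// With true, the kubeProxyReplacement line is rendered; with false
	// (the default in the hack script), it is omitted entirely.
	if err := tmpl.Execute(os.Stdout, map[string]any{"EnableKubeProxyReplacement": true}); err != nil {
		panic(err)
	}
}
```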

hack/addons/update-cilium-manifests.sh

Lines changed: 1 addition & 1 deletion
```diff
@@ -24,7 +24,7 @@ mkdir -p "${ASSETS_DIR}/cilium"
 envsubst -no-unset <"${KUSTOMIZE_BASE_DIR}/kustomization.yaml.tmpl" >"${ASSETS_DIR}/kustomization.yaml"
 
 cat <<EOF >"${ASSETS_DIR}/gomplate-context.yaml"
-ControlPlane: {}
+EnableKubeProxyReplacement: false
 EOF
 gomplate -f "${GIT_REPO_ROOT}/charts/cluster-api-runtime-extensions-nutanix/addons/cni/cilium/values-template.yaml" \
   --context .="${ASSETS_DIR}/gomplate-context.yaml" \
```

hack/tools/fetch-images/main.go

Lines changed: 2 additions & 8 deletions
```diff
@@ -266,16 +266,10 @@ func getValuesFileForChartIfNeeded(chartName, carenChartDirectory string) (strin
 	}
 
 	type input struct {
-		ControlPlane map[string]interface{}
+		EnableKubeProxyReplacement bool
 	}
 	templateInput := input{
-		ControlPlane: map[string]interface{}{
-			"metadata": map[string]interface{}{
-				"annotations": map[string]interface{}{
-					"controlplane.cluster.x-k8s.io/skip-kube-proxy": "",
-				},
-			},
-		},
+		EnableKubeProxyReplacement: true,
 	}
 
 	err = template.Must(template.New(defaultHelmAddonFilename).ParseFiles(f)).Execute(tempFile, &templateInput)
```

pkg/handlers/generic/lifecycle/cni/cilium/handler.go

Lines changed: 122 additions & 33 deletions
```diff
@@ -12,6 +12,7 @@ import (
 	"github.com/spf13/pflag"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
+	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	"sigs.k8s.io/cluster-api/controllers/remote"
@@ -228,6 +229,7 @@ func (c *CiliumCNI) apply(
 			c.client,
 			helmChart,
 		).
+			WithValueTemplater(templateValues).
 			WithDefaultWaiter()
 	case "":
 		resp.SetStatus(runtimehooksv1.ResponseStatusFailure)
@@ -259,65 +261,145 @@ func runApply(
 		return err
 	}
 
+	// It is possible to disable kube-proxy and migrate to Cilium's kube-proxy replacement feature in a running cluster.
+	// In this case, we need to wait for Cilium to be restarted with new configuration and then cleanup kube-proxy.
+
 	// If skip kube-proxy is not set, return early.
-	// Otherwise, wait for Cilium to be rolled out and then cleanup kube-proxy if installed.
 	if !capiutils.ShouldSkipKubeProxy(cluster) {
 		return nil
 	}
 
+	remoteClient, err := remote.NewClusterClient(
+		ctx,
+		"",
+		client,
+		ctrlclient.ObjectKeyFromObject(cluster),
+	)
+	if err != nil {
+		return fmt.Errorf("error creating remote cluster client: %w", err)
+	}
+
+	// If kube-proxy is not installed,
+	// assume that the one-time migration of kube-proxy is complete and return early.
+	isKubeProxyInstalled, err := isKubeProxyInstalled(ctx, remoteClient)
+	if err != nil {
+		return fmt.Errorf("failed to check if kube-proxy is installed: %w", err)
+	}
+	if !isKubeProxyInstalled {
+		return nil
+	}
+
+	log.Info(
+		fmt.Sprintf(
+			"Waiting for Cilium ConfigMap to be updated with new configuration for cluster %s",
+			ctrlclient.ObjectKeyFromObject(cluster),
+		),
+	)
+	if err := waitForCiliumConfigMapToBeUpdatedWithKubeProxyReplacement(ctx, remoteClient); err != nil {
+		return fmt.Errorf("failed to wait for Cilium ConfigMap to be updated: %w", err)
+	}
+
 	log.Info(
-		fmt.Sprintf("Waiting for Cilium to be ready for cluster %s", ctrlclient.ObjectKeyFromObject(cluster)),
+		fmt.Sprintf(
+			"Trigger a rollout of Cilium DaemonSet Pods for cluster %s",
+			ctrlclient.ObjectKeyFromObject(cluster),
+		),
 	)
-	if err := waitForCiliumToBeReady(ctx, client, cluster); err != nil {
-		return fmt.Errorf("failed to wait for Cilium to be ready: %w", err)
+	if err := forceCiliumRollout(ctx, remoteClient); err != nil {
+		return fmt.Errorf("failed to force trigger a rollout of Cilium DaemonSet Pods: %w", err)
 	}
 
 	log.Info(
 		fmt.Sprintf("Cleaning up kube-proxy for cluster %s", ctrlclient.ObjectKeyFromObject(cluster)),
 	)
-	if err := cleanupKubeProxy(ctx, client, cluster); err != nil {
+	if err := cleanupKubeProxy(ctx, remoteClient); err != nil {
 		return fmt.Errorf("failed to cleanup kube-proxy: %w", err)
 	}
 
 	return nil
 }
 
 const (
+	kubeProxyReplacementConfigKey = "kube-proxy-replacement"
+	ciliumConfigMapName           = "cilium-config"
+
+	restartedAtAnnotation = "caren.nutanix.com/restartedAt"
+
 	kubeProxyName      = "kube-proxy"
 	kubeProxyNamespace = "kube-system"
 )
 
-func waitForCiliumToBeReady(
-	ctx context.Context,
-	c ctrlclient.Client,
-	cluster *clusterv1.Cluster,
-) error {
-	remoteClient, err := remote.NewClusterClient(
+// Use vars to override in integration tests.
+var (
+	waitInterval = 1 * time.Second
+	waitTimeout  = 30 * time.Second
+)
+
+func waitForCiliumConfigMapToBeUpdatedWithKubeProxyReplacement(ctx context.Context, c ctrlclient.Client) error {
+	cm := &corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      ciliumConfigMapName,
+			Namespace: defaultCiliumNamespace,
+		},
+	}
+	if err := wait.ForObject(
 		ctx,
-		"",
-		c,
-		ctrlclient.ObjectKeyFromObject(cluster),
-	)
-	if err != nil {
-		return fmt.Errorf("error creating remote cluster client: %w", err)
+		wait.ForObjectInput[*corev1.ConfigMap]{
+			Reader: c,
+			Target: cm.DeepCopy(),
+			Check: func(_ context.Context, obj *corev1.ConfigMap) (bool, error) {
+				return obj.Data[kubeProxyReplacementConfigKey] == "true", nil
+			},
+			Interval: waitInterval,
+			Timeout:  waitTimeout,
+		},
+	); err != nil {
+		return fmt.Errorf("failed to wait for ConfigMap %s to be updated: %w", ctrlclient.ObjectKeyFromObject(cm), err)
 	}
+	return nil
+}
 
+func forceCiliumRollout(ctx context.Context, c ctrlclient.Client) error {
 	ds := &appsv1.DaemonSet{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      defaultCiliumReleaseName,
 			Namespace: defaultCiliumNamespace,
 		},
 	}
+	if err := c.Get(ctx, ctrlclient.ObjectKeyFromObject(ds), ds); err != nil {
+		return fmt.Errorf("failed to get cilium daemon set: %w", err)
+	}
+
+	// Update the DaemonSet to force a rollout.
+	annotations := ds.Spec.Template.Annotations
+	if annotations == nil {
+		annotations = make(map[string]string, 1)
+	}
+	if _, ok := annotations[restartedAtAnnotation]; !ok {
+		// Only set the annotation once to avoid a race condition where rollouts are triggered repeatedly.
+		annotations[restartedAtAnnotation] = time.Now().UTC().Format(time.RFC3339)
+	}
+	ds.Spec.Template.Annotations = annotations
+	if err := c.Update(ctx, ds); err != nil {
+		return fmt.Errorf("failed to update cilium daemon set: %w", err)
+	}
+
 	if err := wait.ForObject(
 		ctx,
 		wait.ForObjectInput[*appsv1.DaemonSet]{
-			Reader: remoteClient,
+			Reader: c,
 			Target: ds.DeepCopy(),
 			Check: func(_ context.Context, obj *appsv1.DaemonSet) (bool, error) {
-				return obj.Status.NumberAvailable == obj.Status.DesiredNumberScheduled && obj.Status.NumberUnavailable == 0, nil
+				if obj.Generation != obj.Status.ObservedGeneration {
+					return false, nil
+				}
+				isUpdated := obj.Status.NumberAvailable == obj.Status.DesiredNumberScheduled &&
+					// We're forcing a rollout so we expect the UpdatedNumberScheduled to be always set.
+					obj.Status.UpdatedNumberScheduled == obj.Status.DesiredNumberScheduled
+				return isUpdated, nil
 			},
-			Interval: 1 * time.Second,
-			Timeout:  30 * time.Second,
+			Interval: waitInterval,
+			Timeout:  waitTimeout,
 		},
 	); err != nil {
 		return fmt.Errorf(
@@ -331,17 +413,7 @@ func waitForCiliumToBeReady(
 	}
 
 // cleanupKubeProxy cleans up kube-proxy DaemonSet and ConfigMap on the remote cluster when kube-proxy is disabled.
-func cleanupKubeProxy(ctx context.Context, c ctrlclient.Client, cluster *clusterv1.Cluster) error {
-	remoteClient, err := remote.NewClusterClient(
-		ctx,
-		"",
-		c,
-		ctrlclient.ObjectKeyFromObject(cluster),
-	)
-	if err != nil {
-		return fmt.Errorf("error creating remote cluster client: %w", err)
-	}
-
+func cleanupKubeProxy(ctx context.Context, c ctrlclient.Client) error {
 	objs := []ctrlclient.Object{
 		&appsv1.DaemonSet{
 			ObjectMeta: metav1.ObjectMeta{
@@ -357,10 +429,27 @@ func cleanupKubeProxy(ctx context.Context, c ctrlclient.Client, cluster *cluster
 		},
 	}
 	for _, obj := range objs {
-		if err := ctrlclient.IgnoreNotFound(remoteClient.Delete(ctx, obj)); err != nil {
+		if err := ctrlclient.IgnoreNotFound(c.Delete(ctx, obj)); err != nil {
 			return fmt.Errorf("failed to delete %s/%s: %w", obj.GetNamespace(), obj.GetName(), err)
 		}
 	}
 
 	return nil
 }
+
+func isKubeProxyInstalled(ctx context.Context, c ctrlclient.Client) (bool, error) {
+	ds := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      kubeProxyName,
+			Namespace: kubeProxyNamespace,
+		},
+	}
+	err := c.Get(ctx, ctrlclient.ObjectKeyFromObject(ds), ds)
+	if err != nil {
+		if apierrors.IsNotFound(err) {
+			return false, nil
+		}
+		return false, err
+	}
+	return true, nil
+}
```
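The stricter DaemonSet check above is what closes the early-return race: the old predicate (`NumberAvailable == DesiredNumberScheduled && NumberUnavailable == 0`) can already hold before the controller has observed the bumped Pod template, so the wait could pass while old Pods were still running and kube-proxy was deleted too soon. The new check only passes once the controller has caught up and every scheduled Pod is from the updated revision. Distilled from the `Check` function in the diff (a sketch, not the handler's exact code):

```go
package example

import appsv1 "k8s.io/api/apps/v1"

// rolloutComplete condenses the readiness predicate from forceCiliumRollout.
func rolloutComplete(ds *appsv1.DaemonSet) bool {
	// Status is stale until the controller observes the bumped template.
	if ds.Generation != ds.Status.ObservedGeneration {
		return false
	}
	// Every node that should run a Pod has one, and each is from the new revision.
	return ds.Status.UpdatedNumberScheduled == ds.Status.DesiredNumberScheduled &&
		ds.Status.NumberAvailable == ds.Status.DesiredNumberScheduled
}
```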

pkg/handlers/generic/lifecycle/cni/cilium/handler_integration_test.go

Lines changed: 71 additions & 2 deletions
```diff
@@ -5,6 +5,7 @@ package cilium
 
 import (
 	"fmt"
+	"time"
 
 	"github.com/go-logr/logr"
 	. "github.com/onsi/ginkgo/v2"
@@ -51,7 +52,7 @@ var _ = Describe("Test runApply", func() {
 	Expect(err).To(BeNil())
 	Expect(configMap).ToNot(BeNil())
 
-	By("Should not delete when the addon is not applied")
+	By("Should not delete kube-proxy when the addon is not applied")
 	err = runApply(
 		ctx,
 		c,
@@ -70,14 +71,65 @@
 	Expect(err).To(BeNil())
 	Expect(configMap).ToNot(BeNil())
 
-	By("Should delete kube-proxy when skip kube-proxy is set")
+	By("Should not delete kube-proxy when Cilium DaemonSet is not updated")
 	cluster.Spec.Topology.ControlPlane.Metadata.Annotations = map[string]string{
 		controlplanev1.SkipKubeProxyAnnotation: "",
 	}
+	// Speed up the test.
+	waitTimeout = 1 * time.Second
+	err = runApply(ctx, c, cluster, strategy, cluster.Namespace, logr.Discard())
+	Expect(err).ToNot(BeNil())
+
+	// Verify that the kube-proxy DaemonSet and ConfigMap are not deleted when Cilium DaemonSet is not updated
+	err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, daemonSet)
+	Expect(err).To(BeNil())
+	Expect(daemonSet).ToNot(BeNil())
+	err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, configMap)
+	Expect(err).To(BeNil())
+	Expect(configMap).ToNot(BeNil())
+
+	By("Should delete kube-proxy when skip kube-proxy is set")
+	// Update the status of the Cilium DaemonSet to simulate a roll out.
+	ciliumDaemonSet := &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      defaultCiliumReleaseName,
+			Namespace: defaultCiliumNamespace,
+		},
+	}
+	err = remoteClient.Get(
+		ctx,
+		ctrlclient.ObjectKey{Name: defaultCiliumReleaseName, Namespace: defaultCiliumNamespace},
+		ciliumDaemonSet,
+	)
+	Expect(err).To(BeNil())
+	ciliumDaemonSet.Status = appsv1.DaemonSetStatus{
+		ObservedGeneration:     2,
+		NumberAvailable:        2,
+		DesiredNumberScheduled: 2,
+		UpdatedNumberScheduled: 2,
+		NumberUnavailable:      0,
+	}
+	Expect(remoteClient.Status().Update(ctx, ciliumDaemonSet)).To(Succeed())
 
 	err = runApply(ctx, c, cluster, strategy, cluster.Namespace, logr.Discard())
 	Expect(err).To(BeNil())
 
+	// Verify that the Cilium DaemonSet was updated.
+	ciliumDaemonSet = &appsv1.DaemonSet{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      defaultCiliumReleaseName,
+			Namespace: defaultCiliumNamespace,
+		},
+	}
+	err = remoteClient.Get(
+		ctx,
+		ctrlclient.ObjectKeyFromObject(ciliumDaemonSet),
+		ciliumDaemonSet,
+	)
+	Expect(err).To(BeNil())
+	Expect(ciliumDaemonSet).ToNot(BeNil())
+	Expect(ciliumDaemonSet.Spec.Template.Annotations).To(HaveKey(restartedAtAnnotation))
+
 	// Verify that the kube-proxy DaemonSet and ConfigMap are deleted.
 	err = remoteClient.Get(ctx, ctrlclient.ObjectKey{Name: kubeProxyName, Namespace: kubeProxyNamespace}, daemonSet)
 	Expect(err).ToNot(BeNil())
@@ -158,13 +210,15 @@ func setupTestCluster(
 	}
 	Expect(remoteClient.Create(ctx, configMap)).To(Succeed())
 
+	// Cilium DaemonSet, Pods and ConfigMap
 	ciliumDaemonSet := &appsv1.DaemonSet{
 		ObjectMeta: metav1.ObjectMeta{
 			Name:      defaultCiliumReleaseName,
 			Namespace: defaultCiliumNamespace,
 			Labels: map[string]string{
 				"app": defaultCiliumReleaseName,
 			},
+			Generation: 1,
 		},
 		Spec: appsv1.DaemonSetSpec{
 			Selector: &metav1.LabelSelector{
@@ -192,6 +246,21 @@
 		},
 	}
 	Expect(remoteClient.Create(ctx, ciliumDaemonSet)).To(Succeed())
+	ciliumDaemonSet.Status = appsv1.DaemonSetStatus{
+		ObservedGeneration: 1,
+	}
+	Expect(remoteClient.Status().Update(ctx, ciliumDaemonSet)).To(Succeed())
+
+	configMap = &corev1.ConfigMap{
+		ObjectMeta: metav1.ObjectMeta{
+			Name:      ciliumConfigMapName,
+			Namespace: defaultCiliumNamespace,
+		},
+		Data: map[string]string{
+			kubeProxyReplacementConfigKey: "true",
+		},
+	}
+	Expect(remoteClient.Create(ctx, configMap)).To(Succeed())
 
 	return cluster, remoteClient
 }
```
