@@ -12,6 +12,7 @@ import (
1212 "github.com/spf13/pflag"
1313 appsv1 "k8s.io/api/apps/v1"
1414 corev1 "k8s.io/api/core/v1"
15+ apierrors "k8s.io/apimachinery/pkg/api/errors"
1516 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1617 clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1718 "sigs.k8s.io/cluster-api/controllers/remote"
@@ -228,6 +229,7 @@ func (c *CiliumCNI) apply(
228229 c .client ,
229230 helmChart ,
230231 ).
232+ WithValueTemplater (templateValues ).
231233 WithDefaultWaiter ()
232234 case "" :
233235 resp .SetStatus (runtimehooksv1 .ResponseStatusFailure )
@@ -259,65 +261,144 @@ func runApply(
259261 return err
260262 }
261263
264+ // It is possible to disable kube-proxy and migrate to Cilium's kube-proxy replacement feature in a running cluster.
265+ // In this case, we need to wait for Cilium to be restated with new configuration and then cleanup kube-proxy.
266+
262267 // If skip kube-proxy is not set, return early.
263- // Otherwise, wait for Cilium to be rolled out and then cleanup kube-proxy if installed.
264268 if ! capiutils .ShouldSkipKubeProxy (cluster ) {
265269 return nil
266270 }
267271
272+ remoteClient , err := remote .NewClusterClient (
273+ ctx ,
274+ "" ,
275+ client ,
276+ ctrlclient .ObjectKeyFromObject (cluster ),
277+ )
278+ if err != nil {
279+ return fmt .Errorf ("error creating remote cluster client: %w" , err )
280+ }
281+
282+ // If kube-proxy is not installed, assume that the one-time migration of kube-proxy to Cilium is complete and return early.
283+ isKubeProxyInstalled , err := isKubeProxyInstalled (ctx , remoteClient )
284+ if err != nil {
285+ return fmt .Errorf ("failed to check if kube-proxy is installed: %w" , err )
286+ }
287+ if ! isKubeProxyInstalled {
288+ return nil
289+ }
290+
291+ log .Info (
292+ fmt .Sprintf (
293+ "Waiting for Cilium ConfigMap to be updated with new configuration for cluster %s" ,
294+ ctrlclient .ObjectKeyFromObject (cluster ),
295+ ),
296+ )
297+ if err := waitForCiliumConfigMapToBeUpdatedWithKubeProxyReplacement (ctx , remoteClient ); err != nil {
298+ return fmt .Errorf ("failed to wait for Cilium ConfigMap to be updated: %w" , err )
299+ }
300+
268301 log .Info (
269- fmt .Sprintf ("Waiting for Cilium to be ready for cluster %s" , ctrlclient .ObjectKeyFromObject (cluster )),
302+ fmt .Sprintf (
303+ "Trigger a rollout of Cilium DaemonSet Pods for cluster %s" ,
304+ ctrlclient .ObjectKeyFromObject (cluster ),
305+ ),
270306 )
271- if err := waitForCiliumToBeReady (ctx , client , cluster ); err != nil {
272- return fmt .Errorf ("failed to wait for Cilium to be ready : %w" , err )
307+ if err := forceCiliumRollout (ctx , remoteClient ); err != nil {
308+ return fmt .Errorf ("failed to force trigger a rollout of Cilium DaemonSet Pods : %w" , err )
273309 }
274310
275311 log .Info (
276312 fmt .Sprintf ("Cleaning up kube-proxy for cluster %s" , ctrlclient .ObjectKeyFromObject (cluster )),
277313 )
278- if err := cleanupKubeProxy (ctx , client , cluster ); err != nil {
314+ if err := cleanupKubeProxy (ctx , remoteClient ); err != nil {
279315 return fmt .Errorf ("failed to cleanup kube-proxy: %w" , err )
280316 }
281317
282318 return nil
283319}
284320
285321const (
322+ kubeProxyReplacementConfigKey = "kube-proxy-replacement"
323+ ciliumConfigMapName = "cilium-config"
324+
325+ restartedAtAnnotation = "caren.nutanix.com/restartedAt"
326+
286327 kubeProxyName = "kube-proxy"
287328 kubeProxyNamespace = "kube-system"
288329)
289330
290- func waitForCiliumToBeReady (
291- ctx context.Context ,
292- c ctrlclient.Client ,
293- cluster * clusterv1.Cluster ,
294- ) error {
295- remoteClient , err := remote .NewClusterClient (
331+ // Use vars to override in integration tests.
332+ var (
333+ waitInterval = 1 * time .Second
334+ waitTimeout = 30 * time .Second
335+ )
336+
337+ func waitForCiliumConfigMapToBeUpdatedWithKubeProxyReplacement (ctx context.Context , c ctrlclient.Client ) error {
338+ cm := & corev1.ConfigMap {
339+ ObjectMeta : metav1.ObjectMeta {
340+ Name : ciliumConfigMapName ,
341+ Namespace : defaultCiliumNamespace ,
342+ },
343+ }
344+ if err := wait .ForObject (
296345 ctx ,
297- "" ,
298- c ,
299- ctrlclient .ObjectKeyFromObject (cluster ),
300- )
301- if err != nil {
302- return fmt .Errorf ("error creating remote cluster client: %w" , err )
346+ wait.ForObjectInput [* corev1.ConfigMap ]{
347+ Reader : c ,
348+ Target : cm .DeepCopy (),
349+ Check : func (_ context.Context , obj * corev1.ConfigMap ) (bool , error ) {
350+ return obj .Data [kubeProxyReplacementConfigKey ] == "true" , nil
351+ },
352+ Interval : waitInterval ,
353+ Timeout : waitTimeout ,
354+ },
355+ ); err != nil {
356+ return fmt .Errorf ("failed to wait for ConfigMap %s to be updated: %w" , ctrlclient .ObjectKeyFromObject (cm ), err )
303357 }
358+ return nil
359+ }
304360
361+ func forceCiliumRollout (ctx context.Context , c ctrlclient.Client ) error {
305362 ds := & appsv1.DaemonSet {
306363 ObjectMeta : metav1.ObjectMeta {
307364 Name : defaultCiliumReleaseName ,
308365 Namespace : defaultCiliumNamespace ,
309366 },
310367 }
368+ if err := c .Get (ctx , ctrlclient .ObjectKeyFromObject (ds ), ds ); err != nil {
369+ return fmt .Errorf ("failed to get cilium daemon set: %w" , err )
370+ }
371+
372+ // Update the DaemonSet to force a rollout.
373+ annotations := ds .Spec .Template .Annotations
374+ if annotations == nil {
375+ annotations = make (map [string ]string )
376+ }
377+ if _ , ok := annotations [restartedAtAnnotation ]; ! ok {
378+ // Only set the annotation once to avoid a race conditition where rollouts are triggered repeatedly.
379+ annotations [restartedAtAnnotation ] = time .Now ().UTC ().Format (time .RFC3339Nano )
380+ }
381+ ds .Spec .Template .Annotations = annotations
382+ if err := c .Update (ctx , ds ); err != nil {
383+ return fmt .Errorf ("failed to update cilium daemon set: %w" , err )
384+ }
385+
311386 if err := wait .ForObject (
312387 ctx ,
313388 wait.ForObjectInput [* appsv1.DaemonSet ]{
314- Reader : remoteClient ,
389+ Reader : c ,
315390 Target : ds .DeepCopy (),
316391 Check : func (_ context.Context , obj * appsv1.DaemonSet ) (bool , error ) {
317- return obj .Status .NumberAvailable == obj .Status .DesiredNumberScheduled && obj .Status .NumberUnavailable == 0 , nil
392+ if obj .Generation != obj .Status .ObservedGeneration {
393+ return false , nil
394+ }
395+ isUpdated := obj .Status .NumberAvailable == obj .Status .DesiredNumberScheduled &&
396+ // We're forcing a rollout so we expect the UpdatedNumberScheduled to be always set.
397+ obj .Status .UpdatedNumberScheduled == obj .Status .DesiredNumberScheduled
398+ return isUpdated , nil
318399 },
319- Interval : 1 * time . Second ,
320- Timeout : 30 * time . Second ,
400+ Interval : waitInterval ,
401+ Timeout : waitTimeout ,
321402 },
322403 ); err != nil {
323404 return fmt .Errorf (
@@ -331,17 +412,7 @@ func waitForCiliumToBeReady(
331412}
332413
333414// cleanupKubeProxy cleans up kube-proxy DaemonSet and ConfigMap on the remote cluster when kube-proxy is disabled.
334- func cleanupKubeProxy (ctx context.Context , c ctrlclient.Client , cluster * clusterv1.Cluster ) error {
335- remoteClient , err := remote .NewClusterClient (
336- ctx ,
337- "" ,
338- c ,
339- ctrlclient .ObjectKeyFromObject (cluster ),
340- )
341- if err != nil {
342- return fmt .Errorf ("error creating remote cluster client: %w" , err )
343- }
344-
415+ func cleanupKubeProxy (ctx context.Context , c ctrlclient.Client ) error {
345416 objs := []ctrlclient.Object {
346417 & appsv1.DaemonSet {
347418 ObjectMeta : metav1.ObjectMeta {
@@ -357,10 +428,27 @@ func cleanupKubeProxy(ctx context.Context, c ctrlclient.Client, cluster *cluster
357428 },
358429 }
359430 for _ , obj := range objs {
360- if err := ctrlclient .IgnoreNotFound (remoteClient .Delete (ctx , obj )); err != nil {
431+ if err := ctrlclient .IgnoreNotFound (c .Delete (ctx , obj )); err != nil {
361432 return fmt .Errorf ("failed to delete %s/%s: %w" , obj .GetNamespace (), obj .GetName (), err )
362433 }
363434 }
364435
365436 return nil
366437}
438+
439+ func isKubeProxyInstalled (ctx context.Context , c ctrlclient.Client ) (bool , error ) {
440+ ds := & appsv1.DaemonSet {
441+ ObjectMeta : metav1.ObjectMeta {
442+ Name : kubeProxyName ,
443+ Namespace : kubeProxyNamespace ,
444+ },
445+ }
446+ err := c .Get (ctx , ctrlclient .ObjectKeyFromObject (ds ), ds )
447+ if err != nil {
448+ if apierrors .IsNotFound (err ) {
449+ return false , nil
450+ }
451+ return false , err
452+ }
453+ return true , nil
454+ }
0 commit comments