From 7917b469fef7913ded76eee0dc6749ebe8662993 Mon Sep 17 00:00:00 2001 From: Tyler Lloyd Date: Wed, 18 Dec 2024 16:36:38 -0500 Subject: [PATCH 1/5] fix: let NNCReconciler process every update when IPAMv2 is enabled --- cns/kubecontroller/nodenetworkconfig/reconciler.go | 9 +++++++-- cns/service/main.go | 6 +++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/cns/kubecontroller/nodenetworkconfig/reconciler.go b/cns/kubecontroller/nodenetworkconfig/reconciler.go index 10b252b48a..d4bc9ad62a 100644 --- a/cns/kubecontroller/nodenetworkconfig/reconciler.go +++ b/cns/kubecontroller/nodenetworkconfig/reconciler.go @@ -5,6 +5,7 @@ import ( "sync" "github.com/Azure/azure-container-networking/cns" + "github.com/Azure/azure-container-networking/cns/configuration" "github.com/Azure/azure-container-networking/cns/logger" "github.com/Azure/azure-container-networking/cns/restserver" cnstypes "github.com/Azure/azure-container-networking/cns/types" @@ -157,7 +158,7 @@ func (r *Reconciler) Started(ctx context.Context) (bool, error) { } // SetupWithManager Sets up the reconciler with a new manager, filtering using NodeNetworkConfigFilter on nodeName. -func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node) error { +func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node, cnsconfig *configuration.CNSConfig) error { r.nnccli = nodenetworkconfig.NewClient(mgr.GetClient()) err := ctrl.NewControllerManagedBy(mgr). For(&v1alpha.NodeNetworkConfig{}). @@ -172,11 +173,15 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node) error { return metav1.IsControlledBy(object, node) })). WithEventFilter(predicate.Funcs{ - // check that the generation is the same - status changes don't update generation. + // check that the generation is the same iff IPAMv1 - status changes don't update generation. UpdateFunc: func(ue event.UpdateEvent) bool { if ue.ObjectOld == nil || ue.ObjectNew == nil { return false } + // IPAMv2 is idempotent and can process every update event. + if cnsconfig != nil && cnsconfig.EnableIPAMv2 { + return true + } return ue.ObjectOld.GetGeneration() == ue.ObjectNew.GetGeneration() }, }). diff --git a/cns/service/main.go b/cns/service/main.go index 36f24dffa7..3ee777bda5 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -981,7 +981,7 @@ func main() { // Start fs watcher here z.Info("AsyncPodDelete is enabled") logger.Printf("AsyncPodDelete is enabled") - cnsclient, err := cnsclient.New("", cnsReqTimeout) //nolint + cnsclient, err := cnsclient.New("", cnsReqTimeout) // nolint if err != nil { z.Error("failed to create cnsclient", zap.Error(err)) } @@ -1412,7 +1412,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn nodeIP := configuration.NodeIP() nncReconciler := nncctrl.NewReconciler(httpRestServiceImplementation, poolMonitor, nodeIP) // pass Node to the Reconciler for Controller xref - if err := nncReconciler.SetupWithManager(manager, node); err != nil { //nolint:govet // intentional shadow + if err := nncReconciler.SetupWithManager(manager, node, cnsconfig); err != nil { //nolint:govet // intentional shadow return errors.Wrapf(err, "failed to setup nnc reconciler with manager") } @@ -1482,7 +1482,7 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn // wait for the Reconciler to run once on a NNC that was made for this Node. // the nncReadyCtx has a timeout of 15 minutes, after which we will consider // this false and the NNC Reconciler stuck/failed, log and retry. - nncReadyCtx, cancel := context.WithTimeout(ctx, 15*time.Minute) //nolint // it will time out and not leak + nncReadyCtx, cancel := context.WithTimeout(ctx, 15*time.Minute) // nolint // it will time out and not leak if started, err := nncReconciler.Started(nncReadyCtx); !started { logger.Errorf("NNC reconciler has not started, does the NNC exist? err: %v", err) nncReconcilerStartFailures.Inc() From fa3a9475f70f42901a0f834eb6c985b0259c1e97 Mon Sep 17 00:00:00 2001 From: Tyler Lloyd Date: Wed, 18 Dec 2024 16:39:45 -0500 Subject: [PATCH 2/5] refactor: cleanup PredicateFunc usage Group the predicate funcs into either specific event filters or the "universal" filters which get applied to every event type. Then And() them which is what controller-runtime event_handler is functionally doing anyway. link: https://github.com/kubernetes-sigs/controller-runtime/blob/aea2e32a936584b06ae6f7992f856fe7292b0297/pkg/internal/source/event_handler.go#L116 --- .../nodenetworkconfig/reconciler.go | 66 +++++++++++-------- 1 file changed, 39 insertions(+), 27 deletions(-) diff --git a/cns/kubecontroller/nodenetworkconfig/reconciler.go b/cns/kubecontroller/nodenetworkconfig/reconciler.go index d4bc9ad62a..50403151bd 100644 --- a/cns/kubecontroller/nodenetworkconfig/reconciler.go +++ b/cns/kubecontroller/nodenetworkconfig/reconciler.go @@ -160,35 +160,47 @@ func (r *Reconciler) Started(ctx context.Context) (bool, error) { // SetupWithManager Sets up the reconciler with a new manager, filtering using NodeNetworkConfigFilter on nodeName. func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node, cnsconfig *configuration.CNSConfig) error { r.nnccli = nodenetworkconfig.NewClient(mgr.GetClient()) + + // specific event funcs + eventFuncs := predicate.Funcs{ + // ignore delete events. + DeleteFunc: func(event.DeleteEvent) bool { + return false + }, + UpdateFunc: func(ue event.UpdateEvent) bool { + if ue.ObjectOld == nil || ue.ObjectNew == nil { + return false + } + // check that the generation is the same - status changes don't update generation. But in IPAMv2 + // it is safe to reconcile objects with a changed generation (typically means it was patched by + // CNS itself). This saves us from filtering out updates that were made while this controller's + // watch was down. + if cnsconfig != nil && cnsconfig.EnableIPAMv2 { + return true + } + return ue.ObjectOld.GetGeneration() == ue.ObjectNew.GetGeneration() + }, + } + + // these are applied to every event type + universalFuncs := predicate.NewPredicateFuncs(func(object client.Object) bool { + // match on node controller ref for all other events. + if !metav1.IsControlledBy(object, node) { + return false + } + + // only process events on objects that are not being deleted. + if !object.GetDeletionTimestamp().IsZero() { + return false + } + return true + }) + + filters := predicate.And(eventFuncs, universalFuncs) + err := ctrl.NewControllerManagedBy(mgr). For(&v1alpha.NodeNetworkConfig{}). - WithEventFilter(predicate.Funcs{ - // ignore delete events. - DeleteFunc: func(event.DeleteEvent) bool { - return false - }, - }). - WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool { - // match on node controller ref for all other events. - return metav1.IsControlledBy(object, node) - })). - WithEventFilter(predicate.Funcs{ - // check that the generation is the same iff IPAMv1 - status changes don't update generation. - UpdateFunc: func(ue event.UpdateEvent) bool { - if ue.ObjectOld == nil || ue.ObjectNew == nil { - return false - } - // IPAMv2 is idempotent and can process every update event. - if cnsconfig != nil && cnsconfig.EnableIPAMv2 { - return true - } - return ue.ObjectOld.GetGeneration() == ue.ObjectNew.GetGeneration() - }, - }). - WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool { - // only process events on objects that are not being deleted. - return object.GetDeletionTimestamp().IsZero() - })). + WithEventFilter(filters). Complete(r) if err != nil { return errors.Wrap(err, "failed to set up reconciler with manager") From fbf5530373401870d2659983002cafe8b9cf4a64 Mon Sep 17 00:00:00 2001 From: Tyler Lloyd Date: Tue, 31 Dec 2024 13:27:31 -0500 Subject: [PATCH 3/5] revert: "refactor: cleanup PredicateFunc usage" This reverts commit 67983d0c7c387c53ddafeed73add7f412ed659ba. --- .../nodenetworkconfig/reconciler.go | 66 ++++++++----------- 1 file changed, 27 insertions(+), 39 deletions(-) diff --git a/cns/kubecontroller/nodenetworkconfig/reconciler.go b/cns/kubecontroller/nodenetworkconfig/reconciler.go index 50403151bd..d4bc9ad62a 100644 --- a/cns/kubecontroller/nodenetworkconfig/reconciler.go +++ b/cns/kubecontroller/nodenetworkconfig/reconciler.go @@ -160,47 +160,35 @@ func (r *Reconciler) Started(ctx context.Context) (bool, error) { // SetupWithManager Sets up the reconciler with a new manager, filtering using NodeNetworkConfigFilter on nodeName. func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node, cnsconfig *configuration.CNSConfig) error { r.nnccli = nodenetworkconfig.NewClient(mgr.GetClient()) - - // specific event funcs - eventFuncs := predicate.Funcs{ - // ignore delete events. - DeleteFunc: func(event.DeleteEvent) bool { - return false - }, - UpdateFunc: func(ue event.UpdateEvent) bool { - if ue.ObjectOld == nil || ue.ObjectNew == nil { - return false - } - // check that the generation is the same - status changes don't update generation. But in IPAMv2 - // it is safe to reconcile objects with a changed generation (typically means it was patched by - // CNS itself). This saves us from filtering out updates that were made while this controller's - // watch was down. - if cnsconfig != nil && cnsconfig.EnableIPAMv2 { - return true - } - return ue.ObjectOld.GetGeneration() == ue.ObjectNew.GetGeneration() - }, - } - - // these are applied to every event type - universalFuncs := predicate.NewPredicateFuncs(func(object client.Object) bool { - // match on node controller ref for all other events. - if !metav1.IsControlledBy(object, node) { - return false - } - - // only process events on objects that are not being deleted. - if !object.GetDeletionTimestamp().IsZero() { - return false - } - return true - }) - - filters := predicate.And(eventFuncs, universalFuncs) - err := ctrl.NewControllerManagedBy(mgr). For(&v1alpha.NodeNetworkConfig{}). - WithEventFilter(filters). + WithEventFilter(predicate.Funcs{ + // ignore delete events. + DeleteFunc: func(event.DeleteEvent) bool { + return false + }, + }). + WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool { + // match on node controller ref for all other events. + return metav1.IsControlledBy(object, node) + })). + WithEventFilter(predicate.Funcs{ + // check that the generation is the same iff IPAMv1 - status changes don't update generation. + UpdateFunc: func(ue event.UpdateEvent) bool { + if ue.ObjectOld == nil || ue.ObjectNew == nil { + return false + } + // IPAMv2 is idempotent and can process every update event. + if cnsconfig != nil && cnsconfig.EnableIPAMv2 { + return true + } + return ue.ObjectOld.GetGeneration() == ue.ObjectNew.GetGeneration() + }, + }). + WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool { + // only process events on objects that are not being deleted. + return object.GetDeletionTimestamp().IsZero() + })). Complete(r) if err != nil { return errors.Wrap(err, "failed to set up reconciler with manager") From a001931cce73647bdac9a802d7ab0c9e24239a9e Mon Sep 17 00:00:00 2001 From: Tyler Lloyd Date: Tue, 31 Dec 2024 13:30:37 -0500 Subject: [PATCH 4/5] chore: reduce refactoring --- .../nodenetworkconfig/reconciler.go | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/cns/kubecontroller/nodenetworkconfig/reconciler.go b/cns/kubecontroller/nodenetworkconfig/reconciler.go index d4bc9ad62a..a6c94be45e 100644 --- a/cns/kubecontroller/nodenetworkconfig/reconciler.go +++ b/cns/kubecontroller/nodenetworkconfig/reconciler.go @@ -160,6 +160,7 @@ func (r *Reconciler) Started(ctx context.Context) (bool, error) { // SetupWithManager Sets up the reconciler with a new manager, filtering using NodeNetworkConfigFilter on nodeName. func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node, cnsconfig *configuration.CNSConfig) error { r.nnccli = nodenetworkconfig.NewClient(mgr.GetClient()) + ipamV2Enabled := cnsconfig != nil && cnsconfig.EnableIPAMv2 err := ctrl.NewControllerManagedBy(mgr). For(&v1alpha.NodeNetworkConfig{}). WithEventFilter(predicate.Funcs{ @@ -167,24 +168,22 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node, cnsconfig DeleteFunc: func(event.DeleteEvent) bool { return false }, - }). - WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool { - // match on node controller ref for all other events. - return metav1.IsControlledBy(object, node) - })). - WithEventFilter(predicate.Funcs{ - // check that the generation is the same iff IPAMv1 - status changes don't update generation. + // check that the generation is the same if IPAMv1 - status changes don't update generation. UpdateFunc: func(ue event.UpdateEvent) bool { if ue.ObjectOld == nil || ue.ObjectNew == nil { return false } // IPAMv2 is idempotent and can process every update event. - if cnsconfig != nil && cnsconfig.EnableIPAMv2 { + if ipamV2Enabled { return true } return ue.ObjectOld.GetGeneration() == ue.ObjectNew.GetGeneration() }, }). + WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool { + // match on node controller ref for all other events. + return metav1.IsControlledBy(object, node) + })). WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool { // only process events on objects that are not being deleted. return object.GetDeletionTimestamp().IsZero() From 82ac8823d4ad55c4146dd2f2a01149b4519509b3 Mon Sep 17 00:00:00 2001 From: Tyler Lloyd Date: Mon, 6 Jan 2025 19:02:56 -0500 Subject: [PATCH 5/5] chore: just pass bool --- cns/kubecontroller/nodenetworkconfig/reconciler.go | 14 ++++++-------- cns/service/main.go | 5 ++++- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/cns/kubecontroller/nodenetworkconfig/reconciler.go b/cns/kubecontroller/nodenetworkconfig/reconciler.go index a6c94be45e..40b8fd1c05 100644 --- a/cns/kubecontroller/nodenetworkconfig/reconciler.go +++ b/cns/kubecontroller/nodenetworkconfig/reconciler.go @@ -5,7 +5,6 @@ import ( "sync" "github.com/Azure/azure-container-networking/cns" - "github.com/Azure/azure-container-networking/cns/configuration" "github.com/Azure/azure-container-networking/cns/logger" "github.com/Azure/azure-container-networking/cns/restserver" cnstypes "github.com/Azure/azure-container-networking/cns/types" @@ -158,9 +157,10 @@ func (r *Reconciler) Started(ctx context.Context) (bool, error) { } // SetupWithManager Sets up the reconciler with a new manager, filtering using NodeNetworkConfigFilter on nodeName. -func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node, cnsconfig *configuration.CNSConfig) error { +// filterGenerationChange will check the old and new object's generation and only reconcile updates where the +// generation is the same. This is typically used in IPAMv1 but should be set to false in IPAMv2. +func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node, filterGenerationChange bool) error { r.nnccli = nodenetworkconfig.NewClient(mgr.GetClient()) - ipamV2Enabled := cnsconfig != nil && cnsconfig.EnableIPAMv2 err := ctrl.NewControllerManagedBy(mgr). For(&v1alpha.NodeNetworkConfig{}). WithEventFilter(predicate.Funcs{ @@ -168,16 +168,14 @@ func (r *Reconciler) SetupWithManager(mgr ctrl.Manager, node *v1.Node, cnsconfig DeleteFunc: func(event.DeleteEvent) bool { return false }, - // check that the generation is the same if IPAMv1 - status changes don't update generation. UpdateFunc: func(ue event.UpdateEvent) bool { if ue.ObjectOld == nil || ue.ObjectNew == nil { return false } - // IPAMv2 is idempotent and can process every update event. - if ipamV2Enabled { - return true + if filterGenerationChange { + return ue.ObjectOld.GetGeneration() == ue.ObjectNew.GetGeneration() } - return ue.ObjectOld.GetGeneration() == ue.ObjectNew.GetGeneration() + return true }, }). WithEventFilter(predicate.NewPredicateFuncs(func(object client.Object) bool { diff --git a/cns/service/main.go b/cns/service/main.go index 3ee777bda5..0aa3b46a86 100644 --- a/cns/service/main.go +++ b/cns/service/main.go @@ -1412,7 +1412,10 @@ func InitializeCRDState(ctx context.Context, httpRestService cns.HTTPService, cn nodeIP := configuration.NodeIP() nncReconciler := nncctrl.NewReconciler(httpRestServiceImplementation, poolMonitor, nodeIP) // pass Node to the Reconciler for Controller xref - if err := nncReconciler.SetupWithManager(manager, node, cnsconfig); err != nil { //nolint:govet // intentional shadow + // IPAMv1 - reconcile only status changes (where generation doesn't change). + // IPAMv2 - reconcile all updates. + filterGenerationChange := !cnsconfig.EnableIPAMv2 + if err := nncReconciler.SetupWithManager(manager, node, filterGenerationChange); err != nil { //nolint:govet // intentional shadow return errors.Wrapf(err, "failed to setup nnc reconciler with manager") }