diff --git a/controllers/deployment/reconcile.go b/controllers/deployment/reconcile.go index 52289fb..3d3cbc3 100644 --- a/controllers/deployment/reconcile.go +++ b/controllers/deployment/reconcile.go @@ -14,6 +14,7 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/util/sets" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallDeployment]) error { @@ -138,6 +139,8 @@ func (c *controller) createFirewallSet(r *controllers.Ctx[*v2.FirewallDeployment }, } + _ = controllerutil.AddFinalizer(set, v2.FinalizerName) + err = c.c.GetSeedClient().Create(r.Ctx, set, &client.CreateOptions{}) if err != nil { cond := v2.NewCondition(v2.FirewallDeplomentProgressing, v2.ConditionFalse, "FirewallSetCreateError", fmt.Sprintf("Error creating firewall set: %s.", err)) diff --git a/controllers/generic_controller.go b/controllers/generic_controller.go index 17772fc..fdee2b5 100644 --- a/controllers/generic_controller.go +++ b/controllers/generic_controller.go @@ -97,36 +97,31 @@ func (g GenericController[O]) Reconcile(ctx context.Context, req ctrl.Request) ( } if !o.GetDeletionTimestamp().IsZero() { - if controllerutil.ContainsFinalizer(o, v2.FinalizerName) { - log.Info("reconciling resource deletion flow") - err := g.reconciler.Delete(rctx) - if err != nil { - var requeueErr *requeueError - if errors.As(err, &requeueErr) { - log.Info(requeueErr.Error()) - return ctrl.Result{RequeueAfter: requeueErr.after}, nil //nolint:nilerr we need to return nil such that the requeue works - } - - log.Error(err, "error during deletion flow") - return ctrl.Result{}, err - } + if !controllerutil.ContainsFinalizer(o, v2.FinalizerName) { + log.Info("resource has no finalizer anymore, nothing further to do for deletion") + return ctrl.Result{}, nil + } - log.Info("deletion finished, removing finalizer") - controllerutil.RemoveFinalizer(o, v2.FinalizerName) - if err := g.c.Update(ctx, o); err != nil { - return ctrl.Result{}, err + log.Info("reconciling resource deletion flow") + err := g.reconciler.Delete(rctx) + if err != nil { + var requeueErr *requeueError + if errors.As(err, &requeueErr) { + log.Info(requeueErr.Error()) + return ctrl.Result{RequeueAfter: requeueErr.after}, nil //nolint:nilerr we need to return nil such that the requeue works } - } - return ctrl.Result{}, nil - } + log.Error(err, "error during deletion flow") + return ctrl.Result{}, err + } - if !controllerutil.ContainsFinalizer(o, v2.FinalizerName) { - log.Info("adding finalizer") - controllerutil.AddFinalizer(o, v2.FinalizerName) + log.Info("deletion finished, removing finalizer") + controllerutil.RemoveFinalizer(o, v2.FinalizerName) if err := g.c.Update(ctx, o); err != nil { - return ctrl.Result{}, fmt.Errorf("unable to add finalizer: %w", err) + return ctrl.Result{}, err } + + return ctrl.Result{}, nil } var statusErr error diff --git a/controllers/monitor/reconcile.go b/controllers/monitor/reconcile.go index a60abab..b28addb 100644 --- a/controllers/monitor/reconcile.go +++ b/controllers/monitor/reconcile.go @@ -14,19 +14,22 @@ import ( corev1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallMonitor]) error { - fw, err := c.updateFirewallStatus(r) + err := c.updateFirewallStatus(r) if err != nil { r.Log.Error(err, "unable to update firewall status") - return controllers.RequeueAfter(3*time.Second, "unable to update firewall status, retrying") + // the update on the firewall resource is racing with the set controller update on the firewall resource + // we choose a small requeue period to enforce the update... + return controllers.RequeueAfter(3*time.Millisecond, "unable to update firewall status, retrying") } - err = c.offerFirewallControllerMigrationSecret(r, fw) + err = c.offerFirewallControllerMigrationSecret(r) if err != nil { r.Log.Error(err, "unable to offer firewall-controller migration secret") return controllers.RequeueAfter(10*time.Second, "unable to offer firewall-controller migration secret, retrying") @@ -41,32 +44,47 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallMonitor]) error { return controllers.RequeueAfter(2*time.Minute, "continue reconciling monitor") } -func (c *controller) updateFirewallStatus(r *controllers.Ctx[*v2.FirewallMonitor]) (*v2.Firewall, error) { +func (c *controller) updateFirewallStatus(r *controllers.Ctx[*v2.FirewallMonitor]) error { fw := &v2.Firewall{ ObjectMeta: metav1.ObjectMeta{ Name: r.Target.Name, Namespace: c.c.GetSeedNamespace(), }, } - err := c.c.GetSeedClient().Get(r.Ctx, client.ObjectKeyFromObject(fw), fw) - if err != nil { - return nil, fmt.Errorf("associated firewall of monitor not found: %w", err) - } - firewall.SetFirewallStatusFromMonitor(fw, r.Target) + return retry.RetryOnConflict(retry.DefaultRetry, func() error { + err := c.c.GetSeedClient().Get(r.Ctx, client.ObjectKeyFromObject(fw), fw) + if err != nil { + return fmt.Errorf("associated firewall of monitor not found: %w", err) + } - err = c.c.GetSeedClient().Status().Update(r.Ctx, fw) - if err != nil { - return nil, fmt.Errorf("unable to update firewall status: %w", err) - } + firewall.SetFirewallStatusFromMonitor(fw, r.Target) + + err = c.c.GetSeedClient().Status().Update(r.Ctx, fw) + if err != nil { + return fmt.Errorf("unable to update firewall status: %w", err) + } - return fw, nil + return nil + }) } // offerFirewallControllerMigrationSecret provides a secret that the firewall-controller can use to update from v1.x to v2.x // // this function can be removed when all firewall-controllers are running v2.x or newer. -func (c *controller) offerFirewallControllerMigrationSecret(r *controllers.Ctx[*v2.FirewallMonitor], fw *v2.Firewall) error { +func (c *controller) offerFirewallControllerMigrationSecret(r *controllers.Ctx[*v2.FirewallMonitor]) error { + fw := &v2.Firewall{ + ObjectMeta: metav1.ObjectMeta{ + Name: r.Target.Name, + Namespace: c.c.GetSeedNamespace(), + }, + } + + err := c.c.GetSeedClient().Get(r.Ctx, client.ObjectKeyFromObject(fw), fw) + if err != nil { + return fmt.Errorf("associated firewall of monitor not found: %w", err) + } + if metav1.GetControllerOf(fw) == nil { // it can be that there is no set or deployment governing the firewall. // in this case there may be no rbac resources deployed for seed access, so we cannot offer a migration secret. diff --git a/controllers/set/controller_test.go b/controllers/set/controller_test.go index 13a3bce..3d0853c 100644 --- a/controllers/set/controller_test.go +++ b/controllers/set/controller_test.go @@ -108,6 +108,10 @@ var _ = Context("firewall set controller", Ordered, func() { if !fw.CreationTimestamp.Time.Before(newest.CreationTimestamp.Time) { newest = &fw } + + // as there is no firewall controller running, we need to take out the finalizers + fw.Finalizers = nil + Expect(k8sClient.Update(ctx, &fw)).To(Succeed()) } Expect(newest).NotTo(BeNil()) diff --git a/controllers/set/reconcile.go b/controllers/set/reconcile.go index 6591ea3..259b91c 100644 --- a/controllers/set/reconcile.go +++ b/controllers/set/reconcile.go @@ -8,7 +8,9 @@ import ( v2 "github.com/metal-stack/firewall-controller-manager/api/v2" "github.com/metal-stack/firewall-controller-manager/controllers" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/client-go/util/retry" "sigs.k8s.io/controller-runtime/pkg/client" + "sigs.k8s.io/controller-runtime/pkg/controller/controllerutil" ) func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallSet]) error { @@ -32,48 +34,55 @@ func (c *controller) Reconcile(r *controllers.Ctx[*v2.FirewallSet]) error { v2.SortFirewallsByImportance(ownedFirewalls) for i, ownedFw := range ownedFirewalls { - fw := &v2.Firewall{} - err := c.c.GetSeedClient().Get(r.Ctx, client.ObjectKeyFromObject(ownedFw), fw) - if err != nil { - return fmt.Errorf("error fetching firewall: %w", err) - } + err := retry.RetryOnConflict(retry.DefaultRetry, func() error { + fw := &v2.Firewall{} + err := c.c.GetSeedClient().Get(r.Ctx, client.ObjectKeyFromObject(ownedFw), fw) + if err != nil { + return fmt.Errorf("error fetching firewall: %w", err) + } - fw.Spec = r.Target.Spec.Template.Spec - - // stagger firewall replicas to achieve active/standby behavior - // - // we give the most important firewall the shortest distance within a set - // this firewall attracts the traffic within the replica set - // - // the second most important firewall gets a longer distance - // such that it takes over the traffic in case the first replica dies - // (first-level backup) - // - // the rest of the firewalls get an even higher distance - // (third-level backup) - // one of them moves up on next set sync after deletion of the "active" replica - var distance v2.FirewallDistance - switch i { - case 0: - distance = r.Target.Spec.Distance + 0 - case 1: - distance = r.Target.Spec.Distance + 1 - default: - distance = r.Target.Spec.Distance + 2 - } + fw.Spec = r.Target.Spec.Template.Spec + + // stagger firewall replicas to achieve active/standby behavior + // + // we give the most important firewall the shortest distance within a set + // this firewall attracts the traffic within the replica set + // + // the second most important firewall gets a longer distance + // such that it takes over the traffic in case the first replica dies + // (first-level backup) + // + // the rest of the firewalls get an even higher distance + // (third-level backup) + // one of them moves up on next set sync after deletion of the "active" replica + var distance v2.FirewallDistance + switch i { + case 0: + distance = r.Target.Spec.Distance + 0 + case 1: + distance = r.Target.Spec.Distance + 1 + default: + distance = r.Target.Spec.Distance + 2 + } - if distance > v2.FirewallLongestDistance { - distance = v2.FirewallLongestDistance - } + if distance > v2.FirewallLongestDistance { + distance = v2.FirewallLongestDistance + } + + fw.Distance = distance - fw.Distance = distance + err = c.c.GetSeedClient().Update(r.Ctx, fw, &client.UpdateOptions{}) + if err != nil { + return fmt.Errorf("error updating firewall spec: %w", err) + } - err = c.c.GetSeedClient().Update(r.Ctx, fw, &client.UpdateOptions{}) + r.Log.Info("updated/synced firewall", "name", fw.Name, "distance", distance) + + return nil + }) if err != nil { - return fmt.Errorf("error updating firewall spec: %w", err) + return err } - - r.Log.Info("updated/synced firewall", "name", fw.Name, "distance", distance) } currentAmount := len(ownedFirewalls) @@ -163,6 +172,8 @@ func (c *controller) createFirewall(r *controllers.Ctx[*v2.FirewallSet]) (*v2.Fi Distance: r.Target.Spec.Distance, } + _ = controllerutil.AddFinalizer(fw, v2.FinalizerName) + err = c.c.GetSeedClient().Create(r.Ctx, fw, &client.CreateOptions{}) if err != nil { return nil, fmt.Errorf("unable to create firewall resource: %w", err) @@ -204,6 +215,7 @@ func (c *controller) adoptFirewall(r *controllers.Ctx[*v2.FirewallSet], fw *v2.F } fw.OwnerReferences = append(fw.OwnerReferences, *metav1.NewControllerRef(r.Target, v2.GroupVersion.WithKind("FirewallSet"))) + _ = controllerutil.AddFinalizer(fw, v2.FinalizerName) err = c.c.GetSeedClient().Update(r.Ctx, fw) if err != nil { diff --git a/integration/integration_test.go b/integration/integration_test.go index 4e3ded5..c11f113 100644 --- a/integration/integration_test.go +++ b/integration/integration_test.go @@ -123,8 +123,9 @@ var _ = Context("integration test", Ordered, func() { deployment = func() *v2.FirewallDeployment { return &v2.FirewallDeployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "test", - Namespace: namespaceName, + Name: "test", + Namespace: namespaceName, + Finalizers: []string{v2.FinalizerName}, }, Spec: v2.FirewallDeploymentSpec{ Replicas: 1, @@ -1558,8 +1559,9 @@ var _ = Context("integration test", Ordered, func() { deployment = func() *v2.FirewallDeployment { return &v2.FirewallDeployment{ ObjectMeta: metav1.ObjectMeta{ - Name: "test", - Namespace: namespaceName, + Name: "test", + Namespace: namespaceName, + Finalizers: []string{v2.FinalizerName}, }, Spec: v2.FirewallDeploymentSpec{ Replicas: 1,