diff --git a/api/v1/hypervisor_types.go b/api/v1/hypervisor_types.go
index 8aacb3a..b285b95 100644
--- a/api/v1/hypervisor_types.go
+++ b/api/v1/hypervisor_types.go
@@ -32,6 +32,9 @@ const (
 	// ConditionTypeOnboarding is the type of condition for onboarding status
 	ConditionTypeOnboarding = "Onboarding"
 
+	// ConditionTypeOffboarded is the type of condition for the offboarded status of a hypervisor
+	ConditionTypeOffboarded = "Offboarded"
+
 	// ConditionTypeReady is the type of condition for ready status of a hypervisor
 	ConditionTypeReady = "Ready"
 
diff --git a/cmd/main.go b/cmd/main.go
index aa8d6e3..0375247 100644
--- a/cmd/main.go
+++ b/cmd/main.go
@@ -236,14 +236,6 @@ func main() {
 		os.Exit(1)
 	}
 
-	if err = (&controller.NodeEvictionLabelReconciler{
-		Client: mgr.GetClient(),
-		Scheme: mgr.GetScheme(),
-	}).SetupWithManager(mgr); err != nil {
-		setupLog.Error(err, "unable to create controller", "controller", "Node")
-		os.Exit(1)
-	}
-
 	if err = (&controller.NodeDecommissionReconciler{
 		Client: mgr.GetClient(),
 		Scheme: mgr.GetScheme(),
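Reviewer note: the new Offboarded condition is meant to be read with the standard apimachinery condition helpers, like the neighbouring condition types. A minimal consumer-side sketch, assuming only what this hunk adds (the package and helper names are illustrative, not part of the change):

    package example // illustrative only

    import (
        "k8s.io/apimachinery/pkg/api/meta"

        kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
    )

    // isOffboarded reports whether the decommission controller has finished
    // cleaning the hypervisor out of nova; the condition is set exactly once,
    // when offboarding completed successfully.
    func isOffboarded(hv *kvmv1.Hypervisor) bool {
        return meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOffboarded)
    }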
diff --git a/internal/controller/decomission_controller.go b/internal/controller/decomission_controller.go
index b6367ac..355ad05 100644
--- a/internal/controller/decomission_controller.go
+++ b/internal/controller/decomission_controller.go
@@ -28,14 +28,11 @@ import (
 	"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/aggregates"
 	"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/services"
 	"github.com/gophercloud/gophercloud/v2/openstack/placement/v1/resourceproviders"
-	corev1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
-	"k8s.io/client-go/util/retry"
 	ctrl "sigs.k8s.io/controller-runtime"
 	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 	logger "sigs.k8s.io/controller-runtime/pkg/log"
 
 	kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
@@ -43,8 +40,7 @@ import (
 )
 
 const (
-	decommissionFinalizerName  = "cobaltcore.cloud.sap/decommission-hypervisor"
-	DecommissionControllerName = "nodeDecommission"
+	DecommissionControllerName = "offboarding"
 )
 
 type NodeDecommissionReconciler struct {
@@ -57,8 +53,6 @@ type NodeDecommissionReconciler struct {
 // The counter-side in gardener is here:
 // https://github.com/gardener/machine-controller-manager/blob/rel-v0.56/pkg/util/provider/machinecontroller/machine.go#L646
 
-// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;patch;update
-// +kubebuilder:rbac:groups="",resources=nodes/finalizers,verbs=update
 // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch
 // +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors/status,verbs=get;list;watch;update;patch
 func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
@@ -66,57 +60,33 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req
 	log := logger.FromContext(ctx).WithName(req.Name).WithValues("hostname", hostname)
 	ctx = logger.IntoContext(ctx, log)
 
-	node := &corev1.Node{}
-	if err := r.Get(ctx, req.NamespacedName, node); err != nil {
-		return ctrl.Result{}, k8sclient.IgnoreNotFound(err)
-	}
-
-	// Fetch HV to check if lifecycle management is enabled
 	hv := &kvmv1.Hypervisor{}
 	if err := r.Get(ctx, k8sclient.ObjectKey{Name: hostname}, hv); err != nil {
 		// ignore not found errors, could be deleted
 		return ctrl.Result{}, k8sclient.IgnoreNotFound(err)
 	}
 
-	if !hv.Spec.LifecycleEnabled {
-		// Get out of the way
-		return r.removeFinalizer(ctx, node)
-	}
-
-	if !controllerutil.ContainsFinalizer(node, decommissionFinalizerName) {
-		return ctrl.Result{}, retry.RetryOnConflict(retry.DefaultRetry, func() error {
-			patch := k8sclient.MergeFrom(node.DeepCopy())
-			controllerutil.AddFinalizer(node, decommissionFinalizerName)
-			if err := r.Patch(ctx, node, patch); err != nil {
-				return fmt.Errorf("failed to add finalizer due to %w", err)
-			}
-			log.Info("Added finalizer")
-			return nil
-		})
-	}
-
-	// Not yet deleting hv, nothing more to do
-	if node.DeletionTimestamp.IsZero() {
+	if !hv.Spec.LifecycleEnabled || hv.Spec.Maintenance != kvmv1.MaintenanceTermination {
 		return ctrl.Result{}, nil
 	}
 
-	// Someone is just deleting the hv, without going through termination
-	// See: https://github.com/gardener/machine-controller-manager/blob/rel-v0.56/pkg/util/provider/machinecontroller/machine.go#L658-L659
-	if !IsNodeConditionTrue(node.Status.Conditions, "Terminating") {
-		log.Info("removing finalizer since not terminating")
-		// So we just get out of the way for now
-		return r.removeFinalizer(ctx, node)
-	}
-
 	if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeReady) {
 		return r.setDecommissioningCondition(ctx, hv, "Node is being decommissioned, removing host from nova")
 	}
 
-	log.Info("removing host from nova")
+	if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOffboarded) {
+		return ctrl.Result{}, nil
+	}
+
+	if !meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) {
+		// Either eviction has not started yet, or VMs are still being evicted, so wait for it to finish
+		return ctrl.Result{}, nil
+	}
 
 	hypervisor, err := openstack.GetHypervisorByName(ctx, r.computeClient, hostname, true)
 	if errors.Is(err, openstack.ErrNoHypervisor) {
 		// We are (hopefully) done
-		return r.removeFinalizer(ctx, node)
+		return ctrl.Result{}, r.markOffboarded(ctx, hv)
 	}
 
 	// TODO: remove since RunningVMs is only available until micro-version 2.87, and also is updated asynchronously
@@ -141,7 +111,7 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req
 		return r.setDecommissioningCondition(ctx, hv, fmt.Sprintf("cannot list aggregates due to %v", err))
 	}
 
-	host := node.Name
+	host := hv.Name
 	for name, aggregate := range aggs {
 		if slices.Contains(aggregate.Hosts, host) {
 			opts := aggregates.RemoveHostOpts{Host: host}
@@ -168,19 +138,7 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req
 		return r.setDecommissioningCondition(ctx, hv, fmt.Sprintf("cannot clean up resource provider: %v", err))
 	}
 
-	return r.removeFinalizer(ctx, node)
-}
-
-func (r *NodeDecommissionReconciler) removeFinalizer(ctx context.Context, node *corev1.Node) (ctrl.Result, error) {
-	if !controllerutil.ContainsFinalizer(node, decommissionFinalizerName) {
-		return ctrl.Result{}, nil
-	}
-
-	nodeBase := node.DeepCopy()
-	controllerutil.RemoveFinalizer(node, decommissionFinalizerName)
-	err := r.Patch(ctx, node, k8sclient.MergeFromWithOptions(nodeBase,
-		k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName))
-	return ctrl.Result{}, err
+	return ctrl.Result{}, r.markOffboarded(ctx, hv)
 }
 
 func (r *NodeDecommissionReconciler) setDecommissioningCondition(ctx context.Context, hv *kvmv1.Hypervisor, message string) (ctrl.Result, error) {
@@ -195,7 +153,22 @@ func (r *NodeDecommissionReconciler) setDecommissioningCondition(ctx context.Con
 		k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName)); err != nil {
 		return ctrl.Result{}, fmt.Errorf("cannot update hypervisor status due to %w", err)
 	}
-	return ctrl.Result{RequeueAfter: shortRetryTime}, nil
+	return ctrl.Result{}, nil
+}
+
+func (r *NodeDecommissionReconciler) markOffboarded(ctx context.Context, hv *kvmv1.Hypervisor) error {
+	base := hv.DeepCopy()
+	meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
+		Type:    kvmv1.ConditionTypeOffboarded,
+		Status:  metav1.ConditionTrue,
+		Reason:  "Offboarded",
+		Message: "Offboarding successful",
+	})
+	if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base,
+		k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName)); err != nil {
+		return fmt.Errorf("cannot update hypervisor status due to %w", err)
+	}
+	return nil
 }
 
 // SetupWithManager sets up the controller with the Manager.
@@ -217,6 +190,6 @@ func (r *NodeDecommissionReconciler) SetupWithManager(mgr ctrl.Manager) error {
 
 	return ctrl.NewControllerManagedBy(mgr).
 		Named(DecommissionControllerName).
-		For(&corev1.Node{}).
+		For(&kvmv1.Hypervisor{}).
 		Complete(r)
 }
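Reviewer note: markOffboarded (and setDecommissioningCondition above it) both follow the snapshot-mutate-patch idiom with an optimistic lock, so concurrent status writers conflict instead of silently overwriting each other. A minimal standalone sketch of that idiom, assuming an arbitrary condition and field owner (names are illustrative):

    package example // illustrative only

    import (
        "context"

        "k8s.io/apimachinery/pkg/api/meta"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
        k8sclient "sigs.k8s.io/controller-runtime/pkg/client"

        kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
    )

    // patchCondition records a condition on the hypervisor status via an
    // optimistic-lock merge patch: the patch carries the resourceVersion of
    // the snapshot, so a concurrent update makes this call fail with a conflict.
    func patchCondition(ctx context.Context, c k8sclient.Client, hv *kvmv1.Hypervisor, cond metav1.Condition) error {
        base := hv.DeepCopy() // snapshot before mutating
        meta.SetStatusCondition(&hv.Status.Conditions, cond)
        return c.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base,
            k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner("offboarding"))
    }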
diff --git a/internal/controller/decomission_controller_test.go b/internal/controller/decomission_controller_test.go
index c1e12d6..8aed4ad 100644
--- a/internal/controller/decomission_controller_test.go
+++ b/internal/controller/decomission_controller_test.go
@@ -68,9 +68,9 @@ const (
 var _ = Describe("Decommission Controller", func() {
 	var (
 		r            *NodeDecommissionReconciler
-		nodeName     = types.NamespacedName{Name: hypervisorName}
+		resourceName = types.NamespacedName{Name: hypervisorName}
 		reconcileReq = ctrl.Request{
-			NamespacedName: nodeName,
+			NamespacedName: resourceName,
 		}
 		fakeServer testhelper.FakeServer
 	)
@@ -99,30 +99,10 @@ var _ = Describe("Decommission Controller", func() {
 			Expect(k8sClient.Delete(ctx, ns)).To(Succeed())
 		})
 
-		By("creating the core resource for the Kind Node")
-		node := &corev1.Node{
-			ObjectMeta: metav1.ObjectMeta{
-				Name:   nodeName.Name,
-				Labels: map[string]string{labelEvictionRequired: "true"},
-			},
-		}
-		Expect(k8sClient.Create(ctx, node)).To(Succeed())
-		DeferCleanup(func(ctx SpecContext) {
-			node := &corev1.Node{}
-			Expect(k8sclient.IgnoreNotFound(k8sClient.Get(ctx, nodeName, node))).To(Succeed())
-			if len(node.Finalizers) > 0 {
-				node.Finalizers = make([]string, 0)
-				Expect(k8sClient.Update(ctx, node)).To(Succeed())
-			}
-			if node.Name != "" {
-				Expect(k8sclient.IgnoreNotFound(k8sClient.Delete(ctx, node))).To(Succeed())
-			}
-		})
-
 		By("Create the hypervisor resource with lifecycle enabled")
 		hypervisor := &kvmv1.Hypervisor{
 			ObjectMeta: metav1.ObjectMeta{
-				Name: nodeName.Name,
+				Name: resourceName.Name,
 			},
 			Spec: kvmv1.HypervisorSpec{
 				LifecycleEnabled: true,
@@ -134,47 +114,32 @@ var _ = Describe("Decommission Controller", func() {
 		})
 	})
 
-	Context("When reconciling a node", func() {
-		It("should set the finalizer", func(ctx SpecContext) {
-			By("reconciling the created resource")
-			_, err := r.Reconcile(ctx, reconcileReq)
-			Expect(err).NotTo(HaveOccurred())
-			node := &corev1.Node{}
-
-			Expect(k8sClient.Get(ctx, nodeName, node)).To(Succeed())
-			Expect(node.Finalizers).To(ContainElement(decommissionFinalizerName))
-		})
-	})
-
-	Context("When terminating a node", func() {
+	Context("When marking the hypervisor as terminating", func() {
 		JustBeforeEach(func(ctx SpecContext) {
-			By("reconciling first reconciling the to add the finalizer")
+			By("first reconciling the created resource")
 			_, err := r.Reconcile(ctx, reconcileReq)
 			Expect(err).NotTo(HaveOccurred())
-			node := &corev1.Node{}
-			Expect(k8sClient.Get(ctx, nodeName, node)).To(Succeed())
-			Expect(node.Finalizers).To(ContainElement(decommissionFinalizerName))
+			hypervisor := &kvmv1.Hypervisor{}
+			Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed())
 
-			By("and then terminating then node")
-			node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{
-				Type:   "Terminating",
-				Status: corev1.ConditionTrue,
+			By("and then marking the hypervisor as terminating")
+			hypervisor.Spec.Maintenance = kvmv1.MaintenanceTermination
+			Expect(k8sClient.Update(ctx, hypervisor)).To(Succeed())
+			meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
+				Type:    kvmv1.ConditionTypeTerminating,
+				Status:  metav1.ConditionTrue,
 				Reason:  "dontcare",
 				Message: "dontcare",
 			})
-			Expect(k8sClient.Status().Update(ctx, node)).To(Succeed())
-			Expect(k8sClient.Delete(ctx, node)).To(Succeed())
-			nodelist := &corev1.NodeList{}
-			Expect(k8sClient.List(ctx, nodelist)).To(Succeed())
-			Expect(nodelist.Items).NotTo(BeEmpty())
+			Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
 		})
 
-		When("the hypervisor was set to ready", func() {
+		When("the hypervisor was set to ready and has been evicted", func() {
 			getHypervisorsCalled := 0
 			BeforeEach(func(ctx SpecContext) {
 				hv := &kvmv1.Hypervisor{}
-				Expect(k8sClient.Get(ctx, nodeName, hv)).To(Succeed())
+				Expect(k8sClient.Get(ctx, resourceName, hv)).To(Succeed())
 
 				meta.SetStatusCondition(&hv.Status.Conditions,
 					metav1.Condition{
 						Type:   kvmv1.ConditionTypeReady,
@@ -183,6 +148,12 @@ var _ = Describe("Decommission Controller", func() {
 						Message: "dontcare",
 					},
 				)
+				meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
+					Type:    kvmv1.ConditionTypeEvicting,
+					Status:  metav1.ConditionFalse,
+					Reason:  "dontcare",
+					Message: "dontcare",
+				})
 
 				Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed())
 
 				fakeServer.Mux.HandleFunc("GET /os-hypervisors/detail", func(w http.ResponseWriter, r *http.Request) {
@@ -239,12 +210,12 @@ var _ = Describe("Decommission Controller", func() {
 				})
 			})
 
-			It("should set the hypervisor condition", func(ctx SpecContext) {
+			It("should set the hypervisor ready condition", func(ctx SpecContext) {
 				By("reconciling the created resource")
 				_, err := r.Reconcile(ctx, reconcileReq)
 				Expect(err).NotTo(HaveOccurred())
 				hypervisor := &kvmv1.Hypervisor{}
-				Expect(k8sClient.Get(ctx, nodeName, hypervisor)).To(Succeed())
+				Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed())
 				Expect(hypervisor.Status.Conditions).To(ContainElement(
 					SatisfyAll(
 						HaveField("Type", kvmv1.ConditionTypeReady),
@@ -254,7 +225,7 @@ var _ = Describe("Decommission Controller", func() {
 				))
 			})
 
-			It("should remove the finalizer", func(ctx SpecContext) {
+			It("should set the hypervisor offboarded condition", func(ctx SpecContext) {
 				By("reconciling the created resource")
 				for range 3 {
 					_, err := r.Reconcile(ctx, reconcileReq)
@@ -262,10 +233,14 @@ var _ = Describe("Decommission Controller", func() {
 				}
 				Expect(getHypervisorsCalled).To(BeNumerically(">", 0))
 
-				node := &corev1.Node{}
-				err := k8sClient.Get(ctx, nodeName, node)
-				Expect(err).To(HaveOccurred())
-				Expect(k8sclient.IgnoreNotFound(err)).To(Succeed())
+				hypervisor := &kvmv1.Hypervisor{}
+				Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed())
+				Expect(hypervisor.Status.Conditions).To(ContainElement(
+					SatisfyAll(
+						HaveField("Type", kvmv1.ConditionTypeOffboarded),
+						HaveField("Status", metav1.ConditionTrue),
+					),
+				))
 			})
 		})
diff --git a/internal/controller/eviction_controller.go b/internal/controller/eviction_controller.go
index 28d669d..a9fad6c 100644
--- a/internal/controller/eviction_controller.go
+++ b/internal/controller/eviction_controller.go
@@ -28,14 +28,12 @@ import (
 	"github.com/gophercloud/gophercloud/v2"
 	"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/hypervisors"
 	"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/servers"
-	"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/services"
 	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/types"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
 	logger "sigs.k8s.io/controller-runtime/pkg/log"
 
 	kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
@@ -50,7 +48,6 @@ type EvictionReconciler struct {
 }
 
 const (
-	evictionFinalizerName  = "eviction-controller.cloud.sap/finalizer"
 	EvictionControllerName = "eviction"
 	shortRetryTime         = 1 * time.Second
 	defaultPollTime        = 10 * time.Second
@@ -84,15 +81,7 @@ func (r *EvictionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (c
 
 	// Being deleted
 	if !eviction.DeletionTimestamp.IsZero() {
-		err := r.handleFinalizer(ctx, eviction, hv)
-		if err != nil {
-			if errors.Is(err, ErrRetry) {
-				return ctrl.Result{RequeueAfter: defaultWaitTime}, nil
-			}
-			return ctrl.Result{}, err
-		}
-		log.Info("deleted")
-		return ctrl.Result{}, err
+		return ctrl.Result{}, nil
 	}
 
 	statusCondition := meta.FindStatusCondition(eviction.Status.Conditions, kvmv1.ConditionTypeEvicting)
@@ -157,9 +146,25 @@ func (r *EvictionReconciler) updateStatus(ctx context.Context, eviction, base *k
 func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv1.Eviction, hv *kvmv1.Hypervisor) (ctrl.Result, error) {
 	base := eviction.DeepCopy()
 
-	expectHypervisor := HasStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding)
-	// Does the hypervisor even exist? Is it enabled/disabled?
-	hypervisor, err := hypervisors.Get(ctx, r.computeClient, hv.Status.HypervisorID).Extract()
+	expectHypervisor := hv.Status.HypervisorID != "" && hv.Status.ServiceID != "" // The hypervisor has been registered
+
+	// If the hypervisor should exist, then it should also be disabled before we evict
+	if expectHypervisor && !meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) {
+		// Hypervisor is not disabled yet, so record that and wait for it to be disabled
+		if meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
+			Type:    kvmv1.ConditionTypePreflight,
+			Status:  metav1.ConditionFalse,
+			Message: "hypervisor not disabled",
+			Reason:  kvmv1.ConditionReasonFailed,
+		}) {
+			return ctrl.Result{}, r.updateStatus(ctx, eviction, base)
+		}
+		return ctrl.Result{RequeueAfter: defaultPollTime}, nil // Wait for hypervisor to be disabled
+	}
+
+	// Fetch all virtual machines on the hypervisor
+	trueVal := true
+	hypervisor, err := hypervisors.GetExt(ctx, r.computeClient, hv.Status.HypervisorID, hypervisors.GetOpts{WithServers: &trueVal}).Extract()
 	if err != nil {
 		if !gophercloud.ResponseCodeIs(err, http.StatusNotFound) {
 			return ctrl.Result{}, err
@@ -190,18 +195,6 @@ func (r *EvictionReconciler) handlePreflight(ctx context.Context, eviction *kvmv
 		}
 	}
 
-	if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled) {
-		// Hypervisor is not disabled/ensured, so we need to disable it
-		return ctrl.Result{}, r.disableHypervisor(ctx, hypervisor, eviction)
-	}
-
-	// Fetch all virtual machines on the hypervisor
-	trueVal := true
-	hypervisor, err = hypervisors.GetExt(ctx, r.computeClient, hv.Status.HypervisorID, hypervisors.GetOpts{WithServers: &trueVal}).Extract()
-	if err != nil {
-		return ctrl.Result{}, err
-	}
-
 	if hypervisor.Servers != nil {
 		uuids := make([]string, len(*hypervisor.Servers))
 		for i, server := range *hypervisor.Servers {
@@ -281,11 +274,9 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic
 					Message: fmt.Sprintf("Migration of instance %s failed: %s", vm.ID, vm.Fault.Message),
 					Reason:  kvmv1.ConditionReasonFailed,
 				})
-				if err := r.updateStatus(ctx, eviction, base); err != nil {
-					return ctrl.Result{}, err
-				}
-
-				return ctrl.Result{}, fmt.Errorf("error migrating instance %v", uuid)
+				return ctrl.Result{}, errors.Join(fmt.Errorf("error migrating instance %v", uuid),
+					r.updateStatus(ctx, eviction, base))
 			}
 
 			currentHypervisor, _, _ := strings.Cut(vm.HypervisorHostname, ".")
@@ -358,169 +349,6 @@ func (r *EvictionReconciler) evictNext(ctx context.Context, eviction *kvmv1.Evic
 	return ctrl.Result{RequeueAfter: defaultPollTime}, err
 }
 
-func (r *EvictionReconciler) evictionReason(eviction *kvmv1.Eviction) string {
-	return fmt.Sprintf("Eviction %v/%v: %v", eviction.Namespace, eviction.Name, eviction.Spec.Reason)
-}
-
-func (r *EvictionReconciler) handleFinalizer(ctx context.Context, eviction *kvmv1.Eviction, hypervisor *kvmv1.Hypervisor) error {
-	if !controllerutil.ContainsFinalizer(eviction, evictionFinalizerName) {
-		return nil
-	}
-
-	// As long as we didn't succeed to re-enable the hypervisor, which includes
-	// - the hypervisor being gone, because it has been torn down
-	// - the hypervisor having been enabled by someone else
-	if !meta.IsStatusConditionTrue(eviction.Status.Conditions, kvmv1.ConditionTypeHypervisorReEnabled) {
-		err := r.enableHypervisorService(ctx, eviction, hypervisor)
-		if err != nil {
-			return err
-		}
-	}
-
-	base := eviction.DeepCopy()
-	controllerutil.RemoveFinalizer(eviction, evictionFinalizerName)
-	return r.Patch(ctx, eviction, client.MergeFromWithOptions(base, client.MergeFromWithOptimisticLock{}))
-}
-
-func (r *EvictionReconciler) enableHypervisorService(ctx context.Context, eviction *kvmv1.Eviction, hv *kvmv1.Hypervisor) error {
-	log := logger.FromContext(ctx)
-
-	hypervisor, err := hypervisors.Get(ctx, r.computeClient, hv.Status.HypervisorID).Extract()
-	if err != nil {
-		if errors.Is(err, openstack.ErrNoHypervisor) {
-			base := eviction.DeepCopy()
-			changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
-				Type:    kvmv1.ConditionTypeHypervisorReEnabled,
-				Status:  metav1.ConditionTrue,
-				Message: "Hypervisor is gone, no need to re-enable",
-				Reason:  kvmv1.ConditionReasonSucceeded,
-			})
-			if changed {
-				return r.updateStatus(ctx, eviction, base)
-			} else {
-				return nil
-			}
-		} else {
-			base := eviction.DeepCopy()
-			errorMessage := fmt.Sprintf("failed to get hypervisor due to %s", err)
-			// update the condition to reflect the error, and retry the reconciliation
-			changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
-				Type:    kvmv1.ConditionTypeHypervisorReEnabled,
-				Status:  metav1.ConditionFalse,
-				Message: errorMessage,
-				Reason:  kvmv1.ConditionReasonFailed,
-			})
-
-			if changed {
-				if err2 := r.updateStatus(ctx, eviction, base); err2 != nil {
-					log.Error(err, "failed to store error message in condition", "message", errorMessage)
-					return err2
-				}
-			}
-
-			return ErrRetry
-		}
-	}
-
-	if hypervisor.Service.DisabledReason != r.evictionReason(eviction) {
-		base := eviction.DeepCopy()
-		changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
-			Type:    kvmv1.ConditionTypeHypervisorReEnabled,
-			Status:  metav1.ConditionTrue,
-			Message: "Hypervisor already re-enabled for reason:" + hypervisor.Service.DisabledReason,
-			Reason:  kvmv1.ConditionReasonSucceeded,
-		})
-		if changed {
-			return r.updateStatus(ctx, eviction, base)
-		} else {
-			return nil
-		}
-	}
-
-	enableService := services.UpdateOpts{Status: services.ServiceEnabled}
-	log.Info("Enabling hypervisor", "id", hypervisor.Service.ID)
-	_, err = services.Update(ctx, r.computeClient, hypervisor.Service.ID, enableService).Extract()
-
-	if err != nil {
-		base := eviction.DeepCopy()
-		errorMessage := fmt.Sprintf("failed to enable hypervisor due to %s", err)
-		changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
-			Type:    kvmv1.ConditionTypeHypervisorReEnabled,
-			Status:  metav1.ConditionFalse,
-			Message: errorMessage,
-			Reason:  kvmv1.ConditionReasonFailed,
-		})
-		if changed {
-			if err2 := r.updateStatus(ctx, eviction, base); err2 != nil {
-				log.Error(err, "failed to store error message in condition", "message", errorMessage)
-			}
-		}
-		return ErrRetry
-	} else {
-		base := eviction.DeepCopy()
-		changed := meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
-			Type:    kvmv1.ConditionTypeHypervisorReEnabled,
-			Status:  metav1.ConditionTrue,
-			Message: "Hypervisor re-enabled successfully",
-			Reason:  kvmv1.ConditionReasonSucceeded,
-		})
-		if changed {
-			return r.updateStatus(ctx, eviction, base)
-		} else {
-			return nil
-		}
-	}
-}
-
-// disableHypervisor disables the hypervisor service and adds a finalizer to the eviction
-// will add Condition HypervisorDisabled to the eviction status with the outcome
-func (r *EvictionReconciler) disableHypervisor(ctx context.Context, hypervisor *hypervisors.Hypervisor, eviction *kvmv1.Eviction) error {
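Reviewer note: with the disable/enable round-trip removed, preflight now resolves the hypervisor and its instance list in one hypervisors.GetExt call instead of a bare Get followed by a second fetch. A minimal sketch of that call shape under the same gophercloud v2 imports (the helper name is illustrative, and the Server field access assumes gophercloud's hypervisors.Server struct; treat it as a sketch, not the operator's API):

    package example // illustrative only

    import (
        "context"

        "github.com/gophercloud/gophercloud/v2"
        "github.com/gophercloud/gophercloud/v2/openstack/compute/v2/hypervisors"
    )

    // serversOn returns the instance UUIDs currently hosted on the hypervisor,
    // fetched in one request via the with_servers query parameter.
    func serversOn(ctx context.Context, compute *gophercloud.ServiceClient, hypervisorID string) ([]string, error) {
        withServers := true
        hv, err := hypervisors.GetExt(ctx, compute, hypervisorID, hypervisors.GetOpts{WithServers: &withServers}).Extract()
        if err != nil {
            return nil, err
        }
        var uuids []string
        if hv.Servers != nil { // Servers is a *[]Server and may be nil
            for _, s := range *hv.Servers {
                uuids = append(uuids, s.UUID)
            }
        }
        return uuids, nil
    }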
-	evictionReason := r.evictionReason(eviction)
-	disabledReason := hypervisor.Service.DisabledReason
-
-	if disabledReason != "" && disabledReason != evictionReason {
-		base := eviction.DeepCopy()
-		// Disabled for another reason already
-		meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
-			Type:    kvmv1.ConditionTypeHypervisorDisabled,
-			Status:  metav1.ConditionTrue,
-			Message: fmt.Sprintf("Hypervisor already disabled for reason %q", disabledReason),
-			Reason:  kvmv1.ConditionReasonSucceeded,
-		})
-		return r.updateStatus(ctx, eviction, base)
-	}
-
-	if !controllerutil.ContainsFinalizer(eviction, evictionFinalizerName) {
-		base := eviction.DeepCopy()
-		controllerutil.AddFinalizer(eviction, evictionFinalizerName)
-		return r.Patch(ctx, eviction, client.MergeFromWithOptions(base, client.MergeFromWithOptimisticLock{}), client.FieldOwner(EvictionControllerName))
-	}
-
-	disableService := services.UpdateOpts{Status: services.ServiceDisabled,
-		DisabledReason: r.evictionReason(eviction)}
-
-	_, err := services.Update(ctx, r.computeClient, hypervisor.Service.ID, disableService).Extract()
-	base := eviction.DeepCopy()
-	if err != nil {
-		// We expect OpenStack calls to be transient errors, so we retry for the next reconciliation
-		meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
-			Type:    kvmv1.ConditionTypeHypervisorDisabled,
-			Status:  metav1.ConditionFalse,
-			Message: fmt.Sprintf("Failed to disable hypervisor: %v", err),
-			Reason:  kvmv1.ConditionReasonFailed,
-		})
-		return r.updateStatus(ctx, eviction, base)
-	}
-
-	meta.SetStatusCondition(&eviction.Status.Conditions, metav1.Condition{
-		Type:    kvmv1.ConditionTypeHypervisorDisabled,
-		Status:  metav1.ConditionTrue,
-		Message: "Hypervisor disabled successfully",
-		Reason:  kvmv1.ConditionReasonSucceeded,
-	})
-	return r.updateStatus(ctx, eviction, base)
-}
-
 func (r *EvictionReconciler) liveMigrate(ctx context.Context, uuid string, eviction *kvmv1.Eviction) error {
 	log := logger.FromContext(ctx)
 
diff --git a/internal/controller/eviction_controller_test.go b/internal/controller/eviction_controller_test.go
index 3b9e45f..96c8a42 100644
--- a/internal/controller/eviction_controller_test.go
+++ b/internal/controller/eviction_controller_test.go
@@ -25,6 +25,7 @@ import (
 	"github.com/gophercloud/gophercloud/v2/testhelper/client"
 	. "github.com/onsi/ginkgo/v2"
 	. "github.com/onsi/gomega"
+	gomegatypes "github.com/onsi/gomega/types"
 	"k8s.io/apimachinery/pkg/api/errors"
 	"k8s.io/apimachinery/pkg/api/meta"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
@@ -186,12 +187,19 @@ var _ = Describe("Eviction Controller", func() {
 			})
 
 			hypervisor.Status.HypervisorID = hypervisorId
+			hypervisor.Status.ServiceID = serviceId
 			meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
 				Type:    kvmv1.ConditionTypeOnboarding,
 				Status:  metav1.ConditionTrue,
 				Reason:  "dontcare",
 				Message: "dontcare",
 			})
+			meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
+				Type:    kvmv1.ConditionTypeHypervisorDisabled,
+				Status:  metav1.ConditionTrue,
+				Reason:  "dontcare",
+				Message: "dontcare",
+			})
 			Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
 
 			By("creating the eviction")
"github.com/onsi/gomega" + gomegatypes "github.com/onsi/gomega/types" "k8s.io/apimachinery/pkg/api/errors" "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" @@ -186,12 +187,19 @@ var _ = Describe("Eviction Controller", func() { }) hypervisor.Status.HypervisorID = hypervisorId + hypervisor.Status.ServiceID = serviceId meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{ Type: kvmv1.ConditionTypeOnboarding, Status: metav1.ConditionTrue, Reason: "dontcare", Message: "dontcare", }) + meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{ + Type: kvmv1.ConditionTypeHypervisorDisabled, + Status: metav1.ConditionTrue, + Reason: "dontcare", + Message: "dontcare", + }) Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed()) By("creating the eviction") @@ -255,57 +263,32 @@ var _ = Describe("Eviction Controller", func() { }) }) It("should succeed the reconciliation", func(ctx SpecContext) { - runningCond := &metav1.Condition{ - Type: kvmv1.ConditionTypeEvicting, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonRunning, - Message: "Running", - } - - hypervisorDisabledCond := &metav1.Condition{ - Type: kvmv1.ConditionTypeHypervisorDisabled, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonSucceeded, - Message: "Hypervisor disabled successfully", - } - - preflightCond := &metav1.Condition{ - Type: kvmv1.ConditionTypePreflight, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonSucceeded, - Message: "Preflight checks passed", - } - - expectations := []struct { - conditions []*metav1.Condition - finalizers []string - }{ + runningCond := SatisfyAll( + HaveField("Type", kvmv1.ConditionTypeEvicting), + HaveField("Status", metav1.ConditionTrue), + HaveField("Reason", kvmv1.ConditionReasonRunning), + HaveField("Message", "Running"), + ) + + preflightCond := SatisfyAll( + HaveField("Type", kvmv1.ConditionTypePreflight), + HaveField("Status", metav1.ConditionTrue), + HaveField("Reason", kvmv1.ConditionReasonSucceeded), + HaveField("Message", ContainSubstring("Preflight checks passed")), + ) + + expectations := []gomegatypes.GomegaMatcher{ // 1. expect the Condition Evicting to be true - {conditions: []*metav1.Condition{runningCond}, finalizers: nil}, - - // 2. expect the Finalizer to be added - {conditions: []*metav1.Condition{runningCond}, finalizers: []string{evictionFinalizerName}}, - - // 3. expect the hypervisor to be disabled - { - conditions: []*metav1.Condition{runningCond, hypervisorDisabledCond}, - finalizers: []string{evictionFinalizerName}, - }, - - // 4. expect the preflight condition to be set to succeeded - { - conditions: []*metav1.Condition{runningCond, hypervisorDisabledCond, preflightCond}, - finalizers: []string{evictionFinalizerName}, - }, - - // 5. expect the eviction condition to be set to succeeded - { - conditions: []*metav1.Condition{{ - Type: kvmv1.ConditionTypeEvicting, - Status: metav1.ConditionFalse, - Reason: kvmv1.ConditionReasonSucceeded, - Message: "eviction completed successfully"}}, - finalizers: []string{evictionFinalizerName}}, + ContainElements(runningCond), + // 2. expect the preflight condition to be set to succeeded + ContainElements(runningCond, preflightCond), + // 3. 
+				// 3. expect the eviction condition to be set to succeeded
+				ContainElements(SatisfyAll(
+					HaveField("Type", kvmv1.ConditionTypeEvicting),
+					HaveField("Status", metav1.ConditionFalse),
+					HaveField("Reason", kvmv1.ConditionReasonSucceeded),
+					HaveField("Message", ContainSubstring("eviction completed successfully")),
+				)),
 			}
 
 			for i, expectation := range expectations {
@@ -319,15 +302,7 @@ var _ = Describe("Eviction Controller", func() {
 				Expect(k8sClient.Get(ctx, typeNamespacedName, resource)).NotTo(HaveOccurred())
 
 				// Check the condition
-				for _, expect := range expectation.conditions {
-					reconcileStatus := meta.FindStatusCondition(resource.Status.Conditions, expect.Type)
-					Expect(reconcileStatus).NotTo(BeNil())
-					Expect(reconcileStatus.Status).To(Equal(expect.Status))
-					Expect(reconcileStatus.Reason).To(Equal(expect.Reason))
-					Expect(reconcileStatus.Message).To(ContainSubstring(expect.Message))
-				}
-				// Check finalizers
-				Expect(resource.GetFinalizers()).To(Equal(expectation.finalizers))
+				Expect(resource.Status.Conditions).To(expectation)
 			}
 		})
 	})
@@ -349,8 +324,9 @@ var _ = Describe("Eviction Controller", func() {
 				Expect(err).To(Succeed())
 			})
 		})
+
 		It("should succeed the reconciliation", func(ctx SpecContext) {
-			for range 3 {
+			for range 1 {
 				_, err := controllerReconciler.Reconcile(ctx, reconcileRequest)
 				Expect(err).NotTo(HaveOccurred())
 			}
@@ -360,27 +336,29 @@ var _ = Describe("Eviction Controller", func() {
 			Expect(err).NotTo(HaveOccurred())
 
 			// expect eviction condition to be true
-			reconcileStatus := meta.FindStatusCondition(resource.Status.Conditions, kvmv1.ConditionTypeEvicting)
-			Expect(reconcileStatus).NotTo(BeNil())
-			Expect(reconcileStatus.Status).To(Equal(metav1.ConditionTrue))
-			Expect(reconcileStatus.Reason).To(Equal(kvmv1.ConditionReasonRunning))
+			Expect(resource.Status.Conditions).To(ContainElement(
+				SatisfyAll(
+					HaveField("Type", kvmv1.ConditionTypeEvicting),
+					HaveField("Status", metav1.ConditionTrue),
+					HaveField("Reason", kvmv1.ConditionReasonRunning),
+				),
+			))
 
-			// expect hypervisor disabled condition to be true for reason of already disabled
-			reconcileStatus = meta.FindStatusCondition(resource.Status.Conditions, kvmv1.ConditionTypeHypervisorDisabled)
-			Expect(reconcileStatus.Message).To(ContainSubstring("already disabled"))
-
-			_, err = controllerReconciler.Reconcile(ctx, reconcileRequest)
-			Expect(err).NotTo(HaveOccurred())
+			for range 3 {
+				_, err = controllerReconciler.Reconcile(ctx, reconcileRequest)
+				Expect(err).NotTo(HaveOccurred())
+			}
 
 			err = k8sClient.Get(ctx, typeNamespacedName, resource)
 			Expect(err).NotTo(HaveOccurred())
 
 			// expect reconciliation to be successfully finished
-			reconcileStatus = meta.FindStatusCondition(resource.Status.Conditions, kvmv1.ConditionTypeEvicting)
-			Expect(reconcileStatus).NotTo(BeNil())
-			Expect(reconcileStatus.Status).To(Equal(metav1.ConditionFalse))
-			Expect(reconcileStatus.Reason).To(Equal(kvmv1.ConditionReasonSucceeded))
-
-			Expect(resource.GetFinalizers()).To(BeEmpty())
+			Expect(resource.Status.Conditions).To(ContainElement(
+				SatisfyAll(
+					HaveField("Type", kvmv1.ConditionTypeEvicting),
+					HaveField("Status", metav1.ConditionFalse),
+					HaveField("Reason", kvmv1.ConditionReasonSucceeded),
+				),
+			))
 		})
 	})
 })
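Reviewer note: the expectation table now composes plain Gomega matchers instead of hand-rolled condition structs plus finalizer slices. A self-contained illustration of the same matcher composition, with made-up condition values:

    package example_test // illustrative only

    import (
        "testing"

        . "github.com/onsi/gomega"
        gomegatypes "github.com/onsi/gomega/types"
        metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    )

    func TestConditionMatcher(t *testing.T) {
        g := NewWithT(t)

        conditions := []metav1.Condition{
            {Type: "Evicting", Status: metav1.ConditionFalse, Reason: "Succeeded"},
        }

        // One reusable matcher replaces repeated FindStatusCondition lookups
        // followed by field-by-field assertions.
        var succeeded gomegatypes.GomegaMatcher = SatisfyAll(
            HaveField("Type", "Evicting"),
            HaveField("Status", metav1.ConditionFalse),
            HaveField("Reason", "Succeeded"),
        )
        g.Expect(conditions).To(ContainElement(succeeded))
    }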
"k8s.io/client-go/applyconfigurations/core/v1" v1 "k8s.io/client-go/applyconfigurations/meta/v1" policyv1ac "k8s.io/client-go/applyconfigurations/policy/v1" - "k8s.io/client-go/util/retry" ctrl "sigs.k8s.io/controller-runtime" k8sclient "sigs.k8s.io/controller-runtime/pkg/client" "sigs.k8s.io/controller-runtime/pkg/client/apiutil" @@ -72,40 +71,52 @@ func (r *GardenerNodeLifecycleController) Reconcile(ctx context.Context, req ctr return ctrl.Result{}, k8sclient.IgnoreNotFound(err) } - hv := kvmv1.Hypervisor{} - if err := r.Get(ctx, k8sclient.ObjectKey{Name: req.Name}, &hv); k8sclient.IgnoreNotFound(err) != nil { + hv := &kvmv1.Hypervisor{} + if err := r.Get(ctx, k8sclient.ObjectKey{Name: req.Name}, hv); k8sclient.IgnoreNotFound(err) != nil { return ctrl.Result{}, err } + if !hv.Spec.LifecycleEnabled { // Nothing to be done return ctrl.Result{}, nil } - if isTerminating(node) { - changed, err := setNodeLabels(ctx, r.Client, node, map[string]string{labelEvictionRequired: valueReasonTerminating}) - if changed || err != nil { - return ctrl.Result{}, err + // Only, if the maintenance controller is not active + if _, found := node.Labels["cloud.sap/maintenance-profile"]; !found { + // Sync the terminating status into the hypervisor spec + if isTerminating(node) && hv.Spec.Maintenance != kvmv1.MaintenanceTermination { + base := hv.DeepCopy() + hv.Spec.Maintenance = kvmv1.MaintenanceTermination + if err := r.Patch(ctx, hv, k8sclient.MergeFromWithOptions(base, k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(MaintenanceControllerName)); err != nil { + return ctrl.Result{}, err + } } } // We do not care about the particular value, as long as it isn't an error var minAvailable int32 = 1 - evictionValue, found := node.Labels[labelEvictionApproved] - if found && evictionValue != "false" { + + // Onboarding is not in progress anymore, i.e. the host is onboarded + onboardingCompleted := meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) + // Evicting is not in progress anymore, i.e. the host is empty + offboarded := meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOffboarded) + + if offboarded { minAvailable = 0 + + if onboardingCompleted && isTerminating(node) { + // Onboarded & terminating & eviction complete -> disable HA + if err := disableInstanceHA(hv); err != nil { + return ctrl.Result{}, err + } + } } - if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - return r.ensureBlockingPodDisruptionBudget(ctx, node, minAvailable) - }); err != nil { + if err := r.ensureBlockingPodDisruptionBudget(ctx, node, minAvailable); err != nil { return ctrl.Result{}, err } - onboardingCompleted := meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) - - if err := retry.RetryOnConflict(retry.DefaultRetry, func() error { - return r.ensureSignallingDeployment(ctx, node, minAvailable, onboardingCompleted) - }); err != nil { + if err := r.ensureSignallingDeployment(ctx, node, minAvailable, onboardingCompleted); err != nil { return ctrl.Result{}, err } diff --git a/internal/controller/gardener_node_lifecycle_controller_test.go b/internal/controller/gardener_node_lifecycle_controller_test.go index 1af06de..83dca0c 100644 --- a/internal/controller/gardener_node_lifecycle_controller_test.go +++ b/internal/controller/gardener_node_lifecycle_controller_test.go @@ -18,18 +18,29 @@ limitations under the License. package controller import ( + "fmt" + . "github.com/onsi/ginkgo/v2" . 
"github.com/onsi/gomega" + appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + policyv1 "k8s.io/api/policy/v1" + "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" + + kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" ) var _ = Describe("Gardener Maintenance Controller", func() { const nodeName = "node-test" - var controller *GardenerNodeLifecycleController + var ( + controller *GardenerNodeLifecycleController + name = types.NamespacedName{Name: nodeName} + reconcileReq = ctrl.Request{NamespacedName: name} + maintenanceName = types.NamespacedName{Name: fmt.Sprintf("maint-%v", nodeName), Namespace: "kube-system"} + ) BeforeEach(func(ctx SpecContext) { controller = &GardenerNodeLifecycleController{ @@ -37,32 +48,113 @@ var _ = Describe("Gardener Maintenance Controller", func() { Scheme: k8sClient.Scheme(), } - By("creating the namespace for the reconciler") - ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "monsoon3"}} - Expect(client.IgnoreAlreadyExists(k8sClient.Create(ctx, ns))).To(Succeed()) - By("creating the core resource for the Kind Node") - resource := &corev1.Node{ + node := &corev1.Node{ ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Labels: map[string]string{labelEvictionRequired: "true"}, + Name: nodeName, }, } - Expect(k8sClient.Create(ctx, resource)).To(Succeed()) + Expect(k8sClient.Create(ctx, node)).To(Succeed()) DeferCleanup(func(ctx SpecContext) { - Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, resource))).To(Succeed()) + By("Cleanup the specific node") + Expect(k8sClient.Delete(ctx, node)).To(Succeed()) + }) + + By("creating the core resource for the Kind hypervisor") + hypervisor := &kvmv1.Hypervisor{ + ObjectMeta: metav1.ObjectMeta{ + Name: nodeName, + }, + Spec: kvmv1.HypervisorSpec{ + LifecycleEnabled: true, + }, + } + Expect(k8sClient.Create(ctx, hypervisor)).To(Succeed()) + DeferCleanup(func(ctx SpecContext) { + Expect(k8sClient.Delete(ctx, hypervisor)).To(Succeed()) }) }) - Context("When reconciling a node", func() { - It("should successfully reconcile the resource", func(ctx SpecContext) { - req := ctrl.Request{ - NamespacedName: types.NamespacedName{Name: nodeName}, - } + Context("When reconciling a terminating node", func() { + BeforeEach(func(ctx SpecContext) { + By("Marking the node as terminating") + node := &corev1.Node{} + Expect(k8sClient.Get(ctx, name, node)).To(Succeed()) + node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{ + Type: "Terminating", + }) + Expect(k8sClient.Status().Update(ctx, node)).To(Succeed()) + }) + It("should successfully reconcile the resource", func(ctx SpecContext) { By("Reconciling the created resource") - _, err := controller.Reconcile(ctx, req) + _, err := controller.Reconcile(ctx, reconcileReq) + Expect(err).NotTo(HaveOccurred()) + + hypervisor := &kvmv1.Hypervisor{} + Expect(k8sClient.Get(ctx, name, hypervisor)).To(Succeed()) + Expect(hypervisor.Spec.Maintenance).To(Equal(kvmv1.MaintenanceTermination)) + }) + }) + + Context("When reconciling a node", func() { + JustBeforeEach(func(ctx SpecContext) { + _, err := controller.Reconcile(ctx, reconcileReq) Expect(err).NotTo(HaveOccurred()) }) + It("should create a poddisruptionbudget", func(ctx SpecContext) { + pdb := &policyv1.PodDisruptionBudget{} + Expect(k8sClient.Get(ctx, maintenanceName, pdb)).To(Succeed()) + 
+			Expect(pdb.Spec.MinAvailable).To(HaveField("IntVal", BeNumerically("==", 1)))
+		})
+
+		It("should create a failing deployment to signal that onboarding is not completed", func(ctx SpecContext) {
+			dep := &appsv1.Deployment{}
+			Expect(k8sClient.Get(ctx, maintenanceName, dep)).To(Succeed())
+			Expect(dep.Spec.Template.Spec.Containers).To(HaveLen(1))
+			Expect(dep.Spec.Template.Spec.Containers[0].StartupProbe.Exec.Command).To(Equal([]string{"/bin/false"}))
+		})
+
+		When("the node has been onboarded", func() {
+			BeforeEach(func(ctx SpecContext) {
+				hypervisor := &kvmv1.Hypervisor{}
+				Expect(k8sClient.Get(ctx, name, hypervisor)).To(Succeed())
+				meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
+					Type:    kvmv1.ConditionTypeOnboarding,
+					Status:  metav1.ConditionFalse,
+					Reason:  "dontcare",
+					Message: "dontcare",
+				})
+				Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
+			})
+
+			It("should create a deployment with onboarding completed", func(ctx SpecContext) {
+				dep := &appsv1.Deployment{}
+				Expect(k8sClient.Get(ctx, maintenanceName, dep)).To(Succeed())
+				Expect(dep.Spec.Template.Spec.Containers).To(HaveLen(1))
+				Expect(dep.Spec.Template.Spec.Containers[0].StartupProbe.Exec.Command).To(Equal([]string{"/bin/true"}))
+			})
+		})
+
+		When("the node has been offboarded", func() {
+			BeforeEach(func(ctx SpecContext) {
+				hypervisor := &kvmv1.Hypervisor{}
+				Expect(k8sClient.Get(ctx, name, hypervisor)).To(Succeed())
+				meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
+					Type:    kvmv1.ConditionTypeOffboarded,
+					Status:  metav1.ConditionTrue,
+					Reason:  "dontcare",
+					Message: "dontcare",
+				})
+				Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
+			})
+
+			It("should update the poddisruptionbudget to minAvailable 0", func(ctx SpecContext) {
+				pdb := &policyv1.PodDisruptionBudget{}
+				Expect(k8sClient.Get(ctx, maintenanceName, pdb)).To(Succeed())
+				Expect(pdb.Spec.MinAvailable).To(HaveField("IntVal", BeNumerically("==", int32(0))))
+			})
+		})
 	})
 })
diff --git a/internal/controller/node_eviction_label_controller.go b/internal/controller/node_eviction_label_controller.go
deleted file mode 100644
index e31dc1e..0000000
--- a/internal/controller/node_eviction_label_controller.go
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
-SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors
-SPDX-License-Identifier: Apache-2.0
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-*/
-
-package controller
-
-import (
-	"context"
-	"fmt"
-	"maps"
-	"strings"
-
-	corev1 "k8s.io/api/core/v1"
-	k8serrors "k8s.io/apimachinery/pkg/api/errors"
-	"k8s.io/apimachinery/pkg/api/meta"
-	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
-	"k8s.io/apimachinery/pkg/runtime"
-	ctrl "sigs.k8s.io/controller-runtime"
-	k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
-	"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
-	logger "sigs.k8s.io/controller-runtime/pkg/log"
-
-	kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
-)
-
-const (
-	disabledSuffix                  = "-disabled"
-	labelMl2MechanismDriver         = "neutron.openstack.cloud.sap/ml2-mechanism-driver"
-	NodeEvictionLabelControllerName = "nodeEvictionLabel"
-)
-
-type NodeEvictionLabelReconciler struct {
-	k8sclient.Client
-	Scheme *runtime.Scheme
-}
-
-// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;patch
-// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=evictions,verbs=get;list;watch;create;update;patch;delete
-
-func (r *NodeEvictionLabelReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
-	log := logger.FromContext(ctx).WithName(req.Name)
-	ctx = logger.IntoContext(ctx, log)
-
-	node := &corev1.Node{}
-	if err := r.Get(ctx, req.NamespacedName, node); err != nil {
-		// ignore not found errors, could be deleted
-		return ctrl.Result{}, k8sclient.IgnoreNotFound(err)
-	}
-
-	hostname, found := node.Labels[corev1.LabelHostname]
-	if !found {
-		// Should never happen (tm)
-		return ctrl.Result{}, nil
-	}
-
-	maintenanceValue, found := node.Labels[labelEvictionRequired]
-	eviction := &kvmv1.Eviction{
-		ObjectMeta: metav1.ObjectMeta{
-			Name: hostname,
-		},
-	}
-
-	var err error
-	if !found {
-		newNode := node.DeepCopy()
-		permitAgentsLabels(newNode.Labels)
-		delete(newNode.Labels, labelEvictionApproved)
-		if !maps.Equal(newNode.Labels, node.Labels) {
-			err = r.Patch(ctx, newNode, k8sclient.MergeFrom(node))
-			if err != nil {
-				return ctrl.Result{}, k8sclient.IgnoreNotFound(err)
-			}
-		}
-		err = r.Delete(ctx, eviction)
-		return ctrl.Result{}, k8sclient.IgnoreNotFound(err)
-	}
-
-	var value string
-	hv := &kvmv1.Hypervisor{}
-	if err := r.Get(ctx, k8sclient.ObjectKey{Name: node.Name}, hv); err != nil {
-		return ctrl.Result{}, err
-	}
-
-	if !HasStatusCondition(hv.Status.Conditions, kvmv1.ConditionTypeOnboarding) {
-		// Hasn't even started to onboard that node, so nothing to evict for sure
-		value = "true"
-	} else {
-		// check for existing eviction, else create it
-		value, err = r.reconcileEviction(ctx, eviction, hv, hostname, maintenanceValue)
-		if err != nil {
-			return ctrl.Result{}, err
-		}
-	}
-
-	if value != "" {
-		err = disableInstanceHA(hv)
-		if err != nil {
-			return ctrl.Result{}, err
-		}
-
-		newNode := node.DeepCopy()
-		if value == "true" {
-			evictAgentsLabels(newNode.Labels)
-		}
-		newNode.Labels[labelEvictionApproved] = maintenanceValue
-
-		if !maps.Equal(newNode.Labels, node.Labels) {
-			err = r.Patch(ctx, newNode, k8sclient.MergeFrom(node))
-		}
-	}
-
-	return ctrl.Result{}, k8sclient.IgnoreNotFound(err)
-}
-
-func (r *NodeEvictionLabelReconciler) reconcileEviction(ctx context.Context, eviction *kvmv1.Eviction, hypervisor *kvmv1.Hypervisor, hostname, maintenanceValue string) (string, error) {
-	log := logger.FromContext(ctx)
-	if err := r.Get(ctx, k8sclient.ObjectKeyFromObject(eviction), eviction); err != nil {
-		if !k8serrors.IsNotFound(err) {
-			return "", err
-		}
-		if err := controllerutil.SetOwnerReference(hypervisor, eviction, r.Scheme); err != nil {
return "", err - } - log.Info("Creating new eviction", "name", eviction.Name) - eviction.Spec = kvmv1.EvictionSpec{ - Hypervisor: hostname, - Reason: fmt.Sprintf("openstack-hypervisor-operator: label %v=%v", labelEvictionRequired, maintenanceValue), - } - - transportLabels(&hypervisor.ObjectMeta, &eviction.ObjectMeta) - if err = r.Create(ctx, eviction); err != nil { - return "", fmt.Errorf("failed to create eviction due to %w", err) - } - } - - // check if the eviction is already succeeded - var evictionState string - if status := meta.FindStatusCondition(eviction.Status.Conditions, kvmv1.ConditionTypeEvicting); status != nil { - evictionState = status.Reason - } - switch evictionState { - case "Succeeded": - return "true", nil - case "Failed": - return "false", nil - default: - return "", nil - } -} - -func evictAgentsLabels(labels map[string]string) { - hypervisorType, found := labels[labelHypervisor] - if found && !strings.HasSuffix(hypervisorType, disabledSuffix) { - labels[labelHypervisor] = hypervisorType + disabledSuffix - } - ml2MechanismDriver, found := labels[labelMl2MechanismDriver] - if found && !strings.HasSuffix(ml2MechanismDriver, disabledSuffix) { - labels[labelMl2MechanismDriver] = ml2MechanismDriver + disabledSuffix - } -} - -func permitAgentsLabels(labels map[string]string) { - hypervisorType, found := labels[labelHypervisor] - if found && strings.HasSuffix(hypervisorType, disabledSuffix) { - labels[labelHypervisor] = strings.TrimSuffix(hypervisorType, disabledSuffix) - } - ml2MechanismDriver, found := labels[labelMl2MechanismDriver] - if found && strings.HasSuffix(ml2MechanismDriver, disabledSuffix) { - labels[labelMl2MechanismDriver] = strings.TrimSuffix(ml2MechanismDriver, disabledSuffix) - } -} - -// SetupWithManager sets up the controller with the Manager. -func (r *NodeEvictionLabelReconciler) SetupWithManager(mgr ctrl.Manager) error { - ctx := context.Background() - _ = logger.FromContext(ctx) - - return ctrl.NewControllerManagedBy(mgr). - Named(NodeEvictionLabelControllerName). - For(&corev1.Node{}). // trigger the r.Reconcile whenever a node is created/updated/deleted. - Owns(&kvmv1.Eviction{}). // trigger the r.Reconcile whenever an Own-ed eviction is created/updated/deleted - Complete(r) -} diff --git a/internal/controller/node_eviction_label_controller_test.go b/internal/controller/node_eviction_label_controller_test.go deleted file mode 100644 index 6866a55..0000000 --- a/internal/controller/node_eviction_label_controller_test.go +++ /dev/null @@ -1,142 +0,0 @@ -/* -SPDX-FileCopyrightText: Copyright 2024 SAP SE or an SAP affiliate company and cobaltcore-dev contributors -SPDX-License-Identifier: Apache-2.0 - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -*/ - -package controller - -import ( - "os" - - "github.com/gophercloud/gophercloud/v2/testhelper" - . "github.com/onsi/ginkgo/v2" - . 
"github.com/onsi/gomega" - corev1 "k8s.io/api/core/v1" - "k8s.io/apimachinery/pkg/api/meta" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/apimachinery/pkg/types" - ctrl "sigs.k8s.io/controller-runtime" - "sigs.k8s.io/controller-runtime/pkg/client" - - kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1" -) - -var _ = Describe("Node Eviction Label Controller", func() { - const ( - nodeName = "test-node" - hostName = "test-hostname" - region = "region" - zone = "zone" - ) - var ( - reconciler *NodeEvictionLabelReconciler - req = ctrl.Request{NamespacedName: types.NamespacedName{Name: nodeName}} - fakeServer testhelper.FakeServer - reconcileNodeLoop = func(ctx SpecContext, steps int) (res ctrl.Result, err error) { - for range steps { - res, err = reconciler.Reconcile(ctx, req) - if err != nil { - return - } - } - return - } - ) - - BeforeEach(func(ctx SpecContext) { - fakeServer = testhelper.SetupHTTP() - Expect(os.Setenv("KVM_HA_SERVICE_URL", fakeServer.Endpoint())).To(Succeed()) - - DeferCleanup(func() { - Expect(os.Unsetenv("KVM_HA_SERVICE_URL")).To(Succeed()) - fakeServer.Teardown() - }) - - reconciler = &NodeEvictionLabelReconciler{ - Client: k8sClient, - Scheme: k8sClient.Scheme(), - } - - By("creating the namespace for the reconciler") - ns := &corev1.Namespace{ObjectMeta: metav1.ObjectMeta{Name: "monsoon3"}} - Expect(client.IgnoreAlreadyExists(k8sClient.Create(ctx, ns))).To(Succeed()) - - By("creating the node resource") - resource := &corev1.Node{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Labels: map[string]string{ - corev1.LabelHostname: hostName, - corev1.LabelTopologyRegion: region, - corev1.LabelTopologyZone: zone, - labelEvictionRequired: "true", - }, - }, - } - Expect(k8sClient.Create(ctx, resource)).To(Succeed()) - - DeferCleanup(func(ctx SpecContext) { - By("Cleanup the specific node") - Expect(k8sClient.Delete(ctx, resource)).To(Succeed()) - }) - - By("creating the hypervisor resource") - hypervisor := &kvmv1.Hypervisor{ - ObjectMeta: metav1.ObjectMeta{ - Name: nodeName, - Labels: map[string]string{ - corev1.LabelHostname: nodeName, - }, - }, - Spec: kvmv1.HypervisorSpec{ - LifecycleEnabled: true, - }, - } - Expect(k8sClient.Create(ctx, hypervisor)).To(Succeed()) - DeferCleanup(func(ctx SpecContext) { - By("Cleanup the specific hypervisor") - Expect(client.IgnoreNotFound(k8sClient.Delete(ctx, hypervisor))).To(Succeed()) - }) - - }) - - Context("When reconciling a node", func() { - BeforeEach(func(ctx SpecContext) { - hypervisor := &kvmv1.Hypervisor{} - Expect(k8sClient.Get(ctx, types.NamespacedName{Name: nodeName}, hypervisor)).To(Succeed()) - By("updating the hypervisor status sub-resource") - meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{ - Type: kvmv1.ConditionTypeOnboarding, - Status: metav1.ConditionTrue, - Reason: kvmv1.ConditionReasonInitial, - Message: "Initial onboarding", - }) - Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed()) - }) - - It("should successfully reconcile the resource", func(ctx SpecContext) { - By("ConditionType the created resource") - _, err := reconcileNodeLoop(ctx, 5) - Expect(err).NotTo(HaveOccurred()) - - // expect node controller to create an eviction for the node - err = k8sClient.Get(ctx, types.NamespacedName{ - Name: hostName, - Namespace: "monsoon3", - }, &kvmv1.Eviction{}) - Expect(err).NotTo(HaveOccurred()) - }) - }) -}) diff --git a/internal/controller/utils.go b/internal/controller/utils.go index d562a1b..75625a9 100644 --- a/internal/controller/utils.go 
diff --git a/internal/controller/utils.go b/internal/controller/utils.go
index d562a1b..75625a9 100644
--- a/internal/controller/utils.go
+++ b/internal/controller/utils.go
@@ -19,11 +19,9 @@ package controller
 
 import (
 	"bytes"
-	"context"
 	"errors"
 	"fmt"
 	"io"
-	"maps"
 	"net/http"
 	"os"
 	"slices"
@@ -33,22 +31,10 @@ import (
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime/schema"
 	v1ac "k8s.io/client-go/applyconfigurations/meta/v1"
-	"sigs.k8s.io/controller-runtime/pkg/client"
 
 	kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
 )
 
-// setNodeLabels sets the labels on the node.
-func setNodeLabels(ctx context.Context, writer client.Writer, node *corev1.Node, labels map[string]string) (bool, error) {
-	newNode := node.DeepCopy()
-	maps.Copy(newNode.Labels, labels)
-	if maps.Equal(node.Labels, newNode.Labels) {
-		return false, nil
-	}
-
-	return true, writer.Patch(ctx, newNode, client.MergeFrom(node))
-}
-
 func InstanceHaUrl(region, zone, hostname string) string {
 	if haURL, found := os.LookupEnv("KVM_HA_SERVICE_URL"); found {
 		return haURL