3 changes: 3 additions & 0 deletions api/v1/hypervisor_types.go
@@ -32,6 +32,9 @@ const (
// ConditionTypeOnboarding is the type of condition for onboarding status
ConditionTypeOnboarding = "Onboarding"

// ConditionTypeOffboarded is the type of condition for the completed offboarding
ConditionTypeOffboarded = "Offboarded"

// ConditionTypeReady is the type of condition for ready status of a hypervisor
ConditionTypeReady = "Ready"

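For orientation, a minimal sketch (not part of the diff) of how the new Offboarded condition type can be read with the apimachinery condition helpers this PR already uses in the controller below. The helper name hasOffboarded is an illustrative assumption; only meta.IsStatusConditionTrue and the kvmv1 constant come from the changes themselves.

import (
	"k8s.io/apimachinery/pkg/api/meta"

	kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
)

// hasOffboarded is an illustrative helper (not in this PR): it reports whether
// the decommission controller has marked the hypervisor as fully offboarded.
func hasOffboarded(hv *kvmv1.Hypervisor) bool {
	return meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOffboarded)
}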
8 changes: 0 additions & 8 deletions cmd/main.go
@@ -236,14 +236,6 @@ func main() {
os.Exit(1)
}

if err = (&controller.NodeEvictionLabelReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "Node")
os.Exit(1)
}

if err = (&controller.NodeDecommissionReconciler{
Client: mgr.GetClient(),
Scheme: mgr.GetScheme(),
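For context, the registration that remains in cmd/main.go after this removal typically follows the same pattern as the deleted NodeEvictionLabelReconciler block above; the sketch below mirrors that pattern, with mgr and setupLog coming from the surrounding main.go and the "controller" log value being an illustrative assumption.

if err = (&controller.NodeDecommissionReconciler{
	Client: mgr.GetClient(),
	Scheme: mgr.GetScheme(),
}).SetupWithManager(mgr); err != nil {
	// the controller name used in the log key/value pair is illustrative
	setupLog.Error(err, "unable to create controller", "controller", "NodeDecommission")
	os.Exit(1)
}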
87 changes: 30 additions & 57 deletions internal/controller/decomission_controller.go
@@ -28,23 +28,19 @@ import (
"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/aggregates"
"github.com/gophercloud/gophercloud/v2/openstack/compute/v2/services"
"github.com/gophercloud/gophercloud/v2/openstack/placement/v1/resourceproviders"
corev1 "k8s.io/api/core/v1"
"k8s.io/apimachinery/pkg/api/meta"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/runtime"
"k8s.io/client-go/util/retry"
ctrl "sigs.k8s.io/controller-runtime"
k8sclient "sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
logger "sigs.k8s.io/controller-runtime/pkg/log"

kvmv1 "github.com/cobaltcore-dev/openstack-hypervisor-operator/api/v1"
"github.com/cobaltcore-dev/openstack-hypervisor-operator/internal/openstack"
)

const (
decommissionFinalizerName = "cobaltcore.cloud.sap/decommission-hypervisor"
DecommissionControllerName = "nodeDecommission"
DecommissionControllerName = "offboarding"
)

type NodeDecommissionReconciler struct {
@@ -57,66 +53,40 @@ type NodeDecommissionReconciler struct {
// The counter-side in gardener is here:
// https://github.com/gardener/machine-controller-manager/blob/rel-v0.56/pkg/util/provider/machinecontroller/machine.go#L646

// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch;patch;update
// +kubebuilder:rbac:groups="",resources=nodes/finalizers,verbs=update
// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch
// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors/status,verbs=get;list;watch;update;patch
func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
hostname := req.Name
log := logger.FromContext(ctx).WithName(req.Name).WithValues("hostname", hostname)
ctx = logger.IntoContext(ctx, log)

node := &corev1.Node{}
if err := r.Get(ctx, req.NamespacedName, node); err != nil {
return ctrl.Result{}, k8sclient.IgnoreNotFound(err)
}

// Fetch HV to check if lifecycle management is enabled
hv := &kvmv1.Hypervisor{}
if err := r.Get(ctx, k8sclient.ObjectKey{Name: hostname}, hv); err != nil {
// ignore not found errors, could be deleted
return ctrl.Result{}, k8sclient.IgnoreNotFound(err)
}
if !hv.Spec.LifecycleEnabled {
// Get out of the way
return r.removeFinalizer(ctx, node)
}

if !controllerutil.ContainsFinalizer(node, decommissionFinalizerName) {
return ctrl.Result{}, retry.RetryOnConflict(retry.DefaultRetry, func() error {
patch := k8sclient.MergeFrom(node.DeepCopy())
controllerutil.AddFinalizer(node, decommissionFinalizerName)
if err := r.Patch(ctx, node, patch); err != nil {
return fmt.Errorf("failed to add finalizer due to %w", err)
}
log.Info("Added finalizer")
return nil
})
}

// Not yet deleting hv, nothing more to do
if node.DeletionTimestamp.IsZero() {
if !hv.Spec.LifecycleEnabled || hv.Spec.Maintenance != kvmv1.MaintenanceTermination {
return ctrl.Result{}, nil
}

// Someone is just deleting the hv, without going through termination
// See: https://github.com/gardener/machine-controller-manager/blob/rel-v0.56/pkg/util/provider/machinecontroller/machine.go#L658-L659
if !IsNodeConditionTrue(node.Status.Conditions, "Terminating") {
log.Info("removing finalizer since not terminating")
// So we just get out of the way for now
return r.removeFinalizer(ctx, node)
}

if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeReady) {
return r.setDecommissioningCondition(ctx, hv, "Node is being decommissioned, removing host from nova")
}

log.Info("removing host from nova")
if meta.IsStatusConditionTrue(hv.Status.Conditions, kvmv1.ConditionTypeOffboarded) {
return ctrl.Result{}, nil
}

if !meta.IsStatusConditionFalse(hv.Status.Conditions, kvmv1.ConditionTypeEvicting) {
// Either has not evicted yet, or is still evicting VMs, so we have to wait for that to finish
return ctrl.Result{}, nil
}

hypervisor, err := openstack.GetHypervisorByName(ctx, r.computeClient, hostname, true)
if errors.Is(err, openstack.ErrNoHypervisor) {
// We are (hopefully) done
return r.removeFinalizer(ctx, node)
return ctrl.Result{}, r.markOffboarded(ctx, hv)
}

// TODO: remove since RunningVMs is only available until micro-version 2.87, and also is updated asynchronously
@@ -141,7 +111,7 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req
return r.setDecommissioningCondition(ctx, hv, fmt.Sprintf("cannot list aggregates due to %v", err))
}

host := node.Name
host := hv.Name
for name, aggregate := range aggs {
if slices.Contains(aggregate.Hosts, host) {
opts := aggregates.RemoveHostOpts{Host: host}
@@ -168,19 +138,7 @@ func (r *NodeDecommissionReconciler) Reconcile(ctx context.Context, req ctrl.Req
return r.setDecommissioningCondition(ctx, hv, fmt.Sprintf("cannot clean up resource provider: %v", err))
}

return r.removeFinalizer(ctx, node)
}

func (r *NodeDecommissionReconciler) removeFinalizer(ctx context.Context, node *corev1.Node) (ctrl.Result, error) {
if !controllerutil.ContainsFinalizer(node, decommissionFinalizerName) {
return ctrl.Result{}, nil
}

nodeBase := node.DeepCopy()
controllerutil.RemoveFinalizer(node, decommissionFinalizerName)
err := r.Patch(ctx, node, k8sclient.MergeFromWithOptions(nodeBase,
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName))
return ctrl.Result{}, err
return ctrl.Result{}, r.markOffboarded(ctx, hv)
}

func (r *NodeDecommissionReconciler) setDecommissioningCondition(ctx context.Context, hv *kvmv1.Hypervisor, message string) (ctrl.Result, error) {
@@ -195,7 +153,22 @@ func (r *NodeDecommissionReconciler) setDecommissioningCondition(ctx context.Con
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName)); err != nil {
return ctrl.Result{}, fmt.Errorf("cannot update hypervisor status due to %w", err)
}
return ctrl.Result{RequeueAfter: shortRetryTime}, nil
return ctrl.Result{}, nil
}

func (r *NodeDecommissionReconciler) markOffboarded(ctx context.Context, hv *kvmv1.Hypervisor) error {
base := hv.DeepCopy()
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
Type: kvmv1.ConditionTypeOffboarded,
Status: metav1.ConditionTrue,
Reason: "Offboarded",
Message: "Offboarding successful",
})
if err := r.Status().Patch(ctx, hv, k8sclient.MergeFromWithOptions(base,
k8sclient.MergeFromWithOptimisticLock{}), k8sclient.FieldOwner(DecommissionControllerName)); err != nil {
return fmt.Errorf("cannot update hypervisor status due to %w", err)
}
return nil
}

// SetupWithManager sets up the controller with the Manager.
@@ -217,6 +190,6 @@ func (r *NodeDecommissionReconciler) SetupWithManager(mgr ctrl.Manager) error {

return ctrl.NewControllerManagedBy(mgr).
Named(DecommissionControllerName).
For(&corev1.Node{}).
For(&kvmv1.Hypervisor{}).
Complete(r)
}
89 changes: 32 additions & 57 deletions internal/controller/decomission_controller_test.go
@@ -68,9 +68,9 @@ const (
var _ = Describe("Decommission Controller", func() {
var (
r *NodeDecommissionReconciler
nodeName = types.NamespacedName{Name: hypervisorName}
resourceName = types.NamespacedName{Name: hypervisorName}
reconcileReq = ctrl.Request{
NamespacedName: nodeName,
NamespacedName: resourceName,
}
fakeServer testhelper.FakeServer
)
@@ -99,30 +99,10 @@ var _ = Describe("Decommission Controller", func() {
Expect(k8sClient.Delete(ctx, ns)).To(Succeed())
})

By("creating the core resource for the Kind Node")
node := &corev1.Node{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName.Name,
Labels: map[string]string{labelEvictionRequired: "true"},
},
}
Expect(k8sClient.Create(ctx, node)).To(Succeed())
DeferCleanup(func(ctx SpecContext) {
node := &corev1.Node{}
Expect(k8sclient.IgnoreNotFound(k8sClient.Get(ctx, nodeName, node))).To(Succeed())
if len(node.Finalizers) > 0 {
node.Finalizers = make([]string, 0)
Expect(k8sClient.Update(ctx, node)).To(Succeed())
}
if node.Name != "" {
Expect(k8sclient.IgnoreNotFound(k8sClient.Delete(ctx, node))).To(Succeed())
}
})

By("Create the hypervisor resource with lifecycle enabled")
hypervisor := &kvmv1.Hypervisor{
ObjectMeta: metav1.ObjectMeta{
Name: nodeName.Name,
Name: resourceName.Name,
},
Spec: kvmv1.HypervisorSpec{
LifecycleEnabled: true,
@@ -134,47 +114,32 @@ var _ = Describe("Decommission Controller", func() {
})
})

Context("When reconciling a node", func() {
It("should set the finalizer", func(ctx SpecContext) {
By("reconciling the created resource")
_, err := r.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
node := &corev1.Node{}

Expect(k8sClient.Get(ctx, nodeName, node)).To(Succeed())
Expect(node.Finalizers).To(ContainElement(decommissionFinalizerName))
})
})

Context("When terminating a node", func() {
Context("When marking the hypervisor terminating", func() {
JustBeforeEach(func(ctx SpecContext) {
By("reconciling first reconciling the to add the finalizer")
_, err := r.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
node := &corev1.Node{}

Expect(k8sClient.Get(ctx, nodeName, node)).To(Succeed())
Expect(node.Finalizers).To(ContainElement(decommissionFinalizerName))
hypervisor := &kvmv1.Hypervisor{}
Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed())

By("and then terminating then node")
node.Status.Conditions = append(node.Status.Conditions, corev1.NodeCondition{
Type: "Terminating",
Status: corev1.ConditionTrue,
By("and then marking the hypervisor terminating")
hypervisor.Spec.Maintenance = kvmv1.MaintenanceTermination
Expect(k8sClient.Update(ctx, hypervisor)).To(Succeed())
meta.SetStatusCondition(&hypervisor.Status.Conditions, metav1.Condition{
Type: kvmv1.ConditionTypeTerminating,
Status: metav1.ConditionTrue,
Reason: "dontcare",
Message: "dontcare",
})
Expect(k8sClient.Status().Update(ctx, node)).To(Succeed())
Expect(k8sClient.Delete(ctx, node)).To(Succeed())
nodelist := &corev1.NodeList{}
Expect(k8sClient.List(ctx, nodelist)).To(Succeed())
Expect(nodelist.Items).NotTo(BeEmpty())
Expect(k8sClient.Status().Update(ctx, hypervisor)).To(Succeed())
})

When("the hypervisor was set to ready", func() {
When("the hypervisor was set to ready and has been evicted", func() {
getHypervisorsCalled := 0
BeforeEach(func(ctx SpecContext) {
hv := &kvmv1.Hypervisor{}
Expect(k8sClient.Get(ctx, nodeName, hv)).To(Succeed())
Expect(k8sClient.Get(ctx, resourceName, hv)).To(Succeed())
meta.SetStatusCondition(&hv.Status.Conditions,
metav1.Condition{
Type: kvmv1.ConditionTypeReady,
@@ -183,6 +148,12 @@ var _ = Describe("Decommission Controller", func() {
Message: "dontcare",
},
)
meta.SetStatusCondition(&hv.Status.Conditions, metav1.Condition{
Type: kvmv1.ConditionTypeEvicting,
Status: metav1.ConditionFalse,
Reason: "dontcare",
Message: "dontcare",
})
Expect(k8sClient.Status().Update(ctx, hv)).To(Succeed())

fakeServer.Mux.HandleFunc("GET /os-hypervisors/detail", func(w http.ResponseWriter, r *http.Request) {
@@ -239,12 +210,12 @@ var _ = Describe("Decommission Controller", func() {
})
})

It("should set the hypervisor condition", func(ctx SpecContext) {
It("should set the hypervisor ready condition", func(ctx SpecContext) {
By("reconciling the created resource")
_, err := r.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
hypervisor := &kvmv1.Hypervisor{}
Expect(k8sClient.Get(ctx, nodeName, hypervisor)).To(Succeed())
Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed())
Expect(hypervisor.Status.Conditions).To(ContainElement(
SatisfyAll(
HaveField("Type", kvmv1.ConditionTypeReady),
@@ -254,18 +225,22 @@ var _ = Describe("Decommission Controller", func() {
))
})

It("should remove the finalizer", func(ctx SpecContext) {
It("should set the hypervisor offboarded condition", func(ctx SpecContext) {
By("reconciling the created resource")
for range 3 {
_, err := r.Reconcile(ctx, reconcileReq)
Expect(err).NotTo(HaveOccurred())
}
Expect(getHypervisorsCalled).To(BeNumerically(">", 0))

node := &corev1.Node{}
err := k8sClient.Get(ctx, nodeName, node)
Expect(err).To(HaveOccurred())
Expect(k8sclient.IgnoreNotFound(err)).To(Succeed())
hypervisor := &kvmv1.Hypervisor{}
Expect(k8sClient.Get(ctx, resourceName, hypervisor)).To(Succeed())
Expect(hypervisor.Status.Conditions).To(ContainElement(
SatisfyAll(
HaveField("Type", kvmv1.ConditionTypeOffboarded),
HaveField("Status", metav1.ConditionTrue),
),
))
})
})
