Skip to content

Commit 4c4321c

Browse files
committed
Add ownership relationship for node, optimize, add migration origin/dest
1 parent be59b18 commit 4c4321c

File tree

8 files changed

+106
-85
lines changed

8 files changed

+106
-85
lines changed

api/v1alpha1/migration_types.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ type MigrationSpec struct {
3030

3131
// MigrationStatus defines the observed state of Migration.
3232
type MigrationStatus struct {
33-
Host string `json:"host"`
33+
Origin string `json:"origin"`
34+
Destination string `json:"destination"`
3435
Type string `json:"type,omitempty"`
3536
Started metav1.Time `json:"started,omitempty"`
3637
ErrMsg string `json:"errMsg,omitempty"`
@@ -62,7 +63,8 @@ type MigrationStatus struct {
6263

6364
// +kubebuilder:object:root=true
6465
// +kubebuilder:subresource:status
65-
// +kubebuilder:printcolumn:name="Host",type=string,JSONPath=`.status.host`
66+
// +kubebuilder:printcolumn:name="Origin",type=string,JSONPath=`.status.origin`
67+
// +kubebuilder:printcolumn:name="Destination",type=string,JSONPath=`.status.destination`
6668
// +kubebuilder:printcolumn:name="Type",type=string,JSONPath=`.status.type`
6769
// +kubebuilder:printcolumn:name="Operation",type=string,JSONPath=`.status.operation`
6870
// +kubebuilder:printcolumn:name="Started",type=date,JSONPath=`.status.started`

charts/kvm-node-agent/crds/migration-crd.yaml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,11 @@ spec:
1414
scope: Namespaced
1515
versions:
1616
- additionalPrinterColumns:
17-
- jsonPath: .status.host
18-
name: Host
17+
- jsonPath: .status.origin
18+
name: Origin
19+
type: string
20+
- jsonPath: .status.destination
21+
name: Destination
1922
type: string
2023
- jsonPath: .status.type
2124
name: Type
@@ -83,6 +86,8 @@ spec:
8386
type: string
8487
dataTotal:
8588
type: string
89+
destination:
90+
type: string
8691
diskBps:
8792
type: string
8893
diskProcessed:
@@ -95,8 +100,6 @@ spec:
95100
type: string
96101
errMsg:
97102
type: string
98-
host:
99-
type: string
100103
memBps:
101104
type: string
102105
memConstant:
@@ -125,6 +128,8 @@ spec:
125128
type: string
126129
operation:
127130
type: string
131+
origin:
132+
type: string
128133
setupTime:
129134
type: string
130135
started:
@@ -137,7 +142,8 @@ spec:
137142
type:
138143
type: string
139144
required:
140-
- host
145+
- destination
146+
- origin
141147
type: object
142148
type: object
143149
served: true

cmd/main.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,23 +21,29 @@ import (
2121
"context"
2222
"crypto/tls"
2323
"flag"
24+
"fmt"
2425
"os"
2526

2627
logger "sigs.k8s.io/controller-runtime/pkg/log"
2728

2829
"github.com/cobaltcode-dev/kvm-node-agent/internal/emulator"
2930
"github.com/cobaltcode-dev/kvm-node-agent/internal/libvirt"
31+
"github.com/cobaltcode-dev/kvm-node-agent/internal/sys"
3032
"github.com/cobaltcode-dev/kvm-node-agent/internal/systemd"
3133

3234
// Import all Kubernetes client auth plugins (e.g. Azure, GCP, OIDC, etc.)
3335
// to ensure that exec-entrypoint and run can make use of them.
3436
_ "k8s.io/client-go/plugin/pkg/client/auth"
3537

3638
certmanagerv1 "github.com/cert-manager/cert-manager/pkg/apis/certmanager/v1"
39+
corev1 "k8s.io/api/core/v1"
40+
"k8s.io/apimachinery/pkg/fields"
3741
"k8s.io/apimachinery/pkg/runtime"
3842
utilruntime "k8s.io/apimachinery/pkg/util/runtime"
3943
clientgoscheme "k8s.io/client-go/kubernetes/scheme"
4044
ctrl "sigs.k8s.io/controller-runtime"
45+
"sigs.k8s.io/controller-runtime/pkg/cache"
46+
"sigs.k8s.io/controller-runtime/pkg/client"
4147
"sigs.k8s.io/controller-runtime/pkg/healthz"
4248
"sigs.k8s.io/controller-runtime/pkg/log/zap"
4349
"sigs.k8s.io/controller-runtime/pkg/metrics/filters"
@@ -143,6 +149,19 @@ func main() {
143149
// if you are doing, or intend to do, any operation such as performing cleanups
144150
// after the manager stops then its usage might be unsafe.
145151
// LeaderElectionReleaseOnCancel: true,
152+
153+
// Cache options allow subscribing to events from Kubernetes objects and reading
154+
// those objects more efficiently by avoiding calls out to the API.
155+
Cache: cache.Options{
156+
ByObject: map[client.Object]cache.ByObject{
157+
&corev1.Node{}: {
158+
Field: fields.ParseSelectorOrDie(fmt.Sprintf("metadata.name=%s", sys.Hostname)),
159+
},
160+
&kvmv1alpha1.Hypervisor{}: {
161+
Field: fields.ParseSelectorOrDie(fmt.Sprintf("metadata.name=%s", sys.Hostname)),
162+
},
163+
},
164+
},
146165
})
147166
if err != nil {
148167
setupLog.Error(err, "unable to start manager")

config/crd/bases/kvm.cloud.sap_migrations.yaml

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,11 @@ spec:
1515
scope: Namespaced
1616
versions:
1717
- additionalPrinterColumns:
18-
- jsonPath: .status.host
19-
name: Host
18+
- jsonPath: .status.origin
19+
name: Origin
20+
type: string
21+
- jsonPath: .status.destination
22+
name: Destination
2023
type: string
2124
- jsonPath: .status.type
2225
name: Type
@@ -84,6 +87,8 @@ spec:
8487
type: string
8588
dataTotal:
8689
type: string
90+
destination:
91+
type: string
8792
diskBps:
8893
type: string
8994
diskProcessed:
@@ -96,8 +101,6 @@ spec:
96101
type: string
97102
errMsg:
98103
type: string
99-
host:
100-
type: string
101104
memBps:
102105
type: string
103106
memConstant:
@@ -126,6 +129,8 @@ spec:
126129
type: string
127130
operation:
128131
type: string
132+
origin:
133+
type: string
129134
setupTime:
130135
type: string
131136
started:
@@ -138,7 +143,8 @@ spec:
138143
type:
139144
type: string
140145
required:
141-
- host
146+
- destination
147+
- origin
142148
type: object
143149
type: object
144150
served: true

internal/controller/hypervisor_controller.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
"k8s.io/apimachinery/pkg/api/meta"
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929
"k8s.io/apimachinery/pkg/runtime"
30+
"k8s.io/apimachinery/pkg/types"
3031
ctrl "sigs.k8s.io/controller-runtime"
3132
"sigs.k8s.io/controller-runtime/pkg/client"
3233
logger "sigs.k8s.io/controller-runtime/pkg/log"
@@ -68,8 +69,7 @@ func (r *HypervisorReconciler) Reconcile(ctx context.Context, req ctrl.Request)
6869

6970
// only reconcile the node I am running on
7071
if req.Name != sys.Hostname {
71-
// only reconcile the node I am running on
72-
return ctrl.Result{}, nil
72+
panic(fmt.Sprintf("reconciling hypervisor %s, but I am running on %s", req.Name, sys.Hostname))
7373
}
7474
log.Info("Reconcile", "name", req.Name, "namespace", req.Namespace)
7575

@@ -79,6 +79,13 @@ func (r *HypervisorReconciler) Reconcile(ctx context.Context, req ctrl.Request)
7979
return ctrl.Result{}, client.IgnoreNotFound(err)
8080
}
8181

82+
// ====================================================================================================
83+
// Hypervisor Metal node name
84+
// ====================================================================================================
85+
if name, ok := hypervisor.Labels[LabelMetalNodeName]; ok {
86+
hypervisor.Status.Node = types.NodeName(name)
87+
}
88+
8289
if hypervisor.Spec.EvacuateOnReboot != r.evacuateOnReboot {
8390
if hypervisor.Spec.EvacuateOnReboot {
8491
e := &evacuation.EvictionController{Client: r.Client}

internal/controller/node_controller.go

Lines changed: 45 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,13 @@ import (
2121
"context"
2222
"fmt"
2323

24-
v1 "k8s.io/api/core/v1"
24+
corev1 "k8s.io/api/core/v1"
25+
k8serrors "k8s.io/apimachinery/pkg/api/errors"
2526
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2627
"k8s.io/apimachinery/pkg/runtime"
27-
"k8s.io/apimachinery/pkg/types"
2828
ctrl "sigs.k8s.io/controller-runtime"
2929
"sigs.k8s.io/controller-runtime/pkg/client"
30+
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
3031
logger "sigs.k8s.io/controller-runtime/pkg/log"
3132

3233
kvmv1alpha1 "github.com/cobaltcode-dev/kvm-node-agent/api/v1alpha1"
@@ -46,93 +47,68 @@ const LabelMetalNodeName = "kubernetes.metal.cloud.sap/name"
4647

4748
// +kubebuilder:rbac:groups="",resources=nodes,verbs=get;list;watch
4849
// +kubebuilder:rbac:groups="",resources=nodes/status,verbs=get
49-
// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch;create;update;patch;delete
50-
51-
func (r *NodeReconciler) getNode(ctx context.Context) (*v1.Node, error) {
52-
// Fetch the Node we're current running on
53-
var nodes v1.NodeList
54-
err := r.List(ctx, &nodes, client.MatchingLabels{v1.LabelHostname: sys.Hostname})
55-
if client.IgnoreNotFound(err) != nil {
56-
return nil, fmt.Errorf("failed fetching nodes: %w", err)
57-
}
58-
59-
switch len(nodes.Items) {
60-
case 0:
61-
return nil, nil
62-
case 1:
63-
return &nodes.Items[0], nil
64-
default:
65-
return nil, fmt.Errorf("found more than one node with label %s=%s", v1.LabelHostname, sys.Hostname)
66-
}
67-
68-
}
50+
// +kubebuilder:rbac:groups=kvm.cloud.sap,resources=hypervisors,verbs=get;list;watch;create;delete
6951

7052
func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
7153
log := logger.FromContext(ctx, "controller", "node")
7254

7355
if req.Name != sys.Hostname {
74-
// only reconcile the node I am running on
75-
return ctrl.Result{}, nil
56+
panic(fmt.Sprintf("reconciling node %s, but I am running on %s", req.Name, sys.Hostname))
7657
}
7758

78-
namespace := req.Namespace
79-
if namespace == "" {
80-
namespace = sys.Namespace
59+
node := &corev1.Node{}
60+
if err := r.Get(ctx, req.NamespacedName, node); client.IgnoreNotFound(err) != nil {
61+
// ignore not found errors, could be deleted
62+
return ctrl.Result{}, err
8163
}
8264

83-
node, err := r.getNode(ctx)
84-
if err != nil {
85-
return ctrl.Result{}, fmt.Errorf("failed fetching node: %w", err)
65+
metalNodeName := sys.Hostname
66+
if name, ok := node.Labels[LabelMetalNodeName]; ok {
67+
metalNodeName = name
8668
}
87-
if node == nil {
88-
return ctrl.Result{}, nil
69+
70+
hypervisor := &kvmv1alpha1.Hypervisor{
71+
ObjectMeta: metav1.ObjectMeta{
72+
Name: node.Name,
73+
Namespace: sys.Namespace,
74+
Labels: map[string]string{
75+
corev1.LabelHostname: sys.Hostname,
76+
LabelMetalNodeName: metalNodeName,
77+
},
78+
},
79+
Spec: kvmv1alpha1.HypervisorSpec{
80+
Reboot: r.Reboot,
81+
EvacuateOnReboot: r.EvacuateOnReboot,
82+
CreateCertManagerCertificate: r.CreateCertManagerCertificate,
83+
},
8984
}
90-
// Todo: check I am really an hypervisor?
9185

9286
// Ensure corresponding hypervisor exists
9387
log.Info("Reconcile", "name", req.Name, "namespace", req.Namespace)
94-
var hypervisors kvmv1alpha1.HypervisorList
95-
if err = r.List(ctx, &hypervisors, client.MatchingLabels{v1.LabelHostname: sys.Hostname}); err != nil {
96-
return ctrl.Result{}, fmt.Errorf("failed fetching hypervisors: %w", err)
97-
}
98-
99-
if len(hypervisors.Items) == 0 {
100-
// create hypervisor
101-
if err = r.Create(ctx, &kvmv1alpha1.Hypervisor{
102-
ObjectMeta: metav1.ObjectMeta{
103-
Name: node.Name,
104-
Namespace: namespace,
105-
Labels: map[string]string{v1.LabelHostname: sys.Hostname},
106-
},
107-
Spec: kvmv1alpha1.HypervisorSpec{
108-
Reboot: r.Reboot,
109-
EvacuateOnReboot: r.EvacuateOnReboot,
110-
CreateCertManagerCertificate: r.CreateCertManagerCertificate,
111-
},
112-
}); err != nil {
113-
return ctrl.Result{}, fmt.Errorf("failed creating hypervisor: %w", err)
88+
if err := r.Get(ctx, client.ObjectKeyFromObject(hypervisor), hypervisor); err != nil {
89+
if k8serrors.IsNotFound(err) {
90+
// attach ownerReference for cascading deletion
91+
if err = controllerutil.SetControllerReference(node, hypervisor, r.Scheme); err != nil {
92+
return ctrl.Result{}, fmt.Errorf("failed setting controller reference: %w", err)
93+
}
94+
95+
log.Info("Creating new hypervisor", "name", node.Name)
96+
if err = r.Create(ctx, hypervisor); err != nil {
97+
return ctrl.Result{}, err
98+
}
99+
100+
// Nothing more to do in this pass (note: the empty ctrl.Result below does not request a requeue)
101+
return ctrl.Result{}, nil
114102
}
115-
return ctrl.Result{}, nil
103+
104+
return ctrl.Result{}, err
116105
}
117106

118107
if node.ObjectMeta.DeletionTimestamp != nil {
119108
// node is being deleted, cleanup hypervisor
120-
if err = r.Delete(ctx, &hypervisors.Items[0]); client.IgnoreNotFound(err) != nil {
109+
if err := r.Delete(ctx, hypervisor); client.IgnoreNotFound(err) != nil {
121110
return ctrl.Result{}, fmt.Errorf("failed cleanup up hypervisor: %w", err)
122111
}
123-
return ctrl.Result{}, nil
124-
}
125-
126-
hv := hypervisors.Items[0].DeepCopy()
127-
hv.Status.Node = types.NodeName(node.Name)
128-
if name, ok := node.Labels[LabelMetalNodeName]; ok {
129-
hv.Status.Node = types.NodeName(name)
130-
}
131-
132-
// Update Status
133-
err = r.Status().Patch(ctx, hv, client.MergeFrom(&hypervisors.Items[0]))
134-
if err != nil {
135-
return ctrl.Result{}, fmt.Errorf("failed patching hypervisor status: %w", err)
136112
}
137113

138114
return ctrl.Result{}, nil
@@ -141,6 +117,6 @@ func (r *NodeReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.
141117
// SetupWithManager sets up the controller with the Manager.
142118
func (r *NodeReconciler) SetupWithManager(mgr ctrl.Manager) error {
143119
return ctrl.NewControllerManagedBy(mgr).
144-
For(&v1.Node{}).
120+
For(&corev1.Node{}).
145121
Complete(r)
146122
}

internal/libvirt/libvirt_events.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ func (l *LibVirt) startMigrationWatch(ctx context.Context, domain libvirt.Domain
239239
}
240240
patched := original.DeepCopy()
241241
patched.Status.Started = metav1.Now()
242-
patched.Status.Host = sys.Hostname
242+
patched.Status.Origin = sys.NodeLabelName
243243
if err := l.client.Status().Patch(ctx, patched, client.MergeFrom(&original)); err != nil {
244244
return fmt.Errorf("failed to patch migration status time: %w", err)
245245
}
@@ -332,7 +332,7 @@ func (l *LibVirt) populateDomainJobInfo(domain libvirt.Domain, migration *kvmv1a
332332
flags = libvirt.DomainJobStatsCompleted
333333
}
334334

335-
migration.Status.Host = sys.Hostname
335+
migration.Status.Destination = sys.NodeLabelName
336336
rType, params, err := l.virt.DomainGetJobStats(domain, flags)
337337
if err != nil {
338338
return err

internal/sys/hostname.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424
)
2525

2626
var Hostname string
27+
var NodeLabelName string
2728

2829
func init() {
2930
var err error
@@ -33,4 +34,8 @@ func init() {
3334
log.Log.Error(err, "failed fetching hostname")
3435
}
3536
}
37+
38+
if NodeLabelName = os.Getenv("NODE_LABEL"); NodeLabelName == "" {
39+
NodeLabelName = Hostname
40+
}
3641
}

0 commit comments

Comments
 (0)