Skip to content

Commit 4e9efd9

Browse files
authored
Merge pull request #8818 from fabriziopandini/fix-delete-inmemory-provider
🐛 fix cluster deletion in the in-memory API server
2 parents f3e3bda + fa1cd34 commit 4e9efd9

File tree

5 files changed

+217
-16
lines changed

5 files changed

+217
-16
lines changed

test/infrastructure/inmemory/internal/controllers/goofycluster_controller.go

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -114,7 +114,7 @@ func (r *InMemoryClusterReconciler) Reconcile(ctx context.Context, req ctrl.Requ
114114

115115
// Handle deleted clusters
116116
if !inMemoryCluster.DeletionTimestamp.IsZero() {
117-
return r.reconcileDelete(ctx, inMemoryCluster)
117+
return r.reconcileDelete(ctx, cluster, inMemoryCluster)
118118
}
119119

120120
// Handle non-deleted clusters
@@ -187,11 +187,19 @@ func (r *InMemoryClusterReconciler) reconcileNormal(_ context.Context, cluster *
187187
return ctrl.Result{}, nil
188188
}
189189

190-
//nolint:unparam // once we implemented this func we will also return errors
191-
func (r *InMemoryClusterReconciler) reconcileDelete(_ context.Context, inMemoryCluster *infrav1.InMemoryCluster) (ctrl.Result, error) {
192-
// TODO: implement
193-
controllerutil.RemoveFinalizer(inMemoryCluster, infrav1.ClusterFinalizer)
190+
func (r *InMemoryClusterReconciler) reconcileDelete(_ context.Context, cluster *clusterv1.Cluster, inMemoryCluster *infrav1.InMemoryCluster) (ctrl.Result, error) {
191+
// Compute the resource group unique name.
192+
resourceGroup := klog.KObj(cluster).String()
193+
194+
// Delete the resource group hosting all the cloud resources belonging the workload cluster;
195+
r.CloudManager.DeleteResourceGroup(resourceGroup)
194196

197+
// Delete the listener for the workload cluster;
198+
if err := r.APIServerMux.DeleteWorkloadClusterListener(resourceGroup); err != nil {
199+
return ctrl.Result{}, err
200+
}
201+
202+
controllerutil.RemoveFinalizer(inMemoryCluster, infrav1.ClusterFinalizer)
195203
return ctrl.Result{}, nil
196204
}
197205

test/infrastructure/inmemory/internal/controllers/goofymachine_controller.go

Lines changed: 83 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -155,7 +155,7 @@ func (r *InMemoryMachineReconciler) Reconcile(ctx context.Context, req ctrl.Requ
155155

156156
// Handle deleted machines
157157
if !inMemoryMachine.DeletionTimestamp.IsZero() {
158-
return r.reconcileDelete(ctx, inMemoryMachine)
158+
return r.reconcileDelete(ctx, cluster, machine, inMemoryMachine)
159159
}
160160

161161
// Handle non-deleted machines
@@ -324,7 +324,7 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster
324324
}
325325
if err := cloudClient.Get(ctx, client.ObjectKeyFromObject(etcdPod), etcdPod); err != nil {
326326
if !apierrors.IsNotFound(err) {
327-
return ctrl.Result{}, errors.Wrapf(err, "failed to get etcdPod Pod")
327+
return ctrl.Result{}, errors.Wrapf(err, "failed to get etcd Pod")
328328
}
329329

330330
etcdPod.Labels = map[string]string{
@@ -468,11 +468,88 @@ func (r *InMemoryMachineReconciler) reconcileNormal(ctx context.Context, cluster
468468
return ctrl.Result{}, nil
469469
}
470470

471-
//nolint:unparam // once we implemented this func we will also return errors
472-
func (r *InMemoryMachineReconciler) reconcileDelete(_ context.Context, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
473-
// TODO: implement
474-
controllerutil.RemoveFinalizer(inMemoryMachine, infrav1.MachineFinalizer)
471+
func (r *InMemoryMachineReconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster, machine *clusterv1.Machine, inMemoryMachine *infrav1.InMemoryMachine) (ctrl.Result, error) {
472+
// Compute the resource group unique name.
473+
// NOTE: We are using reconcilerGroup also as a name for the listener for sake of simplicity.
474+
resourceGroup := klog.KObj(cluster).String()
475+
cloudClient := r.CloudManager.GetResourceGroup(resourceGroup).GetClient()
475476

477+
// Delete VM
478+
cloudMachine := &cloudv1.CloudMachine{
479+
ObjectMeta: metav1.ObjectMeta{
480+
Name: inMemoryMachine.Name,
481+
},
482+
}
483+
if err := cloudClient.Delete(ctx, cloudMachine); err != nil && !apierrors.IsNotFound(err) {
484+
return ctrl.Result{}, errors.Wrapf(err, "failed to delete CloudMachine")
485+
}
486+
487+
// Delete Node
488+
node := &corev1.Node{
489+
ObjectMeta: metav1.ObjectMeta{
490+
Name: inMemoryMachine.Name,
491+
},
492+
}
493+
if err := cloudClient.Delete(ctx, node); err != nil && !apierrors.IsNotFound(err) {
494+
return ctrl.Result{}, errors.Wrapf(err, "failed to delete Node")
495+
}
496+
497+
if util.IsControlPlaneMachine(machine) {
498+
controllerManagerPod := &corev1.Pod{
499+
ObjectMeta: metav1.ObjectMeta{
500+
Namespace: metav1.NamespaceSystem,
501+
Name: fmt.Sprintf("kube-controller-manager-%s", inMemoryMachine.Name),
502+
},
503+
}
504+
if err := cloudClient.Delete(ctx, controllerManagerPod); err != nil && !apierrors.IsNotFound(err) {
505+
return ctrl.Result{}, errors.Wrapf(err, "failed to controller manager Pod")
506+
}
507+
508+
schedulerPod := &corev1.Pod{
509+
ObjectMeta: metav1.ObjectMeta{
510+
Namespace: metav1.NamespaceSystem,
511+
Name: fmt.Sprintf("kube-scheduler-%s", inMemoryMachine.Name),
512+
},
513+
}
514+
if err := cloudClient.Delete(ctx, schedulerPod); err != nil && !apierrors.IsNotFound(err) {
515+
return ctrl.Result{}, errors.Wrapf(err, "failed to scheduler Pod")
516+
}
517+
518+
apiServer := fmt.Sprintf("kube-apiserver-%s", inMemoryMachine.Name)
519+
apiServerPod := &corev1.Pod{
520+
ObjectMeta: metav1.ObjectMeta{
521+
Namespace: metav1.NamespaceSystem,
522+
Name: apiServer,
523+
},
524+
}
525+
if err := cloudClient.Delete(ctx, apiServerPod); err != nil && !apierrors.IsNotFound(err) {
526+
return ctrl.Result{}, errors.Wrapf(err, "failed to apiServer Pod")
527+
}
528+
if err := r.APIServerMux.DeleteAPIServer(resourceGroup, apiServer); err != nil {
529+
return ctrl.Result{}, err
530+
}
531+
532+
// TODO: if all the API server are gone, cleanup all the k8s objects from the resource group.
533+
// note: it is not possible to delete the resource group, because cloud resources should be preserved.
534+
// given that, in order to implement this it is required to find a way to identify all the k8s resources (might be via gvk);
535+
// also, deletion must happen suddently, without respecting finalizers or owner references links.
536+
537+
etcdMember := fmt.Sprintf("etcd-%s", inMemoryMachine.Name)
538+
etcdPod := &corev1.Pod{
539+
ObjectMeta: metav1.ObjectMeta{
540+
Namespace: metav1.NamespaceSystem,
541+
Name: etcdMember,
542+
},
543+
}
544+
if err := cloudClient.Delete(ctx, etcdPod); err != nil && !apierrors.IsNotFound(err) {
545+
return ctrl.Result{}, errors.Wrapf(err, "failed to etcd Pod")
546+
}
547+
if err := r.APIServerMux.DeleteEtcdMember(resourceGroup, etcdMember); err != nil {
548+
return ctrl.Result{}, err
549+
}
550+
}
551+
552+
controllerutil.RemoveFinalizer(inMemoryMachine, infrav1.MachineFinalizer)
476553
return ctrl.Result{}, nil
477554
}
478555

test/infrastructure/inmemory/internal/server/api/handler.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -77,29 +77,29 @@ func NewAPIServerHandler(manager cmanager.Manager, log logr.Logger, resolver Res
7777
ws.Route(ws.GET("/api/v1/{resource}/{name}").To(apiServer.apiV1Get))
7878
ws.Route(ws.PUT("/api/v1/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Update))
7979
ws.Route(ws.PATCH("/api/v1/{resource}/{name}").Consumes(string(types.MergePatchType), string(types.StrategicMergePatchType)).To(apiServer.apiV1Patch))
80-
ws.Route(ws.DELETE("/api/v1/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Delete))
80+
ws.Route(ws.DELETE("/api/v1/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf, runtime.ContentTypeJSON).To(apiServer.apiV1Delete))
8181

8282
ws.Route(ws.POST("/apis/{group}/{version}/{resource}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Create))
8383
ws.Route(ws.GET("/apis/{group}/{version}/{resource}").To(apiServer.apiV1List))
8484
ws.Route(ws.GET("/apis/{group}/{version}/{resource}/{name}").To(apiServer.apiV1Get))
8585
ws.Route(ws.PUT("/apis/{group}/{version}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Update))
8686
ws.Route(ws.PATCH("/apis/{group}/{version}/{resource}/{name}").Consumes(string(types.MergePatchType), string(types.StrategicMergePatchType)).To(apiServer.apiV1Patch))
87-
ws.Route(ws.DELETE("/apis/{group}/{version}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Delete))
87+
ws.Route(ws.DELETE("/apis/{group}/{version}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf, runtime.ContentTypeJSON).To(apiServer.apiV1Delete))
8888

8989
// CRUD endpoints (namespaced objects)
9090
ws.Route(ws.POST("/api/v1/namespaces/{namespace}/{resource}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Create))
9191
ws.Route(ws.GET("/api/v1/namespaces/{namespace}/{resource}").To(apiServer.apiV1List))
9292
ws.Route(ws.GET("/api/v1/namespaces/{namespace}/{resource}/{name}").To(apiServer.apiV1Get))
9393
ws.Route(ws.PUT("/api/v1/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Update))
9494
ws.Route(ws.PATCH("/api/v1/namespaces/{namespace}/{resource}/{name}").Consumes(string(types.MergePatchType), string(types.StrategicMergePatchType)).To(apiServer.apiV1Patch))
95-
ws.Route(ws.DELETE("/api/v1/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Delete))
95+
ws.Route(ws.DELETE("/api/v1/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf, runtime.ContentTypeJSON).To(apiServer.apiV1Delete))
9696

9797
ws.Route(ws.POST("/apis/{group}/{version}/namespaces/{namespace}/{resource}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Create))
9898
ws.Route(ws.GET("/apis/{group}/{version}/namespaces/{namespace}/{resource}").To(apiServer.apiV1List))
9999
ws.Route(ws.GET("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").To(apiServer.apiV1Get))
100100
ws.Route(ws.PUT("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Update))
101101
ws.Route(ws.PATCH("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").Consumes(string(types.MergePatchType), string(types.StrategicMergePatchType)).To(apiServer.apiV1Patch))
102-
ws.Route(ws.DELETE("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf).To(apiServer.apiV1Delete))
102+
ws.Route(ws.DELETE("/apis/{group}/{version}/namespaces/{namespace}/{resource}/{name}").Consumes(runtime.ContentTypeProtobuf, runtime.ContentTypeJSON).To(apiServer.apiV1Delete))
103103

104104
// Port forward endpoints
105105
ws.Route(ws.GET("/api/v1/namespaces/{namespace}/pods/{name}/portforward").To(apiServer.apiV1PortForward))

test/infrastructure/inmemory/internal/server/mux.go

Lines changed: 62 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -350,6 +350,28 @@ func (m *WorkloadClustersMux) AddAPIServer(wclName, podName string, caCert *x509
350350
return nil
351351
}
352352

353+
// DeleteAPIServer removes an API server instance from the WorkloadClusterListener.
354+
func (m *WorkloadClustersMux) DeleteAPIServer(wclName, podName string) error {
355+
m.lock.Lock()
356+
defer m.lock.Unlock()
357+
358+
wcl, ok := m.workloadClusterListeners[wclName]
359+
if !ok {
360+
return errors.Errorf("workloadClusterListener with name %s must be initialized before removing an APIserver", wclName)
361+
}
362+
wcl.apiServers.Delete(podName)
363+
m.log.Info("APIServer instance removed from the workloadClusterListener", "listenerName", wclName, "address", wcl.Address(), "podName", podName)
364+
365+
if wcl.apiServers.Len() < 1 && wcl.listener != nil {
366+
if err := wcl.listener.Close(); err != nil {
367+
return errors.Wrapf(err, "failed to stop WorkloadClusterListener %s, %s", wclName, wcl.HostPort())
368+
}
369+
wcl.listener = nil
370+
m.log.Info("WorkloadClusterListener stopped because there are no APIServer left", "listenerName", wclName, "address", wcl.Address())
371+
}
372+
return nil
373+
}
374+
353375
// HasAPIServer returns true if the workload cluster already has an apiserver with podName.
354376
func (m *WorkloadClustersMux) HasAPIServer(wclName, podName string) bool {
355377
m.lock.RLock()
@@ -371,7 +393,7 @@ func (m *WorkloadClustersMux) AddEtcdMember(wclName, podName string, caCert *x50
371393

372394
wcl, ok := m.workloadClusterListeners[wclName]
373395
if !ok {
374-
return errors.Errorf("workloadClusterListener with name %s must be initialized before adding an APIserver", wclName)
396+
return errors.Errorf("workloadClusterListener with name %s must be initialized before adding an etcd member", wclName)
375397
}
376398
wcl.etcdMembers.Insert(podName)
377399
m.log.Info("Etcd member added to WorkloadClusterListener", "listenerName", wclName, "address", wcl.Address(), "podName", podName)
@@ -406,6 +428,22 @@ func (m *WorkloadClustersMux) HasEtcdMember(wclName, podName string) bool {
406428
return wcl.etcdMembers.Has(podName)
407429
}
408430

431+
// DeleteEtcdMember removes an etcd Member from the WorkloadClusterListener.
432+
func (m *WorkloadClustersMux) DeleteEtcdMember(wclName, podName string) error {
433+
m.lock.Lock()
434+
defer m.lock.Unlock()
435+
436+
wcl, ok := m.workloadClusterListeners[wclName]
437+
if !ok {
438+
return errors.Errorf("workloadClusterListener with name %s must be initialized before removing an etcd member", wclName)
439+
}
440+
wcl.etcdMembers.Delete(podName)
441+
delete(wcl.etcdServingCertificates, podName)
442+
m.log.Info("Etcd member removed from WorkloadClusterListener", "listenerName", wclName, "address", wcl.Address(), "podName", podName)
443+
444+
return nil
445+
}
446+
409447
// ListListeners implements api.DebugInfoProvider.
410448
func (m *WorkloadClustersMux) ListListeners() map[string]string {
411449
m.lock.RLock()
@@ -418,6 +456,29 @@ func (m *WorkloadClustersMux) ListListeners() map[string]string {
418456
return ret
419457
}
420458

459+
// DeleteWorkloadClusterListener deletes a WorkloadClusterListener.
460+
func (m *WorkloadClustersMux) DeleteWorkloadClusterListener(wclName string) error {
461+
m.lock.Lock()
462+
defer m.lock.Unlock()
463+
464+
wcl, ok := m.workloadClusterListeners[wclName]
465+
if !ok {
466+
return nil
467+
}
468+
469+
if wcl.listener != nil {
470+
if err := wcl.listener.Close(); err != nil {
471+
return errors.Wrapf(err, "failed to stop WorkloadClusterListener %s, %s", wclName, wcl.HostPort())
472+
}
473+
}
474+
475+
delete(m.workloadClusterListeners, wclName)
476+
delete(m.workloadClusterNameByHost, wcl.HostPort())
477+
478+
m.log.Info("Workload cluster listener deleted", "listenerName", wclName, "address", wcl.Address())
479+
return nil
480+
}
481+
421482
// Shutdown shuts down the workload cluster mux.
422483
func (m *WorkloadClustersMux) Shutdown(ctx context.Context) error {
423484
m.lock.Lock()

test/infrastructure/inmemory/internal/server/mux_test.go

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,61 @@ func init() {
5959
ctrl.SetLogger(klog.Background())
6060
}
6161

62+
func TestMux(t *testing.T) {
63+
g := NewWithT(t)
64+
65+
manager := cmanager.New(scheme)
66+
67+
wcl := "workload-cluster"
68+
host := "127.0.0.1" //nolint:goconst
69+
wcmux := NewWorkloadClustersMux(manager, host)
70+
71+
listener, err := wcmux.InitWorkloadClusterListener(wcl)
72+
g.Expect(err).ToNot(HaveOccurred())
73+
g.Expect(listener.Host()).To(Equal(host))
74+
g.Expect(listener.Port()).ToNot(BeZero())
75+
76+
caCert, caKey, err := newCertificateAuthority()
77+
g.Expect(err).ToNot(HaveOccurred())
78+
79+
etcdCert, etcdKey, err := newCertificateAuthority()
80+
g.Expect(err).ToNot(HaveOccurred())
81+
82+
apiServerPod1 := "apiserver1"
83+
err = wcmux.AddAPIServer(wcl, apiServerPod1, caCert, caKey)
84+
g.Expect(err).ToNot(HaveOccurred())
85+
86+
etcdPodMember1 := "etcd1"
87+
err = wcmux.AddEtcdMember(wcl, etcdPodMember1, etcdCert, etcdKey)
88+
g.Expect(err).ToNot(HaveOccurred())
89+
90+
apiServerPod2 := "apiserver2"
91+
err = wcmux.AddAPIServer(wcl, apiServerPod2, caCert, caKey)
92+
g.Expect(err).ToNot(HaveOccurred())
93+
94+
etcdPodMember2 := "etcd2"
95+
err = wcmux.AddEtcdMember(wcl, etcdPodMember2, etcdCert, etcdKey)
96+
g.Expect(err).ToNot(HaveOccurred())
97+
98+
err = wcmux.DeleteAPIServer(wcl, apiServerPod2)
99+
g.Expect(err).ToNot(HaveOccurred())
100+
101+
err = wcmux.DeleteEtcdMember(wcl, etcdPodMember2)
102+
g.Expect(err).ToNot(HaveOccurred())
103+
104+
err = wcmux.DeleteAPIServer(wcl, apiServerPod1)
105+
g.Expect(err).ToNot(HaveOccurred())
106+
107+
err = wcmux.DeleteEtcdMember(wcl, etcdPodMember1)
108+
g.Expect(err).ToNot(HaveOccurred())
109+
110+
err = wcmux.DeleteWorkloadClusterListener(wcl)
111+
g.Expect(err).ToNot(HaveOccurred())
112+
113+
err = wcmux.Shutdown(ctx)
114+
g.Expect(err).ToNot(HaveOccurred())
115+
}
116+
62117
func TestAPI_corev1_CRUD(t *testing.T) {
63118
g := NewWithT(t)
64119

0 commit comments

Comments
 (0)