Skip to content

Commit 6606010

Browse files
committed
use deleting machine method to clear failure domain
1 parent 23d447b commit 6606010

File tree

2 files changed

+134
-12
lines changed

2 files changed

+134
-12
lines changed

config/rbac/role.yaml

Lines changed: 32 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,3 @@
1-
21
---
32
apiVersion: rbac.authorization.k8s.io/v1
43
kind: ClusterRole
@@ -68,6 +67,14 @@ rules:
6867
- get
6968
- list
7069
- watch
70+
- apiGroups:
71+
- cluster.x-k8s.io
72+
resources:
73+
- machinesets/status
74+
verbs:
75+
- get
76+
- list
77+
- watch
7178
- apiGroups:
7279
- controlplane.cluster.x-k8s.io
7380
resources:
@@ -76,6 +83,30 @@ rules:
7683
- get
7784
- list
7885
- watch
86+
- apiGroups:
87+
- controlplane.cluster.x-k8s.io
88+
resources:
89+
- kubeadmcontrolplanes/status
90+
verbs:
91+
- get
92+
- list
93+
- watch
94+
- apiGroups:
95+
- etcdcluster.cluster.x-k8s.io
96+
resources:
97+
- etcdadmclusters
98+
verbs:
99+
- get
100+
- list
101+
- watch
102+
- apiGroups:
103+
- etcdcluster.cluster.x-k8s.io
104+
resources:
105+
- etcdadmclusters/status
106+
verbs:
107+
- get
108+
- list
109+
- watch
79110
- apiGroups:
80111
- infrastructure.cluster.x-k8s.io
81112
resources:

controllers/cloudstackfailuredomain_controller.go

Lines changed: 102 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -18,12 +18,14 @@ package controllers
1818

1919
import (
2020
"context"
21-
2221
"github.com/pkg/errors"
22+
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
23+
"k8s.io/apimachinery/pkg/runtime/schema"
2324
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
2425
ctrl "sigs.k8s.io/controller-runtime"
2526
"sigs.k8s.io/controller-runtime/pkg/client"
2627
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
28+
"sort"
2729

2830
infrav1 "sigs.k8s.io/cluster-api-provider-cloudstack/api/v1beta2"
2931
csCtrlrUtils "sigs.k8s.io/cluster-api-provider-cloudstack/controllers/utils"
@@ -38,13 +40,20 @@ type CloudStackFailureDomainReconciler struct {
3840
//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=cloudstackfailuredomains,verbs=get;list;watch;create;update;patch;delete
3941
//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=cloudstackfailuredomains/status,verbs=get;update;patch
4042
//+kubebuilder:rbac:groups=infrastructure.cluster.x-k8s.io,resources=cloudstackfailuredomains/finalizers,verbs=update
43+
//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinesets,verbs=get;list;watch
44+
//+kubebuilder:rbac:groups=etcdcluster.cluster.x-k8s.io,resources=etcdadmclusters,verbs=get;list;watch
45+
//+kubebuilder:rbac:groups=controlplane.cluster.x-k8s.io,resources=kubeadmcontrolplanes,verbs=get;list;watch
46+
//+kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinesets/status,verbs=get;list;watch
47+
//+kubebuilder:rbac:groups=etcdcluster.cluster.x-k8s.io,resources=etcdadmclusters/status,verbs=get;list;watch
48+
//+kubebuilder:rbac:groups=controlplane.cluster.x-k8s.io,resources=kubeadmcontrolplanes/status,verbs=get;list;watch
4149

4250
// CloudStackFailureDomainReconciliationRunner is a ReconciliationRunner with extensions specific to CloudStackFailureDomains.
4351
// The runner does the actual reconciliation.
4452
type CloudStackFailureDomainReconciliationRunner struct {
4553
*csCtrlrUtils.ReconciliationRunner
4654
ReconciliationSubject *infrav1.CloudStackFailureDomain
4755
IsoNet *infrav1.CloudStackIsolatedNetwork
56+
Machines []infrav1.CloudStackMachine
4857
}
4958

5059
// Initialize a new CloudStackFailureDomain reconciliation runner with concrete types and initialized member fields.
@@ -112,6 +121,8 @@ func (r *CloudStackFailureDomainReconciliationRunner) ReconcileDelete() (ctrl.Re
112121
r.Log.Info("Deleting CloudStackFailureDomain")
113122

114123
return r.RunReconciliationStages(
124+
r.GetAllMachinesInFailureDomain,
125+
r.AllMachinesCanBeCleared,
115126
r.ClearMachines,
116127
r.DeleteOwnedObjects(
117128
infrav1.GroupVersion.WithKind("CloudStackAffinityGroup"),
@@ -123,29 +134,109 @@ func (r *CloudStackFailureDomainReconciliationRunner) ReconcileDelete() (ctrl.Re
123134
)
124135
}
125136

126-
// ClearMachines checks for any machines in failure domain, deletes the CAPI machine for any still in FailureDomain,
127-
// and requeues until all CloudStack machines are cleared from the FailureDomain.
128-
func (r *CloudStackFailureDomainReconciliationRunner) ClearMachines() (ctrl.Result, error) {
137+
// GetAllMachinesInFailureDomain get all cloudstack machines deployed in this failure domain.
138+
// machines are sorted by name so that it can be processed one by one in a determined order.
139+
func (r *CloudStackFailureDomainReconciliationRunner) GetAllMachinesInFailureDomain() (ctrl.Result, error) {
129140
machines := &infrav1.CloudStackMachineList{}
130141
if err := r.K8sClient.List(r.RequestCtx, machines, client.MatchingLabels{infrav1.FailureDomainLabelName: r.ReconciliationSubject.Name}); err != nil {
131142
return ctrl.Result{}, err
132143
}
133-
// Deleted CAPI machines for CloudStack machines found.
134-
for _, machine := range machines.Items {
144+
items := machines.Items
145+
sort.Slice(items, func(i, j int) bool {
146+
return items[i].Name < items[j].Name
147+
})
148+
r.Machines = items
149+
return ctrl.Result{}, nil
150+
}
151+
152+
// AllMachinesCanBeCleared checks for each machine in failure domain, check if it is possible to delete it.
153+
// if machine is the only machine in worker node group, it cannot be deleted.
154+
// if machine is the only machine in control plane, it cannot be deleted.
155+
// if machine is the only machine in etcdadmcluster, it cannot be deleted.
156+
// if deletes the CAPI machine for any still in FailureDomain,
157+
// and requeus until all CloudStack machines are cleared from the FailureDomain.
158+
func (r *CloudStackFailureDomainReconciliationRunner) AllMachinesCanBeCleared() (ctrl.Result, error) {
159+
// check CAPI machines for CloudStack machines found.
160+
for _, machine := range r.Machines {
135161
for _, ref := range machine.OwnerReferences {
162+
if ref.Kind != "Machine" {
163+
owner := &unstructured.Unstructured{}
164+
owner.SetGroupVersionKind(schema.FromAPIVersionAndKind(ref.APIVersion, ref.Kind))
165+
if err := r.K8sClient.Get(r.RequestCtx, client.ObjectKey{Namespace: machine.Namespace, Name: ref.Name}, owner); err != nil {
166+
return ctrl.Result{}, err
167+
}
168+
specReplicas, statusReplicas, err := replicasLargerThanOne(owner, ref.Name, machine.Name)
169+
if err != nil {
170+
return ctrl.Result{}, err
171+
}
172+
if specReplicas != statusReplicas {
173+
return r.RequeueWithMessage("spec.replicas <> status.replicas, ", "machineOwner", "owner", ref.Name)
174+
}
175+
176+
statusReady, found, err := unstructured.NestedBool(owner.Object, "status", "ready")
177+
if found && err != nil {
178+
return ctrl.Result{}, err
179+
}
180+
if found && !statusReady {
181+
return r.RequeueWithMessage("status.ready not true, ", "owner", ref.Name)
182+
}
183+
184+
statusReadyReplicas, found, err := unstructured.NestedInt64(owner.Object, "status", "readyReplicas")
185+
if found && err != nil {
186+
return ctrl.Result{}, err
187+
}
188+
if found && statusReadyReplicas != statusReplicas {
189+
return r.RequeueWithMessage("status.replicas <> status.readyReplicas, ", "owner", ref.Name, "status.replicas", statusReplicas, "status.readyReplicas", statusReadyReplicas)
190+
}
191+
}
192+
}
193+
}
194+
return ctrl.Result{}, nil
195+
}
196+
197+
func replicasLargerThanOne(owner *unstructured.Unstructured, ownerName, machineName string) (int64, int64, error) {
198+
specReplicas, found, err := unstructured.NestedInt64(owner.Object, "spec", "replicas")
199+
if err != nil {
200+
return 0, 0, err
201+
}
202+
if !found {
203+
return 0, 0, errors.Errorf("spec.replicas not found in %s", ownerName)
204+
}
205+
206+
statusReplicas, found, err := unstructured.NestedInt64(owner.Object, "status", "replicas")
207+
if err != nil {
208+
return specReplicas, 0, err
209+
}
210+
if !found {
211+
return specReplicas, 0, errors.Errorf("status.replicas not found in %s", ownerName)
212+
}
213+
214+
if specReplicas < 2 {
215+
return specReplicas, 0, errors.Errorf("spec.replicas < 2 in %s, %s cannot be moved away from failure domain", ownerName, machineName)
216+
}
217+
218+
return specReplicas, statusReplicas, nil
219+
}
220+
221+
// ClearMachines deletes the CAPI machine in FailureDomain.
222+
func (r *CloudStackFailureDomainReconciliationRunner) ClearMachines() (ctrl.Result, error) {
223+
for _, csMachine := range r.Machines {
224+
for _, ref := range csMachine.OwnerReferences {
136225
if ref.Kind == "Machine" {
137226
machine := &clusterv1.Machine{}
138-
machine.Name = ref.Name
139-
machine.Namespace = r.ReconciliationSubject.Namespace
227+
if err := r.K8sClient.Get(r.RequestCtx, client.ObjectKey{Namespace: r.ReconciliationSubject.Namespace, Name: ref.Name}, machine); err != nil {
228+
return ctrl.Result{}, err
229+
}
230+
if !machine.DeletionTimestamp.IsZero() {
231+
return r.RequeueWithMessage("machine is being deleted, ", "machine", machine.Name)
232+
}
140233
if err := r.K8sClient.Delete(r.RequestCtx, machine); err != nil {
141234
return ctrl.Result{}, err
142235
}
236+
return r.RequeueWithMessage("start to delete machine, ", "machine", machine.Name)
143237
}
144238
}
145239
}
146-
if len(machines.Items) > 0 {
147-
return r.RequeueWithMessage("FailureDomain still has machine(s) in it.")
148-
}
149240
return ctrl.Result{}, nil
150241
}
151242

0 commit comments

Comments
 (0)