Skip to content

Commit 4cd8938

Browse files
authored
Merge pull request #180 from wanyufe/main
Update failuredomain regexp and prevent race condition while removing failure domain
2 parents 6141437 + dff64cc commit 4cd8938

File tree

4 files changed

+105
-50
lines changed

4 files changed

+105
-50
lines changed

controllers/cloudstackfailuredomain_controller.go

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,11 @@ import (
3131
csCtrlrUtils "sigs.k8s.io/cluster-api-provider-cloudstack/controllers/utils"
3232
)
3333

34+
const (
35+
conditionTypeReady = "Ready"
36+
conditionStatusFalse = "False"
37+
)
38+
3439
// CloudStackFailureDomainReconciler is the k8s controller manager's interface to reconcile a CloudStackFailureDomain.
3540
// This is primarily to adapt to k8s.
3641
type CloudStackFailureDomainReconciler struct {
@@ -122,6 +127,7 @@ func (r *CloudStackFailureDomainReconciliationRunner) ReconcileDelete() (ctrl.Re
122127

123128
return r.RunReconciliationStages(
124129
r.GetAllMachinesInFailureDomain,
130+
r.RequeueIfClusterNotReady,
125131
r.RequeueIfMachineCannotBeRemoved,
126132
r.ClearMachines,
127133
r.DeleteOwnedObjects(
@@ -148,6 +154,21 @@ func (r *CloudStackFailureDomainReconciliationRunner) GetAllMachinesInFailureDom
148154
return ctrl.Result{}, nil
149155
}
150156

157+
// RequeueIfClusterNotReady check cluster to see if there is any rolling update going on.
158+
func (r *CloudStackFailureDomainReconciliationRunner) RequeueIfClusterNotReady() (ctrl.Result, error) {
159+
if len(r.Machines) > 0 {
160+
if !r.CAPICluster.DeletionTimestamp.IsZero() {
161+
return ctrl.Result{}, nil
162+
}
163+
for _, condition := range r.CAPICluster.Status.Conditions {
164+
if condition.Type == conditionTypeReady && condition.Status == conditionStatusFalse {
165+
return r.RequeueWithMessage("cluster status not ready,")
166+
}
167+
}
168+
}
169+
return ctrl.Result{}, nil
170+
}
171+
151172
// RequeueIfMachineCannotBeRemoved checks for each machine to confirm it is not risky to remove it.
152173
func (r *CloudStackFailureDomainReconciliationRunner) RequeueIfMachineCannotBeRemoved() (ctrl.Result, error) {
153174
// check CAPI machines for CloudStack machines found.

controllers/cloudstackfailuredomain_controller_test.go

Lines changed: 60 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -21,10 +21,12 @@ import (
2121
. "github.com/onsi/ginkgo/v2"
2222
. "github.com/onsi/gomega"
2323
"k8s.io/apimachinery/pkg/api/errors"
24+
"k8s.io/utils/pointer"
2425
infrav1 "sigs.k8s.io/cluster-api-provider-cloudstack/api/v1beta2"
2526
"sigs.k8s.io/cluster-api-provider-cloudstack/pkg/cloud"
2627
dummies "sigs.k8s.io/cluster-api-provider-cloudstack/test/dummies/v1beta2"
2728
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
29+
"sigs.k8s.io/cluster-api/util/patch"
2830
"sigs.k8s.io/controller-runtime/pkg/client"
2931
)
3032

@@ -47,84 +49,93 @@ var _ = Describe("CloudStackFailureDomainReconciler", func() {
4749
arg1.(*infrav1.CloudStackZoneSpec).Network.ID = "SomeID"
4850
arg1.(*infrav1.CloudStackZoneSpec).Network.Type = cloud.NetworkTypeShared
4951
}).MinTimes(1)
50-
5152
})
5253

53-
It("Should set failure domain Status.Ready to true.", func() {
54-
assertFailureDomainCreated()
55-
})
5654
It("Should delete failure domain if no VM under this failure domain.", func() {
57-
assertFailureDomainCreated()
55+
Eventually(func() bool {
56+
return getFailuredomainStatus(dummies.CSFailureDomain1)
57+
}, timeout).WithPolling(pollInterval).Should(BeTrue())
58+
5859
Ω(k8sClient.Delete(ctx, dummies.CSFailureDomain1))
5960

60-
assertFailureDomainNotExisted()
61+
tempfd := &infrav1.CloudStackFailureDomain{}
62+
Eventually(func() bool {
63+
key := client.ObjectKeyFromObject(dummies.CSFailureDomain1)
64+
if err := k8sClient.Get(ctx, key, tempfd); err != nil {
65+
return errors.IsNotFound(err)
66+
}
67+
return false
68+
}, timeout).WithPolling(pollInterval).Should(BeTrue())
6169
})
70+
6271
DescribeTable("Should function in different replicas conditions",
63-
func(shouldDeleteVM bool, specReplicas, statusReplicas, statusReadyReplicas *int32, statusReady *bool) {
64-
assertFailureDomainCreated()
72+
func(shouldDeleteVM bool, specReplicas, statusReplicas, statusReadyReplicas *int32, statusReady *bool, controlPlaneReady bool) {
73+
Eventually(func() bool {
74+
return getFailuredomainStatus(dummies.CSFailureDomain1)
75+
}, timeout).WithPolling(pollInterval).Should(BeTrue())
76+
6577
setCSMachineOwnerCRD(dummies.CSMachineOwner, specReplicas, statusReplicas, statusReadyReplicas, statusReady)
6678
setCAPIMachineAndCSMachineCRDs(dummies.CSMachine1, dummies.CAPIMachine)
6779
setMachineOwnerReference(dummies.CSMachine1, dummies.CSMachineOwnerReference)
6880
labelMachineFailuredomain(dummies.CSMachine1, dummies.CSFailureDomain1)
6981

82+
if !controlPlaneReady {
83+
Eventually(func() error {
84+
ph, err := patch.NewHelper(dummies.CAPICluster, k8sClient)
85+
Ω(err).ShouldNot(HaveOccurred())
86+
dummies.CAPICluster.Status.Conditions = []clusterv1.Condition{
87+
{
88+
Type: "Ready",
89+
Status: "False",
90+
},
91+
}
92+
return ph.Patch(ctx, dummies.CAPICluster, patch.WithStatusObservedGeneration{})
93+
}, timeout).Should(Succeed())
94+
}
95+
7096
Ω(k8sClient.Delete(ctx, dummies.CSFailureDomain1))
7197

7298
CAPIMachine := &clusterv1.Machine{}
73-
Eventually(func() bool {
74-
key := client.ObjectKey{Namespace: dummies.ClusterNameSpace, Name: dummies.CAPIMachine.Name}
75-
if shouldDeleteVM {
99+
if shouldDeleteVM {
100+
Eventually(func() bool {
101+
key := client.ObjectKey{Namespace: dummies.ClusterNameSpace, Name: dummies.CAPIMachine.Name}
76102
if err := k8sClient.Get(ctx, key, CAPIMachine); err != nil {
77103
return errors.IsNotFound(err)
78104
}
79-
} else {
105+
return false
106+
}, timeout).WithPolling(pollInterval).Should(BeTrue())
107+
} else {
108+
Consistently(func() bool {
109+
key := client.ObjectKey{Namespace: dummies.ClusterNameSpace, Name: dummies.CAPIMachine.Name}
80110
if err := k8sClient.Get(ctx, key, CAPIMachine); err == nil {
81111
return CAPIMachine.DeletionTimestamp.IsZero()
82112
}
83-
}
84-
return false
85-
}, timeout).WithPolling(pollInterval).Should(BeTrue())
113+
return false
114+
}, timeout).WithPolling(pollInterval).Should(BeTrue())
115+
}
116+
86117
},
87118
// should delete - simulate owner is kubeadmcontrolplane
88-
Entry("Should delete machine if spec.replicas > 1", true, int32Pointer(2), int32Pointer(2), int32Pointer(2), boolPointer(true)),
119+
Entry("Should delete machine if spec.replicas > 1", true, pointer.Int32(2), pointer.Int32(2), pointer.Int32(2), pointer.Bool(true), true),
89120
// should delete - simulate owner is etcdadmcluster
90-
Entry("Should delete machine if status.readyReplica does not exist", true, int32Pointer(2), int32Pointer(2), nil, boolPointer(true)),
121+
Entry("Should delete machine if status.readyReplica does not exist", true, pointer.Int32(2), pointer.Int32(2), nil, pointer.Bool(true), true),
91122
// should delete - simulate owner is machineset
92-
Entry("Should delete machine if status.ready does not exist", true, int32Pointer(2), int32Pointer(2), int32Pointer(2), nil),
123+
Entry("Should delete machine if status.ready does not exist", true, pointer.Int32(2), pointer.Int32(2), pointer.Int32(2), nil, true),
93124
// should not delete if condition not met
94-
Entry("Should return error if status.replicas < spec.replicas", false, int32Pointer(2), int32Pointer(1), int32Pointer(1), boolPointer(true)),
95-
Entry("Should return error if spec.replicas < 2", false, int32Pointer(1), int32Pointer(1), int32Pointer(1), boolPointer(true)),
96-
Entry("Should return error if status.ready is false", false, int32Pointer(2), int32Pointer(2), int32Pointer(2), boolPointer(false)),
97-
Entry("Should return error if status.readyReplicas <> status.replicas", false, int32Pointer(2), int32Pointer(2), int32Pointer(1), boolPointer(true)),
125+
Entry("Should not delete machine if cluster control plane not ready", false, pointer.Int32(2), pointer.Int32(2), pointer.Int32(2), pointer.Bool(true), false),
126+
Entry("Should not delete machine if status.replicas < spec.replicas", false, pointer.Int32(2), pointer.Int32(1), pointer.Int32(1), pointer.Bool(true), true),
127+
Entry("Should not delete machine if spec.replicas < 2", false, pointer.Int32(1), pointer.Int32(1), pointer.Int32(1), pointer.Bool(true), true),
128+
Entry("Should not delete machine if status.ready is false", false, pointer.Int32(2), pointer.Int32(2), pointer.Int32(2), pointer.Bool(false), true),
129+
Entry("Should not delete machine if status.readyReplicas <> status.replicas", false, pointer.Int32(2), pointer.Int32(2), pointer.Int32(1), pointer.Bool(true), true),
98130
)
99131
})
100132
})
101133

102-
func assertFailureDomainCreated() {
103-
tempfd := &infrav1.CloudStackFailureDomain{}
104-
Eventually(func() bool {
105-
key := client.ObjectKeyFromObject(dummies.CSFailureDomain1)
106-
if err := k8sClient.Get(ctx, key, tempfd); err == nil {
107-
return tempfd.Status.Ready
108-
}
109-
return false
110-
}, timeout).WithPolling(pollInterval).Should(BeTrue())
111-
}
112-
113-
func assertFailureDomainNotExisted() {
134+
func getFailuredomainStatus(failureDomain *infrav1.CloudStackFailureDomain) bool {
114135
tempfd := &infrav1.CloudStackFailureDomain{}
115-
Eventually(func() bool {
116-
key := client.ObjectKeyFromObject(dummies.CSFailureDomain1)
117-
if err := k8sClient.Get(ctx, key, tempfd); err != nil {
118-
return true
119-
}
120-
return false
121-
}, timeout).WithPolling(pollInterval).Should(BeTrue())
122-
}
123-
124-
func boolPointer(b bool) *bool {
125-
return &b
126-
}
127-
128-
func int32Pointer(b int32) *int32 {
129-
return &b
136+
key := client.ObjectKeyFromObject(failureDomain)
137+
if err := k8sClient.Get(ctx, key, tempfd); err == nil {
138+
return tempfd.Status.Ready
139+
}
140+
return false
130141
}

controllers/cloudstackmachine_controller.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -99,6 +99,7 @@ func (reconciler *CloudStackMachineReconciler) Reconcile(ctx context.Context, re
9999

100100
func (r *CloudStackMachineReconciliationRunner) Reconcile() (retRes ctrl.Result, reterr error) {
101101
return r.RunReconciliationStages(
102+
r.DeleteMachineIfFailuredomainNotExist,
102103
r.GetObjectByName("placeholder", r.IsoNet,
103104
func() string { return r.IsoNetMetaName(r.FailureDomain.Spec.Zone.Network.Name) }),
104105
r.RunIf(func() bool { return r.FailureDomain.Spec.Zone.Network.Type == cloud.NetworkTypeIsolated },
@@ -156,6 +157,29 @@ func (r *CloudStackMachineReconciliationRunner) SetFailureDomainOnCSMachine() (r
156157
return ctrl.Result{}, nil
157158
}
158159

160+
// DeleteMachineIfFailuredomainNotExist delete CAPI machine if machine is deployed in a failuredomain that does not exist anymore.
161+
func (r *CloudStackMachineReconciliationRunner) DeleteMachineIfFailuredomainNotExist() (retRes ctrl.Result, reterr error) {
162+
if r.CAPIMachine.Spec.FailureDomain == nil {
163+
return ctrl.Result{}, nil
164+
}
165+
capiAssignedFailuredomainName := *r.CAPIMachine.Spec.FailureDomain
166+
exist := false
167+
for _, fd := range r.CSCluster.Spec.FailureDomains {
168+
if capiAssignedFailuredomainName == fd.Name {
169+
exist = true
170+
break
171+
}
172+
}
173+
if !exist {
174+
r.Log.Info("CAPI Machine in non-existent failuredomain. Deleting associated Machine.", "csMachine", r.ReconciliationSubject.GetName(), "failuredomain", capiAssignedFailuredomainName)
175+
if err := r.K8sClient.Delete(r.RequestCtx, r.CAPIMachine); err != nil {
176+
return ctrl.Result{}, err
177+
}
178+
}
179+
180+
return ctrl.Result{}, nil
181+
}
182+
159183
// GetOrCreateVMInstance gets or creates a VM instance.
160184
// Implicitly it also fetches its bootstrap secret in order to create said instance.
161185
func (r *CloudStackMachineReconciliationRunner) GetOrCreateVMInstance() (retRes ctrl.Result, reterr error) {

go.mod

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,6 @@ require (
1717
golang.org/x/text v0.3.7
1818
gopkg.in/yaml.v3 v3.0.0-20210107192922-496545a6307b
1919
k8s.io/api v0.23.0
20-
k8s.io/apiextensions-apiserver v0.23.0
2120
k8s.io/apimachinery v0.23.0
2221
k8s.io/client-go v0.23.0
2322
k8s.io/klog/v2 v2.30.0

0 commit comments

Comments
 (0)