Skip to content

Commit 0195739

Browse files
authored
Merge pull request #6178 from fabriziopandini/CAPD-failure-domains
✨ CAPD and E2E framework tests failure domains
2 parents 37149b0 + 13545c2 commit 0195739

19 files changed

+214
-66
lines changed

docs/book/src/developer/providers/v1.1-to-v1.2.md

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -74,3 +74,10 @@ in ClusterAPI are kept in sync with the versions used by `sigs.k8s.io/controller
7474

7575
This change has been introduced in CAPI in the following PRs: [#6072](https://github.com/kubernetes-sigs/cluster-api/pull/6072), [#6190](https://github.com/kubernetes-sigs/cluster-api/pull/6190).</br>
7676
**Note**: This change is not mandatory for providers, but highly recommended.
77+
78+
- The following E2E framework functions now check that machines are created in the expected failure domain (if defined);
79+
all E2E tests can now verify failure domains too.
80+
- `ApplyClusterTemplateAndWait`
81+
- `WaitForControlPlaneAndMachinesReady`
82+
- `DiscoveryAndWaitForMachineDeployments`
83+
- The `AssertControlPlaneFailureDomains` function in the E2E test framework has been modified to allow proper failure domain testing.

test/e2e/cluster_upgrade.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ func ClusterUpgradeConformanceSpec(ctx context.Context, inputGetter func() Clust
112112
clusterResources = new(clusterctl.ApplyClusterTemplateAndWaitResult)
113113
})
114114

115-
It("Should create and upgrade a workload cluster and run kubetest", func() {
115+
It("Should create and upgrade a workload cluster and eventually run kubetest", func() {
116116
By("Creating a workload cluster")
117117

118118
clusterctl.ApplyClusterTemplateAndWait(ctx, clusterctl.ApplyClusterTemplateAndWaitInput{

test/e2e/data/infrastructure-docker/v1beta1/bases/cluster-with-kcp.yaml

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,24 @@ apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
44
kind: DockerCluster
55
metadata:
66
name: '${CLUSTER_NAME}'
7+
spec:
8+
failureDomains:
9+
fd1:
10+
controlPlane: true
11+
fd2:
12+
controlPlane: true
13+
fd3:
14+
controlPlane: true
15+
fd4:
16+
controlPlane: false
17+
fd5:
18+
controlPlane: false
19+
fd6:
20+
controlPlane: false
21+
fd7:
22+
controlPlane: false
23+
fd8:
24+
controlPlane: false
725
---
826
# Cluster object with
927
# - Reference to the KubeadmControlPlane object

test/e2e/data/infrastructure-docker/v1beta1/bases/cluster-with-topology.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ spec:
2323
- class: "default-worker"
2424
name: "md-0"
2525
replicas: ${WORKER_MACHINE_COUNT}
26+
failureDomain: fd4
2627
variables:
2728
# The imageRepository variable is defaulted by the Cluster webhook.
2829
#- name: imageRepository

test/e2e/data/infrastructure-docker/v1beta1/bases/md.yaml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,3 +52,4 @@ spec:
5252
name: "${CLUSTER_NAME}-md-0"
5353
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
5454
kind: DockerMachineTemplate
55+
failureDomain: fd4

test/e2e/data/infrastructure-docker/v1beta1/bases/mp.yaml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,12 @@ spec:
2020
kind: DockerMachinePool
2121
name: "${CLUSTER_NAME}-dmp-0"
2222
version: "${KUBERNETES_VERSION}"
23+
failureDomains:
24+
- fd4
25+
- fd5
26+
- fd6
27+
- fd7
28+
- fd8
2329
---
2430
# DockerMachinePool using default values referenced by the MachinePool
2531
apiVersion: infrastructure.cluster.x-k8s.io/v1beta1

test/e2e/data/infrastructure-docker/v1beta1/clusterclass-quick-start.yaml

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,24 @@ metadata:
134134
name: quick-start-my-cluster
135135
spec:
136136
template:
137-
spec: {}
137+
spec:
138+
failureDomains:
139+
fd1:
140+
controlPlane: true
141+
fd2:
142+
controlPlane: true
143+
fd3:
144+
controlPlane: true
145+
fd4:
146+
controlPlane: false
147+
fd5:
148+
controlPlane: false
149+
fd6:
150+
controlPlane: false
151+
fd7:
152+
controlPlane: false
153+
fd8:
154+
controlPlane: false
138155
---
139156
kind: KubeadmControlPlaneTemplate
140157
apiVersion: controlplane.cluster.x-k8s.io/v1beta1

test/framework/controlplane_helpers.go

Lines changed: 28 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -18,13 +18,15 @@ package framework
1818

1919
import (
2020
"context"
21+
"fmt"
2122

2223
. "github.com/onsi/ginkgo"
2324
. "github.com/onsi/gomega"
2425
. "github.com/onsi/gomega/gstruct"
2526
"github.com/pkg/errors"
2627
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2728
"k8s.io/apimachinery/pkg/labels"
29+
"k8s.io/apimachinery/pkg/util/sets"
2830
"k8s.io/utils/pointer"
2931
"sigs.k8s.io/controller-runtime/pkg/client"
3032

@@ -175,45 +177,44 @@ func WaitForControlPlaneToBeReady(ctx context.Context, input WaitForControlPlane
175177

176178
// AssertControlPlaneFailureDomainsInput is the input for AssertControlPlaneFailureDomains.
177179
type AssertControlPlaneFailureDomainsInput struct {
178-
GetLister GetLister
179-
ClusterKey client.ObjectKey
180-
// ExpectedFailureDomains is required because this function cannot (easily) infer what success looks like.
181-
// In theory this field is not strictly necessary and could be replaced with enough clever logic/math.
182-
ExpectedFailureDomains map[string]int
180+
Lister Lister
181+
Cluster *clusterv1.Cluster
183182
}
184183

185184
// AssertControlPlaneFailureDomains will look at all control plane machines and see what failure domains they were
186185
// placed in. If machines were placed in unexpected or wrong failure domains the expectation will fail.
187186
func AssertControlPlaneFailureDomains(ctx context.Context, input AssertControlPlaneFailureDomainsInput) {
188-
failureDomainCounts := map[string]int{}
189-
190-
// Look up the cluster object to find all known failure domains.
191-
cluster := &clusterv1.Cluster{}
192-
Expect(input.GetLister.Get(ctx, input.ClusterKey, cluster)).To(Succeed())
193-
194-
for fd := range cluster.Status.FailureDomains {
195-
failureDomainCounts[fd] = 0
187+
Expect(ctx).NotTo(BeNil(), "ctx is required for AssertControlPlaneFailureDomains")
188+
Expect(input.Lister).ToNot(BeNil(), "Invalid argument. input.Lister can't be nil when calling AssertControlPlaneFailureDomains")
189+
Expect(input.Cluster).ToNot(BeNil(), "Invalid argument. input.Cluster can't be nil when calling AssertControlPlaneFailureDomains")
190+
191+
By("Checking all the the control plane machines are in the expected failure domains")
192+
controlPlaneFailureDomains := sets.NewString()
193+
for fd, fdSettings := range input.Cluster.Status.FailureDomains {
194+
if fdSettings.ControlPlane {
195+
controlPlaneFailureDomains.Insert(fd)
196+
}
196197
}
197198

198199
// Look up all the control plane machines.
199-
inClustersNamespaceListOption := client.InNamespace(input.ClusterKey.Namespace)
200+
inClustersNamespaceListOption := client.InNamespace(input.Cluster.Namespace)
200201
matchClusterListOption := client.MatchingLabels{
201-
clusterv1.ClusterLabelName: input.ClusterKey.Name,
202+
clusterv1.ClusterLabelName: input.Cluster.Name,
202203
clusterv1.MachineControlPlaneLabelName: "",
203204
}
204205

205206
machineList := &clusterv1.MachineList{}
206-
Expect(input.GetLister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption)).
207-
To(Succeed(), "Couldn't list machines for the cluster %q", input.ClusterKey.Name)
207+
Expect(input.Lister.List(ctx, machineList, inClustersNamespaceListOption, matchClusterListOption)).
208+
To(Succeed(), "Couldn't list control-plane machines for the cluster %q", input.Cluster.Name)
208209

209-
// Count all control plane machine failure domains.
210210
for _, machine := range machineList.Items {
211-
if machine.Spec.FailureDomain == nil {
212-
continue
211+
if machine.Spec.FailureDomain != nil {
212+
machineFD := *machine.Spec.FailureDomain
213+
if !controlPlaneFailureDomains.Has(machineFD) {
214+
Fail(fmt.Sprintf("Machine %s is in the %q failure domain, expecting one of the failure domain defined at cluster level", machine.Name, machineFD))
215+
}
213216
}
214-
failureDomainCounts[*machine.Spec.FailureDomain]++
215217
}
216-
Expect(failureDomainCounts).To(Equal(input.ExpectedFailureDomains))
217218
}
218219

219220
// DiscoveryAndWaitForControlPlaneInitializedInput is the input type for DiscoveryAndWaitForControlPlaneInitialized.
@@ -277,6 +278,11 @@ func WaitForControlPlaneAndMachinesReady(ctx context.Context, input WaitForContr
277278
ControlPlane: input.ControlPlane,
278279
}
279280
WaitForControlPlaneToBeReady(ctx, waitForControlPlaneToBeReadyInput, intervals...)
281+
282+
AssertControlPlaneFailureDomains(ctx, AssertControlPlaneFailureDomainsInput{
283+
Lister: input.GetLister,
284+
Cluster: input.Cluster,
285+
})
280286
}
281287

282288
// UpgradeControlPlaneAndWaitForUpgradeInput is the input type for UpgradeControlPlaneAndWaitForUpgrade.

test/framework/machinedeployment_helpers.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,48 @@ func WaitForMachineDeploymentNodesToExist(ctx context.Context, input WaitForMach
121121
}, intervals...).Should(Equal(int(*input.MachineDeployment.Spec.Replicas)))
122122
}
123123

124+
// AssertMachineDeploymentFailureDomainsInput is the input for AssertMachineDeploymentFailureDomains.
125+
type AssertMachineDeploymentFailureDomainsInput struct {
126+
Lister Lister
127+
Cluster *clusterv1.Cluster
128+
MachineDeployment *clusterv1.MachineDeployment
129+
}
130+
131+
// AssertMachineDeploymentFailureDomains will look at all MachineDeployment machines and see what failure domains they were
132+
// placed in. If machines were placed in unexpected or wrong failure domains the expectation will fail.
133+
func AssertMachineDeploymentFailureDomains(ctx context.Context, input AssertMachineDeploymentFailureDomainsInput) {
134+
Expect(ctx).NotTo(BeNil(), "ctx is required for AssertMachineDeploymentFailureDomains")
135+
Expect(input.Lister).ToNot(BeNil(), "Invalid argument. input.Lister can't be nil when calling AssertMachineDeploymentFailureDomains")
136+
Expect(input.MachineDeployment).ToNot(BeNil(), "Invalid argument. input.MachineDeployment can't be nil when calling AssertMachineDeploymentFailureDomains")
137+
138+
machineDeploymentFD := pointer.StringDeref(input.MachineDeployment.Spec.Template.Spec.FailureDomain, "<None>")
139+
140+
By(fmt.Sprintf("Checking all the machines controlled by %s are in the %q failure domain", input.MachineDeployment.Name, machineDeploymentFD))
141+
selectorMap, err := metav1.LabelSelectorAsMap(&input.MachineDeployment.Spec.Selector)
142+
Expect(err).NotTo(HaveOccurred())
143+
144+
ms := &clusterv1.MachineSetList{}
145+
err = input.Lister.List(ctx, ms, client.InNamespace(input.Cluster.Namespace), client.MatchingLabels(selectorMap))
146+
Expect(err).NotTo(HaveOccurred())
147+
148+
for _, machineSet := range ms.Items {
149+
machineSetFD := pointer.StringDeref(machineSet.Spec.Template.Spec.FailureDomain, "<None>")
150+
Expect(machineSetFD).To(Equal(machineDeploymentFD), "MachineSet %s is in the %q failure domain, expecting %q", machineSet.Name, machineSetFD, machineDeploymentFD)
151+
152+
selectorMap, err = metav1.LabelSelectorAsMap(&machineSet.Spec.Selector)
153+
Expect(err).NotTo(HaveOccurred())
154+
155+
machines := &clusterv1.MachineList{}
156+
err = input.Lister.List(ctx, machines, client.InNamespace(machineSet.Namespace), client.MatchingLabels(selectorMap))
157+
Expect(err).NotTo(HaveOccurred())
158+
159+
for _, machine := range machines.Items {
160+
machineFD := pointer.StringDeref(machine.Spec.FailureDomain, "<None>")
161+
Expect(machineFD).To(Equal(machineDeploymentFD), "Machine %s is in the %q failure domain, expecting %q", machine.Name, machineFD, machineDeploymentFD)
162+
}
163+
}
164+
}
165+
124166
// DiscoveryAndWaitForMachineDeploymentsInput is the input type for DiscoveryAndWaitForMachineDeployments.
125167
type DiscoveryAndWaitForMachineDeploymentsInput struct {
126168
Lister Lister
@@ -144,6 +186,12 @@ func DiscoveryAndWaitForMachineDeployments(ctx context.Context, input DiscoveryA
144186
Cluster: input.Cluster,
145187
MachineDeployment: deployment,
146188
}, intervals...)
189+
190+
AssertMachineDeploymentFailureDomains(ctx, AssertMachineDeploymentFailureDomainsInput{
191+
Lister: input.Lister,
192+
Cluster: input.Cluster,
193+
MachineDeployment: deployment,
194+
})
147195
}
148196
return machineDeployments
149197
}

test/framework/machinepool_helpers.go

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,9 @@ func DiscoveryAndWaitForMachinePools(ctx context.Context, input DiscoveryAndWait
107107
Getter: input.Getter,
108108
MachinePool: machinepool,
109109
}, intervals...)
110+
111+
// TODO: check for failure domains; currently MP doesn't provide a way to check where Machines are placed
112+
// (checking infrastructure is the only alternative, but this makes the test non-portable)
110113
}
111114
return machinePools
112115
}

0 commit comments

Comments
 (0)