Skip to content

Commit 063ce23

Browse files
Add failure domain support to CAPD
1 parent eecabe4 commit 063ce23

File tree

9 files changed

+83
-42
lines changed

9 files changed

+83
-42
lines changed

test/infrastructure/docker/api/v1beta1/dockercluster_types.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,7 @@ type DockerClusterSpec struct {
3737
// +optional
3838
ControlPlaneEndpoint APIEndpoint `json:"controlPlaneEndpoint"`
3939

40-
// FailureDomains are not usulaly defined on the spec.
40+
// FailureDomains are usually not defined in the spec.
4141
// The docker provider is special since failure domains don't mean anything in a local docker environment.
4242
// Instead, the docker cluster controller will simply copy these into the Status and allow the Cluster API
4343
// controllers to do what they will with the defined failure domains.

test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclusters.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -371,7 +371,7 @@ spec:
371371
is suitable for use by control plane machines.
372372
type: boolean
373373
type: object
374-
description: FailureDomains are not usulaly defined on the spec. The
374+
description: FailureDomains are usually not defined in the spec. The
375375
docker provider is special since failure domains don't mean anything
376376
in a local docker environment. Instead, the docker cluster controller
377377
will simply copy these into the Status and allow the Cluster API

test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockerclustertemplates.yaml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -203,7 +203,7 @@ spec:
203203
domain is suitable for use by control plane machines.
204204
type: boolean
205205
type: object
206-
description: FailureDomains are not usulaly defined on the
206+
description: FailureDomains are usually not defined in the
207207
spec. The docker provider is special since failure domains
208208
don't mean anything in a local docker environment. Instead,
209209
the docker cluster controller will simply copy these into

test/infrastructure/docker/exp/internal/controllers/dockermachinepool_controller.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ package controllers
2020
import (
2121
"context"
2222
"fmt"
23+
"time"
2324

2425
"github.com/pkg/errors"
2526
apierrors "k8s.io/apimachinery/pkg/api/errors"
@@ -185,7 +186,8 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust
185186
}
186187

187188
// Reconcile machines and updates Status.Instances
188-
if res, err := pool.ReconcileMachines(ctx); err != nil || !res.IsZero() {
189+
res, err := pool.ReconcileMachines(ctx)
190+
if err != nil {
189191
return res, err
190192
}
191193

@@ -207,7 +209,12 @@ func (r *DockerMachinePoolReconciler) reconcileNormal(ctx context.Context, clust
207209
}
208210

209211
dockerMachinePool.Status.Ready = len(dockerMachinePool.Spec.ProviderIDList) == int(*machinePool.Spec.Replicas)
210-
return ctrl.Result{}, nil
212+
213+
// If some machine is still provisioning, requeue after a few seconds to re-check the infrastructure.
214+
if !dockerMachinePool.Status.Ready && res.IsZero() {
215+
return ctrl.Result{RequeueAfter: 10 * time.Second}, nil
216+
}
217+
return res, nil
211218
}
212219

213220
func getDockerMachinePoolProviderID(clusterName, dockerMachinePoolName string) string {

test/infrastructure/docker/exp/internal/docker/nodepool.go

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,8 @@ import (
2121
"context"
2222
"encoding/base64"
2323
"fmt"
24+
"math/rand"
25+
"strings"
2426
"time"
2527

2628
"github.com/pkg/errors"
@@ -43,7 +45,7 @@ const (
4345
)
4446

4547
// NodePool is a wrapper around a collection of like machines which are owned by a DockerMachinePool. A node pool
46-
// provides a friendly way of managing (adding, deleting, reimaging) a set of docker machines. The node pool will also
48+
// provides a friendly way of managing (adding, deleting, updating) a set of docker machines. The node pool will also
4749
// sync the docker machine pool status Instances field with the state of the docker machines.
4850
type NodePool struct {
4951
client client.Client
@@ -86,7 +88,7 @@ func (np *NodePool) ReconcileMachines(ctx context.Context) (ctrl.Result, error)
8688
for _, machine := range np.machines {
8789
totalNumberOfMachines++
8890
if totalNumberOfMachines > desiredReplicas || !np.isMachineMatchingInfrastructureSpec(machine) {
89-
externalMachine, err := docker.NewMachine(ctx, np.cluster, machine.Name(), np.dockerMachinePool.Spec.Template.CustomImage, np.labelFilters)
91+
externalMachine, err := docker.NewMachine(ctx, np.cluster, machine.Name(), np.labelFilters)
9092
if err != nil {
9193
return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine named %s", machine.Name())
9294
}
@@ -151,7 +153,7 @@ func (np *NodePool) ReconcileMachines(ctx context.Context) (ctrl.Result, error)
151153
// Delete will delete all of the machines in the node pool.
152154
func (np *NodePool) Delete(ctx context.Context) error {
153155
for _, machine := range np.machines {
154-
externalMachine, err := docker.NewMachine(ctx, np.cluster, machine.Name(), np.dockerMachinePool.Spec.Template.CustomImage, np.labelFilters)
156+
externalMachine, err := docker.NewMachine(ctx, np.cluster, machine.Name(), np.labelFilters)
155157
if err != nil {
156158
return errors.Wrapf(err, "failed to create helper for managing the externalMachine named %s", machine.Name())
157159
}
@@ -165,7 +167,15 @@ func (np *NodePool) Delete(ctx context.Context) error {
165167
}
166168

167169
func (np *NodePool) isMachineMatchingInfrastructureSpec(machine *docker.Machine) bool {
168-
return machine.ImageVersion() == container.SemverToOCIImageTag(*np.machinePool.Spec.Template.Spec.Version)
170+
return imageVersion(machine) == container.SemverToOCIImageTag(*np.machinePool.Spec.Template.Spec.Version)
171+
}
172+
173+
// imageVersion returns the version (image tag) of the container image used by the machine.
174+
// NOTE: Image version might be different from the Kubernetes version, because some characters
175+
// allowed by semver (e.g. +) can't be used for image tags, so they are replaced with "_".
176+
func imageVersion(m *docker.Machine) string {
177+
containerImage := m.ContainerImage()
178+
return containerImage[strings.LastIndex(containerImage, ":")+1:]
169179
}
170180

171181
// machinesMatchingInfrastructureSpec returns all of the docker.Machines which match the machine pool / docker machine pool spec.
@@ -183,12 +193,28 @@ func (np *NodePool) machinesMatchingInfrastructureSpec() []*docker.Machine {
183193
// addMachine will add a new machine to the node pool and update the docker machine pool status.
184194
func (np *NodePool) addMachine(ctx context.Context) error {
185195
instanceName := fmt.Sprintf("worker-%s", util.RandomString(6))
186-
externalMachine, err := docker.NewMachine(ctx, np.cluster, instanceName, np.dockerMachinePool.Spec.Template.CustomImage, np.labelFilters)
196+
externalMachine, err := docker.NewMachine(ctx, np.cluster, instanceName, np.labelFilters)
187197
if err != nil {
188198
return errors.Wrapf(err, "failed to create helper for managing the externalMachine named %s", instanceName)
189199
}
190200

191-
if err := externalMachine.Create(ctx, constants.WorkerNodeRoleValue, np.machinePool.Spec.Template.Spec.Version, np.dockerMachinePool.Spec.Template.ExtraMounts); err != nil {
201+
// NOTE: FailureDomains don't mean much in CAPD since it's all local, but we are setting a label on
202+
// each container, so we can check placement.
203+
labels := map[string]string{}
204+
for k, v := range np.labelFilters {
205+
labels[k] = v
206+
}
207+
208+
if len(np.machinePool.Spec.FailureDomains) > 0 {
209+
// For MachinePools placement is expected to be managed by the underlying infrastructure primitive, but
210+
// given that there is no such thing in CAPD, we are picking a random failure domain.
211+
randomIndex := rand.Intn(len(np.machinePool.Spec.FailureDomains)) //nolint:gosec
212+
for k, v := range docker.FailureDomainLabel(&np.machinePool.Spec.FailureDomains[randomIndex]) {
213+
labels[k] = v
214+
}
215+
}
216+
217+
if err := externalMachine.Create(ctx, np.dockerMachinePool.Spec.Template.CustomImage, constants.WorkerNodeRoleValue, np.machinePool.Spec.Template.Spec.Version, labels, np.dockerMachinePool.Spec.Template.ExtraMounts); err != nil {
192218
return errors.Wrapf(err, "failed to create docker machine with instance name %s", instanceName)
193219
}
194220
return nil
@@ -244,7 +270,7 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin
244270
}
245271
}()
246272

247-
externalMachine, err := docker.NewMachine(ctx, np.cluster, machine.Name(), np.dockerMachinePool.Spec.Template.CustomImage, np.labelFilters)
273+
externalMachine, err := docker.NewMachine(ctx, np.cluster, machine.Name(), np.labelFilters)
248274
if err != nil {
249275
return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine named %s", machine.Name())
250276
}

test/infrastructure/docker/internal/controllers/dockermachine_controller.go

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,7 @@ func (r *DockerMachineReconciler) Reconcile(ctx context.Context, req ctrl.Reques
143143
}
144144

145145
// Create a helper for managing the docker container hosting the machine.
146-
externalMachine, err := docker.NewMachine(ctx, cluster, machine.Name, dockerMachine.Spec.CustomImage, nil)
146+
externalMachine, err := docker.NewMachine(ctx, cluster, machine.Name, nil)
147147
if err != nil {
148148
return ctrl.Result{}, errors.Wrapf(err, "failed to create helper for managing the externalMachine")
149149
}
@@ -231,7 +231,9 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster *
231231

232232
// Create the machine if not existing yet
233233
if !externalMachine.Exists() {
234-
if err := externalMachine.Create(ctx, role, machine.Spec.Version, dockerMachine.Spec.ExtraMounts); err != nil {
234+
// NOTE: FailureDomains don't mean much in CAPD since it's all local, but we are setting a label on
235+
// each container, so we can check placement.
236+
if err := externalMachine.Create(ctx, dockerMachine.Spec.CustomImage, role, machine.Spec.Version, docker.FailureDomainLabel(machine.Spec.FailureDomain), dockerMachine.Spec.ExtraMounts); err != nil {
235237
return ctrl.Result{}, errors.Wrap(err, "failed to create worker DockerMachine")
236238
}
237239
}

test/infrastructure/docker/internal/docker/kind_manager.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ func (m *Manager) CreateControlPlaneNode(ctx context.Context, name, image, clust
7979
Role: constants.ControlPlaneNodeRoleValue,
8080
PortMappings: portMappingsWithAPIServer,
8181
Mounts: mounts,
82+
Labels: labels,
8283
IPFamily: ipFamily,
8384
}
8485
node, err := createNode(ctx, createOpts)

test/infrastructure/docker/internal/docker/machine.go

Lines changed: 16 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -58,16 +58,14 @@ type nodeCreator interface {
5858
type Machine struct {
5959
cluster string
6060
machine string
61-
image string
6261
ipFamily clusterv1.ClusterIPFamily
63-
labels map[string]string
6462
container *types.Node
6563

6664
nodeCreator nodeCreator
6765
}
6866

6967
// NewMachine returns a new Machine service for the given Cluster/DockerCluster pair.
70-
func NewMachine(ctx context.Context, cluster *clusterv1.Cluster, machine, image string, labels map[string]string) (*Machine, error) {
68+
func NewMachine(ctx context.Context, cluster *clusterv1.Cluster, machine string, filterLabels map[string]string) (*Machine, error) {
7169
if cluster == nil {
7270
return nil, errors.New("cluster is required when creating a docker.Machine")
7371
}
@@ -81,7 +79,7 @@ func NewMachine(ctx context.Context, cluster *clusterv1.Cluster, machine, image
8179
filters := container.FilterBuilder{}
8280
filters.AddKeyNameValue(filterLabel, clusterLabelKey, cluster.Name)
8381
filters.AddKeyValue(filterName, fmt.Sprintf("^%s$", machineContainerName(cluster.Name, machine)))
84-
for key, val := range labels {
82+
for key, val := range filterLabels {
8583
filters.AddKeyNameValue(filterLabel, key, val)
8684
}
8785

@@ -98,10 +96,8 @@ func NewMachine(ctx context.Context, cluster *clusterv1.Cluster, machine, image
9896
return &Machine{
9997
cluster: cluster.Name,
10098
machine: machine,
101-
image: image,
10299
ipFamily: ipFamily,
103100
container: newContainer,
104-
labels: labels,
105101
nodeCreator: &Manager{},
106102
}, nil
107103
}
@@ -136,9 +132,7 @@ func ListMachinesByCluster(ctx context.Context, cluster *clusterv1.Cluster, labe
136132
machines[i] = &Machine{
137133
cluster: cluster.Name,
138134
machine: machineFromContainerName(cluster.Name, containerNode.Name),
139-
image: containerNode.Image,
140135
ipFamily: ipFamily,
141-
labels: labels,
142136
container: containerNode,
143137
nodeCreator: &Manager{},
144138
}
@@ -155,17 +149,6 @@ func (m *Machine) IsControlPlane() bool {
155149
return m.container.ClusterRole == constants.ControlPlaneNodeRoleValue
156150
}
157151

158-
// ImageVersion returns the version of the image used or nil if not specified
159-
// NOTE: Image version might be different from the Kubernetes version, because some characters
160-
// allowed by semver (e.g. +) can't be used for image tags, so they are replaced with "_".
161-
func (m *Machine) ImageVersion() string {
162-
if m.image == "" {
163-
return defaultImageTag
164-
}
165-
166-
return m.image[strings.LastIndex(m.image, ":")+1 : len(m.image)]
167-
}
168-
169152
// Exists returns true if the container for this machine exists.
170153
func (m *Machine) Exists() bool {
171154
return m.container != nil
@@ -200,17 +183,26 @@ func (m *Machine) Address(ctx context.Context) (string, error) {
200183
return ipv4, nil
201184
}
202185

186+
// ContainerImage returns the image of the container for this machine
187+
// or empty string if the container does not exist yet.
188+
func (m *Machine) ContainerImage() string {
189+
if m.container == nil {
190+
return ""
191+
}
192+
return m.container.Image
193+
}
194+
203195
// Create creates a docker container hosting a Kubernetes node.
204-
func (m *Machine) Create(ctx context.Context, role string, version *string, mounts []infrav1.Mount) error {
196+
func (m *Machine) Create(ctx context.Context, image string, role string, version *string, labels map[string]string, mounts []infrav1.Mount) error {
205197
log := ctrl.LoggerFrom(ctx)
206198

207199
// Create if not exists.
208200
if m.container == nil {
209201
var err error
210202

211203
machineImage := m.machineImage(version)
212-
if m.image != "" {
213-
machineImage = m.image
204+
if image != "" {
205+
machineImage = image
214206
}
215207

216208
switch role {
@@ -225,7 +217,7 @@ func (m *Machine) Create(ctx context.Context, role string, version *string, moun
225217
0,
226218
kindMounts(mounts),
227219
nil,
228-
m.labels,
220+
labels,
229221
m.ipFamily,
230222
)
231223
if err != nil {
@@ -240,7 +232,7 @@ func (m *Machine) Create(ctx context.Context, role string, version *string, moun
240232
m.cluster,
241233
kindMounts(mounts),
242234
nil,
243-
m.labels,
235+
labels,
244236
m.ipFamily,
245237
)
246238
if err != nil {

test/infrastructure/docker/internal/docker/util.go

Lines changed: 17 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -27,10 +27,22 @@ import (
2727
"sigs.k8s.io/cluster-api/test/infrastructure/docker/internal/docker/types"
2828
)
2929

30-
const clusterLabelKey = "io.x-k8s.kind.cluster"
31-
const nodeRoleLabelKey = "io.x-k8s.kind.role"
32-
const filterLabel = "label"
33-
const filterName = "name"
30+
const (
31+
clusterLabelKey = "io.x-k8s.kind.cluster"
32+
nodeRoleLabelKey = "io.x-k8s.kind.role"
33+
filterLabel = "label"
34+
filterName = "name"
35+
36+
failureDomainLabelKey = "io.x-k8s.cluster.failureDomain"
37+
)
38+
39+
// FailureDomainLabel returns a map with the docker label for the given failure domain.
40+
func FailureDomainLabel(failureDomain *string) map[string]string {
41+
if failureDomain != nil && *failureDomain != "" {
42+
return map[string]string{failureDomainLabelKey: *failureDomain}
43+
}
44+
return nil
45+
}
3446

3547
func machineContainerName(cluster, machine string) string {
3648
if strings.HasPrefix(machine, cluster) {
@@ -100,6 +112,7 @@ func list(ctx context.Context, visit func(context.Context, string, *types.Node),
100112
cluster := clusterLabelKey
101113
image := cntr.Image
102114
status := cntr.Status
115+
103116
visit(ctx, cluster, types.NewNode(name, image, "undetermined").WithStatus(status))
104117
}
105118

0 commit comments

Comments
 (0)