Skip to content

Commit f28fc73

Browse files
authored
Merge pull request #9016 from willie-yao/cc-mp-squashed
✨ Add MachinePool workers support in ClusterClass
2 parents a595a0c + 8ae27c1 commit f28fc73

36 files changed

+1974
-84
lines changed

api/v1beta1/condition_consts.go

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -299,6 +299,19 @@ const (
299299
// not yet completed because the upgrade for at least one of the MachineDeployments has been deferred.
300300
TopologyReconciledMachineDeploymentsUpgradeDeferredReason = "MachineDeploymentsUpgradeDeferred"
301301

302+
// TopologyReconciledMachinePoolsUpgradePendingReason (Severity=Info) documents reconciliation of a Cluster topology
303+
// not yet completed because at least one of the MachinePools is not yet updated to match the desired topology spec.
304+
TopologyReconciledMachinePoolsUpgradePendingReason = "MachinePoolsUpgradePending"
305+
306+
// TopologyReconciledMachinePoolsCreatePendingReason (Severity=Info) documents reconciliation of a Cluster topology
307+
// not yet completed because at least one of the MachinePools is yet to be created.
308+
// This generally happens because new MachinePool creations are held off while the ControlPlane is not stable.
309+
TopologyReconciledMachinePoolsCreatePendingReason = "MachinePoolsCreatePending"
310+
311+
// TopologyReconciledMachinePoolsUpgradeDeferredReason (Severity=Info) documents reconciliation of a Cluster topology
312+
// not yet completed because the upgrade for at least one of the MachinePools has been deferred.
313+
TopologyReconciledMachinePoolsUpgradeDeferredReason = "MachinePoolsUpgradeDeferred"
314+
302315
// TopologyReconciledHookBlockingReason (Severity=Info) documents reconciliation of a Cluster topology
303316
// not yet completed because at least one of the lifecycle hooks is blocking.
304317
TopologyReconciledHookBlockingReason = "LifecycleHookBlocking"

cmd/clusterctl/client/cluster/topology.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -802,6 +802,17 @@ func clusterClassUsesTemplate(cc *clusterv1.ClusterClass, templateRef *corev1.Ob
802802
}
803803
}
804804

805+
for _, mpClass := range cc.Spec.Workers.MachinePools {
806+
// Check the bootstrap ref.
807+
if equalRef(mpClass.Template.Bootstrap.Ref, templateRef) {
808+
return true
809+
}
810+
// Check the infrastructure ref.
811+
if equalRef(mpClass.Template.Infrastructure.Ref, templateRef) {
812+
return true
813+
}
814+
}
815+
805816
return false
806817
}
807818

config/rbac/role.yaml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -187,6 +187,18 @@ rules:
187187
- patch
188188
- update
189189
- watch
190+
- apiGroups:
191+
- cluster.x-k8s.io
192+
resources:
193+
- machinepools
194+
verbs:
195+
- create
196+
- delete
197+
- get
198+
- list
199+
- patch
200+
- update
201+
- watch
190202
- apiGroups:
191203
- cluster.x-k8s.io
192204
resources:

controllers/alias.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -143,7 +143,8 @@ func (r *MachineHealthCheckReconciler) SetupWithManager(ctx context.Context, mgr
143143

144144
// ClusterTopologyReconciler reconciles a managed topology for a Cluster object.
145145
type ClusterTopologyReconciler struct {
146-
Client client.Client
146+
Client client.Client
147+
Tracker *remote.ClusterCacheTracker
147148
// APIReader is used to list MachineSets directly via the API server to avoid
148149
// race conditions caused by an outdated cache.
149150
APIReader client.Reader
@@ -162,6 +163,7 @@ func (r *ClusterTopologyReconciler) SetupWithManager(ctx context.Context, mgr ct
162163
return (&clustertopologycontroller.Reconciler{
163164
Client: r.Client,
164165
APIReader: r.APIReader,
166+
Tracker: r.Tracker,
165167
RuntimeClient: r.RuntimeClient,
166168
UnstructuredCachingClient: r.UnstructuredCachingClient,
167169
WatchFilterValue: r.WatchFilterValue,

internal/controllers/clusterclass/clusterclass_controller.go

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,11 +169,20 @@ func (r *Reconciler) reconcileExternalReferences(ctx context.Context, clusterCla
169169
}
170170
}
171171

172+
for _, mpClass := range clusterClass.Spec.Workers.MachinePools {
173+
if mpClass.Template.Bootstrap.Ref != nil {
174+
refs = append(refs, mpClass.Template.Bootstrap.Ref)
175+
}
176+
if mpClass.Template.Infrastructure.Ref != nil {
177+
refs = append(refs, mpClass.Template.Infrastructure.Ref)
178+
}
179+
}
180+
172181
// Ensure all referenced objects are owned by the ClusterClass.
173182
// Nb. Some external objects can be referenced multiple times in the ClusterClass,
174183
// but we only want to set the owner reference once per unique external object.
175184
// For example the same KubeadmConfigTemplate could be referenced in multiple MachineDeployment
176-
// classes.
185+
// or MachinePool classes.
177186
errs := []error{}
178187
reconciledRefs := sets.Set[string]{}
179188
outdatedRefs := map[*corev1.ObjectReference]*corev1.ObjectReference{}

internal/controllers/topology/cluster/blueprint.go

Lines changed: 27 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste
3434
Topology: cluster.Spec.Topology,
3535
ClusterClass: clusterClass,
3636
MachineDeployments: map[string]*scope.MachineDeploymentBlueprint{},
37+
MachinePools: map[string]*scope.MachinePoolBlueprint{},
3738
}
3839

3940
var err error
@@ -82,7 +83,7 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste
8283
// Get the bootstrap machine template.
8384
machineDeploymentBlueprint.BootstrapTemplate, err = r.getReference(ctx, machineDeploymentClass.Template.Bootstrap.Ref)
8485
if err != nil {
85-
return nil, errors.Wrapf(err, "failed to get bootstrap machine template for %s, MachineDeployment class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machineDeploymentClass.Class)
86+
return nil, errors.Wrapf(err, "failed to get bootstrap config template for %s, MachineDeployment class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machineDeploymentClass.Class)
8687
}
8788

8889
// If the machineDeploymentClass defines a MachineHealthCheck add it to the blueprint.
@@ -92,5 +93,30 @@ func (r *Reconciler) getBlueprint(ctx context.Context, cluster *clusterv1.Cluste
9293
blueprint.MachineDeployments[machineDeploymentClass.Class] = machineDeploymentBlueprint
9394
}
9495

96+
// Loop over the machine pool classes in ClusterClass
97+
// and fetch the related templates.
98+
for _, machinePoolClass := range blueprint.ClusterClass.Spec.Workers.MachinePools {
99+
machinePoolBlueprint := &scope.MachinePoolBlueprint{}
100+
101+
// Copy the metadata from the MachinePool class into the blueprint; it is later layered
102+
// with the additional metadata defined in the Cluster's topology section
103+
// for the MachinePool that is created or updated.
104+
machinePoolClass.Template.Metadata.DeepCopyInto(&machinePoolBlueprint.Metadata)
105+
106+
// Get the InfrastructureMachinePoolTemplate.
107+
machinePoolBlueprint.InfrastructureMachinePoolTemplate, err = r.getReference(ctx, machinePoolClass.Template.Infrastructure.Ref)
108+
if err != nil {
109+
return nil, errors.Wrapf(err, "failed to get InfrastructureMachinePoolTemplate for %s, MachinePool class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machinePoolClass.Class)
110+
}
111+
112+
// Get the bootstrap config template.
113+
machinePoolBlueprint.BootstrapTemplate, err = r.getReference(ctx, machinePoolClass.Template.Bootstrap.Ref)
114+
if err != nil {
115+
return nil, errors.Wrapf(err, "failed to get bootstrap config for %s, MachinePool class %q", tlog.KObj{Obj: blueprint.ClusterClass}, machinePoolClass.Class)
116+
}
117+
118+
blueprint.MachinePools[machinePoolClass.Class] = machinePoolBlueprint
119+
}
120+
95121
return blueprint, nil
96122
}

internal/controllers/topology/cluster/cluster_controller.go

Lines changed: 40 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ import (
3636
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3737
"sigs.k8s.io/cluster-api/api/v1beta1/index"
3838
"sigs.k8s.io/cluster-api/controllers/external"
39+
"sigs.k8s.io/cluster-api/controllers/remote"
40+
expv1 "sigs.k8s.io/cluster-api/exp/api/v1beta1"
3941
runtimecatalog "sigs.k8s.io/cluster-api/exp/runtime/catalog"
4042
runtimehooksv1 "sigs.k8s.io/cluster-api/exp/runtime/hooks/api/v1alpha1"
4143
"sigs.k8s.io/cluster-api/feature"
@@ -57,13 +59,15 @@ import (
5759
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusters;clusters/status,verbs=get;list;watch;create;update;patch;delete
5860
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=clusterclasses,verbs=get;list;watch;create;update;patch;delete
5961
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinedeployments,verbs=get;list;watch;create;update;patch;delete
62+
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinepools,verbs=get;list;watch;create;update;patch;delete
6063
// +kubebuilder:rbac:groups=cluster.x-k8s.io,resources=machinehealthchecks,verbs=get;list;watch;create;update;patch;delete
6164
// +kubebuilder:rbac:groups=apiextensions.k8s.io,resources=customresourcedefinitions,verbs=get;list;watch
6265
// +kubebuilder:rbac:groups=core,resources=secrets,verbs=get;create;delete
6366

6467
// Reconciler reconciles a managed topology for a Cluster object.
6568
type Reconciler struct {
66-
Client client.Client
69+
Client client.Client
70+
Tracker *remote.ClusterCacheTracker
6771
// APIReader is used to list MachineSets directly via the API server to avoid
6872
// race conditions caused by an outdated cache.
6973
APIReader client.Reader
@@ -103,6 +107,12 @@ func (r *Reconciler) SetupWithManager(ctx context.Context, mgr ctrl.Manager, opt
103107
// Only trigger Cluster reconciliation if the MachineDeployment is topology owned.
104108
builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))),
105109
).
110+
Watches(
111+
&expv1.MachinePool{},
112+
handler.EnqueueRequestsFromMapFunc(r.machinePoolToCluster),
113+
// Only trigger Cluster reconciliation if the MachinePool is topology owned.
114+
builder.WithPredicates(predicates.ResourceIsTopologyOwned(ctrl.LoggerFrom(ctx))),
115+
).
106116
WithOptions(options).
107117
WithEventFilter(predicates.ResourceNotPausedAndHasFilterLabel(ctrl.LoggerFrom(ctx), r.WatchFilterValue)).
108118
Build(r)
@@ -193,7 +203,16 @@ func (r *Reconciler) Reconcile(ctx context.Context, req ctrl.Request) (_ ctrl.Re
193203
}
194204

195205
// Handle normal reconciliation loop.
196-
return r.reconcile(ctx, s)
206+
result, err := r.reconcile(ctx, s)
207+
if err != nil {
208+
// Requeue without returning an error if the reconcile failed because another worker
209+
// currently holds the ClusterCacheTracker lock for this cluster.
210+
if errors.Is(err, remote.ErrClusterLocked) {
211+
log.V(5).Info("Requeuing because another worker has the lock on the ClusterCacheTracker")
212+
return ctrl.Result{Requeue: true}, nil
213+
}
214+
}
215+
return result, err
197216
}
198217

199218
// reconcile handles cluster reconciliation.
@@ -360,6 +379,25 @@ func (r *Reconciler) machineDeploymentToCluster(_ context.Context, o client.Obje
360379
}}
361380
}
362381

382+
// machinePoolToCluster is a handler.MapFunc that maps a MachinePool to a reconcile request
383+
// for the Cluster named in its spec; MachinePools with an empty ClusterName produce no request.
384+
func (r *Reconciler) machinePoolToCluster(_ context.Context, o client.Object) []ctrl.Request {
385+
mp, ok := o.(*expv1.MachinePool)
386+
if !ok {
387+
panic(fmt.Sprintf("Expected a MachinePool but got a %T", o))
388+
}
389+
if mp.Spec.ClusterName == "" {
390+
return nil
391+
}
392+
393+
return []ctrl.Request{{
394+
NamespacedName: types.NamespacedName{
395+
Namespace: mp.Namespace,
396+
Name: mp.Spec.ClusterName,
397+
},
398+
}}
399+
}
400+
363401
func (r *Reconciler) reconcileDelete(ctx context.Context, cluster *clusterv1.Cluster) (ctrl.Result, error) {
364402
// Call the BeforeClusterDelete hook if the 'ok-to-delete' annotation is not set
365403
// and add the annotation to the cluster after receiving a successful non-blocking response.

internal/controllers/topology/cluster/conditions.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -143,6 +143,23 @@ func (r *Reconciler) reconcileTopologyReconciledCondition(s *scope.Scope, cluste
143143
s.Blueprint.Topology.Version,
144144
)
145145
reason = clusterv1.TopologyReconciledMachineDeploymentsUpgradeDeferredReason
146+
case s.UpgradeTracker.MachinePools.IsAnyPendingUpgrade():
147+
fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s on hold.",
148+
computeNameList(s.UpgradeTracker.MachinePools.PendingUpgradeNames()),
149+
s.Blueprint.Topology.Version,
150+
)
151+
reason = clusterv1.TopologyReconciledMachinePoolsUpgradePendingReason
152+
case s.UpgradeTracker.MachinePools.IsAnyPendingCreate():
153+
fmt.Fprintf(msgBuilder, "MachinePool(s) for Topologies %s creation on hold.",
154+
computeNameList(s.UpgradeTracker.MachinePools.PendingCreateTopologyNames()),
155+
)
156+
reason = clusterv1.TopologyReconciledMachinePoolsCreatePendingReason
157+
case s.UpgradeTracker.MachinePools.DeferredUpgrade():
158+
fmt.Fprintf(msgBuilder, "MachinePool(s) %s rollout and upgrade to version %s deferred.",
159+
computeNameList(s.UpgradeTracker.MachinePools.DeferredUpgradeNames()),
160+
s.Blueprint.Topology.Version,
161+
)
162+
reason = clusterv1.TopologyReconciledMachinePoolsUpgradeDeferredReason
146163
}
147164

148165
switch {

0 commit comments

Comments
 (0)