Skip to content

Commit 2d57f77

Browse files
committed
fix: scale down the control plane
Includes the following changes to fix status:
- Use client.Status().Patch
- Skip over machines without a nodeRef

Signed-off-by: Andrew Rynhard <[email protected]>
1 parent 5b021b8 commit 2d57f77

File tree

1 file changed

+49
-30
lines changed

1 file changed

+49
-30
lines changed

controllers/taloscontrolplane_controller.go

Lines changed: 49 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,7 @@ const requeueDuration = 30 * time.Second
5555
type ControlPlane struct {
5656
TCP *controlplanev1.TalosControlPlane
5757
Cluster *capiv1.Cluster
58-
Machines []*capiv1.Machine
58+
Machines []capiv1.Machine
5959
}
6060

6161
// TalosControlPlaneReconciler reconciles a TalosControlPlane object
@@ -123,13 +123,6 @@ func (r *TalosControlPlaneReconciler) Reconcile(req ctrl.Request) (res ctrl.Resu
123123
return ctrl.Result{Requeue: true}, nil
124124
}
125125

126-
// Initialize the patch helper.
127-
patchHelper, err := patch.NewHelper(tcp, r.Client)
128-
if err != nil {
129-
logger.Error(err, "Failed to configure the patch helper")
130-
return ctrl.Result{Requeue: true}, nil
131-
}
132-
133126
// If object doesn't have a finalizer, add one.
134127
controllerutil.AddFinalizer(tcp, controlplanev1.TalosControlPlaneFinalizer)
135128

@@ -141,14 +134,14 @@ func (r *TalosControlPlaneReconciler) Reconcile(req ctrl.Request) (res ctrl.Resu
141134
}
142135
}
143136

144-
// // Always attempt to update status.
137+
// Always attempt to update status.
145138
if err := r.updateStatus(ctx, tcp, cluster); err != nil {
146139
logger.Error(err, "Failed to update TalosControlPlane Status")
147140
reterr = kerrors.NewAggregate([]error{reterr, err})
148141
}
149142

150143
// Always attempt to Patch the TalosControlPlane object and status after each reconciliation.
151-
if err := patchHelper.Patch(ctx, tcp); err != nil {
144+
if err := r.Client.Status().Update(ctx, tcp); err != nil {
152145
logger.Error(err, "Failed to patch TalosControlPlane")
153146
reterr = kerrors.NewAggregate([]error{reterr, err})
154147
}
@@ -206,7 +199,7 @@ func (r *TalosControlPlaneReconciler) Reconcile(req ctrl.Request) (res ctrl.Resu
206199
// We are scaling down
207200
case numMachines > desiredReplicas:
208201
logger.Info("Scaling down control plane", "Desired", desiredReplicas, "Existing", numMachines)
209-
//return r.scaleDownControlPlane(ctx, cluster, tcp, controlPlane)
202+
return r.scaleDownControlPlane(ctx, util.ObjectKey(cluster), controlPlane.TCP.Name)
210203
}
211204

212205
// Generate Cluster Kubeconfig if needed
@@ -255,7 +248,7 @@ func (r *TalosControlPlaneReconciler) reconcileDelete(ctx context.Context, clust
255248
continue
256249
}
257250
// Submit deletion request
258-
if err := r.Client.Delete(ctx, ownedMachine); err != nil && !apierrors.IsNotFound(err) {
251+
if err := r.Client.Delete(ctx, &ownedMachine); err != nil && !apierrors.IsNotFound(err) {
259252
r.Log.Error(err, "Failed to cleanup owned machine")
260253
return ctrl.Result{}, err
261254
}
@@ -266,19 +259,53 @@ func (r *TalosControlPlaneReconciler) reconcileDelete(ctx context.Context, clust
266259
}
267260

268261
// newControlPlane returns an instantiated ControlPlane.
269-
func newControlPlane(cluster *capiv1.Cluster, tcp *controlplanev1.TalosControlPlane, machines []*capiv1.Machine) *ControlPlane {
262+
func newControlPlane(cluster *capiv1.Cluster, tcp *controlplanev1.TalosControlPlane, machines []capiv1.Machine) *ControlPlane {
270263
return &ControlPlane{
271264
TCP: tcp,
272265
Cluster: cluster,
273266
Machines: machines,
274267
}
275268
}
276269

277-
func (r *TalosControlPlaneReconciler) getControlPlaneMachinesForCluster(ctx context.Context, cluster client.ObjectKey, cpName string) ([]*capiv1.Machine, error) {
278-
returnList := []*capiv1.Machine{}
270+
func (r *TalosControlPlaneReconciler) scaleDownControlPlane(ctx context.Context, cluster client.ObjectKey, cpName string) (ctrl.Result, error) {
271+
machines, err := r.getControlPlaneMachinesForCluster(ctx, cluster, cpName)
272+
if err != nil {
273+
return ctrl.Result{}, err
274+
}
275+
276+
if len(machines) == 0 {
277+
return ctrl.Result{}, fmt.Errorf("no machines found")
278+
}
279+
280+
r.Log.Info("Found control plane machines", "machines", len(machines))
281+
282+
oldest := machines[0]
283+
for _, machine := range machines {
284+
if machine.CreationTimestamp.Before(&oldest.CreationTimestamp) {
285+
oldest = machine
286+
}
287+
}
288+
289+
r.Log.Info("Deleting control plane machine", "machine", oldest.Name)
279290

291+
// TODO: We need to remove the etcd member. This can be done by calling the
292+
// Talos reset API.
293+
// TODO: We should use the control plane ready count to know if we can safely
294+
// remove a node.
295+
// TODO: Delete the node from the workload cluster.
296+
err = r.Client.Delete(ctx, &oldest)
297+
if err != nil {
298+
return ctrl.Result{}, err
299+
}
300+
301+
// Requeue so that we handle any additional scaling.
302+
return ctrl.Result{Requeue: true, RequeueAfter: time.Minute}, nil
303+
}
304+
305+
func (r *TalosControlPlaneReconciler) getControlPlaneMachinesForCluster(ctx context.Context, cluster client.ObjectKey, cpName string) ([]capiv1.Machine, error) {
280306
selector := map[string]string{
281-
capiv1.ClusterLabelName: cluster.Name,
307+
capiv1.ClusterLabelName: cluster.Name,
308+
capiv1.MachineControlPlaneLabelName: "",
282309
}
283310

284311
machineList := capiv1.MachineList{}
@@ -288,21 +315,10 @@ func (r *TalosControlPlaneReconciler) getControlPlaneMachinesForCluster(ctx cont
288315
client.InNamespace(cluster.Namespace),
289316
client.MatchingLabels(selector),
290317
); err != nil {
291-
return returnList, err
318+
return nil, err
292319
}
293320

294-
for _, machine := range machineList.Items {
295-
controllerRef := metav1.GetControllerOf(&machine)
296-
if controllerRef == nil {
297-
continue
298-
}
299-
300-
if controllerRef.Kind == "TalosControlPlane" && controllerRef.Name == cpName {
301-
returnList = append(returnList, &machine)
302-
}
303-
}
304-
305-
return returnList, nil
321+
return machineList.Items, nil
306322

307323
}
308324

@@ -479,9 +495,12 @@ func (r *TalosControlPlaneReconciler) updateStatus(ctx context.Context, tcp *con
479495

480496
for _, ownedMachine := range ownedMachines {
481497
if ownedMachine.Status.NodeRef == nil {
482-
return fmt.Errorf("owned machine does not yet have noderef")
498+
r.Log.Info("owned machine does not yet have noderef", "machine", ownedMachine.Name)
499+
continue
483500
}
484501

502+
// If this fails for whatever reason, we can't accurately set the status
503+
// of the control plane.
485504
node, err := clientset.CoreV1().Nodes().Get(ownedMachine.Status.NodeRef.Name, metav1.GetOptions{})
486505
if err != nil {
487506
return err

0 commit comments

Comments
 (0)