Skip to content

Commit cdadf3c

Browse files
authored
support ssa patch (#136)
Signed-off-by: nasusoba <[email protected]>

finished implementation
Signed-off-by: nasusoba <[email protected]>

add test
Signed-off-by: nasusoba <[email protected]>

fix ssaCache init
Signed-off-by: nasusoba <[email protected]>

increase control plane replicas for test
Signed-off-by: nasusoba <[email protected]>

fix typo
Signed-off-by: nasusoba <[email protected]>

fix typo
1 parent 030a7bc commit cdadf3c

File tree

18 files changed

+1491
-46
lines changed

18 files changed

+1491
-46
lines changed

.golangci.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ linters-settings:
7171
- github.com/go-logr/logr
7272
- github.com/coredns/corefile-migration/migration
7373
- github.com/pkg/errors
74+
- github.com/davecgh/go-spew/spew
7475

7576
- k8s.io/api
7677
- k8s.io/apimachinery/pkg

controlplane/controllers/const.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,6 @@ const (
3636
etcdRemovalRequeueAfter = 30 * time.Second
3737

3838
k3sHookName = "k3s"
39+
40+
kcpManagerName = "capi-kthreescontrolplane"
3941
)

controlplane/controllers/kthreescontrolplane_controller.go

Lines changed: 97 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,7 @@ import (
3030
"k8s.io/apimachinery/pkg/runtime"
3131
kerrors "k8s.io/apimachinery/pkg/util/errors"
3232
"k8s.io/client-go/tools/record"
33+
"k8s.io/klog/v2"
3334
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
3435
"sigs.k8s.io/cluster-api/controllers/external"
3536
"sigs.k8s.io/cluster-api/util"
@@ -53,6 +54,8 @@ import (
5354
"github.com/k3s-io/cluster-api-k3s/pkg/machinefilters"
5455
"github.com/k3s-io/cluster-api-k3s/pkg/secret"
5556
"github.com/k3s-io/cluster-api-k3s/pkg/token"
57+
"github.com/k3s-io/cluster-api-k3s/pkg/util/contract"
58+
"github.com/k3s-io/cluster-api-k3s/pkg/util/ssa"
5659
)
5760

5861
// KThreesControlPlaneReconciler reconciles a KThreesControlPlane object.
@@ -68,6 +71,7 @@ type KThreesControlPlaneReconciler struct {
6871

6972
managementCluster k3s.ManagementCluster
7073
managementClusterUncached k3s.ManagementCluster
74+
ssaCache ssa.Cache
7175
}
7276

7377
// +kubebuilder:rbac:groups=core,resources=events,verbs=get;list;watch;create;patch
@@ -302,6 +306,7 @@ func (r *KThreesControlPlaneReconciler) SetupWithManager(ctx context.Context, mg
302306
r.Scheme = mgr.GetScheme()
303307
r.controller = c
304308
r.recorder = mgr.GetEventRecorderFor("k3s-control-plane-controller")
309+
r.ssaCache = ssa.NewCache()
305310

306311
if r.managementCluster == nil {
307312
r.managementCluster = &k3s.Management{
@@ -516,6 +521,10 @@ func (r *KThreesControlPlaneReconciler) reconcile(ctx context.Context, cluster *
516521
return reconcile.Result{}, err
517522
}
518523

524+
if err := r.syncMachines(ctx, controlPlane); err != nil {
525+
return ctrl.Result{}, errors.Wrap(err, "failed to sync Machines")
526+
}
527+
519528
// Aggregate the operational state of all the machines; while aggregating we are adding the
520529
// source ref (reason@machine/name) so the problem can be easily tracked down to its source machine.
521530
conditions.SetAggregate(controlPlane.KCP, controlplanev1.MachinesReadyCondition, ownedMachines.ConditionGetters(), conditions.AddSourceRef(), conditions.WithStepCounterIf(false))
@@ -673,6 +682,94 @@ func (r *KThreesControlPlaneReconciler) reconcileKubeconfig(ctx context.Context,
673682
return reconcile.Result{}, nil
674683
}
675684

685+
// syncMachines updates Machines, InfrastructureMachines and KThreesConfigs to propagate in-place mutable fields from KCP.
686+
// Note: It also cleans up managed fields of all Machines so that Machines that were
687+
// created/patched before (<= v0.2.0) the controller adopted Server-Side-Apply (SSA) can also work with SSA.
688+
// Note: For InfrastructureMachines and KThreesConfigs it also drops ownership of "metadata.labels" and
689+
// "metadata.annotations" from "manager" so that "capi-kthreescontrolplane" can own these fields and can work with SSA.
690+
// Otherwise, fields would be co-owned by our "old" "manager" and "capi-kthreescontrolplane" and then we would not be
691+
// able to e.g. drop labels and annotations.
692+
func (r *KThreesControlPlaneReconciler) syncMachines(ctx context.Context, controlPlane *k3s.ControlPlane) error {
693+
patchHelpers := map[string]*patch.Helper{}
694+
for machineName := range controlPlane.Machines {
695+
m := controlPlane.Machines[machineName]
696+
// If the machine is already being deleted, we don't need to update it.
697+
if !m.DeletionTimestamp.IsZero() {
698+
continue
699+
}
700+
701+
// Cleanup managed fields of all Machines.
702+
// We do this so that Machines that were created/patched before the controller adopted Server-Side-Apply (SSA)
703+
// (<= v0.2.0) can also work with SSA. Otherwise, fields would be co-owned by our "old" "manager" and
704+
// "capi-kthreescontrolplane" and then we would not be able to e.g. drop labels and annotations.
705+
if err := ssa.CleanUpManagedFieldsForSSAAdoption(ctx, r.Client, m, kcpManagerName); err != nil {
706+
return errors.Wrapf(err, "failed to update Machine: failed to adjust the managedFields of the Machine %s", klog.KObj(m))
707+
}
708+
// Update Machine to propagate in-place mutable fields from KCP.
709+
updatedMachine, err := r.updateMachine(ctx, m, controlPlane.KCP, controlPlane.Cluster)
710+
if err != nil {
711+
return errors.Wrapf(err, "failed to update Machine: %s", klog.KObj(m))
712+
}
713+
controlPlane.Machines[machineName] = updatedMachine
714+
// Since the machine is updated, re-create the patch helper so that any subsequent
715+
// Patch calls use the correct base machine object to calculate the diffs.
716+
// Example: reconcileControlPlaneConditions patches the machine objects in a subsequent call
717+
// and, it should use the updated machine to calculate the diff.
718+
// Note: If the patchHelpers are not re-computed based on the new updated machines, subsequent
719+
// Patch calls will fail because the patch will be calculated based on an outdated machine and will error
720+
// because of outdated resourceVersion.
721+
// TODO: This should be cleaned-up to have a more streamline way of constructing and using patchHelpers.
722+
patchHelper, err := patch.NewHelper(updatedMachine, r.Client)
723+
if err != nil {
724+
return err
725+
}
726+
patchHelpers[machineName] = patchHelper
727+
728+
labelsAndAnnotationsManagedFieldPaths := []contract.Path{
729+
{"f:metadata", "f:annotations"},
730+
{"f:metadata", "f:labels"},
731+
}
732+
infraMachine, infraMachineFound := controlPlane.InfraResources[machineName]
733+
// Only update the InfraMachine if it is already found, otherwise just skip it.
734+
// This could happen e.g. if the cache is not up-to-date yet.
735+
if infraMachineFound {
736+
// Cleanup managed fields of all InfrastructureMachines to drop ownership of labels and annotations
737+
// from "manager". We do this so that InfrastructureMachines that are created using the Create method
738+
// can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager"
739+
// and "capi-kthreescontrolplane" and then we would not be able to e.g. drop labels and annotations.
740+
if err := ssa.DropManagedFields(ctx, r.Client, infraMachine, kcpManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil {
741+
return errors.Wrapf(err, "failed to clean up managedFields of InfrastructureMachine %s", klog.KObj(infraMachine))
742+
}
743+
// Update in-place mutating fields on InfrastructureMachine.
744+
if err := r.updateExternalObject(ctx, infraMachine, controlPlane.KCP, controlPlane.Cluster); err != nil {
745+
return errors.Wrapf(err, "failed to update InfrastructureMachine %s", klog.KObj(infraMachine))
746+
}
747+
}
748+
749+
kthreesConfigs, kthreesConfigsFound := controlPlane.KthreesConfigs[machineName]
750+
// Only update the kthreesConfigs if it is already found, otherwise just skip it.
751+
// This could happen e.g. if the cache is not up-to-date yet.
752+
if kthreesConfigsFound {
753+
// Note: Set the GroupVersionKind because updateExternalObject depends on it.
754+
kthreesConfigs.SetGroupVersionKind(m.Spec.Bootstrap.ConfigRef.GroupVersionKind())
755+
// Cleanup managed fields of all KThreesConfigs to drop ownership of labels and annotations
756+
// from "manager". We do this so that KThreesConfigs that are created using the Create method
757+
// can also work with SSA. Otherwise, labels and annotations would be co-owned by our "old" "manager"
758+
// and "capi-kthreescontrolplane" and then we would not be able to e.g. drop labels and annotations.
759+
if err := ssa.DropManagedFields(ctx, r.Client, kthreesConfigs, kcpManagerName, labelsAndAnnotationsManagedFieldPaths); err != nil {
760+
return errors.Wrapf(err, "failed to clean up managedFields of kthreesConfigs %s", klog.KObj(kthreesConfigs))
761+
}
762+
// Update in-place mutating fields on BootstrapConfig.
763+
if err := r.updateExternalObject(ctx, kthreesConfigs, controlPlane.KCP, controlPlane.Cluster); err != nil {
764+
return errors.Wrapf(err, "failed to update KThreesConfigs %s", klog.KObj(kthreesConfigs))
765+
}
766+
}
767+
}
768+
// Update the patch helpers.
769+
controlPlane.SetPatchHelpers(patchHelpers)
770+
return nil
771+
}
772+
676773
// reconcileControlPlaneConditions is responsible of reconciling conditions reporting the status of static pods and
677774
// the status of the etcd cluster.
678775
func (r *KThreesControlPlaneReconciler) reconcileControlPlaneConditions(ctx context.Context, controlPlane *k3s.ControlPlane) error {

controlplane/controllers/scale.go

Lines changed: 124 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@ import (
2727
apierrors "k8s.io/apimachinery/pkg/api/errors"
2828
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
2929
"k8s.io/apimachinery/pkg/apis/meta/v1/unstructured"
30+
"k8s.io/apimachinery/pkg/types"
3031
kerrors "k8s.io/apimachinery/pkg/util/errors"
3132
"k8s.io/apiserver/pkg/storage/names"
3233
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
@@ -36,10 +37,12 @@ import (
3637
"sigs.k8s.io/cluster-api/util/conditions"
3738
"sigs.k8s.io/cluster-api/util/patch"
3839
ctrl "sigs.k8s.io/controller-runtime"
40+
"sigs.k8s.io/controller-runtime/pkg/client"
3941

4042
bootstrapv1 "github.com/k3s-io/cluster-api-k3s/bootstrap/api/v1beta2"
4143
controlplanev1 "github.com/k3s-io/cluster-api-k3s/controlplane/api/v1beta2"
4244
k3s "github.com/k3s-io/cluster-api-k3s/pkg/k3s"
45+
"github.com/k3s-io/cluster-api-k3s/pkg/util/ssa"
4346
)
4447

4548
var ErrPreConditionFailed = errors.New("precondition check failed")
@@ -253,6 +256,12 @@ func selectMachineForScaleDown(ctx context.Context, controlPlane *k3s.ControlPla
253256
func (r *KThreesControlPlaneReconciler) cloneConfigsAndGenerateMachine(ctx context.Context, cluster *clusterv1.Cluster, kcp *controlplanev1.KThreesControlPlane, bootstrapSpec *bootstrapv1.KThreesConfigSpec, failureDomain *string) error {
254257
var errs []error
255258

259+
// Compute desired Machine
260+
machine, err := r.computeDesiredMachine(kcp, cluster, failureDomain, nil)
261+
if err != nil {
262+
return errors.Wrap(err, "failed to create Machine: failed to compute desired Machine")
263+
}
264+
256265
// Since the cloned resource should eventually have a controller ref for the Machine, we create an
257266
// OwnerReference here without the Controller field set
258267
infraCloneOwner := &metav1.OwnerReference{
@@ -275,6 +284,7 @@ func (r *KThreesControlPlaneReconciler) cloneConfigsAndGenerateMachine(ctx conte
275284
// Safe to return early here since no resources have been created yet.
276285
return fmt.Errorf("failed to clone infrastructure template: %w", err)
277286
}
287+
machine.Spec.InfrastructureRef = *infraRef
278288

279289
// Clone the bootstrap configuration
280290
bootstrapRef, err := r.generateKThreesConfig(ctx, kcp, cluster, bootstrapSpec)
@@ -284,8 +294,9 @@ func (r *KThreesControlPlaneReconciler) cloneConfigsAndGenerateMachine(ctx conte
284294

285295
// Only proceed to generating the Machine if we haven't encountered an error
286296
if len(errs) == 0 {
287-
if err := r.generateMachine(ctx, kcp, cluster, infraRef, bootstrapRef, failureDomain); err != nil {
288-
errs = append(errs, fmt.Errorf("failed to create Machine: %w", err))
297+
machine.Spec.Bootstrap.ConfigRef = bootstrapRef
298+
if err := r.createMachine(ctx, kcp, machine); err != nil {
299+
errs = append(errs, errors.Wrap(err, "failed to create Machine"))
289300
}
290301
}
291302

@@ -355,55 +366,135 @@ func (r *KThreesControlPlaneReconciler) generateKThreesConfig(ctx context.Contex
355366
return bootstrapRef, nil
356367
}
357368

358-
func (r *KThreesControlPlaneReconciler) generateMachine(ctx context.Context, kcp *controlplanev1.KThreesControlPlane, cluster *clusterv1.Cluster, infraRef, bootstrapRef *corev1.ObjectReference, failureDomain *string) error {
359-
machine := &clusterv1.Machine{
369+
// updateExternalObject updates the external object with the labels and annotations from KCP.
370+
func (r *KThreesControlPlaneReconciler) updateExternalObject(ctx context.Context, obj client.Object, kcp *controlplanev1.KThreesControlPlane, cluster *clusterv1.Cluster) error {
371+
updatedObject := &unstructured.Unstructured{}
372+
updatedObject.SetGroupVersionKind(obj.GetObjectKind().GroupVersionKind())
373+
updatedObject.SetNamespace(obj.GetNamespace())
374+
updatedObject.SetName(obj.GetName())
375+
// Set the UID to ensure that Server-Side-Apply only performs an update
376+
// and does not perform an accidental create.
377+
updatedObject.SetUID(obj.GetUID())
378+
379+
// Update labels
380+
updatedObject.SetLabels(k3s.ControlPlaneLabelsForCluster(cluster.Name, kcp.Spec.MachineTemplate))
381+
// Update annotations
382+
updatedObject.SetAnnotations(kcp.Spec.MachineTemplate.ObjectMeta.Annotations)
383+
384+
if err := ssa.Patch(ctx, r.Client, kcpManagerName, updatedObject, ssa.WithCachingProxy{Cache: r.ssaCache, Original: obj}); err != nil {
385+
return errors.Wrapf(err, "failed to update %s", obj.GetObjectKind().GroupVersionKind().Kind)
386+
}
387+
return nil
388+
}
389+
390+
func (r *KThreesControlPlaneReconciler) createMachine(ctx context.Context, kcp *controlplanev1.KThreesControlPlane, machine *clusterv1.Machine) error {
391+
if err := ssa.Patch(ctx, r.Client, kcpManagerName, machine); err != nil {
392+
return errors.Wrap(err, "failed to create Machine")
393+
}
394+
// Remove the annotation tracking that a remediation is in progress (the remediation completed when
395+
// the replacement machine has been created above).
396+
delete(kcp.Annotations, controlplanev1.RemediationInProgressAnnotation)
397+
return nil
398+
}
399+
400+
func (r *KThreesControlPlaneReconciler) updateMachine(ctx context.Context, machine *clusterv1.Machine, kcp *controlplanev1.KThreesControlPlane, cluster *clusterv1.Cluster) (*clusterv1.Machine, error) {
401+
updatedMachine, err := r.computeDesiredMachine(kcp, cluster, machine.Spec.FailureDomain, machine)
402+
if err != nil {
403+
return nil, errors.Wrap(err, "failed to update Machine: failed to compute desired Machine")
404+
}
405+
406+
err = ssa.Patch(ctx, r.Client, kcpManagerName, updatedMachine, ssa.WithCachingProxy{Cache: r.ssaCache, Original: machine})
407+
if err != nil {
408+
return nil, errors.Wrap(err, "failed to update Machine")
409+
}
410+
return updatedMachine, nil
411+
}
412+
413+
// computeDesiredMachine computes the desired Machine.
414+
// This Machine will be used during reconciliation to:
415+
// * create a new Machine
416+
// * update an existing Machine
417+
// Because we are using Server-Side-Apply we always have to calculate the full object.
418+
// There are small differences in how we calculate the Machine depending on if it
419+
// is a create or update. Example: for a new Machine we have to calculate a new name,
420+
// while for an existing Machine we have to use the name of the existing Machine.
421+
// Also, for an existing Machine, we will not copy its labels, as they are not managed by the KThreesControlPlane controller.
422+
func (r *KThreesControlPlaneReconciler) computeDesiredMachine(kcp *controlplanev1.KThreesControlPlane, cluster *clusterv1.Cluster, failureDomain *string, existingMachine *clusterv1.Machine) (*clusterv1.Machine, error) {
423+
var machineName string
424+
var machineUID types.UID
425+
var version *string
426+
annotations := map[string]string{}
427+
if existingMachine == nil {
428+
// Creating a new machine
429+
machineName = names.SimpleNameGenerator.GenerateName(kcp.Name + "-")
430+
version = &kcp.Spec.Version
431+
432+
// Machine's bootstrap config may be missing ClusterConfiguration if it is not the first machine in the control plane.
433+
// We store ClusterConfiguration as annotation here to detect any changes in KCP ClusterConfiguration and rollout the machine if any.
434+
serverConfig, err := json.Marshal(kcp.Spec.KThreesConfigSpec.ServerConfig)
435+
if err != nil {
436+
return nil, errors.Wrap(err, "failed to marshal cluster configuration")
437+
}
438+
annotations[controlplanev1.KThreesServerConfigurationAnnotation] = string(serverConfig)
439+
440+
// In case this machine is being created as a consequence of a remediation, then add an annotation
441+
// tracking remediating data.
442+
// NOTE: This is required in order to track remediation retries.
443+
if remediationData, ok := kcp.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok {
444+
annotations[controlplanev1.RemediationForAnnotation] = remediationData
445+
}
446+
} else {
447+
// Updating an existing machine
448+
machineName = existingMachine.Name
449+
machineUID = existingMachine.UID
450+
version = existingMachine.Spec.Version
451+
452+
// For existing machine only set the ClusterConfiguration annotation if the machine already has it.
453+
// We should not add the annotation if it was missing in the first place because we do not have enough
454+
// information.
455+
if serverConfig, ok := existingMachine.Annotations[controlplanev1.KThreesServerConfigurationAnnotation]; ok {
456+
annotations[controlplanev1.KThreesServerConfigurationAnnotation] = serverConfig
457+
}
458+
459+
// If the machine already has remediation data then preserve it.
460+
// NOTE: This is required in order to track remediation retries.
461+
if remediationData, ok := existingMachine.Annotations[controlplanev1.RemediationForAnnotation]; ok {
462+
annotations[controlplanev1.RemediationForAnnotation] = remediationData
463+
}
464+
}
465+
466+
// Construct the basic Machine.
467+
desiredMachine := &clusterv1.Machine{
360468
ObjectMeta: metav1.ObjectMeta{
361-
Name: names.SimpleNameGenerator.GenerateName(kcp.Name + "-"),
469+
Name: machineName,
362470
Namespace: kcp.Namespace,
471+
UID: machineUID,
363472
Labels: k3s.ControlPlaneLabelsForCluster(cluster.Name, kcp.Spec.MachineTemplate),
364473
OwnerReferences: []metav1.OwnerReference{
365474
*metav1.NewControllerRef(kcp, controlplanev1.GroupVersion.WithKind("KThreesControlPlane")),
366475
},
367476
},
368477
Spec: clusterv1.MachineSpec{
369-
ClusterName: cluster.Name,
370-
Version: &kcp.Spec.Version,
371-
InfrastructureRef: *infraRef,
372-
Bootstrap: clusterv1.Bootstrap{
373-
ConfigRef: bootstrapRef,
374-
},
478+
ClusterName: cluster.Name,
479+
Version: version,
375480
FailureDomain: failureDomain,
376481
NodeDrainTimeout: kcp.Spec.MachineTemplate.NodeDrainTimeout,
377482
NodeVolumeDetachTimeout: kcp.Spec.MachineTemplate.NodeVolumeDetachTimeout,
378483
NodeDeletionTimeout: kcp.Spec.MachineTemplate.NodeDeletionTimeout,
379484
},
380485
}
381486

382-
annotations := map[string]string{}
383-
384-
// Machine's bootstrap config may be missing ClusterConfiguration if it is not the first machine in the control plane.
385-
// We store ClusterConfiguration as annotation here to detect any changes in KCP ClusterConfiguration and rollout the machine if any.
386-
serverConfig, err := json.Marshal(kcp.Spec.KThreesConfigSpec.ServerConfig)
387-
if err != nil {
388-
return fmt.Errorf("failed to marshal cluster configuration: %w", err)
389-
}
390-
annotations[controlplanev1.KThreesServerConfigurationAnnotation] = string(serverConfig)
391-
392-
// In case this machine is being created as a consequence of a remediation, then add an annotation
393-
// tracking remediating data.
394-
// NOTE: This is required in order to track remediation retries.
395-
if remediationData, ok := kcp.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok {
396-
annotations[controlplanev1.RemediationForAnnotation] = remediationData
487+
// Set annotations
488+
for k, v := range kcp.Spec.MachineTemplate.ObjectMeta.Annotations {
489+
annotations[k] = v
397490
}
398491

399-
machine.SetAnnotations(annotations)
492+
desiredMachine.SetAnnotations(annotations)
400493

401-
if err := r.Client.Create(ctx, machine); err != nil {
402-
return fmt.Errorf("failed to create machine: %w", err)
494+
if existingMachine != nil {
495+
desiredMachine.Spec.InfrastructureRef = existingMachine.Spec.InfrastructureRef
496+
desiredMachine.Spec.Bootstrap.ConfigRef = existingMachine.Spec.Bootstrap.ConfigRef
403497
}
404498

405-
// Remove the annotation tracking that a remediation is in progress (the remediation completed when
406-
// the replacement machine has been created above).
407-
delete(kcp.Annotations, controlplanev1.RemediationInProgressAnnotation)
408-
return nil
499+
return desiredMachine, nil
409500
}

0 commit comments

Comments (0)