
Commit 5c72dca

fix remediation (#109)
Signed-off-by: nasusoba <[email protected]>
fix lint
Signed-off-by: nasusoba <[email protected]>
fix comment
1 parent 0527d86 commit 5c72dca

13 files changed: +645 -70 lines changed

bootstrap/controllers/kthreesconfig_controller.go

Lines changed: 2 additions & 27 deletions
@@ -45,7 +45,6 @@ import (
 	"github.com/k3s-io/cluster-api-k3s/pkg/cloudinit"
 	"github.com/k3s-io/cluster-api-k3s/pkg/etcd"
 	"github.com/k3s-io/cluster-api-k3s/pkg/k3s"
-	"github.com/k3s-io/cluster-api-k3s/pkg/kubeconfig"
 	"github.com/k3s-io/cluster-api-k3s/pkg/locking"
 	"github.com/k3s-io/cluster-api-k3s/pkg/secret"
 	"github.com/k3s-io/cluster-api-k3s/pkg/token"
@@ -177,9 +176,6 @@ func (r *KThreesConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques
 		log.Info("Cluster infrastructure is not ready, waiting")
 		conditions.MarkFalse(config, bootstrapv1.DataSecretAvailableCondition, bootstrapv1.WaitingForClusterInfrastructureReason, clusterv1.ConditionSeverityInfo, "")
 		return ctrl.Result{RequeueAfter: 1 * time.Minute}, nil
-	// Migrate plaintext data to secret.
-	case config.Status.BootstrapData != nil && config.Status.DataSecretName == nil:
-		return ctrl.Result{}, r.storeBootstrapData(ctx, scope, config.Status.BootstrapData)
 	// Reconcile status for machines that already have a secret reference, but our status isn't up to date.
 	// This case solves the pivoting scenario (or a backup restore) which doesn't preserve the status subresource on objects.
 	case configOwner.DataSecretName() != nil && (!config.Status.Ready || config.Status.DataSecretName == nil):
@@ -500,13 +496,12 @@ func (r *KThreesConfigReconciler) handleClusterNotInitialized(ctx context.Contex
 		return ctrl.Result{}, err
 	}

-	// TODO: move to controlplane provider
-	return r.reconcileKubeconfig(ctx, scope)
+	return ctrl.Result{}, nil
 }

 func (r *KThreesConfigReconciler) SetupWithManager(mgr ctrl.Manager) error {
 	if r.KThreesInitLock == nil {
-		r.KThreesInitLock = locking.NewControlPlaneInitMutex(ctrl.Log.WithName("init-locker"), mgr.GetClient())
+		r.KThreesInitLock = locking.NewControlPlaneInitMutex(mgr.GetClient())
 	}

 	return ctrl.NewControllerManagedBy(mgr).
@@ -558,26 +553,6 @@ func (r *KThreesConfigReconciler) storeBootstrapData(ctx context.Context, scope
 	return nil
 }

-func (r *KThreesConfigReconciler) reconcileKubeconfig(ctx context.Context, scope *Scope) (ctrl.Result, error) {
-	logger := r.Log.WithValues("cluster", scope.Cluster.Name, "namespace", scope.Cluster.Namespace)
-
-	_, err := secret.Get(ctx, r.Client, util.ObjectKey(scope.Cluster), secret.Kubeconfig)
-	switch {
-	case apierrors.IsNotFound(err):
-		if err := kubeconfig.CreateSecret(ctx, r.Client, scope.Cluster); err != nil {
-			if errors.Is(err, kubeconfig.ErrDependentCertificateNotFound) {
-				logger.Info("could not find secret for cluster, requeuing", "secret", secret.ClusterCA)
-				return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
-			}
-			return ctrl.Result{}, err
-		}
-	case err != nil:
-		return ctrl.Result{}, fmt.Errorf("failed to retrieve Kubeconfig Secret for Cluster %q in namespace %q: %w", scope.Cluster.Name, scope.Cluster.Namespace, err)
-	}
-
-	return ctrl.Result{}, nil
-}
-
 func (r *KThreesConfigReconciler) reconcileTopLevelObjectSettings(_ *clusterv1.Cluster, machine *clusterv1.Machine, config *bootstrapv1.KThreesConfig) {
 	log := r.Log.WithValues("kthreesconfig", fmt.Sprintf("%s/%s", config.Namespace, config.Name))

controlplane/controllers/kthreescontrolplane_controller.go

Lines changed: 30 additions & 3 deletions
@@ -124,9 +124,7 @@ func (r *KThreesControlPlaneReconciler) Reconcile(ctx context.Context, req ctrl.
 		// because the main defer may take too much time to get cluster status
 		// Patch ObservedGeneration only if the reconciliation completed successfully
 		patchOpts := []patch.Option{}
-		if err == nil {
-			patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
-		}
+		patchOpts = append(patchOpts, patch.WithStatusObservedGeneration{})
 		if err := patchHelper.Patch(ctx, kcp, patchOpts...); err != nil {
 			logger.Error(err, "Failed to patch KThreesControlPlane to add finalizer")
 			return reconcile.Result{}, err
@@ -403,6 +401,35 @@ func (r *KThreesControlPlaneReconciler) updateStatus(ctx context.Context, kcp *c
 		conditions.MarkTrue(kcp, controlplanev1.AvailableCondition)
 	}

+	// Surface lastRemediation data in status.
+	// LastRemediation is the remediation currently in progress, if any, or the
+	// most recent of the remediations we keep track of on machines.
+	var lastRemediation *RemediationData
+
+	if v, ok := controlPlane.KCP.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok {
+		remediationData, err := RemediationDataFromAnnotation(v)
+		if err != nil {
+			return err
+		}
+		lastRemediation = remediationData
+	} else {
+		for _, m := range controlPlane.Machines.UnsortedList() {
+			if v, ok := m.Annotations[controlplanev1.RemediationForAnnotation]; ok {
+				remediationData, err := RemediationDataFromAnnotation(v)
+				if err != nil {
+					return err
+				}
+				if lastRemediation == nil || lastRemediation.Timestamp.Time.Before(remediationData.Timestamp.Time) {
+					lastRemediation = remediationData
+				}
+			}
+		}
+	}
+
+	if lastRemediation != nil {
+		controlPlane.KCP.Status.LastRemediation = lastRemediation.ToStatus()
+	}
+
 	return nil
 }
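For context, the annotation values parsed above by RemediationDataFromAnnotation are JSON blobs. The following is a minimal, illustrative sketch of that round-trip, assuming a RemediationData shape with a machine name, timestamp, and retry count; only Timestamp, RemediationDataFromAnnotation, and ToStatus are actually visible in this diff, so the field and helper names below are hypothetical:

package main

import (
	"encoding/json"
	"fmt"
	"time"
)

// remediationDataSketch is a hypothetical stand-in for the provider's RemediationData;
// only the Timestamp usage is confirmed by the diff above, the other fields are assumptions.
type remediationDataSketch struct {
	MachineName string    `json:"machineName"`
	Timestamp   time.Time `json:"timestamp"`
	RetryCount  int       `json:"retryCount"`
}

// fromAnnotationSketch mirrors what a RemediationDataFromAnnotation-style helper plausibly does:
// decode the JSON value stored in the KCP or Machine annotation.
func fromAnnotationSketch(value string) (*remediationDataSketch, error) {
	ret := &remediationDataSketch{}
	if err := json.Unmarshal([]byte(value), ret); err != nil {
		return nil, fmt.Errorf("failed to unmarshal remediation data from annotation: %w", err)
	}
	return ret, nil
}

func main() {
	// A value of this shape would live in controlplanev1.RemediationInProgressAnnotation
	// (on the KCP) or controlplanev1.RemediationForAnnotation (on the replacement Machine).
	annotation := `{"machineName":"cp-0","timestamp":"2024-01-01T00:00:00Z","retryCount":1}`
	data, err := fromAnnotationSketch(annotation)
	if err != nil {
		panic(err)
	}
	fmt.Println(data.MachineName, data.Timestamp, data.RetryCount)
}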

controlplane/controllers/machine_controller.go

Lines changed: 5 additions & 3 deletions
@@ -77,10 +77,10 @@ func (r *MachineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
 		return ctrl.Result{}, nil
 	}

-	// if machine registered PreTerminate hook, wait for capi to drain and deattach volume, then remove etcd member
+	// if the machine registered a PreTerminate hook, wait until CAPI asks to resolve the PreTerminateDeleteHook
 	if annotations.HasWithPrefix(clusterv1.PreTerminateDeleteHookAnnotationPrefix, m.ObjectMeta.Annotations) &&
 		m.ObjectMeta.Annotations[clusterv1.PreTerminateDeleteHookAnnotationPrefix] == k3sHookName {
-		if !conditions.IsTrue(m, clusterv1.DrainingSucceededCondition) || !conditions.IsTrue(m, clusterv1.VolumeDetachSucceededCondition) {
+		if !conditions.IsFalse(m, clusterv1.PreTerminateDeleteHookSucceededCondition) {
			logger.Info("wait for machine drain and volume detach operation to complete.")
 			return ctrl.Result{}, nil
 		}
@@ -106,7 +106,9 @@ func (r *MachineReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
 		logger.Info("wait for k3s embedded etcd controller to remove etcd")
 		return ctrl.Result{Requeue: true}, err
 	}
-	logger.Info("etcd remove etcd member succeeded", "node", m.Status.NodeRef.Name)
+
+	// It is possible that the machine has no node ref yet, so record the machine name in the log.
+	logger.Info("etcd remove etcd member succeeded", "machine name", m.Name)

 	patchHelper, err := patch.NewHelper(m, r.Client)
 	if err != nil {

controlplane/controllers/remediation.go

Lines changed: 68 additions & 12 deletions
@@ -30,6 +30,7 @@ import (
 	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
 	"sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/annotations"
+	"sigs.k8s.io/cluster-api/util/collections"
 	"sigs.k8s.io/cluster-api/util/conditions"
 	"sigs.k8s.io/cluster-api/util/patch"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -41,7 +42,7 @@ import (
 // reconcileUnhealthyMachines tries to remediate KThreesControlPlane unhealthy machines
 // based on the process described in https://github.com/kubernetes-sigs/cluster-api/blob/main/docs/proposals/20191017-kubeadm-based-control-plane.md#remediation-using-delete-and-recreate
 // taken from the kubeadm codebase and adapted for the k3s provider.
-func (r *KThreesControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.Context, controlPlane *k3s.ControlPlane) (ret ctrl.Result, retErr error) {
+func (r *KThreesControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.Context, controlPlane *k3s.ControlPlane) (ret ctrl.Result, retErr error) { //nolint:gocyclo
 	log := ctrl.LoggerFrom(ctx)
 	reconciliationTime := time.Now().UTC()

@@ -81,12 +82,13 @@ func (r *KThreesControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
 		return ctrl.Result{}, nil
 	}

-	// Select the machine to be remediated, which is the oldest machine marked as unhealthy.
+	// Select the machine to be remediated, which is the oldest machine marked as unhealthy and not yet provisioned (if any)
+	// or the oldest machine marked as unhealthy.
 	//
 	// NOTE: The current solution is considered acceptable for the most frequent use case (only one unhealthy machine),
 	// however, in the future this could potentially be improved for the scenario where more than one unhealthy machine exists
 	// by considering which machine has lower impact on etcd quorum.
-	machineToBeRemediated := unhealthyMachines.Oldest()
+	machineToBeRemediated := getMachineToBeRemediated(unhealthyMachines)

 	// Returns if the machine is in the process of being deleted.
 	if !machineToBeRemediated.ObjectMeta.DeletionTimestamp.IsZero() {
@@ -147,6 +149,13 @@ func (r *KThreesControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
 		return ctrl.Result{}, nil
 	}

+	// The cluster MUST NOT have healthy machines still being provisioned. This rule prevents KCP taking actions while the cluster is in a transitional state.
+	if controlPlane.HasHealthyMachineStillProvisioning() {
+		log.Info("A control plane machine needs remediation, but there are other control-plane machines being provisioned. Skipping remediation")
+		conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP waiting for control plane machine provisioning to complete before triggering remediation")
+		return ctrl.Result{}, nil
+	}
+
 	// The cluster MUST have no machines with a deletion timestamp. This rule prevents KCP taking actions while the cluster is in a transitional state.
 	if controlPlane.HasDeletingMachine() {
 		log.Info("A control plane machine needs remediation, but there are other control-plane machines being deleted. Skipping remediation")
@@ -157,7 +166,11 @@ func (r *KThreesControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
 	// Remediation MUST preserve etcd quorum. This rule ensures that KCP will not remove a member that would result in etcd
 	// losing a majority of members and thus become unable to field new requests.
 	if controlPlane.IsEtcdManaged() {
-		canSafelyRemediate := r.canSafelyRemoveEtcdMember(ctx, controlPlane, machineToBeRemediated)
+		canSafelyRemediate, err := r.canSafelyRemoveEtcdMember(ctx, controlPlane, machineToBeRemediated)
+		if err != nil {
+			conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.RemediationFailedReason, clusterv1.ConditionSeverityError, err.Error())
+			return ctrl.Result{}, err
+		}
 		if !canSafelyRemediate {
 			log.Info("A control plane machine needs remediation, but removing this machine could result in etcd quorum loss. Skipping remediation")
 			conditions.MarkFalse(machineToBeRemediated, clusterv1.MachineOwnerRemediatedCondition, clusterv1.WaitingForRemediationReason, clusterv1.ConditionSeverityWarning, "KCP can't remediate this machine because this could result in etcd losing quorum")
@@ -232,6 +245,16 @@ func (r *KThreesControlPlaneReconciler) reconcileUnhealthyMachines(ctx context.C
 	return ctrl.Result{Requeue: true}, nil
 }

+// getMachineToBeRemediated returns the oldest machine marked as unhealthy and not yet provisioned (if any),
+// or the oldest machine marked as unhealthy.
+func getMachineToBeRemediated(unhealthyMachines collections.Machines) *clusterv1.Machine {
+	machineToBeRemediated := unhealthyMachines.Filter(collections.Not(collections.HasNode())).Oldest()
+	if machineToBeRemediated == nil {
+		machineToBeRemediated = unhealthyMachines.Oldest()
+	}
+	return machineToBeRemediated
+}
+
 // checkRetryLimits checks if KCP is allowed to remediate considering retry limits:
 // - Remediation cannot happen because retryPeriod is not yet expired.
 // - KCP already reached the maximum number of retries for a machine.
@@ -346,10 +369,23 @@ func max(x, y time.Duration) time.Duration {
 // as well as reconcileControlPlaneConditions before this.
 //
 // adapted from kubeadm controller and makes the assumption that the set of control plane nodes equals the set of etcd nodes.
-func (r *KThreesControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Context, controlPlane *k3s.ControlPlane, machineToBeRemediated *clusterv1.Machine) bool {
+func (r *KThreesControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Context, controlPlane *k3s.ControlPlane, machineToBeRemediated *clusterv1.Machine) (bool, error) {
 	log := ctrl.LoggerFrom(ctx)

-	currentTotalMembers := len(controlPlane.Machines)
+	workloadCluster, err := r.managementCluster.GetWorkloadCluster(ctx, util.ObjectKey(controlPlane.Cluster))
+	if err != nil {
+		return false, errors.Wrapf(err, "failed to get client for workload cluster %s", controlPlane.Cluster.Name)
+	}
+
+	// Gets the etcd status
+
+	// This makes it possible to have a set of etcd member statuses different from the MHC healthy/unhealthy conditions.
+	etcdMembers, err := workloadCluster.EtcdMembers(ctx)
+	if err != nil {
+		return false, errors.Wrapf(err, "failed to get etcdStatus for workload cluster %s", controlPlane.Cluster.Name)
+	}
+
+	currentTotalMembers := len(etcdMembers)

 	log.Info("etcd cluster before remediation",
 		"currentTotalMembers", currentTotalMembers)
@@ -360,23 +396,43 @@ func (r *KThreesControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Co

 	healthyMembers := []string{}
 	unhealthyMembers := []string{}
-	for _, m := range controlPlane.Machines {
+	for _, etcdMember := range etcdMembers {
 		// Skip the machine to be deleted because it won't be part of the target etcd cluster.
-		if machineToBeRemediated.Status.NodeRef != nil && machineToBeRemediated.Status.NodeRef.Name == m.Status.NodeRef.Name {
+		if machineToBeRemediated.Status.NodeRef != nil && machineToBeRemediated.Status.NodeRef.Name == etcdMember {
 			continue
 		}

 		// Include the member in the target etcd cluster.
 		targetTotalMembers++

+		// Search for the machine corresponding to the etcd member.
+		var machine *clusterv1.Machine
+		for _, m := range controlPlane.Machines {
+			if m.Status.NodeRef != nil && m.Status.NodeRef.Name == etcdMember {
+				machine = m
+				break
+			}
+		}
+
+		// If an etcd member does not have a corresponding machine it is not possible to retrieve etcd member health,
+		// so KCP is assuming the worst scenario and considering the member unhealthy.
+		//
+		// NOTE: This should not happen given that KCP is running reconcileEtcdMembers before calling this method.
+		if machine == nil {
+			log.Info("An etcd member does not have a corresponding machine, assuming this member is unhealthy", "MemberName", etcdMember)
+			targetUnhealthyMembers++
+			unhealthyMembers = append(unhealthyMembers, fmt.Sprintf("%s (no machine)", etcdMember))
+			continue
+		}
+
 		// Check member health as reported by machine's health conditions
-		if !conditions.IsTrue(m, controlplanev1.MachineEtcdMemberHealthyCondition) {
+		if !conditions.IsTrue(machine, controlplanev1.MachineEtcdMemberHealthyCondition) {
 			targetUnhealthyMembers++
-			unhealthyMembers = append(unhealthyMembers, fmt.Sprintf("%s (%s)", m.Status.NodeRef.Name, m.Name))
+			unhealthyMembers = append(unhealthyMembers, fmt.Sprintf("%s (%s)", etcdMember, machine.Name))
 			continue
 		}

-		healthyMembers = append(healthyMembers, fmt.Sprintf("%s (%s)", m.Status.NodeRef.Name, m.Name))
+		healthyMembers = append(healthyMembers, fmt.Sprintf("%s (%s)", etcdMember, machine.Name))
 	}

 	// See https://etcd.io/docs/v3.3/faq/#what-is-failure-tolerance for fault tolerance formula explanation.
@@ -391,7 +447,7 @@ func (r *KThreesControlPlaneReconciler) canSafelyRemoveEtcdMember(ctx context.Co
 		"targetUnhealthyMembers", targetUnhealthyMembers,
 		"canSafelyRemediate", canSafelyRemediate)

-	return canSafelyRemediate
+	return canSafelyRemediate, nil
 }

 // RemediationData struct is used to keep track of information stored in the RemediationInProgressAnnotation in KCP
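The "failure tolerance" link referenced above carries the reasoning behind canSafelyRemoveEtcdMember: an etcd cluster of n members needs floor(n/2)+1 healthy members to keep quorum, so remediation is safe only if the target cluster (the one left after removing the unhealthy member) still reaches that threshold. A minimal sketch of that arithmetic, with hypothetical function names (only targetTotalMembers/targetUnhealthyMembers correspond to variables in the commit):

package main

import "fmt"

// quorum returns the minimum number of healthy members an etcd cluster of the given size
// needs in order to keep serving writes: floor(n/2) + 1.
func quorum(totalMembers int) int {
	return totalMembers/2 + 1
}

// canSafelyRemediateSketch reports whether, after excluding the member being remediated,
// the remaining healthy members still reach quorum of the target cluster size.
func canSafelyRemediateSketch(targetTotalMembers, targetUnhealthyMembers int) bool {
	return targetTotalMembers-targetUnhealthyMembers >= quorum(targetTotalMembers)
}

func main() {
	// 3-node control plane, one unhealthy machine being remediated: the target cluster has
	// 2 members and a quorum of 2, so remediation is safe only if both remaining members are healthy.
	fmt.Println(canSafelyRemediateSketch(2, 0)) // true
	fmt.Println(canSafelyRemediateSketch(2, 1)) // false
}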

controlplane/controllers/scale.go

Lines changed: 16 additions & 1 deletion
@@ -377,16 +377,31 @@ func (r *KThreesControlPlaneReconciler) generateMachine(ctx context.Context, kcp
 		},
 	}

+	annotations := map[string]string{}
+
 	// Machine's bootstrap config may be missing ClusterConfiguration if it is not the first machine in the control plane.
 	// We store ClusterConfiguration as an annotation here to detect any changes in KCP ClusterConfiguration and roll out the machine if any.
 	serverConfig, err := json.Marshal(kcp.Spec.KThreesConfigSpec.ServerConfig)
 	if err != nil {
 		return fmt.Errorf("failed to marshal cluster configuration: %w", err)
 	}
-	machine.SetAnnotations(map[string]string{controlplanev1.KThreesServerConfigurationAnnotation: string(serverConfig)})
+	annotations[controlplanev1.KThreesServerConfigurationAnnotation] = string(serverConfig)
+
+	// In case this machine is being created as a consequence of a remediation, add an annotation
+	// tracking the remediation data.
+	// NOTE: This is required in order to track remediation retries.
+	if remediationData, ok := kcp.Annotations[controlplanev1.RemediationInProgressAnnotation]; ok {
+		annotations[controlplanev1.RemediationForAnnotation] = remediationData
+	}
+
+	machine.SetAnnotations(annotations)

 	if err := r.Client.Create(ctx, machine); err != nil {
 		return fmt.Errorf("failed to create machine: %w", err)
 	}
+
+	// Remove the annotation tracking that a remediation is in progress (the remediation completed when
+	// the replacement machine was created above).
+	delete(kcp.Annotations, controlplanev1.RemediationInProgressAnnotation)
 	return nil
 }

pkg/k3s/control_plane.go

Lines changed: 5 additions & 0 deletions
@@ -267,6 +267,11 @@ func (c *ControlPlane) NeedsReplacementNode() bool {
 	return len(c.Machines)+1 == int(*c.KCP.Spec.Replicas)
 }

+// HasHealthyMachineStillProvisioning returns true if any healthy machine in the control plane is still in the process of being provisioned.
+func (c *ControlPlane) HasHealthyMachineStillProvisioning() bool {
+	return len(c.HealthyMachines().Filter(collections.Not(collections.HasNode()))) > 0
+}
+
 // HasDeletingMachine returns true if any machine in the control plane is in the process of being deleted.
 func (c *ControlPlane) HasDeletingMachine() bool {
 	return len(c.Machines.Filter(collections.HasDeletionTimestamp)) > 0

pkg/k3s/workload_cluster_etcd.go

Lines changed: 37 additions & 2 deletions
@@ -18,7 +18,6 @@ package k3s

 import (
 	"context"
-	"fmt"

 	"github.com/pkg/errors"
 	corev1 "k8s.io/api/core/v1"
@@ -167,7 +166,8 @@ func (w *Workload) removeMemberForNode(ctx context.Context, name string) (bool,
 	}

 	if removingNode.Name != name {
-		return false, errors.New(fmt.Sprintf("node %s not found", name))
+		// If the node is already removed, treat the etcd member as removed as well.
+		return true, nil
 	}

 	annotations := removingNode.GetAnnotations()
@@ -236,3 +236,38 @@ func (w *Workload) ForwardEtcdLeadership(ctx context.Context, machine *clusterv1
 	}
 	return nil
 }
+
+// EtcdMembers returns the current set of members in an etcd cluster.
+// It converts the etcd members to a list of node names,
+// and returns that list of node names.
+//
+// NOTE: This method uses control plane machines/nodes only to get in contact with etcd,
+// but then it relies on etcd as the ultimate source of truth for the list of members.
+// This is intended to allow informed decisions on actions impacting etcd quorum.
+func (w *Workload) EtcdMembers(ctx context.Context) ([]string, error) {
+	nodes, err := w.getControlPlaneNodes(ctx)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to list control plane nodes")
+	}
+	nodeNames := make([]string, 0, len(nodes.Items))
+	for _, node := range nodes.Items {
+		nodeNames = append(nodeNames, node.Name)
+	}
+	etcdClient, err := w.etcdClientGenerator.forLeader(ctx, nodeNames)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to create etcd client")
+	}
+	defer etcdClient.Close()
+
+	members, err := etcdClient.Members(ctx)
+	if err != nil {
+		return nil, errors.Wrap(err, "failed to list etcd members using etcd client")
+	}
+
+	names := []string{}
+	for _, member := range members {
+		// Convert each etcd member to a node name
+		names = append(names, etcdutil.NodeNameFromMember(member))
+	}
+	return names, nil
+}
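As a usage note, the node names returned by EtcdMembers are what canSafelyRemoveEtcdMember (controlplane/controllers/remediation.go, above) matches against each Machine's Status.NodeRef. A hedged sketch of that cross-check follows; membersWithoutMachine is a hypothetical helper used only for illustration and is not part of this commit:

package main

import (
	"fmt"

	clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
)

// membersWithoutMachine returns the etcd member node names that have no corresponding
// control plane Machine (e.g. a member left behind by a deleted node); the remediation
// code above treats such members as unhealthy because their health conditions cannot
// be looked up on a Machine.
func membersWithoutMachine(etcdMembers []string, machines []*clusterv1.Machine) []string {
	known := map[string]bool{}
	for _, m := range machines {
		if m.Status.NodeRef != nil {
			known[m.Status.NodeRef.Name] = true
		}
	}
	orphans := []string{}
	for _, member := range etcdMembers {
		if !known[member] {
			orphans = append(orphans, member)
		}
	}
	return orphans
}

func main() {
	machine := &clusterv1.Machine{}
	// No NodeRef is set on this Machine, so both members show up as orphans in this toy example.
	fmt.Println(membersWithoutMachine([]string{"cp-0", "cp-1"}, []*clusterv1.Machine{machine}))
}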
