Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 10 additions & 5 deletions operator/cmd/run/run.go
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,7 @@ func Command() *cobra.Command {
ghostbuster bool
unbindPVCsAfter time.Duration
unbinderSelector LabelSelectorValue
allowPVRebinding bool
autoDeletePVCs bool
webhookCertPath string
webhookCertName string
Expand Down Expand Up @@ -213,6 +214,7 @@ func Command() *cobra.Command {
ghostbuster,
unbindPVCsAfter,
unbinderSelector.Selector,
allowPVRebinding,
autoDeletePVCs,
pprofAddr,
webhookCertPath,
Expand Down Expand Up @@ -263,6 +265,7 @@ func Command() *cobra.Command {
cmd.Flags().StringSliceVar(&additionalControllers, "additional-controllers", []string{""}, fmt.Sprintf("which controllers to run, available: all, %s", strings.Join(availableControllers, ", ")))
cmd.Flags().BoolVar(&operatorMode, "operator-mode", true, "enables to run as an operator, setting this to false will disable cluster (deprecated), redpanda resources reconciliation.")
cmd.Flags().DurationVar(&unbindPVCsAfter, "unbind-pvcs-after", 0, "if not zero, runs the PVCUnbinder controller which attempts to 'unbind' the PVCs' of Pods that are Pending for longer than the given duration")
cmd.Flags().BoolVar(&allowPVRebinding, "allow-pv-rebinding", false, "controls whether or not PVs unbound by the PVCUnbinder have their .ClaimRef cleared, which allows them to be reused")
cmd.Flags().Var(&unbinderSelector, "unbinder-label-selector", "if provided, a Kubernetes label selector that will filter Pods to be considered by the PVCUnbinder.")
cmd.Flags().BoolVar(&autoDeletePVCs, "auto-delete-pvcs", false, "Use StatefulSet PersistentVolumeClaimRetentionPolicy to auto delete PVCs on scale down and Cluster resource delete.")
cmd.Flags().StringVar(&webhookCertPath, "webhook-cert-path", "", "The directory that contains the webhook certificate.")
Expand Down Expand Up @@ -329,6 +332,7 @@ func Run(
ghostbuster bool,
unbindPVCsAfter time.Duration,
unbinderSelector labels.Selector,
allowPVRebinding bool,
autoDeletePVCs bool,
pprofAddr string,
webhookCertPath string,
Expand Down Expand Up @@ -684,14 +688,15 @@ func Run(

// The unbinder gets to run in any mode, if it's enabled.
if unbindPVCsAfter <= 0 {
setupLog.Info("PVCUnbinder controller not active", "unbind-after", unbindPVCsAfter, "selector", unbinderSelector)
setupLog.Info("PVCUnbinder controller not active", "unbind-after", unbindPVCsAfter, "selector", unbinderSelector, "allow-pv-rebinding", allowPVRebinding)
} else {
setupLog.Info("starting PVCUnbinder controller", "unbind-after", unbindPVCsAfter, "selector", unbinderSelector)
setupLog.Info("starting PVCUnbinder controller", "unbind-after", unbindPVCsAfter, "selector", unbinderSelector, "allow-pv-rebinding", allowPVRebinding)

if err := (&pvcunbinder.Controller{
Client: mgr.GetClient(),
Timeout: unbindPVCsAfter,
Selector: unbinderSelector,
Client: mgr.GetClient(),
Timeout: unbindPVCsAfter,
Selector: unbinderSelector,
AllowRebinding: allowPVRebinding,
}).SetupWithManager(mgr); err != nil {
setupLog.Error(err, "unable to create controller", "controller", "PVCUnbinder")
return err
Expand Down
72 changes: 66 additions & 6 deletions operator/internal/controller/pvcunbinder/pvcunbinder.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ package pvcunbinder

import (
"context"
"fmt"
"regexp"
"slices"
"strings"
Expand Down Expand Up @@ -44,7 +45,11 @@ var schedulingFailureRE = regexp.MustCompile(`(^0/[1-9]\d* nodes are available)|
// 2. Ensure that all PVs in question have a Retain policy
// 3. Delete all PVCs from step 1. (PVCs are immutable after creation,
// deletion is the only option)
// 4. Deleting the Pod to re-trigger PVC creation and rebinding.
// 4. (Optionally) "Recycle" all PVs from step 1 by clearing the ClaimRef.
// Kubernetes will only consider binding PVs that have a satisfiable
// NodeAffinity. By "recycling" we permit Flaky Nodes to rejoin the cluster
// which _might_ reclaim the now freed volume.
// 5. Deleting the Pod to re-trigger PVC creation and rebinding.
type Controller struct {
Client client.Client
// Timeout is the duration a Pod must be stuck in Pending before
Expand All @@ -57,6 +62,15 @@ type Controller struct {
// Reconciler will consider for remediation via some sort of filtering
// function.
Filter func(ctx context.Context, pod *corev1.Pod) (bool, error)
// AllowRebinding optionally enables clearing of the unbound PV's ClaimRef
// which effectively makes the PVs "re-bindable" if the underlying Node
// becomes capable of scheduling Pods once again.
// NOTE: This option can present problems when a Node's name is reused and
// using HostPath volumes and LocalPathProvisioner. In such a case, the
// helper Pod of LocalPathProvisioner will NOT run a second time as the
// Volume is assumed to exist. This can lead to Permission errors or
// referencing a directory that does not exist.
AllowRebinding bool
}

func FilterPodOwner(ownerNamespace, ownerName string) func(ctx context.Context, pod *corev1.Pod) (bool, error) {
Expand Down Expand Up @@ -130,9 +144,9 @@ func (r *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
return ctrl.Result{}, err
}

// Filter PVs down to ones that are:
// 1. Bound to a PVC we care about.
// 2. Have a NodeAffinity (which we assume is the cause of our Pod being in Pending)
// 1. Filter PVs down to ones that are:
// - Bound to a PVC we care about.
// - Have a NodeAffinity (which we assume is the cause of our Pod being in Pending)
var pvs []*corev1.PersistentVolume
for i := range pvList.Items {
pv := &pvList.Items[i]
Expand Down Expand Up @@ -161,14 +175,14 @@ func (r *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
pvs = append(pvs, pv)
}

// 3. Ensure that all PVs have reclaim set to Retain
// 2. Ensure that all PVs have reclaim set to Retain
for _, pv := range pvs {
if err := r.ensureRetainPolicy(ctx, pv); err != nil {
return ctrl.Result{}, err
}
}

// 4. Delete all Bound PVCs
// 3. Delete all Bound PVCs
for key, pvc := range pvcByKey {
if pvc == nil || pvc.Spec.VolumeName == "" {
continue
Expand All @@ -188,6 +202,14 @@ func (r *Controller) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Resu
pvcByKey[key] = nil
}

// 4. "Recycle" PVs that have been released. Technically optional, this
// allows disks to rebind if a Node happens to recover.
for _, pv := range pvs {
if err := r.maybeRecyclePersistentVolume(ctx, pv); err != nil {
return ctrl.Result{}, err
}
}

missingPVCs := false
for _, pvc := range pvcByKey {
if pvc == nil {
Expand Down Expand Up @@ -231,6 +253,44 @@ func (r *Controller) ensureRetainPolicy(ctx context.Context, pv *corev1.Persiste
return nil
}

// maybeRecyclePersistentVolume "recycles" a released PV by clearing its .ClaimRef,
// which makes it available for binding once again IF AllowRebinding is true.
// This strategy is only valid for volumes that utilize .HostPath or .Local.
// It returns an error for network-backed PVs (neither .HostPath nor .Local set)
// and nil in all other cases, including when recycling is disabled or skipped.
func (r *Controller) maybeRecyclePersistentVolume(ctx context.Context, pv *corev1.PersistentVolume) error {
	// This case should never hit as we filter out such PVs earlier in the
	// controller though it's likely we don't handle such cases well aside from
	// not unbinding them.
	// TODO(chrisseto): Remove this check and better clarify the expected
	// behavior of this controller if it encounters network backed disks.
	if pv.Spec.HostPath == nil && pv.Spec.Local == nil {
		return fmt.Errorf("%T must specify .Spec.HostPath or .Spec.Local for recycling: %q", pv, pv.Name)
	}

	// NB: We handle this flag here to ensure we get explicit log messages
	// for all PVs we would have cleared the ClaimRef of.
	if !r.AllowRebinding {
		log.FromContext(ctx).Info("Skipping .ClaimRef clearing of PersistentVolume", "name", pv.Name, "AllowRebinding", r.AllowRebinding)
		return nil
	}

	// Skip over unbound PVs.
	if pv.Spec.ClaimRef == nil {
		return nil
	}

	log.FromContext(ctx).Info("Clearing .ClaimRef of PersistentVolume", "name", pv.Name, "AllowRebinding", r.AllowRebinding)

	// NB: We explicitly don't use an optimistic lock here as the control plane
	// will likely have updated this PV's Status to indicate that it's now
	// Released.
	patch := client.StrategicMergeFrom(pv.DeepCopy())
	pv.Spec.ClaimRef = nil
	return r.Client.Patch(ctx, pv, patch)
}

func (r *Controller) ShouldRemediate(ctx context.Context, pod *corev1.Pod) (bool, time.Duration) {
if r.Selector != nil && !r.Selector.Matches(labels.Set(pod.Labels)) {
log.FromContext(ctx).Info("selector not satisfied; skipping", "name", pod.Name, "labels", pod.Labels, "selector", r.Selector.String())
Expand Down