Skip to content

Commit bae05ed

Browse files
birdayz authored and RafalKorepta committed
operator v1: scheduled requeue for ghost broker decommissioner
Sometimes the ghost broker decommissioner does not get triggered; the reconciler is then only called after up to 10h (syncPeriod). The recommended way to run a reconciler on a schedule is RequeueAfter: see kubernetes-sigs/kubebuilder#1015 (comment). (cherry picked from commit dc8996c)
1 parent 6f251cd commit bae05ed

File tree

2 files changed

+47
-32
lines changed

2 files changed

+47
-32
lines changed

operator/cmd/run/run.go

Lines changed: 36 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -129,37 +129,38 @@ func (s *LabelSelectorValue) Type() string {
129129

130130
func Command() *cobra.Command {
131131
var (
132-
clusterDomain string
133-
metricsAddr string
134-
secureMetrics bool
135-
enableHTTP2 bool
136-
probeAddr string
137-
pprofAddr string
138-
enableLeaderElection bool
139-
webhookEnabled bool
140-
configuratorBaseImage string
141-
configuratorTag string
142-
configuratorImagePullPolicy string
143-
decommissionWaitInterval time.Duration
144-
metricsTimeout time.Duration
145-
restrictToRedpandaVersion string
146-
namespace string
147-
additionalControllers []string
148-
operatorMode bool
149-
enableHelmControllers bool
150-
ghostbuster bool
151-
unbindPVCsAfter time.Duration
152-
unbinderSelector LabelSelectorValue
153-
autoDeletePVCs bool
154-
forceDefluxedMode bool
155-
helmRepositoryURL string
156-
webhookCertPath string
157-
webhookCertName string
158-
webhookCertKey string
159-
metricsCertPath string
160-
metricsCertName string
161-
metricsCertKey string
162-
enableGhostBrokerDecommissioner bool
132+
clusterDomain string
133+
metricsAddr string
134+
secureMetrics bool
135+
enableHTTP2 bool
136+
probeAddr string
137+
pprofAddr string
138+
enableLeaderElection bool
139+
webhookEnabled bool
140+
configuratorBaseImage string
141+
configuratorTag string
142+
configuratorImagePullPolicy string
143+
decommissionWaitInterval time.Duration
144+
metricsTimeout time.Duration
145+
restrictToRedpandaVersion string
146+
namespace string
147+
additionalControllers []string
148+
operatorMode bool
149+
enableHelmControllers bool
150+
ghostbuster bool
151+
unbindPVCsAfter time.Duration
152+
unbinderSelector LabelSelectorValue
153+
autoDeletePVCs bool
154+
forceDefluxedMode bool
155+
helmRepositoryURL string
156+
webhookCertPath string
157+
webhookCertName string
158+
webhookCertKey string
159+
metricsCertPath string
160+
metricsCertName string
161+
metricsCertKey string
162+
enableGhostBrokerDecommissioner bool
163+
ghostBrokerDecommissionerSyncPeriod time.Duration
163164
)
164165

165166
cmd := &cobra.Command{
@@ -201,6 +202,7 @@ func Command() *cobra.Command {
201202
metricsCertName,
202203
metricsCertKey,
203204
enableGhostBrokerDecommissioner,
205+
ghostBrokerDecommissionerSyncPeriod,
204206
)
205207
},
206208
}
@@ -246,6 +248,7 @@ func Command() *cobra.Command {
246248
cmd.Flags().StringVar(&metricsCertName, "metrics-cert-name", "tls.crt", "The name of the metrics server certificate file.")
247249
cmd.Flags().StringVar(&metricsCertKey, "metrics-cert-key", "tls.key", "The name of the metrics server key file.")
248250
cmd.Flags().BoolVar(&enableGhostBrokerDecommissioner, "enable-ghost-broker-decommissioner", false, "Enable ghost broker decommissioner.")
251+
cmd.Flags().DurationVar(&ghostBrokerDecommissionerSyncPeriod, "ghost-broker-decommissioner-sync-period", time.Minute*5, "Ghost broker sync period. The Ghost Broker Decommissioner is guaranteed to be called after this period.")
249252

250253
// 3rd party flags.
251254
clientOptions.BindFlags(cmd.Flags())
@@ -307,6 +310,7 @@ func Run(
307310
metricsCertName string,
308311
metricsCertKey string,
309312
enableGhostBrokerDecommissioner bool,
313+
ghostBrokerDecommissionerSyncPeriod time.Duration,
310314
) error {
311315
setupLog := ctrl.LoggerFrom(ctx).WithName("setup")
312316

@@ -671,6 +675,7 @@ func Run(
671675

672676
if enableGhostBrokerDecommissioner {
673677
d := decommissioning.NewStatefulSetDecommissioner(mgr, &v1Fetcher{client: mgr.GetClient()},
678+
decommissioning.WithSyncPeriod(ghostBrokerDecommissionerSyncPeriod),
674679
decommissioning.WithCleanupPVCs(false),
675680
decommissioning.WithFactory(internalclient.NewFactory(mgr.GetConfig(), mgr.GetClient())),
676681
decommissioning.WithFilter(func(ctx context.Context, sts *appsv1.StatefulSet) (bool, error) {

operator/internal/controller/decommissioning/statefulset_decomissioner.go

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,12 @@ func WithCleanupPVCs(cleanup bool) Option {
117117
}
118118
}
119119

120+
func WithSyncPeriod(d time.Duration) Option {
121+
return func(ssd *StatefulSetDecomissioner) {
122+
ssd.syncPeriod = d
123+
}
124+
}
125+
120126
type StatefulSetDecomissioner struct {
121127
client client.Client
122128
factory internalclient.ClientFactory
@@ -129,6 +135,7 @@ type StatefulSetDecomissioner struct {
129135
delayedVolumeCache *CategorizedDelayedCache[types.NamespacedName, types.NamespacedName]
130136
filter func(ctx context.Context, set *appsv1.StatefulSet) (bool, error)
131137
cleanupPVCs bool
138+
syncPeriod time.Duration
132139
}
133140

134141
func NewStatefulSetDecommissioner(mgr ctrl.Manager, fetcher Fetcher, options ...Option) *StatefulSetDecomissioner {
@@ -260,7 +267,10 @@ func (s *StatefulSetDecomissioner) Reconcile(ctx context.Context, req ctrl.Reque
260267
return ctrl.Result{RequeueAfter: timeout}, nil
261268
}
262269

263-
return ctrl.Result{}, nil
270+
// If a schedule is configured, just requeue with this period.
271+
// Sometimes, the node disappears and no event about sts/pod is fired, or fired before we can actually decommission the ghost broker.
272+
// Running the reconciler regularly ensures we're not missing any ghost brokers.
273+
return ctrl.Result{RequeueAfter: s.syncPeriod}, nil
264274
}
265275

266276
// Decommission decommissions any stray resources for a StatefulSet. This includes:

0 commit comments

Comments
 (0)