Skip to content

Commit cb32cb8

Browse files
committed
cvo: use early gate checker to avoid delayed initialization
Previous changes prepared the code for this switch by adding code to detect enabled CVO gates and featureset early in the CVO execution, right before the controllers are initialized (well before CVO acquires its leader lease and actually starts the controllers). These early detected gate checker was not used anywhere yet. This change makes CVO stop using its late detected gate checker (detected while loading its initial payload) and start using the early one instead. This eliminates the need for the panicking gate checker that was used to guard against checking gates before the actual checker could be established. The change also allows to simplify some code layers. Late initialization on initial payload load is now only necessary for the actual CVO controller, not the featurechangestopper (it is already initialized at that time), so the `Context` layer does not need to coordinate anymore. The starting features set can be initialized in CVO right away, avoiding the need to wire it through call chains; it can simply be an `Operator` member.
1 parent b83ac37 commit cb32cb8

File tree

6 files changed

+67
-128
lines changed

6 files changed

+67
-128
lines changed

pkg/cvo/cvo.go

Lines changed: 29 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,19 @@ type Operator struct {
174174
// via annotation
175175
exclude string
176176

177+
// requiredFeatureSet is the feature set that was detected in the cluster when CVO was started and is used
178+
// to select the manifests that will be applied in the cluster. The starting value cannot be changed in the executing
179+
// CVO but the featurechangestopper controller will detect a feature set change in the cluster and shutdown the CVO.
180+
// Enforcing featuresets is a standard GA CVO behavior that supports the feature gating functionality across the whole
181+
// cluster, as opposed to the enabledFeatureGates which controls what gated behaviors of CVO itself are enabled by
182+
// the cluster feature gates.
183+
// See: https://github.com/openshift/enhancements/blob/master/enhancements/update/cvo-techpreview-manifests.md
184+
requiredFeatureSet configv1.FeatureSet
185+
186+
// enabledFeatureGates is the checker for what gated CVO behaviors are enabled or disabled by specific cluster-level
187+
// feature gates. It allows multiplexing the cluster-level feature gates to more granular CVO-level gates. Similarly
188+
// to the requiredFeatureSet, the enabledFeatureGates cannot be changed in the executing CVO but the
189+
// featurechangestopper controller will detect when cluster feature gate config changes and shutdown the CVO.
177190
enabledFeatureGates featuregates.CvoGateChecker
178191

179192
clusterProfile string
@@ -210,6 +223,8 @@ func New(
210223
injectClusterIdIntoPromQL bool,
211224
updateService string,
212225
alwaysEnableCapabilities []configv1.ClusterVersionCapability,
226+
featureSet configv1.FeatureSet,
227+
cvoGates featuregates.CvoGateChecker,
213228
) (*Operator, error) {
214229
eventBroadcaster := record.NewBroadcaster()
215230
eventBroadcaster.StartLogging(klog.Infof)
@@ -243,10 +258,9 @@ func New(
243258
conditionRegistry: standard.NewConditionRegistry(promqlTarget),
244259
injectClusterIdIntoPromQL: injectClusterIdIntoPromQL,
245260

246-
// Because of OCPBUGS-30080, we can only detect the enabled feature gates after Operator loads the initial payload
247-
// from disk via LoadInitialPayload. We must not have any gate-checking code until that happens, so we initialize
248-
// this field with a checker that panics when used.
249-
enabledFeatureGates: featuregates.PanicOnUsageBeforeInitialization,
261+
requiredFeatureSet: featureSet,
262+
enabledFeatureGates: cvoGates,
263+
250264
alwaysEnableCapabilities: alwaysEnableCapabilities,
251265
}
252266

@@ -283,7 +297,7 @@ func New(
283297

284298
// LoadInitialPayload waits until a ClusterVersion object exists. It then retrieves the payload contents, verifies the
285299
// initial state and returns it. If the payload is invalid, an error is returned.
286-
func (optr *Operator) LoadInitialPayload(ctx context.Context, startingRequiredFeatureSet configv1.FeatureSet, restConfig *rest.Config) (*payload.Update, error) {
300+
func (optr *Operator) LoadInitialPayload(ctx context.Context, restConfig *rest.Config) (*payload.Update, error) {
287301

288302
// wait until cluster version object exists
289303
if err := wait.PollUntilContextCancel(ctx, 3*time.Second, true, func(ctx context.Context) (bool, error) {
@@ -303,7 +317,7 @@ func (optr *Operator) LoadInitialPayload(ctx context.Context, startingRequiredFe
303317
return nil, fmt.Errorf("Error when attempting to get cluster version object: %w", err)
304318
}
305319

306-
update, err := payload.LoadUpdate(optr.defaultPayloadDir(), optr.release.Image, optr.exclude, string(startingRequiredFeatureSet),
320+
update, err := payload.LoadUpdate(optr.defaultPayloadDir(), optr.release.Image, optr.exclude, string(optr.requiredFeatureSet),
307321
optr.clusterProfile, configv1.KnownClusterVersionCapabilities)
308322

309323
if err != nil {
@@ -339,8 +353,12 @@ func (optr *Operator) LoadInitialPayload(ctx context.Context, startingRequiredFe
339353

340354
// InitializeFromPayload configures the controller that loads and applies content to the cluster given an initial payload
341355
// and feature gate data.
342-
func (optr *Operator) InitializeFromPayload(update *payload.Update, requiredFeatureSet configv1.FeatureSet, cvoFlags featuregates.CvoGateChecker, restConfig *rest.Config, burstRestConfig *rest.Config) {
343-
optr.enabledFeatureGates = cvoFlags
356+
func (optr *Operator) InitializeFromPayload(ctx context.Context, restConfig *rest.Config, burstRestConfig *rest.Config) error {
357+
update, err := optr.LoadInitialPayload(ctx, restConfig)
358+
if err != nil {
359+
return err
360+
}
361+
344362
optr.release = update.Release
345363
optr.releaseCreated = update.ImageRef.CreationTimestamp.Time
346364

@@ -358,11 +376,13 @@ func (optr *Operator) InitializeFromPayload(update *payload.Update, requiredFeat
358376
Cap: time.Second * 15,
359377
},
360378
optr.exclude,
361-
requiredFeatureSet,
379+
optr.requiredFeatureSet,
362380
optr.eventRecorder,
363381
optr.clusterProfile,
364382
optr.alwaysEnableCapabilities,
365383
)
384+
385+
return nil
366386
}
367387

368388
// ownerReferenceModifier sets the owner reference to the current CV resource if no other reference exists. It also resets

pkg/featuregates/featurechangestopper.go

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -30,12 +30,16 @@ type ChangeStopper struct {
3030
}
3131

3232
// NewChangeStopper returns a new ChangeStopper.
33-
func NewChangeStopper(featureGateInformer configinformersv1.FeatureGateInformer) (*ChangeStopper, error) {
33+
func NewChangeStopper(featureGateInformer configinformersv1.FeatureGateInformer, requiredFeatureSet configv1.FeatureSet, cvoGates CvoGates) (*ChangeStopper, error) {
3434
c := &ChangeStopper{
3535
featureGateLister: featureGateInformer.Lister(),
3636
cacheSynced: []cache.InformerSynced{featureGateInformer.Informer().HasSynced},
3737
queue: workqueue.NewTypedRateLimitingQueueWithConfig[any](workqueue.DefaultTypedControllerRateLimiter[any](), workqueue.TypedRateLimitingQueueConfig[any]{Name: "feature-gate-stopper"}),
38+
39+
startingRequiredFeatureSet: &requiredFeatureSet,
40+
startingCvoGates: &cvoGates,
3841
}
42+
3943
c.queue.Add("cluster") // seed an initial sync, in case startingRequiredFeatureSet is wrong
4044
if _, err := featureGateInformer.Informer().AddEventHandler(cache.ResourceEventHandlerFuncs{
4145
AddFunc: func(_ interface{}) {
@@ -54,11 +58,6 @@ func NewChangeStopper(featureGateInformer configinformersv1.FeatureGateInformer)
5458
return c, nil
5559
}
5660

57-
func (c *ChangeStopper) SetStartingFeatures(requiredFeatureSet configv1.FeatureSet, cvoGates CvoGates) {
58-
c.startingRequiredFeatureSet = &requiredFeatureSet
59-
c.startingCvoGates = &cvoGates
60-
}
61-
6261
// syncHandler processes a single work entry, with the
6362
// processNextWorkItem caller handling the queue management. It returns
6463
// done when there will be no more work (because the feature gate changed).

pkg/featuregates/featurechangestopper_test.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,10 @@ func TestTechPreviewChangeStopper(t *testing.T) {
101101
client := fakeconfigv1client.NewSimpleClientset(fg)
102102

103103
informerFactory := configv1informer.NewSharedInformerFactory(client, 0)
104-
c, err := NewChangeStopper(informerFactory.Config().V1().FeatureGates())
104+
c, err := NewChangeStopper(informerFactory.Config().V1().FeatureGates(), tt.startingRequiredFeatureSet, tt.startingCvoFeatureGates)
105105
if err != nil {
106106
t.Fatal(err)
107107
}
108-
c.SetStartingFeatures(tt.startingRequiredFeatureSet, tt.startingCvoFeatureGates)
109108
informerFactory.Start(ctx.Done())
110109

111110
if err := c.Run(ctx, shutdownFn); err != nil {

pkg/featuregates/featuregates.go

Lines changed: 0 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -45,36 +45,6 @@ type CvoGateChecker interface {
4545
CVOConfiguration() bool
4646
}
4747

48-
type panicOnUsageBeforeInitializationFunc func()
49-
50-
func panicOnUsageBeforeInitialization() {
51-
panic("CVO feature flags were used before they were initialized")
52-
}
53-
54-
// PanicOnUsageBeforeInitialization is a CvoGateChecker that panics if any of its methods are called. This checker should
55-
// be used before CVO feature gates are actually known and some code tries to check them.
56-
var PanicOnUsageBeforeInitialization = panicOnUsageBeforeInitializationFunc(panicOnUsageBeforeInitialization)
57-
58-
func (p panicOnUsageBeforeInitializationFunc) ReconciliationIssuesCondition() bool {
59-
p()
60-
return false
61-
}
62-
63-
func (p panicOnUsageBeforeInitializationFunc) StatusReleaseArchitecture() bool {
64-
p()
65-
return false
66-
}
67-
68-
func (p panicOnUsageBeforeInitializationFunc) UnknownVersion() bool {
69-
p()
70-
return false
71-
}
72-
73-
func (p panicOnUsageBeforeInitializationFunc) CVOConfiguration() bool {
74-
p()
75-
return false
76-
}
77-
7848
// CvoGates contains flags that control CVO functionality gated by product feature gates. The
7949
// names do not correspond to product feature gates, the booleans here are "smaller" (product-level
8050
// gate will enable multiple CVO behaviors).

pkg/start/start.go

Lines changed: 17 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@ import (
3535
configv1 "github.com/openshift/api/config/v1"
3636
clientset "github.com/openshift/client-go/config/clientset/versioned"
3737
configinformers "github.com/openshift/client-go/config/informers/externalversions"
38-
configlistersv1 "github.com/openshift/client-go/config/listers/config/v1"
3938
operatorclientset "github.com/openshift/client-go/operator/clientset/versioned"
4039
operatorinformers "github.com/openshift/client-go/operator/informers/externalversions"
4140
"github.com/openshift/library-go/pkg/config/clusterstatus"
@@ -190,13 +189,13 @@ func (o *Options) Run(ctx context.Context) error {
190189
}
191190

192191
clusterVersionConfigInformerFactory, configInformerFactory := o.prepareConfigInformerFactories(cb)
193-
_, _, err = o.processInitialFeatureGate(ctx, configInformerFactory)
192+
startingFeatureSet, startingCvoGates, err := o.processInitialFeatureGate(ctx, configInformerFactory)
194193
if err != nil {
195194
return fmt.Errorf("error processing feature gates: %w", err)
196195
}
197196

198197
// initialize the controllers and attempt to load the payload information
199-
controllerCtx, err := o.NewControllerContext(cb, clusterVersionConfigInformerFactory, configInformerFactory)
198+
controllerCtx, err := o.NewControllerContext(cb, startingFeatureSet, startingCvoGates, clusterVersionConfigInformerFactory, configInformerFactory)
200199
if err != nil {
201200
return err
202201
}
@@ -244,7 +243,7 @@ func (o *Options) getOpenShiftVersion() string {
244243
return releaseMetadata.Version
245244
}
246245

247-
func (o *Options) processInitialFeatureGate(ctx context.Context, configInformerFactory configinformers.SharedInformerFactory) (configv1.FeatureSet, *featuregates.CvoGates, error) {
246+
func (o *Options) processInitialFeatureGate(ctx context.Context, configInformerFactory configinformers.SharedInformerFactory) (configv1.FeatureSet, featuregates.CvoGates, error) {
248247
featureGates := configInformerFactory.Config().V1().FeatureGates().Lister()
249248
configInformerFactory.Start(ctx.Done())
250249
configInformerFactory.WaitForCacheSync(ctx.Done())
@@ -284,17 +283,17 @@ func (o *Options) processInitialFeatureGate(ctx context.Context, configInformerF
284283
}
285284
}); err != nil {
286285
if lastError != nil {
287-
return "", nil, lastError
286+
return "", cvoGates, lastError
288287
}
289-
return "", nil, err
288+
return "", cvoGates, err
290289
}
291290

292291
if cvoGates.UnknownVersion() {
293292
klog.Warningf("CVO features for version %s could not be detected from FeatureGate; will use defaults plus special UnknownVersion feature gate", cvoOpenShiftVersion)
294293
}
295294
klog.Infof("CVO features for version %s enabled at startup: %+v", cvoOpenShiftVersion, cvoGates)
296295

297-
return startingFeatureSet, &cvoGates, nil
296+
return startingFeatureSet, cvoGates, nil
298297
}
299298

300299
// run launches a number of goroutines to handle manifest application,
@@ -362,7 +361,7 @@ func (o *Options) run(ctx context.Context, controllerCtx *Context, lock resource
362361
resultChannel <- asyncResult{name: "metrics server", error: err}
363362
}()
364363
}
365-
if err := controllerCtx.InitializeFromPayload(runContext, restConfig, burstRestConfig); err != nil {
364+
if err := controllerCtx.CVO.InitializeFromPayload(runContext, restConfig, burstRestConfig); err != nil {
366365
if firstError == nil {
367366
firstError = err
368367
}
@@ -577,13 +576,17 @@ type Context struct {
577576
// OperatorInformerFactory should be used to get informers / listers for code that works with resources from the
578577
// operator.openshift.io group
579578
OperatorInformerFactory operatorinformers.SharedInformerFactory
580-
581-
fgLister configlistersv1.FeatureGateLister
582579
}
583580

584581
// NewControllerContext initializes the default Context for the current Options. It does
585582
// not start any background processes.
586-
func (o *Options) NewControllerContext(cb *ClientBuilder, clusterVersionConfigInformerFactory, configInformerFactory configinformers.SharedInformerFactory) (*Context, error) {
583+
func (o *Options) NewControllerContext(
584+
cb *ClientBuilder,
585+
startingFeatureSet configv1.FeatureSet,
586+
startingCvoGates featuregates.CvoGates,
587+
clusterVersionConfigInformerFactory,
588+
configInformerFactory configinformers.SharedInformerFactory,
589+
) (*Context, error) {
587590
kubeClient := cb.KubeClientOrDie(internal.ConfigNamespace, useProtobuf)
588591
openshiftConfigInformerFactory := coreinformers.NewSharedInformerFactoryWithOptions(kubeClient, resyncPeriod(o.ResyncInterval), coreinformers.WithNamespace(internal.ConfigNamespace))
589592
openshiftConfigManagedInformerFactory := coreinformers.NewSharedInformerFactoryWithOptions(kubeClient, resyncPeriod(o.ResyncInterval), coreinformers.WithNamespace(internal.ConfigManagedNamespace))
@@ -621,12 +624,14 @@ func (o *Options) NewControllerContext(cb *ClientBuilder, clusterVersionConfigIn
621624
o.InjectClusterIdIntoPromQL,
622625
o.UpdateService,
623626
stringsToCapabilities(o.AlwaysEnableCapabilities),
627+
startingFeatureSet,
628+
startingCvoGates,
624629
)
625630
if err != nil {
626631
return nil, err
627632
}
628633

629-
featureChangeStopper, err := featuregates.NewChangeStopper(configInformerFactory.Config().V1().FeatureGates())
634+
featureChangeStopper, err := featuregates.NewChangeStopper(configInformerFactory.Config().V1().FeatureGates(), startingFeatureSet, startingCvoGates)
630635
if err != nil {
631636
return nil, err
632637
}
@@ -639,8 +644,6 @@ func (o *Options) NewControllerContext(cb *ClientBuilder, clusterVersionConfigIn
639644
OperatorInformerFactory: operatorInformerFactory,
640645
CVO: cvo,
641646
StopOnFeatureGateChange: featureChangeStopper,
642-
643-
fgLister: configInformerFactory.Config().V1().FeatureGates().Lister(),
644647
}
645648

646649
if o.EnableAutoUpdate {
@@ -663,70 +666,6 @@ func (o *Options) NewControllerContext(cb *ClientBuilder, clusterVersionConfigIn
663666
return ctx, nil
664667
}
665668

666-
// InitializeFromPayload initializes the CVO and FeatureGate ChangeStoppers controllers from the payload. It extracts the
667-
// current CVO version from the initial payload and uses it to determine the initial the required featureset and enabled
668-
// feature gates. Both the payload and determined feature information are used to initialize CVO and feature gate
669-
// ChangeStopper controllers.
670-
func (c *Context) InitializeFromPayload(ctx context.Context, restConfig *rest.Config, burstRestConfig *rest.Config) error {
671-
var startingFeatureSet configv1.FeatureSet
672-
var clusterFeatureGate *configv1.FeatureGate
673-
674-
// client-go automatically retries some network blip errors on GETs for 30s by default, and we want to
675-
// retry the remaining ones ourselves. If we fail longer than that, CVO won't be able to do work anyway.
676-
// Return the error and crashloop.
677-
//
678-
// We implement the timeout with a context because the timeout in PollImmediateWithContext does not behave
679-
// well when ConditionFunc takes longer time to execute, like here where the GET can be retried by client-go
680-
var lastError error
681-
if err := wait.PollUntilContextTimeout(context.Background(), 2*time.Second, 25*time.Second, true, func(ctx context.Context) (bool, error) {
682-
gate, fgErr := c.fgLister.Get("cluster")
683-
switch {
684-
case apierrors.IsNotFound(fgErr):
685-
// if we have no featuregates, then the cluster is using the default featureset, which is "".
686-
// This excludes everything that could possibly depend on a different feature set.
687-
startingFeatureSet = ""
688-
klog.Infof("FeatureGate not found in cluster, using default feature set %q at startup", startingFeatureSet)
689-
return true, nil
690-
case fgErr != nil:
691-
lastError = fgErr
692-
klog.Warningf("Failed to get FeatureGate from cluster: %v", fgErr)
693-
return false, nil
694-
default:
695-
clusterFeatureGate = gate
696-
startingFeatureSet = gate.Spec.FeatureSet
697-
klog.Infof("FeatureGate found in cluster, using its feature set %q at startup", startingFeatureSet)
698-
return true, nil
699-
}
700-
}); err != nil {
701-
if lastError != nil {
702-
return lastError
703-
}
704-
return err
705-
}
706-
707-
payload, err := c.CVO.LoadInitialPayload(ctx, startingFeatureSet, restConfig)
708-
if err != nil {
709-
return err
710-
}
711-
712-
var cvoGates featuregates.CvoGates
713-
if clusterFeatureGate != nil {
714-
cvoGates = featuregates.CvoGatesFromFeatureGate(clusterFeatureGate, payload.Release.Version)
715-
} else {
716-
cvoGates = featuregates.DefaultCvoGates(payload.Release.Version)
717-
}
718-
719-
if cvoGates.UnknownVersion() {
720-
klog.Infof("CVO features for version %s could not be detected from FeatureGate; will use defaults plus special UnknownVersion feature gate", payload.Release.Version)
721-
}
722-
klog.Infof("CVO features for version %s enabled at startup: %+v", payload.Release.Version, cvoGates)
723-
724-
c.StopOnFeatureGateChange.SetStartingFeatures(startingFeatureSet, cvoGates)
725-
c.CVO.InitializeFromPayload(payload, startingFeatureSet, cvoGates, restConfig, burstRestConfig)
726-
727-
return nil
728-
}
729-
730669
func stringsToCapabilities(names []string) []configv1.ClusterVersionCapability {
731670
caps := make([]configv1.ClusterVersionCapability, len(names))
732671
for i, c := range names {

pkg/start/start_integration_test.go

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -190,7 +190,11 @@ func TestIntegrationCVO_initializeAndUpgrade(t *testing.T) {
190190
}
191191

192192
cvif, cif := options.prepareConfigInformerFactories(cb)
193-
controllers, err := options.NewControllerContext(cb, cvif, cif)
193+
featureset, gates, err := options.processInitialFeatureGate(context.Background(), cvif)
194+
if err != nil {
195+
t.Fatal(err)
196+
}
197+
controllers, err := options.NewControllerContext(cb, featureset, gates, cvif, cif)
194198
if err != nil {
195199
t.Fatal(err)
196200
}
@@ -327,7 +331,11 @@ func TestIntegrationCVO_gracefulStepDown(t *testing.T) {
327331
}
328332

329333
cvif, cif := options.prepareConfigInformerFactories(cb)
330-
controllers, err := options.NewControllerContext(cb, cvif, cif)
334+
featureset, gates, err := options.processInitialFeatureGate(context.Background(), cvif)
335+
if err != nil {
336+
t.Fatal(err)
337+
}
338+
controllers, err := options.NewControllerContext(cb, featureset, gates, cvif, cif)
331339
if err != nil {
332340
t.Fatal(err)
333341
}
@@ -526,7 +534,11 @@ metadata:
526534
}
527535

528536
cvif, cif := options.prepareConfigInformerFactories(cb)
529-
controllers, err := options.NewControllerContext(cb, cvif, cif)
537+
featureset, gates, err := options.processInitialFeatureGate(context.Background(), cvif)
538+
if err != nil {
539+
t.Fatal(err)
540+
}
541+
controllers, err := options.NewControllerContext(cb, featureset, gates, cvif, cif)
530542
if err != nil {
531543
t.Fatal(err)
532544
}

0 commit comments

Comments
 (0)