Skip to content

Commit 2a469e3

Browse files
cvo: When installing or upgrading, fast-fill cluster-operators
The must-gather and insights operator depend on cluster operators and related objects in order to identify resources to create. Because creation of the ClusterOperator objects is delegated to each operator, install and upgrade failures of new operators can leave us unable to gather the requisite info if the cluster degrades before those steps. Add a new selective Precreating install mode and, at the beginning of an initializing or upgrading sync pass, do a single pass without retries over all cluster operators in the payload to attempt to create the ClusterOperators if they don't exist. If we succeed at creating the object, try exactly once to update its status so that relatedObjects can be set.
1 parent 294dc15 commit 2a469e3

File tree

5 files changed

+69
-11
lines changed

5 files changed

+69
-11
lines changed

lib/resourcebuilder/interface.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ const (
7171
UpdatingMode Mode = iota
7272
ReconcilingMode
7373
InitializingMode
74+
PrecreatingMode
7475
)
7576

7677
type Interface interface {

pkg/cvo/cvo.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -663,7 +663,11 @@ func (b *resourceBuilder) builderFor(m *lib.Manifest, state payload.State) (reso
663663
}
664664

665665
if b.clusterOperators != nil && m.GVK == configv1.SchemeGroupVersion.WithKind("ClusterOperator") {
666-
return cvointernal.NewClusterOperatorBuilder(b.clusterOperators, *m), nil
666+
client, err := clientset.NewForConfig(config)
667+
if err != nil {
668+
return nil, err
669+
}
670+
return cvointernal.NewClusterOperatorBuilder(b.clusterOperators, client.ConfigV1().ClusterOperators(), *m), nil
667671
}
668672
if resourcebuilder.Mapper.Exists(m.GVK) {
669673
return resourcebuilder.New(resourcebuilder.Mapper, config, *m)
@@ -694,6 +698,8 @@ func stateToMode(state payload.State) resourcebuilder.Mode {
694698
return resourcebuilder.UpdatingMode
695699
case payload.ReconcilingPayload:
696700
return resourcebuilder.ReconcilingMode
701+
case payload.PrecreatingPayload:
702+
return resourcebuilder.PrecreatingMode
697703
default:
698704
panic(fmt.Sprintf("unexpected payload state %d", int(state)))
699705
}

pkg/cvo/internal/operatorstatus.go

Lines changed: 32 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"time"
99
"unicode"
1010

11+
kerrors "k8s.io/apimachinery/pkg/api/errors"
1112
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1213
"k8s.io/apimachinery/pkg/runtime"
1314
"k8s.io/apimachinery/pkg/runtime/serializer"
@@ -48,16 +49,16 @@ func readClusterOperatorV1OrDie(objBytes []byte) *configv1.ClusterOperator {
4849
}
4950

5051
type clusterOperatorBuilder struct {
51-
client ClusterOperatorsGetter
52-
raw []byte
53-
modifier resourcebuilder.MetaV1ObjectModifierFunc
54-
mode resourcebuilder.Mode
52+
client ClusterOperatorsGetter
53+
createClient configclientv1.ClusterOperatorInterface
54+
raw []byte
55+
modifier resourcebuilder.MetaV1ObjectModifierFunc
56+
mode resourcebuilder.Mode
5557
}
5658

5759
func newClusterOperatorBuilder(config *rest.Config, m lib.Manifest) resourcebuilder.Interface {
58-
return NewClusterOperatorBuilder(clientClusterOperatorsGetter{
59-
getter: configclientv1.NewForConfigOrDie(config).ClusterOperators(),
60-
}, m)
60+
client := configclientv1.NewForConfigOrDie(config).ClusterOperators()
61+
return NewClusterOperatorBuilder(clientClusterOperatorsGetter{getter: client}, client, m)
6162
}
6263

6364
// ClusterOperatorsGetter abstracts object access with a client or a cache lister.
@@ -75,10 +76,11 @@ func (g clientClusterOperatorsGetter) Get(name string) (*configv1.ClusterOperato
7576

7677
// NewClusterOperatorBuilder accepts the ClusterOperatorsGetter interface which may be implemented by a
7778
// client or a lister cache.
78-
func NewClusterOperatorBuilder(client ClusterOperatorsGetter, m lib.Manifest) resourcebuilder.Interface {
79+
func NewClusterOperatorBuilder(client ClusterOperatorsGetter, createClient configclientv1.ClusterOperatorInterface, m lib.Manifest) resourcebuilder.Interface {
7980
return &clusterOperatorBuilder{
80-
client: client,
81-
raw: m.Raw,
81+
client: client,
82+
createClient: createClient,
83+
raw: m.Raw,
8284
}
8385
}
8486

@@ -97,6 +99,26 @@ func (b *clusterOperatorBuilder) Do(ctx context.Context) error {
9799
if b.modifier != nil {
98100
b.modifier(os)
99101
}
102+
103+
// create the object, and if we successfully created, update the status
104+
if b.mode == resourcebuilder.PrecreatingMode {
105+
clusterOperator, err := b.createClient.Create(os)
106+
if err != nil {
107+
if kerrors.IsAlreadyExists(err) {
108+
return nil
109+
}
110+
return err
111+
}
112+
clusterOperator.Status.RelatedObjects = os.Status.DeepCopy().RelatedObjects
113+
if _, err := b.createClient.UpdateStatus(clusterOperator); err != nil {
114+
if kerrors.IsConflict(err) {
115+
return nil
116+
}
117+
return err
118+
}
119+
return nil
120+
}
121+
100122
return waitForOperatorStatusToBeDone(ctx, 1*time.Second, b.client, os, b.mode)
101123
}
102124

pkg/cvo/sync_worker.go

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,12 +585,14 @@ func (w *SyncWorker) apply(ctx context.Context, payloadUpdate *payload.Update, w
585585
}
586586
graph := payload.NewTaskGraph(tasks)
587587
graph.Split(payload.SplitOnJobs)
588+
var precreateObjects bool
588589
switch work.State {
589590
case payload.InitializingPayload:
590591
// Create every component in parallel to maximize reaching steady
591592
// state.
592593
graph.Parallelize(payload.FlattenByNumberAndComponent)
593594
maxWorkers = len(graph.Nodes)
595+
precreateObjects = true
594596
case payload.ReconcilingPayload:
595597
// Run the graph in random order during reconcile so that we don't
596598
// hang on any particular component - we seed from the number of
@@ -608,6 +610,28 @@ func (w *SyncWorker) apply(ctx context.Context, payloadUpdate *payload.Update, w
608610
// perform an orderly roll out by payload order, using some parallelization
609611
// but avoiding out of order creation so components have some base
610612
graph.Parallelize(payload.ByNumberAndComponent)
613+
precreateObjects = true
614+
}
615+
616+
// in specific modes, attempt to precreate a set of known types (currently ClusterOperator) without
617+
// retries
618+
if precreateObjects {
619+
payload.RunGraph(ctx, graph, 8, func(ctx context.Context, tasks []*payload.Task) error {
620+
for _, task := range tasks {
621+
if contextIsCancelled(ctx) {
622+
return cr.CancelError()
623+
}
624+
if task.Manifest.GVK != configv1.SchemeGroupVersion.WithKind("ClusterOperator") {
625+
continue
626+
}
627+
if err := w.builder.Apply(ctx, task.Manifest, payload.PrecreatingPayload); err != nil {
628+
klog.V(2).Infof("Unable to precreate resource %s: %v", task, err)
629+
continue
630+
}
631+
klog.V(4).Infof("Precreated resource %s", task)
632+
}
633+
return nil
634+
})
611635
}
612636

613637
// update each object

pkg/payload/payload.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ const (
5656
// Our goal is to get the entire payload created, even if some
5757
// operators are still converging.
5858
InitializingPayload
59+
// PrecreatingPayload indicates we are selectively creating
60+
// specific resources during a first pass of the payload to
61+
// provide better visibility during install and upgrade of
62+
// error conditions.
63+
PrecreatingPayload
5964
)
6065

6166
// Initializing is true if the state is InitializingPayload.

0 commit comments

Comments
 (0)