Skip to content

Commit 88dec15

Browse files
Merge pull request openshift#8693 from r4f4/capi-run-controller-fail-fix
OCPBUGS-36378: capi: start controllers after WaitGroup is created
2 parents 05dc6ca + 752f075 commit 88dec15

File tree

2 files changed

+26
-27
lines changed

2 files changed

+26
-27
lines changed

pkg/clusterapi/system.go

Lines changed: 12 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -316,13 +316,6 @@ func (c *system) Run(ctx context.Context) error {
316316
// We only show controller logs if the log level is DEBUG or above
317317
c.logWriter = logrus.StandardLogger().WriterLevel(logrus.DebugLevel)
318318

319-
// Run the controllers.
320-
for _, ct := range controllers {
321-
if err := c.runController(ctx, ct); err != nil {
322-
return fmt.Errorf("failed to run controller %q: %w", ct.Name, err)
323-
}
324-
}
325-
326319
// We create a wait group to wait for the controllers to stop,
327320
// this waitgroup is a global, and is used by the Teardown function
328321
// which is expected to be called when the program exits.
@@ -331,6 +324,7 @@ func (c *system) Run(ctx context.Context) error {
331324
defer c.wg.Done()
332325
// Stop the controllers when the context is cancelled.
333326
<-ctx.Done()
327+
logrus.Info("Shutting down local Cluster API controllers...")
334328
for _, ct := range controllers {
335329
if ct.state != nil {
336330
if err := ct.state.Stop(); err != nil {
@@ -340,12 +334,14 @@ func (c *system) Run(ctx context.Context) error {
340334
logrus.Infof("Stopped controller: %s", ct.Name)
341335
}
342336
}
337+
}()
343338

344-
// Stop the local control plane.
345-
if err := c.lcp.Stop(); err != nil {
346-
logrus.Warnf("Failed to stop local Cluster API control plane: %v", err)
339+
// Run the controllers.
340+
for _, ct := range controllers {
341+
if err := c.runController(ctx, ct); err != nil {
342+
return fmt.Errorf("failed to run controller %q: %w", ct.Name, err)
347343
}
348-
}()
344+
}
349345

350346
return nil
351347
}
@@ -377,10 +373,13 @@ func (c *system) Teardown() {
377373
// Proceed to shutdown.
378374
c.teardownOnce.Do(func() {
379375
c.cancel()
380-
logrus.Info("Shutting down local Cluster API control plane...")
381376
ch := make(chan struct{})
382377
go func() {
383378
c.wg.Wait()
379+
logrus.Info("Shutting down local Cluster API control plane...")
380+
if err := c.lcp.Stop(); err != nil {
381+
logrus.Warnf("Failed to stop local Cluster API control plane: %v", err)
382+
}
384383
close(ch)
385384
}()
386385
select {
@@ -463,7 +462,7 @@ func (c *system) runController(ctx context.Context, ct *controller) error {
463462
// If the provider is not empty, we extract it to the binaries directory.
464463
if ct.Provider != nil {
465464
if err := ct.Provider.Extract(c.lcp.BinDir); err != nil {
466-
logrus.Fatal(err)
465+
return fmt.Errorf("failed to extract provider %q: %w", ct.Name, err)
467466
}
468467
}
469468

pkg/infrastructure/clusterapi/clusterapi.go

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -128,16 +128,6 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
128128
logrus.Debugf("No pre-provisioning requirements for the %s provider", i.impl.Name())
129129
}
130130

131-
// Run the CAPI system.
132-
timer.StartTimer(infrastructureStage)
133-
capiSystem := clusterapi.System()
134-
if err := capiSystem.Run(ctx); err != nil {
135-
return fileList, fmt.Errorf("failed to run cluster api system: %w", err)
136-
}
137-
138-
// Grab the client.
139-
cl := capiSystem.Client()
140-
141131
// If we're skipping bootstrap destroy, shutdown the local control plane.
142132
// Otherwise, we will shut it down after bootstrap destroy.
143133
// This has to execute as the last defer in the stack since previous defers might still need the local controlplane.
@@ -149,17 +139,27 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
149139
}
150140

151141
// Make sure to always return generated manifests, even if errors happened
152-
defer func(ctx context.Context, cl client.Client) {
142+
defer func(ctx context.Context) {
153143
var errs []error
154144
// Overriding the named return with the generated list
155-
fileList, errs = i.collectManifests(ctx, cl)
145+
fileList, errs = i.collectManifests(ctx, clusterapi.System().Client())
156146
// If Provision returned an error, add it to the list
157147
if err != nil {
158-
capiSystem.CleanEtcd()
148+
clusterapi.System().CleanEtcd()
159149
errs = append(errs, err)
160150
}
161151
err = utilerrors.NewAggregate(errs)
162-
}(ctx, cl)
152+
}(ctx)
153+
154+
// Run the CAPI system.
155+
timer.StartTimer(infrastructureStage)
156+
capiSystem := clusterapi.System()
157+
if err := capiSystem.Run(ctx); err != nil {
158+
return fileList, fmt.Errorf("failed to run cluster api system: %w", err)
159+
}
160+
161+
// Grab the client.
162+
cl := capiSystem.Client()
163163

164164
i.appliedManifests = []client.Object{}
165165

0 commit comments

Comments
 (0)