Skip to content

Commit 4e2c8f6

Browse files
committed
Add timers to CAPI infrastructure provisioning
Adds timers to each stage of CAPI infrastructure provisioning. These times will be logged at install complete, and can be used as a guide if we need to change the provisioning timeouts.
1 parent 2c82a98 commit 4e2c8f6

File tree

1 file changed

+40
-6
lines changed

1 file changed

+40
-6
lines changed

pkg/infrastructure/clusterapi/clusterapi.go

Lines changed: 40 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import (
3232
"github.com/openshift/installer/pkg/asset/rhcos"
3333
"github.com/openshift/installer/pkg/clusterapi"
3434
"github.com/openshift/installer/pkg/infrastructure"
35+
"github.com/openshift/installer/pkg/metrics/timer"
3536
"github.com/openshift/installer/pkg/types"
3637
)
3738

@@ -40,8 +41,17 @@ import (
4041
// interface the installer uses to call this provider.
4142
var _ infrastructure.Provider = (*InfraProvider)(nil)
4243

43-
// timeout for each provisioning step.
44-
const timeout = 15 * time.Minute
44+
const (
45+
// timeout for each provisioning step.
46+
timeout = 15 * time.Minute
47+
48+
preProvisionStage = "Infrastructure Pre-provisioning"
49+
infrastructureStage = "Network-infrastructure Provisioning"
50+
infrastructureReadyStage = "Post-network, pre-machine Provisioning"
51+
ignitionStage = "Bootstrap Ignition Provisioning"
52+
machineStage = "Machine Provisioning"
53+
postProvisionStage = "Infrastructure Post-provisioning"
54+
)
4555

4656
// InfraProvider implements common Cluster API logic and
4757
// contains the platform CAPI provider, which is called
@@ -110,15 +120,17 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
110120
MachineManifests: machineManifests,
111121
WorkersAsset: workersAsset,
112122
}
113-
123+
timer.StartTimer(preProvisionStage)
114124
if err := p.PreProvision(ctx, preProvisionInput); err != nil {
115125
return fileList, fmt.Errorf("failed during pre-provisioning: %w", err)
116126
}
127+
timer.StopTimer(preProvisionStage)
117128
} else {
118129
logrus.Debugf("No pre-provisioning requirements for the %s provider", i.impl.Name())
119130
}
120131

121132
// Run the CAPI system.
133+
timer.StartTimer(infrastructureStage)
122134
capiSystem := clusterapi.System()
123135
if err := capiSystem.Run(ctx, installConfig); err != nil {
124136
return fileList, fmt.Errorf("failed to run cluster api system: %w", err)
@@ -156,6 +168,9 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
156168

157169
// Wait for successful provisioning by checking the InfrastructureReady
158170
// status on the cluster object.
171+
untilTime := time.Now().Add(timeout)
172+
timezone, _ := untilTime.Zone()
173+
logrus.Infof("Waiting up to %v (until %v %s) for network infrastructure to become ready...", timeout, untilTime.Format(time.Kitchen), timezone)
159174
var cluster *clusterv1.Cluster
160175
{
161176
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
@@ -177,7 +192,10 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
177192
cluster = c
178193
return cluster.Status.InfrastructureReady, nil
179194
}); err != nil {
180-
return fileList, fmt.Errorf("infrastructure was not ready within %v: %w", timeout, err)
195+
if wait.Interrupted(err) {
196+
return fileList, fmt.Errorf("infrastructure was not ready within %v: %w", timeout, err)
197+
}
198+
return fileList, fmt.Errorf("infrastructure is not ready: %w", err)
181199
}
182200
if cluster == nil {
183201
return fileList, fmt.Errorf("error occurred during load balancer ready check")
@@ -186,6 +204,8 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
186204
return fileList, fmt.Errorf("control plane endpoint is not set")
187205
}
188206
}
207+
timer.StopTimer(infrastructureStage)
208+
logrus.Info("Network infrastructure is ready")
189209

190210
if p, ok := i.impl.(InfraReadyProvider); ok {
191211
infraReadyInput := InfraReadyInput{
@@ -194,9 +214,11 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
194214
InfraID: clusterID.InfraID,
195215
}
196216

217+
timer.StartTimer(infrastructureReadyStage)
197218
if err := p.InfraReady(ctx, infraReadyInput); err != nil {
198219
return fileList, fmt.Errorf("failed provisioning resources after infrastructure ready: %w", err)
199220
}
221+
timer.StopTimer(infrastructureReadyStage)
200222
} else {
201223
logrus.Debugf("No infrastructure ready requirements for the %s provider", i.impl.Name())
202224
}
@@ -217,16 +239,19 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
217239
TFVarsAsset: tfvarsAsset,
218240
}
219241

242+
timer.StartTimer(ignitionStage)
220243
if bootstrapIgnData, err = p.Ignition(ctx, ignInput); err != nil {
221244
return fileList, fmt.Errorf("failed preparing ignition data: %w", err)
222245
}
246+
timer.StopTimer(ignitionStage)
223247
} else {
224248
logrus.Debugf("No Ignition requirements for the %s provider", i.impl.Name())
225249
}
226250
bootstrapIgnSecret := IgnitionSecret(bootstrapIgnData, clusterID.InfraID, "bootstrap")
227251
masterIgnSecret := IgnitionSecret(masterIgnAsset.Files()[0].Data, clusterID.InfraID, "master")
228252
machineManifests = append(machineManifests, bootstrapIgnSecret, masterIgnSecret)
229253

254+
timer.StartTimer(machineStage)
230255
// Create the machine manifests.
231256
for _, m := range machineManifests {
232257
m.SetNamespace(capiutils.Namespace)
@@ -242,7 +267,9 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
242267
masterCount = *reps
243268
}
244269

245-
logrus.Debugf("Waiting for machines to provision")
270+
untilTime := time.Now().Add(timeout)
271+
timezone, _ := untilTime.Zone()
272+
logrus.Infof("Waiting up to %v (until %v %s) for machines to provision...", timeout, untilTime.Format(time.Kitchen), timezone)
246273
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
247274
Duration: time.Second * 10,
248275
Factor: float64(1.5),
@@ -271,9 +298,14 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
271298
}
272299
return true, nil
273300
}); err != nil {
274-
return fileList, fmt.Errorf("machines were not provisioned within %v: %w", timeout, err)
301+
if wait.Interrupted(err) {
302+
return fileList, fmt.Errorf("control-plane machines were not provisioned within %v: %w", timeout, err)
303+
}
304+
return fileList, fmt.Errorf("control-plane machines are not ready: %w", err)
275305
}
276306
}
307+
timer.StopTimer(machineStage)
308+
logrus.Info("Control-plane machines are ready")
277309

278310
if p, ok := i.impl.(PostProvider); ok {
279311
postMachineInput := PostProvisionInput{
@@ -282,9 +314,11 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
282314
InfraID: clusterID.InfraID,
283315
}
284316

317+
timer.StartTimer(postProvisionStage)
285318
if err = p.PostProvision(ctx, postMachineInput); err != nil {
286319
return fileList, fmt.Errorf("failed during post-machine creation hook: %w", err)
287320
}
321+
timer.StopTimer(postProvisionStage)
288322
}
289323

290324
// For each manifest we created, retrieve it and store it in the asset.

0 commit comments

Comments
 (0)