@@ -32,6 +32,7 @@ import (
3232 "github.com/openshift/installer/pkg/asset/rhcos"
3333 "github.com/openshift/installer/pkg/clusterapi"
3434 "github.com/openshift/installer/pkg/infrastructure"
35+ "github.com/openshift/installer/pkg/metrics/timer"
3536 "github.com/openshift/installer/pkg/types"
3637)
3738
@@ -40,8 +41,17 @@ import (
4041// interface the installer uses to call this provider.
4142var _ infrastructure.Provider = (* InfraProvider )(nil )
4243
43- // timeout for each provisioning step.
44- const timeout = 15 * time .Minute
44+ const (
45+ // timeout for each provisioning step.
46+ timeout = 15 * time .Minute
47+
48+ preProvisionStage = "Infrastructure Pre-provisioning"
49+ infrastructureStage = "Network-infrastructure Provisioning"
50+ infrastructureReadyStage = "Post-network, pre-machine Provisioning"
51+ ignitionStage = "Bootstrap Ignition Provisioning"
52+ machineStage = "Machine Provisioning"
53+ postProvisionStage = "Infrastructure Post-provisioning"
54+ )
4555
4656// InfraProvider implements common Cluster API logic and
4757// contains the platform CAPI provider, which is called
@@ -110,15 +120,17 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
110120 MachineManifests : machineManifests ,
111121 WorkersAsset : workersAsset ,
112122 }
113-
123+ timer . StartTimer ( preProvisionStage )
114124 if err := p .PreProvision (ctx , preProvisionInput ); err != nil {
115125 return fileList , fmt .Errorf ("failed during pre-provisioning: %w" , err )
116126 }
127+ timer .StopTimer (preProvisionStage )
117128 } else {
118129 logrus .Debugf ("No pre-provisioning requirements for the %s provider" , i .impl .Name ())
119130 }
120131
121132 // Run the CAPI system.
133+ timer .StartTimer (infrastructureStage )
122134 capiSystem := clusterapi .System ()
123135 if err := capiSystem .Run (ctx , installConfig ); err != nil {
124136 return fileList , fmt .Errorf ("failed to run cluster api system: %w" , err )
@@ -156,6 +168,9 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
156168
157169 // Wait for successful provisioning by checking the InfrastructureReady
158170 // status on the cluster object.
171+ untilTime := time .Now ().Add (timeout )
172+ timezone , _ := untilTime .Zone ()
173+ logrus .Infof ("Waiting up to %v (until %v %s) for network infrastructure to become ready..." , timeout , untilTime .Format (time .Kitchen ), timezone )
159174 var cluster * clusterv1.Cluster
160175 {
161176 if err := wait .ExponentialBackoffWithContext (ctx , wait.Backoff {
@@ -177,7 +192,10 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
177192 cluster = c
178193 return cluster .Status .InfrastructureReady , nil
179194 }); err != nil {
180- return fileList , fmt .Errorf ("infrastructure was not ready within %v: %w" , timeout , err )
195+ if wait .Interrupted (err ) {
196+ return fileList , fmt .Errorf ("infrastructure was not ready within %v: %w" , timeout , err )
197+ }
198+ return fileList , fmt .Errorf ("infrastructure is not ready: %w" , err )
181199 }
182200 if cluster == nil {
183201 return fileList , fmt .Errorf ("error occurred during load balancer ready check" )
@@ -186,6 +204,8 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
186204 return fileList , fmt .Errorf ("control plane endpoint is not set" )
187205 }
188206 }
207+ timer .StopTimer (infrastructureStage )
208+ logrus .Info ("Network infrastructure is ready" )
189209
190210 if p , ok := i .impl .(InfraReadyProvider ); ok {
191211 infraReadyInput := InfraReadyInput {
@@ -194,9 +214,11 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
194214 InfraID : clusterID .InfraID ,
195215 }
196216
217+ timer .StartTimer (infrastructureReadyStage )
197218 if err := p .InfraReady (ctx , infraReadyInput ); err != nil {
198219 return fileList , fmt .Errorf ("failed provisioning resources after infrastructure ready: %w" , err )
199220 }
221+ timer .StopTimer (infrastructureReadyStage )
200222 } else {
201223 logrus .Debugf ("No infrastructure ready requirements for the %s provider" , i .impl .Name ())
202224 }
@@ -217,16 +239,19 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
217239 TFVarsAsset : tfvarsAsset ,
218240 }
219241
242+ timer .StartTimer (ignitionStage )
220243 if bootstrapIgnData , err = p .Ignition (ctx , ignInput ); err != nil {
221244 return fileList , fmt .Errorf ("failed preparing ignition data: %w" , err )
222245 }
246+ timer .StopTimer (ignitionStage )
223247 } else {
224248 logrus .Debugf ("No Ignition requirements for the %s provider" , i .impl .Name ())
225249 }
226250 bootstrapIgnSecret := IgnitionSecret (bootstrapIgnData , clusterID .InfraID , "bootstrap" )
227251 masterIgnSecret := IgnitionSecret (masterIgnAsset .Files ()[0 ].Data , clusterID .InfraID , "master" )
228252 machineManifests = append (machineManifests , bootstrapIgnSecret , masterIgnSecret )
229253
254+ timer .StartTimer (machineStage )
230255 // Create the machine manifests.
231256 for _ , m := range machineManifests {
232257 m .SetNamespace (capiutils .Namespace )
@@ -242,7 +267,9 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
242267 masterCount = * reps
243268 }
244269
245- logrus .Debugf ("Waiting for machines to provision" )
270+ untilTime := time .Now ().Add (timeout )
271+ timezone , _ := untilTime .Zone ()
272+ logrus .Infof ("Waiting up to %v (until %v %s) for machines to provision..." , timeout , untilTime .Format (time .Kitchen ), timezone )
246273 if err := wait .ExponentialBackoffWithContext (ctx , wait.Backoff {
247274 Duration : time .Second * 10 ,
248275 Factor : float64 (1.5 ),
@@ -271,9 +298,14 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
271298 }
272299 return true , nil
273300 }); err != nil {
274- return fileList , fmt .Errorf ("machines were not provisioned within %v: %w" , timeout , err )
301+ if wait .Interrupted (err ) {
302+ return fileList , fmt .Errorf ("control-plane machines were not provisioned within %v: %w" , timeout , err )
303+ }
304+ return fileList , fmt .Errorf ("control-plane machines are not ready: %w" , err )
275305 }
276306 }
307+ timer .StopTimer (machineStage )
308+ logrus .Info ("Control-plane machines are ready" )
277309
278310 if p , ok := i .impl .(PostProvider ); ok {
279311 postMachineInput := PostProvisionInput {
@@ -282,9 +314,11 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
282314 InfraID : clusterID .InfraID ,
283315 }
284316
317+ timer .StartTimer (postProvisionStage )
285318 if err = p .PostProvision (ctx , postMachineInput ); err != nil {
286319 return fileList , fmt .Errorf ("failed during post-machine creation hook: %w" , err )
287320 }
321+ timer .StopTimer (postProvisionStage )
288322 }
289323
290324 // For each manifest we created, retrieve it and store it in the asset.
0 commit comments