@@ -32,6 +32,7 @@ import (
3232 "github.com/openshift/installer/pkg/asset/rhcos"
3333 "github.com/openshift/installer/pkg/clusterapi"
3434 "github.com/openshift/installer/pkg/infrastructure"
35+ "github.com/openshift/installer/pkg/metrics/timer"
3536 "github.com/openshift/installer/pkg/types"
3637)
3738
@@ -40,6 +41,18 @@ import (
4041// interface the installer uses to call this provider.
4142var _ infrastructure.Provider = (* InfraProvider )(nil )
4243
44+ const (
45+ // timeout is the limit reported in the "Waiting up to ..." log messages and passed as the Cap of both wait.Backoff polls in Provision. NOTE(review): per the apimachinery docs, Cap limits the per-step interval rather than total elapsed time — confirm the logged limit is actually enforced.
46+ timeout = 15 * time .Minute
47+
48+ preProvisionStage = "Infrastructure Pre-provisioning" // timer name: started and stopped around the PreProvision hook.
49+ infrastructureStage = "Network-infrastructure Provisioning" // timer name: started before the CAPI system is run, stopped after the wait on the cluster's InfrastructureReady status.
50+ infrastructureReadyStage = "Post-network, pre-machine Provisioning" // timer name: started and stopped around the InfraReady hook.
51+ ignitionStage = "Bootstrap Ignition Provisioning" // timer name: started and stopped around the Ignition hook.
52+ machineStage = "Machine Provisioning" // timer name: started before the machine manifests are created, stopped after the wait for machines to provision.
53+ postProvisionStage = "Infrastructure Post-provisioning" // timer name: started and stopped around the PostProvision hook.
54+ )
55+
4356// InfraProvider implements common Cluster API logic and
4457// contains the platform CAPI provider, which is called
4558// in the lifecycle defined by the Provider interface.
@@ -107,15 +120,17 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
107120 MachineManifests : machineManifests ,
108121 WorkersAsset : workersAsset ,
109122 }
110-
123+ timer . StartTimer ( preProvisionStage )
111124 if err := p .PreProvision (ctx , preProvisionInput ); err != nil {
112125 return fileList , fmt .Errorf ("failed during pre-provisioning: %w" , err )
113126 }
127+ timer .StopTimer (preProvisionStage )
114128 } else {
115129 logrus .Debugf ("No pre-provisioning requirements for the %s provider" , i .impl .Name ())
116130 }
117131
118132 // Run the CAPI system.
133+ timer .StartTimer (infrastructureStage )
119134 capiSystem := clusterapi .System ()
120135 if err := capiSystem .Run (ctx , installConfig ); err != nil {
121136 return fileList , fmt .Errorf ("failed to run cluster api system: %w" , err )
@@ -153,12 +168,16 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
153168
154169 // Wait for successful provisioning by checking the InfrastructureReady
155170 // status on the cluster object.
171+ untilTime := time .Now ().Add (timeout )
172+ timezone , _ := untilTime .Zone ()
173+ logrus .Infof ("Waiting up to %v (until %v %s) for network infrastructure to become ready..." , timeout , untilTime .Format (time .Kitchen ), timezone )
156174 var cluster * clusterv1.Cluster
157175 {
158176 if err := wait .ExponentialBackoffWithContext (ctx , wait.Backoff {
159177 Duration : time .Second * 10 ,
160178 Factor : float64 (1.5 ),
161179 Steps : 32 ,
180+ Cap : timeout ,
162181 }, func (ctx context.Context ) (bool , error ) {
163182 c := & clusterv1.Cluster {}
164183 if err := cl .Get (ctx , client.ObjectKey {
@@ -173,7 +192,10 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
173192 cluster = c
174193 return cluster .Status .InfrastructureReady , nil
175194 }); err != nil {
176- return fileList , err
195+ if wait .Interrupted (err ) {
196+ return fileList , fmt .Errorf ("infrastructure was not ready within %v: %w" , timeout , err )
197+ }
198+ return fileList , fmt .Errorf ("infrastructure is not ready: %w" , err )
177199 }
178200 if cluster == nil {
179201 return fileList , fmt .Errorf ("error occurred during load balancer ready check" )
@@ -182,6 +204,8 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
182204 return fileList , fmt .Errorf ("control plane endpoint is not set" )
183205 }
184206 }
207+ timer .StopTimer (infrastructureStage )
208+ logrus .Info ("Network infrastructure is ready" )
185209
186210 if p , ok := i .impl .(InfraReadyProvider ); ok {
187211 infraReadyInput := InfraReadyInput {
@@ -190,9 +214,11 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
190214 InfraID : clusterID .InfraID ,
191215 }
192216
217+ timer .StartTimer (infrastructureReadyStage )
193218 if err := p .InfraReady (ctx , infraReadyInput ); err != nil {
194219 return fileList , fmt .Errorf ("failed provisioning resources after infrastructure ready: %w" , err )
195220 }
221+ timer .StopTimer (infrastructureReadyStage )
196222 } else {
197223 logrus .Debugf ("No infrastructure ready requirements for the %s provider" , i .impl .Name ())
198224 }
@@ -213,16 +239,19 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
213239 TFVarsAsset : tfvarsAsset ,
214240 }
215241
242+ timer .StartTimer (ignitionStage )
216243 if bootstrapIgnData , err = p .Ignition (ctx , ignInput ); err != nil {
217244 return fileList , fmt .Errorf ("failed preparing ignition data: %w" , err )
218245 }
246+ timer .StopTimer (ignitionStage )
219247 } else {
220248 logrus .Debugf ("No Ignition requirements for the %s provider" , i .impl .Name ())
221249 }
222250 bootstrapIgnSecret := IgnitionSecret (bootstrapIgnData , clusterID .InfraID , "bootstrap" )
223251 masterIgnSecret := IgnitionSecret (masterIgnAsset .Files ()[0 ].Data , clusterID .InfraID , "master" )
224252 machineManifests = append (machineManifests , bootstrapIgnSecret , masterIgnSecret )
225253
254+ timer .StartTimer (machineStage )
226255 // Create the machine manifests.
227256 for _ , m := range machineManifests {
228257 m .SetNamespace (capiutils .Namespace )
@@ -238,11 +267,14 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
238267 masterCount = * reps
239268 }
240269
241- logrus .Debugf ("Waiting for machines to provision" )
270+ untilTime := time .Now ().Add (timeout )
271+ timezone , _ := untilTime .Zone ()
272+ logrus .Infof ("Waiting up to %v (until %v %s) for machines to provision..." , timeout , untilTime .Format (time .Kitchen ), timezone )
242273 if err := wait .ExponentialBackoffWithContext (ctx , wait.Backoff {
243274 Duration : time .Second * 10 ,
244275 Factor : float64 (1.5 ),
245276 Steps : 32 ,
277+ Cap : timeout ,
246278 }, func (ctx context.Context ) (bool , error ) {
247279 for i := int64 (0 ); i < masterCount ; i ++ {
248280 machine := & clusterv1.Machine {}
@@ -266,9 +298,14 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
266298 }
267299 return true , nil
268300 }); err != nil {
269- return fileList , err
301+ if wait .Interrupted (err ) {
302+ return fileList , fmt .Errorf ("control-plane machines were not provisioned within %v: %w" , timeout , err )
303+ }
304+ return fileList , fmt .Errorf ("control-plane machines are not ready: %w" , err )
270305 }
271306 }
307+ timer .StopTimer (machineStage )
308+ logrus .Info ("Control-plane machines are ready" )
272309
273310 if p , ok := i .impl .(PostProvider ); ok {
274311 postMachineInput := PostProvisionInput {
@@ -277,9 +314,11 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
277314 InfraID : clusterID .InfraID ,
278315 }
279316
317+ timer .StartTimer (postProvisionStage )
280318 if err = p .PostProvision (ctx , postMachineInput ); err != nil {
281319 return fileList , fmt .Errorf ("failed during post-machine creation hook: %w" , err )
282320 }
321+ timer .StopTimer (postProvisionStage )
283322 }
284323
285324 // For each manifest we created, retrieve it and store it in the asset.
0 commit comments