Skip to content

Commit 532abc8

Browse files
committed
clusterapi: poll when waiting for provisioning
This commit switches the infrastructure and machine readiness checks from an exponential backoff to polling at a fixed 15-second interval. The exponential backoff is ineffective with our model because the time between checks can become quite large in later steps: starting at 10 seconds with a factor of 1.5, the interval grows to more than six minutes by the tenth check (subject to the configured cap). As we are using a local control plane and there is no concern with rate limiting, a check at a regular interval is a better solution.
1 parent 9f02731 commit 532abc8

File tree

1 file changed

+40
-48
lines changed

1 file changed

+40
-48
lines changed

pkg/infrastructure/clusterapi/clusterapi.go

Lines changed: 40 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -189,25 +189,21 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
189189
logrus.Infof("Waiting up to %v (until %v %s) for network infrastructure to become ready...", timeout, untilTime.Format(time.Kitchen), timezone)
190190
var cluster *clusterv1.Cluster
191191
{
192-
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
193-
Duration: time.Second * 10,
194-
Factor: float64(1.5),
195-
Steps: 32,
196-
Cap: timeout,
197-
}, func(ctx context.Context) (bool, error) {
198-
c := &clusterv1.Cluster{}
199-
if err := cl.Get(ctx, client.ObjectKey{
200-
Name: clusterID.InfraID,
201-
Namespace: capiutils.Namespace,
202-
}, c); err != nil {
203-
if apierrors.IsNotFound(err) {
204-
return false, nil
192+
if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, timeout, true,
193+
func(ctx context.Context) (bool, error) {
194+
c := &clusterv1.Cluster{}
195+
if err := cl.Get(ctx, client.ObjectKey{
196+
Name: clusterID.InfraID,
197+
Namespace: capiutils.Namespace,
198+
}, c); err != nil {
199+
if apierrors.IsNotFound(err) {
200+
return false, nil
201+
}
202+
return false, err
205203
}
206-
return false, err
207-
}
208-
cluster = c
209-
return cluster.Status.InfrastructureReady, nil
210-
}); err != nil {
204+
cluster = c
205+
return cluster.Status.InfrastructureReady, nil
206+
}); err != nil {
211207
if wait.Interrupted(err) {
212208
return fileList, fmt.Errorf("infrastructure was not ready within %v: %w", timeout, err)
213209
}
@@ -289,38 +285,34 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
289285
timezone, _ := untilTime.Zone()
290286
reqBootstrapPubIP := installConfig.Config.Publish == types.ExternalPublishingStrategy && i.impl.BootstrapHasPublicIP()
291287
logrus.Infof("Waiting up to %v (until %v %s) for machines %v to provision...", timeout, untilTime.Format(time.Kitchen), timezone, machineNames)
292-
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
293-
Duration: time.Second * 10,
294-
Factor: float64(1.5),
295-
Steps: 32,
296-
Cap: timeout,
297-
}, func(ctx context.Context) (bool, error) {
298-
allReady := true
299-
for _, machineName := range machineNames {
300-
machine := &clusterv1.Machine{}
301-
if err := cl.Get(ctx, client.ObjectKey{
302-
Name: machineName,
303-
Namespace: capiutils.Namespace,
304-
}, machine); err != nil {
305-
if apierrors.IsNotFound(err) {
306-
logrus.Debugf("Not found")
307-
return false, nil
288+
if err := wait.PollUntilContextTimeout(ctx, 15*time.Second, timeout, true,
289+
func(ctx context.Context) (bool, error) {
290+
allReady := true
291+
for _, machineName := range machineNames {
292+
machine := &clusterv1.Machine{}
293+
if err := cl.Get(ctx, client.ObjectKey{
294+
Name: machineName,
295+
Namespace: capiutils.Namespace,
296+
}, machine); err != nil {
297+
if apierrors.IsNotFound(err) {
298+
logrus.Debugf("Not found")
299+
return false, nil
300+
}
301+
return false, err
302+
}
303+
reqPubIP := reqBootstrapPubIP && machineName == capiutils.GenerateBoostrapMachineName(clusterID.InfraID)
304+
ready, err := checkMachineReady(machine, reqPubIP)
305+
if err != nil {
306+
return false, fmt.Errorf("failed waiting for machines: %w", err)
307+
}
308+
if !ready {
309+
allReady = false
310+
} else {
311+
logrus.Debugf("Machine %s is ready. Phase: %s", machine.Name, machine.Status.Phase)
308312
}
309-
return false, err
310-
}
311-
reqPubIP := reqBootstrapPubIP && machineName == capiutils.GenerateBoostrapMachineName(clusterID.InfraID)
312-
ready, err := checkMachineReady(machine, reqPubIP)
313-
if err != nil {
314-
return false, fmt.Errorf("failed waiting for machines: %w", err)
315-
}
316-
if !ready {
317-
allReady = false
318-
} else {
319-
logrus.Debugf("Machine %s is ready. Phase: %s", machine.Name, machine.Status.Phase)
320313
}
321-
}
322-
return allReady, nil
323-
}); err != nil {
314+
return allReady, nil
315+
}); err != nil {
324316
if wait.Interrupted(err) {
325317
return fileList, fmt.Errorf("control-plane machines were not provisioned within %v: %w", timeout, err)
326318
}

0 commit comments

Comments
 (0)