Skip to content

Commit 943f807

Browse files
committed
OCPBUGS-34295: wait for ip address on machines
Updates the post-machine-creation logic to wait for the IP addresses to be populated. This ensures that when we write the manifests to disk, the IP address will be present in case we need it for must-gather. Adds debug logging for checking when machines are provisioned and have obtained the required IP addresses.
1 parent 5c8b919 commit 943f807

File tree

1 file changed

+48
-8
lines changed

1 file changed

+48
-8
lines changed

pkg/infrastructure/clusterapi/clusterapi.go

Lines changed: 48 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import (
1515
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1616
utilerrors "k8s.io/apimachinery/pkg/util/errors"
1717
"k8s.io/apimachinery/pkg/util/wait"
18+
"k8s.io/utils/ptr"
1819
clusterv1 "sigs.k8s.io/cluster-api/api/v1beta1"
1920
utilkubeconfig "sigs.k8s.io/cluster-api/util/kubeconfig"
2021
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -289,13 +290,15 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
289290
{
290291
untilTime := time.Now().Add(timeout)
291292
timezone, _ := untilTime.Zone()
292-
logrus.Infof("Waiting up to %v (until %v %s) for machines to provision...", timeout, untilTime.Format(time.Kitchen), timezone)
293+
reqBootstrapPubIP := installConfig.Config.Publish == types.ExternalPublishingStrategy && i.impl.BootstrapHasPublicIP()
294+
logrus.Infof("Waiting up to %v (until %v %s) for machines %v to provision...", timeout, untilTime.Format(time.Kitchen), timezone, machineNames)
293295
if err := wait.ExponentialBackoffWithContext(ctx, wait.Backoff{
294296
Duration: time.Second * 10,
295297
Factor: float64(1.5),
296298
Steps: 32,
297299
Cap: timeout,
298300
}, func(ctx context.Context) (bool, error) {
301+
allReady := true
299302
for _, machineName := range machineNames {
300303
machine := &clusterv1.Machine{}
301304
if err := cl.Get(ctx, client.ObjectKey{
@@ -308,15 +311,18 @@ func (i *InfraProvider) Provision(ctx context.Context, dir string, parents asset
308311
}
309312
return false, err
310313
}
311-
if machine.Status.Phase != string(clusterv1.MachinePhaseProvisioned) &&
312-
machine.Status.Phase != string(clusterv1.MachinePhaseRunning) {
313-
return false, nil
314-
} else if machine.Status.Phase == string(clusterv1.MachinePhaseFailed) {
315-
return false, fmt.Errorf("machine %s failed to provision: %q", machine.Name, *machine.Status.FailureMessage)
314+
reqPubIP := reqBootstrapPubIP && machineName == capiutils.GenerateBoostrapMachineName(clusterID.InfraID)
315+
ready, err := checkMachineReady(machine, reqPubIP)
316+
if err != nil {
317+
return false, fmt.Errorf("failed waiting for machines: %w", err)
318+
}
319+
if !ready {
320+
allReady = false
321+
} else {
322+
logrus.Debugf("Machine %s is ready. Phase: %s", machine.Name, machine.Status.Phase)
316323
}
317-
logrus.Debugf("Machine %s is ready. Phase: %s", machine.Name, machine.Status.Phase)
318324
}
319-
return true, nil
325+
return allReady, nil
320326
}); err != nil {
321327
if wait.Interrupted(err) {
322328
return fileList, fmt.Errorf("control-plane machines were not provisioned within %v: %w", timeout, err)
@@ -551,3 +557,37 @@ func (i *InfraProvider) collectManifests(ctx context.Context, cl client.Client)
551557
}
552558
return fileList, errorList
553559
}
560+
561+
func checkMachineReady(machine *clusterv1.Machine, requirePublicIP bool) (bool, error) {
562+
logrus.Debugf("Checking that machine %s has provisioned...", machine.Name)
563+
if machine.Status.Phase != string(clusterv1.MachinePhaseProvisioned) &&
564+
machine.Status.Phase != string(clusterv1.MachinePhaseRunning) {
565+
logrus.Debugf("Machine %s has not yet provisioned: %s", machine.Name, machine.Status.Phase)
566+
return false, nil
567+
} else if machine.Status.Phase == string(clusterv1.MachinePhaseFailed) {
568+
msg := ptr.Deref(machine.Status.FailureMessage, "machine.Status.FailureMessage was not set")
569+
return false, fmt.Errorf("machine %s failed to provision: %s", machine.Name, msg)
570+
}
571+
logrus.Debugf("Machine %s has status: %s", machine.Name, machine.Status.Phase)
572+
return hasRequiredIP(machine, requirePublicIP), nil
573+
}
574+
575+
func hasRequiredIP(machine *clusterv1.Machine, requirePublicIP bool) bool {
576+
logrus.Debugf("Checking that IP addresses are populated in the status of machine %s...", machine.Name)
577+
578+
for _, addr := range machine.Status.Addresses {
579+
switch {
580+
case len(addr.Address) == 0:
581+
continue
582+
case addr.Type == clusterv1.MachineExternalIP:
583+
logrus.Debugf("Found external IP address: %s", addr.Address)
584+
return true
585+
case addr.Type == clusterv1.MachineInternalIP && !requirePublicIP:
586+
logrus.Debugf("Found internal IP address: %s", addr.Address)
587+
return true
588+
}
589+
logrus.Debugf("Checked IP %s: %s", addr.Type, addr.Address)
590+
}
591+
logrus.Debugf("Still waiting for machine %s to get required IPs", machine.Name)
592+
return false
593+
}

0 commit comments

Comments
 (0)