Skip to content

Commit 5eaa629

Browse files
authored
Merge pull request #7271 from sbueringer/pr-make-CAPD-bootstrap-reentrant
🌱 CAPD: make Machine bootstrap reentrant
2 parents fa239cd + 38653d8 commit 5eaa629

File tree

8 files changed, with 63 additions and 43 deletions.

test/infrastructure/docker/api/v1beta1/dockermachine_types.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ type DockerMachineSpec struct {
5151

5252
// Bootstrapped is true when the kubeadm bootstrapping has been run
5353
// against this machine
54+
// Deprecated: This field will be removed in the next apiVersion.
5455
// +optional
5556
Bootstrapped bool `json:"bootstrapped,omitempty"`
5657
}

test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinepools.yaml

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachines.yaml

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/infrastructure/docker/config/crd/bases/infrastructure.cluster.x-k8s.io_dockermachinetemplates.yaml

Lines changed: 3 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

test/infrastructure/docker/exp/api/v1beta1/dockermachinepool_types.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -107,6 +107,7 @@ type DockerMachinePoolInstanceStatus struct {
107107

108108
// Bootstrapped is true when the kubeadm bootstrapping has been run
109109
// against this machine
110+
// Deprecated: This field will be removed in the next apiVersion.
110111
// +optional
111112
Bootstrapped bool `json:"bootstrapped,omitempty"`
112113
}

test/infrastructure/docker/exp/internal/docker/nodepool.go

Lines changed: 25 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -278,28 +278,35 @@ func (np *NodePool) reconcileMachine(ctx context.Context, machine *docker.Machin
278278

279279
// if the machine isn't bootstrapped, only then run bootstrap scripts
280280
if !machineStatus.Bootstrapped {
281-
log.Info("Bootstrapping instance", "instance", machine.Name())
282-
if err := externalMachine.PreloadLoadImages(ctx, np.dockerMachinePool.Spec.Template.PreLoadImages); err != nil {
283-
return ctrl.Result{}, errors.Wrapf(err, "failed to pre-load images into the docker machine with instance name %s", machine.Name())
284-
}
285-
286-
bootstrapData, format, err := getBootstrapData(ctx, np.client, np.machinePool)
287-
if err != nil {
288-
return ctrl.Result{}, errors.Wrapf(err, "failed to get bootstrap data for instance named %s", machine.Name())
289-
}
290-
291-
timeoutctx, cancel := context.WithTimeout(ctx, 3*time.Minute)
281+
timeoutCtx, cancel := context.WithTimeout(ctx, 3*time.Minute)
292282
defer cancel()
293-
// Run the bootstrap script. Simulates cloud-init/Ignition.
294-
if err := externalMachine.ExecBootstrap(timeoutctx, bootstrapData, format); err != nil {
295-
return ctrl.Result{}, errors.Wrapf(err, "failed to exec DockerMachinePool instance bootstrap for instance named %s", machine.Name())
296-
}
283+
297284
// Check for bootstrap success
298-
if err := externalMachine.CheckForBootstrapSuccess(timeoutctx); err != nil {
299-
return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete")
300-
}
285+
// We have to check here to make this reentrant for cases where the bootstrap succeeded
286+
// but Bootstrapped was never set on the object. We only try to bootstrap if the machine
287+
// is not already bootstrapped.
288+
if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, false); err != nil {
289+
log.Info("Bootstrapping instance", "instance", machine.Name())
290+
if err := externalMachine.PreloadLoadImages(timeoutCtx, np.dockerMachinePool.Spec.Template.PreLoadImages); err != nil {
291+
return ctrl.Result{}, errors.Wrapf(err, "failed to pre-load images into the docker machine with instance name %s", machine.Name())
292+
}
293+
294+
bootstrapData, format, err := getBootstrapData(timeoutCtx, np.client, np.machinePool)
295+
if err != nil {
296+
return ctrl.Result{}, errors.Wrapf(err, "failed to get bootstrap data for instance named %s", machine.Name())
297+
}
301298

299+
// Run the bootstrap script. Simulates cloud-init/Ignition.
300+
if err := externalMachine.ExecBootstrap(timeoutCtx, bootstrapData, format); err != nil {
301+
return ctrl.Result{}, errors.Wrapf(err, "failed to exec DockerMachinePool instance bootstrap for instance named %s", machine.Name())
302+
}
303+
// Check for bootstrap success
304+
if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, true); err != nil {
305+
return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete")
306+
}
307+
}
302308
machineStatus.Bootstrapped = true
309+
303310
// return to surface the machine has been bootstrapped.
304311
return ctrl.Result{Requeue: true}, nil
305312
}

test/infrastructure/docker/internal/controllers/dockermachine_controller.go

Lines changed: 22 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -288,25 +288,31 @@ func (r *DockerMachineReconciler) reconcileNormal(ctx context.Context, cluster *
288288

289289
// if the machine isn't bootstrapped, only then run bootstrap scripts
290290
if !dockerMachine.Spec.Bootstrapped {
291-
bootstrapData, format, err := r.getBootstrapData(ctx, machine)
292-
if err != nil {
293-
log.Error(err, "failed to get bootstrap data")
294-
return ctrl.Result{}, err
295-
}
296-
297-
timeoutctx, cancel := context.WithTimeout(ctx, 3*time.Minute)
291+
timeoutCtx, cancel := context.WithTimeout(ctx, 3*time.Minute)
298292
defer cancel()
299-
// Run the bootstrap script. Simulates cloud-init/Ignition.
300-
if err := externalMachine.ExecBootstrap(timeoutctx, bootstrapData, format); err != nil {
301-
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
302-
return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap")
303-
}
293+
304294
// Check for bootstrap success
305-
if err := externalMachine.CheckForBootstrapSuccess(timeoutctx); err != nil {
306-
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
307-
return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete")
308-
}
295+
// We have to check here to make this reentrant for cases where the bootstrap succeeded
296+
// but Bootstrapped was never set on the object. We only try to bootstrap if the machine
297+
// is not already bootstrapped.
298+
if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, false); err != nil {
299+
bootstrapData, format, err := r.getBootstrapData(timeoutCtx, machine)
300+
if err != nil {
301+
log.Error(err, "failed to get bootstrap data")
302+
return ctrl.Result{}, err
303+
}
309304

305+
// Run the bootstrap script. Simulates cloud-init/Ignition.
306+
if err := externalMachine.ExecBootstrap(timeoutCtx, bootstrapData, format); err != nil {
307+
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
308+
return ctrl.Result{}, errors.Wrap(err, "failed to exec DockerMachine bootstrap")
309+
}
310+
// Check for bootstrap success
311+
if err := externalMachine.CheckForBootstrapSuccess(timeoutCtx, true); err != nil {
312+
conditions.MarkFalse(dockerMachine, infrav1.BootstrapExecSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityWarning, "Repeating bootstrap")
313+
return ctrl.Result{}, errors.Wrap(err, "failed to check for existence of bootstrap success file at /run/cluster-api/bootstrap-success.complete")
314+
}
315+
}
310316
dockerMachine.Spec.Bootstrapped = true
311317
}
312318

test/infrastructure/docker/internal/docker/machine.go

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -355,7 +355,7 @@ func (m *Machine) ExecBootstrap(ctx context.Context, data string, format bootstr
355355
}
356356
err := cmd.Run(ctx)
357357
if err != nil {
358-
log.Info("Failed running command", "command", command, "stdout", outStd.String(), "stderr", outErr.String(), "bootstrap data", data)
358+
log.Info("Failed running command", "instance", m.Name(), "command", command, "stdout", outStd.String(), "stderr", outErr.String(), "bootstrap data", data)
359359
logContainerDebugInfo(ctx, log, m.ContainerName())
360360
return errors.Wrap(errors.WithStack(err), "failed to run cloud config")
361361
}
@@ -365,7 +365,7 @@ func (m *Machine) ExecBootstrap(ctx context.Context, data string, format bootstr
365365
}
366366

367367
// CheckForBootstrapSuccess checks if bootstrap was successful by checking for existence of the sentinel file.
368-
func (m *Machine) CheckForBootstrapSuccess(ctx context.Context) error {
368+
func (m *Machine) CheckForBootstrapSuccess(ctx context.Context, logResult bool) error {
369369
log := ctrl.LoggerFrom(ctx)
370370

371371
if m.container == nil {
@@ -378,7 +378,9 @@ func (m *Machine) CheckForBootstrapSuccess(ctx context.Context) error {
378378
cmd.SetStderr(&outErr)
379379
cmd.SetStdout(&outStd)
380380
if err := cmd.Run(ctx); err != nil {
381-
log.Info("Failed running command", "command", "test -f /run/cluster-api/bootstrap-success.complete", "stdout", outStd.String(), "stderr", outErr.String())
381+
if logResult {
382+
log.Info("Failed running command", "command", "test -f /run/cluster-api/bootstrap-success.complete", "stdout", outStd.String(), "stderr", outErr.String())
383+
}
382384
return errors.Wrap(errors.WithStack(err), "failed to run bootstrap check")
383385
}
384386
return nil

0 commit comments

Comments
 (0)