Skip to content

Commit 805c12d

Browse files
authored
Merge pull request #3145 from k8s-infra-cherrypick-robot/cherry-pick-3096-to-release-1.7
[release-1.7] Don't delete VMSS upon failure and add bootstrap status condition
2 parents 7df6ccf + 2a90739 commit 805c12d

File tree

6 files changed

+51
-16
lines changed

6 files changed

+51
-16
lines changed

azure/converters/vmss.go

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@ package converters
1919
import (
2020
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2021-11-01/compute"
2121
"github.com/Azure/go-autorest/autorest/to"
22+
"k8s.io/utils/pointer"
2223
azprovider "sigs.k8s.io/cloud-provider-azure/pkg/provider"
2324
infrav1 "sigs.k8s.io/cluster-api-provider-azure/api/v1beta1"
2425
"sigs.k8s.io/cluster-api-provider-azure/azure"
@@ -121,6 +122,16 @@ func SDKToVMSSVM(sdkInstance compute.VirtualMachineScaleSetVM) *azure.VMSSVM {
121122
instance.Name = *sdkInstance.OsProfile.ComputerName
122123
}
123124

125+
if sdkInstance.Resources != nil {
126+
for _, r := range *sdkInstance.Resources {
127+
if r.ProvisioningState != nil && r.Name != nil &&
128+
(*r.Name == azure.BootstrappingExtensionLinux || *r.Name == azure.BootstrappingExtensionWindows) {
129+
instance.BootstrappingState = infrav1.ProvisioningState(pointer.StringDeref(r.ProvisioningState, ""))
130+
break
131+
}
132+
}
133+
}
134+
124135
if sdkInstance.StorageProfile != nil && sdkInstance.StorageProfile.ImageReference != nil {
125136
imageRef := sdkInstance.StorageProfile.ImageReference
126137
instance.Image = SDKImageToImage(imageRef, sdkInstance.Plan != nil)

azure/defaults.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,13 @@ const (
5151
WindowsOS = "Windows"
5252
)
5353

54+
const (
55+
// BootstrappingExtensionLinux is the name of the Linux CAPZ bootstrapping VM extension.
56+
BootstrappingExtensionLinux = "CAPZ.Linux.Bootstrapping"
57+
// BootstrappingExtensionWindows is the name of the Windows CAPZ bootstrapping VM extension.
58+
BootstrappingExtensionWindows = "CAPZ.Windows.Bootstrapping"
59+
)
60+
5461
const (
5562
// DefaultWindowsOsAndVersion is the default Windows Server version to use when
5663
// generating default images for Windows nodes.
@@ -309,7 +316,7 @@ func GetBootstrappingVMExtension(osType string, cloud string, vmName string) *Ex
309316
if osType == LinuxOS && cloud == azureautorest.PublicCloud.Name {
310317
// The command checks for the existence of the bootstrapSentinelFile on the machine, with retries and sleep between retries.
311318
return &ExtensionSpec{
312-
Name: "CAPZ.Linux.Bootstrapping",
319+
Name: BootstrappingExtensionLinux,
313320
VMName: vmName,
314321
Publisher: "Microsoft.Azure.ContainerUpstream",
315322
Version: "1.0",
@@ -321,7 +328,7 @@ func GetBootstrappingVMExtension(osType string, cloud string, vmName string) *Ex
321328
// This command checks for the existence of the bootstrapSentinelFile on the machine, with retries and sleep between retries.
322329
// If the file is not present after the retries are exhausted the extension fails with return code '-2' - ERROR_FILE_NOT_FOUND.
323330
return &ExtensionSpec{
324-
Name: "CAPZ.Windows.Bootstrapping",
331+
Name: BootstrappingExtensionWindows,
325332
VMName: vmName,
326333
Publisher: "Microsoft.Azure.ContainerUpstream",
327334
Version: "1.0",

azure/scope/machinepoolmachine.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -273,16 +273,29 @@ func (s *MachinePoolMachineScope) Close(ctx context.Context) error {
273273
return s.PatchObject(ctx)
274274
}
275275

276-
// UpdateNodeStatus AzureMachinePoolMachine conditions and ready status. It will also update the node ref and the Kubernetes
276+
// UpdateNodeStatus updates AzureMachinePoolMachine conditions and ready status. It will also update the node ref and the Kubernetes
277277
// version of the VM instance if the node is found.
278278
// Note: This func should be called at the end of a reconcile request and after updating the scope with the most recent Azure data.
279279
func (s *MachinePoolMachineScope) UpdateNodeStatus(ctx context.Context) error {
280-
ctx, _, done := tele.StartSpanWithLogger(
280+
ctx, log, done := tele.StartSpanWithLogger(
281281
ctx,
282282
"scope.MachinePoolMachineScope.UpdateNodeStatus",
283283
)
284284
defer done()
285285

286+
if s.instance != nil {
287+
switch s.instance.BootstrappingState {
288+
case infrav1.Creating:
289+
conditions.MarkFalse(s.AzureMachinePoolMachine, infrav1.BootstrapSucceededCondition, infrav1.BootstrapInProgressReason, clusterv1.ConditionSeverityInfo, "VM bootstrapping")
290+
case infrav1.Failed:
291+
log.Info("VM bootstrapping failed")
292+
conditions.MarkFalse(s.AzureMachinePoolMachine, infrav1.BootstrapSucceededCondition, infrav1.BootstrapFailedReason, clusterv1.ConditionSeverityInfo, "VM bootstrapping failed")
293+
case infrav1.Succeeded:
294+
log.Info("VM bootstrapping succeeded")
295+
conditions.MarkTrue(s.AzureMachinePoolMachine, infrav1.BootstrapSucceededCondition)
296+
}
297+
}
298+
286299
var node *corev1.Node
287300
nodeRef := s.AzureMachinePoolMachine.Status.NodeRef
288301

azure/types.go

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -94,12 +94,13 @@ type ExtensionSpec struct {
9494
type (
9595
// VMSSVM defines a VM in a virtual machine scale set.
9696
VMSSVM struct {
97-
ID string `json:"id,omitempty"`
98-
InstanceID string `json:"instanceID,omitempty"`
99-
Image infrav1.Image `json:"image,omitempty"`
100-
Name string `json:"name,omitempty"`
101-
AvailabilityZone string `json:"availabilityZone,omitempty"`
102-
State infrav1.ProvisioningState `json:"vmState,omitempty"`
97+
ID string `json:"id,omitempty"`
98+
InstanceID string `json:"instanceID,omitempty"`
99+
Image infrav1.Image `json:"image,omitempty"`
100+
Name string `json:"name,omitempty"`
101+
AvailabilityZone string `json:"availabilityZone,omitempty"`
102+
State infrav1.ProvisioningState `json:"vmState,omitempty"`
103+
BootstrappingState infrav1.ProvisioningState `json:"bootstrappingState,omitempty"`
103104
}
104105

105106
// VMSS defines a virtual machine scale set.

exp/controllers/azuremachinepool_controller.go

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -315,11 +315,8 @@ func (ampr *AzureMachinePoolReconciler) reconcileNormal(ctx context.Context, mac
315315
log.Info("Unexpected scale set deletion", "id", machinePoolScope.ProviderID())
316316
ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMDeletion", "Unexpected Azure scale set deletion")
317317
case infrav1.Failed:
318-
err := ams.Delete(ctx)
319-
if err != nil {
320-
return reconcile.Result{}, errors.Wrap(err, "failed to delete scale set in a failed state")
321-
}
322-
return reconcile.Result{}, errors.Wrap(err, "Scale set deleted, retry creating in next reconcile")
318+
log.Info("Unexpected scale set failure", "id", machinePoolScope.ProviderID())
319+
ampr.Recorder.Eventf(machinePoolScope.AzureMachinePool, corev1.EventTypeWarning, "UnexpectedVMFailure", "Unexpected Azure scale set failure")
323320
}
324321

325322
if machinePoolScope.NeedsRequeue() {

exp/controllers/azuremachinepoolmachine_controller.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@ import (
3838
capierrors "sigs.k8s.io/cluster-api/errors"
3939
"sigs.k8s.io/cluster-api/util"
4040
"sigs.k8s.io/cluster-api/util/annotations"
41+
"sigs.k8s.io/cluster-api/util/conditions"
4142
"sigs.k8s.io/cluster-api/util/predicates"
4243
ctrl "sigs.k8s.io/controller-runtime"
4344
"sigs.k8s.io/controller-runtime/pkg/client"
@@ -287,6 +288,11 @@ func (ampmr *AzureMachinePoolMachineController) reconcileNormal(ctx context.Cont
287288

288289
log.V(2).Info(fmt.Sprintf("Scale Set VM is %s", state), "id", machineScope.ProviderID())
289290

291+
bootstrappingCondition := conditions.Get(machineScope.AzureMachinePoolMachine, infrav1.BootstrapSucceededCondition)
292+
if bootstrappingCondition != nil && bootstrappingCondition.Reason == infrav1.BootstrapFailedReason {
293+
return reconcile.Result{}, nil
294+
}
295+
290296
if !infrav1.IsTerminalProvisioningState(state) || !machineScope.IsReady() {
291297
log.V(2).Info("Requeuing", "state", state, "ready", machineScope.IsReady())
292298
// we are in a non-terminal state, retry in a bit
@@ -377,7 +383,7 @@ func (r *azureMachinePoolMachineReconciler) Delete(ctx context.Context) error {
377383
}
378384

379385
if err := r.Scope.UpdateInstanceStatus(ctx); err != nil {
380-
log.V(4).Info("failed to update VMSS VM instanace status during delete")
386+
log.V(4).Info("failed to update VMSS VM instance status during delete")
381387
}
382388
}()
383389

0 commit comments

Comments
 (0)