Skip to content

Commit ab98452

Browse files
authored
Merge pull request #544 from CecileRobertMichon/vm-state
Improve VM provisioning State handling
2 parents 085098b + 688bb34 commit ab98452

File tree

6 files changed

+52
-41
lines changed

6 files changed

+52
-41
lines changed

api/v1alpha3/types.go

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -338,19 +338,19 @@ type LoadBalancerHealthCheck struct {
338338
// VMState describes the state of an Azure virtual machine.
339339
type VMState string
340340

341-
var (
341+
const (
342342
// VMStateCreating ...
343-
VMStateCreating = VMState("Creating")
343+
VMStateCreating VMState = "Creating"
344344
// VMStateDeleting ...
345-
VMStateDeleting = VMState("Deleting")
345+
VMStateDeleting VMState = "Deleting"
346346
// VMStateFailed ...
347-
VMStateFailed = VMState("Failed")
347+
VMStateFailed VMState = "Failed"
348348
// VMStateMigrating ...
349-
VMStateMigrating = VMState("Migrating")
349+
VMStateMigrating VMState = "Migrating"
350350
// VMStateSucceeded ...
351-
VMStateSucceeded = VMState("Succeeded")
351+
VMStateSucceeded VMState = "Succeeded"
352352
// VMStateUpdating ...
353-
VMStateUpdating = VMState("Updating")
353+
VMStateUpdating VMState = "Updating"
354354
)
355355

356356
// VM describes an Azure virtual machine.

cloud/scope/machine.go

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -161,11 +161,16 @@ func (m *MachineScope) SetVMState(v infrav1.VMState) {
161161
m.AzureMachine.Status.VMState = &v
162162
}
163163

164-
// SetReady sets the AzureMachine Ready Status
164+
// SetReady sets the AzureMachine Ready Status to true.
165165
func (m *MachineScope) SetReady() {
166166
m.AzureMachine.Status.Ready = true
167167
}
168168

169+
// SetNotReady sets the AzureMachine Ready Status to false.
170+
func (m *MachineScope) SetNotReady() {
171+
m.AzureMachine.Status.Ready = false
172+
}
173+
169174
// SetFailureMessage sets the AzureMachine status failure message.
170175
func (m *MachineScope) SetFailureMessage(v error) {
171176
m.AzureMachine.Status.FailureMessage = pointer.StringPtr(v.Error())

cloud/services/virtualmachines/virtualmachines.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -51,11 +51,11 @@ type Spec struct {
5151
func (s *Service) Get(ctx context.Context, spec interface{}) (interface{}, error) {
5252
vmSpec, ok := spec.(*Spec)
5353
if !ok {
54-
return compute.VirtualMachine{}, errors.New("invalid vm specification")
54+
return compute.VirtualMachine{}, errors.New("invalid VM specification")
5555
}
5656
vm, err := s.Client.Get(ctx, s.Scope.ResourceGroup(), vmSpec.Name)
5757
if err != nil && azure.ResourceNotFound(err) {
58-
return nil, errors.Wrapf(err, "vm %s not found", vmSpec.Name)
58+
return nil, errors.Wrapf(err, "VM %s not found", vmSpec.Name)
5959
} else if err != nil {
6060
return vm, err
6161
}
@@ -79,22 +79,22 @@ func (s *Service) Get(ctx context.Context, spec interface{}) (interface{}, error
7979
func (s *Service) Reconcile(ctx context.Context, spec interface{}) error {
8080
vmSpec, ok := spec.(*Spec)
8181
if !ok {
82-
return errors.New("invalid vm specification")
82+
return errors.New("invalid VM specification")
8383
}
8484

8585
storageProfile, err := generateStorageProfile(*vmSpec)
8686
if err != nil {
8787
return err
8888
}
8989

90-
klog.V(2).Infof("getting nic %s", vmSpec.NICName)
90+
klog.V(2).Infof("getting NIC %s", vmSpec.NICName)
9191
nic, err := s.InterfacesClient.Get(ctx, s.Scope.ResourceGroup(), vmSpec.NICName)
9292
if err != nil {
9393
return err
9494
}
95-
klog.V(2).Infof("got nic %s", vmSpec.NICName)
95+
klog.V(2).Infof("got NIC %s", vmSpec.NICName)
9696

97-
klog.V(2).Infof("creating vm %s ", vmSpec.Name)
97+
klog.V(2).Infof("creating VM %s ", vmSpec.Name)
9898

9999
sshKeyData := vmSpec.SSHKeyData
100100
if sshKeyData == "" {
@@ -174,27 +174,27 @@ func (s *Service) Reconcile(ctx context.Context, spec interface{}) error {
174174
return errors.Wrapf(err, "cannot create vm")
175175
}
176176

177-
klog.V(2).Infof("successfully created vm %s ", vmSpec.Name)
177+
klog.V(2).Infof("successfully created VM %s ", vmSpec.Name)
178178
return nil
179179
}
180180

181181
// Delete deletes the virtual machine with the provided name.
182182
func (s *Service) Delete(ctx context.Context, spec interface{}) error {
183183
vmSpec, ok := spec.(*Spec)
184184
if !ok {
185-
return errors.New("invalid vm specification")
185+
return errors.New("invalid VM specification")
186186
}
187-
klog.V(2).Infof("deleting vm %s ", vmSpec.Name)
187+
klog.V(2).Infof("deleting VM %s ", vmSpec.Name)
188188
err := s.Client.Delete(ctx, s.Scope.ResourceGroup(), vmSpec.Name)
189189
if err != nil && azure.ResourceNotFound(err) {
190190
// already deleted
191191
return nil
192192
}
193193
if err != nil {
194-
return errors.Wrapf(err, "failed to delete vm %s in resource group %s", vmSpec.Name, s.Scope.ResourceGroup())
194+
return errors.Wrapf(err, "failed to delete VM %s in resource group %s", vmSpec.Name, s.Scope.ResourceGroup())
195195
}
196196

197-
klog.V(2).Infof("successfully deleted vm %s ", vmSpec.Name)
197+
klog.V(2).Infof("successfully deleted VM %s ", vmSpec.Name)
198198
return nil
199199
}
200200

cloud/services/virtualmachines/virtualmachines_test.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ import (
4141
"sigs.k8s.io/controller-runtime/pkg/client/fake"
4242
)
4343

44-
const expectedInvalidSpec = "invalid vm specification"
44+
const expectedInvalidSpec = "invalid VM specification"
4545

4646
func init() {
4747
clusterv1.AddToScheme(scheme.Scheme)
@@ -167,7 +167,7 @@ func TestGetVM(t *testing.T) {
167167
vmSpec: Spec{
168168
Name: "my-vm",
169169
},
170-
expectedError: "vm my-vm not found: #: Not found: StatusCode=404",
170+
expectedError: "VM my-vm not found: #: Not found: StatusCode=404",
171171
expect: func(m *mock_virtualmachines.MockClientMockRecorder, mnic *mock_networkinterfaces.MockClientMockRecorder, mpip *mock_publicips.MockClientMockRecorder) {
172172
mpip.Get(context.TODO(), "my-rg", "my-publicIP-id").Return(network.PublicIPAddress{}, nil)
173173
mnic.Get(context.TODO(), "my-rg", gomock.Any()).Return(network.Interface{}, nil)
@@ -604,7 +604,7 @@ func TestDeleteVM(t *testing.T) {
604604
vmSpec: Spec{
605605
Name: "my-vm",
606606
},
607-
expectedError: "failed to delete vm my-vm in resource group my-rg: #: Internal Server Error: StatusCode=500",
607+
expectedError: "failed to delete VM my-vm in resource group my-rg: #: Internal Server Error: StatusCode=500",
608608
expect: func(m *mock_virtualmachines.MockClientMockRecorder) {
609609
m.Delete(context.TODO(), "my-rg", "my-vm").
610610
Return(autorest.NewErrorWithResponse("", "", &http.Response{StatusCode: 500}, "Internal Server Error"))

controllers/azuremachine_controller.go

Lines changed: 20 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -217,17 +217,6 @@ func (r *AzureMachineReconciler) reconcileNormal(ctx context.Context, machineSco
217217
return reconcile.Result{}, err
218218
}
219219

220-
// Set an error message if we couldn't find the VM.
221-
// TODO: Nodes are getting marked failed. Need to debug this.
222-
// Should we set a requeue above? CAPA doesn't seem to need the requeue.
223-
/*
224-
if vm == nil {
225-
machineScope.SetFailureReason(capierrors.UpdateMachineError)
226-
machineScope.SetFailureMessage(errors.New("Azure virtual machine cannot be found"))
227-
return reconcile.Result{}, nil
228-
}
229-
*/
230-
231220
// TODO(ncdc): move this validation logic into a validating webhook
232221
if errs := r.validateUpdate(&machineScope.AzureMachine.Spec, vm); len(errs) > 0 {
233222
agg := kerrors.NewAggregate(errs)
@@ -248,13 +237,30 @@ func (r *AzureMachineReconciler) reconcileNormal(ctx context.Context, machineSco
248237

249238
switch vm.State {
250239
case infrav1.VMStateSucceeded:
251-
machineScope.Info("Machine VM is running", "instance-id", *machineScope.GetVMID())
240+
machineScope.V(2).Info("VM is running", "id", *machineScope.GetVMID())
252241
machineScope.SetReady()
242+
case infrav1.VMStateCreating:
243+
machineScope.V(2).Info("VM is creating", "id", *machineScope.GetVMID())
244+
machineScope.SetNotReady()
253245
case infrav1.VMStateUpdating:
254-
machineScope.Info("Machine VM is updating", "instance-id", *machineScope.GetVMID())
246+
machineScope.V(2).Info("VM is updating", "id", *machineScope.GetVMID())
247+
machineScope.SetNotReady()
248+
case infrav1.VMStateDeleting:
249+
machineScope.Info("Unexpected VM deletion", "state", vm.State, "instance-id", *machineScope.GetVMID())
250+
r.Recorder.Eventf(machineScope.AzureMachine, corev1.EventTypeWarning, "UnexpectedVMDeletion", "Unexpected Azure VM deletion")
251+
machineScope.SetNotReady()
252+
case infrav1.VMStateFailed:
253+
machineScope.SetNotReady()
254+
machineScope.Error(errors.New("Failed to create or update VM"), "VM is in failed state", "id", *machineScope.GetVMID())
255+
r.Recorder.Eventf(machineScope.AzureMachine, corev1.EventTypeWarning, "FailedVMState", "Azure VM is in failed state")
256+
machineScope.SetFailureReason(capierrors.UpdateMachineError)
257+
machineScope.SetFailureMessage(errors.Errorf("Azure VM state is %s", vm.State))
255258
default:
259+
machineScope.SetNotReady()
260+
machineScope.Info("VM state is undefined", "state", vm.State, "instance-id", *machineScope.GetVMID())
261+
r.Recorder.Eventf(machineScope.AzureMachine, corev1.EventTypeWarning, "UnhandledVMState", "Azure VM state is undefined")
256262
machineScope.SetFailureReason(capierrors.UpdateMachineError)
257-
machineScope.SetFailureMessage(errors.Errorf("Azure VM state %q is unexpected", vm.State))
263+
machineScope.SetFailureMessage(errors.Errorf("Azure VM state %q is undefined", vm.State))
258264
}
259265

260266
// Ensure that the tags are correct.

controllers/azuremachine_reconciler.go

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -73,12 +73,12 @@ func (s *azureMachineService) Create() (*infrav1.VM, error) {
7373
nicName := azure.GenerateNICName(s.machineScope.Name())
7474
nicErr := s.reconcileNetworkInterface(nicName)
7575
if nicErr != nil {
76-
return nil, errors.Wrapf(nicErr, "failed to create nic %s for machine %s", nicName, s.machineScope.Name())
76+
return nil, errors.Wrapf(nicErr, "failed to create NIC %s for machine %s", nicName, s.machineScope.Name())
7777
}
7878

7979
vm, vmErr := s.createVirtualMachine(nicName)
8080
if vmErr != nil {
81-
return nil, errors.Wrapf(vmErr, "failed to create vm %s ", s.machineScope.Name())
81+
return nil, errors.Wrapf(vmErr, "failed to create VM %s ", s.machineScope.Name())
8282
}
8383

8484
return vm, nil
@@ -131,7 +131,7 @@ func (s *azureMachineService) Delete() error {
131131

132132
func (s *azureMachineService) VMIfExists(id *string) (*infrav1.VM, error) {
133133
if id == nil {
134-
s.clusterScope.Info("VM does not have an id")
134+
s.clusterScope.Info("VM does not have an ID")
135135
return nil, nil
136136
}
137137

@@ -145,15 +145,15 @@ func (s *azureMachineService) VMIfExists(id *string) (*infrav1.VM, error) {
145145
}
146146

147147
if err != nil {
148-
return nil, errors.Wrap(err, "Failed to get vm")
148+
return nil, errors.Wrap(err, "Failed to get VM")
149149
}
150150

151151
vm, ok := vmInterface.(*infrav1.VM)
152152
if !ok {
153153
return nil, errors.New("returned incorrect vm interface")
154154
}
155155

156-
klog.Infof("Found vm for machine %s", s.machineScope.Name())
156+
klog.V(2).Infof("Found VM for AzureMachine %s", s.machineScope.Name())
157157

158158
return vm, nil
159159
}

0 commit comments

Comments
 (0)