@@ -321,9 +321,7 @@ func (scaleSet *ScaleSet) IncreaseSize(delta int) error {
321321}
322322
323323// GetScaleSetVms returns list of nodes for the given scale set.
324- // Note that the list results is not used directly because their resource ID format
325- // is not consistent with Get results.
326- func (scaleSet * ScaleSet ) GetScaleSetVms () ([]string , * retry.Error ) {
324+ func (scaleSet * ScaleSet ) GetScaleSetVms () ([]compute.VirtualMachineScaleSetVM , error ) {
327325 klog .V (4 ).Infof ("GetScaleSetVms: starts" )
328326 ctx , cancel := getContextWithCancel ()
329327 defer cancel ()
@@ -336,24 +334,7 @@ func (scaleSet *ScaleSet) GetScaleSetVms() ([]string, *retry.Error) {
336334 return nil , rerr
337335 }
338336
339- allVMs := make ([]string , 0 )
340- for _ , vm := range vmList {
341- // The resource ID is empty string, which indicates the instance may be in deleting state.
342- if len (* vm .ID ) == 0 {
343- continue
344- }
345-
346- resourceID , err := convertResourceGroupNameToLower (* vm .ID )
347- if err != nil {
348- // This shouldn't happen. Log a waring message for tracking.
349- klog .Warningf ("GetScaleSetVms.convertResourceGroupNameToLower failed with error: %v" , err )
350- continue
351- }
352-
353- allVMs = append (allVMs , resourceID )
354- }
355-
356- return allVMs , nil
337+ return vmList , nil
357338}
358339
359340// DecreaseTargetSize decreases the target size of the node group. This function
@@ -406,6 +387,9 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef) error {
406387 return err
407388 }
408389
390+ scaleSet .instanceMutex .Lock ()
391+ defer scaleSet .instanceMutex .Unlock ()
392+
409393 instanceIDs := []string {}
410394 for _ , instance := range instances {
411395 asg , err := scaleSet .manager .GetAsgForInstance (instance )
@@ -417,6 +401,11 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef) error {
417401 return fmt .Errorf ("cannot delete instance (%s) which don't belong to the same Scale Set (%q)" , instance .Name , commonAsg )
418402 }
419403
404+ if cpi , found := scaleSet .getInstanceByProviderID (instance .Name ); found && cpi .Status != nil && cpi .Status .State == cloudprovider .InstanceDeleting {
405+ klog .V (3 ).Infof ("Skipping deleting instance %s as its current state is deleting" , instance .Name )
406+ continue
407+ }
408+
420409 instanceID , err := getLastSegment (instance .Name )
421410 if err != nil {
422411 klog .Errorf ("getLastSegment failed with error: %v" , err )
@@ -426,9 +415,16 @@ func (scaleSet *ScaleSet) DeleteInstances(instances []*azureRef) error {
426415 instanceIDs = append (instanceIDs , instanceID )
427416 }
428417
418+ // nothing to delete
419+ if len (instanceIDs ) == 0 {
420+ klog .V (3 ).Infof ("No new instances eligible for deletion, skipping" )
421+ return nil
422+ }
423+
429424 requiredIds := & compute.VirtualMachineScaleSetVMInstanceRequiredIDs {
430425 InstanceIds : & instanceIDs ,
431426 }
427+
432428 ctx , cancel := getContextWithCancel ()
433429 defer cancel ()
434430 resourceGroup := scaleSet .manager .config .ResourceGroup
@@ -682,16 +678,65 @@ func (scaleSet *ScaleSet) Nodes() ([]cloudprovider.Instance, error) {
682678 return nil , rerr .Error ()
683679 }
684680
685- instances := make ([]cloudprovider.Instance , len (vms ))
686- for i := range vms {
687- name := "azure://" + vms [i ]
688- instances [i ] = cloudprovider.Instance {Id : name }
689- }
690-
691- scaleSet .instanceCache = instances
681+ scaleSet .instanceCache = buildInstanceCache (vms )
692682 scaleSet .lastInstanceRefresh = time .Now ()
693683 klog .V (4 ).Infof ("Nodes: returns" )
694- return instances , nil
684+ return scaleSet .instanceCache , nil
685+ }
686+
687+ // Note that the GetScaleSetVms() results is not used directly because for the List endpoint,
688+ // their resource ID format is not consistent with Get endpoint
689+ func buildInstanceCache (vms []compute.VirtualMachineScaleSetVM ) []cloudprovider.Instance {
690+ instances := []cloudprovider.Instance {}
691+
692+ for _ , vm := range vms {
693+ // The resource ID is empty string, which indicates the instance may be in deleting state.
694+ if len (* vm .ID ) == 0 {
695+ continue
696+ }
697+
698+ resourceID , err := convertResourceGroupNameToLower (* vm .ID )
699+ if err != nil {
700+ // This shouldn't happen. Log a waring message for tracking.
701+ klog .Warningf ("buildInstanceCache.convertResourceGroupNameToLower failed with error: %v" , err )
702+ continue
703+ }
704+
705+ instances = append (instances , cloudprovider.Instance {
706+ Id : "azure://" + resourceID ,
707+ Status : instanceStatusFromVM (vm ),
708+ })
709+ }
710+
711+ return instances
712+ }
713+
714+ func (scaleSet * ScaleSet ) getInstanceByProviderID (providerID string ) (cloudprovider.Instance , bool ) {
715+ for _ , instance := range scaleSet .instanceCache {
716+ if instance .Id == providerID {
717+ return instance , true
718+ }
719+ }
720+ return cloudprovider.Instance {}, false
721+ }
722+
723+ // instanceStatusFromVM converts the VM provisioning state to cloudprovider.InstanceStatus
724+ func instanceStatusFromVM (vm compute.VirtualMachineScaleSetVM ) * cloudprovider.InstanceStatus {
725+ if vm .ProvisioningState == nil {
726+ return nil
727+ }
728+
729+ status := & cloudprovider.InstanceStatus {}
730+ switch * vm .ProvisioningState {
731+ case string (compute .ProvisioningStateDeleting ):
732+ status .State = cloudprovider .InstanceDeleting
733+ case string (compute .ProvisioningStateCreating ):
734+ status .State = cloudprovider .InstanceCreating
735+ default :
736+ status .State = cloudprovider .InstanceRunning
737+ }
738+
739+ return status
695740}
696741
697742func (scaleSet * ScaleSet ) invalidateInstanceCache () {
0 commit comments