Skip to content

Commit 1fb87b1

Browse files
authored
Merge pull request kubernetes#3023 from feiskyer/fix-3018
Ensure VMSS is not under updating before scaling out
2 parents dbc53ee + 1ae92f7 commit 1fb87b1

File tree

3 files changed

+71
-6
lines changed

3 files changed

+71
-6
lines changed

cluster-autoscaler/cloudprovider/azure/azure_fakes.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ func (client *VirtualMachineScaleSetsClientMock) CreateOrUpdateAsync(ctx context
8080

8181
// WaitForAsyncOperationResult waits for the response of the request
8282
func (client *VirtualMachineScaleSetsClientMock) WaitForAsyncOperationResult(ctx context.Context, future *azure.Future) (*http.Response, error) {
83-
return nil, nil
83+
return &http.Response{StatusCode: http.StatusOK}, nil
8484
}
8585

8686
// DeleteInstances deletes a set of instances for specified VirtualMachineScaleSet.

cluster-autoscaler/cloudprovider/azure/azure_scale_set.go

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ import (
4040
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute"
4141
"github.com/Azure/go-autorest/autorest"
4242
"github.com/Azure/go-autorest/autorest/azure"
43+
"github.com/Azure/go-autorest/autorest/to"
4344
)
4445

4546
var (
@@ -253,27 +254,35 @@ func (scaleSet *ScaleSet) updateVMSSCapacity(future *azure.Future) {
253254
return
254255
}
255256

256-
klog.Errorf("virtualMachineScaleSetsClient.WaitForCreateOrUpdate for scale set %q failed: %v", scaleSet.Name, err)
257+
klog.Errorf("virtualMachineScaleSetsClient.WaitForAsyncOperationResult for scale set %q failed: %v", scaleSet.Name, err)
257258
}
258259

259260
// SetScaleSetSize sets ScaleSet size.
260261
func (scaleSet *ScaleSet) SetScaleSetSize(size int64) error {
261262
scaleSet.sizeMutex.Lock()
262263
defer scaleSet.sizeMutex.Unlock()
263264

264-
// Proactively set the VMSS size so autoscaler makes better decisions.
265-
scaleSet.curSize = size
266-
scaleSet.lastSizeRefresh = time.Now()
267-
268265
vmssInfo, rerr := scaleSet.getVMSSInfo()
269266
if rerr != nil {
270267
klog.Errorf("Failed to get information for VMSS (%q): %v", scaleSet.Name, rerr)
271268
return rerr.Error()
272269
}
273270

271+
// Abort scaling to avoid concurrent VMSS scaling operations if the VMSS is still being updated.
272+
// Note that the VMSS provisioning state is only refreshed once per scaleSet.sizeRefreshPeriod.
273+
if vmssInfo.VirtualMachineScaleSetProperties != nil && strings.EqualFold(to.String(vmssInfo.VirtualMachineScaleSetProperties.ProvisioningState), string(compute.ProvisioningStateUpdating)) {
274+
klog.Errorf("VMSS %q is still under updating, waiting for it finishes before scaling", scaleSet.Name)
275+
return fmt.Errorf("VMSS %q is still under updating", scaleSet.Name)
276+
}
277+
278+
// Proactively set the VMSS size so autoscaler makes better decisions.
279+
scaleSet.curSize = size
280+
scaleSet.lastSizeRefresh = time.Now()
281+
274282
// Update the new capacity to cache.
275283
vmssSizeMutex.Lock()
276284
vmssInfo.Sku.Capacity = &size
285+
vmssInfo.VirtualMachineScaleSetProperties.ProvisioningState = to.StringPtr(string(compute.ProvisioningStateUpdating))
277286
vmssSizeMutex.Unlock()
278287

279288
// Compose a new VMSS for updating.

cluster-autoscaler/cloudprovider/azure/azure_scale_set_test.go

Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@ import (
2020
"fmt"
2121
"net/http"
2222
"testing"
23+
"time"
2324

2425
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-12-01/compute"
2526
"github.com/Azure/go-autorest/autorest"
27+
"github.com/Azure/go-autorest/autorest/to"
2628
"github.com/stretchr/testify/assert"
2729
"github.com/stretchr/testify/mock"
2830

@@ -97,6 +99,60 @@ func TestIncreaseSize(t *testing.T) {
9799
assert.Equal(t, 5, targetSize)
98100
}
99101

102+
func TestIncreaseSizeOnVMSSUpdating(t *testing.T) {
103+
manager := newTestAzureManager(t)
104+
vmssName := "vmss-updating"
105+
var vmssCapacity int64 = 3
106+
scaleSetClient := &VirtualMachineScaleSetsClientMock{
107+
FakeStore: map[string]map[string]compute.VirtualMachineScaleSet{
108+
"test": {
109+
vmssName: {
110+
Name: &vmssName,
111+
Sku: &compute.Sku{
112+
Capacity: &vmssCapacity,
113+
},
114+
VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
115+
ProvisioningState: to.StringPtr(string(compute.ProvisioningStateUpdating)),
116+
},
117+
},
118+
},
119+
},
120+
}
121+
manager.azClient.virtualMachineScaleSetsClient = scaleSetClient
122+
registered := manager.RegisterAsg(newTestScaleSet(manager, vmssName))
123+
assert.True(t, registered)
124+
manager.regenerateCache()
125+
126+
provider, err := BuildAzureCloudProvider(manager, nil)
127+
assert.NoError(t, err)
128+
129+
// Scaling should fail because the VMSS is still being updated.
130+
scaleSet, ok := provider.NodeGroups()[0].(*ScaleSet)
131+
assert.True(t, ok)
132+
err = scaleSet.IncreaseSize(1)
133+
assert.Equal(t, fmt.Errorf("VMSS %q is still under updating", scaleSet.Name), err)
134+
135+
// Scaling should succeed once the VMSS ProvisioningState has changed to Succeeded.
136+
scaleSetClient.FakeStore = map[string]map[string]compute.VirtualMachineScaleSet{
137+
"test": {
138+
vmssName: {
139+
Name: &vmssName,
140+
Sku: &compute.Sku{
141+
Capacity: &vmssCapacity,
142+
},
143+
VirtualMachineScaleSetProperties: &compute.VirtualMachineScaleSetProperties{
144+
ProvisioningState: to.StringPtr(string(compute.ProvisioningStateSucceeded)),
145+
},
146+
},
147+
},
148+
}
149+
scaleSetStatusCache.mutex.Lock()
150+
scaleSetStatusCache.lastRefresh = time.Now().Add(-1 * scaleSet.sizeRefreshPeriod)
151+
scaleSetStatusCache.mutex.Unlock()
152+
err = scaleSet.IncreaseSize(1)
153+
assert.NoError(t, err)
154+
}
155+
100156
func TestBelongs(t *testing.T) {
101157
provider := newTestProvider(t)
102158
registered := provider.azureManager.RegisterAsg(

0 commit comments

Comments
 (0)