Skip to content

Commit 77d0ebe

Browse files
authored
Merge pull request kubernetes#83102 from feiskyer/fix-too-many-calls
Fix aggressive VM calls for Azure VMSS
2 parents 29f23e6 + 197892d commit 77d0ebe

File tree

4 files changed

+173
-180
lines changed

4 files changed

+173
-180
lines changed

staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_vmss.go

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -86,8 +86,9 @@ func (ss *scaleSet) AttachDisk(isManagedDisk bool, diskName, diskURI string, nod
8686
defer cancel()
8787

8888
// Invalidate the cache right after updating
89-
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
90-
defer ss.vmssVMCache.Delete(key)
89+
if err = ss.deleteCacheForNode(vmName); err != nil {
90+
return err
91+
}
9192

9293
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - attach disk(%s, %s)", nodeResourceGroup, nodeName, diskName, diskURI)
9394
_, err = ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "attach_disk")
@@ -157,8 +158,9 @@ func (ss *scaleSet) DetachDisk(diskName, diskURI string, nodeName types.NodeName
157158
defer cancel()
158159

159160
// Invalidate the cache right after updating
160-
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
161-
defer ss.vmssVMCache.Delete(key)
161+
if err = ss.deleteCacheForNode(vmName); err != nil {
162+
return nil, err
163+
}
162164

163165
klog.V(2).Infof("azureDisk - update(%s): vm(%s) - detach disk(%s, %s)", nodeResourceGroup, nodeName, diskName, diskURI)
164166
return ss.VirtualMachineScaleSetVMsClient.Update(ctx, nodeResourceGroup, ssName, instanceID, newVM, "detach_disk")

staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss.go

Lines changed: 87 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -25,6 +25,7 @@ import (
2525
"sort"
2626
"strconv"
2727
"strings"
28+
"sync"
2829

2930
"github.com/Azure/azure-sdk-for-go/services/compute/mgmt/2019-07-01/compute"
3031
"github.com/Azure/azure-sdk-for-go/services/network/mgmt/2019-06-01/network"
@@ -60,10 +61,8 @@ type scaleSet struct {
6061
// (e.g. master nodes) may not belong to any scale sets.
6162
availabilitySet VMSet
6263

63-
vmssCache *timedCache
64-
vmssVMCache *timedCache
65-
nodeNameToScaleSetMappingCache *timedCache
66-
availabilitySetNodesCache *timedCache
64+
vmssVMCache *timedCache
65+
availabilitySetNodesCache *timedCache
6766
}
6867

6968
// newScaleSet creates a new scaleSet.
@@ -74,22 +73,12 @@ func newScaleSet(az *Cloud) (VMSet, error) {
7473
availabilitySet: newAvailabilitySet(az),
7574
}
7675

77-
ss.nodeNameToScaleSetMappingCache, err = ss.newNodeNameToScaleSetMappingCache()
78-
if err != nil {
79-
return nil, err
80-
}
81-
8276
ss.availabilitySetNodesCache, err = ss.newAvailabilitySetNodesCache()
8377
if err != nil {
8478
return nil, err
8579
}
8680

87-
ss.vmssCache, err = ss.newVmssCache()
88-
if err != nil {
89-
return nil, err
90-
}
91-
92-
ss.vmssVMCache, err = ss.newVmssVMCache()
81+
ss.vmssVMCache, err = ss.newVMSSVirtualMachinesCache()
9382
if err != nil {
9483
return nil, err
9584
}
@@ -99,39 +88,46 @@ func newScaleSet(az *Cloud) (VMSet, error) {
9988

10089
// getVmssVM gets virtualMachineScaleSetVM by nodeName from cache.
10190
// It returns cloudprovider.InstanceNotFound if node does not belong to any scale sets.
102-
func (ss *scaleSet) getVmssVM(nodeName string) (ssName, instanceID string, vm compute.VirtualMachineScaleSetVM, err error) {
103-
instanceID, err = getScaleSetVMInstanceID(nodeName)
104-
if err != nil {
105-
return ssName, instanceID, vm, err
106-
}
91+
func (ss *scaleSet) getVmssVM(nodeName string) (string, string, *compute.VirtualMachineScaleSetVM, error) {
92+
getter := func(nodeName string) (string, string, *compute.VirtualMachineScaleSetVM, error) {
93+
cached, err := ss.vmssVMCache.Get(vmssVirtualMachinesKey)
94+
if err != nil {
95+
return "", "", nil, err
96+
}
10797

108-
ssName, err = ss.getScaleSetNameByNodeName(nodeName)
109-
if err != nil {
110-
return ssName, instanceID, vm, err
111-
}
98+
virtualMachines := cached.(*sync.Map)
99+
if vm, ok := virtualMachines.Load(nodeName); ok {
100+
result := vm.(*vmssVirtualMachinesEntry)
101+
return result.vmssName, result.instanceID, result.virtualMachine, nil
102+
}
112103

113-
if ssName == "" {
114-
return "", "", vm, cloudprovider.InstanceNotFound
104+
return "", "", nil, nil
115105
}
116106

117-
resourceGroup, err := ss.GetNodeResourceGroup(nodeName)
107+
_, err := getScaleSetVMInstanceID(nodeName)
118108
if err != nil {
119-
return "", "", vm, err
109+
return "", "", nil, err
120110
}
121111

122-
klog.V(4).Infof("getVmssVM gets scaleSetName (%q) and instanceID (%q) for node %q", ssName, instanceID, nodeName)
123-
key := buildVmssCacheKey(resourceGroup, ss.makeVmssVMName(ssName, instanceID))
124-
cachedVM, err := ss.vmssVMCache.Get(key)
112+
vmssName, instanceID, vm, err := getter(nodeName)
125113
if err != nil {
126-
return ssName, instanceID, vm, err
114+
return "", "", nil, err
115+
}
116+
if vm != nil {
117+
return vmssName, instanceID, vm, nil
127118
}
128119

129-
if cachedVM == nil {
130-
klog.Errorf("Can't find node (%q) in any scale sets", nodeName)
131-
return ssName, instanceID, vm, cloudprovider.InstanceNotFound
120+
klog.V(3).Infof("Couldn't find VMSS VM with nodeName %s, refreshing the cache", nodeName)
121+
ss.vmssVMCache.Delete(vmssVirtualMachinesKey)
122+
vmssName, instanceID, vm, err = getter(nodeName)
123+
if err != nil {
124+
return "", "", nil, err
132125
}
133126

134-
return ssName, instanceID, *(cachedVM.(*compute.VirtualMachineScaleSetVM)), nil
127+
if vm == nil {
128+
return "", "", nil, cloudprovider.InstanceNotFound
129+
}
130+
return vmssName, instanceID, vm, nil
135131
}
136132

137133
// GetPowerStatusByNodeName returns the power state of the specified node.
@@ -158,20 +154,49 @@ func (ss *scaleSet) GetPowerStatusByNodeName(name string) (powerState string, er
158154

159155
// getCachedVirtualMachineByInstanceID gets scaleSetVMInfo from cache.
160156
// The node must belong to one of scale sets.
161-
func (ss *scaleSet) getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID string) (vm compute.VirtualMachineScaleSetVM, err error) {
162-
vmName := ss.makeVmssVMName(scaleSetName, instanceID)
163-
key := buildVmssCacheKey(resourceGroup, vmName)
164-
cachedVM, err := ss.vmssVMCache.Get(key)
157+
func (ss *scaleSet) getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID string) (*compute.VirtualMachineScaleSetVM, error) {
158+
getter := func() (vm *compute.VirtualMachineScaleSetVM, found bool, err error) {
159+
cached, err := ss.vmssVMCache.Get(vmssVirtualMachinesKey)
160+
if err != nil {
161+
return nil, false, err
162+
}
163+
164+
virtualMachines := cached.(*sync.Map)
165+
virtualMachines.Range(func(key, value interface{}) bool {
166+
vmEntry := value.(*vmssVirtualMachinesEntry)
167+
if strings.EqualFold(vmEntry.resourceGroup, resourceGroup) &&
168+
strings.EqualFold(vmEntry.vmssName, scaleSetName) &&
169+
strings.EqualFold(vmEntry.instanceID, instanceID) {
170+
vm = vmEntry.virtualMachine
171+
found = true
172+
return false
173+
}
174+
175+
return true
176+
})
177+
178+
return vm, found, nil
179+
}
180+
181+
vm, found, err := getter()
165182
if err != nil {
166-
return vm, err
183+
return nil, err
184+
}
185+
if found {
186+
return vm, nil
167187
}
168188

169-
if cachedVM == nil {
170-
klog.Errorf("couldn't find vmss virtual machine by scaleSetName (%s) and instanceID (%s)", scaleSetName, instanceID)
171-
return vm, cloudprovider.InstanceNotFound
189+
klog.V(3).Infof("Couldn't find VMSS VM with scaleSetName %q and instanceID %q, refreshing the cache", scaleSetName, instanceID)
190+
ss.vmssVMCache.Delete(vmssVirtualMachinesKey)
191+
vm, found, err = getter()
192+
if err != nil {
193+
return nil, err
194+
}
195+
if !found {
196+
return nil, cloudprovider.InstanceNotFound
172197
}
173198

174-
return *(cachedVM.(*compute.VirtualMachineScaleSetVM)), nil
199+
return vm, nil
175200
}
176201

177202
// GetInstanceIDByNodeName gets the cloud provider ID by node name.
@@ -463,9 +488,15 @@ func (ss *scaleSet) listScaleSets(resourceGroup string) ([]string, error) {
463488
return nil, err
464489
}
465490

466-
ssNames := make([]string, len(allScaleSets))
467-
for i := range allScaleSets {
468-
ssNames[i] = *(allScaleSets[i].Name)
491+
ssNames := make([]string, 0)
492+
for _, vmss := range allScaleSets {
493+
name := *vmss.Name
494+
if vmss.Sku != nil && to.Int64(vmss.Sku.Capacity) == 0 {
495+
klog.V(3).Infof("Capacity of VMSS %q is 0, skipping", name)
496+
continue
497+
}
498+
499+
ssNames = append(ssNames, name)
469500
}
470501

471502
return ssNames, nil
@@ -500,7 +531,7 @@ func (ss *scaleSet) getAgentPoolScaleSets(nodes []*v1.Node) (*[]string, error) {
500531
}
501532

502533
nodeName := nodes[nx].Name
503-
ssName, err := ss.getScaleSetNameByNodeName(nodeName)
534+
ssName, _, _, err := ss.getVmssVM(nodeName)
504535
if err != nil {
505536
return nil, err
506537
}
@@ -599,7 +630,7 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err
599630
return network.Interface{}, err
600631
}
601632

602-
primaryInterfaceID, err := ss.getPrimaryInterfaceID(vm)
633+
primaryInterfaceID, err := ss.getPrimaryInterfaceID(*vm)
603634
if err != nil {
604635
klog.Errorf("error: ss.GetPrimaryInterface(%s), ss.getPrimaryInterfaceID(), err=%v", nodeName, err)
605636
return network.Interface{}, err
@@ -816,8 +847,9 @@ func (ss *scaleSet) EnsureHostInPool(service *v1.Service, nodeName types.NodeNam
816847
}
817848

818849
// Invalidate the cache since we would update it.
819-
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
820-
defer ss.vmssVMCache.Delete(key)
850+
if err = ss.deleteCacheForNode(vmName); err != nil {
851+
return err
852+
}
821853

822854
// Update vmssVM with backoff.
823855
ctx, cancel := getContextWithCancel()
@@ -1094,8 +1126,9 @@ func (ss *scaleSet) ensureBackendPoolDeletedFromNode(service *v1.Service, nodeNa
10941126
}
10951127

10961128
// Invalidate the cache since we would update it.
1097-
key := buildVmssCacheKey(nodeResourceGroup, ss.makeVmssVMName(ssName, instanceID))
1098-
defer ss.vmssVMCache.Delete(key)
1129+
if err = ss.deleteCacheForNode(nodeName); err != nil {
1130+
return err
1131+
}
10991132

11001133
// Update vmssVM with backoff.
11011134
ctx, cancel := getContextWithCancel()

0 commit comments

Comments
 (0)