Skip to content

Commit 4d813c1

Browse files
authored
Merge pull request kubernetes#90749 from andyzhangx/dangling-error
fix: azure disk dangling attach issue on VMSS which would cause API throttling
2 parents fecc66a + 05d7570 commit 4d813c1

File tree

6 files changed

+56
-24
lines changed

6 files changed

+56
-24
lines changed

staging/src/k8s.io/legacy-cloud-providers/azure/azure_controller_common.go

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,11 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
121121
diskEncryptionSetID := ""
122122
writeAcceleratorEnabled := false
123123

124+
vmset, err := c.getNodeVMSet(nodeName, azcache.CacheReadTypeUnsafe)
125+
if err != nil {
126+
return -1, err
127+
}
128+
124129
if isManagedDisk {
125130
diskName := path.Base(diskURI)
126131
resourceGroup, err := getResourceGroupFromDiskURI(diskURI)
@@ -140,9 +145,12 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
140145
attachErr := fmt.Sprintf(
141146
"disk(%s) already attached to node(%s), could not be attached to node(%s)",
142147
diskURI, *disk.ManagedBy, nodeName)
143-
attachedNode := path.Base(*disk.ManagedBy)
148+
attachedNode, err := vmset.GetNodeNameByProviderID(*disk.ManagedBy)
149+
if err != nil {
150+
return -1, err
151+
}
144152
klog.V(2).Infof("found dangling volume %s attached to node %s", diskURI, attachedNode)
145-
danglingErr := volerr.NewDanglingError(attachErr, types.NodeName(attachedNode), "")
153+
danglingErr := volerr.NewDanglingError(attachErr, attachedNode, "")
146154
return -1, danglingErr
147155
}
148156

@@ -157,11 +165,6 @@ func (c *controllerCommon) AttachDisk(isManagedDisk bool, diskName, diskURI stri
157165
}
158166
}
159167

160-
vmset, err := c.getNodeVMSet(nodeName, azcache.CacheReadTypeUnsafe)
161-
if err != nil {
162-
return -1, err
163-
}
164-
165168
instanceid, err := c.cloud.InstanceID(context.TODO(), nodeName)
166169
if err != nil {
167170
klog.Warningf("failed to get azure instance id (%v) for node %s", err, nodeName)

staging/src/k8s.io/legacy-cloud-providers/azure/azure_loadbalancer.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -405,7 +405,7 @@ func (az *Cloud) getServiceLoadBalancerStatus(service *v1.Service, lb *network.L
405405
if pipID == nil {
406406
return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress ID is Nil", serviceName, *lb.Name)
407407
}
408-
pipName, err := getLastSegment(*pipID)
408+
pipName, err := getLastSegment(*pipID, "/")
409409
if err != nil {
410410
return nil, fmt.Errorf("get(%s): lb(%s) - failed to get LB PublicIPAddress Name from ID(%s)", serviceName, *lb.Name, *pipID)
411411
}

staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard.go

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -72,7 +72,7 @@ const (
7272
)
7373

7474
var errNotInVMSet = errors.New("vm is not in the vmset")
75-
var providerIDRE = regexp.MustCompile(`^` + CloudProviderName + `://(?:.*)/Microsoft.Compute/virtualMachines/(.+)$`)
75+
var providerIDRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/Microsoft.Compute/virtualMachines/(.+)$`)
7676
var backendPoolIDRE = regexp.MustCompile(`^/subscriptions/(?:.*)/resourceGroups/(?:.*)/providers/Microsoft.Network/loadBalancers/(.+)/backendAddressPools/(?:.*)`)
7777
var nicResourceGroupRE = regexp.MustCompile(`.*/subscriptions/(?:.*)/resourceGroups/(.+)/providers/Microsoft.Network/networkInterfaces/(?:.*)`)
7878

@@ -171,8 +171,8 @@ func isMasterNode(node *v1.Node) bool {
171171
}
172172

173173
// returns the deepest child's identifier from a full identifier string.
174-
func getLastSegment(ID string) (string, error) {
175-
parts := strings.Split(ID, "/")
174+
func getLastSegment(ID, separator string) (string, error) {
175+
parts := strings.Split(ID, separator)
176176
name := parts[len(parts)-1]
177177
if len(name) == 0 {
178178
return "", fmt.Errorf("resource name was missing from identifier")
@@ -519,7 +519,7 @@ func (as *availabilitySet) GetIPByNodeName(name string) (string, string, error)
519519
publicIP := ""
520520
if ipConfig.PublicIPAddress != nil && ipConfig.PublicIPAddress.ID != nil {
521521
pipID := *ipConfig.PublicIPAddress.ID
522-
pipName, err := getLastSegment(pipID)
522+
pipName, err := getLastSegment(pipID, "/")
523523
if err != nil {
524524
return "", "", fmt.Errorf("failed to publicIP name for node %q with pipID %q", name, pipID)
525525
}
@@ -589,7 +589,7 @@ func (as *availabilitySet) getAgentPoolAvailabiliySets(nodes []*v1.Node) (agentP
589589
// already added in the list
590590
continue
591591
}
592-
asName, err := getLastSegment(asID)
592+
asName, err := getLastSegment(asID, "/")
593593
if err != nil {
594594
klog.Errorf("as.getNodeAvailabilitySet - Node (%s)- getLastSegment(%s), err=%v", nodeName, asID, err)
595595
return nil, err
@@ -680,7 +680,7 @@ func (as *availabilitySet) getPrimaryInterfaceWithVMSet(nodeName, vmSetName stri
680680
if err != nil {
681681
return network.Interface{}, err
682682
}
683-
nicName, err := getLastSegment(primaryNicID)
683+
nicName, err := getLastSegment(primaryNicID, "/")
684684
if err != nil {
685685
return network.Interface{}, err
686686
}

staging/src/k8s.io/legacy-cloud-providers/azure/azure_standard_test.go

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,33 +62,44 @@ func TestIsMasterNode(t *testing.T) {
6262
func TestGetLastSegment(t *testing.T) {
6363
tests := []struct {
6464
ID string
65+
separator string
6566
expected string
6667
expectErr bool
6768
}{
6869
{
6970
ID: "",
71+
separator: "/",
7072
expected: "",
7173
expectErr: true,
7274
},
7375
{
7476
ID: "foo/",
77+
separator: "/",
7578
expected: "",
7679
expectErr: true,
7780
},
7881
{
7982
ID: "foo/bar",
83+
separator: "/",
8084
expected: "bar",
8185
expectErr: false,
8286
},
8387
{
8488
ID: "foo/bar/baz",
89+
separator: "/",
8590
expected: "baz",
8691
expectErr: false,
8792
},
93+
{
94+
ID: "k8s-agentpool-36841236-vmss_1",
95+
separator: "_",
96+
expected: "1",
97+
expectErr: false,
98+
},
8899
}
89100

90101
for _, test := range tests {
91-
s, e := getLastSegment(test.ID)
102+
s, e := getLastSegment(test.ID, test.separator)
92103
if test.expectErr && e == nil {
93104
t.Errorf("Expected err, but it was nil")
94105
continue

staging/src/k8s.io/legacy-cloud-providers/azure/azure_test.go

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -2149,10 +2149,15 @@ func TestGetNodeNameByProviderID(t *testing.T) {
21492149
name: "k8s-agent-AAAAAAAA-0",
21502150
fail: false,
21512151
},
2152+
{
2153+
providerID: "/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-1",
2154+
name: "k8s-agent-AAAAAAAA-1",
2155+
fail: false,
2156+
},
21522157
{
21532158
providerID: CloudProviderName + ":/subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0",
2154-
name: "",
2155-
fail: true,
2159+
name: "k8s-agent-AAAAAAAA-0",
2160+
fail: false,
21562161
},
21572162
{
21582163
providerID: CloudProviderName + "://",
@@ -2161,20 +2166,20 @@ func TestGetNodeNameByProviderID(t *testing.T) {
21612166
},
21622167
{
21632168
providerID: ":///subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0",
2164-
name: "",
2165-
fail: true,
2169+
name: "k8s-agent-AAAAAAAA-0",
2170+
fail: false,
21662171
},
21672172
{
21682173
providerID: "aws:///subscriptions/00000000-0000-0000-0000-000000000000/resourceGroups/myResourceGroupName/providers/Microsoft.Compute/virtualMachines/k8s-agent-AAAAAAAA-0",
2169-
name: "",
2170-
fail: true,
2174+
name: "k8s-agent-AAAAAAAA-0",
2175+
fail: false,
21712176
},
21722177
}
21732178

21742179
for _, test := range providers {
21752180
name, err := az.vmSet.GetNodeNameByProviderID(test.providerID)
21762181
if (err != nil) != test.fail {
2177-
t.Errorf("Expected to failt=%t, with pattern %v", test.fail, test)
2182+
t.Errorf("Expected to fail=%t, with pattern %v", test.fail, test)
21782183
}
21792184

21802185
if test.fail {

staging/src/k8s.io/legacy-cloud-providers/azure/azure_vmss.go

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,11 @@ func (ss *scaleSet) GetInstanceIDByNodeName(name string) (string, error) {
285285
}
286286

287287
// GetNodeNameByProviderID gets the node name by provider ID.
288+
// providerID example:
289+
// 1. vmas providerID: azure:///subscriptions/subsid/resourceGroups/rg/providers/Microsoft.Compute/virtualMachines/aks-nodepool1-27053986-0
290+
// 2. vmss providerID:
291+
// azure:///subscriptions/subsid/resourceGroups/rg/providers/Microsoft.Compute/virtualMachineScaleSets/aks-agentpool-22126781-vmss/virtualMachines/1
292+
// /subscriptions/subsid/resourceGroups/rg/providers/Microsoft.Compute/virtualMachineScaleSets/aks-agentpool-22126781-vmss/virtualMachines/k8s-agentpool-36841236-vmss_1
288293
func (ss *scaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName, error) {
289294
// NodeName is not part of providerID for vmss instances.
290295
scaleSetName, err := extractScaleSetNameByProviderID(providerID)
@@ -298,12 +303,20 @@ func (ss *scaleSet) GetNodeNameByProviderID(providerID string) (types.NodeName,
298303
return "", fmt.Errorf("error of extracting resource group for node %q", providerID)
299304
}
300305

301-
instanceID, err := getLastSegment(providerID)
306+
instanceID, err := getLastSegment(providerID, "/")
302307
if err != nil {
303308
klog.V(4).Infof("Can not extract instanceID from providerID (%s), assuming it is managed by availability set: %v", providerID, err)
304309
return ss.availabilitySet.GetNodeNameByProviderID(providerID)
305310
}
306311

312+
// instanceID contains scaleSetName (returned by disk.ManagedBy), e.g. k8s-agentpool-36841236-vmss_1
313+
if strings.HasPrefix(strings.ToLower(instanceID), strings.ToLower(scaleSetName)) {
314+
instanceID, err = getLastSegment(instanceID, "_")
315+
if err != nil {
316+
return "", err
317+
}
318+
}
319+
307320
vm, err := ss.getVmssVMByInstanceID(resourceGroup, scaleSetName, instanceID, azcache.CacheReadTypeUnsafe)
308321
if err != nil {
309322
return "", err
@@ -695,7 +708,7 @@ func (ss *scaleSet) GetPrimaryInterface(nodeName string) (network.Interface, err
695708
return network.Interface{}, err
696709
}
697710

698-
nicName, err := getLastSegment(primaryInterfaceID)
711+
nicName, err := getLastSegment(primaryInterfaceID, "/")
699712
if err != nil {
700713
klog.Errorf("error: ss.GetPrimaryInterface(%s), getLastSegment(%s), err=%v", nodeName, primaryInterfaceID, err)
701714
return network.Interface{}, err

0 commit comments

Comments
 (0)