Skip to content

Commit e259fd8

Browse files
authored
Merge pull request #1540 from k8s-infra-cherrypick-robot/cherry-pick-1538-to-release-1.24
[release-1.24] fix: nfs mount failure when there are multiple subnets in the cluster
2 parents 8c03965 + 451fc8b commit e259fd8

File tree

5 files changed

+98
-73
lines changed

5 files changed

+98
-73
lines changed

pkg/blob/azure.go

Lines changed: 74 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,10 @@ import (
3131
"k8s.io/klog/v2"
3232
"k8s.io/utils/pointer"
3333
"sigs.k8s.io/cloud-provider-azure/pkg/azclient/configloader"
34+
azcache "sigs.k8s.io/cloud-provider-azure/pkg/cache"
3435
azure "sigs.k8s.io/cloud-provider-azure/pkg/provider"
3536
providerconfig "sigs.k8s.io/cloud-provider-azure/pkg/provider/config"
37+
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
3638
)
3739

3840
var (
@@ -181,9 +183,10 @@ func (d *Driver) getKeyvaultToken() (authorizer autorest.Authorizer, err error)
181183
return authorizer, nil
182184
}
183185

184-
func (d *Driver) updateSubnetServiceEndpoints(ctx context.Context, vnetResourceGroup, vnetName, subnetName string) error {
186+
func (d *Driver) updateSubnetServiceEndpoints(ctx context.Context, vnetResourceGroup, vnetName, subnetName string) ([]string, error) {
187+
var vnetResourceIDs []string
185188
if d.cloud.SubnetsClient == nil {
186-
return fmt.Errorf("SubnetsClient is nil")
189+
return vnetResourceIDs, fmt.Errorf("SubnetsClient is nil")
187190
}
188191

189192
if vnetResourceGroup == "" {
@@ -197,56 +200,89 @@ func (d *Driver) updateSubnetServiceEndpoints(ctx context.Context, vnetResourceG
197200
if vnetName == "" {
198201
vnetName = d.cloud.VnetName
199202
}
200-
if subnetName == "" {
201-
subnetName = d.cloud.SubnetName
202-
}
203203

204204
klog.V(2).Infof("updateSubnetServiceEndpoints on vnetName: %s, subnetName: %s, location: %s", vnetName, subnetName, location)
205-
if subnetName == "" || vnetName == "" || location == "" {
206-
return fmt.Errorf("value of subnetName, vnetName or location is empty")
205+
if vnetName == "" || location == "" {
206+
return vnetResourceIDs, fmt.Errorf("vnetName or location is empty")
207207
}
208208

209209
lockKey := vnetResourceGroup + vnetName + subnetName
210-
d.subnetLockMap.LockEntry(lockKey)
211-
defer d.subnetLockMap.UnlockEntry(lockKey)
212-
213-
subnet, err := d.cloud.SubnetsClient.Get(ctx, vnetResourceGroup, vnetName, subnetName, "")
210+
cache, err := d.subnetCache.Get(lockKey, azcache.CacheReadTypeDefault)
214211
if err != nil {
215-
return fmt.Errorf("failed to get the subnet %s under vnet %s: %v", subnetName, vnetName, err)
216-
}
217-
endpointLocaions := []string{location}
218-
storageServiceEndpoint := network.ServiceEndpointPropertiesFormat{
219-
Service: &storageService,
220-
Locations: &endpointLocaions,
221-
}
222-
storageServiceExists := false
223-
if subnet.SubnetPropertiesFormat == nil {
224-
subnet.SubnetPropertiesFormat = &network.SubnetPropertiesFormat{}
212+
return nil, err
225213
}
226-
if subnet.SubnetPropertiesFormat.ServiceEndpoints == nil {
227-
subnet.SubnetPropertiesFormat.ServiceEndpoints = &[]network.ServiceEndpointPropertiesFormat{}
214+
if cache != nil {
215+
vnetResourceIDs = cache.([]string)
216+
klog.V(2).Infof("subnet %s under vnet %s in rg %s is already updated, vnetResourceIDs: %v", subnetName, vnetName, vnetResourceGroup, vnetResourceIDs)
217+
return vnetResourceIDs, nil
228218
}
229-
serviceEndpoints := *subnet.SubnetPropertiesFormat.ServiceEndpoints
230-
for _, v := range serviceEndpoints {
231-
if strings.HasPrefix(pointer.StringDeref(v.Service, ""), storageService) {
232-
storageServiceExists = true
233-
klog.V(4).Infof("serviceEndpoint(%s) is already in subnet(%s)", storageService, subnetName)
234-
break
219+
220+
d.subnetLockMap.LockEntry(lockKey)
221+
defer d.subnetLockMap.UnlockEntry(lockKey)
222+
223+
var subnets []network.Subnet
224+
if subnetName != "" {
225+
// subnetName may hold several subnet names separated by commas; fetch each one individually
226+
subnetNames := strings.Split(subnetName, ",")
227+
for _, sn := range subnetNames {
228+
sn = strings.TrimSpace(sn)
229+
subnet, rerr := d.cloud.SubnetsClient.Get(ctx, vnetResourceGroup, vnetName, sn, "")
230+
if rerr != nil {
231+
return vnetResourceIDs, fmt.Errorf("failed to get the subnet %s under rg %s vnet %s: %v", subnetName, vnetResourceGroup, vnetName, rerr.Error())
232+
}
233+
subnets = append(subnets, subnet)
234+
}
235+
} else {
236+
var rerr *retry.Error
237+
subnets, rerr = d.cloud.SubnetsClient.List(ctx, vnetResourceGroup, vnetName)
238+
if rerr != nil {
239+
return vnetResourceIDs, fmt.Errorf("failed to list the subnets under rg %s vnet %s: %v", vnetResourceGroup, vnetName, rerr.Error())
235240
}
236241
}
237242

238-
if !storageServiceExists {
239-
serviceEndpoints = append(serviceEndpoints, storageServiceEndpoint)
240-
subnet.SubnetPropertiesFormat.ServiceEndpoints = &serviceEndpoints
243+
for _, subnet := range subnets {
244+
if subnet.Name == nil {
245+
return vnetResourceIDs, fmt.Errorf("subnet name is nil")
246+
}
247+
sn := *subnet.Name
248+
vnetResourceID := d.getSubnetResourceID(vnetResourceGroup, vnetName, sn)
249+
klog.V(2).Infof("set vnetResourceID %s", vnetResourceID)
250+
vnetResourceIDs = append(vnetResourceIDs, vnetResourceID)
251+
252+
endpointLocaions := []string{location}
253+
storageServiceEndpoint := network.ServiceEndpointPropertiesFormat{
254+
Service: &storageService,
255+
Locations: &endpointLocaions,
256+
}
257+
storageServiceExists := false
258+
if subnet.SubnetPropertiesFormat == nil {
259+
subnet.SubnetPropertiesFormat = &network.SubnetPropertiesFormat{}
260+
}
261+
if subnet.SubnetPropertiesFormat.ServiceEndpoints == nil {
262+
subnet.SubnetPropertiesFormat.ServiceEndpoints = &[]network.ServiceEndpointPropertiesFormat{}
263+
}
264+
serviceEndpoints := *subnet.SubnetPropertiesFormat.ServiceEndpoints
265+
for _, v := range serviceEndpoints {
266+
if strings.HasPrefix(pointer.StringDeref(v.Service, ""), storageService) {
267+
storageServiceExists = true
268+
klog.V(4).Infof("serviceEndpoint(%s) is already in subnet(%s)", storageService, sn)
269+
break
270+
}
271+
}
272+
273+
if !storageServiceExists {
274+
serviceEndpoints = append(serviceEndpoints, storageServiceEndpoint)
275+
subnet.SubnetPropertiesFormat.ServiceEndpoints = &serviceEndpoints
241276

242-
klog.V(2).Infof("begin to update the subnet %s under vnet %s rg %s", subnetName, vnetName, vnetResourceGroup)
243-
if err := d.cloud.SubnetsClient.CreateOrUpdate(ctx, vnetResourceGroup, vnetName, subnetName, subnet); err != nil {
244-
return fmt.Errorf("failed to update the subnet %s under vnet %s: %v", subnetName, vnetName, err)
277+
klog.V(2).Infof("begin to update the subnet %s under vnet %s in rg %s", sn, vnetName, vnetResourceGroup)
278+
if err := d.cloud.SubnetsClient.CreateOrUpdate(ctx, vnetResourceGroup, vnetName, sn, subnet); err != nil {
279+
return vnetResourceIDs, fmt.Errorf("failed to update the subnet %s under vnet %s: %v", sn, vnetName, err)
280+
}
245281
}
246-
klog.V(2).Infof("serviceEndpoint(%s) is appended in subnet(%s)", storageService, subnetName)
247282
}
248-
249-
return nil
283+
// cache the resulting vnetResourceIDs so that later calls with the same key return early without touching the subnets again
284+
d.subnetCache.Set(lockKey, vnetResourceIDs)
285+
return vnetResourceIDs, nil
250286
}
251287

252288
func (d *Driver) getStorageEndPointSuffix() string {

pkg/blob/azure_test.go

Lines changed: 14 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -32,12 +32,11 @@ import (
3232
"github.com/stretchr/testify/assert"
3333
"go.uber.org/mock/gomock"
3434
"k8s.io/client-go/kubernetes"
35+
"k8s.io/utils/pointer"
3536

3637
"sigs.k8s.io/blob-csi-driver/pkg/util"
3738
"sigs.k8s.io/cloud-provider-azure/pkg/azureclients/subnetclient/mocksubnetclient"
3839
azureprovider "sigs.k8s.io/cloud-provider-azure/pkg/provider"
39-
40-
"sigs.k8s.io/cloud-provider-azure/pkg/retry"
4140
)
4241

4342
// TestGetCloudProvider tests the func getCloudProvider().
@@ -328,25 +327,14 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
328327
testFunc func(t *testing.T)
329328
}{
330329
{
331-
name: "[fail] no subnet",
332-
testFunc: func(t *testing.T) {
333-
retErr := retry.NewError(false, fmt.Errorf("the subnet does not exist"))
334-
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(network.Subnet{}, retErr).Times(1)
335-
expectedErr := fmt.Errorf("failed to get the subnet %s under vnet %s: %v", config.SubnetName, config.VnetName, retErr)
336-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
337-
if !reflect.DeepEqual(err, expectedErr) {
338-
t.Errorf("Unexpected error: %v", err)
339-
}
340-
},
341-
},
342-
{
343-
name: "[success] subnetPropertiesFormat is nil",
330+
name: "[fail] subnet name is nil",
344331
testFunc: func(t *testing.T) {
345332
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(network.Subnet{}, nil).Times(1)
346333
mockSubnetClient.EXPECT().CreateOrUpdate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
347334

348-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
349-
if !reflect.DeepEqual(err, nil) {
335+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "subnetname")
336+
expectedErr := fmt.Errorf("subnet name is nil")
337+
if !reflect.DeepEqual(err, expectedErr) {
350338
t.Errorf("Unexpected error: %v", err)
351339
}
352340
},
@@ -356,12 +344,11 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
356344
testFunc: func(t *testing.T) {
357345
fakeSubnet := network.Subnet{
358346
SubnetPropertiesFormat: &network.SubnetPropertiesFormat{},
347+
Name: pointer.String("subnetName"),
359348
}
360349

361350
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).Times(1)
362-
mockSubnetClient.EXPECT().CreateOrUpdate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
363-
364-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
351+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "subnetname")
365352
if !reflect.DeepEqual(err, nil) {
366353
t.Errorf("Unexpected error: %v", err)
367354
}
@@ -374,12 +361,12 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
374361
SubnetPropertiesFormat: &network.SubnetPropertiesFormat{
375362
ServiceEndpoints: &[]network.ServiceEndpointPropertiesFormat{},
376363
},
364+
Name: pointer.String("subnetName"),
377365
}
378366

379-
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).Times(1)
380-
mockSubnetClient.EXPECT().CreateOrUpdate(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(nil).Times(1)
367+
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).AnyTimes()
381368

382-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
369+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "subnetname")
383370
if !reflect.DeepEqual(err, nil) {
384371
t.Errorf("Unexpected error: %v", err)
385372
}
@@ -396,11 +383,12 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
396383
},
397384
},
398385
},
386+
Name: pointer.String("subnetName"),
399387
}
400388

401-
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).Times(1)
389+
mockSubnetClient.EXPECT().Get(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).Return(fakeSubnet, nil).AnyTimes()
402390

403-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
391+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "subnetname")
404392
if !reflect.DeepEqual(err, nil) {
405393
t.Errorf("Unexpected error: %v", err)
406394
}
@@ -411,7 +399,7 @@ func TestUpdateSubnetServiceEndpoints(t *testing.T) {
411399
testFunc: func(t *testing.T) {
412400
d.cloud.SubnetsClient = nil
413401
expectedErr := fmt.Errorf("SubnetsClient is nil")
414-
err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
402+
_, err := d.updateSubnetServiceEndpoints(ctx, "", "", "")
415403
if !reflect.DeepEqual(err, expectedErr) {
416404
t.Errorf("Unexpected error: %v", err)
417405
}

pkg/blob/blob.go

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -247,6 +247,8 @@ type Driver struct {
247247
volStatsCache azcache.Resource
248248
// a timed cache storing account which should use sastoken for azcopy based volume cloning
249249
azcopySasTokenCache azcache.Resource
250+
// a timed cache storing the vnet resource IDs of subnets whose service endpoints have already been updated
251+
subnetCache azcache.Resource
250252
// sas expiry time for azcopy in volume clone
251253
sasTokenExpirationMinutes int
252254
// timeout in minutes for waiting for azcopy to finish
@@ -305,6 +307,10 @@ func NewDriver(options *DriverOptions, kubeClient kubernetes.Interface, cloud *p
305307
if d.volStatsCache, err = azcache.NewTimedCache(time.Duration(options.VolStatsCacheExpireInMinutes)*time.Minute, getter, false); err != nil {
306308
klog.Fatalf("%v", err)
307309
}
310+
if d.subnetCache, err = azcache.NewTimedCache(10*time.Minute, getter, false); err != nil {
311+
klog.Fatalf("%v", err)
312+
}
313+
308314
d.mounter = &mount.SafeFormatAndMount{
309315
Interface: mount.New(""),
310316
Exec: utilexec.New(),

pkg/blob/blob_test.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,7 @@ func TestNewDriver(t *testing.T) {
9797
fakedriver.dataPlaneAPIVolCache = driver.dataPlaneAPIVolCache
9898
fakedriver.azcopySasTokenCache = driver.azcopySasTokenCache
9999
fakedriver.volStatsCache = driver.volStatsCache
100+
fakedriver.subnetCache = driver.subnetCache
100101
fakedriver.cloud = driver.cloud
101102
assert.Equal(t, driver, fakedriver)
102103
}

pkg/blob/controllerserver.go

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -279,15 +279,9 @@ func (d *Driver) CreateVolume(ctx context.Context, req *csi.CreateVolumeRequest)
279279
storeAccountKey = false
280280
if !pointer.BoolDeref(createPrivateEndpoint, false) {
281281
// set VirtualNetworkResourceIDs for storage account firewall setting
282-
subnets := strings.Split(subnetName, ",")
283-
for _, subnet := range subnets {
284-
subnet = strings.TrimSpace(subnet)
285-
vnetResourceID := d.getSubnetResourceID(vnetResourceGroup, vnetName, subnet)
286-
klog.V(2).Infof("set vnetResourceID(%s) for NFS protocol", vnetResourceID)
287-
vnetResourceIDs = append(vnetResourceIDs, vnetResourceID)
288-
if err := d.updateSubnetServiceEndpoints(ctx, vnetResourceGroup, vnetName, subnet); err != nil {
289-
return nil, status.Errorf(codes.Internal, "update service endpoints failed with error: %v", err)
290-
}
282+
var err error
283+
if vnetResourceIDs, err = d.updateSubnetServiceEndpoints(ctx, vnetResourceGroup, vnetName, subnetName); err != nil {
284+
return nil, status.Errorf(codes.Internal, "update service endpoints failed with error: %v", err)
291285
}
292286
}
293287
}

0 commit comments

Comments
 (0)