Skip to content

Commit 05e0568

Browse files
authored
NVME driver concurrency
1 parent 8598bb1 commit 05e0568

File tree

6 files changed

+103
-5
lines changed

6 files changed

+103
-5
lines changed

core/concurrent_core.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3236,6 +3236,7 @@ func (o *ConcurrentTridentOrchestrator) publishVolume(ctx context.Context, volum
32363236
}
32373237

32383238
publishInfo.TridentUUID = o.uuid
3239+
publishInfo.BackendUUID = backend.BackendUUID()
32393240

32403241
// Enable publish enforcement if the backend supports it and the volume isn't already enforced
32413242
if publishEnforceable && !volume.Config.AccessInfo.PublishEnforcement {

storage_drivers/ontap/api/abstraction_error.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ const (
1818
EXPORT_POLICY_NOT_FOUND = "1703954"
1919
EXPORT_POLICY_RULE_EXISTS = "1704070"
2020
CONSISTENCY_GROUP_SNAP_EXISTS_ERROR = "53411921"
21+
NVME_SUBSYSTEM_ALREADY_EXISTS = "72090025"
2122
)
2223

2324
// ///////////////////////////////////////////////////////////////////////////

storage_drivers/ontap/api/abstraction_rest.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3460,6 +3460,28 @@ func (d OntapAPIREST) NVMeSubsystemCreate(ctx context.Context, subsystemName, co
34603460
Logc(ctx).Infof("Subsystem doesn't exist, creating new subsystem %v now.", subsystemName)
34613461
subsystem, err = d.api.NVMeSubsystemCreate(ctx, subsystemName, comment)
34623462
if err != nil {
3463+
3464+
errorResponse, extractErr := ExtractErrorResponse(ctx, err)
3465+
if extractErr != nil {
3466+
return nil, err
3467+
}
3468+
if errorResponse != nil && errorResponse.Error != nil {
3469+
errorCode := errorResponse.Error.Code
3470+
if errorCode != nil && *errorCode == NVME_SUBSYSTEM_ALREADY_EXISTS {
3471+
// If there is a conflict error, it means the subsystem got created in between the get and create calls
3472+
Logc(ctx).Infof("Subsystem %v already exists.", subsystemName)
3473+
subsystem, err = d.api.NVMeSubsystemGetByName(ctx, subsystemName, fields)
3474+
if err != nil {
3475+
Logc(ctx).Infof("Problem getting subsystem; %v", err)
3476+
return nil, err
3477+
}
3478+
if subsystem != nil {
3479+
return &NVMeSubsystem{UUID: *subsystem.UUID, Name: *subsystem.Name, NQN: *subsystem.TargetNqn}, nil
3480+
}
3481+
return nil, fmt.Errorf("unable to create subsystem %v", subsystemName)
3482+
}
3483+
}
3484+
34633485
return nil, err
34643486
}
34653487

storage_drivers/ontap/api/abstraction_rest_test.go

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -795,6 +795,41 @@ func TestNVMeSubsystemCreate(t *testing.T) {
795795
mock.EXPECT().ClientConfig().Return(clientConfig).AnyTimes()
796796
newsubsys, err = oapi.NVMeSubsystemCreate(ctx, subsystemName, subsystemComment)
797797
assert.Error(t, err)
798+
799+
// case 6: Subsystem not present, create a new one but returned already exists error with code NVME_SUBSYSTEM_ALREADY_EXISTS
800+
mock.EXPECT().NVMeSubsystemGetByName(ctx, subsystemName, gomock.Any()).Return(nil, nil).Times(1)
801+
mock.EXPECT().NVMeSubsystemGetByName(ctx, subsystemName, gomock.Any()).Return(subsys, nil).Times(1)
802+
mockErr := &nvme.NvmeSubsystemCreateDefault{
803+
Payload: &models.ErrorResponse{
804+
Error: &models.ReturnedError{
805+
Code: convert.ToPtr(api.NVME_SUBSYSTEM_ALREADY_EXISTS),
806+
Message: convert.ToPtr("NVMe subsystem already exists"),
807+
},
808+
},
809+
}
810+
mock.EXPECT().NVMeSubsystemCreate(ctx, subsystemName, subsystemComment).Return(nil, mockErr)
811+
mock.EXPECT().ClientConfig().Return(clientConfig).AnyTimes()
812+
newsubsys, err = oapi.NVMeSubsystemCreate(ctx, subsystemName, subsystemComment)
813+
assert.NoError(t, err)
814+
assert.Equal(t, newsubsys.UUID, subsysUUID, "subsystem UUID does not match")
815+
assert.Equal(t, newsubsys.Name, subsystemName, "subsystem name does not match")
816+
817+
// case 7: Subsystem not present, create a new one but returned already exists error with code NVME_SUBSYSTEM_ALREADY_EXISTS
818+
mock.EXPECT().NVMeSubsystemGetByName(ctx, subsystemName, gomock.Any()).Return(nil, nil).Times(1)
819+
mock.EXPECT().NVMeSubsystemGetByName(ctx, subsystemName, gomock.Any()).Return(nil, nil).Times(1)
820+
mockErr = &nvme.NvmeSubsystemCreateDefault{
821+
Payload: &models.ErrorResponse{
822+
Error: &models.ReturnedError{
823+
Code: convert.ToPtr(api.NVME_SUBSYSTEM_ALREADY_EXISTS),
824+
Message: convert.ToPtr("NVMe subsystem already exists"),
825+
},
826+
},
827+
}
828+
mock.EXPECT().NVMeSubsystemCreate(ctx, subsystemName, subsystemComment).Return(nil, mockErr)
829+
mock.EXPECT().ClientConfig().Return(clientConfig).AnyTimes()
830+
newsubsys, err = oapi.NVMeSubsystemCreate(ctx, subsystemName, subsystemComment)
831+
assert.Error(t, err)
832+
assert.Nil(t, newsubsys, "subsystem expected to be nil when error is returned")
798833
}
799834

800835
func TestNVMeEnsureNamespaceMapped(t *testing.T) {

storage_drivers/ontap/ontap_common.go

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -152,10 +152,16 @@ var (
152152
volumeNameRegex = regexp.MustCompile(`\{+.*\.volume.Name[^{a-z]*\}+`)
153153
volumeNameStartWithRegex = regexp.MustCompile(`^[A-Za-z_].*`)
154154
smbShareDeleteACL = map[string]string{DefaultSMBAccessControlUser: DefaultSMBAccessControlUserType}
155-
lunMutex = locks.NewGCNamedMutex()
156-
igroupMutex = locks.NewGCNamedMutex()
157155
exportPolicyMutex = locks.NewGCNamedMutex()
158156

157+
// NOTE: The lock order should be lunMutex first, then igroupMutex
158+
lunMutex = locks.NewGCNamedMutex()
159+
igroupMutex = locks.NewGCNamedMutex()
160+
161+
// NOTE: The lock order should be namespaceMutex first, then subsystemMutex
162+
namespaceMutex = locks.NewGCNamedMutex()
163+
subsystemMutex = locks.NewGCNamedMutex()
164+
159165
duringVolCloneAfterSnapCreation1 = fiji.Register("duringVolCloneAfterSnapCreation1", "ontap_common")
160166
duringVolCloneAfterSnapCreation2 = fiji.Register("duringVolCloneAfterSnapCreation2", "ontap_common")
161167
)
@@ -5431,3 +5437,15 @@ func getUniqueNodeSpecificSubsystemName(
54315437

54325438
return completeSSName, finalSSName, nil
54335439
}
5440+
5441+
// lockNamespaceAndSubsystem acquires the namespace lock first and then the subsystem lock.
5442+
// It returns a function that should be deferred to release the locks in reverse order.
5443+
func lockNamespaceAndSubsystem(nsUUID, ssUUID string) func() {
5444+
namespaceMutex.Lock(nsUUID)
5445+
subsystemMutex.Lock(ssUUID)
5446+
5447+
return func() {
5448+
subsystemMutex.Unlock(ssUUID)
5449+
namespaceMutex.Unlock(nsUUID)
5450+
}
5451+
}

storage_drivers/ontap/ontap_san_nvme.go

Lines changed: 24 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -690,6 +690,10 @@ func (d *NVMeStorageDriver) Import(ctx context.Context, volConfig *storage.Volum
690690
return fmt.Errorf("nvme namespace not found in volume %s", originalName)
691691
}
692692

693+
// Lock the namespace until import is done.
694+
namespaceMutex.Lock(nsInfo.UUID)
695+
defer namespaceMutex.Unlock(nsInfo.UUID)
696+
693697
// The Namespace should be online
694698
if nsInfo.State != "online" {
695699
return fmt.Errorf("Namespace %s is not online", nsInfo.Name)
@@ -888,6 +892,7 @@ func (d *NVMeStorageDriver) Publish(
888892

889893
nsPath := volConfig.InternalID
890894
nsUUID := volConfig.AccessInfo.NVMeNamespaceUUID
895+
891896
// For docker context, some of the attributes like fsType, luks needs to be
892897
// fetched from namespace where they were stored while creating the namespace.
893898
if d.Config.DriverContext == tridentconfig.ContextDocker {
@@ -929,17 +934,20 @@ func (d *NVMeStorageDriver) Publish(
929934
}
930935
}
931936

932-
// Checks if subsystem exists and creates a new one if not
933-
subsystem, err := d.API.NVMeSubsystemCreate(ctx, ssName, ssName)
937+
// If 2 concurrent requests try to create the same subsystem, one will succeed and ONTAP is guaranteed to return
938+
// "already exists" error code for the other. So no need for locking around subsystem creation.
939+
subsystem, err := d.createOrGetSubsystem(ctx, ssName)
934940
if err != nil {
935-
Logc(ctx).Errorf("subsystem create failed, %v", err)
936941
return err
937942
}
938943

939944
if subsystem == nil {
940945
return fmt.Errorf("No subsystem returned after subsystem create")
941946
}
942947

948+
unlock := lockNamespaceAndSubsystem(nsUUID, subsystem.UUID)
949+
defer unlock()
950+
943951
// Fill important info in publishInfo
944952
publishInfo.NVMeSubsystemNQN = subsystem.NQN
945953
publishInfo.NVMeSubsystemUUID = subsystem.UUID
@@ -989,6 +997,9 @@ func (d *NVMeStorageDriver) Unpublish(
989997
subsystemUUID := volConfig.AccessInfo.NVMeSubsystemUUID
990998
namespaceUUID := volConfig.AccessInfo.NVMeNamespaceUUID
991999

1000+
unlock := lockNamespaceAndSubsystem(namespaceUUID, subsystemUUID)
1001+
defer unlock()
1002+
9921003
removePublishInfo, err := d.API.NVMeEnsureNamespaceUnmapped(ctx, publishInfo.HostNQN, subsystemUUID, namespaceUUID)
9931004
if removePublishInfo {
9941005
volConfig.AccessInfo.NVMeTargetIPs = []string{}
@@ -1165,6 +1176,16 @@ func (d *NVMeStorageDriver) getStoragePoolAttributes(ctx context.Context) map[st
11651176
}
11661177
}
11671178

1179+
func (d *NVMeStorageDriver) createOrGetSubsystem(ctx context.Context, ssName string) (*api.NVMeSubsystem, error) {
1180+
// This checks if subsystem exists and creates it if not.
1181+
ss, err := d.API.NVMeSubsystemCreate(ctx, ssName, ssName)
1182+
if err != nil {
1183+
Logc(ctx).Errorf("subsystem create failed, %v", err)
1184+
return nil, err
1185+
}
1186+
return ss, nil
1187+
}
1188+
11681189
// GetVolumeOpts populates the volume properties required in volume create.
11691190
func (d *NVMeStorageDriver) GetVolumeOpts(
11701191
ctx context.Context, volConfig *storage.VolumeConfig, requests map[string]sa.Request,

0 commit comments

Comments
 (0)