Commit d771e94

Implement unique subsystem names for regular ONTAP NVMe driver
1 parent 75cda47 commit d771e94

6 files changed, +151 -46 lines changed

storage_drivers/ontap/api/abstraction_rest.go

Lines changed: 26 additions & 3 deletions
@@ -3520,6 +3520,7 @@ func (d OntapAPIREST) NVMeEnsureNamespaceMapped(ctx context.Context, subsystemUU
 // NVMeEnsureNamespaceUnmapped first checks if a namespace is mapped to the subsystem and if it is mapped:
 // a) removes the namespace from the subsystem
 // b) deletes the subsystem if no more namespaces are attached to it
+// c) handles the case where multiple hosts are mapped to a single common subsystem arising out of lengthy node names.
 // If namespace is not mapped to subsystem, it is treated as success
 // The function also returns a bool value along with error. A true value denotes the subsystem is deleted
 // successfully and Published info can be removed for the NVMe volume
@@ -3549,8 +3550,15 @@ func (d OntapAPIREST) NVMeEnsureNamespaceUnmapped(ctx context.Context, hostNQN,
 		}
 	}
 
-	// In case of multiple hosts attached to a subsystem (e.g. in RWX case), do not delete the namespace,
-	// subsystem or the published info
+	// Below are the cases where multiple hosts are attached to a subsystem:
+	// case 1: RWX case - Multiple hosts are intentionally sharing the same namespace for ReadWriteMany access.
+	// In this scenario, do not delete the namespace mapping, as other hosts may still be using it.
+	// case 2: Common subsystem due to lengthy host names - When host names exceed the allowed length,
+	// multiple hosts may be mapped to a single "common" subsystem. In this scenario, the subsystem
+	// is not truly shared for RWX purposes, but is a result of host name truncation. Therefore,
+	// it is appropriate to remove the namespace mapping when detaching a host, as the mapping is
+	// not required for other hosts. This differs from the RWX case, where the namespace mapping
+	// must be preserved for legitimate multi-host access.
 	if len(subsystemHosts) > 1 {
 		if hostFound {
 			Logc(ctx).Infof("Multiple hosts are attached to this subsystem %v. Do not delete namespace or subsystem",
@@ -3560,6 +3568,21 @@ func (d OntapAPIREST) NVMeEnsureNamespaceUnmapped(ctx context.Context, hostNQN,
 			return false, err
 		}
 	}
+
+	// Check if there are multiple namespaces attached to the subsystem.
+	// This indicates case 2, so remove the namespace only.
+	count, err := d.api.NVMeNamespaceCount(ctx, subsystemUUID)
+	if err != nil {
+		return false, fmt.Errorf("error getting namespace count for subsystem %s; %v", subsystemUUID, err)
+	}
+
+	if count > 1 {
+		err = d.api.NVMeSubsystemRemoveNamespace(ctx, subsystemUUID, namespaceUUID)
+		if err != nil {
+			return false, fmt.Errorf("error removing namespace %s from subsystem %s; %v", namespaceUUID, subsystemUUID, err)
+		}
+	}
+
 	return false, nil
 }
 
@@ -3583,7 +3606,7 @@ func (d OntapAPIREST) NVMeEnsureNamespaceUnmapped(ctx context.Context, hostNQN,
 		return false, fmt.Errorf("error getting namespace count for subsystem %s; %v", subsystemUUID, err)
 	}
 
-	// Delete the subsystem if no. of namespaces is 0
+	// Delete the subsystem if the namespace count for this subsystem is 0 after removal.
 	if count == 0 {
 		if err := d.api.NVMeSubsystemDelete(ctx, subsystemUUID); err != nil {
 			return false, fmt.Errorf("error deleting subsystem %s; %v", subsystemUUID, err)

storage_drivers/ontap/api/abstraction_rest_test.go

Lines changed: 74 additions & 0 deletions
@@ -964,6 +964,7 @@ func TestNVMeNamespaceUnmapped(t *testing.T) {
 	mock.EXPECT().NVMeIsNamespaceMapped(ctx, subsystemUUID, nsUUID).Return(true, nil).Times(1)
 	mock.EXPECT().NVMeGetHostsOfSubsystem(ctx, subsystemUUID).Return([]*models.NvmeSubsystemHost{host1, host2}, nil).Times(1)
 	mock.EXPECT().NVMeRemoveHostFromSubsystem(ctx, hostNQN, subsystemUUID).Return(nil).Times(1)
+	mock.EXPECT().NVMeNamespaceCount(ctx, subsystemUUID).Return(int64(1), nil).Times(1)
 
 	removePublishInfo, err = oapi.NVMeEnsureNamespaceUnmapped(ctx, hostNQN, subsystemUUID, nsUUID)
 
@@ -1035,11 +1036,84 @@ func TestNVMeNamespaceUnmapped(t *testing.T) {
 	mock.EXPECT().NVMeIsNamespaceMapped(ctx, subsystemUUID, nsUUID).Return(true, nil).Times(1)
 	mock.EXPECT().NVMeGetHostsOfSubsystem(ctx, subsystemUUID).Return([]*models.NvmeSubsystemHost{host1, host2},
 		nil).Times(1)
+	mock.EXPECT().NVMeNamespaceCount(ctx, subsystemUUID).Return(int64(1), nil).Times(1)
 
 	removePublishInfo, err = oapi.NVMeEnsureNamespaceUnmapped(ctx, nonExistentHostNQN, subsystemUUID, nsUUID)
 
 	assert.Equal(t, false, removePublishInfo, "nqn is unmapped")
 	assert.NoError(t, err)
+
+	// case 13: Multiple hosts with error getting namespace count
+	mock.EXPECT().ClientConfig().Return(clientConfig).AnyTimes()
+	mock.EXPECT().NVMeIsNamespaceMapped(ctx, subsystemUUID, nsUUID).Return(true, nil).Times(1)
+	mock.EXPECT().NVMeGetHostsOfSubsystem(ctx, subsystemUUID).Return([]*models.NvmeSubsystemHost{host1, host2}, nil).Times(1)
+	mock.EXPECT().NVMeRemoveHostFromSubsystem(ctx, hostNQN, subsystemUUID).Return(nil).Times(1)
+	mock.EXPECT().NVMeNamespaceCount(ctx, subsystemUUID).Return(int64(0), errors.New("Error getting namespace count")).Times(1)
+
+	removePublishInfo, err = oapi.NVMeEnsureNamespaceUnmapped(ctx, hostNQN, subsystemUUID, nsUUID)
+
+	assert.Equal(t, false, removePublishInfo, "subsystem removed")
+	assert.Error(t, err)
+
+	// case 14: Multiple hosts with namespace count > 1, error removing namespace
+	mock.EXPECT().ClientConfig().Return(clientConfig).AnyTimes()
+	mock.EXPECT().NVMeIsNamespaceMapped(ctx, subsystemUUID, nsUUID).Return(true, nil).Times(1)
+	mock.EXPECT().NVMeGetHostsOfSubsystem(ctx, subsystemUUID).Return([]*models.NvmeSubsystemHost{host1, host2}, nil).Times(1)
+	mock.EXPECT().NVMeRemoveHostFromSubsystem(ctx, hostNQN, subsystemUUID).Return(nil).Times(1)
+	mock.EXPECT().NVMeNamespaceCount(ctx, subsystemUUID).Return(int64(2), nil).Times(1)
+	mock.EXPECT().NVMeSubsystemRemoveNamespace(ctx, subsystemUUID, nsUUID).Return(errors.New("Error removing namespace")).Times(1)
+
+	removePublishInfo, err = oapi.NVMeEnsureNamespaceUnmapped(ctx, hostNQN, subsystemUUID, nsUUID)
+
+	assert.Equal(t, false, removePublishInfo, "subsystem removed")
+	assert.Error(t, err)
+
+	// case 15: Multiple hosts with namespace count > 1, success removing namespace
+	mock.EXPECT().ClientConfig().Return(clientConfig).AnyTimes()
+	mock.EXPECT().NVMeIsNamespaceMapped(ctx, subsystemUUID, nsUUID).Return(true, nil).Times(1)
+	mock.EXPECT().NVMeGetHostsOfSubsystem(ctx, subsystemUUID).Return([]*models.NvmeSubsystemHost{host1, host2}, nil).Times(1)
+	mock.EXPECT().NVMeRemoveHostFromSubsystem(ctx, hostNQN, subsystemUUID).Return(nil).Times(1)
+	mock.EXPECT().NVMeNamespaceCount(ctx, subsystemUUID).Return(int64(3), nil).Times(1)
+	mock.EXPECT().NVMeSubsystemRemoveNamespace(ctx, subsystemUUID, nsUUID).Return(nil).Times(1)
+
+	removePublishInfo, err = oapi.NVMeEnsureNamespaceUnmapped(ctx, hostNQN, subsystemUUID, nsUUID)
+
+	assert.Equal(t, false, removePublishInfo, "subsystem is not removed due to multiple namespaces")
+	assert.NoError(t, err)
+
+	// case 16: Multiple hosts with namespace count = 1 (RWX case), no removal of namespace or subsystem
+	mock.EXPECT().ClientConfig().Return(clientConfig).AnyTimes()
+	mock.EXPECT().NVMeIsNamespaceMapped(ctx, subsystemUUID, nsUUID).Return(true, nil).Times(1)
+	mock.EXPECT().NVMeGetHostsOfSubsystem(ctx, subsystemUUID).Return([]*models.NvmeSubsystemHost{host1, host2}, nil).Times(1)
+	mock.EXPECT().NVMeRemoveHostFromSubsystem(ctx, hostNQN, subsystemUUID).Return(nil).Times(1)
+	mock.EXPECT().NVMeNamespaceCount(ctx, subsystemUUID).Return(int64(1), nil).Times(1)
+
+	removePublishInfo, err = oapi.NVMeEnsureNamespaceUnmapped(ctx, hostNQN, subsystemUUID, nsUUID)
+
+	assert.Equal(t, false, removePublishInfo, "subsystem is not removed in RWX case")
+	assert.NoError(t, err)
+
+	// case 17: Single host but different from requested host (nonExistentHostNQN), subsystem and namespace not removed
+	mock.EXPECT().ClientConfig().Return(clientConfig).AnyTimes()
+	mock.EXPECT().NVMeIsNamespaceMapped(ctx, subsystemUUID, nsUUID).Return(true, nil).Times(1)
+	mock.EXPECT().NVMeGetHostsOfSubsystem(ctx, subsystemUUID).Return([]*models.NvmeSubsystemHost{host1}, nil).Times(1)
+
+	removePublishInfo, err = oapi.NVMeEnsureNamespaceUnmapped(ctx, nonExistentHostNQN, subsystemUUID, nsUUID)
+
+	assert.Equal(t, false, removePublishInfo, "subsystem is not removed when host doesn't match")
+	assert.NoError(t, err)
+
+	// case 18: Namespace count > 0 after removal, subsystem not deleted
+	mock.EXPECT().ClientConfig().Return(clientConfig).AnyTimes()
+	mock.EXPECT().NVMeIsNamespaceMapped(ctx, subsystemUUID, nsUUID).Return(true, nil).Times(1)
+	mock.EXPECT().NVMeGetHostsOfSubsystem(ctx, subsystemUUID).Return([]*models.NvmeSubsystemHost{host1}, nil).Times(1)
+	mock.EXPECT().NVMeSubsystemRemoveNamespace(ctx, subsystemUUID, nsUUID).Return(nil).Times(1)
+	mock.EXPECT().NVMeNamespaceCount(ctx, subsystemUUID).Return(int64(2), nil).Times(1)
+
+	removePublishInfo, err = oapi.NVMeEnsureNamespaceUnmapped(ctx, hostNQN, subsystemUUID, nsUUID)
+
+	assert.Equal(t, true, removePublishInfo, "subsystem has remaining namespaces")
+	assert.NoError(t, err)
 }
 
 func TestNVMeIsNamespaceMapped(t *testing.T) {

storage_drivers/ontap/ontap_common.go

Lines changed: 3 additions & 0 deletions
@@ -5438,6 +5438,8 @@ func getUniqueNodeSpecificSubsystemName(
 
 	// Construct the subsystem name
 	completeSSName := fmt.Sprintf("%s_%s_%s", prefix, nodeName, tridentUUID)
+	// Skip any underscores at the beginning
+	completeSSName = strings.TrimLeft(completeSSName, "_")
 	finalSSName := completeSSName
 
 	// Ensure the final name does not exceed the maximum length
@@ -5451,6 +5453,7 @@ func getUniqueNodeSpecificSubsystemName(
 
 		base64Str := base64.StdEncoding.EncodeToString(u[:])
 		finalSSName = fmt.Sprintf("%s_%s_%s", prefix, nodeName, base64Str)
+		finalSSName = strings.TrimLeft(finalSSName, "_")
 
 		if len(finalSSName) > maxSubsystemLength {
 			// If even after Trident UUID reduction, the length is more than max length,
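The reason for the two TrimLeft calls is easy to see in a standalone example: the regular driver passes an empty prefix (see nvmeSubsystemPrefix in ontap_san_nvme.go below), so the three-part format string would otherwise produce a name with a leading underscore. The UUID value here is made up for illustration:

package main

import (
	"fmt"
	"strings"
)

func main() {
	prefix := "" // the regular ONTAP NVMe driver passes an empty prefix
	nodeName := "node1"
	tridentUUID := "7b97e2c8-aeef-4a66-a9f1-c163cbeb3a0f" // made-up example value

	name := fmt.Sprintf("%s_%s_%s", prefix, nodeName, tridentUUID)
	fmt.Println(name) // "_node1_7b97e2c8-..." - unwanted leading underscore

	name = strings.TrimLeft(name, "_")
	fmt.Println(name) // "node1_7b97e2c8-..."
}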

storage_drivers/ontap/ontap_common_test.go

Lines changed: 20 additions & 0 deletions
@@ -9824,6 +9824,26 @@ func TestGetUniqueNodeSpecificSubsystemName(t *testing.T) {
 			expectedFinal:      "",
 			expectError:        true,
 		},
+		{
+			description:        "Valid input, with no prefix passed",
+			nodeName:           "node1",
+			tridentUUID:        tridentUUID,
+			prefix:             "",
+			maxSubsystemLength: 64,
+			expectedBestCase:   fmt.Sprintf("node1_%s", tridentUUID),
+			expectedFinal:      fmt.Sprintf("node1_%s", tridentUUID),
+			expectError:        false,
+		},
+		{
+			description:        "Even hash is more than limit, truncation needed with no prefix",
+			nodeName:           "averylongnodenameexceedingthelimit",
+			tridentUUID:        tridentUUID,
+			prefix:             "",
+			maxSubsystemLength: 32,
+			expectedBestCase:   fmt.Sprintf("averylongnodenameexceedingthelimit_%s", tridentUUID),
+			expectedFinal:      fmt.Sprintf("%x", sha256.Sum256([]byte(fmt.Sprintf("averylongnodenameexceedingthelimit_%s", tridentUUID))))[:32],
+			expectError:        false,
+		},
 	}
 
 	for _, test := range tests {

storage_drivers/ontap/ontap_san_nvme.go

Lines changed: 20 additions & 15 deletions
@@ -58,6 +58,8 @@ const (
 	defaultNamespaceBlockSize = 4096
 	// maximumSubsystemNameLength represent the max length of subsystem name
 	maximumSubsystemNameLength = 64
+	// nvmeSubsystemPrefix Subsystem prefix for ONTAP NVMe driver (empty for legacy compatibility).
+	nvmeSubsystemPrefix = ""
 )
 
 // Namespace attributes stored in its comment field. These fields are useful for docker context.
@@ -923,6 +925,7 @@ func (d *NVMeStorageDriver) Publish(
 	// When FS type is RAW, we create a new subsystem per namespace,
 	// else we use the subsystem created for that particular node
 	var ssName string
+	var completeSSName string
 	if volConfig.FileSystem == filesystem.Raw {
 		ssName = getNamespaceSpecificSubsystemName(name, pvName)
 	} else {
@@ -931,13 +934,24 @@ func (d *NVMeStorageDriver) Publish(
 		if tridentconfig.CurrentDriverContext == tridentconfig.ContextDocker {
 			ssName = d.getStoragePrefixSubsystemName()
 		} else {
-			ssName = d.getNodeSpecificSubsystemName(publishInfo.HostName, publishInfo.TridentUUID)
+			if completeSSName, ssName, err = getUniqueNodeSpecificSubsystemName(
+				publishInfo.HostName, publishInfo.TridentUUID, nvmeSubsystemPrefix, maximumSubsystemNameLength); err != nil {
+				return fmt.Errorf("failed to create node specific subsystem name: %w", err)
+			}
 		}
 	}
 
+	// Update the subsystem comment
+	var ssComment string
+	if completeSSName == "" {
+		ssComment = ssName
+	} else {
+		ssComment = completeSSName
+	}
+
 	// If 2 concurrent requests try to create the same subsystem, one will succeed and ONTAP is guaranteed to return
 	// "already exists" error code for the other. So no need for locking around subsystem creation.
-	subsystem, err := d.createOrGetSubsystem(ctx, ssName)
+	subsystem, err := d.createOrGetSubsystem(ctx, ssName, ssComment)
 	if err != nil {
 		return err
 	}
@@ -1177,9 +1191,11 @@ func (d *NVMeStorageDriver) getStoragePoolAttributes(ctx context.Context) map[st
 	}
 }
 
-func (d *NVMeStorageDriver) createOrGetSubsystem(ctx context.Context, ssName string) (*api.NVMeSubsystem, error) {
+func (d *NVMeStorageDriver) createOrGetSubsystem(
+	ctx context.Context, ssName, comment string,
+) (*api.NVMeSubsystem, error) {
 	// This checks if subsystem exists and creates it if not.
-	ss, err := d.API.NVMeSubsystemCreate(ctx, ssName, ssName)
+	ss, err := d.API.NVMeSubsystemCreate(ctx, ssName, comment)
 	if err != nil {
 		Logc(ctx).Errorf("subsystem create failed, %v", err)
 		return nil, err
@@ -1743,17 +1759,6 @@ func (d *NVMeStorageDriver) ParseNVMeNamespaceCommentString(_ context.Context, c
 	return nil, fmt.Errorf("nsAttrs field not found in Namespace comment")
 }
 
-func (d *NVMeStorageDriver) getNodeSpecificSubsystemName(nodeName, tridentUUID string) string {
-	// For CSI mode, use per-node subsystems
-	subsystemName := fmt.Sprintf("%s-%s", nodeName, tridentUUID)
-	if len(subsystemName) > maximumSubsystemNameLength {
-		// If the new subsystem name is over the subsystem character limit, it means the host name is too long.
-		subsystemPrefixLength := maximumSubsystemNameLength - len(tridentUUID) - 1
-		subsystemName = fmt.Sprintf("%s-%s", nodeName[:subsystemPrefixLength], tridentUUID)
-	}
-	return subsystemName
-}
-
 // getStoragePrefixSubsystemName generates a stable subsystem name using storage prefix for Docker mode
 func (d *NVMeStorageDriver) getStoragePrefixSubsystemName() string {
 	// Default for Docker is "netappdvp_", users can customize for isolation/sharing
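To see why the truncation-based helper was retired, consider a hypothetical before/after comparison. The truncated function below reproduces the removed getNodeSpecificSubsystemName logic; hashed is a simplified stand-in for the fallback in getUniqueNodeSpecificSubsystemName (the real function first tries a base64-shortened UUID before hashing), so treat it as a sketch, not the commit's exact algorithm:

package main

import (
	"crypto/sha256"
	"fmt"
)

const maxLen = 64

// truncated mimics the removed helper: long node names are simply cut to fit.
func truncated(nodeName, tridentUUID string) string {
	name := fmt.Sprintf("%s-%s", nodeName, tridentUUID)
	if len(name) > maxLen {
		prefixLen := maxLen - len(tridentUUID) - 1
		name = fmt.Sprintf("%s-%s", nodeName[:prefixLen], tridentUUID)
	}
	return name
}

// hashed mimics the unique-name fallback: hash the full name and truncate the hex digest.
func hashed(nodeName, tridentUUID string) string {
	full := fmt.Sprintf("%s_%s", nodeName, tridentUUID)
	if len(full) <= maxLen {
		return full
	}
	return fmt.Sprintf("%x", sha256.Sum256([]byte(full)))[:maxLen]
}

func main() {
	uuid := "7b97e2c8-aeef-4a66-a9f1-c163cbeb3a0f" // made-up example value
	a := "k8s-worker-node-with-a-very-long-shared-prefix-alpha"
	b := "k8s-worker-node-with-a-very-long-shared-prefix-bravo"

	// Truncation keeps only the shared prefix, so both nodes collide on one
	// "common" subsystem; hashing the full name keeps them distinct.
	fmt.Println(truncated(a, uuid) == truncated(b, uuid)) // true: collision
	fmt.Println(hashed(a, uuid) == hashed(b, uuid))       // false: unique
}

That collision is exactly the "common subsystem" condition the NVMeEnsureNamespaceUnmapped changes above have to detect and clean up after.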
