Skip to content

Commit 7f629ca

Browse files
authored
Handle stale LUKS mappers for NVMe
This commit enhances Trident's CSI node stage and unstage operations to handle stale mapper devices for LUKS-encrypted NVMe devices.
1 parent 5d2664c commit 7f629ca

File tree

20 files changed

+844
-323
lines changed

20 files changed

+844
-323
lines changed

frontend/csi/node_helpers/kubernetes/plugin.go

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2022 NetApp, Inc. All Rights Reserved.
1+
// Copyright 2025 NetApp, Inc. All Rights Reserved.
22

33
package kubernetes
44

@@ -214,13 +214,13 @@ func (h *helper) RemovePublishedPath(ctx context.Context, volumeID, pathToRemove
214214

215215
volTrackingInfo, err := h.ReadTrackingInfo(ctx, volumeID)
216216
if err != nil {
217-
return fmt.Errorf("failed to read the tracking file; %v", err)
217+
return fmt.Errorf("failed to read the tracking file; %w", err)
218218
}
219219

220220
delete(volTrackingInfo.PublishedPaths, pathToRemove)
221221

222222
if err := h.WriteTrackingInfo(ctx, volumeID, volTrackingInfo); err != nil {
223-
return fmt.Errorf("failed to update the tracking file; %v", err)
223+
return fmt.Errorf("failed to update the tracking file; %w", err)
224224
}
225225

226226
h.publishedPaths[volumeID] = volTrackingInfo.PublishedPaths

frontend/csi/node_server.go

Lines changed: 41 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -638,12 +638,12 @@ func (p *Plugin) nodeExpandVolume(
638638
devicePath := publishInfo.DevicePath
639639
if convert.ToBool(publishInfo.LUKSEncryption) {
640640
if !luks.IsLegacyDevicePath(devicePath) {
641-
devicePath, err = p.devices.GetLUKSDeviceForMultipathDevice(devicePath)
641+
devicePath, err = p.devices.GetLUKSDevicePathForVolume(ctx, volumeId)
642642
if err != nil {
643643
Logc(ctx).WithFields(LogFields{
644644
"volumeId": volumeId,
645645
"publishedPath": publishInfo.DevicePath,
646-
}).WithError(err).Error("Failed to get LUKS device path from device path.")
646+
}).WithError(err).Error("Failed to get LUKS device path for volume.")
647647
return status.Error(codes.Internal, err.Error())
648648
}
649649
}
@@ -1465,15 +1465,18 @@ func (p *Plugin) nodeUnstageFCPVolume(
14651465
publishInfo.DevicePath = dmPath
14661466
}
14671467
} else {
1468-
// If not using luks legacy device path we need to find the LUKS mapper device
1469-
luksMapperPath, err = p.devices.GetLUKSDeviceForMultipathDevice(publishInfo.DevicePath)
1468+
// If not using luks legacy device path we need to find the LUKS mapper device.
1469+
luksMapperPath, err = p.devices.GetLUKSDevicePathForVolume(ctx, req.GetVolumeId())
14701470
if err != nil {
1471-
if !errors.IsNotFoundError(err) {
1472-
Logc(ctx).WithFields(fields).WithError(err).Warn(
1473-
"Could not determine LUKS device path from multipath device. " +
1474-
"Continuing with device removal.")
1471+
// If the LUKS device is not found, the functional difference is negligible to unstage.
1472+
// But it may be useful to log at different levels for observability.
1473+
log := Logc(ctx).WithFields(fields).WithError(err)
1474+
if errors.IsNotFoundError(err) {
1475+
log.Warn("Failed to get LUKS device path for volume.")
1476+
} else {
1477+
log.Debug("Could not determine LUKS device path for volume.")
14751478
}
1476-
Logc(ctx).WithFields(fields).Info("No LUKS device path found from multipath device.")
1479+
log.Debug("Continuing with device removal.")
14771480
}
14781481
}
14791482
err = p.devices.EnsureLUKSDeviceClosedWithMaxWaitLimit(ctx, luksMapperPath)
@@ -1516,11 +1519,10 @@ func (p *Plugin) nodeUnstageFCPVolume(
15161519
"multipathDevice": deviceInfo.MultipathDevice,
15171520
}
15181521

1519-
luksMapperPath, err = p.devices.GetLUKSDeviceForMultipathDevice(deviceInfo.MultipathDevice)
1522+
luksMapperPath, err = p.devices.GetLUKSDevicePathForVolume(ctx, req.GetVolumeId())
15201523
if err != nil {
15211524
if !errors.IsNotFoundError(err) {
1522-
Logc(ctx).WithFields(fields).
1523-
WithError(err).Error("Failed to get LUKS device path from multipath device.")
1525+
Logc(ctx).WithFields(fields).WithError(err).Error("Failed to get LUKS device path from multipath device.")
15241526
return err
15251527
}
15261528
Logc(ctx).WithFields(fields).Info("No LUKS device path found from multipath device.")
@@ -1969,7 +1971,7 @@ func (p *Plugin) nodeUnstageISCSIVolume(
19691971
if convert.ToBool(publishInfo.LUKSEncryption) {
19701972
var err error
19711973
var luksMapperPath string
1972-
fields := LogFields{"device": publishInfo.DevicePath}
1974+
fields := LogFields{"device": publishInfo.DevicePath, "volume": req.GetVolumeId()}
19731975
// Set device path to dm device to correctly verify legacy volumes.
19741976
if luks.IsLegacyDevicePath(publishInfo.DevicePath) {
19751977
luksMapperPath = publishInfo.DevicePath
@@ -1983,17 +1985,22 @@ func (p *Plugin) nodeUnstageISCSIVolume(
19831985
publishInfo.DevicePath = dmPath
19841986
}
19851987
} else {
1986-
// If not using luks legacy device path we need to find the LUKS mapper device
1987-
luksMapperPath, err = p.devices.GetLUKSDeviceForMultipathDevice(publishInfo.DevicePath)
1988+
// Use the volume ID to get the LUKS mapper path.
1989+
// This should always work if the mapper is still present.
1990+
luksMapperPath, err = p.devices.GetLUKSDevicePathForVolume(ctx, req.GetVolumeId())
19881991
if err != nil {
1989-
if !errors.IsNotFoundError(err) {
1990-
Logc(ctx).WithFields(fields).WithError(err).Warn(
1991-
"Could not determine LUKS device path from multipath device. " +
1992-
"Continuing with device removal.")
1992+
// If the LUKS device is not found, the functional difference is negligible to unstage.
1993+
// But it may be useful to log at different levels for observability.
1994+
log := Logc(ctx).WithFields(fields).WithError(err)
1995+
if errors.IsNotFoundError(err) {
1996+
log.Warn("Failed to get LUKS device path for volume.")
1997+
} else {
1998+
log.Debug("Could not determine LUKS device path for volume.")
19931999
}
1994-
Logc(ctx).WithFields(fields).Info("No LUKS device path found from multipath device.")
2000+
log.Debug("Continuing with device removal.")
19952001
}
19962002
}
2003+
19972004
err = p.devices.EnsureLUKSDeviceClosedWithMaxWaitLimit(ctx, luksMapperPath)
19982005
if err != nil {
19992006
Logc(ctx).WithError(err).Debug("Unable to remove LUKS device. Continuing with tracking file removal.")
@@ -2044,7 +2051,7 @@ func (p *Plugin) nodeUnstageISCSIVolume(
20442051
"multipathDevice": deviceInfo.MultipathDevice,
20452052
}
20462053

2047-
luksMapperPath, err = p.devices.GetLUKSDeviceForMultipathDevice(deviceInfo.MultipathDevice)
2054+
luksMapperPath, err = p.devices.GetLUKSDevicePathForVolume(ctx, req.GetVolumeId())
20482055
if err != nil {
20492056
if !errors.IsNotFoundError(err) {
20502057
Logc(ctx).WithFields(fields).WithError(err).Error("Failed to get LUKS device path from multipath device.")
@@ -3043,7 +3050,7 @@ func (p *Plugin) nodeUnstageNVMeVolume(
30433050
return nil, fmt.Errorf("failed to get NVMe device; %v", err)
30443051
}
30453052

3046-
var devicePath string
3053+
devicePath := publishInfo.DevicePath
30473054
if nvmeDev != nil {
30483055
devicePath = nvmeDev.GetPath()
30493056
}
@@ -3056,10 +3063,19 @@ func (p *Plugin) nodeUnstageNVMeVolume(
30563063
"publishedPath": publishInfo.DevicePath,
30573064
}
30583065

3059-
luksMapperPath, err = p.devices.GetLUKSDeviceForMultipathDevice(devicePath)
3066+
// Use the volume ID to get the LUKS mapper path.
3067+
// This should always work if the mapper is still present.
3068+
luksMapperPath, err = p.devices.GetLUKSDevicePathForVolume(ctx, req.GetVolumeId())
30603069
if err != nil {
3061-
Logc(ctx).WithFields(fields).WithError(err).Debug("Failed to get LUKS device path from device path. " +
3062-
"Device may already be removed.")
3070+
// If the LUKS device is not found, the functional difference is negligible to unstage.
3071+
// But it may be useful to log at different levels for observability.
3072+
log := Logc(ctx).WithFields(fields).WithError(err)
3073+
if errors.IsNotFoundError(err) {
3074+
log.Warn("Failed to get LUKS device path for volume.")
3075+
} else {
3076+
log.Debug("Could not determine LUKS device path for volume.")
3077+
}
3078+
log.Debug("Continuing with device removal.")
30633079
}
30643080

30653081
if luksMapperPath != "" {

frontend/csi/node_server_test.go

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -2114,7 +2114,8 @@ func TestNodeUnstageISCSIVolume(t *testing.T) {
21142114
},
21152115
getDeviceClient: func() devices.Devices {
21162116
mockDeviceClient := mock_devices.NewMockDevices(gomock.NewController(t))
2117-
mockDeviceClient.EXPECT().GetLUKSDeviceForMultipathDevice(gomock.Any()).Return(mockDevicePath, nil)
2117+
2118+
mockDeviceClient.EXPECT().GetLUKSDevicePathForVolume(gomock.Any(), gomock.Any()).Return(mockDevicePath, nil)
21182119
mockDeviceClient.EXPECT().EnsureLUKSDeviceClosedWithMaxWaitLimit(gomock.Any(), mockDevicePath).Return(nil)
21192120
mockDeviceClient.EXPECT().EnsureLUKSDeviceClosed(gomock.Any(), mockDevicePath).Return(nil)
21202121
mockDeviceClient.EXPECT().RemoveMultipathDeviceMappingWithRetries(gomock.Any(), gomock.Any(),
@@ -2148,7 +2149,6 @@ func TestNodeUnstageISCSIVolume(t *testing.T) {
21482149

21492150
mockISCSIClient.EXPECT().TargetHasMountedDevice(gomock.Any(), gomock.Any()).Return(false, nil)
21502151
mockISCSIClient.EXPECT().SafeToLogOut(gomock.Any(), gomock.Any(), gomock.Any()).Return(true)
2151-
21522152
mockISCSIClient.EXPECT().RemovePortalsFromSession(gomock.Any(), gomock.Any(), gomock.Any())
21532153
mockISCSIClient.EXPECT().Logout(gomock.Any(), gomock.Any(), gomock.Any())
21542154
mockISCSIClient.EXPECT().PrepareDeviceForRemoval(gomock.Any(), gomock.Any(), gomock.Any(),
@@ -2160,7 +2160,7 @@ func TestNodeUnstageISCSIVolume(t *testing.T) {
21602160
},
21612161
getDeviceClient: func() devices.Devices {
21622162
mockDeviceClient := mock_devices.NewMockDevices(gomock.NewController(t))
2163-
mockDeviceClient.EXPECT().GetLUKSDeviceForMultipathDevice(gomock.Any()).Return(mockDevicePath, nil)
2163+
mockDeviceClient.EXPECT().GetLUKSDevicePathForVolume(gomock.Any(), gomock.Any()).Return(mockDevicePath, nil)
21642164
mockDeviceClient.EXPECT().EnsureLUKSDeviceClosedWithMaxWaitLimit(gomock.Any(), mockDevicePath).
21652165
Return(nil)
21662166
mockDeviceClient.EXPECT().EnsureLUKSDeviceClosed(gomock.Any(), mockDevicePath).Return(nil)
@@ -2204,8 +2204,7 @@ func TestNodeUnstageISCSIVolume(t *testing.T) {
22042204
},
22052205
getDeviceClient: func() devices.Devices {
22062206
mockDeviceClient := mock_devices.NewMockDevices(gomock.NewController(t))
2207-
2208-
mockDeviceClient.EXPECT().GetLUKSDeviceForMultipathDevice(gomock.Any()).Return(mockDevicePath, nil)
2207+
mockDeviceClient.EXPECT().GetLUKSDevicePathForVolume(gomock.Any(), gomock.Any()).Return(mockDevicePath, nil)
22092208
mockDeviceClient.EXPECT().EnsureLUKSDeviceClosedWithMaxWaitLimit(gomock.Any(), mockDevicePath).
22102209
Return(fmt.Errorf("mock error"))
22112210
return mockDeviceClient
@@ -2223,7 +2222,7 @@ func TestNodeUnstageISCSIVolume(t *testing.T) {
22232222
},
22242223
getDeviceClient: func() devices.Devices {
22252224
mockDeviceClient := mock_devices.NewMockDevices(gomock.NewController(t))
2226-
mockDeviceClient.EXPECT().GetLUKSDeviceForMultipathDevice(gomock.Any()).Return(mockDevicePath, nil)
2225+
mockDeviceClient.EXPECT().GetLUKSDevicePathForVolume(gomock.Any(), gomock.Any()).Return(mockDevicePath, nil)
22272226
mockDeviceClient.EXPECT().EnsureLUKSDeviceClosedWithMaxWaitLimit(gomock.Any(), mockDevicePath).
22282227
Return(nil)
22292228
mockDeviceClient.EXPECT().RemoveMultipathDeviceMappingWithRetries(gomock.Any(), gomock.Any(),
@@ -2296,7 +2295,8 @@ func TestNodeUnstageISCSIVolume(t *testing.T) {
22962295
return mockDeviceClient
22972296
},
22982297
},
2299-
"SAN: iSCSI unstage: GetLUKSDeviceForMultipathDevice error": {
2298+
// mockDeviceClient.EXPECT().GetLUKSDevicePathForVolume(gomock.Any(), gomock.Any()).Return(mockDevicePath, nil)
2299+
"SAN: iSCSI unstage: GetLUKSDevicePathForVolume error": {
23002300
assertError: assert.Error,
23012301
request: NewNodeUnstageVolumeRequestBuilder().Build(),
23022302
publishInfo: NewVolumePublishInfoBuilder(TypeiSCSIVolumePublishInfo).WithLUKSEncryption("true").Build(),
@@ -2315,8 +2315,8 @@ func TestNodeUnstageISCSIVolume(t *testing.T) {
23152315
},
23162316
getDeviceClient: func() devices.Devices {
23172317
mockDeviceClient := mock_devices.NewMockDevices(gomock.NewController(t))
2318-
mockDeviceClient.EXPECT().GetLUKSDeviceForMultipathDevice(gomock.Any()).Return("", fmt.Errorf(
2319-
"mock error"))
2318+
mockDeviceClient.EXPECT().GetLUKSDevicePathForVolume(gomock.Any(),
2319+
gomock.Any()).Return(mockDevicePath, errors.New("mock error"))
23202320
return mockDeviceClient
23212321
},
23222322
},
@@ -2335,7 +2335,7 @@ func TestNodeUnstageISCSIVolume(t *testing.T) {
23352335
},
23362336
getDeviceClient: func() devices.Devices {
23372337
mockDeviceClient := mock_devices.NewMockDevices(gomock.NewController(t))
2338-
mockDeviceClient.EXPECT().GetLUKSDeviceForMultipathDevice(gomock.Any()).Return(mockDevicePath, nil)
2338+
mockDeviceClient.EXPECT().GetLUKSDevicePathForVolume(gomock.Any(), gomock.Any()).Return(mockDevicePath, nil)
23392339
mockDeviceClient.EXPECT().EnsureLUKSDeviceClosedWithMaxWaitLimit(gomock.Any(), mockDevicePath).
23402340
Return(nil)
23412341
return mockDeviceClient
@@ -2368,7 +2368,7 @@ func TestNodeUnstageISCSIVolume(t *testing.T) {
23682368
},
23692369
getDeviceClient: func() devices.Devices {
23702370
mockDeviceClient := mock_devices.NewMockDevices(gomock.NewController(t))
2371-
mockDeviceClient.EXPECT().GetLUKSDeviceForMultipathDevice(gomock.Any()).Return(mockDevicePath, nil)
2371+
mockDeviceClient.EXPECT().GetLUKSDevicePathForVolume(gomock.Any(), gomock.Any()).Return(mockDevicePath, nil)
23722372
mockDeviceClient.EXPECT().EnsureLUKSDeviceClosedWithMaxWaitLimit(gomock.Any(), mockDevicePath).
23732373
Return(nil)
23742374
mockDeviceClient.EXPECT().EnsureLUKSDeviceClosed(gomock.Any(), mockDevicePath).Return(nil)

frontend/csi/volume_publish_manager.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ func (v *VolumePublishManager) readTrackingInfo(
151151
err := jsonRW.ReadJSONFile(ctx, &volumeTrackingInfo, path.Join(v.volumeTrackingInfoPath, filename),
152152
"volume tracking info")
153153
if err != nil {
154-
return nil, err
154+
return nil, fmt.Errorf("failed to read tracking info for volume %s: %w", volumeID, err)
155155
}
156156

157157
Logc(ctx).WithField("volumeTrackingInfo", volumeTrackingInfo).Debug("Volume tracking info found.")

frontend/csi/volume_publish_manager_test.go

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2024 NetApp, Inc. All Rights Reserved.
1+
// Copyright 2025 NetApp, Inc. All Rights Reserved.
22

33
package csi
44

@@ -79,10 +79,10 @@ func TestWriteTrackingInfo(t *testing.T) {
7979
assert.NoError(t, err, "no error expected when write succeeds")
8080

8181
mockJSONUtils.EXPECT().WriteJSONFile(gomock.Any(), trackInfo, "tmp-"+fName, "volume tracking info").
82-
Return(errors.New("foo"))
82+
Return(errors.InvalidJSONError("foo"))
8383
err = v.WriteTrackingInfo(ctx, volId, trackInfo)
8484
assert.Error(t, err, "error expected when write tracking info fails")
85-
assert.Equal(t, "foo", err.Error(), "expected actual error we threw")
85+
assert.True(t, errors.IsInvalidJSONError(err), "expected actual error we threw")
8686
}
8787

8888
func TestReadTrackingInfo(t *testing.T) {
@@ -111,15 +111,17 @@ func TestReadTrackingInfo(t *testing.T) {
111111
mockJSONUtils.EXPECT().ReadJSONFile(gomock.Any(), emptyTrackInfo, fName, "volume tracking info").
112112
SetArg(1, *trackInfo).Return(nil)
113113
trackInfo, err := v.ReadTrackingInfo(context.Background(), volId)
114+
assert.NoError(t, err, "no error expected when write succeed")
115+
assert.NotNil(t, trackInfo, "expected a valid tracking info")
114116
assert.Equal(t, fsType, trackInfo.FilesystemType, "tracking file did not have expected value in it")
115-
assert.NoError(t, err, "tracking file should have been written")
116117

117118
emptyTrackInfo = &models.VolumeTrackingInfo{}
118119
mockJSONUtils.EXPECT().ReadJSONFile(gomock.Any(), emptyTrackInfo, fName,
119-
"volume tracking info").Return(errors.New("foo"))
120-
_, err = v.ReadTrackingInfo(context.Background(), volId)
120+
"volume tracking info").Return(errors.NotFoundError("not found"))
121+
trackInfo, err = v.ReadTrackingInfo(context.Background(), volId)
121122
assert.Error(t, err, "expected error when reading the file results in an error")
122-
assert.Equal(t, "foo", err.Error(), "expected the error we threw in the mock")
123+
assert.Nil(t, trackInfo, "expected nil tracking info")
124+
assert.True(t, errors.IsNotFoundError(err), "expected not found error")
123125
}
124126

125127
func TestListVolumeTrackingInfo_FailsToGetVolumeTrackingFiles(t *testing.T) {
@@ -255,10 +257,10 @@ func TestDeleteTrackingInfo(t *testing.T) {
255257
err := v.DeleteTrackingInfo(context.Background(), volName)
256258
assert.NoError(t, err, "expected no error deleting the tracking info")
257259

258-
mockFilesytesm.EXPECT().DeleteFile(gomock.Any(), gomock.Any(), gomock.Any()).Return("", errors.New("foo"))
260+
mockFilesytesm.EXPECT().DeleteFile(gomock.Any(), gomock.Any(), gomock.Any()).Return("", errors.InvalidJSONError("foo"))
259261
err = v.DeleteTrackingInfo(context.Background(), volName)
260262
assert.Error(t, err, "expected error if delete tracking info fails")
261-
assert.Equal(t, "foo", err.Error(), "expected the error we threw")
263+
assert.True(t, errors.IsInvalidJSONError(err), "expected the error we threw")
262264
}
263265

264266
func TestUpgradeVolumeTrackingFile(t *testing.T) {

0 commit comments

Comments
 (0)