Skip to content

Commit 94d0bf7

Browse files
authored
Updates comments, logic, and logs for NVMe and LUKS
1 parent f1b17be commit 94d0bf7

File tree

5 files changed

+103
-89
lines changed

5 files changed

+103
-89
lines changed

frontend/csi/node_server.go

Lines changed: 49 additions & 54 deletions
Original file line numberDiff line numberDiff line change
@@ -502,20 +502,25 @@ func (p *Plugin) nodeExpandVolume(
502502
if !utils.IsLegacyLUKSDevicePath(devicePath) {
503503
devicePath, err = utils.GetLUKSDeviceForMultipathDevice(devicePath)
504504
if err != nil {
505+
Logc(ctx).WithFields(LogFields{
506+
"volumeId": volumeId,
507+
"publishedPath": publishInfo.DevicePath,
508+
}).WithError(err).Error("Failed to get LUKS device path from device path.")
505509
return status.Error(codes.Internal, err.Error())
506510
}
507511
}
508512
Logc(ctx).WithField("volumeId", volumeId).Info("Resizing the LUKS mapping.")
509-
// Refresh the luks device
510-
// cryptsetup resize <luks-device-path> << <passphrase>
513+
514+
// Refresh the LUKS device.
515+
// "cryptsetup resize <luks-device-path> << <passphrase>"
511516
passphrase, ok := secrets["luks-passphrase"]
512517
if !ok {
513518
return status.Error(codes.InvalidArgument, "cannot expand LUKS encrypted volume; no passphrase provided")
514519
} else if passphrase == "" {
515520
return status.Error(codes.InvalidArgument, "cannot expand LUKS encrypted volume; empty passphrase provided")
516521
}
517-
err := utils.ResizeLUKSDevice(ctx, devicePath, passphrase)
518-
if err != nil {
522+
523+
if err := utils.ResizeLUKSDevice(ctx, devicePath, passphrase); err != nil {
519524
if errors.IsIncorrectLUKSPassphraseError(err) {
520525
return status.Error(codes.InvalidArgument, err.Error())
521526
}
@@ -1302,7 +1307,7 @@ func (p *Plugin) nodeUnstageFCPVolume(
13021307
return status.Error(codes.Internal, errStr)
13031308
}
13041309

1305-
// If the luks device still exists, it means the device was unable to be closed prior to removing the block
1310+
// If the LUKS device still exists, it means the device was unable to be closed prior to removing the block
13061311
// device. This can happen if the LUN was deleted or offline. It should be removable by this point.
13071312
// It needs to be removed prior to removing the 'unmappedMpathDevice' device below.
13081313
if luksMapperPath != "" {
@@ -1752,7 +1757,7 @@ func (p *Plugin) nodeUnstageISCSIVolume(
17521757
return status.Error(codes.Internal, errStr)
17531758
}
17541759

1755-
// If the luks device still exists, it means the device was unable to be closed prior to removing the block
1760+
// If the LUKS device still exists, it means the device was unable to be closed prior to removing the block
17561761
// device. This can happen if the LUN was deleted or offline. It should be removable by this point.
17571762
// It needs to be removed prior to removing the 'unmappedMpathDevice' device below.
17581763
if luksMapperPath != "" {
@@ -2472,7 +2477,7 @@ func (p *Plugin) nodeStageNVMeVolume(
24722477
// Ensure we update the passphrase in case it has never been set before
24732478
err = ensureLUKSVolumePassphrase(ctx, p.restClient, luksDevice, volumeId, req.GetSecrets(), true)
24742479
if err != nil {
2475-
return fmt.Errorf("could not set LUKS volume passphrase")
2480+
return fmt.Errorf("could not set LUKS volume passphrase; %v", err)
24762481
}
24772482
}
24782483

@@ -2482,13 +2487,12 @@ func (p *Plugin) nodeStageNVMeVolume(
24822487
PublishedPaths: map[string]struct{}{},
24832488
}
24842489

2485-
// Save the device info to the volume tracking info path for use in the publish & unstage calls.
2490+
// Save the device info to the volume tracking info path for use in future CSI node publish & unstage calls.
24862491
if err := p.nodeHelper.WriteTrackingInfo(ctx, volumeId, volTrackingInfo); err != nil {
24872492
return err
24882493
}
24892494

24902495
p.nvmeHandler.AddPublishedNVMeSession(&publishedNVMeSessions, publishInfo)
2491-
24922496
return nil
24932497
}
24942498

@@ -2507,7 +2511,7 @@ func (p *Plugin) nodeUnstageNVMeVolume(
25072511
// Proceed further with unstage flow, if device is not found.
25082512
nvmeDev, err := p.nvmeHandler.NewNVMeDevice(ctx, publishInfo.NVMeNamespaceUUID)
25092513
if err != nil && !errors.IsNotFoundError(err) {
2510-
return nil, fmt.Errorf("error while getting NVMe device, %v", err)
2514+
return nil, fmt.Errorf("failed to get NVMe device; %v", err)
25112515
}
25122516

25132517
var devicePath string
@@ -2518,18 +2522,19 @@ func (p *Plugin) nodeUnstageNVMeVolume(
25182522
var luksMapperPath string
25192523
if utils.ParseBool(publishInfo.LUKSEncryption) && devicePath != "" {
25202524
fields := LogFields{
2521-
"lunID": publishInfo.IscsiLunNumber,
2522-
"publishedDevice": publishInfo.DevicePath,
2523-
"nvmeDevPath": nvmeDev.GetPath(),
2525+
"namespace": publishInfo.NVMeNamespaceUUID,
2526+
"devicePath": devicePath,
2527+
"publishedPath": publishInfo.DevicePath,
25242528
}
25252529

25262530
luksMapperPath, err = p.devices.GetLUKSDeviceForMultipathDevice(devicePath)
25272531
if err != nil {
2532+
Logc(ctx).WithFields(fields).WithError(err).Error("Failed to get LUKS device path from device path.")
25282533
return &csi.NodeUnstageVolumeResponse{}, err
25292534
}
25302535

2531-
// Ensure the LUKS device is closed if the luksMapperPath is set.
25322536
if luksMapperPath != "" {
2537+
fields["luksMapperPath"] = luksMapperPath
25332538
if err = p.devices.EnsureLUKSDeviceClosedWithMaxWaitLimit(ctx, luksMapperPath); err != nil {
25342539
if !errors.IsMaxWaitExceededError(err) {
25352540
Logc(ctx).WithFields(fields).WithError(err).Error("Failed to close LUKS device.")
@@ -2540,63 +2545,53 @@ func (p *Plugin) nodeUnstageNVMeVolume(
25402545
}
25412546
}
25422547

2548+
// Attempt to flush the NVMe device.
25432549
if !nvmeDev.IsNil() {
2544-
// If device is found, proceed to flush and clean up.
2545-
err := nvmeDev.FlushDevice(ctx, p.unsafeDetach, force)
25462550
// If flush fails, give a grace period of 6 minutes (nvmeMaxFlushWaitDuration) before giving up.
2547-
if err != nil {
2551+
if err := nvmeDev.FlushDevice(ctx, p.unsafeDetach, force); err != nil {
25482552
if NVMeNamespacesFlushRetry[publishInfo.NVMeNamespaceUUID].IsZero() {
25492553
NVMeNamespacesFlushRetry[publishInfo.NVMeNamespaceUUID] = time.Now()
2550-
return nil, fmt.Errorf("error while flushing NVMe device, %v", err)
2551-
} else {
2552-
elapsed := time.Since(NVMeNamespacesFlushRetry[publishInfo.NVMeNamespaceUUID])
2553-
if elapsed > nvmeMaxFlushWaitDuration {
2554-
// Waited enough, log it and proceed with next step in detach flow.
2555-
Logc(ctx).WithFields(
2556-
LogFields{
2557-
"namespace": publishInfo.NVMeNamespaceUUID,
2558-
"elapsed": elapsed,
2559-
"maxWait": nvmeMaxFlushWaitDuration,
2560-
}).Debug("Volume is not safe to be detached. But, waited enough time.")
2561-
// Done with this, remove entry from exceptions list.
2562-
delete(NVMeNamespacesFlushRetry, publishInfo.NVMeNamespaceUUID)
2563-
} else {
2564-
// Allowing to wait for some more time. Let the kubelet retry.
2565-
Logc(ctx).WithFields(
2566-
LogFields{
2567-
"namespace": publishInfo.NVMeNamespaceUUID,
2568-
"elapsed": elapsed,
2569-
}).Debug("Waiting for some more time.")
2570-
return nil, fmt.Errorf("error while flushing NVMe device, %v", err)
2571-
}
2554+
return nil, fmt.Errorf("failed to flush NVMe device; %v", err)
25722555
}
2573-
} else {
2574-
// No error in 'flush', remove entry from exceptions list in case it was added earlier.
2575-
delete(NVMeNamespacesFlushRetry, publishInfo.NVMeNamespaceUUID)
2556+
2557+
// If the max wait time for flush isn't hit yet, fail and let the CSI node agent call again.
2558+
elapsed := time.Since(NVMeNamespacesFlushRetry[publishInfo.NVMeNamespaceUUID])
2559+
if elapsed <= nvmeMaxFlushWaitDuration {
2560+
Logc(ctx).WithFields(LogFields{
2561+
"devicePath": devicePath,
2562+
"namespace": publishInfo.NVMeNamespaceUUID,
2563+
"elapsed": elapsed,
2564+
}).WithError(err).Debug("Could not flush NVMe device.")
2565+
return nil, fmt.Errorf("failed to flush NVMe device; %v", err)
2566+
}
2567+
2568+
Logc(ctx).WithFields(LogFields{
2569+
"namespace": publishInfo.NVMeNamespaceUUID,
2570+
"elapsed": elapsed,
2571+
"maxWait": nvmeMaxFlushWaitDuration,
2572+
}).Warn("Could not flush device within expected time period.")
25762573
}
2574+
2575+
delete(NVMeNamespacesFlushRetry, publishInfo.NVMeNamespaceUUID)
25772576
}
25782577

25792578
// Get the number of namespaces associated with the subsystem
25802579
nvmeSubsys := p.nvmeHandler.NewNVMeSubsystem(ctx, publishInfo.NVMeSubsystemNQN)
25812580
numNs, err := nvmeSubsys.GetNamespaceCount(ctx)
25822581
if err != nil {
2583-
Logc(ctx).WithFields(
2584-
LogFields{
2585-
"subsystem": publishInfo.NVMeSubsystemNQN,
2586-
"error": err,
2587-
}).Debug("Error getting Namespace count.")
2582+
Logc(ctx).WithField(
2583+
"subsystem", publishInfo.NVMeSubsystemNQN,
2584+
).WithError(err).Debug("Error getting Namespace count.")
25882585
}
25892586

25902587
// If number of namespaces is more than 1, don't disconnect the subsystem. If we get any issues while getting the
25912588
// number of namespaces through the CLI, we can rely on the disconnect flag from NVMe self-healing sessions (if
25922589
// NVMe self-healing is enabled), which keeps track of namespaces associated with the subsystem.
25932590
if (err == nil && numNs <= 1) || (p.nvmeSelfHealingInterval > 0 && err != nil && disconnect) {
25942591
if err := nvmeSubsys.Disconnect(ctx); err != nil {
2595-
Logc(ctx).WithFields(
2596-
LogFields{
2597-
"subsystem": publishInfo.NVMeSubsystemNQN,
2598-
"error": err,
2599-
}).Debug("Error disconnecting subsystem.")
2592+
Logc(ctx).WithField(
2593+
"subsystem", publishInfo.NVMeSubsystemNQN,
2594+
).WithError(err).Debug("Error disconnecting subsystem.")
26002595
}
26012596
}
26022597

@@ -2614,7 +2609,7 @@ func (p *Plugin) nodeUnstageNVMeVolume(
26142609
return nil, status.Error(codes.Internal, errStr)
26152610
}
26162611

2617-
// If the luks device still exists, it means the device was unable to be closed prior to removing the block
2612+
// If the LUKS device still exists, it means the device was unable to be closed prior to removing the block
26182613
// device. This can happen if the LUN was deleted or offline. It should be removable by this point.
26192614
// It needs to be removed prior to removing the 'unmappedMpathDevice' device below.
26202615
if luksMapperPath != "" {
@@ -2669,7 +2664,7 @@ func (p *Plugin) nodePublishNVMeVolume(
26692664
Logc(ctx).WithError(err).Error("Failed to ensure current LUKS passphrase.")
26702665
}
26712666

2672-
// At this point, we must reassign the device path to the luks mapper path for mounts to work.
2667+
// At this point, we must reassign the device path to the LUKS mapper path for mounts to work.
26732668
devicePath = luksDevice.MappedDevicePath()
26742669
}
26752670

storage_drivers/ontap/ontap_san_nvme_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2284,7 +2284,6 @@ func TestImport_LUKSNamespace(t *testing.T) {
22842284
mAPI.EXPECT().VolumeInfo(ctx, gomock.Any()).Return(vol, nil)
22852285
mAPI.EXPECT().NVMeNamespaceGetByName(ctx, "/vol/"+originalName+"/*").Return(ns, nil)
22862286
mAPI.EXPECT().NVMeIsNamespaceMapped(ctx, "", ns.UUID).Return(false, nil)
2287-
// mAPI.EXPECT().VolumeRename(ctx, originalName, volConfig.InternalName).Return(nil)
22882287

22892288
beforeLUKSOverheadBytesStr, err := utils.ConvertSizeToBytes(volConfig.Size)
22902289
if err != nil {

utils/iscsi/iscsi.go

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -404,6 +404,7 @@ func (client *Client) AttachVolume(
404404
if err != nil {
405405
return mpathSize, err
406406
}
407+
407408
devicePath = luksDevice.MappedDevicePath()
408409
}
409410

@@ -418,11 +419,14 @@ func (client *Client) AttachVolume(
418419
if existingFstype == "" {
419420
if !isLUKSDevice {
420421
if unformatted, err := client.deviceClient.IsDeviceUnformatted(ctx, devicePath); err != nil {
421-
Logc(ctx).WithField("device",
422-
devicePath).Errorf("Unable to identify if the device is unformatted; err: %v", err)
422+
Logc(ctx).WithField(
423+
"device", devicePath,
424+
).WithError(err).Errorf("Unable to identify if the device is unformatted.")
423425
return mpathSize, err
424426
} else if !unformatted {
425-
Logc(ctx).WithField("device", devicePath).Errorf("Device is not unformatted; err: %v", err)
427+
Logc(ctx).WithField(
428+
"device", devicePath,
429+
).WithError(err).Errorf("Device is not unformatted.")
426430
return mpathSize, fmt.Errorf("device %v is not unformatted", devicePath)
427431
}
428432
} else {

utils/nvme.go

Lines changed: 22 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -313,7 +313,7 @@ func NVMeMountVolume(
313313
Logc(ctx).Debug(">>>> nvme.NVMeMountVolume")
314314
defer Logc(ctx).Debug("<<<< nvme.NVMeMountVolume")
315315

316-
// This is the raw device path for a nvme namespace.
316+
// Initially, the device path raw device path for this NVMe namespace.
317317
devicePath := publishInfo.DevicePath
318318

319319
// Format and open a LUKS device if LUKS Encryption is set to true.
@@ -326,12 +326,19 @@ func NVMeMountVolume(
326326
if err != nil {
327327
return err
328328
}
329+
329330
devicePath = luksDevice.MappedDevicePath()
330331
}
331332

333+
// Fail fast if the device should be a LUKS device but is not LUKS formatted.
332334
if isLUKSDevice && !luksFormatted {
333-
Logc(ctx).Errorf("Unable to identify if luks device.", devicePath)
334-
return err
335+
Logc(ctx).WithFields(LogFields{
336+
"devicePath": publishInfo.DevicePath,
337+
"luksMapperPath": devicePath,
338+
"isLUKSFormatted": luksFormatted,
339+
"shouldBeLUKS": isLUKSDevice,
340+
}).Error("Device should be a LUKS device but is not LUKS formatted.")
341+
return fmt.Errorf("device should be a LUKS device but is not LUKS formatted")
335342
}
336343

337344
// No filesystem work is required for raw block; return early.
@@ -346,16 +353,23 @@ func NVMeMountVolume(
346353
if existingFstype == "" {
347354
if !isLUKSDevice {
348355
if unformatted, err := isDeviceUnformatted(ctx, devicePath); err != nil {
349-
Logc(ctx).WithField("device",
350-
devicePath).Errorf("Unable to identify if the device is not formatted; err: %v", err)
356+
Logc(ctx).WithField(
357+
"device", devicePath,
358+
).WithError(err).Error("Unable to identify if the device is not formatted.")
351359
return err
352360
} else if !unformatted {
353-
Logc(ctx).WithField("device", devicePath).Errorf("Device is not not formatted; err: %v", err)
354-
return fmt.Errorf("device %v is not unformatted", devicePath)
361+
Logc(ctx).WithField(
362+
"device", devicePath,
363+
).WithError(err).Error("Device is not unformatted.")
364+
return fmt.Errorf("device %v is already formatted", devicePath)
355365
}
356366
}
357367

358-
Logc(ctx).WithFields(LogFields{"volume": name, "fstype": publishInfo.FilesystemType}).Debug("Formatting LUN.")
368+
Logc(ctx).WithFields(LogFields{
369+
"volume": name,
370+
"namespace": publishInfo.NVMeNamespaceUUID,
371+
"fstype": publishInfo.FilesystemType,
372+
}).Debug("Formatting NVMe Namespace.")
359373
err := fsClient.FormatVolume(ctx, devicePath, publishInfo.FilesystemType, publishInfo.FormatOptions)
360374
if err != nil {
361375
return fmt.Errorf("error formatting Namespace %s, device %s: %v", name, devicePath, err)

0 commit comments

Comments
 (0)