Skip to content

Commit 0912ca2

Browse files
authored
Precise LUN scanning
Enhance self-healing Scan remediations to initiate rescans by exact host, channel, target, and LUN ID.
1 parent 4ed2197 commit 0912ca2

File tree

8 files changed

+418
-23
lines changed

8 files changed

+418
-23
lines changed

frontend/csi/node_server.go

Lines changed: 21 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -2229,12 +2229,11 @@ func (p *Plugin) selfHealingRectifySession(ctx context.Context, portal string, a
22292229
if err != nil {
22302230
return fmt.Errorf("failed to get publish info for session on portal '%s'; %v", portal, err)
22312231
}
2232-
lunID, targetIQN := publishInfo.IscsiLunNumber, publishInfo.IscsiTargetIQN
22332232

22342233
switch action {
22352234
case models.LogoutLoginScan:
2236-
if err = p.iscsi.Logout(ctx, targetIQN, portal); err != nil {
2237-
return fmt.Errorf("error while logging out of target %s", targetIQN)
2235+
if err = p.iscsi.Logout(ctx, publishInfo.IscsiTargetIQN, portal); err != nil {
2236+
return fmt.Errorf("error while logging out of target %s", publishInfo.IscsiTargetIQN)
22382237
} else {
22392238
Logc(ctx).Debug("Logout is successful.")
22402239
}
@@ -2244,8 +2243,7 @@ func (p *Plugin) selfHealingRectifySession(ctx context.Context, portal string, a
22442243
// Set FilesystemType to "raw" so that we only heal the session connectivity and not perform the mount and
22452244
// filesystem related operations.
22462245
publishInfo.FilesystemType = filesystem.Raw
2247-
2248-
volumeID, err := publishedISCSISessions.VolumeIDForPortalAndLUN(portal, lunID)
2246+
volumeID, err := publishedISCSISessions.VolumeIDForPortalAndLUN(portal, publishInfo.IscsiLunNumber)
22492247
if err != nil {
22502248
return fmt.Errorf("failed to get volume ID for lun ID; %v", err)
22512249
}
@@ -2274,23 +2272,25 @@ func (p *Plugin) selfHealingRectifySession(ctx context.Context, portal string, a
22742272
// Login is successful, fallthrough to perform scan
22752273
fallthrough
22762274
case models.Scan:
2277-
if p.deprecatedIgroupInUse(ctx) {
2278-
Logc(ctx).WithField("lunID", lunID).Debug("Initiating SCSI scan for exact LUN.")
2275+
// This detection may be useful for support in the future.
2276+
// Retain this until there is a good reason to remove it.
2277+
_ = p.deprecatedIgroupInUse(ctx)
22792278

2280-
if err := utils.InitiateScanForLun(ctx, int(lunID), targetIQN); err != nil {
2281-
Logc(ctx).WithError(err).Debug("Error while initiating SCSI scan for LUN.")
2282-
} else {
2283-
Logc(ctx).WithField("lunID", lunID).Debug("Successfully initiated SCSI scan for LUN.")
2284-
}
2285-
} else {
2286-
Logc(ctx).Debug("Initiating SCSI scan for all LUNs.")
2279+
luns, err := publishedISCSISessions.LUNsForPortal(portal)
2280+
if err != nil {
2281+
return fmt.Errorf("failed to get LUNs for portal: %s; %w", portal, err)
2282+
}
22872283

2288-
if err := utils.InitiateScanForAllLUNs(ctx, targetIQN); err != nil {
2289-
Logc(ctx).WithError(err).Debug("Error while initiating SCSI scan for LUNs.")
2290-
} else {
2291-
Logc(ctx).Debug("Successfully initiated SCSI scan for all LUNs.")
2292-
}
2284+
if err = utils.InitiateScanForLuns(ctx, luns, publishInfo.IscsiTargetIQN); err != nil {
2285+
Logc(ctx).WithError(err).Error("Could not initiate scan for some LUNs.")
2286+
return fmt.Errorf("failed to initiate scan for LUNs in portal: %s; %w", portal, err)
22932287
}
2288+
2289+
Logc(ctx).WithFields(LogFields{
2290+
"portal": portal,
2291+
"luns": luns,
2292+
"target": publishInfo.IscsiTargetIQN,
2293+
}).Debug("Successfully initiated iSCSI scan(s).")
22942294
default:
22952295
Logc(ctx).Debug("No valid action to be taken in iSCSI self-healing.")
22962296
}
@@ -2299,6 +2299,8 @@ func (p *Plugin) selfHealingRectifySession(ctx context.Context, portal string, a
22992299
}
23002300

23012301
// deprecatedIgroupInUse looks through the tracking files for deprecated igroups and reports if any are in use.
2302+
// NOTE: Precise LUN scanning removes the requirement for this logic, but this information may be useful for debugging
2303+
// and support cases. Additionally, this calculation is cheap so keep it in for now.
23022304
func (p *Plugin) deprecatedIgroupInUse(ctx context.Context) bool {
23032305
volumeTrackingInfo, _ := p.nodeHelper.ListVolumeTrackingInfo(ctx)
23042306
for id, info := range volumeTrackingInfo {

frontend/csi/node_server_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1653,7 +1653,7 @@ func TestAttemptLock_Success(t *testing.T) {
16531653
wg.Wait()
16541654
}
16551655

1656-
func TestOutdatedAccessControlInUse(t *testing.T) {
1656+
func TestDeprecatedIgroupsInUse(t *testing.T) {
16571657
tt := map[string]struct {
16581658
tracking map[string]*models.VolumeTrackingInfo
16591659
expected bool

mocks/mock_utils/mock_iscsi/mock_reconcile_utils.go

Lines changed: 15 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

utils/devices/devices.go

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2024 NetApp, Inc. All Rights Reserved.
1+
// Copyright 2025 NetApp, Inc. All Rights Reserved.
22

33
//go:generate mockgen -destination=../../mocks/mock_utils/mock_devices/mock_devices_client.go github.com/netapp/trident/utils/devices Devices
44
//go:generate mockgen -destination=../../mocks/mock_utils/mock_devices/mock_size_getter_client.go github.com/netapp/trident/utils/devices SizeGetter
@@ -855,7 +855,6 @@ func (c *Client) ScanTargetLUN(ctx context.Context, deviceAddresses []models.Scs
855855
"scanFile": filename,
856856
"host": deviceAddress.Host,
857857
}).Debug("Invoked SCSI scan for host.")
858-
859858
}
860859

861860
return nil

utils/iscsi.go

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -500,6 +500,47 @@ func InitiateScanForLun(ctx context.Context, lunID int, iSCSINodeName string) er
500500
return nil
501501
}
502502

503+
// InitiateScanForLuns initiates scans for LUNs in a given target against all hosts.
504+
func InitiateScanForLuns(ctx context.Context, luns []int32, target string) error {
505+
fields := LogFields{
506+
"luns": luns,
507+
"target": target,
508+
}
509+
Logc(ctx).WithFields(fields).Debug(">>>> iscsi.InitiateScanForLuns")
510+
defer Logc(ctx).WithFields(fields).Debug("<<<< iscsi.InitiateScanForLuns")
511+
512+
deviceAddresses, err := IscsiUtils.DiscoverSCSIAddressMapForTarget(ctx, target)
513+
if err != nil {
514+
return fmt.Errorf("failed to discover SCSI address map for target: '%s'; %w", target, err)
515+
} else if len(deviceAddresses) == 0 {
516+
return fmt.Errorf("no SCSI addresses found for target: '%s'", target)
517+
}
518+
519+
// Build a set of all device addresses -> luns.
520+
// This should have entries like so: "10:0:0:0", "10:0:0:1", "11:0:0:0", "11:0:0:1", etc.
521+
// As an example, if 10 LUNs require scan:
522+
// "10:0:0:0", "10:0:0:1",...,"10:0:0:9"
523+
// "11:0:0:0", "11:0:0:0",...,"11:0:0:9"
524+
deviceAddressesWithLUNs := make([]models.ScsiDeviceAddress, 0)
525+
for _, lun := range luns {
526+
for _, address := range deviceAddresses {
527+
deviceAddressesWithLUNs = append(deviceAddressesWithLUNs, models.ScsiDeviceAddress{
528+
Host: address.Host,
529+
Channel: address.Channel,
530+
Target: address.Target,
531+
LUN: strconv.Itoa(int(lun)),
532+
})
533+
}
534+
}
535+
536+
if err := iSCSIScanTargetLUN(ctx, deviceAddressesWithLUNs); err != nil {
537+
Logc(ctx).WithError(err).Error("Could not initiate scan.")
538+
return fmt.Errorf("failed to initiate scan; %w", err)
539+
}
540+
541+
return nil
542+
}
543+
503544
// InitiateScanForAllLUNs scans all paths to each of the LUNs passed.
504545
func InitiateScanForAllLUNs(ctx context.Context, iSCSINodeName string) error {
505546
fields := LogFields{"iSCSINodeName": iSCSINodeName}

utils/iscsi/reconcile_utils.go

Lines changed: 129 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
// Copyright 2024 NetApp, Inc. All Rights Reserved.
1+
// Copyright 2025 NetApp, Inc. All Rights Reserved.
22

33
package iscsi
44

@@ -7,6 +7,7 @@ package iscsi
77
import (
88
"context"
99
"fmt"
10+
"path/filepath"
1011
"strconv"
1112
"strings"
1213

@@ -19,6 +20,7 @@ import (
1920

2021
type IscsiReconcileUtils interface {
2122
GetISCSIHostSessionMapForTarget(context.Context, string) map[int]int
23+
DiscoverSCSIAddressMapForTarget(ctx context.Context, targetIQN string) (map[string]models.ScsiDeviceAddress, error)
2224
GetSysfsBlockDirsForLUN(int, map[int]int) []string
2325
GetDevicesForLUN(paths []string) ([]string, error)
2426
ReconcileISCSIVolumeInfo(ctx context.Context, trackingInfo *models.VolumeTrackingInfo) (bool, error)
@@ -68,6 +70,132 @@ func (h *IscsiReconcileHelper) ReconcileISCSIVolumeInfo(
6870
return false, nil
6971
}
7072

73+
// DiscoverSCSIAddressMapForTarget creates a map of unique "host:channel:targetID" to ScsiDeviceAddresses that
74+
// exist for active sessions on a given target IQN. We can rely on "host:channel:targetID" for our keys safely because
75+
// we filter by target IQN. The resulting map should be used with a set of LUN IDs to initiate precise LUN scanning.
76+
func (h *IscsiReconcileHelper) DiscoverSCSIAddressMapForTarget(
77+
ctx context.Context, targetIQN string,
78+
) (map[string]models.ScsiDeviceAddress, error) {
79+
fields := LogFields{"iSCSINodeName": targetIQN}
80+
Logc(ctx).WithFields(fields).Debug(">>>> iscsi.DiscoverSCSIAddressMapForTarget")
81+
defer Logc(ctx).WithFields(fields).Debug("<<<< iscsi.DiscoverSCSIAddressMapForTarget")
82+
83+
// deviceMap is a map of "h:c:t" -> ScsiDeviceAddress.
84+
deviceMap := make(map[string]models.ScsiDeviceAddress)
85+
86+
// Read in everything under: '/sys/class/scsi_host/'.
87+
scsiHostPath := filepath.Join(h.chrootPathPrefix, "sys", "class", "scsi_host")
88+
hostEntries, err := h.osFs.ReadDir(scsiHostPath)
89+
if err != nil {
90+
return nil, fmt.Errorf("failed to list hosts; %w", err)
91+
}
92+
93+
// Search through each dir under: '/sys/class/scsi_host/'.
94+
for _, hostEntry := range hostEntries {
95+
hostName := hostEntry.Name() // example: "host10" from "/sys/class/scsi_host/host10"
96+
if !strings.HasPrefix(hostName, "host") {
97+
continue
98+
}
99+
100+
// Read in all dirs under: '/sys/class/scsi_host/host#/device'.
101+
hostDevicePath := filepath.Join(scsiHostPath, hostName, "device")
102+
hostDeviceEntries, err := h.osFs.ReadDir(hostDevicePath)
103+
if err != nil {
104+
Logc(ctx).WithError(err).Errorf("Could not read host device entries at: '%s'.", hostDevicePath)
105+
continue
106+
}
107+
108+
// Look for "session#" within the device directory entries.
109+
// It's possible that multiple sessions that Trident setup can exist for a given host.
110+
// Example:
111+
// '/sys/class/scsi_host/host10/device/session1'
112+
// '/sys/class/scsi_host/host10/device/session2'
113+
for _, hostDeviceEntry := range hostDeviceEntries {
114+
sessionName := hostDeviceEntry.Name()
115+
if !strings.HasPrefix(hostDeviceEntry.Name(), "session") {
116+
continue
117+
}
118+
119+
// Check if the iscsi session exists: '/sys/class/iscsi_host/host#/device/session#/iscsi_session/session#'.
120+
sessionPath := filepath.Join(hostDevicePath, sessionName, "iscsi_session", sessionName)
121+
if sessionExists, err := h.osFs.Exists(sessionPath); err != nil {
122+
Logc(ctx).WithError(err).Errorf("Could not read iscsi session path at: '%s'", sessionPath)
123+
continue
124+
} else if !sessionExists {
125+
Logc(ctx).Debugf("iSCSI session path '%s' does not exist.", sessionPath)
126+
continue
127+
}
128+
129+
// Read in target IQN from: '/sys/class/iscsi_host/host#/device/session#/iscsi_session/session#/targetname'.
130+
targetNamePath := filepath.Join(sessionPath, "targetname")
131+
contents, err := h.osFs.ReadFile(targetNamePath)
132+
if err != nil {
133+
Logc(ctx).WithError(err).Errorf("Could not read target IQN at: '%s'", targetNamePath)
134+
continue
135+
}
136+
137+
// Ignore sessions that aren't connected to the expected target IQN.
138+
targetName := strings.TrimSpace(string(contents))
139+
if targetName != targetIQN {
140+
Logc(ctx).Debugf("IQN mismatch. '%s' != '%s'; ignoring session.", targetName, targetIQN)
141+
continue
142+
}
143+
144+
// At this point, we know this session is for a NetApp target.
145+
// Read in all entries under: '/sys/class/iscsi_host/host#/device/session#/iscsi_session/session#/device'
146+
sessionDevicePath := filepath.Join(sessionPath, "device")
147+
sessionDeviceEntries, err := h.osFs.ReadDir(sessionDevicePath)
148+
if err != nil {
149+
Logc(ctx).WithError(err).Errorf("Could not read session device entries at: '%s'.", sessionDevicePath)
150+
continue
151+
}
152+
153+
// Search for the 'target<H:C:T>' directory under:
154+
// `/sys/class/iscsi_host/host#/device/session#/iscsi_session/session#/device`.
155+
for _, entry := range sessionDeviceEntries {
156+
entryName := entry.Name()
157+
if !strings.HasPrefix(entryName, "target") {
158+
continue
159+
}
160+
Logc(ctx).WithField("scsiTargetDevice", entryName).Debug("Found SCSI target device directory.")
161+
162+
// At this point, we know we're looking at a target device directory.
163+
// '/sys/class/iscsi_host/host#/device/session#/iscsi_session/session#/device/target<H:C:T>'
164+
var hostID, channelID, targetID string
165+
hctSuffix := strings.TrimPrefix(entryName, "target") // "targetH:C:T" -> "H:C:T"
166+
hctElems := strings.Split(hctSuffix, ":") // "H:C:T" -> ["H","C","T"]
167+
if len(hctElems) != 3 {
168+
Logc(ctx).Errorf("Invalid format detected with: '%s'; expected 'target<H:C:T>'", entryName)
169+
continue
170+
}
171+
// It can be safely assumed that if these elements exist, the kernel has assigned them valid values.
172+
hostID, channelID, targetID = hctElems[0], hctElems[1], hctElems[2]
173+
fields := LogFields{
174+
"hostID": hostID,
175+
"channelID": channelID,
176+
"targetID": targetID,
177+
}
178+
179+
// Build unique key "hostID:channelID:targetID"
180+
// Filtering by targetIQN above should remove chances of tracking scsi addresses not owned by Trident.
181+
// It is technically possible for a given host to have multiple channels and multiple targetIDs, we
182+
// can probably safely assume the targetID will remain the same for a given backend.
183+
Logc(ctx).WithFields(fields).Debug("Discovered host, channel and target ID.")
184+
key := fmt.Sprintf("%s:%s:%s", hostID, channelID, targetID)
185+
if _, exists := deviceMap[key]; !exists {
186+
deviceMap[key] = models.ScsiDeviceAddress{
187+
Host: hostID,
188+
Channel: channelID,
189+
Target: targetID,
190+
}
191+
}
192+
}
193+
}
194+
}
195+
196+
return deviceMap, nil
197+
}
198+
71199
// GetISCSIHostSessionMapForTarget returns a map of iSCSI host numbers to iSCSI session numbers
72200
// for a given iSCSI target.
73201
func (h *IscsiReconcileHelper) GetISCSIHostSessionMapForTarget(ctx context.Context, iSCSINodeName string) map[int]int {

0 commit comments

Comments
 (0)