Skip to content

Commit c90e1e8

Browse files
authored
Revert "Do not log out of stale portals during iSCSI self-healing"
1 parent 1c1cef9 commit c90e1e8

File tree

2 files changed

+8
-120
lines changed

2 files changed

+8
-120
lines changed

utils/iscsi/iscsi.go

Lines changed: 6 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@ import (
1111
"fmt"
1212
"os"
1313
"os/exec"
14-
"path/filepath"
1514
"regexp"
1615
"sort"
1716
"strconv"
@@ -43,7 +42,6 @@ const (
4342
SessionInfoSource = "sessionSource"
4443
SessionSourceCurrentStatus = "currentStatus"
4544
SessionSourceNodeStage = "nodeStage"
46-
sessionConnectionStateUp = "up"
4745

4846
iscsiadmLoginTimeoutValue = 10
4947
iscsiadmLoginTimeout = iscsiadmLoginTimeoutValue * time.Second
@@ -1117,109 +1115,7 @@ func (client *Client) portalsToLogin(ctx context.Context, targetIQN string, port
11171115
return portalsNotLoggedIn, loggedIn, nil
11181116
}
11191117

1120-
// getSessionConnectionsState returns the state of iscsi session connections stored in:
1121-
// '/sys/class/iscsi_session/session<ID>/device/connection<ID>:0/iscsi_connection/connection<ID>:0'.
1122-
func (client *Client) getSessionConnectionsState(ctx context.Context, sessionID string) []string {
1123-
Logc(ctx).WithField("sessionID", sessionID).Debug(">>>> iscsi.getSessionConnectionsState")
1124-
defer Logc(ctx).Debug("<<<< iscsi.getSessionConnectionsState")
1125-
1126-
// Find the session device dirs under: '/sys/class/iscsi_session/session<ID>/device/'.
1127-
sessionName := fmt.Sprintf("session%s", sessionID)
1128-
sessionDevicePath := filepath.Join(client.chrootPathPrefix, "sys", "class", "iscsi_session", sessionName, "device")
1129-
sessionDeviceEntries, err := client.os.ReadDir(sessionDevicePath)
1130-
if err != nil {
1131-
Logc(ctx).WithField("path", sessionDevicePath).WithError(err).Error("Could not read session dirs.")
1132-
return nil
1133-
}
1134-
1135-
const notFound = "<NOT FOUND>"
1136-
var errs error
1137-
1138-
// Dynamically discover the 'state' for all underlying connections and return them.
1139-
connectionStates := make([]string, 0)
1140-
for _, entry := range sessionDeviceEntries {
1141-
// Only consider: `/sys/class/iscsi_session/session<ID>/device/connection<ID>:0`
1142-
connection := entry.Name()
1143-
if !strings.HasPrefix(connection, "connection") {
1144-
continue
1145-
}
1146-
1147-
// At this point, we know we're looking at something like:
1148-
// '/sys/class/iscsi_session/session<ID>/device/connection<ID>:0' but we need:
1149-
// '/sys/class/iscsi_session/session<ID>/device/connection<ID>:0/iscsi_connection/connection<ID>:0'
1150-
state := notFound
1151-
statePath := filepath.Join(sessionDevicePath, connection, "iscsi_connection", connection, "state")
1152-
rawState, err := client.os.ReadFile(statePath)
1153-
if err != nil {
1154-
errs = errors.Join(errs, fmt.Errorf("failed to read session state at: '%s'; %w", statePath, err))
1155-
} else if len(rawState) != 0 {
1156-
state = strings.TrimSpace(string(rawState))
1157-
}
1158-
1159-
// If the connection state is "up" or not found, further inspection won't be helpful. Ignore this and move on.
1160-
if state == sessionConnectionStateUp || state == notFound {
1161-
continue
1162-
}
1163-
1164-
// Get the persistent address. This is the IP associated with a session.
1165-
address := notFound
1166-
addrPath := filepath.Join(sessionDevicePath, connection, "iscsi_connection", connection, "persistent_address")
1167-
rawAddress, err := client.os.ReadFile(addrPath)
1168-
if err != nil {
1169-
errs = errors.Join(errs, fmt.Errorf("failed to read connection IP at: '%s'; %w", addrPath, err))
1170-
} else if len(rawAddress) != 0 {
1171-
address = strings.TrimSpace(string(rawAddress))
1172-
}
1173-
1174-
// Get the persistent port. This is the port associated with a session.
1175-
port := notFound
1176-
portPath := filepath.Join(sessionDevicePath, connection, "iscsi_connection", connection, "persistent_port")
1177-
rawPort, err := client.os.ReadFile(portPath)
1178-
if err != nil {
1179-
errs = errors.Join(errs, fmt.Errorf("failed to read connection port at: '%s'; %w", portPath, err))
1180-
} else if len(rawPort) != 0 {
1181-
port = strings.TrimSpace(string(rawPort))
1182-
}
1183-
1184-
portal := fmt.Sprintf("%s:%s", address, port)
1185-
1186-
// This will allow Trident to communicate which portals have bad connections.
1187-
connectionState := fmt.Sprintf("\"portal:'%s'; connection:'%s'; state:'%s'\"", portal, connection, state)
1188-
connectionStates = append(connectionStates, connectionState)
1189-
}
1190-
1191-
if errs != nil {
1192-
Logc(ctx).WithError(errs).Error("Could not discover state of iSCSI connections.")
1193-
}
1194-
1195-
return connectionStates
1196-
}
1197-
1198-
// getSessionState returns the state stored in /sys/class/iscsi_session/session<sid>/state.
1199-
// If no state is found, an empty string is returned.
1200-
func (client *Client) getSessionState(ctx context.Context, sessionID string) string {
1201-
Logc(ctx).WithField("sessionID", sessionID).Debug(">>>> iscsi.getSessionState")
1202-
defer Logc(ctx).Debug("<<<< iscsi.getSessionState")
1203-
1204-
// Find the session state from the session at /sys/class/iscsi_session/sessionXXX/state
1205-
filename := fmt.Sprintf(client.chrootPathPrefix+"/sys/class/iscsi_session/session%s/state", sessionID)
1206-
sessionStateBytes, err := client.os.ReadFile(filename)
1207-
if err != nil {
1208-
Logc(ctx).WithField("path", filename).WithError(err).Error("Could not read session state file.")
1209-
return ""
1210-
}
1211-
1212-
sessionState := strings.TrimSpace(string(sessionStateBytes))
1213-
Logc(ctx).WithFields(LogFields{
1214-
"sessionID": sessionID,
1215-
"sessionState": sessionState,
1216-
"sysfsFile": filename,
1217-
}).Debug("Found iSCSI session state.")
1218-
1219-
return sessionState
1220-
}
1221-
1222-
// isSessionStale - reads /sys/class/iscsi_session/session<sid>/state and returns true if it is not "LOGGED_IN".
1118+
// IsSessionStale - reads /sys/class/iscsi_session/session<sid>/state and returns true if it is not "LOGGED_IN".
12231119
// Looks at the state of an already established session to identify if it is
12241120
// logged in or not, if it is not logged in then it could be a stale session.
12251121
// For now, we are relying on the sysfs files
@@ -1231,7 +1127,10 @@ func (client *Client) isSessionStale(ctx context.Context, sessionID string) bool
12311127
filename := fmt.Sprintf(client.chrootPathPrefix+"/sys/class/iscsi_session/session%s/state", sessionID)
12321128
sessionStateBytes, err := client.os.ReadFile(filename)
12331129
if err != nil {
1234-
Logc(ctx).WithField("path", filename).WithError(err).Error("Could not read session state file.")
1130+
Logc(ctx).WithFields(LogFields{
1131+
"path": filename,
1132+
"error": err,
1133+
}).Error("Could not read session state file")
12351134
return false
12361135
}
12371136

@@ -2826,14 +2725,6 @@ func (client *Client) InspectAllISCSISessions(
28262725

28272726
if action != models.NoAction {
28282727
candidateStalePortals = append(candidateStalePortals, portal)
2829-
2830-
// At this point we know the iSCSI session has been stale for some time but do not know why.
2831-
// Retrieve additional state from sysfs and inform the admin of an issue.
2832-
Logc(ctx).WithFields(LogFields{
2833-
"portal": portal,
2834-
"sessionState": client.getSessionState(ctx, currentPortalInfo.SessionNumber),
2835-
"connectionState": client.getSessionConnectionsState(ctx, currentPortalInfo.SessionNumber),
2836-
}).Warn("Portal requires manual intervention; storage network connection may be unstable.")
28372728
}
28382729
continue
28392730
}
@@ -2903,10 +2794,7 @@ func isStalePortal(
29032794
} else if timeNow.Sub(publishedPortalInfo.FirstIdentifiedStaleAt) >= iSCSISessionWaitTime {
29042795
Logc(ctx).WithFields(logFields).Warningf("Portal exceeded stale wait time at %v; adding to stale portals list.",
29052796
timeNow)
2906-
// Things like storage platform upgrades or extended network outages may result in a FREE or FAILED state on the
2907-
// session. At this point in time, there isn't a reliable mechanism to know when it would be safe to perform a
2908-
// Logout remediation step, so only ever perform a LoginScan.
2909-
return models.LoginScan
2797+
return models.LogoutLoginScan
29102798
} else {
29112799
Logc(ctx).WithFields(logFields).Warningf("Portal has not exceeded stale wait time at %v.", timeNow)
29122800
}

utils/iscsi/iscsi_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -5879,7 +5879,7 @@ func TestIsStalePortal(t *testing.T) {
58795879
SessionWaitTime: 10 * time.Second,
58805880
TimeNow: time.Now().Add(20 * time.Second),
58815881
Portal: ipList[0],
5882-
ResultAction: models.LoginScan,
5882+
ResultAction: models.LogoutLoginScan,
58835883
SimulateConditions: func(publishedSessions, currentSessions *models.ISCSISessions, portal string) {
58845884
publishedSessions.Info[portal].PortalInfo.FirstIdentifiedStaleAt = time.Now()
58855885
},
@@ -5943,7 +5943,7 @@ func TestIsStalePortal(t *testing.T) {
59435943
SessionWaitTime: 10 * time.Second,
59445944
TimeNow: time.Now().Add(20 * time.Second),
59455945
Portal: ipList[0],
5946-
ResultAction: models.LoginScan,
5946+
ResultAction: models.LogoutLoginScan,
59475947
SimulateConditions: func(publishedSessions, currentSessions *models.ISCSISessions, portal string) {
59485948
publishedSessions.Info[portal].PortalInfo.Credentials = chapCredentials[0]
59495949
publishedSessions.Info[portal].PortalInfo.FirstIdentifiedStaleAt = time.Now()

0 commit comments

Comments
 (0)