Skip to content

Commit 38074de

Browse files
Trond Myklebust authored and Anna Schumaker committed
NFSv4/flexfiles: Fix handling of NFS level errors in I/O
Allow the flexfiles error handling to recognise NFS level errors (as opposed to RPC level errors) and handle them separately. The main motivator is the NFSERR_PERM errors that get returned if the NFS client connects to the data server through a port number that is lower than 1024. In that case, the client should disconnect and retry a READ on a different data server, or it should retry a WRITE after reconnecting.

Reviewed-by: Tigran Mkrtchyan <[email protected]>
Fixes: d67ae82 ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Signed-off-by: Trond Myklebust <[email protected]>
Signed-off-by: Anna Schumaker <[email protected]>
1 parent c017762 commit 38074de

File tree

1 file changed

+84
-34
lines changed

1 file changed

+84
-34
lines changed

fs/nfs/flexfilelayout/flexfilelayout.c

Lines changed: 84 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1105,6 +1105,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
11051105
}
11061106

11071107
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
1108+
u32 op_status,
11081109
struct nfs4_state *state,
11091110
struct nfs_client *clp,
11101111
struct pnfs_layout_segment *lseg,
@@ -1115,34 +1116,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
11151116
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
11161117
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
11171118

1118-
switch (task->tk_status) {
1119-
case -NFS4ERR_BADSESSION:
1120-
case -NFS4ERR_BADSLOT:
1121-
case -NFS4ERR_BAD_HIGH_SLOT:
1122-
case -NFS4ERR_DEADSESSION:
1123-
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1124-
case -NFS4ERR_SEQ_FALSE_RETRY:
1125-
case -NFS4ERR_SEQ_MISORDERED:
1119+
switch (op_status) {
1120+
case NFS4_OK:
1121+
case NFS4ERR_NXIO:
1122+
break;
1123+
case NFSERR_PERM:
1124+
if (!task->tk_xprt)
1125+
break;
1126+
xprt_force_disconnect(task->tk_xprt);
1127+
goto out_retry;
1128+
case NFS4ERR_BADSESSION:
1129+
case NFS4ERR_BADSLOT:
1130+
case NFS4ERR_BAD_HIGH_SLOT:
1131+
case NFS4ERR_DEADSESSION:
1132+
case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1133+
case NFS4ERR_SEQ_FALSE_RETRY:
1134+
case NFS4ERR_SEQ_MISORDERED:
11261135
dprintk("%s ERROR %d, Reset session. Exchangeid "
11271136
"flags 0x%x\n", __func__, task->tk_status,
11281137
clp->cl_exchange_flags);
11291138
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
1130-
break;
1131-
case -NFS4ERR_DELAY:
1139+
goto out_retry;
1140+
case NFS4ERR_DELAY:
11321141
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
11331142
fallthrough;
1134-
case -NFS4ERR_GRACE:
1143+
case NFS4ERR_GRACE:
11351144
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
1136-
break;
1137-
case -NFS4ERR_RETRY_UNCACHED_REP:
1138-
break;
1145+
goto out_retry;
1146+
case NFS4ERR_RETRY_UNCACHED_REP:
1147+
goto out_retry;
11391148
/* Invalidate Layout errors */
1140-
case -NFS4ERR_PNFS_NO_LAYOUT:
1141-
case -ESTALE: /* mapped NFS4ERR_STALE */
1142-
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
1143-
case -EISDIR: /* mapped NFS4ERR_ISDIR */
1144-
case -NFS4ERR_FHEXPIRED:
1145-
case -NFS4ERR_WRONG_TYPE:
1149+
case NFS4ERR_PNFS_NO_LAYOUT:
1150+
case NFS4ERR_STALE:
1151+
case NFS4ERR_BADHANDLE:
1152+
case NFS4ERR_ISDIR:
1153+
case NFS4ERR_FHEXPIRED:
1154+
case NFS4ERR_WRONG_TYPE:
11461155
dprintk("%s Invalid layout error %d\n", __func__,
11471156
task->tk_status);
11481157
/*
@@ -1155,6 +1164,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
11551164
pnfs_destroy_layout(NFS_I(inode));
11561165
rpc_wake_up(&tbl->slot_tbl_waitq);
11571166
goto reset;
1167+
default:
1168+
break;
1169+
}
1170+
1171+
switch (task->tk_status) {
11581172
/* RPC connection errors */
11591173
case -ENETDOWN:
11601174
case -ENETUNREACH:
@@ -1174,27 +1188,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
11741188
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
11751189
&devid->deviceid);
11761190
rpc_wake_up(&tbl->slot_tbl_waitq);
1177-
fallthrough;
1191+
break;
11781192
default:
1179-
if (ff_layout_avoid_mds_available_ds(lseg))
1180-
return -NFS4ERR_RESET_TO_PNFS;
1181-
reset:
1182-
dprintk("%s Retry through MDS. Error %d\n", __func__,
1183-
task->tk_status);
1184-
return -NFS4ERR_RESET_TO_MDS;
1193+
break;
11851194
}
1195+
1196+
if (ff_layout_avoid_mds_available_ds(lseg))
1197+
return -NFS4ERR_RESET_TO_PNFS;
1198+
reset:
1199+
dprintk("%s Retry through MDS. Error %d\n", __func__,
1200+
task->tk_status);
1201+
return -NFS4ERR_RESET_TO_MDS;
1202+
1203+
out_retry:
11861204
task->tk_status = 0;
11871205
return -EAGAIN;
11881206
}
11891207

11901208
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
11911209
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
1210+
u32 op_status,
11921211
struct nfs_client *clp,
11931212
struct pnfs_layout_segment *lseg,
11941213
u32 idx)
11951214
{
11961215
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
11971216

1217+
switch (op_status) {
1218+
case NFS_OK:
1219+
case NFSERR_NXIO:
1220+
break;
1221+
case NFSERR_PERM:
1222+
if (!task->tk_xprt)
1223+
break;
1224+
xprt_force_disconnect(task->tk_xprt);
1225+
goto out_retry;
1226+
case NFSERR_ACCES:
1227+
case NFSERR_BADHANDLE:
1228+
case NFSERR_FBIG:
1229+
case NFSERR_IO:
1230+
case NFSERR_NOSPC:
1231+
case NFSERR_ROFS:
1232+
case NFSERR_STALE:
1233+
goto out_reset_to_pnfs;
1234+
case NFSERR_JUKEBOX:
1235+
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
1236+
goto out_retry;
1237+
default:
1238+
break;
1239+
}
1240+
11981241
switch (task->tk_status) {
11991242
/* File access problems. Don't mark the device as unavailable */
12001243
case -EACCES:
@@ -1218,6 +1261,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
12181261
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
12191262
&devid->deviceid);
12201263
}
1264+
out_reset_to_pnfs:
12211265
/* FIXME: Need to prevent infinite looping here. */
12221266
return -NFS4ERR_RESET_TO_PNFS;
12231267
out_retry:
@@ -1228,6 +1272,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
12281272
}
12291273

12301274
static int ff_layout_async_handle_error(struct rpc_task *task,
1275+
u32 op_status,
12311276
struct nfs4_state *state,
12321277
struct nfs_client *clp,
12331278
struct pnfs_layout_segment *lseg,
@@ -1246,10 +1291,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
12461291

12471292
switch (vers) {
12481293
case 3:
1249-
return ff_layout_async_handle_error_v3(task, clp, lseg, idx);
1250-
case 4:
1251-
return ff_layout_async_handle_error_v4(task, state, clp,
1294+
return ff_layout_async_handle_error_v3(task, op_status, clp,
12521295
lseg, idx);
1296+
case 4:
1297+
return ff_layout_async_handle_error_v4(task, op_status, state,
1298+
clp, lseg, idx);
12531299
default:
12541300
/* should never happen */
12551301
WARN_ON_ONCE(1);
@@ -1302,6 +1348,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
13021348
switch (status) {
13031349
case NFS4ERR_DELAY:
13041350
case NFS4ERR_GRACE:
1351+
case NFS4ERR_PERM:
13051352
break;
13061353
case NFS4ERR_NXIO:
13071354
ff_layout_mark_ds_unreachable(lseg, idx);
@@ -1334,7 +1381,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
13341381
trace_ff_layout_read_error(hdr, task->tk_status);
13351382
}
13361383

1337-
err = ff_layout_async_handle_error(task, hdr->args.context->state,
1384+
err = ff_layout_async_handle_error(task, hdr->res.op_status,
1385+
hdr->args.context->state,
13381386
hdr->ds_clp, hdr->lseg,
13391387
hdr->pgio_mirror_idx);
13401388

@@ -1507,7 +1555,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
15071555
trace_ff_layout_write_error(hdr, task->tk_status);
15081556
}
15091557

1510-
err = ff_layout_async_handle_error(task, hdr->args.context->state,
1558+
err = ff_layout_async_handle_error(task, hdr->res.op_status,
1559+
hdr->args.context->state,
15111560
hdr->ds_clp, hdr->lseg,
15121561
hdr->pgio_mirror_idx);
15131562

@@ -1556,8 +1605,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
15561605
trace_ff_layout_commit_error(data, task->tk_status);
15571606
}
15581607

1559-
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
1560-
data->lseg, data->ds_commit_index);
1608+
err = ff_layout_async_handle_error(task, data->res.op_status,
1609+
NULL, data->ds_clp, data->lseg,
1610+
data->ds_commit_index);
15611611

15621612
trace_nfs4_pnfs_commit_ds(data, err);
15631613
switch (err) {

0 commit comments

Comments
 (0)