Skip to content

Commit 8217345

Browse files
Trond Myklebust authored and gregkh committed
NFSv4/flexfiles: Fix handling of NFS level errors in I/O
[ Upstream commit 38074de35b015df5623f524d6f2b49a0cd395c40 ]

Allow the flexfiles error handling to recognise NFS level errors (as opposed to RPC level errors) and handle them separately. The main motivator is the NFSERR_PERM errors that get returned if the NFS client connects to the data server through a port number that is lower than 1024. In that case, the client should disconnect and retry a READ on a different data server, or it should retry a WRITE after reconnecting.

Reviewed-by: Tigran Mkrtchyan <[email protected]>
Fixes: d67ae82 ("pnfs/flexfiles: Add the FlexFile Layout Driver")
Signed-off-by: Trond Myklebust <[email protected]>
Signed-off-by: Anna Schumaker <[email protected]>
Signed-off-by: Sasha Levin <[email protected]>
1 parent c8851a6 commit 8217345

File tree

1 file changed

+87
-34
lines changed

1 file changed

+87
-34
lines changed

fs/nfs/flexfilelayout/flexfilelayout.c

Lines changed: 87 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -1096,6 +1096,7 @@ static void ff_layout_reset_read(struct nfs_pgio_header *hdr)
10961096
}
10971097

10981098
static int ff_layout_async_handle_error_v4(struct rpc_task *task,
1099+
u32 op_status,
10991100
struct nfs4_state *state,
11001101
struct nfs_client *clp,
11011102
struct pnfs_layout_segment *lseg,
@@ -1106,32 +1107,42 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
11061107
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
11071108
struct nfs4_slot_table *tbl = &clp->cl_session->fc_slot_table;
11081109

1109-
switch (task->tk_status) {
1110-
case -NFS4ERR_BADSESSION:
1111-
case -NFS4ERR_BADSLOT:
1112-
case -NFS4ERR_BAD_HIGH_SLOT:
1113-
case -NFS4ERR_DEADSESSION:
1114-
case -NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1115-
case -NFS4ERR_SEQ_FALSE_RETRY:
1116-
case -NFS4ERR_SEQ_MISORDERED:
1110+
switch (op_status) {
1111+
case NFS4_OK:
1112+
case NFS4ERR_NXIO:
1113+
break;
1114+
case NFSERR_PERM:
1115+
if (!task->tk_xprt)
1116+
break;
1117+
xprt_force_disconnect(task->tk_xprt);
1118+
goto out_retry;
1119+
case NFS4ERR_BADSESSION:
1120+
case NFS4ERR_BADSLOT:
1121+
case NFS4ERR_BAD_HIGH_SLOT:
1122+
case NFS4ERR_DEADSESSION:
1123+
case NFS4ERR_CONN_NOT_BOUND_TO_SESSION:
1124+
case NFS4ERR_SEQ_FALSE_RETRY:
1125+
case NFS4ERR_SEQ_MISORDERED:
11171126
dprintk("%s ERROR %d, Reset session. Exchangeid "
11181127
"flags 0x%x\n", __func__, task->tk_status,
11191128
clp->cl_exchange_flags);
11201129
nfs4_schedule_session_recovery(clp->cl_session, task->tk_status);
1121-
break;
1122-
case -NFS4ERR_DELAY:
1123-
case -NFS4ERR_GRACE:
1130+
goto out_retry;
1131+
case NFS4ERR_DELAY:
1132+
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
1133+
fallthrough;
1134+
case NFS4ERR_GRACE:
11241135
rpc_delay(task, FF_LAYOUT_POLL_RETRY_MAX);
1125-
break;
1126-
case -NFS4ERR_RETRY_UNCACHED_REP:
1127-
break;
1136+
goto out_retry;
1137+
case NFS4ERR_RETRY_UNCACHED_REP:
1138+
goto out_retry;
11281139
/* Invalidate Layout errors */
1129-
case -NFS4ERR_PNFS_NO_LAYOUT:
1130-
case -ESTALE: /* mapped NFS4ERR_STALE */
1131-
case -EBADHANDLE: /* mapped NFS4ERR_BADHANDLE */
1132-
case -EISDIR: /* mapped NFS4ERR_ISDIR */
1133-
case -NFS4ERR_FHEXPIRED:
1134-
case -NFS4ERR_WRONG_TYPE:
1140+
case NFS4ERR_PNFS_NO_LAYOUT:
1141+
case NFS4ERR_STALE:
1142+
case NFS4ERR_BADHANDLE:
1143+
case NFS4ERR_ISDIR:
1144+
case NFS4ERR_FHEXPIRED:
1145+
case NFS4ERR_WRONG_TYPE:
11351146
dprintk("%s Invalid layout error %d\n", __func__,
11361147
task->tk_status);
11371148
/*
@@ -1144,6 +1155,11 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
11441155
pnfs_destroy_layout(NFS_I(inode));
11451156
rpc_wake_up(&tbl->slot_tbl_waitq);
11461157
goto reset;
1158+
default:
1159+
break;
1160+
}
1161+
1162+
switch (task->tk_status) {
11471163
/* RPC connection errors */
11481164
case -ECONNREFUSED:
11491165
case -EHOSTDOWN:
@@ -1159,26 +1175,56 @@ static int ff_layout_async_handle_error_v4(struct rpc_task *task,
11591175
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
11601176
&devid->deviceid);
11611177
rpc_wake_up(&tbl->slot_tbl_waitq);
1162-
fallthrough;
1178+
break;
11631179
default:
1164-
if (ff_layout_avoid_mds_available_ds(lseg))
1165-
return -NFS4ERR_RESET_TO_PNFS;
1166-
reset:
1167-
dprintk("%s Retry through MDS. Error %d\n", __func__,
1168-
task->tk_status);
1169-
return -NFS4ERR_RESET_TO_MDS;
1180+
break;
11701181
}
1182+
1183+
if (ff_layout_avoid_mds_available_ds(lseg))
1184+
return -NFS4ERR_RESET_TO_PNFS;
1185+
reset:
1186+
dprintk("%s Retry through MDS. Error %d\n", __func__,
1187+
task->tk_status);
1188+
return -NFS4ERR_RESET_TO_MDS;
1189+
1190+
out_retry:
11711191
task->tk_status = 0;
11721192
return -EAGAIN;
11731193
}
11741194

11751195
/* Retry all errors through either pNFS or MDS except for -EJUKEBOX */
11761196
static int ff_layout_async_handle_error_v3(struct rpc_task *task,
1197+
u32 op_status,
1198+
struct nfs_client *clp,
11771199
struct pnfs_layout_segment *lseg,
11781200
u32 idx)
11791201
{
11801202
struct nfs4_deviceid_node *devid = FF_LAYOUT_DEVID_NODE(lseg, idx);
11811203

1204+
switch (op_status) {
1205+
case NFS_OK:
1206+
case NFSERR_NXIO:
1207+
break;
1208+
case NFSERR_PERM:
1209+
if (!task->tk_xprt)
1210+
break;
1211+
xprt_force_disconnect(task->tk_xprt);
1212+
goto out_retry;
1213+
case NFSERR_ACCES:
1214+
case NFSERR_BADHANDLE:
1215+
case NFSERR_FBIG:
1216+
case NFSERR_IO:
1217+
case NFSERR_NOSPC:
1218+
case NFSERR_ROFS:
1219+
case NFSERR_STALE:
1220+
goto out_reset_to_pnfs;
1221+
case NFSERR_JUKEBOX:
1222+
nfs_inc_stats(lseg->pls_layout->plh_inode, NFSIOS_DELAY);
1223+
goto out_retry;
1224+
default:
1225+
break;
1226+
}
1227+
11821228
switch (task->tk_status) {
11831229
/* File access problems. Don't mark the device as unavailable */
11841230
case -EACCES:
@@ -1197,6 +1243,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
11971243
nfs4_delete_deviceid(devid->ld, devid->nfs_client,
11981244
&devid->deviceid);
11991245
}
1246+
out_reset_to_pnfs:
12001247
/* FIXME: Need to prevent infinite looping here. */
12011248
return -NFS4ERR_RESET_TO_PNFS;
12021249
out_retry:
@@ -1207,6 +1254,7 @@ static int ff_layout_async_handle_error_v3(struct rpc_task *task,
12071254
}
12081255

12091256
static int ff_layout_async_handle_error(struct rpc_task *task,
1257+
u32 op_status,
12101258
struct nfs4_state *state,
12111259
struct nfs_client *clp,
12121260
struct pnfs_layout_segment *lseg,
@@ -1225,10 +1273,11 @@ static int ff_layout_async_handle_error(struct rpc_task *task,
12251273

12261274
switch (vers) {
12271275
case 3:
1228-
return ff_layout_async_handle_error_v3(task, lseg, idx);
1229-
case 4:
1230-
return ff_layout_async_handle_error_v4(task, state, clp,
1276+
return ff_layout_async_handle_error_v3(task, op_status, clp,
12311277
lseg, idx);
1278+
case 4:
1279+
return ff_layout_async_handle_error_v4(task, op_status, state,
1280+
clp, lseg, idx);
12321281
default:
12331282
/* should never happen */
12341283
WARN_ON_ONCE(1);
@@ -1281,6 +1330,7 @@ static void ff_layout_io_track_ds_error(struct pnfs_layout_segment *lseg,
12811330
switch (status) {
12821331
case NFS4ERR_DELAY:
12831332
case NFS4ERR_GRACE:
1333+
case NFS4ERR_PERM:
12841334
break;
12851335
case NFS4ERR_NXIO:
12861336
ff_layout_mark_ds_unreachable(lseg, idx);
@@ -1313,7 +1363,8 @@ static int ff_layout_read_done_cb(struct rpc_task *task,
13131363
trace_ff_layout_read_error(hdr);
13141364
}
13151365

1316-
err = ff_layout_async_handle_error(task, hdr->args.context->state,
1366+
err = ff_layout_async_handle_error(task, hdr->res.op_status,
1367+
hdr->args.context->state,
13171368
hdr->ds_clp, hdr->lseg,
13181369
hdr->pgio_mirror_idx);
13191370

@@ -1483,7 +1534,8 @@ static int ff_layout_write_done_cb(struct rpc_task *task,
14831534
trace_ff_layout_write_error(hdr);
14841535
}
14851536

1486-
err = ff_layout_async_handle_error(task, hdr->args.context->state,
1537+
err = ff_layout_async_handle_error(task, hdr->res.op_status,
1538+
hdr->args.context->state,
14871539
hdr->ds_clp, hdr->lseg,
14881540
hdr->pgio_mirror_idx);
14891541

@@ -1529,8 +1581,9 @@ static int ff_layout_commit_done_cb(struct rpc_task *task,
15291581
trace_ff_layout_commit_error(data);
15301582
}
15311583

1532-
err = ff_layout_async_handle_error(task, NULL, data->ds_clp,
1533-
data->lseg, data->ds_commit_index);
1584+
err = ff_layout_async_handle_error(task, data->res.op_status,
1585+
NULL, data->ds_clp, data->lseg,
1586+
data->ds_commit_index);
15341587

15351588
trace_nfs4_pnfs_commit_ds(data, err);
15361589
switch (err) {

0 commit comments

Comments
 (0)