Skip to content

Commit 5b9d31a

Browse files
author
Trond Myklebust
committed
NFSv4: Add a parameter to limit the number of retries after NFS4ERR_DELAY
When using a 'softerr' mount, the NFSv4 client can get stuck waiting forever while the server just returns NFS4ERR_DELAY. Among other things, this causes the knfsd server threads to busy wait. Add a parameter that tells the NFSv4 client how many times to retry before giving up.

Signed-off-by: Trond Myklebust <[email protected]>
1 parent 05d3ef8 commit 5b9d31a

File tree

4 files changed: +41 -1 lines changed

Documentation/admin-guide/kernel-parameters.txt

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3533,6 +3533,13 @@
35333533
[NFS] set the TCP port on which the NFSv4 callback
35343534
channel should listen.
35353535

3536+
nfs.delay_retrans=
3537+
[NFS] specifies the number of times the NFSv4 client
3538+
retries the request before returning an EAGAIN error,
3539+
after a reply of NFS4ERR_DELAY from the server.
3540+
Only applies if the softerr mount option is enabled,
3541+
and the specified value is >= 0.
3542+
35363543
nfs.enable_ino64=
35373544
[NFS] enable 64-bit inode numbers.
35383545
If zero, the NFS client will fake up a 32-bit inode

fs/nfs/nfs4_fs.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -209,6 +209,7 @@ struct nfs4_exception {
209209
struct inode *inode;
210210
nfs4_stateid *stateid;
211211
long timeout;
212+
unsigned short retrans;
212213
unsigned char task_is_privileged : 1;
213214
unsigned char delay : 1,
214215
recovering : 1,
@@ -546,6 +547,7 @@ extern unsigned short max_session_slots;
546547
extern unsigned short max_session_cb_slots;
547548
extern unsigned short send_implementation_id;
548549
extern bool recover_lost_locks;
550+
extern short nfs_delay_retrans;
549551

550552
#define NFS4_CLIENT_ID_UNIQ_LEN (64)
551553
extern char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN];

fs/nfs/nfs4proc.c

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -585,6 +585,21 @@ static int nfs4_do_handle_exception(struct nfs_server *server,
585585
return 0;
586586
}
587587

588+
/*
589+
* Track the number of NFS4ERR_DELAY related retransmissions and return
590+
* EAGAIN if the 'softerr' mount option is set, and we've exceeded the limit
591+
* set by 'nfs_delay_retrans'.
592+
*/
593+
static int nfs4_exception_should_retrans(const struct nfs_server *server,
594+
struct nfs4_exception *exception)
595+
{
596+
if (server->flags & NFS_MOUNT_SOFTERR && nfs_delay_retrans >= 0) {
597+
if (exception->retrans++ >= (unsigned short)nfs_delay_retrans)
598+
return -EAGAIN;
599+
}
600+
return 0;
601+
}
602+
588603
/* This is the error handling routine for processes that are allowed
589604
* to sleep.
590605
*/
@@ -595,6 +610,11 @@ int nfs4_handle_exception(struct nfs_server *server, int errorcode, struct nfs4_
595610

596611
ret = nfs4_do_handle_exception(server, errorcode, exception);
597612
if (exception->delay) {
613+
int ret2 = nfs4_exception_should_retrans(server, exception);
614+
if (ret2 < 0) {
615+
exception->retry = 0;
616+
return ret2;
617+
}
598618
ret = nfs4_delay(&exception->timeout,
599619
exception->interruptible);
600620
goto out_retry;
@@ -623,6 +643,11 @@ nfs4_async_handle_exception(struct rpc_task *task, struct nfs_server *server,
623643

624644
ret = nfs4_do_handle_exception(server, errorcode, exception);
625645
if (exception->delay) {
646+
int ret2 = nfs4_exception_should_retrans(server, exception);
647+
if (ret2 < 0) {
648+
exception->retry = 0;
649+
return ret2;
650+
}
626651
rpc_delay(task, nfs4_update_delay(&exception->timeout));
627652
goto out_retry;
628653
}

fs/nfs/super.c

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1366,6 +1366,7 @@ unsigned short max_session_cb_slots = NFS4_DEF_CB_SLOT_TABLE_SIZE;
13661366
unsigned short send_implementation_id = 1;
13671367
char nfs4_client_id_uniquifier[NFS4_CLIENT_ID_UNIQ_LEN] = "";
13681368
bool recover_lost_locks = false;
1369+
short nfs_delay_retrans = -1;
13691370

13701371
EXPORT_SYMBOL_GPL(nfs_callback_nr_threads);
13711372
EXPORT_SYMBOL_GPL(nfs_callback_set_tcpport);
@@ -1376,6 +1377,7 @@ EXPORT_SYMBOL_GPL(max_session_cb_slots);
13761377
EXPORT_SYMBOL_GPL(send_implementation_id);
13771378
EXPORT_SYMBOL_GPL(nfs4_client_id_uniquifier);
13781379
EXPORT_SYMBOL_GPL(recover_lost_locks);
1380+
EXPORT_SYMBOL_GPL(nfs_delay_retrans);
13791381

13801382
#define NFS_CALLBACK_MAXPORTNR (65535U)
13811383

@@ -1424,5 +1426,9 @@ MODULE_PARM_DESC(recover_lost_locks,
14241426
"If the server reports that a lock might be lost, "
14251427
"try to recover it risking data corruption.");
14261428

1427-
1429+
module_param_named(delay_retrans, nfs_delay_retrans, short, 0644);
1430+
MODULE_PARM_DESC(delay_retrans,
1431+
"Unless negative, specifies the number of times the NFSv4 "
1432+
"client retries a request before returning an EAGAIN error, "
1433+
"after a reply of NFS4ERR_DELAY from the server.");
14281434
#endif /* CONFIG_NFS_V4 */

0 commit comments

Comments
 (0)