Skip to content

Commit e0537c9

Browse files
daimngo and Anna Schumaker
authored and committed
SUNRPC: only put task on cl_tasks list after the RPC call slot is reserved.
Under heavy write load, we've seen the cl_tasks list grow to millions of entries. Even though the list is extremely long, the system still runs fine until the user wants to get the information of all active RPC tasks by doing: `cat /sys/kernel/debug/sunrpc/rpc_clnt/N/tasks`. When this happens, tasks_start acquires the cl_lock to walk the cl_tasks list, returning one entry at a time to the caller. The cl_lock is held until all tasks on this list have been processed. While the cl_lock is held, completed RPC tasks have to spin-wait in rpc_task_release_client for the cl_lock. If there are millions of entries in the cl_tasks list, it will take a long time before tasks_stop is called and the cl_lock is released. The spin-waiting tasks can use up all the available CPUs in the system, preventing other jobs from running; this causes the system to temporarily lock up. This patch fixes the problem by delaying insertion of the RPC task on the cl_tasks list until the RPC call slot is reserved. This limits the length of the cl_tasks list to the number of call slots available in the system. Signed-off-by: Dai Ngo <[email protected]> Signed-off-by: Anna Schumaker <[email protected]>
1 parent 5bc55a3 commit e0537c9

File tree

2 files changed

+14
-5
lines changed

2 files changed

+14
-5
lines changed

include/linux/sunrpc/clnt.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -93,6 +93,7 @@ struct rpc_clnt {
9393
const struct cred *cl_cred;
9494
unsigned int cl_max_connect; /* max number of transports not to the same IP */
9595
struct super_block *pipefs_sb;
96+
atomic_t cl_task_count;
9697
};
9798

9899
/*

net/sunrpc/clnt.c

Lines changed: 13 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -958,12 +958,17 @@ void rpc_shutdown_client(struct rpc_clnt *clnt)
958958

959959
trace_rpc_clnt_shutdown(clnt);
960960

961+
clnt->cl_shutdown = 1;
961962
while (!list_empty(&clnt->cl_tasks)) {
962963
rpc_killall_tasks(clnt);
963964
wait_event_timeout(destroy_wait,
964965
list_empty(&clnt->cl_tasks), 1*HZ);
965966
}
966967

968+
/* wait for tasks still in workqueue or waitqueue */
969+
wait_event_timeout(destroy_wait,
970+
atomic_read(&clnt->cl_task_count) == 0, 1 * HZ);
971+
967972
rpc_release_client(clnt);
968973
}
969974
EXPORT_SYMBOL_GPL(rpc_shutdown_client);
@@ -1139,6 +1144,7 @@ void rpc_task_release_client(struct rpc_task *task)
11391144
list_del(&task->tk_task);
11401145
spin_unlock(&clnt->cl_lock);
11411146
task->tk_client = NULL;
1147+
atomic_dec(&clnt->cl_task_count);
11421148

11431149
rpc_release_client(clnt);
11441150
}
@@ -1189,10 +1195,7 @@ void rpc_task_set_client(struct rpc_task *task, struct rpc_clnt *clnt)
11891195
task->tk_flags |= RPC_TASK_TIMEOUT;
11901196
if (clnt->cl_noretranstimeo)
11911197
task->tk_flags |= RPC_TASK_NO_RETRANS_TIMEOUT;
1192-
/* Add to the client's list of all tasks */
1193-
spin_lock(&clnt->cl_lock);
1194-
list_add_tail(&task->tk_task, &clnt->cl_tasks);
1195-
spin_unlock(&clnt->cl_lock);
1198+
atomic_inc(&clnt->cl_task_count);
11961199
}
11971200

11981201
static void
@@ -1787,9 +1790,14 @@ call_reserveresult(struct rpc_task *task)
17871790
if (status >= 0) {
17881791
if (task->tk_rqstp) {
17891792
task->tk_action = call_refresh;
1793+
1794+
/* Add to the client's list of all tasks */
1795+
spin_lock(&task->tk_client->cl_lock);
1796+
if (list_empty(&task->tk_task))
1797+
list_add_tail(&task->tk_task, &task->tk_client->cl_tasks);
1798+
spin_unlock(&task->tk_client->cl_lock);
17901799
return;
17911800
}
1792-
17931801
rpc_call_rpcerror(task, -EIO);
17941802
return;
17951803
}

0 commit comments

Comments
 (0)