Skip to content

Commit bade4be

Browse files
committed
svcrdma: Revert "svcrdma: Reduce Receive doorbell rate"
I tested commit 43042b9 ("svcrdma: Reduce Receive doorbell rate") with mlx4 (IB) and software iWARP and didn't find any issues. However, I recently got my hardware iWARP setup back on line (FastLinQ) and it's crashing hard on this commit (confirmed via bisect). The failure mode is complex. - After a connection is established, the first Receive completes normally. - But the second and third Receives have garbage in their Receive buffers. The server responds with ERR_VERS as a result. - When the client tears down the connection to retry, a couple of posted Receives flush twice, and that corrupts the recv_ctxt free list. - __svc_rdma_free then faults or loops infinitely while destroying the xprt's recv_ctxts. Since 43042b9 ("svcrdma: Reduce Receive doorbell rate") does not fix a bug but is a scalability enhancement, it's safe and appropriate to revert it while working on a replacement. Fixes: 43042b9 ("svcrdma: Reduce Receive doorbell rate") Signed-off-by: Chuck Lever <[email protected]>
1 parent b4250dd commit bade4be

File tree

2 files changed

+39
-44
lines changed

2 files changed

+39
-44
lines changed

include/linux/sunrpc/svc_rdma.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,6 @@ struct svcxprt_rdma {
104104

105105
wait_queue_head_t sc_send_wait; /* SQ exhaustion waitlist */
106106
unsigned long sc_flags;
107-
u32 sc_pending_recvs;
108107
struct list_head sc_read_complete_q;
109108
struct work_struct sc_work;
110109

net/sunrpc/xprtrdma/svc_rdma_recvfrom.c

Lines changed: 39 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -266,46 +266,33 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp)
266266
svc_rdma_recv_ctxt_put(rdma, ctxt);
267267
}
268268

269-
static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
270-
unsigned int wanted, bool temp)
269+
static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
270+
struct svc_rdma_recv_ctxt *ctxt)
271271
{
272-
const struct ib_recv_wr *bad_wr = NULL;
273-
struct svc_rdma_recv_ctxt *ctxt;
274-
struct ib_recv_wr *recv_chain;
275272
int ret;
276273

277-
recv_chain = NULL;
278-
while (wanted--) {
279-
ctxt = svc_rdma_recv_ctxt_get(rdma);
280-
if (!ctxt)
281-
break;
282-
283-
trace_svcrdma_post_recv(ctxt);
284-
ctxt->rc_temp = temp;
285-
ctxt->rc_recv_wr.next = recv_chain;
286-
recv_chain = &ctxt->rc_recv_wr;
287-
rdma->sc_pending_recvs++;
288-
}
289-
if (!recv_chain)
290-
return false;
291-
292-
ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
274+
trace_svcrdma_post_recv(ctxt);
275+
ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
293276
if (ret)
294277
goto err_post;
295-
return true;
278+
return 0;
296279

297280
err_post:
298-
while (bad_wr) {
299-
ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
300-
rc_recv_wr);
301-
bad_wr = bad_wr->next;
302-
svc_rdma_recv_ctxt_put(rdma, ctxt);
303-
}
304-
305281
trace_svcrdma_rq_post_err(rdma, ret);
306-
/* Since we're destroying the xprt, no need to reset
307-
* sc_pending_recvs. */
308-
return false;
282+
svc_rdma_recv_ctxt_put(rdma, ctxt);
283+
return ret;
284+
}
285+
286+
static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
287+
{
288+
struct svc_rdma_recv_ctxt *ctxt;
289+
290+
if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
291+
return 0;
292+
ctxt = svc_rdma_recv_ctxt_get(rdma);
293+
if (!ctxt)
294+
return -ENOMEM;
295+
return __svc_rdma_post_recv(rdma, ctxt);
309296
}
310297

311298
/**
@@ -316,30 +303,46 @@ static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
316303
*/
317304
bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
318305
{
319-
return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
306+
struct svc_rdma_recv_ctxt *ctxt;
307+
unsigned int i;
308+
int ret;
309+
310+
for (i = 0; i < rdma->sc_max_requests; i++) {
311+
ctxt = svc_rdma_recv_ctxt_get(rdma);
312+
if (!ctxt)
313+
return false;
314+
ctxt->rc_temp = true;
315+
ret = __svc_rdma_post_recv(rdma, ctxt);
316+
if (ret)
317+
return false;
318+
}
319+
return true;
320320
}
321321

322322
/**
323323
* svc_rdma_wc_receive - Invoked by RDMA provider for each polled Receive WC
324324
* @cq: Completion Queue context
325325
* @wc: Work Completion object
326326
*
327+
* NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
328+
* the Receive completion handler could be running.
327329
*/
328330
static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
329331
{
330332
struct svcxprt_rdma *rdma = cq->cq_context;
331333
struct ib_cqe *cqe = wc->wr_cqe;
332334
struct svc_rdma_recv_ctxt *ctxt;
333335

334-
rdma->sc_pending_recvs--;
335-
336336
/* WARNING: Only wc->wr_cqe and wc->status are reliable */
337337
ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
338338

339339
trace_svcrdma_wc_receive(wc, &ctxt->rc_cid);
340340
if (wc->status != IB_WC_SUCCESS)
341341
goto flushed;
342342

343+
if (svc_rdma_post_recv(rdma))
344+
goto post_err;
345+
343346
/* All wc fields are now known to be valid */
344347
ctxt->rc_byte_len = wc->byte_len;
345348

@@ -350,18 +353,11 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
350353
spin_unlock(&rdma->sc_rq_dto_lock);
351354
if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
352355
svc_xprt_enqueue(&rdma->sc_xprt);
353-
354-
if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) &&
355-
rdma->sc_pending_recvs < rdma->sc_max_requests)
356-
if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH,
357-
false))
358-
goto post_err;
359-
360356
return;
361357

362358
flushed:
363-
svc_rdma_recv_ctxt_put(rdma, ctxt);
364359
post_err:
360+
svc_rdma_recv_ctxt_put(rdma, ctxt);
365361
set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
366362
svc_xprt_enqueue(&rdma->sc_xprt);
367363
}

0 commit comments

Comments
 (0)