Skip to content

Commit 32cf5a4

Browse files
committed
Revert "svcrdma: Add Write chunk WRs to the RPC's Send WR chain"
Performance regression reported with NFS/RDMA using Omnipath, bisected to commit e084ee6 ("svcrdma: Add Write chunk WRs to the RPC's Send WR chain").

Tracing on the server reports:

    nfsd-7771 [060] 1758.891809: svcrdma_sq_post_err:
        cq.id=205 cid=226 sc_sq_avail=13643/851 status=-12

sq_post_err reports ENOMEM, and the rdma->sc_sq_avail (13643) is larger than rdma->sc_sq_depth (851). The number of available Send Queue entries is always supposed to be smaller than the Send Queue depth. That seems like a Send Queue accounting bug in svcrdma.

As it's getting to be late in the 6.9-rc cycle, revert this commit. It can be revisited in a subsequent kernel release.

Link: https://bugzilla.kernel.org/show_bug.cgi?id=218743
Fixes: e084ee6 ("svcrdma: Add Write chunk WRs to the RPC's Send WR chain")
Signed-off-by: Chuck Lever <[email protected]>
1 parent f488138 commit 32cf5a4

File tree

3 files changed

+26
-78
lines changed

3 files changed

+26
-78
lines changed

include/linux/sunrpc/svc_rdma.h

Lines changed: 3 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -210,7 +210,6 @@ struct svc_rdma_recv_ctxt {
210210
*/
211211
struct svc_rdma_write_info {
212212
struct svcxprt_rdma *wi_rdma;
213-
struct list_head wi_list;
214213

215214
const struct svc_rdma_chunk *wi_chunk;
216215

@@ -239,10 +238,7 @@ struct svc_rdma_send_ctxt {
239238
struct ib_cqe sc_cqe;
240239
struct xdr_buf sc_hdrbuf;
241240
struct xdr_stream sc_stream;
242-
243-
struct list_head sc_write_info_list;
244241
struct svc_rdma_write_info sc_reply_info;
245-
246242
void *sc_xprt_buf;
247243
int sc_page_count;
248244
int sc_cur_sge_no;
@@ -274,14 +270,11 @@ extern void svc_rdma_cc_init(struct svcxprt_rdma *rdma,
274270
extern void svc_rdma_cc_release(struct svcxprt_rdma *rdma,
275271
struct svc_rdma_chunk_ctxt *cc,
276272
enum dma_data_direction dir);
277-
extern void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
278-
struct svc_rdma_send_ctxt *ctxt);
279273
extern void svc_rdma_reply_chunk_release(struct svcxprt_rdma *rdma,
280274
struct svc_rdma_send_ctxt *ctxt);
281-
extern int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
282-
const struct svc_rdma_pcl *write_pcl,
283-
struct svc_rdma_send_ctxt *sctxt,
284-
const struct xdr_buf *xdr);
275+
extern int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
276+
const struct svc_rdma_recv_ctxt *rctxt,
277+
const struct xdr_buf *xdr);
285278
extern int svc_rdma_prepare_reply_chunk(struct svcxprt_rdma *rdma,
286279
const struct svc_rdma_pcl *write_pcl,
287280
const struct svc_rdma_pcl *reply_pcl,

net/sunrpc/xprtrdma/svc_rdma_rw.c

Lines changed: 22 additions & 64 deletions
Original file line number | Diff line number | Diff line change
@@ -230,28 +230,6 @@ static void svc_rdma_write_info_free(struct svc_rdma_write_info *info)
230230
queue_work(svcrdma_wq, &info->wi_work);
231231
}
232232

233-
/**
234-
* svc_rdma_write_chunk_release - Release Write chunk I/O resources
235-
* @rdma: controlling transport
236-
* @ctxt: Send context that is being released
237-
*/
238-
void svc_rdma_write_chunk_release(struct svcxprt_rdma *rdma,
239-
struct svc_rdma_send_ctxt *ctxt)
240-
{
241-
struct svc_rdma_write_info *info;
242-
struct svc_rdma_chunk_ctxt *cc;
243-
244-
while (!list_empty(&ctxt->sc_write_info_list)) {
245-
info = list_first_entry(&ctxt->sc_write_info_list,
246-
struct svc_rdma_write_info, wi_list);
247-
list_del(&info->wi_list);
248-
249-
cc = &info->wi_cc;
250-
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
251-
svc_rdma_write_info_free(info);
252-
}
253-
}
254-
255233
/**
256234
* svc_rdma_reply_chunk_release - Release Reply chunk I/O resources
257235
* @rdma: controlling transport
@@ -308,23 +286,26 @@ static void svc_rdma_write_done(struct ib_cq *cq, struct ib_wc *wc)
308286
struct ib_cqe *cqe = wc->wr_cqe;
309287
struct svc_rdma_chunk_ctxt *cc =
310288
container_of(cqe, struct svc_rdma_chunk_ctxt, cc_cqe);
289+
struct svc_rdma_write_info *info =
290+
container_of(cc, struct svc_rdma_write_info, wi_cc);
311291

312292
switch (wc->status) {
313293
case IB_WC_SUCCESS:
314294
trace_svcrdma_wc_write(&cc->cc_cid);
315-
return;
295+
break;
316296
case IB_WC_WR_FLUSH_ERR:
317297
trace_svcrdma_wc_write_flush(wc, &cc->cc_cid);
318298
break;
319299
default:
320300
trace_svcrdma_wc_write_err(wc, &cc->cc_cid);
321301
}
322302

323-
/* The RDMA Write has flushed, so the client won't get
324-
* some of the outgoing RPC message. Signal the loss
325-
* to the client by closing the connection.
326-
*/
327-
svc_xprt_deferred_close(&rdma->sc_xprt);
303+
svc_rdma_wake_send_waiters(rdma, cc->cc_sqecount);
304+
305+
if (unlikely(wc->status != IB_WC_SUCCESS))
306+
svc_xprt_deferred_close(&rdma->sc_xprt);
307+
308+
svc_rdma_write_info_free(info);
328309
}
329310

330311
/**
@@ -620,19 +601,13 @@ static int svc_rdma_xb_write(const struct xdr_buf *xdr, void *data)
620601
return xdr->len;
621602
}
622603

623-
/* Link Write WRs for @chunk onto @sctxt's WR chain.
624-
*/
625-
static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
626-
struct svc_rdma_send_ctxt *sctxt,
627-
const struct svc_rdma_chunk *chunk,
628-
const struct xdr_buf *xdr)
604+
static int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma,
605+
const struct svc_rdma_chunk *chunk,
606+
const struct xdr_buf *xdr)
629607
{
630608
struct svc_rdma_write_info *info;
631609
struct svc_rdma_chunk_ctxt *cc;
632-
struct ib_send_wr *first_wr;
633610
struct xdr_buf payload;
634-
struct list_head *pos;
635-
struct ib_cqe *cqe;
636611
int ret;
637612

638613
if (xdr_buf_subsegment(xdr, &payload, chunk->ch_position,
@@ -648,25 +623,10 @@ static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
648623
if (ret != payload.len)
649624
goto out_err;
650625

651-
ret = -EINVAL;
652-
if (unlikely(cc->cc_sqecount > rdma->sc_sq_depth))
653-
goto out_err;
654-
655-
first_wr = sctxt->sc_wr_chain;
656-
cqe = &cc->cc_cqe;
657-
list_for_each(pos, &cc->cc_rwctxts) {
658-
struct svc_rdma_rw_ctxt *rwc;
659-
660-
rwc = list_entry(pos, struct svc_rdma_rw_ctxt, rw_list);
661-
first_wr = rdma_rw_ctx_wrs(&rwc->rw_ctx, rdma->sc_qp,
662-
rdma->sc_port_num, cqe, first_wr);
663-
cqe = NULL;
664-
}
665-
sctxt->sc_wr_chain = first_wr;
666-
sctxt->sc_sqecount += cc->cc_sqecount;
667-
list_add(&info->wi_list, &sctxt->sc_write_info_list);
668-
669626
trace_svcrdma_post_write_chunk(&cc->cc_cid, cc->cc_sqecount);
627+
ret = svc_rdma_post_chunk_ctxt(rdma, cc);
628+
if (ret < 0)
629+
goto out_err;
670630
return 0;
671631

672632
out_err:
@@ -675,27 +635,25 @@ static int svc_rdma_prepare_write_chunk(struct svcxprt_rdma *rdma,
675635
}
676636

677637
/**
678-
* svc_rdma_prepare_write_list - Construct WR chain for sending Write list
638+
* svc_rdma_send_write_list - Send all chunks on the Write list
679639
* @rdma: controlling RDMA transport
680-
* @write_pcl: Write list provisioned by the client
681-
* @sctxt: Send WR resources
640+
* @rctxt: Write list provisioned by the client
682641
* @xdr: xdr_buf containing an RPC Reply message
683642
*
684643
* Returns zero on success, or a negative errno if one or more
685644
* Write chunks could not be sent.
686645
*/
687-
int svc_rdma_prepare_write_list(struct svcxprt_rdma *rdma,
688-
const struct svc_rdma_pcl *write_pcl,
689-
struct svc_rdma_send_ctxt *sctxt,
690-
const struct xdr_buf *xdr)
646+
int svc_rdma_send_write_list(struct svcxprt_rdma *rdma,
647+
const struct svc_rdma_recv_ctxt *rctxt,
648+
const struct xdr_buf *xdr)
691649
{
692650
struct svc_rdma_chunk *chunk;
693651
int ret;
694652

695-
pcl_for_each_chunk(chunk, write_pcl) {
653+
pcl_for_each_chunk(chunk, &rctxt->rc_write_pcl) {
696654
if (!chunk->ch_payload_length)
697655
break;
698-
ret = svc_rdma_prepare_write_chunk(rdma, sctxt, chunk, xdr);
656+
ret = svc_rdma_send_write_chunk(rdma, chunk, xdr);
699657
if (ret < 0)
700658
return ret;
701659
}

net/sunrpc/xprtrdma/svc_rdma_sendto.c

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -142,7 +142,6 @@ svc_rdma_send_ctxt_alloc(struct svcxprt_rdma *rdma)
142142
ctxt->sc_send_wr.sg_list = ctxt->sc_sges;
143143
ctxt->sc_send_wr.send_flags = IB_SEND_SIGNALED;
144144
ctxt->sc_cqe.done = svc_rdma_wc_send;
145-
INIT_LIST_HEAD(&ctxt->sc_write_info_list);
146145
ctxt->sc_xprt_buf = buffer;
147146
xdr_buf_init(&ctxt->sc_hdrbuf, ctxt->sc_xprt_buf,
148147
rdma->sc_max_req_size);
@@ -228,7 +227,6 @@ static void svc_rdma_send_ctxt_release(struct svcxprt_rdma *rdma,
228227
struct ib_device *device = rdma->sc_cm_id->device;
229228
unsigned int i;
230229

231-
svc_rdma_write_chunk_release(rdma, ctxt);
232230
svc_rdma_reply_chunk_release(rdma, ctxt);
233231

234232
if (ctxt->sc_page_count)
@@ -1015,8 +1013,7 @@ int svc_rdma_sendto(struct svc_rqst *rqstp)
10151013
if (!p)
10161014
goto put_ctxt;
10171015

1018-
ret = svc_rdma_prepare_write_list(rdma, &rctxt->rc_write_pcl, sctxt,
1019-
&rqstp->rq_res);
1016+
ret = svc_rdma_send_write_list(rdma, rctxt, &rqstp->rq_res);
10201017
if (ret < 0)
10211018
goto put_ctxt;
10221019

0 commit comments

Comments (0)