@@ -208,6 +208,9 @@ struct svc_rdma_send_ctxt *svc_rdma_send_ctxt_get(struct svcxprt_rdma *rdma)
208208 ctxt -> sc_send_wr .num_sge = 0 ;
209209 ctxt -> sc_cur_sge_no = 0 ;
210210 ctxt -> sc_page_count = 0 ;
211+ ctxt -> sc_wr_chain = & ctxt -> sc_send_wr ;
212+ ctxt -> sc_sqecount = 1 ;
213+
211214 return ctxt ;
212215
213216out_empty :
@@ -293,7 +296,7 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
293296 struct svc_rdma_send_ctxt * ctxt =
294297 container_of (cqe , struct svc_rdma_send_ctxt , sc_cqe );
295298
296- svc_rdma_wake_send_waiters (rdma , 1 );
299+ svc_rdma_wake_send_waiters (rdma , ctxt -> sc_sqecount );
297300
298301 if (unlikely (wc -> status != IB_WC_SUCCESS ))
299302 goto flushed ;
@@ -312,36 +315,44 @@ static void svc_rdma_wc_send(struct ib_cq *cq, struct ib_wc *wc)
312315}
313316
314317/**
315- * svc_rdma_send - Post a single Send WR
316- * @rdma: transport on which to post the WR
317- * @ctxt: send ctxt with a Send WR ready to post
318+ * svc_rdma_post_send - Post a WR chain to the Send Queue
319+ * @rdma: transport context
320+ * @ctxt: WR chain to post
318321 *
319322 * Copy fields in @ctxt to stack variables in order to guarantee
320323 * that these values remain available after the ib_post_send() call.
321324 * In some error flow cases, svc_rdma_wc_send() releases @ctxt.
322325 *
326+ * Note there is potential for starvation when the Send Queue is
327+ * full because there is no order to when waiting threads are
328+ * awoken. The transport is typically provisioned with a deep
329+ * enough Send Queue that SQ exhaustion should be a rare event.
330+ *
323331 * Return values:
324332 * %0: @ctxt's WR chain was posted successfully
325333 * %-ENOTCONN: The connection was lost
326334 */
327- int svc_rdma_send (struct svcxprt_rdma * rdma , struct svc_rdma_send_ctxt * ctxt )
335+ int svc_rdma_post_send (struct svcxprt_rdma * rdma ,
336+ struct svc_rdma_send_ctxt * ctxt )
328337{
329- struct ib_send_wr * wr = & ctxt -> sc_send_wr ;
338+ struct ib_send_wr * first_wr = ctxt -> sc_wr_chain ;
339+ struct ib_send_wr * send_wr = & ctxt -> sc_send_wr ;
340+ const struct ib_send_wr * bad_wr = first_wr ;
330341 struct rpc_rdma_cid cid = ctxt -> sc_cid ;
331- int ret ;
342+ int ret , sqecount = ctxt -> sc_sqecount ;
332343
333344 might_sleep ();
334345
335346 /* Sync the transport header buffer */
336347 ib_dma_sync_single_for_device (rdma -> sc_pd -> device ,
337- wr -> sg_list [0 ].addr ,
338- wr -> sg_list [0 ].length ,
348+ send_wr -> sg_list [0 ].addr ,
349+ send_wr -> sg_list [0 ].length ,
339350 DMA_TO_DEVICE );
340351
341352 /* If the SQ is full, wait until an SQ entry is available */
342353 while (!test_bit (XPT_CLOSE , & rdma -> sc_xprt .xpt_flags )) {
343- if (( atomic_dec_return ( & rdma -> sc_sq_avail ) < 0 ) ) {
344- svc_rdma_wake_send_waiters (rdma , 1 );
354+ if (atomic_sub_return ( sqecount , & rdma -> sc_sq_avail ) < 0 ) {
355+ svc_rdma_wake_send_waiters (rdma , sqecount );
345356
346357 /* When the transport is torn down, assume
347358 * ib_drain_sq() will trigger enough Send
@@ -358,12 +369,18 @@ int svc_rdma_send(struct svcxprt_rdma *rdma, struct svc_rdma_send_ctxt *ctxt)
358369 }
359370
360371 trace_svcrdma_post_send (ctxt );
361- ret = ib_post_send (rdma -> sc_qp , wr , NULL );
372+ ret = ib_post_send (rdma -> sc_qp , first_wr , & bad_wr );
362373 if (ret ) {
363374 trace_svcrdma_sq_post_err (rdma , & cid , ret );
364375 svc_xprt_deferred_close (& rdma -> sc_xprt );
365- svc_rdma_wake_send_waiters (rdma , 1 );
366- break ;
376+
377+ /* If even one WR was posted, there will be a
378+ * Send completion that bumps sc_sq_avail.
379+ */
380+ if (bad_wr == first_wr ) {
381+ svc_rdma_wake_send_waiters (rdma , sqecount );
382+ break ;
383+ }
367384 }
368385 return 0 ;
369386 }
@@ -884,7 +901,7 @@ static int svc_rdma_send_reply_msg(struct svcxprt_rdma *rdma,
884901 sctxt -> sc_send_wr .opcode = IB_WR_SEND ;
885902 }
886903
887- return svc_rdma_send (rdma , sctxt );
904+ return svc_rdma_post_send (rdma , sctxt );
888905}
889906
890907/**
@@ -948,7 +965,7 @@ void svc_rdma_send_error_msg(struct svcxprt_rdma *rdma,
948965 sctxt -> sc_send_wr .num_sge = 1 ;
949966 sctxt -> sc_send_wr .opcode = IB_WR_SEND ;
950967 sctxt -> sc_sges [0 ].length = sctxt -> sc_hdrbuf .len ;
951- if (svc_rdma_send (rdma , sctxt ))
968+ if (svc_rdma_post_send (rdma , sctxt ))
952969 goto put_ctxt ;
953970 return ;
954971
0 commit comments