
Commit 0bd73ae

metze-samba authored and smfrench committed
smb: server: allocate enough space for RW WRs and ib_drain_qp()
Make use of rdma_rw_mr_factor() to calculate the number of rw credits
and the number of pages per RDMA RW operation.

For iWarp connections we get the same numbers as before, tested with
siw.ko and irdma.ko (in iWarp mode).

siw:
CIFS: max_qp_rd_atom=128, max_fast_reg_page_list_len = 256
CIFS: max_sgl_rd=0, max_sge_rd=1
CIFS: responder_resources=32 max_frmr_depth=256 mr_io.type=0
CIFS: max_send_wr 384, device reporting max_cqe 3276800 max_qp_wr 32768
ksmbd: max_fast_reg_page_list_len = 256, max_sgl_rd=0, max_sge_rd=1
ksmbd: device reporting max_cqe 3276800 max_qp_wr 32768
ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256
ksmbd: New sc->rw_io.credits: max = 9, num_pages = 256, maxpages=2048
ksmbd: Info: rdma_send_wr 27 + max_send_wr 256 = 283

irdma (in iWarp mode):
CIFS: max_qp_rd_atom=127, max_fast_reg_page_list_len = 262144
CIFS: max_sgl_rd=0, max_sge_rd=13
CIFS: responder_resources=32 max_frmr_depth=2048 mr_io.type=0
CIFS: max_send_wr 384, device reporting max_cqe 1048574 max_qp_wr 4063
ksmbd: max_fast_reg_page_list_len = 262144, max_sgl_rd=0, max_sge_rd=13
ksmbd: device reporting max_cqe 1048574 max_qp_wr 4063
ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256
ksmbd: New sc->rw_io.credits: max = 9, num_pages = 256, maxpages=2048
ksmbd: rdma_send_wr 27 + max_send_wr 256 = 283

For RoCE, however, we now get different (and correct) numbers, tested
with rdma_rxe.ko and irdma.ko (in RoCEv2 mode).

rxe:
CIFS: max_qp_rd_atom=128, max_fast_reg_page_list_len = 512
CIFS: max_sgl_rd=0, max_sge_rd=32
CIFS: responder_resources=32 max_frmr_depth=512 mr_io.type=0
CIFS: max_send_wr 384, device reporting max_cqe 32767 max_qp_wr 1048576
ksmbd: max_fast_reg_page_list_len = 512, max_sgl_rd=0, max_sge_rd=32
ksmbd: device reporting max_cqe 32767 max_qp_wr 1048576
ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256
ksmbd: New sc->rw_io.credits: max = 65, num_pages = 32, maxpages=2048
ksmbd: rdma_send_wr 65 + max_send_wr 256 = 321

irdma (in RoCEv2 mode):
CIFS: max_qp_rd_atom=127, max_fast_reg_page_list_len = 262144
CIFS: max_sgl_rd=0, max_sge_rd=13
CIFS: responder_resources=32 max_frmr_depth=2048 mr_io.type=0
CIFS: max_send_wr 384, device reporting max_cqe 1048574 max_qp_wr 4063
ksmbd: max_fast_reg_page_list_len = 262144, max_sgl_rd=0, max_sge_rd=13
ksmbd: device reporting max_cqe 1048574 max_qp_wr 4063
ksmbd: Old sc->rw_io.credits: max = 9, num_pages = 256
ksmbd: New sc->rw_io.credits: max = 159, num_pages = 13, maxpages=2048
ksmbd: rdma_send_wr 159 + max_send_wr 256 = 415

Also rely on rdma_rw_init_qp() to set up the RW MR pool via
ib_mr_pool_init(); ib_mr_pool_destroy() will be called by
rdma_rw_cleanup_mrs(). It seems the code was implemented before the
rdma_rw_* layer was fully established in the kernel.

While there, also add additional space for ib_drain_qp(). This should
make sure ib_post_send() will never fail because the submission queue
is full.

Fixes: ddbdc86 ("ksmbd: smbd: introduce read/write credits for RDMA read/write")
Fixes: 4c564f0 ("smb: server: make use of common smbdirect_socket")
Fixes: 177368b ("smb: server: make use of common smbdirect_socket_parameters")
Fixes: 95475d8 ("smb: server: make use smbdirect_socket.rw_io.credits")
Cc: Steve French <[email protected]>
Cc: Tom Talpey <[email protected]>
Cc: [email protected]
Cc: [email protected]
Signed-off-by: Stefan Metzmacher <[email protected]>
Acked-by: Namjae Jeon <[email protected]>
Signed-off-by: Steve French <[email protected]>
1 parent 211ddde commit 0bd73ae
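As a quick cross-check of the numbers above, here is a minimal user-space
sketch that reproduces the "New sc->rw_io.credits" lines. It assumes
max_read_write_size = 8 MiB and 4 KiB pages (inferred from maxpages=2048)
and does not reimplement rdma_rw_mr_factor(); the per-device factors are
simply taken from the logs quoted in the commit message.

/*
 * Standalone sketch of the new rw-credit calculation, mirroring the
 * change to smb_direct_init_params() in the diff below.
 */
#include <stdio.h>

#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))
#define PAGE_SIZE		4096u	/* assumed 4 KiB pages */

int main(void)
{
	unsigned int max_read_write_size = 8u << 20;	/* assumed, gives maxpages=2048 */
	unsigned int maxpages = DIV_ROUND_UP(max_read_write_size, PAGE_SIZE);
	struct { const char *name; unsigned int mr_factor; } devs[] = {
		{ "siw (iWarp)",    8   },	/* rdma_rw_mr_factor() results per the logs */
		{ "irdma (iWarp)",  8   },
		{ "rxe (RoCEv2)",   64  },
		{ "irdma (RoCEv2)", 158 },
	};

	for (unsigned int i = 0; i < 4; i++) {
		unsigned int credits_max = devs[i].mr_factor;
		unsigned int num_pages = DIV_ROUND_UP(maxpages, credits_max);

		credits_max += 1;	/* one extra credit for unaligned pages */
		printf("%-16s credits: max = %u, num_pages = %u, maxpages=%u\n",
		       devs[i].name, credits_max, num_pages, maxpages);
	}
	return 0;
}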

1 file changed: +142 −91 lines


fs/smb/server/transport_rdma.c

Lines changed: 142 additions & 91 deletions
@@ -471,7 +471,6 @@ static void free_transport(struct smb_direct_transport *t)
 
 	if (sc->ib.qp) {
 		ib_drain_qp(sc->ib.qp);
-		ib_mr_pool_destroy(sc->ib.qp, &sc->ib.qp->rdma_mrs);
 		sc->ib.qp = NULL;
 		rdma_destroy_qp(sc->rdma.cm_id);
 	}
@@ -1871,20 +1870,11 @@ static int smb_direct_prepare_negotiation(struct smbdirect_socket *sc)
 	return ret;
 }
 
-static unsigned int smb_direct_get_max_fr_pages(struct smbdirect_socket *sc)
-{
-	return min_t(unsigned int,
-		     sc->ib.dev->attrs.max_fast_reg_page_list_len,
-		     256);
-}
-
-static int smb_direct_init_params(struct smbdirect_socket *sc,
-				  struct ib_qp_cap *cap)
+static int smb_direct_init_params(struct smbdirect_socket *sc)
 {
 	struct smbdirect_socket_parameters *sp = &sc->parameters;
-	struct ib_device *device = sc->ib.dev;
-	int max_send_sges, max_rw_wrs, max_send_wrs;
-	unsigned int max_sge_per_wr, wrs_per_credit;
+	int max_send_sges;
+	unsigned int maxpages;
 
 	/* need 3 more sge. because a SMB_DIRECT header, SMB2 header,
 	 * SMB2 response could be mapped.
@@ -1895,67 +1885,18 @@ static int smb_direct_init_params(struct smbdirect_socket *sc,
 		return -EINVAL;
 	}
 
-	/* Calculate the number of work requests for RDMA R/W.
-	 * The maximum number of pages which can be registered
-	 * with one Memory region can be transferred with one
-	 * R/W credit. And at least 4 work requests for each credit
-	 * are needed for MR registration, RDMA R/W, local & remote
-	 * MR invalidation.
-	 */
-	sc->rw_io.credits.num_pages = smb_direct_get_max_fr_pages(sc);
-	sc->rw_io.credits.max = DIV_ROUND_UP(sp->max_read_write_size,
-					     (sc->rw_io.credits.num_pages - 1) *
-					     PAGE_SIZE);
-
-	max_sge_per_wr = min_t(unsigned int, device->attrs.max_send_sge,
-			       device->attrs.max_sge_rd);
-	max_sge_per_wr = max_t(unsigned int, max_sge_per_wr,
-			       max_send_sges);
-	wrs_per_credit = max_t(unsigned int, 4,
-			       DIV_ROUND_UP(sc->rw_io.credits.num_pages,
-					    max_sge_per_wr) + 1);
-	max_rw_wrs = sc->rw_io.credits.max * wrs_per_credit;
-
-	max_send_wrs = sp->send_credit_target + max_rw_wrs;
-	if (max_send_wrs > device->attrs.max_cqe ||
-	    max_send_wrs > device->attrs.max_qp_wr) {
-		pr_err("consider lowering send_credit_target = %d\n",
-		       sp->send_credit_target);
-		pr_err("Possible CQE overrun, device reporting max_cqe %d max_qp_wr %d\n",
-		       device->attrs.max_cqe, device->attrs.max_qp_wr);
-		return -EINVAL;
-	}
-
-	if (sp->recv_credit_max > device->attrs.max_cqe ||
-	    sp->recv_credit_max > device->attrs.max_qp_wr) {
-		pr_err("consider lowering receive_credit_max = %d\n",
-		       sp->recv_credit_max);
-		pr_err("Possible CQE overrun, device reporting max_cpe %d max_qp_wr %d\n",
-		       device->attrs.max_cqe, device->attrs.max_qp_wr);
-		return -EINVAL;
-	}
-
-	if (device->attrs.max_send_sge < SMBDIRECT_SEND_IO_MAX_SGE) {
-		pr_err("warning: device max_send_sge = %d too small\n",
-		       device->attrs.max_send_sge);
-		return -EINVAL;
-	}
-	if (device->attrs.max_recv_sge < SMBDIRECT_RECV_IO_MAX_SGE) {
-		pr_err("warning: device max_recv_sge = %d too small\n",
-		       device->attrs.max_recv_sge);
-		return -EINVAL;
-	}
+	maxpages = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE);
+	sc->rw_io.credits.max = rdma_rw_mr_factor(sc->ib.dev,
+						  sc->rdma.cm_id->port_num,
+						  maxpages);
+	sc->rw_io.credits.num_pages = DIV_ROUND_UP(maxpages, sc->rw_io.credits.max);
+	/* add one extra in order to handle unaligned pages */
+	sc->rw_io.credits.max += 1;
 
 	sc->recv_io.credits.target = 1;
 
 	atomic_set(&sc->rw_io.credits.count, sc->rw_io.credits.max);
 
-	cap->max_send_wr = max_send_wrs;
-	cap->max_recv_wr = sp->recv_credit_max;
-	cap->max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
-	cap->max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
-	cap->max_inline_data = 0;
-	cap->max_rdma_ctxs = sc->rw_io.credits.max;
 	return 0;
 }
 
@@ -2029,13 +1970,129 @@ static int smb_direct_create_pools(struct smbdirect_socket *sc)
 	return -ENOMEM;
 }
 
-static int smb_direct_create_qpair(struct smbdirect_socket *sc,
-				   struct ib_qp_cap *cap)
+static u32 smb_direct_rdma_rw_send_wrs(struct ib_device *dev, const struct ib_qp_init_attr *attr)
+{
+	/*
+	 * This could be split out of rdma_rw_init_qp()
+	 * and be a helper function next to rdma_rw_mr_factor()
+	 *
+	 * We can't check unlikely(rdma_rw_force_mr) here,
+	 * but that is most likely 0 anyway.
+	 */
+	u32 factor;
+
+	WARN_ON_ONCE(attr->port_num == 0);
+
+	/*
+	 * Each context needs at least one RDMA READ or WRITE WR.
+	 *
+	 * For some hardware we might need more, eventually we should ask the
+	 * HCA driver for a multiplier here.
+	 */
+	factor = 1;
+
+	/*
+	 * If the device needs MRs to perform RDMA READ or WRITE operations,
+	 * we'll need two additional MRs for the registrations and the
+	 * invalidation.
+	 */
+	if (rdma_protocol_iwarp(dev, attr->port_num) || dev->attrs.max_sgl_rd)
+		factor += 2;	/* inv + reg */
+
+	return factor * attr->cap.max_rdma_ctxs;
+}
+
+static int smb_direct_create_qpair(struct smbdirect_socket *sc)
 {
 	struct smbdirect_socket_parameters *sp = &sc->parameters;
 	int ret;
+	struct ib_qp_cap qp_cap;
 	struct ib_qp_init_attr qp_attr;
-	int pages_per_rw;
+	u32 max_send_wr;
+	u32 rdma_send_wr;
+
+	/*
+	 * Note that {rdma,ib}_create_qp() will call
+	 * rdma_rw_init_qp() if cap->max_rdma_ctxs is not 0.
+	 * It will adjust cap->max_send_wr to the required
+	 * number of additional WRs for the RDMA RW operations.
+	 * It will cap cap->max_send_wr to the device limit.
+	 *
+	 * +1 for ib_drain_qp
+	 */
+	qp_cap.max_send_wr = sp->send_credit_target + 1;
+	qp_cap.max_recv_wr = sp->recv_credit_max + 1;
+	qp_cap.max_send_sge = SMBDIRECT_SEND_IO_MAX_SGE;
+	qp_cap.max_recv_sge = SMBDIRECT_RECV_IO_MAX_SGE;
+	qp_cap.max_inline_data = 0;
+	qp_cap.max_rdma_ctxs = sc->rw_io.credits.max;
+
+	/*
+	 * Find out the number of max_send_wr
+	 * after rdma_rw_init_qp() adjusted it.
+	 *
+	 * We only do it on a temporary variable,
+	 * as rdma_create_qp() will trigger
+	 * rdma_rw_init_qp() again.
+	 */
+	memset(&qp_attr, 0, sizeof(qp_attr));
+	qp_attr.cap = qp_cap;
+	qp_attr.port_num = sc->rdma.cm_id->port_num;
+	rdma_send_wr = smb_direct_rdma_rw_send_wrs(sc->ib.dev, &qp_attr);
+	max_send_wr = qp_cap.max_send_wr + rdma_send_wr;
+
+	if (qp_cap.max_send_wr > sc->ib.dev->attrs.max_cqe ||
+	    qp_cap.max_send_wr > sc->ib.dev->attrs.max_qp_wr) {
+		pr_err("Possible CQE overrun: max_send_wr %d\n",
+		       qp_cap.max_send_wr);
+		pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
+		       IB_DEVICE_NAME_MAX,
+		       sc->ib.dev->name,
+		       sc->ib.dev->attrs.max_cqe,
+		       sc->ib.dev->attrs.max_qp_wr);
+		pr_err("consider lowering send_credit_target = %d\n",
+		       sp->send_credit_target);
+		return -EINVAL;
+	}
+
+	if (qp_cap.max_rdma_ctxs &&
+	    (max_send_wr >= sc->ib.dev->attrs.max_cqe ||
+	     max_send_wr >= sc->ib.dev->attrs.max_qp_wr)) {
+		pr_err("Possible CQE overrun: rdma_send_wr %d + max_send_wr %d = %d\n",
+		       rdma_send_wr, qp_cap.max_send_wr, max_send_wr);
+		pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
+		       IB_DEVICE_NAME_MAX,
+		       sc->ib.dev->name,
+		       sc->ib.dev->attrs.max_cqe,
+		       sc->ib.dev->attrs.max_qp_wr);
+		pr_err("consider lowering send_credit_target = %d, max_rdma_ctxs = %d\n",
+		       sp->send_credit_target, qp_cap.max_rdma_ctxs);
+		return -EINVAL;
+	}
+
+	if (qp_cap.max_recv_wr > sc->ib.dev->attrs.max_cqe ||
+	    qp_cap.max_recv_wr > sc->ib.dev->attrs.max_qp_wr) {
+		pr_err("Possible CQE overrun: max_recv_wr %d\n",
+		       qp_cap.max_recv_wr);
+		pr_err("device %.*s reporting max_cqe %d max_qp_wr %d\n",
+		       IB_DEVICE_NAME_MAX,
+		       sc->ib.dev->name,
+		       sc->ib.dev->attrs.max_cqe,
+		       sc->ib.dev->attrs.max_qp_wr);
+		pr_err("consider lowering receive_credit_max = %d\n",
+		       sp->recv_credit_max);
+		return -EINVAL;
+	}
+
+	if (qp_cap.max_send_sge > sc->ib.dev->attrs.max_send_sge ||
+	    qp_cap.max_recv_sge > sc->ib.dev->attrs.max_recv_sge) {
+		pr_err("device %.*s max_send_sge/max_recv_sge = %d/%d too small\n",
+		       IB_DEVICE_NAME_MAX,
+		       sc->ib.dev->name,
+		       sc->ib.dev->attrs.max_send_sge,
+		       sc->ib.dev->attrs.max_recv_sge);
+		return -EINVAL;
+	}
 
 	sc->ib.pd = ib_alloc_pd(sc->ib.dev, 0);
 	if (IS_ERR(sc->ib.pd)) {
@@ -2046,8 +2103,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc,
 	}
 
 	sc->ib.send_cq = ib_alloc_cq_any(sc->ib.dev, sc,
-					 sp->send_credit_target +
-					 cap->max_rdma_ctxs,
+					 max_send_wr,
 					 IB_POLL_WORKQUEUE);
 	if (IS_ERR(sc->ib.send_cq)) {
 		pr_err("Can't create RDMA send CQ\n");
@@ -2057,7 +2113,7 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc,
 	}
 
 	sc->ib.recv_cq = ib_alloc_cq_any(sc->ib.dev, sc,
-					 sp->recv_credit_max,
+					 qp_cap.max_recv_wr,
 					 IB_POLL_WORKQUEUE);
 	if (IS_ERR(sc->ib.recv_cq)) {
 		pr_err("Can't create RDMA recv CQ\n");
@@ -2066,10 +2122,18 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc,
 		goto err;
 	}
 
+	/*
+	 * We reset completely here!
+	 * As the above use was just temporary
+	 * to calc max_send_wr and rdma_send_wr.
+	 *
+	 * rdma_create_qp() will trigger rdma_rw_init_qp()
+	 * again if max_rdma_ctxs is not 0.
+	 */
 	memset(&qp_attr, 0, sizeof(qp_attr));
 	qp_attr.event_handler = smb_direct_qpair_handler;
 	qp_attr.qp_context = sc;
-	qp_attr.cap = *cap;
+	qp_attr.cap = qp_cap;
 	qp_attr.sq_sig_type = IB_SIGNAL_REQ_WR;
 	qp_attr.qp_type = IB_QPT_RC;
 	qp_attr.send_cq = sc->ib.send_cq;
@@ -2085,18 +2149,6 @@ static int smb_direct_create_qpair(struct smbdirect_socket *sc,
 	sc->ib.qp = sc->rdma.cm_id->qp;
 	sc->rdma.cm_id->event_handler = smb_direct_cm_handler;
 
-	pages_per_rw = DIV_ROUND_UP(sp->max_read_write_size, PAGE_SIZE) + 1;
-	if (pages_per_rw > sc->ib.dev->attrs.max_sgl_rd) {
-		ret = ib_mr_pool_init(sc->ib.qp, &sc->ib.qp->rdma_mrs,
-				      sc->rw_io.credits.max, IB_MR_TYPE_MEM_REG,
-				      sc->rw_io.credits.num_pages, 0);
-		if (ret) {
-			pr_err("failed to init mr pool count %zu pages %zu\n",
-			       sc->rw_io.credits.max, sc->rw_io.credits.num_pages);
-			goto err;
-		}
-	}
-
 	return 0;
 err:
 	if (sc->ib.qp) {
@@ -2183,10 +2235,9 @@ static int smb_direct_prepare(struct ksmbd_transport *t)
 
 static int smb_direct_connect(struct smbdirect_socket *sc)
 {
-	struct ib_qp_cap qp_cap;
 	int ret;
 
-	ret = smb_direct_init_params(sc, &qp_cap);
+	ret = smb_direct_init_params(sc);
 	if (ret) {
 		pr_err("Can't configure RDMA parameters\n");
 		return ret;
@@ -2198,7 +2249,7 @@ static int smb_direct_connect(struct smbdirect_socket *sc)
 		return ret;
 	}
 
-	ret = smb_direct_create_qpair(sc, &qp_cap);
+	ret = smb_direct_create_qpair(sc);
 	if (ret) {
 		pr_err("Can't accept RDMA client: %d\n", ret);
 		return ret;

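For completeness, a similar sketch of the send-WR budget that the new
smb_direct_create_qpair() checks against the device limits: each RW
context costs one WR on RoCE and three on iWarp (READ/WRITE plus reg +
inv, mirroring smb_direct_rdma_rw_send_wrs() in the diff above), on top
of send_credit_target + 1 for ib_drain_qp(). send_credit_target = 255 is
an assumption inferred from the logged totals; the credit counts and
device limits are taken from the commit-message logs.

#include <stdio.h>
#include <stdbool.h>

int main(void)
{
	unsigned int send_credit_target = 255;	/* assumed ksmbd default */
	struct {
		const char *name;
		bool needs_mr;		/* rdma_protocol_iwarp() || attrs.max_sgl_rd */
		unsigned int rdma_ctxs, max_cqe, max_qp_wr;
	} devs[] = {
		{ "siw (iWarp)",    true,  9,   3276800, 32768   },
		{ "irdma (iWarp)",  true,  9,   1048574, 4063    },
		{ "rxe (RoCEv2)",   false, 65,  32767,   1048576 },
		{ "irdma (RoCEv2)", false, 159, 1048574, 4063    },
	};

	for (unsigned int i = 0; i < 4; i++) {
		unsigned int factor = devs[i].needs_mr ? 3 : 1;	/* rw + (reg + inv) */
		unsigned int base_send_wr = send_credit_target + 1;	/* +1 for ib_drain_qp() */
		unsigned int rdma_send_wr = factor * devs[i].rdma_ctxs;
		unsigned int max_send_wr = base_send_wr + rdma_send_wr;
		bool fits = max_send_wr < devs[i].max_cqe &&
			    max_send_wr < devs[i].max_qp_wr;

		printf("%-16s rdma_send_wr %u + max_send_wr %u = %u (%s)\n",
		       devs[i].name, rdma_send_wr, base_send_wr, max_send_wr,
		       fits ? "fits" : "possible CQE overrun");
	}
	return 0;
}

Running this reproduces the "rdma_send_wr N + max_send_wr 256 = M" lines
from the logs (283, 283, 321 and 415), all well below the respective
max_cqe/max_qp_wr limits of the tested devices.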