Skip to content

Commit d162694

Browse files
metze-samba authored and smfrench committed
smb: server: let smb_direct_writev() respect SMB_DIRECT_MAX_SEND_SGES
We should not use more sges for ib_post_send() than we told the rdma device in rdma_create_qp()! Otherwise ib_post_send() will return -EINVAL, so we disconnect the connection. Or with the current siw.ko we'll get 0 from ib_post_send(), but will never ever get a completion for the request. I've already sent a fix for siw.ko... So we need to make sure smb_direct_writev() limits the number of vectors we pass to individual smb_direct_post_send_data() calls, so that we don't go over the queue pair limits. Commit 621433b ("ksmbd: smbd: relax the count of sges required") was very strange and I guess only needed because SMB_DIRECT_MAX_SEND_SGES was 8 at that time. It basically removed the check that the rdma device is able to handle the number of sges we try to use. While the real problem was added by commit ddbdc86 ("ksmbd: smbd: introduce read/write credits for RDMA read/write") as it used the minumun of device->attrs.max_send_sge and device->attrs.max_sge_rd, with the problem that device->attrs.max_sge_rd is always 1 for iWarp. And that limitation should only apply to RDMA Read operations. For now we keep that limitation for RDMA Write operations too, fixing that is a task for another day as it's not really required a bug fix. Commit 2b4eeea ("ksmbd: decrease the number of SMB3 smbdirect server SGEs") lowered SMB_DIRECT_MAX_SEND_SGES to 6, which is also used by our client code. And that client code enforces device->attrs.max_send_sge >= 6 since commit d2e81f9 ("Decrease the number of SMB3 smbdirect client SGEs") and (briefly looking) only the i40w driver provides only 3, see I40IW_MAX_WQ_FRAGMENT_COUNT. But currently we'd require 4 anyway, so that would not work anyway, but now it fails early. 
Cc: Steve French <[email protected]> Cc: Tom Talpey <[email protected]> Cc: Hyunchul Lee <[email protected]> Cc: [email protected] Cc: [email protected] Cc: [email protected] Fixes: 0626e66 ("cifsd: add server handler for central processing and tranport layers") Fixes: ddbdc86 ("ksmbd: smbd: introduce read/write credits for RDMA read/write") Fixes: 621433b ("ksmbd: smbd: relax the count of sges required") Fixes: 2b4eeea ("ksmbd: decrease the number of SMB3 smbdirect server SGEs") Acked-by: Namjae Jeon <[email protected]> Signed-off-by: Stefan Metzmacher <[email protected]> Signed-off-by: Steve French <[email protected]>
1 parent f83ec76 commit d162694

File tree

1 file changed

+107
-50
lines changed

1 file changed

+107
-50
lines changed

fs/smb/server/transport_rdma.c

Lines changed: 107 additions & 50 deletions
Original file line numberDiff line numberDiff line change
@@ -1209,78 +1209,130 @@ static int smb_direct_writev(struct ksmbd_transport *t,
12091209
bool need_invalidate, unsigned int remote_key)
12101210
{
12111211
struct smb_direct_transport *st = smb_trans_direct_transfort(t);
1212-
int remaining_data_length;
1213-
int start, i, j;
1214-
int max_iov_size = st->max_send_size -
1212+
size_t remaining_data_length;
1213+
size_t iov_idx;
1214+
size_t iov_ofs;
1215+
size_t max_iov_size = st->max_send_size -
12151216
sizeof(struct smb_direct_data_transfer);
12161217
int ret;
1217-
struct kvec vec;
12181218
struct smb_direct_send_ctx send_ctx;
1219+
int error = 0;
12191220

12201221
if (st->status != SMB_DIRECT_CS_CONNECTED)
12211222
return -ENOTCONN;
12221223

12231224
//FIXME: skip RFC1002 header..
1225+
if (WARN_ON_ONCE(niovs <= 1 || iov[0].iov_len != 4))
1226+
return -EINVAL;
12241227
buflen -= 4;
1228+
iov_idx = 1;
1229+
iov_ofs = 0;
12251230

12261231
remaining_data_length = buflen;
12271232
ksmbd_debug(RDMA, "Sending smb (RDMA): smb_len=%u\n", buflen);
12281233

12291234
smb_direct_send_ctx_init(st, &send_ctx, need_invalidate, remote_key);
1230-
start = i = 1;
1231-
buflen = 0;
1232-
while (true) {
1233-
buflen += iov[i].iov_len;
1234-
if (buflen > max_iov_size) {
1235-
if (i > start) {
1236-
remaining_data_length -=
1237-
(buflen - iov[i].iov_len);
1238-
ret = smb_direct_post_send_data(st, &send_ctx,
1239-
&iov[start], i - start,
1240-
remaining_data_length);
1241-
if (ret)
1235+
while (remaining_data_length) {
1236+
struct kvec vecs[SMB_DIRECT_MAX_SEND_SGES - 1]; /* minus smbdirect hdr */
1237+
size_t possible_bytes = max_iov_size;
1238+
size_t possible_vecs;
1239+
size_t bytes = 0;
1240+
size_t nvecs = 0;
1241+
1242+
/*
1243+
* For the last message remaining_data_length should be
1244+
* have been 0 already!
1245+
*/
1246+
if (WARN_ON_ONCE(iov_idx >= niovs)) {
1247+
error = -EINVAL;
1248+
goto done;
1249+
}
1250+
1251+
/*
1252+
* We have 2 factors which limit the arguments we pass
1253+
* to smb_direct_post_send_data():
1254+
*
1255+
* 1. The number of supported sges for the send,
1256+
* while one is reserved for the smbdirect header.
1257+
* And we currently need one SGE per page.
1258+
* 2. The number of negotiated payload bytes per send.
1259+
*/
1260+
possible_vecs = min_t(size_t, ARRAY_SIZE(vecs), niovs - iov_idx);
1261+
1262+
while (iov_idx < niovs && possible_vecs && possible_bytes) {
1263+
struct kvec *v = &vecs[nvecs];
1264+
int page_count;
1265+
1266+
v->iov_base = ((u8 *)iov[iov_idx].iov_base) + iov_ofs;
1267+
v->iov_len = min_t(size_t,
1268+
iov[iov_idx].iov_len - iov_ofs,
1269+
possible_bytes);
1270+
page_count = get_buf_page_count(v->iov_base, v->iov_len);
1271+
if (page_count > possible_vecs) {
1272+
/*
1273+
* If the number of pages in the buffer
1274+
* is to much (because we currently require
1275+
* one SGE per page), we need to limit the
1276+
* length.
1277+
*
1278+
* We know possible_vecs is at least 1,
1279+
* so we always keep the first page.
1280+
*
1281+
* We need to calculate the number extra
1282+
* pages (epages) we can also keep.
1283+
*
1284+
* We calculate the number of bytes in the
1285+
* first page (fplen), this should never be
1286+
* larger than v->iov_len because page_count is
1287+
* at least 2, but adding a limitation feels
1288+
* better.
1289+
*
1290+
* Then we calculate the number of bytes (elen)
1291+
* we can keep for the extra pages.
1292+
*/
1293+
size_t epages = possible_vecs - 1;
1294+
size_t fpofs = offset_in_page(v->iov_base);
1295+
size_t fplen = min_t(size_t, PAGE_SIZE - fpofs, v->iov_len);
1296+
size_t elen = min_t(size_t, v->iov_len - fplen, epages*PAGE_SIZE);
1297+
1298+
v->iov_len = fplen + elen;
1299+
page_count = get_buf_page_count(v->iov_base, v->iov_len);
1300+
if (WARN_ON_ONCE(page_count > possible_vecs)) {
1301+
/*
1302+
* Something went wrong in the above
1303+
* logic...
1304+
*/
1305+
error = -EINVAL;
12421306
goto done;
1243-
} else {
1244-
/* iov[start] is too big, break it */
1245-
int nvec = (buflen + max_iov_size - 1) /
1246-
max_iov_size;
1247-
1248-
for (j = 0; j < nvec; j++) {
1249-
vec.iov_base =
1250-
(char *)iov[start].iov_base +
1251-
j * max_iov_size;
1252-
vec.iov_len =
1253-
min_t(int, max_iov_size,
1254-
buflen - max_iov_size * j);
1255-
remaining_data_length -= vec.iov_len;
1256-
ret = smb_direct_post_send_data(st, &send_ctx, &vec, 1,
1257-
remaining_data_length);
1258-
if (ret)
1259-
goto done;
12601307
}
1261-
i++;
1262-
if (i == niovs)
1263-
break;
12641308
}
1265-
start = i;
1266-
buflen = 0;
1267-
} else {
1268-
i++;
1269-
if (i == niovs) {
1270-
/* send out all remaining vecs */
1271-
remaining_data_length -= buflen;
1272-
ret = smb_direct_post_send_data(st, &send_ctx,
1273-
&iov[start], i - start,
1274-
remaining_data_length);
1275-
if (ret)
1276-
goto done;
1277-
break;
1309+
possible_vecs -= page_count;
1310+
nvecs += 1;
1311+
possible_bytes -= v->iov_len;
1312+
bytes += v->iov_len;
1313+
1314+
iov_ofs += v->iov_len;
1315+
if (iov_ofs >= iov[iov_idx].iov_len) {
1316+
iov_idx += 1;
1317+
iov_ofs = 0;
12781318
}
12791319
}
1320+
1321+
remaining_data_length -= bytes;
1322+
1323+
ret = smb_direct_post_send_data(st, &send_ctx,
1324+
vecs, nvecs,
1325+
remaining_data_length);
1326+
if (unlikely(ret)) {
1327+
error = ret;
1328+
goto done;
1329+
}
12801330
}
12811331

12821332
done:
12831333
ret = smb_direct_flush_send_list(st, &send_ctx, true);
1334+
if (unlikely(!ret && error))
1335+
ret = error;
12841336

12851337
/*
12861338
* As an optimization, we don't wait for individual I/O to finish
@@ -1744,6 +1796,11 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
17441796
return -EINVAL;
17451797
}
17461798

1799+
if (device->attrs.max_send_sge < SMB_DIRECT_MAX_SEND_SGES) {
1800+
pr_err("warning: device max_send_sge = %d too small\n",
1801+
device->attrs.max_send_sge);
1802+
return -EINVAL;
1803+
}
17471804
if (device->attrs.max_recv_sge < SMB_DIRECT_MAX_RECV_SGES) {
17481805
pr_err("warning: device max_recv_sge = %d too small\n",
17491806
device->attrs.max_recv_sge);
@@ -1767,7 +1824,7 @@ static int smb_direct_init_params(struct smb_direct_transport *t,
17671824

17681825
cap->max_send_wr = max_send_wrs;
17691826
cap->max_recv_wr = t->recv_credit_max;
1770-
cap->max_send_sge = max_sge_per_wr;
1827+
cap->max_send_sge = SMB_DIRECT_MAX_SEND_SGES;
17711828
cap->max_recv_sge = SMB_DIRECT_MAX_RECV_SGES;
17721829
cap->max_inline_data = 0;
17731830
cap->max_rdma_ctxs = t->max_rw_credits;

0 commit comments

Comments
 (0)