
Commit ea1bb00

zhijianli88 authored and jgunthorpe committed
RDMA/rxe: Implement flush execution in responder side
Only the requested placement types that are also registered in the destination memory region are acceptable; otherwise, the responder replies with a NAK "Remote Access Error" when it finds a placement type violation.

Data is persisted via arch_wb_cache_pmem(), which may be architecture specific.

This commit also adds two helpers to update qp.resp from the incoming packet.

Link: https://lore.kernel.org/r/[email protected]
Reviewed-by: Zhu Yanjun <[email protected]>
Signed-off-by: Li Zhijian <[email protected]>
Signed-off-by: Jason Gunthorpe <[email protected]>
1 parent fa1fd68 commit ea1bb00
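The acceptance rule in the commit message boils down to a per-bit check: each placement type (PLT) bit carried by the FLUSH packet must have a matching flush access flag registered on the target QP and MR. A minimal sketch of that rule, distilled from check_qp_attr_access() and check_rkey() below (the helper name is hypothetical):

/* Hypothetical helper, distilled from this patch: a requested
 * placement type is acceptable only if the matching flush access
 * flag was registered on the target.
 */
static bool flush_plt_acceptable(u32 flush_type, int access_flags)
{
	if ((flush_type & IB_FLUSH_GLOBAL) &&
	    !(access_flags & IB_ACCESS_FLUSH_GLOBAL))
		return false;

	if ((flush_type & IB_FLUSH_PERSISTENT) &&
	    !(access_flags & IB_ACCESS_FLUSH_PERSISTENT))
		return false;

	return true;
}

In the MR path a violation surfaces as an rkey violation, which is reported to the requester as the NAK "Remote Access Error" mentioned above.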

4 files changed (+183, −20 lines)

drivers/infiniband/sw/rxe/rxe_loc.h
Lines changed: 1 addition & 0 deletions

@@ -64,6 +64,7 @@ void rxe_mr_init_dma(int access, struct rxe_mr *mr);
 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 		     int access, struct rxe_mr *mr);
 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr);
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length);
 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
 		enum rxe_mr_copy_dir dir);
 int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,

drivers/infiniband/sw/rxe/rxe_mr.c
Lines changed: 36 additions & 0 deletions

@@ -4,6 +4,8 @@
  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
  */
 
+#include <linux/libnvdimm.h>
+
 #include "rxe.h"
 #include "rxe_loc.h"
 
@@ -192,6 +194,7 @@ int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
 	mr->offset = ib_umem_offset(umem);
 	mr->state = RXE_MR_STATE_VALID;
 	mr->ibmr.type = IB_MR_TYPE_USER;
+	mr->ibmr.page_size = PAGE_SIZE;
 
 	return 0;
 
@@ -295,6 +298,39 @@ void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
 	return addr;
 }
 
+int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, int length)
+{
+	size_t offset;
+
+	if (length == 0)
+		return 0;
+
+	if (mr->ibmr.type == IB_MR_TYPE_DMA)
+		return -EFAULT;
+
+	offset = (iova - mr->ibmr.iova + mr->offset) & mr->page_mask;
+	while (length > 0) {
+		u8 *va;
+		int bytes;
+
+		bytes = mr->ibmr.page_size - offset;
+		if (bytes > length)
+			bytes = length;
+
+		va = iova_to_vaddr(mr, iova, length);
+		if (!va)
+			return -EFAULT;
+
+		arch_wb_cache_pmem(va, bytes);
+
+		length -= bytes;
+		iova += bytes;
+		offset = 0;
+	}
+
+	return 0;
+}
+
 /* copy data from a range (vaddr, vaddr+length-1) to or from
  * a mr object starting at iova.
  */
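rxe_flush_pmem_iova() walks the requested range one MR page at a time: only the first chunk may start part-way into a page, so offset is reset to zero after the first pass, and each chunk is handed to arch_wb_cache_pmem() for write-back. The standalone sketch below (plain user-space C with made-up values and a fixed 4 KiB page size; it ignores the mr->offset/ibmr.iova translation) mirrors just the chunking arithmetic:

#include <stdio.h>

int main(void)
{
	unsigned long page_size = 4096;
	unsigned long iova = 0x10000f00;	/* starts 0xf00 into a page */
	long length = 0x2200;			/* crosses several page boundaries */
	unsigned long offset = iova & (page_size - 1);

	while (length > 0) {
		long bytes = page_size - offset;

		if (bytes > length)
			bytes = length;
		/* in the kernel loop, this chunk would be one
		 * arch_wb_cache_pmem(va, bytes) call
		 */
		printf("flush %#lx..%#lx (%ld bytes)\n",
		       iova, iova + bytes - 1, bytes);
		length -= bytes;
		iova += bytes;
		offset = 0;	/* later chunks start page-aligned */
	}
	return 0;
}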

drivers/infiniband/sw/rxe/rxe_resp.c
Lines changed: 140 additions & 20 deletions

@@ -23,6 +23,7 @@ enum resp_states {
 	RESPST_READ_REPLY,
 	RESPST_ATOMIC_REPLY,
 	RESPST_ATOMIC_WRITE_REPLY,
+	RESPST_PROCESS_FLUSH,
 	RESPST_COMPLETE,
 	RESPST_ACKNOWLEDGE,
 	RESPST_CLEANUP,
@@ -59,6 +60,7 @@ static char *resp_state_name[] = {
 	[RESPST_READ_REPLY] = "READ_REPLY",
 	[RESPST_ATOMIC_REPLY] = "ATOMIC_REPLY",
 	[RESPST_ATOMIC_WRITE_REPLY] = "ATOMIC_WRITE_REPLY",
+	[RESPST_PROCESS_FLUSH] = "PROCESS_FLUSH",
 	[RESPST_COMPLETE] = "COMPLETE",
 	[RESPST_ACKNOWLEDGE] = "ACKNOWLEDGE",
 	[RESPST_CLEANUP] = "CLEANUP",
@@ -258,19 +260,37 @@ static enum resp_states check_op_seq(struct rxe_qp *qp,
 	}
 }
 
+static bool check_qp_attr_access(struct rxe_qp *qp,
+				 struct rxe_pkt_info *pkt)
+{
+	if (((pkt->mask & RXE_READ_MASK) &&
+	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
+	    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
+	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
+	    ((pkt->mask & RXE_ATOMIC_MASK) &&
+	     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC)))
+		return false;
+
+	if (pkt->mask & RXE_FLUSH_MASK) {
+		u32 flush_type = feth_plt(pkt);
+
+		if ((flush_type & IB_FLUSH_GLOBAL &&
+		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_GLOBAL)) ||
+		    (flush_type & IB_FLUSH_PERSISTENT &&
+		     !(qp->attr.qp_access_flags & IB_ACCESS_FLUSH_PERSISTENT)))
+			return false;
+	}
+
+	return true;
+}
+
 static enum resp_states check_op_valid(struct rxe_qp *qp,
 				       struct rxe_pkt_info *pkt)
 {
 	switch (qp_type(qp)) {
 	case IB_QPT_RC:
-		if (((pkt->mask & RXE_READ_MASK) &&
-		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_READ)) ||
-		    ((pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) &&
-		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_WRITE)) ||
-		    ((pkt->mask & RXE_ATOMIC_MASK) &&
-		     !(qp->attr.qp_access_flags & IB_ACCESS_REMOTE_ATOMIC))) {
+		if (!check_qp_attr_access(qp, pkt))
 			return RESPST_ERR_UNSUPPORTED_OPCODE;
-		}
 
 		break;
 
@@ -437,6 +457,23 @@ static enum resp_states rxe_resp_check_length(struct rxe_qp *qp,
 	return RESPST_CHK_RKEY;
 }
 
+static void qp_resp_from_reth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+	qp->resp.va = reth_va(pkt);
+	qp->resp.offset = 0;
+	qp->resp.rkey = reth_rkey(pkt);
+	qp->resp.resid = reth_len(pkt);
+	qp->resp.length = reth_len(pkt);
+}
+
+static void qp_resp_from_atmeth(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
+{
+	qp->resp.va = atmeth_va(pkt);
+	qp->resp.offset = 0;
+	qp->resp.rkey = atmeth_rkey(pkt);
+	qp->resp.resid = sizeof(u64);
+}
+
 static enum resp_states check_rkey(struct rxe_qp *qp,
 				   struct rxe_pkt_info *pkt)
 {
@@ -448,23 +485,26 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 	u32 pktlen;
 	int mtu = qp->mtu;
 	enum resp_states state;
-	int access;
+	int access = 0;
 
 	if (pkt->mask & (RXE_READ_OR_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
-		if (pkt->mask & RXE_RETH_MASK) {
-			qp->resp.va = reth_va(pkt);
-			qp->resp.offset = 0;
-			qp->resp.rkey = reth_rkey(pkt);
-			qp->resp.resid = reth_len(pkt);
-			qp->resp.length = reth_len(pkt);
-		}
+		if (pkt->mask & RXE_RETH_MASK)
+			qp_resp_from_reth(qp, pkt);
+
 		access = (pkt->mask & RXE_READ_MASK) ? IB_ACCESS_REMOTE_READ
 						     : IB_ACCESS_REMOTE_WRITE;
+	} else if (pkt->mask & RXE_FLUSH_MASK) {
+		u32 flush_type = feth_plt(pkt);
+
+		if (pkt->mask & RXE_RETH_MASK)
+			qp_resp_from_reth(qp, pkt);
+
+		if (flush_type & IB_FLUSH_GLOBAL)
+			access |= IB_ACCESS_FLUSH_GLOBAL;
+		if (flush_type & IB_FLUSH_PERSISTENT)
+			access |= IB_ACCESS_FLUSH_PERSISTENT;
 	} else if (pkt->mask & RXE_ATOMIC_MASK) {
-		qp->resp.va = atmeth_va(pkt);
-		qp->resp.offset = 0;
-		qp->resp.rkey = atmeth_rkey(pkt);
-		qp->resp.resid = sizeof(u64);
+		qp_resp_from_atmeth(qp, pkt);
 		access = IB_ACCESS_REMOTE_ATOMIC;
 	} else {
 		return RESPST_EXECUTE;
@@ -511,11 +551,20 @@ static enum resp_states check_rkey(struct rxe_qp *qp,
 		}
 	}
 
+	if (pkt->mask & RXE_FLUSH_MASK) {
+		/* FLUSH MR may not set va or resid
+		 * no need to check range since we will flush whole mr
+		 */
+		if (feth_sel(pkt) == IB_FLUSH_MR)
+			goto skip_check_range;
+	}
+
 	if (mr_check_range(mr, va + qp->resp.offset, resid)) {
 		state = RESPST_ERR_RKEY_VIOLATION;
 		goto err;
 	}
 
+skip_check_range:
 	if (pkt->mask & (RXE_WRITE_MASK | RXE_ATOMIC_WRITE_MASK)) {
 		if (resid > mtu) {
 			if (pktlen != mtu || bth_pad(pkt)) {
@@ -621,11 +670,61 @@ static struct resp_res *rxe_prepare_res(struct rxe_qp *qp,
 		res->last_psn = pkt->psn;
 		res->cur_psn = pkt->psn;
 		break;
+	case RXE_FLUSH_MASK:
+		res->flush.va = qp->resp.va + qp->resp.offset;
+		res->flush.length = qp->resp.length;
+		res->flush.type = feth_plt(pkt);
+		res->flush.level = feth_sel(pkt);
 	}
 
 	return res;
 }
 
+static enum resp_states process_flush(struct rxe_qp *qp,
+				      struct rxe_pkt_info *pkt)
+{
+	u64 length, start;
+	struct rxe_mr *mr = qp->resp.mr;
+	struct resp_res *res = qp->resp.res;
+
+	/* oA19-14, oA19-15 */
+	if (res && res->replay)
+		return RESPST_ACKNOWLEDGE;
+	else if (!res) {
+		res = rxe_prepare_res(qp, pkt, RXE_FLUSH_MASK);
+		qp->resp.res = res;
+	}
+
+	if (res->flush.level == IB_FLUSH_RANGE) {
+		start = res->flush.va;
+		length = res->flush.length;
+	} else { /* level == IB_FLUSH_MR */
+		start = mr->ibmr.iova;
+		length = mr->ibmr.length;
+	}
+
+	if (res->flush.type & IB_FLUSH_PERSISTENT) {
+		if (rxe_flush_pmem_iova(mr, start, length))
+			return RESPST_ERR_RKEY_VIOLATION;
+		/* Make data persistent. */
+		wmb();
+	} else if (res->flush.type & IB_FLUSH_GLOBAL) {
+		/* Make data global visibility. */
+		wmb();
+	}
+
+	qp->resp.msn++;
+
+	/* next expected psn, read handles this separately */
+	qp->resp.psn = (pkt->psn + 1) & BTH_PSN_MASK;
+	qp->resp.ack_psn = qp->resp.psn;
+
+	qp->resp.opcode = pkt->opcode;
+	qp->resp.status = IB_WC_SUCCESS;
+
+	return RESPST_ACKNOWLEDGE;
+}
+
 /* Guarantee atomicity of atomic operations at the machine level. */
 static DEFINE_SPINLOCK(atomic_ops_lock);
 
@@ -980,6 +1079,8 @@ static enum resp_states execute(struct rxe_qp *qp, struct rxe_pkt_info *pkt)
 		return RESPST_ATOMIC_REPLY;
 	} else if (pkt->mask & RXE_ATOMIC_WRITE_MASK) {
 		return RESPST_ATOMIC_WRITE_REPLY;
+	} else if (pkt->mask & RXE_FLUSH_MASK) {
+		return RESPST_PROCESS_FLUSH;
 	} else {
 		/* Unreachable */
 		WARN_ON_ONCE(1);
@@ -1176,7 +1277,7 @@ static enum resp_states acknowledge(struct rxe_qp *qp,
 		send_ack(qp, qp->resp.aeth_syndrome, pkt->psn);
 	else if (pkt->mask & RXE_ATOMIC_MASK)
 		send_atomic_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
-	else if (pkt->mask & RXE_ATOMIC_WRITE_MASK)
+	else if (pkt->mask & (RXE_FLUSH_MASK | RXE_ATOMIC_WRITE_MASK))
 		send_read_response_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
 	else if (bth_ack(pkt))
 		send_ack(qp, AETH_ACK_UNLIMITED, pkt->psn);
@@ -1234,6 +1335,22 @@ static enum resp_states duplicate_request(struct rxe_qp *qp,
 		/* SEND. Ack again and cleanup. C9-105. */
 		send_ack(qp, AETH_ACK_UNLIMITED, prev_psn);
 		return RESPST_CLEANUP;
+	} else if (pkt->mask & RXE_FLUSH_MASK) {
+		struct resp_res *res;
+
+		/* Find the operation in our list of responder resources. */
+		res = find_resource(qp, pkt->psn);
+		if (res) {
+			res->replay = 1;
+			res->cur_psn = pkt->psn;
+			qp->resp.res = res;
+			rc = RESPST_PROCESS_FLUSH;
+			goto out;
+		}
+
+		/* Resource not found. Class D error. Drop the request. */
+		rc = RESPST_CLEANUP;
+		goto out;
 	} else if (pkt->mask & RXE_READ_MASK) {
 		struct resp_res *res;
 
@@ -1431,6 +1548,9 @@ int rxe_responder(void *arg)
 		case RESPST_ATOMIC_WRITE_REPLY:
 			state = atomic_write_reply(qp, pkt);
 			break;
+		case RESPST_PROCESS_FLUSH:
+			state = process_flush(qp, pkt);
+			break;
 		case RESPST_ACKNOWLEDGE:
 			state = acknowledge(qp, pkt);
 			break;
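The flush paths above lean on feth_plt() and feth_sel(), added by an earlier patch in this series to decode the FLUSH extended transport header (FETH). A rough sketch of what they compute is below; the bit positions follow the IBTA FLUSH layout but are an assumption here, and the in-tree accessors take a struct rxe_pkt_info * and extract the dword from the packet rather than taking a raw value:

/* Assumed FETH layout: placement type (PLT) in the low bits,
 * selectivity level (SEL) a nibble higher.
 */
#define FETH_PLT_MASK	0x3u	/* IB_FLUSH_GLOBAL | IB_FLUSH_PERSISTENT */
#define FETH_SEL_SHIFT	4u
#define FETH_SEL_MASK	0x3u	/* IB_FLUSH_RANGE or IB_FLUSH_MR */

static inline u32 feth_plt(u32 feth)
{
	return feth & FETH_PLT_MASK;
}

static inline u32 feth_sel(u32 feth)
{
	return (feth >> FETH_SEL_SHIFT) & FETH_SEL_MASK;
}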

drivers/infiniband/sw/rxe/rxe_verbs.h
Lines changed: 6 additions & 0 deletions

@@ -165,6 +165,12 @@ struct resp_res {
 			u64	va;
 			u32	resid;
 		} read;
+		struct {
+			u32	length;
+			u64	va;
+			u8	type;
+			u8	level;
+		} flush;
 	};
 };
 
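For context on how a peer reaches the responder code above: the target registers its persistent-memory buffer with the new flush access flags, and the initiator posts a FLUSH work request against that rkey. Below is a hedged user-space sketch using the rdma-core extended-QP API proposed alongside this kernel series; the flag and function names (ibv_wr_flush() in particular) should be verified against your libibverbs version, and pd, qpx, buf, len, rkey and remote_addr are assumed to be set up already:

/* Target side: the MR must carry every placement type it will accept. */
struct ibv_mr *mr = ibv_reg_mr(pd, buf, len,
			       IBV_ACCESS_REMOTE_WRITE |
			       IBV_ACCESS_FLUSH_GLOBAL |
			       IBV_ACCESS_FLUSH_PERSISTENT);

/* Initiator side: flush a previously written range to persistence. */
ibv_wr_start(qpx);
qpx->wr_id = 1;
qpx->wr_flags = IBV_SEND_SIGNALED;
ibv_wr_flush(qpx, rkey, remote_addr, len,
	     IBV_FLUSH_PERSISTENT, IBV_FLUSH_RANGE);
ibv_wr_complete(qpx);

If the MR lacked IBV_ACCESS_FLUSH_PERSISTENT, this request would take the rkey-violation path in check_rkey() and come back as the Remote Access Error NAK described in the commit message.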
