Skip to content

Commit 5f3c75d

Browse files
UCT/MLX5: Add DEVX QP/CQ on foreign memory support
1 parent 03ccf73 commit 5f3c75d

File tree

4 files changed

+227
-89
lines changed

4 files changed

+227
-89
lines changed

src/uct/ib/mlx5/dc/dc_mlx5.c

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1831,17 +1831,18 @@ void uct_dc_mlx5_iface_reset_dci(uct_dc_mlx5_iface_t *iface,
18311831

18321832
status = uct_ib_mlx5_modify_qp_state(&iface->super.super.super,
18331833
&txwq->super, IBV_QPS_RESET);
1834+
if (status != UCS_OK) {
1835+
ucs_fatal("iface %p failed to reset dci[%d] qpn 0x%x: %s",
1836+
iface, dci_index, txwq->super.qp_num,
1837+
ucs_status_string(status));
1838+
}
18341839

18351840
uct_rc_mlx5_iface_commom_clean(&iface->super.cq[UCT_IB_DIR_TX], NULL,
18361841
txwq->super.qp_num);
18371842

18381843
/* Resume posting from the beginning of the QP */
18391844
uct_ib_mlx5_txwq_reset(txwq);
1840-
if (status != UCS_OK) {
1841-
ucs_fatal("iface %p failed to reset dci[%d] qpn 0x%x: %s",
1842-
iface, dci_index, txwq->super.qp_num,
1843-
ucs_status_string(status));
1844-
}
1845+
uct_ib_mlx5_init_wq_buf(txwq);
18451846

18461847
status = uct_dc_mlx5_iface_dci_connect(iface, dci);
18471848
if (status != UCS_OK) {

src/uct/ib/mlx5/dv/ib_mlx5_dv.c

Lines changed: 139 additions & 75 deletions
Original file line numberDiff line numberDiff line change
@@ -124,12 +124,12 @@ void uct_ib_mlx5dv_qp_init_attr(uct_ib_qp_init_attr_t *qp_init_attr,
124124
}
125125

126126
#if HAVE_DEVX
127-
ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
128-
const uct_ib_mlx5_cq_t *send_cq,
129-
const uct_ib_mlx5_cq_t *recv_cq,
130-
uct_ib_mlx5_qp_t *qp,
131-
uct_ib_mlx5_txwq_t *tx,
132-
uct_ib_mlx5_qp_attr_t *attr)
127+
ucs_status_t uct_ib_mlx5_devx_create_qp_common(uct_ib_iface_t *iface,
128+
const uct_ib_mlx5_cq_t *send_cq,
129+
const uct_ib_mlx5_cq_t *recv_cq,
130+
uct_ib_mlx5_qp_t *qp,
131+
uct_ib_mlx5_txwq_t *tx,
132+
uct_ib_mlx5_qp_attr_t *attr)
133133
{
134134
uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.md, uct_ib_mlx5_md_t);
135135
uct_ib_device_t *dev = &md->super.dev;
@@ -138,7 +138,6 @@ ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
138138
char in_2init[UCT_IB_MLX5DV_ST_SZ_BYTES(rst2init_qp_in)] = {};
139139
char out_2init[UCT_IB_MLX5DV_ST_SZ_BYTES(rst2init_qp_out)] = {};
140140
uct_ib_mlx5_mmio_mode_t mmio_mode;
141-
int max_tx, max_rx, len_tx, len;
142141
uct_ib_mlx5_devx_uar_t *uar;
143142
ucs_status_t status;
144143
void *qpc;
@@ -162,27 +161,6 @@ ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
162161
goto err;
163162
}
164163

165-
max_tx = uct_ib_mlx5_devx_sq_length(attr->super.cap.max_send_wr);
166-
len_tx = max_tx * MLX5_SEND_WQE_BB;
167-
max_rx = ucs_roundup_pow2_or0(attr->super.cap.max_recv_wr);
168-
len = len_tx + max_rx * UCT_IB_MLX5_MAX_BB * UCT_IB_MLX5_WQE_SEG_SIZE;
169-
170-
if (tx != NULL) {
171-
status = uct_ib_mlx5_md_buf_alloc(md, len, 0, &qp->devx.wq_buf,
172-
&qp->devx.mem, 0, "qp umem");
173-
if (status != UCS_OK) {
174-
goto err_uar;
175-
}
176-
} else {
177-
qp->devx.wq_buf = NULL;
178-
}
179-
180-
qp->devx.dbrec = uct_ib_mlx5_get_dbrec(md);
181-
if (!qp->devx.dbrec) {
182-
status = UCS_ERR_NO_MEMORY;
183-
goto err_free_mem;
184-
}
185-
186164
UCT_IB_MLX5DV_SET(create_qp_in, in, opcode, UCT_IB_MLX5_CMD_OP_CREATE_QP);
187165
qpc = UCT_IB_MLX5DV_ADDR_OF(create_qp_in, in, qpc);
188166
if (attr->super.qp_type == UCT_IB_QPT_DCI) {
@@ -195,7 +173,7 @@ ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
195173
} else {
196174
ucs_error("create qp failed: unknown type %d", attr->super.qp_type);
197175
status = UCS_ERR_UNSUPPORTED;
198-
goto err_free_db;
176+
goto err_uar;
199177
}
200178
UCT_IB_MLX5DV_SET(qpc, qpc, pm_state, UCT_IB_MLX5_QPC_PM_STATE_MIGRATED);
201179
UCT_IB_MLX5DV_SET(qpc, qpc, rdma_wr_disabled, !!attr->rdma_wr_disabled);
@@ -208,8 +186,8 @@ ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
208186
UCT_IB_MLX5DV_SET(qpc, qpc, cqn_snd, send_cq->cq_num);
209187
UCT_IB_MLX5DV_SET(qpc, qpc, cqn_rcv, recv_cq->cq_num);
210188
/* cppcheck-suppress internalAstError */
211-
UCT_IB_MLX5DV_SET(qpc, qpc, log_sq_size, ucs_ilog2_or0(max_tx));
212-
UCT_IB_MLX5DV_SET(qpc, qpc, log_rq_size, ucs_ilog2_or0(max_rx));
189+
UCT_IB_MLX5DV_SET(qpc, qpc, log_sq_size, ucs_ilog2_or0(attr->max_tx));
190+
UCT_IB_MLX5DV_SET(qpc, qpc, log_rq_size, 0);
213191
UCT_IB_MLX5DV_SET(qpc, qpc, cs_req,
214192
uct_ib_mlx5_qpc_cs_req(attr->super.max_inl_cqe[UCT_IB_DIR_TX]));
215193
UCT_IB_MLX5DV_SET(qpc, qpc, cs_res,
@@ -237,7 +215,7 @@ ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
237215
"QP", UCS_LOG_LEVEL_ERROR);
238216
if (!qp->devx.obj) {
239217
status = UCS_ERR_IO_ERROR;
240-
goto err_free_db;
218+
goto err_uar;
241219
}
242220

243221
qp->qp_num = UCT_IB_MLX5DV_GET(create_qp_out, out, qpn);
@@ -266,16 +244,16 @@ ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
266244

267245
qp->type = UCT_IB_MLX5_OBJ_TYPE_DEVX;
268246

269-
attr->super.cap.max_send_wr = max_tx;
270-
attr->super.cap.max_recv_wr = max_rx;
247+
attr->super.cap.max_send_wr = attr->max_tx;
248+
attr->super.cap.max_recv_wr = 0;
271249

272250
if (tx != NULL) {
273251
ucs_assert(qp->devx.wq_buf != NULL);
274252
tx->reg = &uar->super;
275253
tx->qstart = qp->devx.wq_buf;
276-
tx->qend = UCS_PTR_BYTE_OFFSET(qp->devx.wq_buf, len_tx);
254+
tx->qend = UCS_PTR_BYTE_OFFSET(qp->devx.wq_buf, attr->len);
277255
tx->dbrec = &qp->devx.dbrec->db[MLX5_SND_DBR];
278-
tx->bb_max = max_tx - 2 * UCT_IB_MLX5_MAX_BB;
256+
tx->bb_max = attr->max_tx - 2 * UCT_IB_MLX5_MAX_BB;
279257
ucs_assert(*tx->dbrec == 0);
280258
uct_ib_mlx5_txwq_reset(tx);
281259
} else {
@@ -287,16 +265,60 @@ ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
287265

288266
err_free:
289267
uct_ib_mlx5_devx_obj_destroy(qp->devx.obj, "QP");
290-
err_free_db:
291-
uct_ib_mlx5_put_dbrec(qp->devx.dbrec);
292-
err_free_mem:
293-
uct_ib_mlx5_md_buf_free(md, qp->devx.wq_buf, &qp->devx.mem);
294268
err_uar:
295269
uct_worker_tl_data_put(uar, uct_ib_mlx5_devx_uar_cleanup);
296270
err:
297271
return status;
298272
}
299273

274+
ucs_status_t uct_ib_mlx5_devx_create_qp(uct_ib_iface_t *iface,
275+
const uct_ib_mlx5_cq_t *send_cq,
276+
const uct_ib_mlx5_cq_t *recv_cq,
277+
uct_ib_mlx5_qp_t *qp,
278+
uct_ib_mlx5_txwq_t *tx,
279+
uct_ib_mlx5_qp_attr_t *attr)
280+
{
281+
uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.md, uct_ib_mlx5_md_t);
282+
ucs_status_t status;
283+
284+
uct_ib_mlx5_wq_calc_sizes(attr);
285+
286+
if (tx != NULL) {
287+
status = uct_ib_mlx5_md_buf_alloc(md, attr->len, 0, &qp->devx.wq_buf,
288+
&qp->devx.mem, 0, "qp umem");
289+
if (status != UCS_OK) {
290+
return status;
291+
}
292+
} else {
293+
qp->devx.wq_buf = NULL;
294+
}
295+
296+
qp->devx.dbrec = uct_ib_mlx5_get_dbrec(md);
297+
if (qp->devx.dbrec == NULL) {
298+
status = UCS_ERR_NO_MEMORY;
299+
goto err_free;
300+
}
301+
302+
status = uct_ib_mlx5_devx_create_qp_common(iface, send_cq, recv_cq, qp, tx,
303+
attr);
304+
if (status != UCS_OK) {
305+
goto err_free_db;
306+
}
307+
308+
if (tx != NULL) {
309+
ucs_assert(tx->qstart != NULL);
310+
uct_ib_mlx5_init_wq_buf(tx);
311+
}
312+
313+
return UCS_OK;
314+
315+
err_free_db:
316+
uct_ib_mlx5_put_dbrec(qp->devx.dbrec);
317+
err_free:
318+
uct_ib_mlx5_md_buf_free(md, qp->devx.wq_buf, &qp->devx.mem);
319+
return status;
320+
}
321+
300322
ucs_status_t uct_ib_mlx5_devx_modify_qp(uct_ib_mlx5_qp_t *qp,
301323
const void *in, size_t inlen,
302324
void *out, size_t outlen)
@@ -385,9 +407,14 @@ ucs_status_t uct_ib_mlx5_devx_modify_qp_state(uct_ib_mlx5_qp_t *qp,
385407
return uct_ib_mlx5_devx_modify_qp(qp, in, sizeof(in), out, sizeof(out));
386408
}
387409

388-
void uct_ib_mlx5_devx_destroy_qp(uct_ib_mlx5_md_t *md, uct_ib_mlx5_qp_t *qp)
410+
void uct_ib_mlx5_devx_destroy_qp_common(uct_ib_mlx5_qp_t *qp)
389411
{
390412
uct_ib_mlx5_devx_obj_destroy(qp->devx.obj, "QP");
413+
}
414+
415+
void uct_ib_mlx5_devx_destroy_qp(uct_ib_mlx5_md_t *md, uct_ib_mlx5_qp_t *qp)
416+
{
417+
uct_ib_mlx5_devx_destroy_qp_common(qp);
391418
uct_ib_mlx5_put_dbrec(qp->devx.dbrec);
392419
uct_ib_mlx5_md_buf_free(md, qp->devx.wq_buf, &qp->devx.mem);
393420
}
@@ -529,39 +556,33 @@ uct_ib_mlx5_devx_query_qp_peer_info(uct_ib_iface_t *iface, uct_ib_mlx5_qp_t *qp,
529556
}
530557

531558
ucs_status_t
532-
uct_ib_mlx5_devx_create_cq(uct_ib_iface_t *iface, uct_ib_dir_t dir,
533-
const uct_ib_iface_init_attr_t *init_attr,
534-
uct_ib_mlx5_cq_t *cq, int preferred_cpu, size_t inl)
559+
uct_ib_mlx5_devx_create_cq_common(uct_ib_iface_t *iface, uct_ib_dir_t dir,
560+
const uct_ib_mlx5_cq_attr_t *attr,
561+
uct_ib_mlx5_cq_t *cq, int preferred_cpu,
562+
size_t inl)
535563
{
536564
char in[UCT_IB_MLX5DV_ST_SZ_BYTES(create_cq_in)] = {0};
537565
char out[UCT_IB_MLX5DV_ST_SZ_BYTES(create_cq_out)] = {0};
538566
void *cqctx = UCT_IB_MLX5DV_ADDR_OF(create_cq_in, in, cqc);
539567
uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.md, uct_ib_mlx5_md_t);
540568
uct_ib_device_t *dev = uct_ib_iface_device(iface);
541-
unsigned cq_size = ucs_roundup_pow2(uct_ib_cq_size(iface, init_attr, dir));
542-
int log_cq_size = ucs_ilog2(cq_size);
543-
int cqe_size = uct_ib_get_cqe_size(inl > 32 ? 128 : 64);
544569
int num_comp_vectors = dev->ibv_context->num_comp_vectors;
545-
size_t umem_len = cqe_size * cq_size;
570+
int log_cq_size;
546571
ucs_status_t status;
547572
uint32_t eqn;
548573

574+
log_cq_size = ucs_ilog2(attr->cq_size);
549575
UCT_IB_MLX5DV_SET(create_cq_in, in, opcode, UCT_IB_MLX5_CMD_OP_CREATE_CQ);
550576

551577
/* Set DB record umem related bits */
552-
cq->devx.dbrec = uct_ib_mlx5_get_dbrec(md);
553-
if (cq->devx.dbrec == NULL) {
554-
status = UCS_ERR_NO_MEMORY;
555-
goto err;
556-
}
557578
UCT_IB_MLX5DV_SET(cqc, cqctx, dbr_umem_id, cq->devx.dbrec->mem_id);
558579
UCT_IB_MLX5DV_SET64(cqc, cqctx, dbr_addr, cq->devx.dbrec->offset);
559580

560581
/* Set EQN related bits */
561582
if (mlx5dv_devx_query_eqn(dev->ibv_context,
562583
preferred_cpu % num_comp_vectors, &eqn) != 0) {
563584
status = UCS_ERR_IO_ERROR;
564-
goto err_free_db;
585+
goto err;
565586
}
566587

567588
UCT_IB_MLX5DV_SET(cqc, cqctx, c_eqn, eqn);
@@ -575,31 +596,23 @@ uct_ib_mlx5_devx_create_cq(uct_ib_iface_t *iface, uct_ib_dir_t dir,
575596
UCT_IB_MLX5_MMIO_MODE_DB);
576597
if (UCS_PTR_IS_ERR(cq->devx.uar)) {
577598
status = UCS_PTR_STATUS(cq->devx.uar);
578-
goto err_free_db;
599+
goto err;
579600
}
580601
UCT_IB_MLX5DV_SET(cqc, cqctx, uar_page, cq->devx.uar->uar->page_id);
581602

582603
/* Set CQ umem related bits */
583-
status = uct_ib_mlx5_md_buf_alloc(md, umem_len, 0, &cq->devx.cq_buf,
584-
&cq->devx.mem, IBV_ACCESS_LOCAL_WRITE,
585-
"cq umem");
586-
if (status != UCS_OK) {
587-
goto err_uar;
588-
}
589-
memset(cq->devx.cq_buf, 0, umem_len);
590-
591604
UCT_IB_MLX5DV_SET(create_cq_in, in, cq_umem_id, cq->devx.mem.mem->umem_id);
592605
UCT_IB_MLX5DV_SET64(create_cq_in, in, cq_umem_offset, 0);
593606

594607
UCT_IB_MLX5DV_SET(cqc, cqctx, log_cq_size, log_cq_size);
595-
UCT_IB_MLX5DV_SET(cqc, cqctx, cqe_sz, (cqe_size == 128) ? 1 : 0);
608+
UCT_IB_MLX5DV_SET(cqc, cqctx, cqe_sz, (attr->cqe_size == 128) ? 1 : 0);
596609

597-
if (init_attr->cqe_zip_sizes[dir] & cqe_size) {
610+
if (attr->flags & UCT_IB_MLX5_CQ_CQE_ZIP) {
598611
UCT_IB_MLX5DV_SET(cqc, cqctx, cqe_comp_en, 1);
599612
UCT_IB_MLX5DV_SET(cqc, cqctx, cqe_comp_layout, 1);
600613
}
601614

602-
if (!UCS_ENABLE_ASSERT && (init_attr->flags & UCT_IB_CQ_IGNORE_OVERRUN)) {
615+
if (attr->flags & UCT_IB_MLX5_CQ_IGNORE_OVERRUN) {
603616
UCT_IB_MLX5DV_SET(cqc, cqctx, oi, 1);
604617
}
605618

@@ -608,35 +621,86 @@ uct_ib_mlx5_devx_create_cq(uct_ib_iface_t *iface, uct_ib_dir_t dir,
608621
"CQ", UCS_LOG_LEVEL_ERROR);
609622
if (cq->devx.obj == NULL) {
610623
status = UCS_ERR_IO_ERROR;
611-
goto err_free_mem;
624+
goto err_uar;
612625
}
613626

614-
uct_ib_mlx5_fill_cq_common(cq, cq_size, cqe_size,
627+
uct_ib_mlx5_init_cq_common(cq, attr->cq_size, attr->cqe_size,
615628
UCT_IB_MLX5DV_GET(create_cq_out, out, cqn),
616629
cq->devx.cq_buf, cq->devx.uar->uar->base_addr,
617630
cq->devx.dbrec->db,
618-
!!(init_attr->cqe_zip_sizes[dir] & cqe_size));
631+
!!(attr->flags & UCT_IB_MLX5_CQ_CQE_ZIP));
619632

620-
iface->config.max_inl_cqe[dir] = uct_ib_mlx5_inl_cqe(inl, cqe_size);
633+
iface->config.max_inl_cqe[dir] = uct_ib_mlx5_inl_cqe(inl, attr->cqe_size);
621634
iface->cq[dir] = NULL;
622635
cq->type = UCT_IB_MLX5_OBJ_TYPE_DEVX;
623636
return UCS_OK;
624637

625-
err_free_mem:
626-
uct_ib_mlx5_md_buf_free(md, cq->devx.cq_buf, &cq->devx.mem);
627638
err_uar:
628639
uct_worker_tl_data_put(cq->devx.uar, uct_ib_mlx5_devx_uar_cleanup);
640+
err:
641+
return status;
642+
}
643+
644+
ucs_status_t
645+
uct_ib_mlx5_devx_create_cq(uct_ib_iface_t *iface, uct_ib_dir_t dir,
646+
const uct_ib_iface_init_attr_t *init_attr,
647+
uct_ib_mlx5_cq_t *cq, int preferred_cpu, size_t inl)
648+
{
649+
uct_ib_mlx5_md_t *md = ucs_derived_of(iface->super.md, uct_ib_mlx5_md_t);
650+
uct_ib_mlx5_cq_attr_t attr = {};
651+
ucs_status_t status;
652+
653+
uct_ib_mlx5_cq_calc_sizes(iface, dir, init_attr, inl, &attr);
654+
655+
if (init_attr->cqe_zip_sizes[dir] & attr.cqe_size) {
656+
attr.flags |= UCT_IB_MLX5_CQ_CQE_ZIP;
657+
}
658+
659+
if (!UCS_ENABLE_ASSERT && (init_attr->flags & UCT_IB_CQ_IGNORE_OVERRUN)) {
660+
attr.flags |= UCT_IB_MLX5_CQ_IGNORE_OVERRUN;
661+
}
662+
663+
status = uct_ib_mlx5_md_buf_alloc(md, attr.umem_len, 0, &cq->devx.cq_buf,
664+
&cq->devx.mem, IBV_ACCESS_LOCAL_WRITE,
665+
"cq umem");
666+
if (status != UCS_OK) {
667+
return status;
668+
}
669+
670+
memset(cq->devx.cq_buf, 0, attr.umem_len);
671+
672+
cq->devx.dbrec = uct_ib_mlx5_get_dbrec(md);
673+
if (cq->devx.dbrec == NULL) {
674+
status = UCS_ERR_NO_MEMORY;
675+
goto err_free;
676+
}
677+
678+
status = uct_ib_mlx5_devx_create_cq_common(iface, dir, &attr, cq,
679+
preferred_cpu, inl);
680+
if (status != UCS_OK) {
681+
goto err_free_db;
682+
}
683+
684+
uct_ib_mlx5_fill_cq_buf(cq, attr.umem_len);
685+
return UCS_OK;
686+
629687
err_free_db:
630688
uct_ib_mlx5_put_dbrec(cq->devx.dbrec);
631-
err:
689+
err_free:
690+
uct_ib_mlx5_md_buf_free(md, cq->devx.cq_buf, &cq->devx.mem);
632691
return status;
633692
}
634693

635-
void uct_ib_mlx5_devx_destroy_cq(uct_ib_mlx5_md_t *md, uct_ib_mlx5_cq_t *cq)
694+
void uct_ib_mlx5_devx_destroy_cq_common(uct_ib_mlx5_cq_t *cq)
636695
{
637696
uct_ib_mlx5_devx_obj_destroy(cq->devx.obj, "CQ");
638-
uct_ib_mlx5_put_dbrec(cq->devx.dbrec);
639697
uct_worker_tl_data_put(cq->devx.uar, uct_ib_mlx5_devx_uar_cleanup);
698+
}
699+
700+
void uct_ib_mlx5_devx_destroy_cq(uct_ib_mlx5_md_t *md, uct_ib_mlx5_cq_t *cq)
701+
{
702+
uct_ib_mlx5_devx_destroy_cq_common(cq);
703+
uct_ib_mlx5_put_dbrec(cq->devx.dbrec);
640704
uct_ib_mlx5_md_buf_free(md, cq->devx.cq_buf, &cq->devx.mem);
641705
}
642706
#endif

0 commit comments

Comments
 (0)