Skip to content

Commit 1084d5f

Browse files
authored
Merge pull request #10760 from tvegas1/direct_nic_datapath
Direct NIC Datapath
2 parents 1a43fda + e7f59b6 commit 1084d5f

File tree

16 files changed

+709
-79
lines changed

16 files changed

+709
-79
lines changed

src/ucp/core/ucp_context.c

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1190,6 +1190,7 @@ static void ucp_add_tl_resource_if_enabled(
11901190
const uct_tl_resource_desc_t *resource, unsigned *num_resources_p,
11911191
uint64_t dev_cfg_masks[], uint64_t *tl_cfg_mask)
11921192
{
1193+
ucp_tl_md_t *md = &context->tl_mds[md_index];
11931194
uint8_t rsc_flags;
11941195
ucp_rsc_index_t dev_index, i;
11951196

@@ -1220,6 +1221,10 @@ static void ucp_add_tl_resource_if_enabled(
12201221
}
12211222
context->tl_rscs[context->num_tls].dev_index = dev_index;
12221223

1224+
if (resource->sys_device < UCP_MAX_SYS_DEVICES) {
1225+
md->sys_dev_map |= UCS_BIT(resource->sys_device);
1226+
}
1227+
12231228
++context->num_tls;
12241229
++(*num_resources_p);
12251230
}
@@ -1429,8 +1434,9 @@ static ucs_status_t ucp_fill_tl_md(ucp_context_h context,
14291434
ucs_status_t status;
14301435

14311436
/* Initialize tl_md structure */
1432-
tl_md->cmpt_index = cmpt_index;
1433-
tl_md->rsc = *md_rsc;
1437+
tl_md->cmpt_index = cmpt_index;
1438+
tl_md->rsc = *md_rsc;
1439+
tl_md->sys_dev_map = 0;
14341440

14351441
/* Read MD configuration */
14361442
status = uct_md_config_read(context->tl_cmpts[cmpt_index].cmpt, NULL, NULL,

src/ucp/core/ucp_context.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,11 @@ typedef struct ucp_tl_md {
326326
* Global VA memory handle
327327
*/
328328
uct_mem_h gva_mr;
329+
330+
/**
331+
* Set of known system devices associated with the MD
332+
*/
333+
ucp_sys_dev_map_t sys_dev_map;
329334
} ucp_tl_md_t;
330335

331336

src/ucp/core/ucp_mm.c

Lines changed: 40 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -518,6 +518,28 @@ static ucs_status_t ucp_memh_register_gva(ucp_context_h context, ucp_mem_h memh,
518518
return UCS_OK;
519519
}
520520

521+
/**
 * Check whether memory located on @a mem_sys_dev can be reached from every
 * system device present in @a sys_dev_map.
 *
 * @param [in] mem_sys_dev  System device the memory belongs to, or
 *                          UCS_SYS_DEVICE_ID_UNKNOWN if it is not known.
 * @param [in] sys_dev_map  Bitmap of system devices to test against
 *                          @a mem_sys_dev.
 *
 * @return 1 if @a mem_sys_dev is unknown, or if ucs_topo_is_reachable()
 *         succeeds for every system device in @a sys_dev_map; 0 as soon as
 *         one system device in the map is reported as unreachable.
 */
static int ucp_memh_sys_dev_reachable(ucs_sys_device_t mem_sys_dev,
522+
ucp_sys_dev_map_t sys_dev_map)
523+
{
524+
ucs_sys_device_t sys_dev;
525+
526+
/* Without a known memory system device there is nothing to check against */
if (mem_sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN) {
527+
return 1;
528+
}
529+
530+
/*
531+
* If at least one sys_dev of the map cannot reach the memory, report the map
532+
* as unreachable: we cannot know in advance which device is going to be used.
533+
*/
534+
ucs_for_each_bit(sys_dev, sys_dev_map) {
535+
if (!ucs_topo_is_reachable(sys_dev, mem_sys_dev)) {
536+
return 0;
537+
}
538+
}
539+
540+
/* No device in the map was reported as unreachable */
return 1;
541+
}
542+
521543
static ucs_status_t
522544
ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
523545
ucp_md_map_t md_map, unsigned uct_flags,
@@ -538,6 +560,7 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
538560
void *reg_address;
539561
size_t reg_length;
540562
size_t reg_align;
563+
ucp_sys_dev_map_t sys_dev_map;
541564

542565
if (gva_enable) {
543566
status = ucp_memh_register_gva(context, memh, md_map);
@@ -565,7 +588,8 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
565588
(reg_md_map & context->dmabuf_reg_md_map)) {
566589
/* Query dmabuf file descriptor and offset */
567590
mem_attr.field_mask = UCT_MD_MEM_ATTR_FIELD_DMABUF_FD |
568-
UCT_MD_MEM_ATTR_FIELD_DMABUF_OFFSET;
591+
UCT_MD_MEM_ATTR_FIELD_DMABUF_OFFSET |
592+
UCT_MD_MEM_ATTR_FIELD_SYS_DEV;
569593
status = uct_md_mem_query(context->tl_mds[dmabuf_prov_md_index].md,
570594
address, length, &mem_attr);
571595
if (status != UCS_OK) {
@@ -574,12 +598,24 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
574598
address, length, ucs_status_string(status));
575599
} else {
576600
ucs_trace("uct_md_mem_query(dmabuf address %p length %zu) returned "
577-
"fd %d offset %zu",
601+
"fd %d offset %zu sys_dev %u",
578602
address, length, mem_attr.dmabuf_fd,
579-
mem_attr.dmabuf_offset);
603+
mem_attr.dmabuf_offset, mem_attr.sys_dev);
604+
580605
dmabuf_md_map = context->dmabuf_reg_md_map;
581606
reg_params.dmabuf_fd = mem_attr.dmabuf_fd;
582607
reg_params.dmabuf_offset = mem_attr.dmabuf_offset;
608+
609+
/* Exclude any unreachable MD from registration */
610+
ucs_for_each_bit(md_index, dmabuf_md_map) {
611+
sys_dev_map = context->tl_mds[md_index].sys_dev_map;
612+
if (!ucp_memh_sys_dev_reachable(mem_attr.sys_dev,
613+
sys_dev_map)) {
614+
ucs_trace("md[%d] skipped: cannot reach mem_sys_dev=%u",
615+
md_index, mem_attr.sys_dev);
616+
reg_md_map &= ~UCS_BIT(md_index);
617+
}
618+
}
583619
}
584620
}
585621

@@ -835,7 +871,7 @@ static ucs_status_t ucp_memh_init_uct_reg(ucp_context_h context, ucp_mem_h memh,
835871
goto err;
836872
}
837873

838-
ucp_memh_init_from_parent(memh, cache_md_map);
874+
ucp_memh_init_from_parent(memh, memh->parent->md_map);
839875

840876
status = ucp_memh_register(context, memh, reg_md_map, uct_flags,
841877
alloc_name);

src/ucp/proto/proto_common.c

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -576,6 +576,7 @@ ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
576576
ucp_md_index_t md_index;
577577
ucp_lane_map_t lane_map;
578578
char lane_desc[64];
579+
ucs_sys_device_t lane_sys_dev;
579580

580581
if (max_lanes == 0) {
581582
return 0;
@@ -676,6 +677,15 @@ ucp_proto_common_find_lanes(const ucp_proto_init_params_t *params,
676677
}
677678
}
678679

680+
/* The two devices must also have internal reachability */
681+
lane_sys_dev = context->tl_rscs[rsc_index].tl_rsc.sys_device;
682+
if (!ucs_topo_is_reachable(lane_sys_dev, select_param->sys_dev)) {
683+
ucs_trace("%s: no reachability between lane_sys_dev=%u and "
684+
"sys_dev=%u",
685+
lane_desc, lane_sys_dev, select_param->sys_dev);
686+
continue;
687+
}
688+
679689
ucs_trace("%s: added as lane %d", lane_desc, lane);
680690
lanes[num_lanes++] = lane;
681691
}

0 commit comments

Comments
 (0)