@@ -518,6 +518,28 @@ static ucs_status_t ucp_memh_register_gva(ucp_context_h context, ucp_mem_h memh,
518518 return UCS_OK ;
519519}
520520
521+ static int ucp_memh_sys_dev_reachable (ucs_sys_device_t mem_sys_dev ,
522+ ucp_sys_dev_map_t sys_dev_map )
523+ {
524+ ucs_sys_device_t sys_dev ;
525+
526+ if (mem_sys_dev == UCS_SYS_DEVICE_ID_UNKNOWN ) {
527+ return 1 ;
528+ }
529+
530+ /*
531+ * If at least one sys_dev is not reachable, do not register on it
532+ * as we cannot know in advance which device is going to be used.
533+ */
534+ ucs_for_each_bit (sys_dev , sys_dev_map ) {
535+ if (!ucs_topo_is_reachable (sys_dev , mem_sys_dev )) {
536+ return 0 ;
537+ }
538+ }
539+
540+ return 1 ;
541+ }
542+
521543static ucs_status_t
522544ucp_memh_register_internal (ucp_context_h context , ucp_mem_h memh ,
523545 ucp_md_map_t md_map , unsigned uct_flags ,
@@ -538,6 +560,7 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
538560 void * reg_address ;
539561 size_t reg_length ;
540562 size_t reg_align ;
563+ ucp_sys_dev_map_t sys_dev_map ;
541564
542565 if (gva_enable ) {
543566 status = ucp_memh_register_gva (context , memh , md_map );
@@ -565,7 +588,8 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
565588 (reg_md_map & context -> dmabuf_reg_md_map )) {
566589 /* Query dmabuf file descriptor and offset */
567590 mem_attr .field_mask = UCT_MD_MEM_ATTR_FIELD_DMABUF_FD |
568- UCT_MD_MEM_ATTR_FIELD_DMABUF_OFFSET ;
591+ UCT_MD_MEM_ATTR_FIELD_DMABUF_OFFSET |
592+ UCT_MD_MEM_ATTR_FIELD_SYS_DEV ;
569593 status = uct_md_mem_query (context -> tl_mds [dmabuf_prov_md_index ].md ,
570594 address , length , & mem_attr );
571595 if (status != UCS_OK ) {
@@ -574,12 +598,24 @@ ucp_memh_register_internal(ucp_context_h context, ucp_mem_h memh,
574598 address , length , ucs_status_string (status ));
575599 } else {
576600 ucs_trace ("uct_md_mem_query(dmabuf address %p length %zu) returned "
577- "fd %d offset %zu" ,
601+ "fd %d offset %zu sys_dev %u " ,
578602 address , length , mem_attr .dmabuf_fd ,
579- mem_attr .dmabuf_offset );
603+ mem_attr .dmabuf_offset , mem_attr .sys_dev );
604+
580605 dmabuf_md_map = context -> dmabuf_reg_md_map ;
581606 reg_params .dmabuf_fd = mem_attr .dmabuf_fd ;
582607 reg_params .dmabuf_offset = mem_attr .dmabuf_offset ;
608+
609+ /* Exclude any unreachable MD from registration */
610+ ucs_for_each_bit (md_index , dmabuf_md_map ) {
611+ sys_dev_map = context -> tl_mds [md_index ].sys_dev_map ;
612+ if (!ucp_memh_sys_dev_reachable (mem_attr .sys_dev ,
613+ sys_dev_map )) {
614+ ucs_trace ("md[%d] skipped: cannot reach mem_sys_dev=%u" ,
615+ md_index , mem_attr .sys_dev );
616+ reg_md_map &= ~UCS_BIT (md_index );
617+ }
618+ }
583619 }
584620 }
585621
@@ -835,7 +871,7 @@ static ucs_status_t ucp_memh_init_uct_reg(ucp_context_h context, ucp_mem_h memh,
835871 goto err ;
836872 }
837873
838- ucp_memh_init_from_parent (memh , cache_md_map );
874+ ucp_memh_init_from_parent (memh , memh -> parent -> md_map );
839875
840876 status = ucp_memh_register (context , memh , reg_md_map , uct_flags ,
841877 alloc_name );
0 commit comments