Skip to content

Commit 1c6e882

Browse files
committed
UCT/CUDA_IPC: Enforce host memory support for mem_type EP
1 parent e3cb7d6 commit 1c6e882

File tree

3 files changed

+11
-8
lines changed

3 files changed

+11
-8
lines changed

src/ucp/core/ucp_ep.c

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -684,12 +684,18 @@ ucs_status_t ucp_worker_mem_type_eps_create(ucp_worker_h worker)
684684
ucs_status_t status;
685685
void *address_buffer;
686686
size_t address_length;
687-
ucp_tl_bitmap_t mem_access_tls;
687+
ucp_tl_bitmap_t mem_access_tls, host_mem_access_tls;
688688
char ep_name[UCP_WORKER_ADDRESS_NAME_MAX];
689689
unsigned addr_indices[UCP_MAX_LANES];
690+
ucp_lane_index_t num_lanes;
690691

691692
ucs_memory_type_for_each(mem_type) {
692693
ucp_context_memaccess_tl_bitmap(context, mem_type, 0, &mem_access_tls);
694+
/* Mem type EP requires host memory support */
695+
ucp_context_memaccess_tl_bitmap(context, UCS_MEMORY_TYPE_HOST, 0,
696+
&host_mem_access_tls);
697+
UCS_STATIC_BITMAP_AND_INPLACE(&mem_access_tls, host_mem_access_tls);
698+
693699
if (UCP_MEM_IS_HOST(mem_type) ||
694700
UCS_STATIC_BITMAP_IS_ZERO(mem_access_tls)) {
695701
continue;
@@ -725,6 +731,9 @@ ucs_status_t ucp_worker_mem_type_eps_create(ucp_worker_h worker)
725731
goto err_free_address_list;
726732
}
727733

734+
/* Mem type EP cannot have more than one lane */
735+
num_lanes = ucp_ep_num_lanes(worker->mem_type_ep[mem_type]);
736+
ucs_assertv_always(num_lanes == 1, "num_lanes=%u", num_lanes);
728737
UCS_ASYNC_UNBLOCK(&worker->async);
729738

730739
ucs_free(local_address.address_list);

src/uct/cuda/cuda_ipc/cuda_ipc_iface.c

Lines changed: 1 addition & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -74,10 +74,6 @@ static ucs_config_field_t uct_cuda_ipc_iface_config_table[] = {
7474
"Estimated CPU overhead for transferring GPU memory",
7575
ucs_offsetof(uct_cuda_ipc_iface_config_t, params.overhead), UCS_CONFIG_TYPE_TIME},
7676

77-
{"ENABLE_SAME_PROCESS", "n",
78-
"Enable same process same device communication for cuda_ipc",
79-
ucs_offsetof(uct_cuda_ipc_iface_config_t, params.enable_same_process), UCS_CONFIG_TYPE_BOOL},
80-
8177
{NULL}
8278
};
8379

@@ -146,8 +142,7 @@ uct_cuda_ipc_iface_is_reachable_v2(const uct_iface_h tl_iface,
146142
dev_addr = (const uct_cuda_ipc_device_addr_t *)params->device_addr;
147143
same_uuid = (ucs_get_system_id() == dev_addr->system_uuid);
148144

149-
if ((getpid() == *(pid_t*)params->iface_addr) && same_uuid &&
150-
!iface->config.enable_same_process) {
145+
if ((getpid() == *(pid_t*)params->iface_addr) && same_uuid) {
151146
uct_iface_fill_info_str_buf(params, "same process");
152147
return 0;
153148
}

src/uct/cuda/cuda_ipc/cuda_ipc_iface.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ typedef struct {
2828
double bandwidth; /* estimated bandwidth */
2929
double latency; /* estimated latency */
3030
double overhead; /* estimated CPU overhead */
31-
int enable_same_process; /* enable cuda_ipc for same pid same device */
3231
} uct_cuda_ipc_iface_config_params_t;
3332

3433

0 commit comments

Comments
 (0)