Skip to content

Commit b8728e4

Browse files
committed
UCT/CUDA_IPC: Enforce host memory support for mem_type EP
1 parent e3d088d commit b8728e4

File tree

6 files changed: +13 -35 lines changed

6 files changed: +13 -35 lines changed

src/ucp/core/ucp_ep.c

Lines changed: 10 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -684,12 +684,18 @@ ucs_status_t ucp_worker_mem_type_eps_create(ucp_worker_h worker)
684684
ucs_status_t status;
685685
void *address_buffer;
686686
size_t address_length;
687-
ucp_tl_bitmap_t mem_access_tls;
687+
ucp_tl_bitmap_t mem_access_tls, host_mem_access_tls;
688688
char ep_name[UCP_WORKER_ADDRESS_NAME_MAX];
689689
unsigned addr_indices[UCP_MAX_LANES];
690+
ucp_lane_index_t num_lanes;
690691

691692
ucs_memory_type_for_each(mem_type) {
692693
ucp_context_memaccess_tl_bitmap(context, mem_type, 0, &mem_access_tls);
694+
/* Mem type EP requires host memory support */
695+
ucp_context_memaccess_tl_bitmap(context, UCS_MEMORY_TYPE_HOST, 0,
696+
&host_mem_access_tls);
697+
UCS_STATIC_BITMAP_AND_INPLACE(&mem_access_tls, host_mem_access_tls);
698+
693699
if (UCP_MEM_IS_HOST(mem_type) ||
694700
UCS_STATIC_BITMAP_IS_ZERO(mem_access_tls)) {
695701
continue;
@@ -725,6 +731,9 @@ ucs_status_t ucp_worker_mem_type_eps_create(ucp_worker_h worker)
725731
goto err_free_address_list;
726732
}
727733

734+
/* Mem type EP cannot have more than one lane */
735+
num_lanes = ucp_ep_num_lanes(worker->mem_type_ep[mem_type]);
736+
ucs_assertv_always(num_lanes == 1, "num_lanes=%u", num_lanes);
728737
UCS_ASYNC_UNBLOCK(&worker->async);
729738

730739
ucs_free(local_address.address_list);

src/uct/cuda/cuda_ipc/cuda_ipc_iface.c

Lines changed: 0 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -74,10 +74,6 @@ static ucs_config_field_t uct_cuda_ipc_iface_config_table[] = {
7474
"Estimated CPU overhead for transferring GPU memory",
7575
ucs_offsetof(uct_cuda_ipc_iface_config_t, params.overhead), UCS_CONFIG_TYPE_TIME},
7676

77-
{"ENABLE_SAME_PROCESS", "n",
78-
"Enable same process same device communication for cuda_ipc",
79-
ucs_offsetof(uct_cuda_ipc_iface_config_t, params.enable_same_process), UCS_CONFIG_TYPE_BOOL},
80-
8177
{NULL}
8278
};
8379

@@ -146,12 +142,6 @@ uct_cuda_ipc_iface_is_reachable_v2(const uct_iface_h tl_iface,
146142
dev_addr = (const uct_cuda_ipc_device_addr_t *)params->device_addr;
147143
same_uuid = (ucs_get_system_id() == dev_addr->system_uuid);
148144

149-
if ((getpid() == *(pid_t*)params->iface_addr) && same_uuid &&
150-
!iface->config.enable_same_process) {
151-
uct_iface_fill_info_str_buf(params, "same process");
152-
return 0;
153-
}
154-
155145
if (same_uuid ||
156146
uct_cuda_ipc_iface_mnnvl_supported(md, dev_addr, dev_addr_len)) {
157147
return uct_iface_scope_is_reachable(tl_iface, params);

src/uct/cuda/cuda_ipc/cuda_ipc_iface.h

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,6 @@ typedef struct {
2828
double bandwidth; /* estimated bandwidth */
2929
double latency; /* estimated latency */
3030
double overhead; /* estimated CPU overhead */
31-
int enable_same_process; /* enable cuda_ipc for same pid same device */
3231
} uct_cuda_ipc_iface_config_params_t;
3332

3433

test/gtest/ucp/test_ucp_device.cc

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -78,7 +78,6 @@ void test_ucp_device::get_test_variants(std::vector<ucp_test_variant> &variants)
7878

7979
void test_ucp_device::init()
8080
{
81-
m_env.push_back(new ucs::scoped_setenv("UCX_CUDA_IPC_ENABLE_SAME_PROCESS", "y"));
8281
m_env.push_back(new ucs::scoped_setenv("UCX_IB_GDA_MAX_SYS_LATENCY", "1us"));
8382
ucp_test::init();
8483
sender().connect(&receiver(), get_ep_params());

test/gtest/ucp/test_ucp_peer_failure.cc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -987,7 +987,7 @@ UCS_TEST_P(test_ucp_peer_failure_rndv_put_ppln_abort, rtr_mtype)
987987
}
988988

989989
UCS_TEST_P(test_ucp_peer_failure_rndv_put_ppln_abort, pipeline,
990-
"RNDV_FRAG_SIZE=host:8K")
990+
"RNDV_FRAG_SIZE=host:8K,cuda:8K")
991991
{
992992
rndv_progress_failure_test(rndv_mode::put_ppln, true);
993993
}

test/gtest/uct/test_uct_iface.cc

Lines changed: 2 additions & 21 deletions
Original file line number | Diff line number | Diff line change
@@ -24,11 +24,6 @@ class test_uct_iface : public uct_test {
2424
}
2525

2626
void test_is_reachable();
27-
28-
virtual bool is_self_reachable() const
29-
{
30-
return true;
31-
}
3227
};
3328

3429
void test_uct_iface::test_is_reachable()
@@ -63,7 +58,7 @@ void test_uct_iface::test_is_reachable()
6358
ASSERT_UCS_OK(status);
6459

6560
bool is_reachable = uct_iface_is_reachable_v2(iface, &params);
66-
EXPECT_EQ(is_self_reachable(), is_reachable);
61+
EXPECT_TRUE(is_reachable);
6762

6863
// Allocate corrupted address buffers, make it larger than the correct
6964
// buffer size in case the corrupted data indicates a larger address length
@@ -98,18 +93,4 @@ UCS_TEST_P(test_uct_iface, is_reachable)
9893
}
9994

10095
UCT_INSTANTIATE_TEST_CASE(test_uct_iface)
101-
102-
class test_uct_iface_self_unreachable : public test_uct_iface {
103-
protected:
104-
bool is_self_reachable() const override
105-
{
106-
return false;
107-
}
108-
};
109-
110-
UCS_TEST_P(test_uct_iface_self_unreachable, is_reachable)
111-
{
112-
test_is_reachable();
113-
}
114-
115-
UCT_INSTANTIATE_CUDA_IPC_TEST_CASE(test_uct_iface_self_unreachable)
96+
UCT_INSTANTIATE_CUDA_IPC_TEST_CASE(test_uct_iface)

0 commit comments

Comments
 (0)