Skip to content

Commit 67b5b78

Browse files
committed
UCT/ROCM: initial commit for device initiated ipc put
1 parent 9c82940 commit 67b5b78

File tree

17 files changed

+1064
-47
lines changed

17 files changed

+1064
-47
lines changed

config/hip.am

Lines changed: 39 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,39 @@
1+
#
2+
# Copyright (c) Advanced Micro Devices, Inc. 2026. ALL RIGHTS RESERVED.
3+
# See file LICENSE for terms.
4+
#
5+
6+
SUFFIXES = .hip
7+
8+
HIPCC ?= hipcc
9+
10+
HIPCC_CMD = $(HIPCC) -DHAVE_CONFIG_H -DUCT_DEVICE_CODE_HIP -fPIE -I$(top_srcdir)/src -I$(top_builddir)/src $(BASE_CXXFLAGS) $(CXXFLAGS) $(HIP_CPPFLAGS) $(HIP_CXXFLAGS) $(HIPCC_EXTRA_FLAGS) -Wno-c++20-extensions -c $< -MT $@ -MF $(DEPDIR)/hip/$@.d -MMD -o $@
11+
HIPCC_LT_CMD = $(LIBTOOL) --tag=CXX --mode=compile $(HIPCC_CMD)
12+
13+
define hipcc-build
14+
@$(MKDIR_P) $(shell dirname $(DEPDIR)/hip/$@)
15+
@$(if $(filter false,$(AM_V_P)),echo " HIPCC $@")
16+
@$(if $(filter .o,$(suffix $@)),$(HIPCC_CMD),$(HIPCC_LT_CMD)) $($(1)) $(if $(filter false,$(AM_V_P)), >/dev/null)
17+
endef
18+
19+
define hipcc-source
20+
EXTRA_DIST += $(2)
21+
$(1): $(2)
22+
$$(call hipcc-build,$(3))
23+
endef
24+
25+
# Default rules when no target-specific compile flags are required
26+
.hip.o:
27+
$(call hipcc-build)
28+
29+
.hip.lo:
30+
$(call hipcc-build)
31+
32+
HIP_DEP_FILES := $(shell find $(DEPDIR)/hip/ -type f -name *.d 2>/dev/null)
33+
-include $(HIP_DEP_FILES)
34+
35+
clean-local:
36+
-rm -rf $(DEPDIR)/hip
37+
38+
distclean-local:
39+
-rm -rf $(DEPDIR)/hip

src/ucp/core/ucp_device.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/**
22
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2025. ALL RIGHTS RESERVED.
3+
* Copyright (C) Advanced Micro Devices, Inc. 2026. ALL RIGHTS RESERVED.
34
*
45
* See file LICENSE for terms.
56
*/
@@ -434,7 +435,11 @@ ucs_status_t
434435
ucp_device_local_mem_list_create(const ucp_device_mem_list_params_t *params,
435436
ucp_device_local_mem_list_h *mem_list_h)
436437
{
438+
#if HAVE_ROCM
439+
const ucs_memory_type_t export_mem_type = UCS_MEMORY_TYPE_ROCM;
440+
#else
437441
const ucs_memory_type_t export_mem_type = UCS_MEMORY_TYPE_CUDA;
442+
#endif
438443
ucs_status_t status;
439444
uct_allocated_memory_t mem;
440445
ucs_sys_device_t local_sys_dev;
@@ -682,7 +687,11 @@ ucs_status_t
682687
ucp_device_remote_mem_list_create(const ucp_device_mem_list_params_t *params,
683688
ucp_device_remote_mem_list_h *mem_list_h)
684689
{
690+
#if HAVE_ROCM
691+
const ucs_memory_type_t export_mem_type = UCS_MEMORY_TYPE_ROCM;
692+
#else
685693
const ucs_memory_type_t export_mem_type = UCS_MEMORY_TYPE_CUDA;
694+
#endif
686695
ucs_status_t status;
687696
uct_allocated_memory_t mem;
688697

src/ucp/wireup/select.c

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/**
22
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2016. ALL RIGHTS RESERVED.
33
* Copyright (C) Los Alamos National Security, LLC. 2019 ALL RIGHTS RESERVED.
4+
* Copyright (C) Advanced Micro Devices, Inc. 2026. ALL RIGHTS RESERVED.
45
*
56
* See file LICENSE for terms.
67
*/
@@ -2504,6 +2505,14 @@ ucp_wireup_add_device_lanes(const ucp_wireup_select_params_t *select_params,
25042505
found_lane = ucp_wireup_add_bw_lanes(select_params, &bw_info,
25052506
mem_type_tl_bitmap, UCP_NULL_LANE,
25062507
select_ctx, 0);
2508+
2509+
/* Add device lanes for ROCm memory */
2510+
ucp_wireup_memaccess_bitmap(context, UCS_MEMORY_TYPE_ROCM,
2511+
&mem_type_tl_bitmap);
2512+
found_lane |= ucp_wireup_add_bw_lanes(select_params, &bw_info,
2513+
mem_type_tl_bitmap, UCP_NULL_LANE,
2514+
select_ctx, 0);
2515+
25072516
if (!found_lane) {
25082517
ucs_debug("ep %p: could not find device lanes", select_params->ep);
25092518
}

src/ucs/sys/device_code.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/**
22
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2025. ALL RIGHTS RESERVED.
3+
* Copyright (C) Advanced Micro Devices, Inc. 2026. ALL RIGHTS RESERVED.
34
*
45
* See file LICENSE for terms.
56
*/
@@ -13,11 +14,11 @@
1314
/*
1415
* Declare GPU specific functions
1516
*/
16-
#ifdef __NVCC__
17+
#if defined(__NVCC__) || defined(__HIPCC__)
1718
#define UCS_F_DEVICE __device__ __forceinline__ static
1819
#else
1920
#define UCS_F_DEVICE static inline
20-
#endif /* __NVCC__ */
21+
#endif /* __NVCC__ || __HIPCC__ */
2122

2223

2324
#ifndef UCP_DEVICE_ENABLE_PARAMS_CHECK

src/uct/api/device/uct_device_impl.h

Lines changed: 40 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
/**
22
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2025. ALL RIGHTS RESERVED.
3+
* Copyright (C) Advanced Micro Devices, Inc. 2026. ALL RIGHTS RESERVED.
34
*
45
* See file LICENSE for terms.
56
*/
@@ -10,10 +11,14 @@
1011
#include "uct_device_types.h"
1112

1213
#include <uct/api/uct_def.h>
14+
#if HAVE_ROCM
15+
#include <uct/rocm/ipc/rocm_ipc.h>
16+
#else
1317
#include <uct/cuda/cuda_ipc/cuda_ipc.cuh>
18+
#endif
1419
#include <ucs/sys/device_code.h>
1520

16-
#if __has_include(<uct/ib/mlx5/gdaki/gdaki.cuh>) && __has_include(<infiniband/mlx5dv.h>)
21+
#if defined(__NVCC__) && __has_include(<uct/ib/mlx5/gdaki/gdaki.cuh>) && __has_include(<infiniband/mlx5dv.h>)
1722
# include <uct/ib/mlx5/gdaki/gdaki.cuh>
1823
# define UCT_RC_MLX5_GDA_SUPPORTED 1
1924
#else
@@ -24,7 +29,11 @@ union uct_device_completion {
2429
#if UCT_RC_MLX5_GDA_SUPPORTED
2530
uct_rc_gda_completion_t rc_gda;
2631
#endif
32+
#if HAVE_ROCM
33+
uct_rocm_ipc_completion_t rocm_ipc;
34+
#else
2735
uct_cuda_ipc_completion_t cuda_ipc;
36+
#endif
2837
};
2938

3039

@@ -73,12 +82,20 @@ uct_device_ep_put(uct_device_ep_h device_ep,
7382
channel_id, flags, comp);
7483
} else
7584
#endif
85+
#if HAVE_ROCM
86+
if (device_ep->uct_tl_id == UCT_DEVICE_TL_ROCM_IPC) {
87+
return uct_rocm_ipc_ep_put<level>(device_ep, mem_elem, address,
88+
remote_address, length, flags, comp);
89+
} else
90+
#else
7691
if (device_ep->uct_tl_id == UCT_DEVICE_TL_CUDA_IPC) {
7792
return uct_cuda_ipc_ep_put<level>(device_ep, mem_elem, address,
7893
remote_address, length, flags, comp);
94+
} else
95+
#endif
96+
{
97+
return UCS_ERR_UNSUPPORTED;
7998
}
80-
81-
return UCS_ERR_UNSUPPORTED;
8299
}
83100

84101

@@ -122,12 +139,20 @@ UCS_F_DEVICE ucs_status_t uct_device_ep_atomic_add(
122139
channel_id, flags, comp);
123140
} else
124141
#endif
142+
#if HAVE_ROCM
143+
if (device_ep->uct_tl_id == UCT_DEVICE_TL_ROCM_IPC) {
144+
return uct_rocm_ipc_ep_atomic_add<level>(device_ep, mem_elem, inc_value,
145+
remote_address, flags, comp);
146+
} else
147+
#else
125148
if (device_ep->uct_tl_id == UCT_DEVICE_TL_CUDA_IPC) {
126149
return uct_cuda_ipc_ep_atomic_add<level>(device_ep, mem_elem, inc_value,
127150
remote_address, flags, comp);
151+
} else
152+
#endif
153+
{
154+
return UCS_ERR_UNSUPPORTED;
128155
}
129-
130-
return UCS_ERR_UNSUPPORTED;
131156
}
132157

133158

@@ -149,14 +174,20 @@ UCS_F_DEVICE ucs_status_t uct_device_ep_get_ptr(
149174
uct_device_ep_h device_ep, const uct_device_mem_element_t *mem_elem,
150175
uint64_t address, void **addr_p)
151176
{
152-
if (device_ep->uct_tl_id != UCT_DEVICE_TL_CUDA_IPC) {
177+
#if HAVE_ROCM
178+
if (device_ep->uct_tl_id == UCT_DEVICE_TL_ROCM_IPC) {
179+
return uct_rocm_ipc_ep_get_ptr(device_ep, mem_elem, address, addr_p);
180+
} else
181+
#else
182+
if (device_ep->uct_tl_id == UCT_DEVICE_TL_CUDA_IPC) {
183+
return uct_cuda_ipc_ep_get_ptr(device_ep, mem_elem, address, addr_p);
184+
} else
185+
#endif
186+
{
153187
return UCS_ERR_UNSUPPORTED;
154188
}
155-
156-
return uct_cuda_ipc_ep_get_ptr(device_ep, mem_elem, address, addr_p);
157189
}
158190

159-
160191
/**
161192
* @ingroup UCT_DEVICE
162193
* @brief Progress all operations on device endpoint @a device_ep.

src/uct/api/device/uct_device_types.h

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
/**
22
* Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2025. ALL RIGHTS RESERVED.
3-
*
3+
* Copyright (C) Advanced Micro Devices, Inc. 2026. ALL RIGHTS RESERVED.
4+
*
45
* See file LICENSE for terms.
56
*/
67

@@ -36,6 +37,21 @@ typedef struct {
3637
} uct_cuda_ipc_completion_t;
3738

3839

40+
/**
41+
* @brief Device memory element for ROCm IPC.
42+
*/
43+
typedef struct {
44+
ptrdiff_t mapped_offset;
45+
} uct_rocm_ipc_device_mem_element_t;
46+
47+
48+
/**
49+
* @brief Completion object for device ROCm IPC operations.
50+
*/
51+
typedef struct {
52+
} uct_rocm_ipc_completion_t;
53+
54+
3955
/**
4056
* @brief Device memory element for GDAKI.
4157
*/
@@ -57,6 +73,7 @@ typedef enum {
5773
typedef enum {
5874
UCT_DEVICE_TL_RC_MLX5_GDA,
5975
UCT_DEVICE_TL_CUDA_IPC,
76+
UCT_DEVICE_TL_ROCM_IPC,
6077
UCT_DEVICE_TL_LAST
6178
} uct_device_tl_id_t;
6279

@@ -75,6 +92,7 @@ typedef union uct_device_completion uct_device_completion_t;
7592
union uct_device_mem_element {
7693
uct_ib_md_device_mem_element_t ib_md_mem_element;
7794
uct_cuda_ipc_md_device_mem_element_t cuda_ipc_md_mem_element;
95+
uct_rocm_ipc_device_mem_element_t rocm_ipc_mem_element;
7896
};
7997

8098

src/uct/rocm/Makefile.am

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
#
22
# Copyright (c) NVIDIA CORPORATION & AFFILIATES, 2001-2018. ALL RIGHTS RESERVED.
3+
# Copyright (C) Advanced Micro Devices, Inc. 2026. ALL RIGHTS RESERVED.
34
# See file LICENSE for terms.
45
#
56

@@ -14,6 +15,10 @@ libuct_rocm_la_LDFLAGS = $(ROCM_LDFLAGS) $(ROCM_LIBS) -version-info $(SOVERSION
1415
$(patsubst %, -Xlinker %, -L$(ROCM_ROOT)/lib -rpath $(ROCM_ROOT)/hip/lib -rpath $(ROCM_ROOT)/lib) \
1516
$(patsubst %, -Xlinker %, --enable-new-dtags) \
1617
$(patsubst %, -Xlinker %, -rpath $(ROCM_ROOT)/lib64)
18+
libuct_rocm_ladir = $(includedir)/uct/rocm
19+
20+
nobase_dist_libuct_rocm_la_HEADERS = \
21+
ipc/rocm_ipc.h
1722

1823
noinst_HEADERS = \
1924
base/rocm_base.h \

src/uct/rocm/copy/rocm_copy_md.c

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
/*
2-
* Copyright (C) Advanced Micro Devices, Inc. 2019-2023. ALL RIGHTS RESERVED.
2+
* Copyright (C) Advanced Micro Devices, Inc. 2019-2026. ALL RIGHTS RESERVED.
33
* See file LICENSE for terms.
44
*/
55

@@ -57,7 +57,8 @@ uct_rocm_copy_md_query(uct_md_h uct_md, uct_md_attr_v2_t *md_attr)
5757
md_attr->cache_mem_types = UCS_BIT(UCS_MEMORY_TYPE_HOST) |
5858
UCS_BIT(UCS_MEMORY_TYPE_ROCM);
5959
md_attr->alloc_mem_types = UCS_BIT(UCS_MEMORY_TYPE_ROCM);
60-
md_attr->access_mem_types = UCS_BIT(UCS_MEMORY_TYPE_ROCM);
60+
md_attr->access_mem_types = UCS_BIT(UCS_MEMORY_TYPE_HOST) |
61+
UCS_BIT(UCS_MEMORY_TYPE_ROCM);
6162
md_attr->detect_mem_types = UCS_BIT(UCS_MEMORY_TYPE_ROCM);
6263
if (md->have_dmabuf) {
6364
md_attr->dmabuf_mem_types |= UCS_BIT(UCS_MEMORY_TYPE_ROCM);

0 commit comments

Comments
 (0)