Skip to content

Commit afdf77b

Browse files
authored
Merge pull request #10680 from rakhmets/topic/nvml-wrap
UCT/CUDA: Added wrappers for nvml functions.
2 parents abd477a + 0e1d788 commit afdf77b

File tree

12 files changed

+379
-107
lines changed

12 files changed

+379
-107
lines changed

config/m4/cuda.m4

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -76,10 +76,10 @@ AS_IF([test "x$cuda_checked" != "xyes"],
7676
[AC_MSG_ERROR([libnvidia-ml not found. Install appropriate nvidia-driver package])])
7777
cuda_happy="no"])])
7878
79-
# Check for nvmlDeviceGetGpuFabricInfo
80-
AC_CHECK_DECLS([nvmlDeviceGetGpuFabricInfo],
79+
# Check for nvmlDeviceGetGpuFabricInfoV
80+
AC_CHECK_DECLS([nvmlDeviceGetGpuFabricInfoV],
8181
[AC_DEFINE([HAVE_NVML_FABRIC_INFO], 1, [Enable NVML GPU fabric info support])],
82-
[AC_MSG_NOTICE([nvmlDeviceGetGpuFabricInfo function not found in libnvidia-ml. MNNVL support will be disabled.])],
82+
[AC_MSG_NOTICE([nvmlDeviceGetGpuFabricInfoV function not found in libnvidia-ml. MNNVL support will be disabled.])],
8383
[[#include <nvml.h>]])
8484
8585

src/ucm/cuda/cudamem.c

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -27,17 +27,17 @@
2727
#define UCM_CUDA_ALLOC_FUNC(_name, _retval, _success, _size, _ptr_type, _ref, \
2828
_args_fmt, ...) \
2929
_retval ucm_##_name(_ptr_type _ref ptr_arg, \
30-
UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
30+
UCS_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
3131
{ \
3232
_ptr_type ptr; \
3333
_retval ret; \
3434
\
3535
ucm_event_enter(); \
36-
ret = ucm_orig_##_name(ptr_arg, UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
36+
ret = ucm_orig_##_name(ptr_arg, UCS_FUNC_PASS_ARGS(__VA_ARGS__)); \
3737
if (ret == (_success)) { \
3838
ptr = _ref ptr_arg; \
3939
ucm_trace("%s(" _args_fmt ") allocated %p", __func__, \
40-
UCM_FUNC_PASS_ARGS(__VA_ARGS__), (void*)ptr); \
40+
UCS_FUNC_PASS_ARGS(__VA_ARGS__), (void*)ptr); \
4141
ucm_cuda_dispatch_mem_alloc((CUdeviceptr)ptr, (_size)); \
4242
} \
4343
ucm_event_leave(); \
@@ -47,16 +47,16 @@
4747
/* Create a body of CUDA memory release replacement function */
4848
#define UCM_CUDA_FREE_FUNC(_name, _mem_type, _retval, _ptr_arg, _size, \
4949
_args_fmt, ...) \
50-
_retval ucm_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
50+
_retval ucm_##_name(UCS_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
5151
{ \
5252
_retval ret; \
5353
\
5454
ucm_event_enter(); \
5555
ucm_trace("%s(" _args_fmt ")", __func__, \
56-
UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
56+
UCS_FUNC_PASS_ARGS(__VA_ARGS__)); \
5757
ucm_cuda_dispatch_mem_free((CUdeviceptr)(_ptr_arg), _size, _mem_type, \
5858
#_name); \
59-
ret = ucm_orig_##_name(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
59+
ret = ucm_orig_##_name(UCS_FUNC_PASS_ARGS(__VA_ARGS__)); \
6060
ucm_event_leave(); \
6161
return ret; \
6262
}

src/ucm/util/replace.h

Lines changed: 13 additions & 36 deletions
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,7 @@
1010

1111
#include <ucm/bistro/bistro.h>
1212
#include <ucs/datastruct/list.h>
13+
#include <ucs/sys/preprocessor.h>
1314
#include <ucs/type/status.h>
1415
#include <pthread.h>
1516

@@ -30,7 +31,7 @@ extern pthread_t volatile ucm_reloc_get_orig_thread;
3031
#define _UCM_DEFINE_REPLACE_FUNC(_over_name, _ucm_name, _rettype, _fail_val, ...) \
3132
\
3233
/* Define a symbol which goes to the replacement - in case we are loaded first */ \
33-
_rettype _over_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
34+
_rettype _over_name(UCS_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
3435
{ \
3536
_rettype res; \
3637
UCM_BISTRO_PROLOGUE; \
@@ -39,7 +40,7 @@ extern pthread_t volatile ucm_reloc_get_orig_thread;
3940
if (ucs_unlikely(ucm_reloc_get_orig_thread == pthread_self())) { \
4041
return (_rettype)_fail_val; \
4142
} \
42-
res = _ucm_name(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
43+
res = _ucm_name(UCS_FUNC_PASS_ARGS(__VA_ARGS__)); \
4344
UCM_BISTRO_EPILOGUE; \
4445
return res; \
4546
}
@@ -48,11 +49,12 @@ extern pthread_t volatile ucm_reloc_get_orig_thread;
4849
_UCM_DEFINE_DLSYM_FUNC(_name, ucm_orig_##_name, ucm_override_##_name, \
4950
_rettype, __VA_ARGS__)
5051

52+
5153
#define _UCM_DEFINE_DLSYM_FUNC(_name, _orig_name, _over_name, _rettype, ...) \
52-
_rettype _over_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)); \
54+
_rettype _over_name(UCS_FUNC_DEFINE_ARGS(__VA_ARGS__)); \
5355
\
5456
/* Call the original function using dlsym(RTLD_NEXT) */ \
55-
_rettype _orig_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
57+
_rettype _orig_name(UCS_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
5658
{ \
5759
typedef _rettype (*func_ptr_t) (__VA_ARGS__); \
5860
static func_ptr_t orig_func_ptr = NULL; \
@@ -67,7 +69,7 @@ extern pthread_t volatile ucm_reloc_get_orig_thread;
6769
ucm_reloc_get_orig_thread = (pthread_t)-1; \
6870
pthread_mutex_unlock(&ucm_reloc_get_orig_lock); \
6971
} \
70-
return orig_func_ptr(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
72+
return orig_func_ptr(UCS_FUNC_PASS_ARGS(__VA_ARGS__)); \
7173
}
7274

7375
#define UCM_DEFINE_REPLACE_DLSYM_FUNC(_name, _rettype, _fail_val, ...) \
@@ -87,58 +89,33 @@ extern pthread_t volatile ucm_reloc_get_orig_thread;
8789
_UCM_DEFINE_DLSYM_FUNC(_name, ucm_orig_##_name##_dlsym, \
8890
ucm_override_##_name, _rettype, __VA_ARGS__) \
8991
\
90-
_rettype (*ucm_orig_##_name)(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) = \
92+
_rettype (*ucm_orig_##_name)(UCS_FUNC_DEFINE_ARGS(__VA_ARGS__)) = \
9193
ucm_orig_##_name##_dlsym; \
9294
\
9395
_UCM_DEFINE_REPLACE_FUNC(ucm_override_##_name, ucm_##_name, \
9496
_rettype, _fail_val, __VA_ARGS__)
9597

9698
#define UCM_DEFINE_SYSCALL_FUNC(_name, _rettype, _syscall_id, ...) \
9799
/* Call syscall */ \
98-
_rettype ucm_orig_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
100+
_rettype ucm_orig_##_name(UCS_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
99101
{ \
100-
return (_rettype)syscall(_syscall_id, UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
102+
return (_rettype)syscall(_syscall_id, UCS_FUNC_PASS_ARGS(__VA_ARGS__)); \
101103
}
102104

103105
#if UCM_BISTRO_HOOKS
104106
# define UCM_DEFINE_SELECT_FUNC(_name, _rettype, _syscall_id, ...) \
105107
_UCM_DEFINE_DLSYM_FUNC(_name, ucm_orig_##_name##_dlsym, \
106108
ucm_override_##_name, _rettype, __VA_ARGS__) \
107109
UCM_DEFINE_SYSCALL_FUNC(_name##_syscall, _rettype, _syscall_id, __VA_ARGS__) \
108-
_rettype ucm_orig_##_name(UCM_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
110+
_rettype ucm_orig_##_name(UCS_FUNC_DEFINE_ARGS(__VA_ARGS__)) \
109111
{ \
110112
return (ucm_mmap_hook_mode() == UCM_MMAP_HOOK_BISTRO) ? \
111-
ucm_orig_##_name##_syscall(UCM_FUNC_PASS_ARGS(__VA_ARGS__)) : \
112-
ucm_orig_##_name##_dlsym(UCM_FUNC_PASS_ARGS(__VA_ARGS__)); \
113+
ucm_orig_##_name##_syscall(UCS_FUNC_PASS_ARGS(__VA_ARGS__)) : \
114+
ucm_orig_##_name##_dlsym(UCS_FUNC_PASS_ARGS(__VA_ARGS__)); \
113115
}
114116
#else
115117
# define UCM_DEFINE_SELECT_FUNC(_name, _rettype, _syscall_id, ...) \
116118
UCM_DEFINE_DLSYM_FUNC(_name, _rettype, __VA_ARGS__)
117119
#endif
118120

119-
/*
120-
* Define argument list with given types.
121-
*/
122-
#define UCM_FUNC_DEFINE_ARGS(...) \
123-
UCS_PP_FOREACH_SEP(_UCM_FUNC_ARG_DEFINE, _, \
124-
UCS_PP_ZIP((UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__))), \
125-
(__VA_ARGS__)))
126-
127-
/*
128-
* Pass auto-generated arguments to a function call.
129-
*/
130-
#define UCM_FUNC_PASS_ARGS(...) \
131-
UCS_PP_FOREACH_SEP(_UCM_FUNC_ARG_PASS, _, UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__)))
132-
133-
134-
/*
135-
* Helpers
136-
*/
137-
#define _UCM_FUNC_ARG_DEFINE(_, _bundle) \
138-
__UCM_FUNC_ARG_DEFINE(_, UCS_PP_TUPLE_0 _bundle, UCS_PP_TUPLE_1 _bundle)
139-
#define __UCM_FUNC_ARG_DEFINE(_, _index, _type) \
140-
_type UCS_PP_TOKENPASTE(arg, _index)
141-
#define _UCM_FUNC_ARG_PASS(_, _index) \
142-
UCS_PP_TOKENPASTE(arg, _index)
143-
144121
#endif

src/ucs/sys/preprocessor.h

Lines changed: 27 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -158,4 +158,31 @@
158158
#define _UCS_PP_SEQ(_n) _UCS_PP_SEQ_##_n
159159
#define UCS_PP_SEQ(_n) _UCS_PP_SEQ(_n)
160160

161+
162+
/*
163+
* Define argument list with given types.
*
* Expands a list of argument types into a function parameter list: the types
* are zipped with an index sequence of the same length (UCS_PP_SEQ applied to
* UCS_PP_NUM_ARGS), and every (index, type) pair is emitted by
* _UCS_FUNC_ARG_DEFINE as a declaration of the form "<type> arg<index>".
* NOTE(review): the first index value and the separator placed between the
* declarations are decided by UCS_PP_SEQ / UCS_PP_FOREACH_SEP, whose bodies
* are not visible here - confirm against their definitions.
164+
*/
165+
#define UCS_FUNC_DEFINE_ARGS(...) \
166+
UCS_PP_FOREACH_SEP(_UCS_FUNC_ARG_DEFINE, _, \
167+
UCS_PP_ZIP((UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__))), \
168+
(__VA_ARGS__)))
169+
170+
171+
/*
172+
* Pass auto-generated arguments to a function call.
*
* Only the number of variadic arguments is used: the expansion is the list of
* names "arg<index>" built from the same index sequence as in
* UCS_FUNC_DEFINE_ARGS, so parameters declared with that macro can be
* forwarded to another call by giving the identical type list here.
173+
*/
174+
#define UCS_FUNC_PASS_ARGS(...) \
175+
UCS_PP_FOREACH_SEP(_UCS_FUNC_ARG_PASS, _, \
176+
UCS_PP_SEQ(UCS_PP_NUM_ARGS(__VA_ARGS__)))
177+
178+
179+
/*
180+
* Helpers
*
* _UCS_FUNC_ARG_DEFINE unpacks one (index, type) bundle with UCS_PP_TUPLE_0
* and UCS_PP_TUPLE_1; __UCS_FUNC_ARG_DEFINE pastes "arg" with the index to
* declare "<type> arg<index>"; _UCS_FUNC_ARG_PASS pastes the same name
* without a type. The leading "_" parameter is only passed through and never
* appears in the final expansion.
181+
*/
182+
#define _UCS_FUNC_ARG_DEFINE(_, _bundle) \
183+
__UCS_FUNC_ARG_DEFINE(_, UCS_PP_TUPLE_0 _bundle, UCS_PP_TUPLE_1 _bundle)
184+
#define __UCS_FUNC_ARG_DEFINE(_, _index, _type) \
185+
_type UCS_PP_TOKENPASTE(arg, _index)
186+
#define _UCS_FUNC_ARG_PASS(_, _index) UCS_PP_TOKENPASTE(arg, _index)
187+
161188
#endif

src/uct/cuda/Makefile.am

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@ libuct_cuda_la_LIBADD = $(top_builddir)/src/ucs/libucs.la \
1818
noinst_HEADERS = \
1919
base/cuda_md.h \
2020
base/cuda_iface.h \
21+
base/cuda_nvml.h \
2122
cuda_copy/cuda_copy_md.h \
2223
cuda_copy/cuda_copy_iface.h \
2324
cuda_copy/cuda_copy_ep.h \
@@ -30,6 +31,7 @@ noinst_HEADERS = \
3031
libuct_cuda_la_SOURCES = \
3132
base/cuda_iface.c \
3233
base/cuda_md.c \
34+
base/cuda_nvml.c \
3335
cuda_copy/cuda_copy_md.c \
3436
cuda_copy/cuda_copy_iface.c \
3537
cuda_copy/cuda_copy_ep.c \

src/uct/cuda/base/cuda_iface.h

Lines changed: 0 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -13,34 +13,11 @@
1313
#include <ucs/datastruct/khash.h>
1414

1515
#include <cuda.h>
16-
#include <nvml.h>
1716

1817

1918
const char *uct_cuda_base_cu_get_error_string(CUresult result);
2019

2120

22-
#define UCT_NVML_FUNC(_func, _log_level) \
23-
({ \
24-
ucs_status_t _status = UCS_OK; \
25-
do { \
26-
nvmlReturn_t _err = (_func); \
27-
if (NVML_SUCCESS != _err) { \
28-
ucs_log((_log_level), "%s failed: %s", \
29-
UCS_PP_MAKE_STRING(_func), \
30-
(NVML_ERROR_DRIVER_NOT_LOADED != _err) ? \
31-
nvmlErrorString(_err) : \
32-
"nvml is a stub library"); \
33-
_status = UCS_ERR_IO_ERROR; \
34-
} \
35-
} while (0); \
36-
_status; \
37-
})
38-
39-
40-
#define UCT_NVML_FUNC_LOG_ERR(_func) \
41-
UCT_NVML_FUNC(_func, UCS_LOG_LEVEL_ERROR)
42-
43-
4421
#define UCT_CUDADRV_LOG(_func, _log_level, _result) \
4522
ucs_log((_log_level), "%s failed: %s", UCS_PP_MAKE_STRING(_func), \
4623
uct_cuda_base_cu_get_error_string(_result))

0 commit comments

Comments
 (0)