Skip to content

Commit 43c6ff2

Browse files
authored
Feature/nccl dso (#5001)
* "add nccl enforce" * Dev * Update comment * Add nccl test * Follow comments
1 parent fcd74e0 commit 43c6ff2

File tree

12 files changed

+320
-4
lines changed

12 files changed

+320
-4
lines changed

CMakeLists.txt

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ include(external/eigen) # download eigen3
129129
include(external/pybind11) # download pybind11
130130

131131
include(cudnn) # set cudnn libraries, must before configure
132+
include(nccl) # set nccl libraries
132133
include(configure) # add paddle env configuration
133134
include(generic) # simplify cmake module
134135
include(package) # set paddle packages
@@ -159,7 +160,7 @@ set(EXTERNAL_LIBS
159160
if(WITH_GPU)
160161
list(APPEND EXTERNAL_LIBS ${CUDA_LIBRARIES} ${CUDA_rt_LIBRARY})
161162
if(NOT WITH_DSO)
162-
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY})
163+
list(APPEND EXTERNAL_LIBS ${CUDNN_LIBRARY} ${CUDA_CUBLAS_LIBRARIES} ${CUDA_curand_LIBRARY} ${NCCL_LIBRARY})
163164
endif(NOT WITH_DSO)
164165
endif(WITH_GPU)
165166

cmake/configure.cmake

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,12 +62,19 @@ else()
6262
FIND_PACKAGE(CUDA REQUIRED)
6363

6464
if(${CUDA_VERSION_MAJOR} VERSION_LESS 7)
65-
message(FATAL_ERROR "Paddle need CUDA >= 7.0 to compile")
65+
message(FATAL_ERROR "Paddle needs CUDA >= 7.0 to compile")
6666
endif()
6767

6868
if(NOT CUDNN_FOUND)
69-
message(FATAL_ERROR "Paddle need cudnn to compile")
69+
message(FATAL_ERROR "Paddle needs cudnn to compile")
7070
endif()
71+
if (NOT NCCL_INCLUDE_DIR)
72+
message(FATAL_ERROR "Paddle needs nccl header to compile")
73+
endif()
74+
if (NOT WITH_DSO AND NOT NCCL_LIBRARY)
75+
message(FATAL_ERROR "Paddle needs nccl libraries when WITH_DSO=OFF")
76+
endif()
77+
7178

7279
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} "-Xcompiler ${SIMD_FLAG}")
7380

cmake/nccl.cmake

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
# Locate the NCCL header and shared library for GPU builds.
#
# Search prefixes:
#   NCCL_ROOT       - cache PATH entry (defaults to /usr)
#   $ENV{NCCL_ROOT} - environment override
# Results (cache entries created by find_path / find_library):
#   NCCL_INCLUDE_DIR - directory that contains nccl.h
#   NCCL_LIBRARY     - full path to the nccl shared library
#
# Nothing is searched for when WITH_GPU is off.
if(NOT WITH_GPU)
  return()
endif()

set(NCCL_ROOT "/usr" CACHE PATH "NCCL ROOT")

find_path(NCCL_INCLUDE_DIR nccl.h
  PATHS
    ${NCCL_ROOT} ${NCCL_ROOT}/include
    $ENV{NCCL_ROOT} $ENV{NCCL_ROOT}/include
    ${CUDA_TOOLKIT_INCLUDE}
  NO_DEFAULT_PATH)

# Also probe the directory that holds the CUDA runtime library. The expansion
# is quoted so that an empty CUDA_CUDART_LIBRARY does not turn into a
# wrong-argument-count error.
get_filename_component(__libpath_hist "${CUDA_CUDART_LIBRARY}" PATH)

# Default to x86_64 and switch to the processor CMake reports when it is set.
# The variable is tested by name: expanding it inside if() would make CMake
# treat its *value* as another variable name.
set(TARGET_ARCH "x86_64")
if(CMAKE_SYSTEM_PROCESSOR)
  set(TARGET_ARCH ${CMAKE_SYSTEM_PROCESSOR})
endif()

list(APPEND NCCL_CHECK_LIBRARY_DIRS
  ${NCCL_ROOT}
  ${NCCL_ROOT}/lib64
  ${NCCL_ROOT}/lib
  ${NCCL_ROOT}/lib/${TARGET_ARCH}-linux-gnu
  $ENV{NCCL_ROOT}
  $ENV{NCCL_ROOT}/lib64
  $ENV{NCCL_ROOT}/lib
  /usr/lib)

find_library(NCCL_LIBRARY
  NAMES libnccl.so libnccl.dylib
  PATHS ${NCCL_CHECK_LIBRARY_DIRS} ${NCCL_INCLUDE_DIR} ${__libpath_hist}
  NO_DEFAULT_PATH
  DOC "Path to nccl library.")

paddle/platform/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,3 +25,4 @@ nv_test(device_context_test SRCS device_context_test.cc DEPS device_context gpu_
2525

2626
nv_test(cudnn_helper_test SRCS cudnn_helper_test.cc DEPS dynload_cuda)
2727
nv_test(transform_test SRCS transform_test.cu DEPS paddle_memory place device_context)
28+
nv_test(nccl_test SRCS nccl_test.cu DEPS dynload_cuda gpu_info device_context)
Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
11
cc_library(dynamic_loader SRCS dynamic_loader.cc DEPS glog gflags)
2-
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc DEPS dynamic_loader)
2+
nv_library(dynload_cuda SRCS cublas.cc cudnn.cc curand.cc nccl.cc DEPS dynamic_loader)

paddle/platform/dynload/dynamic_loader.cc

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,11 @@ DEFINE_string(warpctc_dir, "", "Specify path for loading libwarpctc.so.");
3535

3636
DEFINE_string(lapack_dir, "", "Specify path for loading liblapack.so.");
3737

38+
// Directory searched for the NCCL shared library before falling back to the
// dynamic loader's default search path.
DEFINE_string(nccl_dir, "",
              "Specify path for loading nccl library, such as libnccl.so. "
              "For instance, /usr/local/nccl/lib. If default, "
              "dlopen will search nccl from LD_LIBRARY_PATH");
42+
3843
namespace paddle {
3944
namespace platform {
4045
namespace dynload {
@@ -157,6 +162,14 @@ void GetLapackDsoHandle(void** dso_handle) {
157162
#endif
158163
}
159164

165+
// Opens the NCCL shared library, looking under FLAGS_nccl_dir, and stores the
// resulting handle in *dso_handle. Only the file name differs per platform.
void GetNCCLDsoHandle(void** dso_handle) {
#if defined(__APPLE__) || defined(__OSX__)
  const char* const nccl_lib_name = "libnccl.dylib";
#else
  const char* const nccl_lib_name = "libnccl.so";
#endif
  GetDsoHandleFromSearchPath(FLAGS_nccl_dir, nccl_lib_name, dso_handle);
}
172+
160173
} // namespace dynload
161174
} // namespace platform
162175
} // namespace paddle

paddle/platform/dynload/dynamic_loader.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,14 @@ void GetWarpCTCDsoHandle(void** dso_handle);
5858
*/
5959
void GetLapackDsoHandle(void** dso_handle);
6060

61+
/**
62+
* @brief load the DSO of NVIDIA nccl
63+
*
64+
* @param **dso_handle dso handler
65+
*
66+
*/
67+
void GetNCCLDsoHandle(void** dso_handle);
68+
6169
} // namespace dynload
6270
} // namespace platform
6371
} // namespace paddle

paddle/platform/dynload/nccl.cc

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#include "paddle/platform/dynload/nccl.h"
16+
17+
namespace paddle {
18+
namespace platform {
19+
namespace dynload {
20+
21+
std::once_flag nccl_dso_flag;
22+
void *nccl_dso_handle;
23+
24+
#define DEFINE_WRAP(__name) DynLoad__##__name __name
25+
26+
NCCL_RAND_ROUTINE_EACH(DEFINE_WRAP);
27+
28+
} // namespace dynload
29+
} // namespace platform
30+
} // namespace paddle

paddle/platform/dynload/nccl.h

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
2+
3+
Licensed under the Apache License, Version 2.0 (the "License");
4+
you may not use this file except in compliance with the License.
5+
You may obtain a copy of the License at
6+
7+
http://www.apache.org/licenses/LICENSE-2.0
8+
9+
Unless required by applicable law or agreed to in writing, software
10+
distributed under the License is distributed on an "AS IS" BASIS,
11+
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12+
See the License for the specific language governing permissions and
13+
limitations under the License. */
14+
15+
#pragma once
16+
17+
#include <dlfcn.h>
18+
#include <nccl.h>
19+
#include <mutex>
20+
#include "paddle/platform/dynload/dynamic_loader.h"
21+
22+
namespace paddle {
namespace platform {
namespace dynload {

// Once-flag and handle shared by every NCCL wrapper below.
extern std::once_flag nccl_dso_flag;
extern void* nccl_dso_handle;

// DECLARE_DYNAMIC_LOAD_NCCL_WRAP(name) declares a functor type
// DynLoad__<name> plus an extern object called <name> in this namespace, so
// that `dynload::<name>(args...)` forwards to the NCCL routine of that name.
//
// The return type is deduced from the wrapped routine in both variants: most
// routines return ncclResult_t, but ncclGetErrorString returns const char*.
#ifdef PADDLE_USE_DSO
// DSO variant: load the library once via GetNCCLDsoHandle, then resolve the
// symbol with dlsym on each call and invoke it through a function pointer.
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                      \
  struct DynLoad__##__name {                                        \
    template <typename... Args>                                     \
    auto operator()(Args... args) -> decltype(__name(args...)) {    \
      using nccl_func = decltype(__name(args...)) (*)(Args...);     \
      std::call_once(nccl_dso_flag,                                 \
                     paddle::platform::dynload::GetNCCLDsoHandle,   \
                     &nccl_dso_handle);                             \
      void* p_##__name = dlsym(nccl_dso_handle, #__name);           \
      return reinterpret_cast<nccl_func>(p_##__name)(args...);      \
    }                                                               \
  };                                                                \
  extern DynLoad__##__name __name
#else
// Direct variant: call the routine that the binary is linked against.
#define DECLARE_DYNAMIC_LOAD_NCCL_WRAP(__name)                      \
  struct DynLoad__##__name {                                        \
    template <typename... Args>                                     \
    auto operator()(Args... args) -> decltype(__name(args...)) {    \
      return __name(args...);                                       \
    }                                                               \
  };                                                                \
  extern DynLoad__##__name __name
#endif

// The NCCL routines that get a wrapper; `__macro` is applied to each name.
#define NCCL_RAND_ROUTINE_EACH(__macro) \
  __macro(ncclCommInitAll);             \
  __macro(ncclGetUniqueId);             \
  __macro(ncclCommInitRank);            \
  __macro(ncclCommDestroy);             \
  __macro(ncclCommCount);               \
  __macro(ncclCommCuDevice);            \
  __macro(ncclCommUserRank);            \
  __macro(ncclAllReduce);               \
  __macro(ncclBcast);                   \
  __macro(ncclAllGather);               \
  __macro(ncclReduce);                  \
  __macro(ncclGetErrorString);

NCCL_RAND_ROUTINE_EACH(DECLARE_DYNAMIC_LOAD_NCCL_WRAP)

}  // namespace dynload
}  // namespace platform
}  // namespace paddle

paddle/platform/enforce.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,11 +29,14 @@ limitations under the License. */
2929
#include <cxxabi.h> // for __cxa_demangle
3030
#endif
3131

32+
#include <glog/logging.h>
33+
3234
#ifdef PADDLE_WITH_CUDA
3335

3436
#include "paddle/platform/dynload/cublas.h"
3537
#include "paddle/platform/dynload/cudnn.h"
3638
#include "paddle/platform/dynload/curand.h"
39+
#include "paddle/platform/dynload/nccl.h"
3740

3841
#include <cublas_v2.h>
3942
#include <cudnn.h>
@@ -172,6 +175,17 @@ inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
172175
throw std::runtime_error(err + string::Sprintf(args...));
173176
}
174177

178+
template <typename... Args>
179+
inline typename std::enable_if<sizeof...(Args) != 0, void>::type throw_on_error(
180+
ncclResult_t stat, const Args&... args) {
181+
if (stat == ncclSuccess) {
182+
return;
183+
} else {
184+
throw std::runtime_error(platform::dynload::ncclGetErrorString(stat) +
185+
string::Sprintf(args...));
186+
}
187+
}
188+
175189
#endif // PADDLE_ONLY_CPU
176190

177191
template <typename T>

0 commit comments

Comments
 (0)