Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit 9b6d29c

Browse files
Omar Awileolupton
andauthored
OpenMP target offloading for random123 (#736)
* A clean branch based on master and implementing OpenMP offloading for random123, while keeping the CUDA/OpenACC versions intact. * Make sure g_k_dev is also visible to host * Fix cmake formatting & adjust CMake logic. * Also remove device annotation for host-only global and suppress an unused variable warning when assertions are disabled. Co-authored-by: Olli Lupton <[email protected]>
1 parent 09c0144 commit 9b6d29c

File tree

2 files changed

+38
-4
lines changed

2 files changed

+38
-4
lines changed

coreneuron/CMakeLists.txt

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -129,8 +129,18 @@ if(CORENRN_ENABLE_GPU)
129129
# CMake <v3.20 does not pass explicit -x <lang> options based on the LANGUAGE property
130130
# (https://cmake.org/cmake/help/latest/policy/CMP0119.html), so using a single .cu file and
131131
# setting LANGUAGE=CXX in non-GPU builds does not work.
132-
list(REMOVE_ITEM CORENEURON_CODE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cpp")
133132
list(APPEND CORENEURON_CODE_FILES ${CORENEURON_CUDA_FILES})
133+
if(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenMP")
134+
list(REMOVE_ITEM CORENEURON_CODE_FILES "${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cu")
135+
elseif(CORENRN_ACCELERATOR_OFFLOAD STREQUAL "OpenACC")
136+
list(REMOVE_ITEM CORENEURON_CODE_FILES
137+
"${CMAKE_CURRENT_SOURCE_DIR}/utils/randoms/nrnran123.cpp")
138+
else()
139+
message(
140+
FATAL_ERROR
141+
"Don't know how to build Random123 compatibility layer for GPU with ${CORENRN_ACCELERATOR_OFFLOAD} offload."
142+
)
143+
endif()
134144

135145
# Eigen-3.5+ provides better GPU support. However, some functions cannot be called directly from
136146
# within an OpenACC region. Therefore, we need to wrap them in a special API (decorate them with

coreneuron/utils/randoms/nrnran123.cu

Lines changed: 27 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -76,23 +76,31 @@ using random123_allocator = coreneuron::unified_allocator<coreneuron::nrnran123_
7676
* shutdown. If the destructor calls cudaFree and the CUDA runtime has already
7777
* been shut down then tools like cuda-memcheck reports errors.
7878
*/
79+
#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
80+
nrn_pragma_omp(declare target)
81+
philox4x32_key_t g_k{};
82+
nrn_pragma_omp(end declare target)
83+
#else
7984
philox4x32_key_t* g_k{};
85+
#endif
8086

8187
// In a GPU build we need a device-side global pointer to this global state.
8288
// This is set to the same unified memory address as `g_k` in
8389
// `setup_global_state()` if the GPU is enabled. It would be cleaner to use
8490
// __managed__ here, but unfortunately that does not work on machines that do
8591
// not have a GPU.
86-
#ifdef __CUDACC__
92+
#if defined(__CUDACC__) && !defined(CORENEURON_PREFER_OPENMP_OFFLOAD)
8793
CORENRN_DEVICE philox4x32_key_t* g_k_dev;
8894
#endif
8995

9096
OMP_Mutex g_instance_count_mutex;
97+
9198
std::size_t g_instance_count{};
9299

93100
constexpr double SHIFT32 = 1.0 / 4294967297.0; /* 1/(2^32 + 1) */
94101

95102
void setup_global_state() {
103+
#if !defined(CORENEURON_PREFER_OPENMP_OFFLOAD)
96104
if (g_k) {
97105
// Already initialised, nothing to do
98106
return;
@@ -107,15 +115,18 @@ void setup_global_state() {
107115
// there is no point initialising the device global to it.
108116
{
109117
auto const code = cudaMemcpyToSymbol(g_k_dev, &g_k, sizeof(g_k));
118+
static_cast<void>(code);
110119
assert(code == cudaSuccess);
111120
}
112121
// Make sure g_k_dev is updated.
113122
{
114123
auto const code = cudaDeviceSynchronize();
124+
static_cast<void>(code);
115125
assert(code == cudaSuccess);
116126
}
117127
}
118128
#endif
129+
#endif
119130
}
120131

121132
/** @brief Get the Random123 global state from either host or device code.
@@ -125,13 +136,26 @@ CORENRN_HOST_DEVICE philox4x32_key_t& get_global_state() {
125136
#ifdef __CUDA_ARCH__
126137
// Called from device code
127138
ret = g_k_dev;
139+
#else
140+
#if defined(CORENEURON_ENABLE_GPU) && defined(CORENEURON_PREFER_OPENMP_OFFLOAD) && defined(_OPENMP)
141+
ret = &g_k;
128142
#else
129143
// Called from host code
130144
ret = g_k;
145+
#endif
131146
#endif
132147
assert(ret);
133148
return *ret;
134149
}
150+
151+
nrn_pragma_omp(declare target)
152+
/** @brief Provide a helper function in global namespace that is declared target for OpenMP
153+
* offloading to function correctly with NVHPC
154+
*/
155+
CORENRN_HOST_DEVICE philox4x32_ctr_t philox4x32_helper(coreneuron::nrnran123_State* s) {
156+
return philox4x32(s->c, get_global_state());
157+
}
158+
nrn_pragma_omp(end declare target)
135159
} // namespace
136160

137161
namespace coreneuron {
@@ -157,7 +181,7 @@ CORENRN_HOST_DEVICE void nrnran123_setseq(nrnran123_State* s, uint32_t seq, char
157181
s->which_ = which;
158182
}
159183
s->c.v[0] = seq;
160-
s->r = philox4x32(s->c, get_global_state());
184+
s->r = philox4x32_helper(s);
161185
}
162186

163187
CORENRN_HOST_DEVICE void nrnran123_getids(nrnran123_State* s, uint32_t* id1, uint32_t* id2) {
@@ -181,7 +205,7 @@ CORENRN_HOST_DEVICE uint32_t nrnran123_ipick(nrnran123_State* s) {
181205
if (which > 3) {
182206
which = 0;
183207
s->c.v[0]++;
184-
s->r = philox4x32(s->c, get_global_state());
208+
s->r = philox4x32_helper(s);
185209
}
186210
s->which_ = which;
187211
return rval;

0 commit comments

Comments
 (0)