Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit abe2e4a

Browse files
committed
Improve GPU device selection in multi-gpu environment
- compute local rank within shared memory node - find out number of GPUs per node and then pin each local rank to a specific device - add debug message showing #GPUs per node shared by #ranks
1 parent a91c619 commit abe2e4a

File tree

3 files changed

+58
-6
lines changed

3 files changed

+58
-6
lines changed

coreneuron/gpu/nrn_acc_manager.cpp

Lines changed: 20 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
#include "coreneuron/sim/scopmath/newton_struct.h"
1212
#include "coreneuron/coreneuron.hpp"
1313
#include "coreneuron/utils/nrnoc_aux.hpp"
14+
#include "coreneuron/mpi/nrnmpi.h"
1415

1516
#ifdef _OPENACC
1617
#include <openacc.h>
@@ -946,14 +947,27 @@ void nrn_ion_global_map_copyto_device() {
946947
}
947948

948949
void init_gpu(int nthreads, NrnThread* threads) {
949-
if (nthreads <= 0) {
950-
printf("\n Warning: No threads to copy on GPU! ");
951-
return;
950+
// choose nvidia GPU by default
951+
acc_device_t device_type = acc_device_nvidia;
952+
953+
// check how many gpu devices available
954+
int num_devices = acc_get_num_devices(device_type);
955+
956+
// if no gpu found, can't run on GPU
957+
if (num_devices == 0) {
958+
nrn_fatal_error("\n ERROR : Enabled GPU execution but couldn't find NVIDIA GPU! \n");
952959
}
953960

954-
/** @todo: currently only checking nvidia gpu */
955-
if (acc_get_num_devices(acc_device_nvidia) == 0) {
956-
printf("\n WARNING: Enabled GPU execution but couldn't find NVIDIA GPU! \n");
961+
// get local rank within a node and assign specific gpu for this node.
962+
// multiple threads within the node will use same device.
963+
int local_rank = nrnmpi_local_rank();
964+
int local_size = nrnmpi_local_size();
965+
966+
int device_num = local_rank % num_devices;
967+
acc_set_device_num(device_num, device_type);
968+
969+
if (nrnmpi_myid == 0) {
970+
std::cout << " Info : " << num_devices << " GPUs shared by " << local_size << " ranks per node\n";
957971
}
958972

959973
for (int i = 0; i < nthreads; i++) {

coreneuron/mpi/nrnmpi.cpp

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,4 +223,40 @@ int nrnmpi_initialized() {
223223
return flag;
224224
}
225225

226+
/**
 * Return local mpi rank within a shared memory node
 *
 * When performing certain operations (e.g. choosing a GPU device), we need
 * to know the rank of the mpi process on a given node. This function uses
 * the MPI 3 MPI_Comm_split_type function with the MPI_COMM_TYPE_SHARED
 * split type to build a temporary per-node communicator and queries the
 * rank of the calling process in it.
 *
 * @return rank of the calling process within its shared memory node;
 *         0 when built without MPI support or when MPI has not been
 *         initialized at runtime.
 */
int nrnmpi_local_rank() {
    int local_rank = 0;
#if NRNMPI
    // MPI can be compiled in but not started at runtime; MPI routines must
    // not be called in that case, so fall back to the single-process answer.
    if (nrnmpi_initialized()) {
        MPI_Comm local_comm;
        // nrnmpi_myid_world is used as the ordering key, so ranks inside the
        // node communicator keep the relative order they have in the world.
        MPI_Comm_split_type(MPI_COMM_WORLD,
                            MPI_COMM_TYPE_SHARED,
                            nrnmpi_myid_world,
                            MPI_INFO_NULL,
                            &local_comm);
        MPI_Comm_rank(local_comm, &local_rank);
        // the communicator is only needed for this query
        MPI_Comm_free(&local_comm);
    }
#endif
    return local_rank;
}
243+
244+
/**
 * Return number of ranks launched on single shared memory node
 *
 * We use the MPI 3 MPI_Comm_split_type function with the
 * MPI_COMM_TYPE_SHARED split type to build a temporary per-node
 * communicator and query its size, i.e. the number of mpi ranks within
 * the shared memory node of the calling process.
 *
 * @return number of mpi ranks sharing the node with the calling process;
 *         1 when built without MPI support or when MPI has not been
 *         initialized at runtime.
 */
int nrnmpi_local_size() {
    int local_size = 1;
#if NRNMPI
    // MPI can be compiled in but not started at runtime; MPI routines must
    // not be called in that case, so fall back to the single-process answer.
    if (nrnmpi_initialized()) {
        MPI_Comm local_comm;
        MPI_Comm_split_type(MPI_COMM_WORLD,
                            MPI_COMM_TYPE_SHARED,
                            nrnmpi_myid_world,
                            MPI_INFO_NULL,
                            &local_comm);
        MPI_Comm_size(local_comm, &local_size);
        // the communicator is only needed for this query
        MPI_Comm_free(&local_comm);
    }
#endif
    return local_size;
}
260+
261+
226262
} // namespace coreneuron

coreneuron/mpi/nrnmpi.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,8 @@ extern int nrnmpi_myid_bbs; /* rank in nrn_bbs_comm of rank 0 of a subworl
4646
extern void nrn_abort(int errcode);
4747
extern void nrn_fatal_error(const char* msg);
4848
extern double nrn_wtime(void);
49+
extern int nrnmpi_local_rank();
50+
extern int nrnmpi_local_size();
4951
} // namespace coreneuron
5052

5153
#if defined(NRNMPI)

0 commit comments

Comments
 (0)