Skip to content
This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit 7669a53

Browse files
pramodkiomaganaris
andauthored
Bug fixes for gpu execution (#330)
* Bug fixes for gpu execution - init_gpu should be called very early before mechanism registration to make sure all memory is allocated on specific gpu in case of multi-gpu system (e.g. bbcore_read & acc copyin) - nrnran123_normal should be marked as acc routine seq - minor debug message improvement * fix MPI & GPU initialisation Co-authored-by: Ioannis <[email protected]>
1 parent adfc124 commit 7669a53

File tree

6 files changed

+27
-22
lines changed

6 files changed

+27
-22
lines changed

coreneuron/apps/main1.cpp

Lines changed: 12 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -138,7 +138,6 @@ void call_prcellstate_for_prcellgid(int prcellgid, int compute_gpu, int is_init)
138138
void nrn_init_and_load_data(int argc,
139139
char* argv[],
140140
bool is_mapping_needed = false,
141-
bool nrnmpi_under_nrncontrol = true,
142141
bool run_setup_cleanup = true) {
143142
#if defined(NRN_FEEXCEPT)
144143
nrn_feenableexcept();
@@ -147,11 +146,6 @@ void nrn_init_and_load_data(int argc,
147146
/// profiler like tau/vtune : do not measure from begining
148147
Instrumentor::stop_profile();
149148

150-
// mpi initialisation
151-
#if NRNMPI
152-
nrnmpi_init(nrnmpi_under_nrncontrol ? 1 : 0, &argc, &argv);
153-
#endif
154-
155149
// memory footprint after mpi initialisation
156150
report_mem_usage("After MPI_Init");
157151

@@ -427,18 +421,26 @@ using namespace coreneuron;
427421

428422

429423
extern "C" void mk_mech_init(int argc, char** argv) {
430-
#if NRNMPI
431-
nrnmpi_init(1, &argc, &argv);
432-
#endif
433424
// read command line parameters and parameter config files
434-
435425
try {
436426
corenrn_param.parse(argc, argv);
437427
}
438428
catch (...) {
439429
nrn_abort(1);
440430
}
441431

432+
#if NRNMPI
433+
if (corenrn_param.mpi_enable) {
434+
nrnmpi_init(1, &argc, &argv);
435+
}
436+
#endif
437+
438+
#ifdef _OPENACC
439+
if (corenrn_param.gpu) {
440+
init_gpu();
441+
}
442+
#endif
443+
442444
if (!corenrn_param.writeParametersFilepath.empty()) {
443445
std::ofstream out(corenrn_param.writeParametersFilepath, std::ios::trunc);
444446
out << corenrn_param.app.config_to_str(false, false);

coreneuron/gpu/nrn_acc_manager.cpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,18 @@ extern InterleaveInfo* interleave_info;
2525
void copy_ivoc_vect_to_device(IvocVect*& iv, IvocVect*& div);
2626
void nrn_ion_global_map_copyto_device();
2727
void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay);
28-
void init_gpu(int nthreads, NrnThread* threads);
2928

3029
/* note: threads here are corresponding to global nrn_threads array */
3130
void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
3231
#ifdef _OPENACC
32+
// initialize NrnThreads for gpu execution
33+
// empty thread or only artificial cells should be on cpu
34+
for (int i = 0; i < nthreads; i++) {
35+
NrnThread* nt = threads + i;
36+
nt->compute_gpu = (nt->end > 0) ? 1 : 0;
37+
nt->_dt = dt;
38+
}
3339

34-
init_gpu(nthreads, threads);
3540
nrn_ion_global_map_copyto_device();
3641

3742
#ifdef UNIFIED_MEMORY
@@ -946,7 +951,7 @@ void nrn_ion_global_map_copyto_device() {
946951
}
947952
}
948953

949-
void init_gpu(int nthreads, NrnThread* threads) {
954+
void init_gpu() {
950955
// choose nvidia GPU by default
951956
acc_device_t device_type = acc_device_nvidia;
952957

@@ -969,13 +974,6 @@ void init_gpu(int nthreads, NrnThread* threads) {
969974
if (nrnmpi_myid == 0) {
970975
std::cout << " Info : " << num_devices << " GPUs shared by " << local_size << " ranks per node\n";
971976
}
972-
973-
for (int i = 0; i < nthreads; i++) {
974-
// empty thread or only artificial cells should be on cpu
975-
NrnThread* nt = threads + i;
976-
nt->compute_gpu = (nt->end > 0) ? 1 : 0;
977-
nt->_dt = dt;
978-
}
979977
}
980978

981979
void nrn_VecPlay_copyto_device(NrnThread* nt, void** d_vecplay) {

coreneuron/gpu/nrn_acc_manager.hpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ void update_matrix_to_gpu(NrnThread* _nt);
2020
void update_net_receive_buffer(NrnThread* _nt);
2121
void realloc_net_receive_buffer(NrnThread* nt, Memb_list* ml);
2222
void update_net_send_buffer_on_host(NrnThread* nt, NetSendBuffer_t* nsb);
23+
void init_gpu();
2324

2425
} // namespace coreneuron
2526
#endif // _nrn_device_manager_

coreneuron/io/nrn_setup.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -53,7 +53,6 @@ static void setup_ThreadData(NrnThread& nt);
5353
extern void nrn_init_and_load_data(int argc,
5454
char** argv,
5555
bool is_mapping_needed = false,
56-
bool nrnmpi_under_nrncontrol = true,
5756
bool run_setup_cleanup = true);
5857
extern void nrn_setup_cleanup();
5958

coreneuron/network/partrans_setup.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -256,7 +256,9 @@ void nrn_partrans::gap_thread_setup(NrnThread& nt) {
256256
}
257257

258258
void nrn_partrans::gap_indices_permute(NrnThread& nt) {
259-
printf("nrn_partrans::gap_indices_permute\n");
259+
if (nrnmpi_myid == 0) {
260+
printf("nrn_partrans::gap_indices_permute\n");
261+
}
260262
nrn_partrans::TransferThreadData& ttd = transfer_thread_data_[nt.id];
261263
// sources
262264
if (ttd.nsrc > 0 && nt._permute) {

coreneuron/utils/randoms/nrnran123.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,9 @@ extern DEVICE double nrnran123_negexp(nrnran123_State*); /* mean 1.0 */
151151
/* nrnran123_negexp min value is 2.3283064e-10, max is 22.18071 */
152152

153153
/* missing declaration in coreneuron */
154+
#if !defined(DISABLE_OPENACC)
155+
#pragma acc routine seq
156+
#endif
154157
extern DEVICE double nrnran123_normal(nrnran123_State*);
155158

156159
extern DEVICE double nrnran123_gauss(nrnran123_State*); /* mean 0.0, std 1.0 */

0 commit comments

Comments
 (0)