This repository was archived by the owner on Mar 20, 2023. It is now read-only.

Commit e44cbbe

Extend CoreNEURON gap transfer to be equivalent to NEURON / Iontransfer (#446)
* Extend CoreNEURON gap transfer to be equivalent to NEURON, except extracellular vin sources.
* Fill the transfer_thread_data vectors.
* Update stats for number of transfer sources.
* Gap callback returns SetupTransferInfo* to be deleted by CoreNEURON.
* Update tests for version 1.4 file transfer.
* Update GPU implementation for the same.
* Further updates to README.
1 parent 5b079c5 commit e44cbbe

28 files changed: +398 −396 lines changed

README.md

Lines changed: 36 additions & 18 deletions
@@ -45,28 +45,40 @@ CoreNEURON is now integrated into the development version of the NEURON simulato
 
 HPC systems often use a module system to select software. For example, you can load the compiler, cmake, and python dependencies using module as follows:
 
+```
+module load intel intel-mpi python cmake
+```
 
-```
-module load intel intel-mpi python cmake
-```
 Note that if you are building on Cray system with the GNU toolchain, you have to set following environment variable:
 
-```bash
-export CRAYPE_LINK_TYPE=dynamic
-```
+```
+export CRAYPE_LINK_TYPE=dynamic
+```
 
 3. Run CMake with the appropriate [options](https://github.com/neuronsimulator/nrn#build-using-cmake) and additionally enable CoreNEURON with `-DNRN_ENABLE_CORENEURON=ON` option:
 
-```bash
+```
+cmake .. \
+-DNRN_ENABLE_CORENEURON=ON \
+-DNRN_ENABLE_INTERVIEWS=OFF \
+-DNRN_ENABLE_RX3D=OFF \
+-DCMAKE_INSTALL_PREFIX=$HOME/install
+```
+
+4. If you would like to enable GPU support with OpenACC, make sure to use `-DCORENRN_ENABLE_GPU=ON` option and use the PGI/NVIDIA HPC SDK compilers with CUDA. For example,
+
+```
 cmake .. \
 -DNRN_ENABLE_CORENEURON=ON \
+-DCORENRN_ENABLE_GPU=ON
 -DNRN_ENABLE_INTERVIEWS=OFF \
 -DNRN_ENABLE_RX3D=OFF \
 -DCMAKE_INSTALL_PREFIX=$HOME/install
+-DCMAKE_C_COMPILER=nvc \
+-DCMAKE_CXX_COMPILER=nvc++
 ```
-If you would like to enable GPU support with OpenACC, make sure to use `-DCORENRN_ENABLE_GPU=ON` option and use the PGI/NVIDIA HPC SDK compilers with CUDA.
 
-> NOTE : if the CMake command files, please make sure to delete temporary CMake cache files (`CMakeCache.txt`) before rerunning CMake.
+NOTE : If the CMake command fails, please make sure to delete temporary CMake cache files (`CMakeCache.txt`) before rerunning CMake.
 
 4. Build and Install : once the configure step is done, you can build and install the project as:
 
@@ -89,7 +101,7 @@ As in a typical NEURON workflow, you can use `nrnivmodl` to translate MOD files:
 nrnivmodl mod_directory
 ```
 
-In order to enable CoreNEURON support, you must set the `-coreneuron` flag:
+In order to enable CoreNEURON support, you must set the `-coreneuron` flag. Make sure to necessary modules (compilers, CUDA, MPI etc) are loaded before using nrnivmodl:
 
 ```
 nrnivmodl -coreneuron mod_directory
@@ -109,7 +121,12 @@ With CoreNEURON, existing NEURON models can be run with minimal changes. For a g
 from neuron import coreneuron
 coreneuron.enable = True
 ```
-3. Use `psolve` to run simulation after initialization :
+3. If GPU support is enabled during build, enable GPU execution using :
+```
+coreneuron.gpu = True
+```
+
+4. Use `psolve` to run simulation after initialization :
 
 ```
 h.stdinit()
@@ -162,8 +179,10 @@ nrn_spike_gids = nrn_spike_gids.to_python()
 # now run CoreNEURON
 from neuron import coreneuron
 coreneuron.enable = True
+
 # for GPU support
 # coreneuron.gpu = True
+
 coreneuron.verbose = 0
 h.stdinit()
 corenrn_all_spike_t = h.Vector()
@@ -210,11 +229,10 @@ By default, OpenMP threading is enabled. You can disable it with `-DCORENRN_ENAB
 
 #### GPU enabled build is failing with inlining related errors, what to do?
 
-If there are large functions / procedures in the MOD file that are not inlined by the compiler, you may need to pass additional C++ flags to PGI compiler. You can try:
+If there are large functions / procedures in the MOD file that are not inlined by the compiler, you may need to pass additional C++ flags to PGI compiler. You can try following CXX flags:
 
 ```
-cmake .. -DCMAKE_CXX_FLAGS="-O2 -Minline=size:1000,levels:100,totalsize:40000,maxsize:4000" \
-         -DCORENRN_ENABLE_GPU=ON -DCMAKE_INSTALL_PREFIX=$HOME/install
+-DCMAKE_CXX_FLAGS="-O2 -Minline=size:1000,levels:100,totalsize:40000,maxsize:4000"
 ```
 
 For other errors, please [open an issue](https://github.com/BlueBrain/CoreNeuron/issues).
@@ -261,8 +279,8 @@ CoreNEURON has support for GPUs using the OpenACC programming model when enabled
 
 ```bash
 module purge all
-module load pgi/19.4 cuda/10 cmake intel-mpi # change pgi, cuda and mpi modules
-cmake .. -DCORENRN_ENABLE_GPU=ON -DCMAKE_INSTALL_PREFIX=$HOME/install
+module load nvidia-hpc-sdk/20.11 cuda/11 cmake openmpi # change pgi, cuda and mpi modules
+cmake .. -DCORENRN_ENABLE_GPU=ON -DCMAKE_INSTALL_PREFIX=$HOME/install -DCMAKE_C_COMPILER=nvc -DCMAKE_CXX_COMPILER=nvc++
 make -j && make install
 ```
 
@@ -278,7 +296,7 @@ You have to run GPU executable with the `--gpu` flag. Make sure to enable cell r
 mpirun -n 1 ./bin/nrniv-core --mpi --gpu --tstop 100 --datpath ../tests/integration/ring --cell-permute 2
 ```
 
-> Note: If your model is using Random123 random number generator, you cannot use the same executable for CPU and GPU runs. We suggest to build separate executables for CPU and GPU simulations. This will be fixed in future releases.
+Note: If your model is using Random123 random number generator, you cannot use the same executable for CPU and GPU runs. We suggest to install separate NEURON with CoreNEURON for CPU and GPU simulations. This will be fixed in future releases.
 
 
 ##### Running tests with SLURM
@@ -339,4 +357,4 @@ You can see current [contributors here](https://github.com/BlueBrain/CoreNeuron/
 
 ## Funding
 
-CoreNEURON is developed in a joint collaboration between the Blue Brain Project and Yale University. This work has been funded by the EPFL Blue Brain Project (funded by the Swiss ETH board), NIH grant number R01NS11613 (Yale University), the European Union Seventh Framework Program (FP7/20072013) under grant agreement n◦ 604102 (HBP) and the Eu- ropean Union’s Horizon 2020 Framework Programme for Research and Innovation under Grant Agreement n◦ 720270 (Human Brain Project SGA1) and Grant Agreement n◦ 785907 (Human Brain Project SGA2).
+CoreNEURON is developed in a joint collaboration between the Blue Brain Project and Yale University. This work has been funded by the EPFL Blue Brain Project (funded by the Swiss ETH board), NIH grant number R01NS11613 (Yale University), the European Union Seventh Framework Program (FP7/20072013) under grant agreement n◦ 604102 (HBP) and the European Union’s Horizon 2020 Framework Programme for Research and Innovation under Grant Agreement n◦ 720270 (Human Brain Project SGA1) and Grant Agreement n◦ 785907 (Human Brain Project SGA2).

coreneuron/apps/main1.cpp

Lines changed: 3 additions & 0 deletions
@@ -577,6 +577,9 @@ extern "C" int run_solve_core(int argc, char** argv) {
     Instrumentor::phase_end("simulation");
     Instrumentor::stop_profile();
 
+    // update cpu copy of NrnThread from GPU
+    update_nrnthreads_on_host(nrn_threads, nrn_nthread);
+
     // direct mode and full trajectory gathering on CoreNEURON, send back.
     if (corenrn_embedded) {
         trajectory_return();

coreneuron/io/nrn2core_direct.h

Lines changed: 0 additions & 9 deletions
@@ -17,15 +17,6 @@ extern void (*nrn2core_mkmech_info_)(std::ostream&);
 extern void* (*nrn2core_get_global_dbl_item_)(void*, const char*& name, int& size, double*& val);
 extern int (*nrn2core_get_global_int_item_)(const char* name);
 
-extern void (*nrn2core_get_partrans_setup_info_)(int tid,
-                                                 int& ntar,
-                                                 int& nsrc,
-                                                 int& type,
-                                                 int& ix_vpre,
-                                                 int*& sid_target,
-                                                 int*& sid_src,
-                                                 int*& v_indices);
-
 extern int (*nrn2core_get_dat1_)(int tid,
                                  int& n_presyn,
                                  int& n_netcon,

coreneuron/io/nrn_setup.cpp

Lines changed: 64 additions & 44 deletions
@@ -63,17 +63,11 @@ int corenrn_embedded_nthread;
 
 void (*nrn2core_group_ids_)(int*);
 
-void (*nrn2core_get_partrans_setup_info_)(int tid,
-                                          int& ntar,
-                                          int& nsrc,
-                                          int& type,
-                                          int& ix_vpre,
-                                          int*& sid_target,
-                                          int*& sid_src,
-                                          int*& v_indices);
-
-
-
+extern "C" {
+coreneuron::nrn_partrans::SetupTransferInfo*
+(*nrn2core_get_partrans_setup_info_)(int ngroup, int cn_nthread,
+                                     size_t cn_sidt_size);
+}
 
 void (*nrn2core_get_trajectory_requests_)(int tid,
                                           int& bsize,
@@ -489,22 +483,12 @@ void nrn_setup(const char* filesdat,
     for (int i = 0; i < nrn_nthread; ++i)
         nrnthreads_netcon_srcgid[i] = nullptr;
 
-    // gap junctions
-    if (nrn_have_gaps) {
-        nrn_partrans::transfer_thread_data_ = new nrn_partrans::TransferThreadData[nrn_nthread];
-        nrn_partrans::setup_info_ = new nrn_partrans::SetupInfo[userParams.ngroup];
-        if (!corenrn_embedded) {
-            coreneuron::phase_wrapper<coreneuron::gap>(userParams);
-        } else {
-            nrn_assert(sizeof(nrn_partrans::sgid_t) == sizeof(int));
-            for (int i = 0; i < userParams.ngroup; ++i) {
-                nrn_partrans::SetupInfo& si = nrn_partrans::setup_info_[i];
-                (*nrn2core_get_partrans_setup_info_)(i, si.ntar, si.nsrc, si.type, si.ix_vpre,
-                                                     si.sid_target, si.sid_src, si.v_indices);
-            }
-        }
-        nrn_partrans::gap_mpi_setup(userParams.ngroup);
-    }
+    // Gap junctions used to be done first in the sense of reading files
+    // and calling gap_mpi_setup. But during phase2, gap_thread_setup and
+    // gap_indices_permute were called after NrnThread.data was in its final
+    // layout and mechanism permutation was determined. This is no longer
+    // ideal as it necessitates keeping setup_info_ in existence to the end
+    // of phase2. So gap junction setup is deferred to after phase2.
 
     nrnthreads_netcon_negsrcgid_tid.resize(nrn_nthread);
     if (!corenrn_embedded) {
@@ -528,6 +512,28 @@ void nrn_setup(const char* filesdat,
     /* nrn_multithread_job supports serial, pthread, and openmp. */
     coreneuron::phase_wrapper<coreneuron::phase::two>(userParams, corenrn_embedded);
 
+    // gap junctions
+    // Gaps are done after phase2, in order to use layout and permutation
+    // information via calls to stdindex2ptr.
+    if (nrn_have_gaps) {
+        nrn_partrans::transfer_thread_data_ = new nrn_partrans::TransferThreadData[nrn_nthread];
+        if (!corenrn_embedded) {
+            nrn_partrans::setup_info_ = new nrn_partrans::SetupTransferInfo[
+                nrn_nthread];
+            coreneuron::phase_wrapper<coreneuron::gap>(userParams);
+        } else {
+            nrn_partrans::setup_info_ = (*nrn2core_get_partrans_setup_info_)(
+                userParams.ngroup, nrn_nthread, sizeof(nrn_partrans::sgid_t));
+        }
+
+        nrn_multithread_job(nrn_partrans::gap_data_indices_setup);
+        nrn_partrans::gap_mpi_setup(userParams.ngroup);
+
+        // Whether allocated in NEURON or here, delete here.
+        delete [] nrn_partrans::setup_info_;
+        nrn_partrans::setup_info_ = nullptr;
+    }
+
     if (is_mapping_needed)
         coreneuron::phase_wrapper<coreneuron::phase::three>(userParams);
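
The hunk above settles ownership of `nrn_partrans::setup_info_`: whether the per-thread `SetupTransferInfo` array is allocated by CoreNEURON itself (file-reading path) or handed over by NEURON through the `nrn2core_get_partrans_setup_info_` callback (embedded/direct path), CoreNEURON consumes it in `gap_data_indices_setup`/`gap_mpi_setup` and then always frees it with `delete[]`. A minimal sketch of that contract, using a hypothetical `Info` stand-in rather than the real `SetupTransferInfo`:

```cpp
// Sketch of the allocation/ownership contract only. `Info` is a hypothetical
// stand-in for coreneuron::nrn_partrans::SetupTransferInfo, and the callback
// is simplified relative to the real nrn2core_get_partrans_setup_info_.
#include <vector>

struct Info {
    std::vector<int> src_sid, tar_sid;  // per-thread transfer description (simplified)
};

// Embedded/direct path: NEURON allocates one entry per CoreNEURON thread
// with new[] and returns the array to CoreNEURON.
Info* neuron_side_provider(int cn_nthread) {
    return new Info[cn_nthread];
}

void gap_setup_sketch(bool corenrn_embedded, int cn_nthread) {
    Info* setup_info = nullptr;
    if (!corenrn_embedded) {
        // File-reading path: CoreNEURON allocates and read_phasegap fills setup_info[tid].
        setup_info = new Info[cn_nthread];
    } else {
        // Direct path: NEURON allocates via the callback.
        setup_info = neuron_side_provider(cn_nthread);
    }
    // ... gap_data_indices_setup and gap_mpi_setup consume setup_info ...
    delete[] setup_info;  // whichever side allocated, CoreNEURON deletes here
}
```
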

@@ -578,9 +584,9 @@ void setup_ThreadData(NrnThread& nt) {
 }
 
 void read_phasegap(NrnThread& nt, UserParams& userParams) {
-    nrn_partrans::SetupInfo& si = nrn_partrans::setup_info_[nt.id];
-    si.ntar = 0;
-    si.nsrc = 0;
+    auto& si = nrn_partrans::setup_info_[nt.id];
+    size_t ntar = 0;
+    size_t nsrc = 0;
 
     auto& F = userParams.file_reader[nt.id];
     if (F.fail()) {
@@ -590,25 +596,37 @@ void read_phasegap(NrnThread& nt, UserParams& userParams) {
     int chkpntsave = F.checkpoint();
     F.checkpoint(0);
 
-    si.ntar = F.read_int();
-    si.nsrc = F.read_int();
-    si.type = F.read_int();
-    si.ix_vpre = F.read_int();
-    si.sid_target = F.read_array<int>(si.ntar);
-    si.sid_src = F.read_array<int>(si.nsrc);
-    si.v_indices = F.read_array<int>(si.nsrc);
+    int sidt_size = F.read_int();
+    assert(sidt_size == int(sizeof(nrn_partrans::sgid_t)));
+    ntar = size_t(F.read_int());
+    nsrc = size_t(F.read_int());
+
+    si.src_sid.resize(nsrc);
+    si.src_type.resize(nsrc);
+    si.src_index.resize(nsrc);
+    if (nsrc) {
+        F.read_array<nrn_partrans::sgid_t>(si.src_sid.data(), nsrc);
+        F.read_array<int>(si.src_type.data(), nsrc);
+        F.read_array<int>(si.src_index.data(), nsrc);
+    }
 
-    F.checkpoint(chkpntsave);
+    si.tar_sid.resize(ntar);
+    si.tar_type.resize(ntar);
+    si.tar_index.resize(ntar);
+    if (ntar) {
+        F.read_array<nrn_partrans::sgid_t>(si.tar_sid.data(), ntar);
+        F.read_array<int>(si.tar_type.data(), ntar);
+        F.read_array<int>(si.tar_index.data(), ntar);
+    }
 
 #if DEBUG
-    printf("%d read_phasegap tid=%d type=%d %s ix_vpre=%d nsrc=%d ntar=%d\n",
-           nrnmpi_myid, nt.id, si.type, corenrn.get_memb_func(si.type).sym, si.ix_vpre,
-           si.nsrc, si.ntar);
+    printf("%d read_phasegap tid=%d nsrc=%d ntar=%d\n",
+           nrnmpi_myid, nt.id, nsrc, ntar);
     for (int i=0; i < si.nsrc; ++i) {
-        printf("sid_src %d %d\n", si.sid_src[i], si.v_indices[i]);
+        printf("src %z %d %d\n", size_t(si.src_sid[i]), si.src_type[i], si.src_index[i]);
     }
-    for (int i=0; i <si. ntar; ++i) {
-        printf("sid_tar %d %d\n", si.sid_target[i], i);
+    for (int i=0; i <si.ntar; ++i) {
+        printf("tar %z %d %d\n", size_t(si.src_sid[i]), si.src_type[i], si.src_index[i]);
     }
 #endif
 }
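
The rewritten `read_phasegap` above fixes the logical record order of the version 1.4 gap-transfer file: the sid type size, the target and source counts, then the source arrays (sid, type, index) followed by the target arrays. The sketch below only illustrates that order; it uses a plain binary `std::istream` and a hypothetical `GapSetupRecord` instead of CoreNEURON's file reader (`F.read_int` / `F.read_array<T>`) and `SetupTransferInfo`:

```cpp
// Illustration of the version 1.4 gap-transfer record order; not CoreNEURON's API.
#include <cstddef>
#include <istream>
#include <vector>

using sgid_t = int;  // assumption: must satisfy sidt_size == sizeof(sgid_t), as asserted above

struct GapSetupRecord {
    std::vector<sgid_t> src_sid;
    std::vector<int> src_type, src_index;
    std::vector<sgid_t> tar_sid;
    std::vector<int> tar_type, tar_index;
};

template <typename T>
std::vector<T> read_array(std::istream& in, std::size_t n) {
    std::vector<T> v(n);
    in.read(reinterpret_cast<char*>(v.data()), static_cast<std::streamsize>(n * sizeof(T)));
    return v;
}

GapSetupRecord read_gap_record(std::istream& in) {
    auto read_int = [&in]() {
        int x = 0;
        in.read(reinterpret_cast<char*>(&x), sizeof(x));
        return x;
    };
    const int sidt_size = read_int();                        // size of the sid type
    const auto ntar = static_cast<std::size_t>(read_int());  // number of transfer targets
    const auto nsrc = static_cast<std::size_t>(read_int());  // number of transfer sources
    (void)sidt_size;  // the real code asserts it equals sizeof(sgid_t)

    GapSetupRecord r;
    r.src_sid = read_array<sgid_t>(in, nsrc);  // source sids
    r.src_type = read_array<int>(in, nsrc);    // per-source type id
    r.src_index = read_array<int>(in, nsrc);   // per-source index within that type's data
    r.tar_sid = read_array<sgid_t>(in, ntar);  // target sids
    r.tar_type = read_array<int>(in, ntar);    // per-target type id
    r.tar_index = read_array<int>(in, ntar);   // per-target index within that type's data
    return r;
}
```
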
@@ -855,6 +873,8 @@ void nrn_cleanup() {
     }
 
     destroy_interleave_info();
+
+    nrn_partrans::gap_cleanup();
 }
 
 void delete_trajectory_requests(NrnThread& nt) {

coreneuron/io/phase2.cpp

Lines changed: 0 additions & 9 deletions
@@ -3,7 +3,6 @@
 #include "coreneuron/sim/multicore.hpp"
 #include "coreneuron/io/nrn_checkpoint.hpp"
 #include "coreneuron/utils/nrnoc_aux.hpp"
-#include "coreneuron/network/partrans.hpp"
 #include "coreneuron/permute/cellorder.hpp"
 #include "coreneuron/permute/node_permute.h"
 #include "coreneuron/utils/vrecitem.h"
@@ -996,10 +995,6 @@ void Phase2::populate(NrnThread& nt, const UserParams& userParams) {
         }
     }
 
-    if (nrn_have_gaps) {
-        nrn_partrans::gap_thread_setup(nt);
-    }
-
     pdata_relocation(nt, memb_func);
 
     /* if desired, apply the node permutation. This involves permuting
@@ -1053,10 +1048,6 @@ void Phase2::populate(NrnThread& nt, const UserParams& userParams) {
         }
     }
 
-    if (nrn_have_gaps && interleave_permute_type) {
-        nrn_partrans::gap_indices_permute(nt);
-    }
-
     set_dependencies(nt, memb_func);
 
     fill_before_after_lists(nt, memb_func);
