fornetcon: fix GPU execution. (#781)

olupton · web-flow · commit 55c5e15b234d · 2022-02-23T11:22:29.000+01:00
* Bump submodule past BlueBrain/mod2c#77.
diff --git a/coreneuron/gpu/nrn_acc_manager.cpp b/coreneuron/gpu/nrn_acc_manager.cpp
@@ -562,6 +562,17 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
                 // not kept up to date timestep-by-timestep on the device.
             }
         }
+        {
+            auto* d_fornetcon_perm_indices = cnrn_target_copyin(nt->_fornetcon_perm_indices,
+                                                                nt->_fornetcon_perm_indices_size);
+            cnrn_target_memcpy_to_device(&(d_nt->_fornetcon_perm_indices),
+                                         &d_fornetcon_perm_indices);
+        }
+        {
+            auto* d_fornetcon_weight_perm = cnrn_target_copyin(nt->_fornetcon_weight_perm,
+                                                               nt->_fornetcon_weight_perm_size);
+            cnrn_target_memcpy_to_device(&(d_nt->_fornetcon_weight_perm), &d_fornetcon_weight_perm);
+        }
     }
 
 #endif
@@ -937,6 +948,8 @@ void delete_nrnthreads_on_device(NrnThread* threads, int nthreads) {
 #ifdef CORENEURON_ENABLE_GPU
     for (int i = 0; i < nthreads; i++) {
         NrnThread* nt = threads + i;
+        cnrn_target_delete(nt->_fornetcon_weight_perm);
+        cnrn_target_delete(nt->_fornetcon_perm_indices);
         {
             TrajectoryRequests* tr = nt->trajec_requests;
             if (tr) {
diff --git a/coreneuron/io/nrn_setup.cpp b/coreneuron/io/nrn_setup.cpp
@@ -704,6 +704,11 @@ void nrn_cleanup_ion_map() {
     nrn_ion_global_map_size = 0;
 }
 
+void delete_fornetcon_info(NrnThread& nt) {  // frees nt's two FOR_NETCON arrays (allocated with new[] in setup_fornetcon_info); the *_size members are not modified here
+    delete[] std::exchange(nt._fornetcon_perm_indices, nullptr);  // member is set to nullptr and the previous array is freed
+    delete[] std::exchange(nt._fornetcon_weight_perm, nullptr);  // likewise; delete[] of nullptr is a no-op, so calling this again is harmless
+}
+
 /* nrn_threads_free() presumes all NrnThread and NrnThreadMembList data is
  * allocated with malloc(). This is not the case here, so let's try and fix
  * things up first. */
@@ -726,6 +731,7 @@ void nrn_cleanup() {
     for (int it = 0; it < nrn_nthread; ++it) {
         NrnThread* nt = nrn_threads + it;
         NrnThreadMembList* next_tml = nullptr;
+        delete_fornetcon_info(*nt);
         delete_trajectory_requests(*nt);
         for (NrnThreadMembList* tml = nt->tml; tml; tml = next_tml) {
             Memb_list* ml = tml->ml;
diff --git a/coreneuron/io/setup_fornetcon.cpp b/coreneuron/io/setup_fornetcon.cpp
@@ -126,8 +126,15 @@ void setup_fornetcon_info(NrnThread& nt) {
 
     // Displacement vector has an extra element since the number for last item
     // at n-1 is x[n] - x[n-1] and number for first is x[0] = 0.
-    nt._fornetcon_perm_indices.resize(n_perm_indices + 1);
-    nt._fornetcon_weight_perm.resize(n_weight_perm);
+    delete[] std::exchange(nt._fornetcon_perm_indices, nullptr);
+    delete[] std::exchange(nt._fornetcon_weight_perm, nullptr);
+    // Manual memory management because of needing to copy NrnThread to the GPU
+    // and update device-side pointers there. Note the {} ensure the allocated
+    // arrays are zero-initialised.
+    nt._fornetcon_perm_indices_size = n_perm_indices + 1;
+    nt._fornetcon_perm_indices = new size_t[nt._fornetcon_perm_indices_size]{};
+    nt._fornetcon_weight_perm_size = n_weight_perm;
+    nt._fornetcon_weight_perm = new size_t[nt._fornetcon_weight_perm_size]{};
 
     // From dparam fornetcon slots, compute displacement vector, and
     // set the dparam fornetcon slot to the index of the displacement vector
diff --git a/coreneuron/sim/multicore.hpp b/coreneuron/sim/multicore.hpp
@@ -144,8 +144,10 @@ struct NrnThread: public MemoryManaged {
     TrajectoryRequests* trajec_requests = nullptr; /* per time step values returned to NEURON */
 
     /* Needed in case there are FOR_NETCON statements in use. */
-    std::vector<size_t> _fornetcon_perm_indices; /* displacement like list of indices */
-    std::vector<size_t> _fornetcon_weight_perm;  /* permutation indices into weight */
+    std::size_t _fornetcon_perm_indices_size{}; /* length of _fornetcon_perm_indices */
+    size_t* _fornetcon_perm_indices{};          /* displacement like list of indices */
+    std::size_t _fornetcon_weight_perm_size{};  /* length of _fornetcon_weight_perm */
+    size_t* _fornetcon_weight_perm{};           /* permutation indices into weight */
 
     std::vector<int> _pnt_offset; /* for SelfEvent queue transfer */
 };
diff --git a/external/mod2c b/external/mod2c
@@ -1 +1 @@
-Subproject commit 4898ef45064804f9c7815765721d2b23b67e40b3
+Subproject commit 683ba3a9c0bf0a1126fbee93daad57d9c4c29da0