@@ -34,10 +34,7 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
     nrn_ion_global_map_copyto_device();

 #ifdef UNIFIED_MEMORY
-
-    int i;
-
-    for (i = 0; i < nthreads; i++) {
+    for (int i = 0; i < nthreads; i++) {
         NrnThread* nt = threads + i;  // NrnThread on host

         if (nt->n_presyn) {
@@ -64,8 +61,6 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
     }

 #else
-    int i;
-
     /* -- copy NrnThread to device. this needs to be contigious vector because offset is used to
      * find
      * corresponding NrnThread using Point_process in NET_RECEIVE block
@@ -78,7 +73,7 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {

     /* pointers for data struct on device, starting with d_ */

-    for (i = 0; i < nthreads; i++) {
+    for (int i = 0; i < nthreads; i++) {
         NrnThread* nt = threads + i;      // NrnThread on host
         NrnThread* d_nt = d_threads + i;  // NrnThread on device
         if (!nt->compute_gpu) {
@@ -140,23 +135,20 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {

         /* -- copy NrnThreadMembList list ml to device -- */

-        NrnThreadMembList* tml;
-        NrnThreadMembList* d_tml;
         NrnThreadMembList* d_last_tml;

-        Memb_list* d_ml;
-        int first_tml = 1;
+        bool first_tml = true;
         size_t offset = 6 * ne;

-        for (tml = nt->tml; tml; tml = tml->next) {
+        for (auto tml = nt->tml; tml; tml = tml->next) {
             /* copy tml to device*/
             /* QUESTIONS: does tml will point to nullptr as in host ? : I assume so!*/
-            d_tml = (NrnThreadMembList*)acc_copyin(tml, sizeof(NrnThreadMembList));
+            auto d_tml = (NrnThreadMembList*)acc_copyin(tml, sizeof(NrnThreadMembList));

             /* first tml is pointed by nt */
             if (first_tml) {
                 acc_memcpy_to_device(&(d_nt->tml), &d_tml, sizeof(NrnThreadMembList*));
-                first_tml = 0;
+                first_tml = false;
             } else {
                 /* rest of tml forms linked list */
                 acc_memcpy_to_device(&(d_last_tml->next), &d_tml, sizeof(NrnThreadMembList*));
@@ -166,7 +158,7 @@ void setup_nrnthreads_on_device(NrnThread* threads, int nthreads) {
             d_last_tml = d_tml;

             /* now for every tml, there is a ml. copy that and setup pointer */
-            d_ml = (Memb_list*)acc_copyin(tml->ml, sizeof(Memb_list));
+            auto d_ml = (Memb_list*)acc_copyin(tml->ml, sizeof(Memb_list));
             acc_memcpy_to_device(&(d_tml->ml), &d_ml, sizeof(Memb_list*));

             /* setup nt._ml_list */
@@ -519,9 +511,7 @@ static void net_receive_buffer_order(NetReceiveBuffer_t* nrb) {
  * functional version.
  */
 void update_net_receive_buffer(NrnThread* nt) {
-    NrnThreadMembList* tml;
-
-    for (tml = nt->tml; tml; tml = tml->next) {
+    for (auto tml = nt->tml; tml; tml = tml->next) {
         // net_receive buffer to copy
         NetReceiveBuffer_t* nrb = tml->ml->_net_receive_buffer;

@@ -574,10 +564,7 @@ void update_nrnthreads_on_host(NrnThread* threads, int nthreads) {

     printf("\n --- Copying to Host! --- \n");

-    int i;
-    NetReceiveBuffer_t* nrb;
-
-    for (i = 0; i < nthreads; i++) {
+    for (int i = 0; i < nthreads; i++) {
         NrnThread* nt = threads + i;

         if (nt->compute_gpu && (nt->end > 0)) {
@@ -598,8 +585,7 @@ void update_nrnthreads_on_host(NrnThread* threads, int nthreads) {
             /* @todo: nt._ml_list[tml->index] = tml->ml; */

             /* -- copy NrnThreadMembList list ml to host -- */
-            NrnThreadMembList* tml;
-            for (tml = nt->tml; tml; tml = tml->next) {
+            for (auto tml = nt->tml; tml; tml = tml->next) {
                 Memb_list* ml = tml->ml;

                 acc_update_self(&tml->index, sizeof(int));
@@ -625,7 +611,7 @@ void update_nrnthreads_on_host(NrnThread* threads, int nthreads) {
                     acc_update_self(ml->pdata, pcnt * sizeof(int));
                 }

-                nrb = tml->ml->_net_receive_buffer;
+                auto nrb = tml->ml->_net_receive_buffer;

                 if (nrb) {
                     acc_update_self(&nrb->_cnt, sizeof(int));
@@ -674,10 +660,7 @@ void update_nrnthreads_on_device(NrnThread* threads, int nthreads) {

     printf("\n --- Copying to Device! --- \n");

-    int i;
-    NetReceiveBuffer_t* nrb;
-
-    for (i = 0; i < nthreads; i++) {
+    for (int i = 0; i < nthreads; i++) {
         NrnThread* nt = threads + i;

         if (nt->compute_gpu && (nt->end > 0)) {
@@ -698,21 +681,19 @@ void update_nrnthreads_on_device(NrnThread* threads, int nthreads) {
             /* @todo: nt._ml_list[tml->index] = tml->ml; */

             /* -- copy NrnThreadMembList list ml to host -- */
-            NrnThreadMembList* tml;
-            for (tml = nt->tml; tml; tml = tml->next) {
+            for (auto tml = nt->tml; tml; tml = tml->next) {
                 Memb_list* ml = tml->ml;
                 int type = tml->index;
                 int n = ml->nodecount;
                 int szp = corenrn.get_prop_param_size()[type];
                 int szdp = corenrn.get_prop_dparam_size()[type];
-                int is_art = corenrn.get_is_artificial()[type];
                 int layout = corenrn.get_mech_data_layout()[type];

                 int pcnt = nrn_soa_padded_size(n, layout) * szp;

                 acc_update_device(ml->data, pcnt * sizeof(double));

-                if (!is_art) {
+                if (!corenrn.get_is_artificial()[type]) {
                     acc_update_device(ml->nodeindices, n * sizeof(int));
                 }

@@ -721,7 +702,7 @@ void update_nrnthreads_on_device(NrnThread* threads, int nthreads) {
                     acc_update_device(ml->pdata, pcnt * sizeof(int));
                 }

-                nrb = tml->ml->_net_receive_buffer;
+                auto nrb = tml->ml->_net_receive_buffer;

                 if (nrb) {
                     acc_update_device(&nrb->_cnt, sizeof(int));
@@ -824,10 +805,6 @@ void finalize_data_on_device() {
        called yet for Ramdom123 data / streams etc. So handle this better!
      */
     return;
-
-#ifdef _OPENACC
-    acc_shutdown(acc_device_default);
-#endif
 }

 void nrn_newtonspace_copyto_device(NewtonSpace* ns) {
@@ -841,8 +818,6 @@ void nrn_newtonspace_copyto_device(NewtonSpace* ns) {
     NewtonSpace* d_ns = (NewtonSpace*)acc_copyin(ns, sizeof(NewtonSpace));

     double* pd;
-    int* pint;
-    double** ppd;

     pd = (double*)acc_copyin(ns->delta_x, n * sizeof(double));
     acc_memcpy_to_device(&(d_ns->delta_x), &pd, sizeof(double*));
@@ -856,10 +831,10 @@ void nrn_newtonspace_copyto_device(NewtonSpace* ns) {
     pd = (double*)acc_copyin(ns->rowmax, n * sizeof(double));
     acc_memcpy_to_device(&(d_ns->rowmax), &pd, sizeof(double*));

-    pint = (int*)acc_copyin(ns->perm, n * sizeof(int));
+    auto pint = (int*)acc_copyin(ns->perm, n * sizeof(int));
     acc_memcpy_to_device(&(d_ns->perm), &pint, sizeof(int*));

-    ppd = (double**)acc_copyin(ns->jacobian, ns->n * sizeof(double*));
+    auto ppd = (double**)acc_copyin(ns->jacobian, ns->n * sizeof(double*));
     acc_memcpy_to_device(&(d_ns->jacobian), &ppd, sizeof(double**));

     // the actual jacobian doubles were allocated as a single array
@@ -886,25 +861,19 @@ void nrn_sparseobj_copyto_device(SparseObj* so) {
     // r_down, c_right, value
     // do not care about the Elm* ptr value, just the space.

-    Elm** ppelm;
-    Elm* pelm;
-    unsigned* pu;
-    double* pd;
-    double** ppd;
-
     Elm** d_rowst = (Elm**)acc_copyin(so->rowst, n1 * sizeof(Elm*));
     acc_memcpy_to_device(&(d_so->rowst), &d_rowst, sizeof(Elm**));

     Elm** d_diag = (Elm**)acc_copyin(so->diag, n1 * sizeof(Elm*));
     acc_memcpy_to_device(&(d_so->diag), &d_diag, sizeof(Elm**));

-    pu = (unsigned*)acc_copyin(so->ngetcall, so->_cntml_padded * sizeof(unsigned));
+    auto pu = (unsigned*)acc_copyin(so->ngetcall, so->_cntml_padded * sizeof(unsigned));
     acc_memcpy_to_device(&(d_so->ngetcall), &pu, sizeof(Elm**));

-    pd = (double*)acc_copyin(so->rhs, n1 * so->_cntml_padded * sizeof(double));
+    auto pd = (double*)acc_copyin(so->rhs, n1 * so->_cntml_padded * sizeof(double));
     acc_memcpy_to_device(&(d_so->rhs), &pd, sizeof(double*));

-    double** d_coef_list =
+    auto d_coef_list =
         (double**)acc_copyin(so->coef_list, so->coef_list_size * sizeof(double*));
     acc_memcpy_to_device(&(d_so->coef_list), &d_coef_list, sizeof(double**));

@@ -940,13 +909,13 @@ void nrn_sparseobj_copyto_device(SparseObj* so) {
     // visit all the Elm again and fill in pelm->r_down and pelm->c_left
     for (unsigned irow = 1; irow < n1; ++irow) {
         for (Elm* elm = so->rowst[irow]; elm; elm = elm->c_right) {
-            pelm = (Elm*)acc_deviceptr(elm);
+            auto pelm = (Elm*)acc_deviceptr(elm);
             if (elm->r_down) {
-                Elm* d_e = (Elm*)acc_deviceptr(elm->r_down);
+                auto d_e = (Elm*)acc_deviceptr(elm->r_down);
                 acc_memcpy_to_device(&(pelm->r_down), &d_e, sizeof(Elm*));
             }
             if (elm->c_right) {
-                Elm* d_e = (Elm*)acc_deviceptr(elm->c_right);
+                auto d_e = (Elm*)acc_deviceptr(elm->c_right);
                 acc_memcpy_to_device(&(pelm->c_right), &d_e, sizeof(Elm*));
             }
         }
@@ -983,8 +952,7 @@ void init_gpu(int nthreads, NrnThread* threads) {
     }

     /** @todo: currently only checking nvidia gpu */
-    int num_gpus = acc_get_num_devices(acc_device_nvidia);
-    if (num_gpus == 0) {
+    if (acc_get_num_devices(acc_device_nvidia) == 0) {
         printf("\n WARNING: Enabled GPU execution but couldn't find NVIDIA GPU! \n");
     }

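A note on the OpenACC idiom this diff modernizes: acc_copyin(ptr, bytes) allocates device memory, copies the host bytes, and returns the device address, while acc_memcpy_to_device(dst, src, bytes) patches pointer fields inside structs that were already copied, so the device copy links device nodes rather than host addresses. Below is a minimal sketch of that deep-copy walk under stated assumptions: Node, copy_list_to_device, and d_head_slot are illustrative stand-ins, not CoreNEURON types.

#include <openacc.h>

struct Node {
    double value;
    Node* next;
};

/* Deep-copy a host linked list to the device. d_head_slot is the device
 * address of the pointer that should end up pointing at the first node,
 * e.g. &(d_parent->head). Note that &(d_prev->next) only computes an
 * offset from a device address; it never dereferences it on the host. */
void copy_list_to_device(Node* head, Node** d_head_slot) {
    Node* d_prev = nullptr;
    for (Node* n = head; n; n = n->next) {
        auto d_n = (Node*)acc_copyin(n, sizeof(Node));  // returns device address
        if (!d_prev) {
            acc_memcpy_to_device(d_head_slot, &d_n, sizeof(Node*));  // patch head
        } else {
            acc_memcpy_to_device(&(d_prev->next), &d_n, sizeof(Node*));  // link list
        }
        d_prev = d_n;
    }
    /* The last node's next was copied from the host as nullptr, so the
     * device list terminates without an extra patch. */
}

This is the same bookkeeping as the tml loop above, with the first_tml flag replaced by a null check on the previous device node.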
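Likewise, the update_nrnthreads_on_host/update_nrnthreads_on_device pair relies on the complementary update calls: once a buffer is mapped with acc_copyin, acc_update_self(ptr, bytes) refreshes the host copy from the device and acc_update_device(ptr, bytes) pushes host edits back, without remapping. A self-contained sketch follows; the buffer and the kernel loop are made up for illustration.

#include <openacc.h>
#include <cstdio>

int main() {
    const int n = 8;
    double buf[n] = {};

    acc_copyin(buf, n * sizeof(double));  // map once; device gets a copy

#pragma acc parallel loop present(buf)
    for (int i = 0; i < n; ++i) {
        buf[i] = 2.0 * i;  // kernel writes the device copy
    }

    acc_update_self(buf, n * sizeof(double));    // pull results to host
    buf[0] = 42.0;                               // host-side edit
    acc_update_device(buf, n * sizeof(double));  // push it back

    acc_delete(buf, n * sizeof(double));  // unmap when done
    printf("buf[0] = %g\n", buf[0]);
    return 0;
}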