2222#include " coreneuron/utils/nrn_assert.h"
2323#include " coreneuron/utils/nrnmutdec.h"
2424#include " coreneuron/utils/memory.h"
25+ #include " coreneuron/mpi/nrnmpi.h"
26+ #include " coreneuron/mpi/nrnmpi_impl.h"
2527#include " coreneuron/io/nrn_setup.hpp"
2628#include " coreneuron/network/partrans.hpp"
2729#include " coreneuron/io/nrn_checkpoint.hpp"
3335#include " coreneuron/io/phase2.hpp"
3436#include " coreneuron/io/mech_report.h"
3537#include " coreneuron/apps/corenrn_parameters.hpp"
38+ #include " coreneuron/io/nrn_setup.hpp"
3639
3740// callbacks into nrn/src/nrniv/nrnbbcore_write.cpp
3841#include " coreneuron/sim/fast_imem.hpp"
@@ -149,8 +152,6 @@ extern corenrn_parameters corenrn_param;
149152
150153static OMP_Mutex mut;
151154
152- static size_t model_size (void );
153-
154155// / Vector of maps for negative presyns
155156std::vector<std::map<int , PreSyn*>> neg_gid2out;
156157// / Maps for output and input presyns
@@ -537,15 +538,30 @@ void nrn_setup(const char* filesdat,
537538 // / which is only executed by StochKV.c.
538539 nrn_mk_table_check (); // was done in nrn_thread_memblist_setup in multicore.c
539540
540- model_size ();
541- delete[] userParams.gidgroups ;
541+ size_t model_size_bytes;
542+
543+ if (corenrn_param.model_stats ) {
544+ write_mech_report ();
545+ model_size_bytes = model_size (true );
546+ } else {
547+ model_size_bytes = model_size (false );
548+ }
542549
543550 if (nrnmpi_myid == 0 && !corenrn_param.is_quiet ()) {
544551 printf (" Setup Done : %.2lf seconds \n " , nrn_wtime () - time);
552+
553+ if (model_size_bytes < 1024 ) {
554+ printf (" Model size : %ld bytes\n " , model_size_bytes);
555+ } else if (model_size_bytes < 1024 * 1024 ) {
556+ printf (" Model size : %.2lf kB\n " , model_size_bytes / 1024 .);
557+ } else if (model_size_bytes < 1024 * 1024 * 1024 ) {
558+ printf (" Model size : %.2lf MB\n " , model_size_bytes / (1024 . * 1024 .));
559+ } else {
560+ printf (" Model size : %.2lf GB\n " , model_size_bytes / (1024 . * 1024 . * 1024 .));
561+ }
545562 }
546- if (corenrn_param.count_mechs ) {
547- write_mech_report ();
548- }
563+
564+ delete[] userParams.gidgroups ;
549565}
550566
551567void setup_ThreadData (NrnThread& nt) {
@@ -938,11 +954,8 @@ void read_phase3(NrnThread& nt, UserParams& userParams) {
938954}
939955
940956static size_t memb_list_size (NrnThreadMembList* tml) {
941- size_t sz_ntml = sizeof (NrnThreadMembList);
942- size_t sz_ml = sizeof (Memb_list);
943- size_t szi = sizeof (int );
944- size_t nbyte = sz_ntml + sz_ml;
945- nbyte += tml->ml ->nodecount * szi;
957+ size_t nbyte = sizeof (NrnThreadMembList) + sizeof (Memb_list);
958+ nbyte += tml->ml ->nodecount * sizeof (int );
946959 nbyte += corenrn.get_prop_dparam_size ()[tml->index ] * tml->ml ->nodecount * sizeof (Datum);
947960#ifdef DEBUG
948961 int i = tml->index ;
@@ -982,18 +995,21 @@ size_t input_presyn_size(void) {
982995 return nbyte;
983996}
984997
985- size_t model_size (void ) {
998+ size_t model_size (bool detailed_report ) {
986999 size_t nbyte = 0 ;
987- size_t szd = sizeof (double );
988- size_t szi = sizeof (int );
989- size_t szv = sizeof (void *);
990- size_t sz_th = sizeof (NrnThread);
991- size_t sz_ps = sizeof (PreSyn);
992- size_t sz_psi = sizeof (InputPreSyn);
993- size_t sz_nc = sizeof (NetCon);
994- size_t sz_pp = sizeof (Point_process);
1000+ size_t sz_nrnThread = sizeof (NrnThread);
1001+ size_t sz_presyn = sizeof (PreSyn);
1002+ size_t sz_input_presyn = sizeof (InputPreSyn);
1003+ size_t sz_netcon = sizeof (NetCon);
1004+ size_t sz_pntproc = sizeof (Point_process);
9951005 size_t nccnt = 0 ;
9961006
1007+ std::vector<size_t > size_data (13 , 0 );
1008+ std::vector<size_t > global_size_data_min (13 , 0 );
1009+ std::vector<size_t > global_size_data_max (13 , 0 );
1010+ std::vector<size_t > global_size_data_sum (13 , 0 );
1011+ std::vector<float > global_size_data_avg (13 , 0.0 );
1012+
9971013 for (int i = 0 ; i < nrn_nthread; ++i) {
9981014 NrnThread& nt = nrn_threads[i];
9991015 size_t nb_nt = 0 ; // per thread
@@ -1007,9 +1023,14 @@ size_t model_size(void) {
10071023 }
10081024
10091025 // basic thread size includes mechanism data and G*V=I matrix
1010- nb_nt += sz_th;
1011- nb_nt += nt._ndata * szd + nt._nidata * szi + nt._nvdata * szv;
1012- nb_nt += nt.end * szi; // _v_parent_index
1026+ nb_nt += sz_nrnThread;
1027+ nb_nt += nt._ndata * sizeof (double ) + nt._nidata * sizeof (int ) + nt._nvdata * sizeof (void *);
1028+ nb_nt += nt.end * sizeof (int ); // _v_parent_index
1029+
1030+ // network connectivity
1031+ nb_nt += nt.n_pntproc * sz_pntproc + nt.n_netcon * sz_netcon + nt.n_presyn * sz_presyn +
1032+ nt.n_input_presyn * sz_input_presyn + nt.n_weight * sizeof (double );
1033+ nbyte += nb_nt;
10131034
10141035#ifdef DEBUG
10151036 printf (" ncell=%d end=%d nmech=%d\n " , nt.ncell , nt.end , nmech);
@@ -1023,35 +1044,167 @@ size_t model_size(void) {
10231044 printf (" n_pntproc=%d sz=%ld nbyte=%ld\n " , nt.n_pntproc , sz_pp, nt.n_pntproc * sz_pp);
10241045 printf (" n_netcon=%d sz=%ld nbyte=%ld\n " , nt.n_netcon , sz_nc, nt.n_netcon * sz_nc);
10251046 printf (" n_weight = %d\n " , nt.n_weight );
1026- #endif
10271047
1028- // spike handling
1029- nb_nt += nt.n_pntproc * sz_pp + nt.n_netcon * sz_nc + nt.n_presyn * sz_ps +
1030- nt.n_input_presyn * sz_psi + nt.n_weight * szd;
1031- nbyte += nb_nt;
1032- #ifdef DEBUG
10331048 printf (" %d thread %d total bytes %ld\n " , nrnmpi_myid, i, nb_nt);
10341049#endif
1050+
1051+ if (detailed_report) {
1052+ size_data[0 ] += nt.ncell ;
1053+ size_data[1 ] += nt.end ;
1054+ size_data[2 ] += nmech;
1055+ size_data[3 ] += nt._ndata ;
1056+ size_data[4 ] += nt._nidata ;
1057+ size_data[5 ] += nt._nvdata ;
1058+ size_data[6 ] += nt.n_presyn ;
1059+ size_data[7 ] += nt.n_input_presyn ;
1060+ size_data[8 ] += nt.n_pntproc ;
1061+ size_data[9 ] += nt.n_netcon ;
1062+ size_data[10 ] += nt.n_weight ;
1063+ size_data[11 ] += nb_nt;
1064+ }
10351065 }
10361066
1037- #ifdef DEBUG
1038- printf (" %d netcon pointers %ld nbyte=%ld\n " , nrnmpi_myid, nccnt, nccnt * sizeof (NetCon*));
1039- #endif
10401067 nbyte += nccnt * sizeof (NetCon*);
10411068 nbyte += output_presyn_size ();
10421069 nbyte += input_presyn_size ();
10431070
1071+ nbyte += nrnran123_instance_count () * nrnran123_state_size ();
1072+
10441073#ifdef DEBUG
1074+ printf (" %d netcon pointers %ld nbyte=%ld\n " , nrnmpi_myid, nccnt, nccnt * sizeof (NetCon*));
10451075 printf (" nrnran123 size=%ld cnt=%ld nbyte=%ld\n " ,
10461076 nrnran123_state_size (),
10471077 nrnran123_instance_count (),
10481078 nrnran123_instance_count () * nrnran123_state_size ());
1079+ printf (" %d total bytes %ld\n " , nrnmpi_myid, nbyte);
1080+ #endif
1081+ if (detailed_report) {
1082+ size_data[12 ] = nbyte;
1083+ #if NRNMPI
1084+ MPI_Allreduce (&size_data[0 ],
1085+ &global_size_data_min[0 ],
1086+ 13 ,
1087+ MPI_UNSIGNED_LONG_LONG,
1088+ MPI_MIN,
1089+ nrnmpi_comm);
1090+ MPI_Allreduce (&size_data[0 ],
1091+ &global_size_data_max[0 ],
1092+ 13 ,
1093+ MPI_UNSIGNED_LONG_LONG,
1094+ MPI_MAX,
1095+ nrnmpi_comm);
1096+ MPI_Allreduce (&size_data[0 ],
1097+ &global_size_data_sum[0 ],
1098+ 13 ,
1099+ MPI_UNSIGNED_LONG_LONG,
1100+ MPI_SUM,
1101+ nrnmpi_comm);
1102+ for (int i = 0 ; i < 13 ; i++) {
1103+ global_size_data_avg[i] = global_size_data_sum[i] / float (nrnmpi_numprocs);
1104+ }
1105+ #else
1106+ global_size_data_max = size_data;
1107+ global_size_data_min = size_data;
1108+ global_size_data_avg.assign (size_data.cbegin (), size_data.cend ());
10491109#endif
1110+ // now print the collected data:
1111+ if (nrnmpi_myid == 0 ) {
1112+ printf (" Memory size information for all NrnThreads per rank\n " );
1113+ printf (" ------------------------------------------------------------------\n " );
1114+ printf (" %22s %12s %12s %12s\n " , " field" , " min" , " max" , " avg" );
1115+ printf (" %22s %12ld %12ld %15.2f\n " ,
1116+ " n_cell" ,
1117+ global_size_data_min[0 ],
1118+ global_size_data_max[0 ],
1119+ global_size_data_avg[0 ]);
1120+ printf (" %22s %12ld %12ld %15.2f\n " ,
1121+ " n_compartment" ,
1122+ global_size_data_min[1 ],
1123+ global_size_data_max[1 ],
1124+ global_size_data_avg[1 ]);
1125+ printf (" %22s %12ld %12ld %15.2f\n " ,
1126+ " n_mechanism" ,
1127+ global_size_data_min[2 ],
1128+ global_size_data_max[2 ],
1129+ global_size_data_avg[2 ]);
1130+ printf (" %22s %12ld %12ld %15.2f\n " ,
1131+ " _ndata" ,
1132+ global_size_data_min[3 ],
1133+ global_size_data_max[3 ],
1134+ global_size_data_avg[3 ]);
1135+ printf (" %22s %12ld %12ld %15.2f\n " ,
1136+ " _nidata" ,
1137+ global_size_data_min[4 ],
1138+ global_size_data_max[4 ],
1139+ global_size_data_avg[4 ]);
1140+ printf (" %22s %12ld %12ld %15.2f\n " ,
1141+ " _nvdata" ,
1142+ global_size_data_min[5 ],
1143+ global_size_data_max[5 ],
1144+ global_size_data_avg[5 ]);
1145+ printf (" %22s %12ld %12ld %15.2f\n " ,
1146+ " n_presyn" ,
1147+ global_size_data_min[6 ],
1148+ global_size_data_max[6 ],
1149+ global_size_data_avg[6 ]);
1150+ printf (" %22s %12ld %12ld %15.2f\n " ,
1151+ " n_presyn (bytes)" ,
1152+ global_size_data_min[6 ] * sz_presyn,
1153+ global_size_data_max[6 ] * sz_presyn,
1154+ global_size_data_avg[6 ] * sz_presyn);
1155+ printf (" %22s %12ld %12ld %15.2f\n " ,
1156+ " n_input_presyn" ,
1157+ global_size_data_min[7 ],
1158+ global_size_data_max[7 ],
1159+ global_size_data_avg[7 ]);
1160+ printf (" %22s %12ld %12ld %15.2f\n " ,
1161+ " n_input_presyn (bytes)" ,
1162+ global_size_data_min[7 ] * sz_input_presyn,
1163+ global_size_data_max[7 ] * sz_input_presyn,
1164+ global_size_data_avg[7 ] * sz_input_presyn);
1165+ printf (" %22s %12ld %12ld %15.2f\n " ,
1166+ " n_pntproc" ,
1167+ global_size_data_min[8 ],
1168+ global_size_data_max[8 ],
1169+ global_size_data_avg[8 ]);
1170+ printf (" %22s %12ld %12ld %15.2f\n " ,
1171+ " n_pntproc (bytes)" ,
1172+ global_size_data_min[8 ] * sz_pntproc,
1173+ global_size_data_max[8 ] * sz_pntproc,
1174+ global_size_data_avg[8 ] * sz_pntproc);
1175+ printf (" %22s %12ld %12ld %15.2f\n " ,
1176+ " n_netcon" ,
1177+ global_size_data_min[9 ],
1178+ global_size_data_max[9 ],
1179+ global_size_data_avg[9 ]);
1180+ printf (" %22s %12ld %12ld %15.2f\n " ,
1181+ " n_netcon (bytes)" ,
1182+ global_size_data_min[9 ] * sz_netcon,
1183+ global_size_data_max[9 ] * sz_netcon,
1184+ global_size_data_avg[9 ] * sz_netcon);
1185+ printf (" %22s %12ld %12ld %15.2f\n " ,
1186+ " n_weight" ,
1187+ global_size_data_min[10 ],
1188+ global_size_data_max[10 ],
1189+ global_size_data_avg[10 ]);
1190+ printf (" %22s %12ld %12ld %15.2f\n " ,
1191+ " NrnThread (bytes)" ,
1192+ global_size_data_min[11 ],
1193+ global_size_data_max[11 ],
1194+ global_size_data_avg[11 ]);
1195+ printf (" %22s %12ld %12ld %15.2f\n " ,
1196+ " model size (bytes)" ,
1197+ global_size_data_min[12 ],
1198+ global_size_data_max[12 ],
1199+ global_size_data_avg[12 ]);
1200+ }
1201+ }
10501202
1051- nbyte += nrnran123_instance_count () * nrnran123_state_size ();
1203+ #if NRNMPI
1204+ size_t global_nbyte = 0 ;
1205+ MPI_Allreduce (&nbyte, &global_nbyte, 1 , MPI_UNSIGNED_LONG_LONG, MPI_SUM, nrnmpi_comm);
1206+ nbyte = global_nbyte;
10521207
1053- #ifdef DEBUG
1054- printf (" %d total bytes %ld\n " , nrnmpi_myid, nbyte);
10551208#endif
10561209
10571210 return nbyte;
0 commit comments