@@ -922,8 +922,8 @@ void Analyse::relativeIndClique() {
922922 }
923923}
924924
925- void Analyse::computeStorage (Int fr, Int sz, double & fr_entries,
926- double & cl_entries) const {
925+ void Analyse::computeStorage (Int fr, Int sz, Int64 & fr_entries,
926+ Int64 & cl_entries) const {
927927 // compute storage required by frontal and clique, based on the format used
928928
929929 const Int cl = fr - sz;
@@ -934,91 +934,14 @@ void Analyse::computeStorage(Int fr, Int sz, double& fr_entries,
934934
935935 // clique is stored as a collection of rectangles
936936 n_blocks = (cl - 1 ) / nb_ + 1 ;
937- double schur_size{};
937+ Int64 schur_size{};
938938 for (Int j = 0 ; j < n_blocks; ++j) {
939939 const Int jb = std::min (nb_, cl - j * nb_);
940- schur_size += (double )(cl - j * nb_) * jb;
940+ schur_size += (Int64 )(cl - j * nb_) * jb;
941941 }
942942 cl_entries = schur_size;
943943}
944944
945- void Analyse::computeStorage () {
946- std::vector<double > clique_entries (sn_count_);
947- std::vector<double > frontal_entries (sn_count_);
948- std::vector<double > storage (sn_count_);
949- std::vector<double > storage_factors (sn_count_);
950-
951- // initialise data of supernodes
952- for (Int sn = 0 ; sn < sn_count_; ++sn) {
953- // supernode size
954- const Int sz = sn_start_[sn + 1 ] - sn_start_[sn];
955-
956- // frontal size
957- const Int fr = ptr_sn_[sn + 1 ] - ptr_sn_[sn];
958-
959- // compute storage based on format used
960- computeStorage (fr, sz, frontal_entries[sn], clique_entries[sn]);
961-
962- // compute number of entries in factors within the subtree
963- storage_factors[sn] += frontal_entries[sn];
964- if (sn_parent_[sn] != -1 )
965- storage_factors[sn_parent_[sn]] += storage_factors[sn];
966- }
967-
968- // linked lists of children
969- std::vector<Int> head, next;
970- childrenLinkedList (sn_parent_, head, next);
971-
972- // go through the supernodes
973- for (Int sn = 0 ; sn < sn_count_; ++sn) {
974- // leaf node
975- if (head[sn] == -1 ) {
976- storage[sn] = frontal_entries[sn] + clique_entries[sn];
977- continue ;
978- }
979-
980- double clique_total_entries{};
981- double factors_total_entries{};
982- Int child = head[sn];
983- while (child != -1 ) {
984- clique_total_entries += clique_entries[child];
985- factors_total_entries += storage_factors[child];
986- child = next[child];
987- }
988-
989- // Compute storage
990- // storage is found as max(storage_1,storage_2), where
991- // storage_1 = max_j storage[j] + \sum_{k up to j-1} clique_entries[k] +
992- // storage_factors[k]
993- // storage_2 = frontal_entries + clique_entries + clique_total_entries +
994- // factors_total_entries
995- const double storage_2 = frontal_entries[sn] + clique_entries[sn] +
996- clique_total_entries + factors_total_entries;
997-
998- double clique_partial_entries{};
999- double factors_partial_entries{};
1000- double storage_1{};
1001-
1002- child = head[sn];
1003- while (child != -1 ) {
1004- double current =
1005- storage[child] + clique_partial_entries + factors_partial_entries;
1006-
1007- clique_partial_entries += clique_entries[child];
1008- factors_partial_entries += storage_factors[child];
1009- storage_1 = std::max (storage_1, current);
1010-
1011- child = next[child];
1012- }
1013- storage[sn] = std::max (storage_1, storage_2);
1014- }
1015-
1016- for (Int sn = 0 ; sn < sn_count_; ++sn) {
1017- // save max storage needed, multiply by 8 because double needs 8 bytes
1018- serial_storage_ = std::max (serial_storage_, 8 * storage[sn]);
1019- }
1020- }
1021-
1022945void Analyse::computeCriticalPath () {
1023946 // Compute the critical path within the supernodal elimination tree, and the
1024947 // number of operations along the path. This is the number of operations that
@@ -1064,8 +987,8 @@ void Analyse::computeCriticalPath() {
1064987}
1065988
1066989void Analyse::reorderChildren () {
1067- std::vector<double > clique_entries (sn_count_);
1068- std::vector<double > frontal_entries (sn_count_);
990+ std::vector<Int64 > clique_entries (sn_count_);
991+ std::vector<Int64 > frontal_entries (sn_count_);
1069992 std::vector<double > storage (sn_count_);
1070993 std::vector<double > storage_factors (sn_count_);
1071994
@@ -1266,6 +1189,72 @@ Int Analyse::checkOverflow() const {
12661189 return 0 ;
12671190}
12681191
1192+ void Analyse::computeStackSize () {
1193+ // Compute the minimum size of the stack to process the elimination tree
1194+ // serially.
1195+
1196+ std::vector<Int64> clique_entries (sn_count_);
1197+ std::vector<Int64> stack_subtrees (sn_count_);
1198+ Int64 total_frontal{};
1199+
1200+ // initialise data of supernodes
1201+ for (Int sn = 0 ; sn < sn_count_; ++sn) {
1202+ // supernode size
1203+ const Int sz = sn_start_[sn + 1 ] - sn_start_[sn];
1204+
1205+ // frontal size
1206+ const Int fr = ptr_sn_[sn + 1 ] - ptr_sn_[sn];
1207+
1208+ Int64 frontal_entries{};
1209+
1210+ // compute storage based on format used
1211+ computeStorage (fr, sz, frontal_entries, clique_entries[sn]);
1212+
1213+ total_frontal += frontal_entries;
1214+ }
1215+
1216+ // linked lists of children
1217+ std::vector<Int> head, next;
1218+ childrenLinkedList (sn_parent_, head, next);
1219+
1220+ // go through the supernodes
1221+ for (Int sn = 0 ; sn < sn_count_; ++sn) {
1222+ // leaf node
1223+ if (head[sn] == -1 ) {
1224+ stack_subtrees[sn] = clique_entries[sn];
1225+ continue ;
1226+ }
1227+
1228+ // Compute storage
1229+ // storage is found as max(storage_1,storage_2), where
1230+ // storage_1 = max_j stack_size[j] + \sum_{k up to j-1} clique_entries[k]
1231+ // storage_2 = clique_total_entries (including node itself)
1232+
1233+ Int64 clique_partial_entries{};
1234+ Int64 storage_1{};
1235+
1236+ Int child = head[sn];
1237+ while (child != -1 ) {
1238+ Int64 current = stack_subtrees[child] + clique_partial_entries;
1239+
1240+ clique_partial_entries += clique_entries[child];
1241+ storage_1 = std::max (storage_1, current);
1242+
1243+ child = next[child];
1244+ }
1245+
1246+ Int64 storage_2 = clique_partial_entries + clique_entries[sn];
1247+
1248+ stack_subtrees[sn] = std::max (storage_1, storage_2);
1249+ max_stack_size_ = std::max (max_stack_size_, stack_subtrees[sn]);
1250+ }
1251+
1252+ // minimum storage in serial is equal to the space needed to store the
1253+ // factorisation and the maximum size of the stack. Times 8 to obtain the
1254+ // number of bytes.
1255+ serial_storage_ = (total_frontal + max_stack_size_) * 8 ;
1256+ }
1257+
12691258Int Analyse::run (Symbolic& S) {
12701259 // Perform analyse phase and store the result into the symbolic object S.
12711260 // After Run returns, the Analyse object is not valid.
@@ -1337,9 +1326,9 @@ Int Analyse::run(Symbolic& S) {
13371326 data_.sumTime (kTimeAnalyseRelInd , clock_items.stop ());
13381327#endif
13391328
1340- computeStorage ();
13411329 computeBlockStart ();
13421330 computeCriticalPath ();
1331+ computeStackSize ();
13431332
13441333 // move relevant stuff into S
13451334 S.n_ = n_;
@@ -1354,6 +1343,7 @@ Int Analyse::run(Symbolic& S) {
13541343 S.serial_storage_ = serial_storage_;
13551344 S.flops_ = dense_ops_;
13561345 S.block_size_ = nb_;
1346+ S.max_stack_size_ = max_stack_size_;
13571347
13581348 // compute largest supernode
13591349 std::vector<Int> sn_size (sn_start_.begin () + 1 , sn_start_.end ());
0 commit comments