@@ -908,8 +908,8 @@ void Analyse::relativeIndClique() {
908908 }
909909}
910910
911- void Analyse::computeStorage (Int fr, Int sz, double & fr_entries,
912- double & cl_entries) const {
911+ void Analyse::computeStorage (Int fr, Int sz, int64_t & fr_entries,
912+ int64_t & cl_entries) const {
913913 // compute storage required by frontal and clique, based on the format used
914914
915915 const Int cl = fr - sz;
@@ -920,17 +920,17 @@ void Analyse::computeStorage(Int fr, Int sz, double& fr_entries,
920920
921921 // clique is stored as a collection of rectangles
922922 n_blocks = (cl - 1 ) / nb_ + 1 ;
923- double schur_size{};
923+ int64_t schur_size{};
924924 for (Int j = 0 ; j < n_blocks; ++j) {
925925 const Int jb = std::min (nb_, cl - j * nb_);
926- schur_size += (double )(cl - j * nb_) * jb;
926+ schur_size += (int64_t )(cl - j * nb_) * jb;
927927 }
928928 cl_entries = schur_size;
929929}
930930
931931void Analyse::computeStorage () {
932- std::vector<double > clique_entries (sn_count_);
933- std::vector<double > frontal_entries (sn_count_);
932+ std::vector<int64_t > clique_entries (sn_count_);
933+ std::vector<int64_t > frontal_entries (sn_count_);
934934 std::vector<double > storage (sn_count_);
935935 std::vector<double > storage_factors (sn_count_);
936936
@@ -1050,8 +1050,8 @@ void Analyse::computeCriticalPath() {
10501050}
10511051
10521052void Analyse::reorderChildren () {
1053- std::vector<double > clique_entries (sn_count_);
1054- std::vector<double > frontal_entries (sn_count_);
1053+ std::vector<int64_t > clique_entries (sn_count_);
1054+ std::vector<int64_t > frontal_entries (sn_count_);
10551055 std::vector<double > storage (sn_count_);
10561056 std::vector<double > storage_factors (sn_count_);
10571057
@@ -1304,12 +1304,15 @@ void Analyse::findTreeSplitting() {
13041304 is_in_tree_splitting_[child] = true ;
13051305 current_nodedata = &res_insert.first ->second ;
13061306 current_nodedata->type = NodeType::subtree;
1307+ current_nodedata->stack_size = 0 ;
13071308 current_ops = 0.0 ;
13081309 }
13091310
13101311 current_ops += subtree_ops[child];
13111312 current_nodedata->group .push_back (child);
13121313 current_nodedata->firstdesc .push_back (first_desc[child]);
1314+ current_nodedata->stack_size =
1315+ std::max (current_nodedata->stack_size , stack_subtrees_[child]);
13131316
13141317 if (current_ops > small_thresh) current_nodedata = nullptr ;
13151318 }
@@ -1324,6 +1327,7 @@ void Analyse::findTreeSplitting() {
13241327 res_insert.first ->second .type = NodeType::subtree;
13251328 res_insert.first ->second .group .push_back (sn);
13261329 res_insert.first ->second .firstdesc .push_back (first_desc[sn]);
1330+ res_insert.first ->second .stack_size = stack_subtrees_[sn];
13271331 }
13281332 /*
13291333 else if (subtree_ops[sn_parent_[sn]] > small_thresh) {
@@ -1337,6 +1341,62 @@ void Analyse::findTreeSplitting() {
13371341 }
13381342}
13391343
1344+ void Analyse::computeStackSize () {
1345+ // Compute the minimum size of the stack to process each subtree.
1346+
1347+ std::vector<int64_t > clique_entries (sn_count_);
1348+ std::vector<int64_t > frontal_entries (sn_count_);
1349+ stack_subtrees_.assign (sn_count_, 0 );
1350+
1351+ // initialise data of supernodes
1352+ for (Int sn = 0 ; sn < sn_count_; ++sn) {
1353+ // supernode size
1354+ const Int sz = sn_start_[sn + 1 ] - sn_start_[sn];
1355+
1356+ // frontal size
1357+ const Int fr = ptr_sn_[sn + 1 ] - ptr_sn_[sn];
1358+
1359+ // compute storage based on format used
1360+ computeStorage (fr, sz, frontal_entries[sn], clique_entries[sn]);
1361+ }
1362+
1363+ // linked lists of children
1364+ std::vector<Int> head, next;
1365+ childrenLinkedList (sn_parent_, head, next);
1366+
1367+ // go through the supernodes
1368+ for (Int sn = 0 ; sn < sn_count_; ++sn) {
1369+ // leaf node
1370+ if (head[sn] == -1 ) {
1371+ stack_subtrees_[sn] = clique_entries[sn];
1372+ continue ;
1373+ }
1374+
1375+ // Compute storage
1376+ // storage is found as max(storage_1,storage_2), where
1377+ // storage_1 = max_j stack_size[j] + \sum_{k up to j-1} clique_entries[k]
1378+ // storage_2 = clique_total_entries (including node itself)
1379+
1380+ int64_t clique_partial_entries{};
1381+ int64_t storage_1{};
1382+
1383+ Int child = head[sn];
1384+ while (child != -1 ) {
1385+ int64_t current = stack_subtrees_[child] + clique_partial_entries;
1386+
1387+ clique_partial_entries += clique_entries[child];
1388+ storage_1 = std::max (storage_1, current);
1389+
1390+ child = next[child];
1391+ }
1392+
1393+ int64_t storage_2 = clique_partial_entries + clique_entries[sn];
1394+
1395+ stack_subtrees_[sn] = std::max (storage_1, storage_2);
1396+ max_stack_size_ = std::max (max_stack_size_, stack_subtrees_[sn]);
1397+ }
1398+ }
1399+
13401400Int Analyse::run (Symbolic& S) {
13411401 // Perform analyse phase and store the result into the symbolic object S.
13421402 // After Run returns, the Analyse object is not valid.
@@ -1411,6 +1471,7 @@ Int Analyse::run(Symbolic& S) {
14111471 computeStorage ();
14121472 computeBlockStart ();
14131473 computeCriticalPath ();
1474+ computeStackSize ();
14141475
14151476 findTreeSplitting ();
14161477
@@ -1427,6 +1488,7 @@ Int Analyse::run(Symbolic& S) {
14271488 S.serial_storage_ = serial_storage_;
14281489 S.flops_ = dense_ops_;
14291490 S.block_size_ = nb_;
1491+ S.max_stack_size_ = max_stack_size_;
14301492
14311493 // compute largest supernode
14321494 std::vector<Int> sn_size (sn_start_.begin () + 1 , sn_start_.end ());
0 commit comments