@@ -1218,8 +1218,8 @@ void Analyse::computeStackSize() {
12181218
12191219 std::vector<int64_t > clique_entries (sn_count_);
12201220 std::vector<int64_t > frontal_entries (sn_count_);
1221- stack_size_serial_ .assign (sn_count_, 0 );
1222- stack_size_parallel_ .assign (sn_count_, 0 );
1221+ stack_subtree_serial_ .assign (sn_count_, 0 );
1222+ stack_subtree_parallel_ .assign (sn_count_, 0 );
12231223 factors_total_entries_ = 0 ;
12241224
12251225 // initialise data of supernodes
@@ -1244,8 +1244,8 @@ void Analyse::computeStackSize() {
12441244 for (Int sn = 0 ; sn < sn_count_; ++sn) {
12451245 // leaf node
12461246 if (head[sn] == -1 ) {
1247- stack_size_serial_ [sn] = clique_entries[sn];
1248- stack_size_parallel_ [sn] = clique_entries[sn];
1247+ stack_subtree_serial_ [sn] = clique_entries[sn];
1248+ stack_subtree_parallel_ [sn] = clique_entries[sn];
12491249 continue ;
12501250 }
12511251
@@ -1264,7 +1264,8 @@ void Analyse::computeStackSize() {
12641264
12651265 Int child = head[sn];
12661266 while (child != -1 ) {
1267- int64_t current = stack_size_serial_[child] + clique_partial_entries_ser;
1267+ int64_t current =
1268+ stack_subtree_serial_[child] + clique_partial_entries_ser;
12681269
12691270 clique_total_entries_ser += clique_entries[child];
12701271 clique_partial_entries_ser += clique_entries[child];
@@ -1274,8 +1275,8 @@ void Analyse::computeStackSize() {
12741275 // the same way. If the child is in the layer, it is ignored for this
12751276 // computation, since it gets its own space and doesn't need space in the
12761277 // parent's stack.
1277- if (layerIndex .find (child) == layerIndex .end ()) {
1278- current = stack_size_parallel_ [child] + clique_partial_entries_par;
1278+ if (layerIndex_ .find (child) == layerIndex_ .end ()) {
1279+ current = stack_subtree_parallel_ [child] + clique_partial_entries_par;
12791280
12801281 clique_total_entries_par += clique_entries[child];
12811282 clique_partial_entries_par += clique_entries[child];
@@ -1288,8 +1289,8 @@ void Analyse::computeStackSize() {
12881289 int64_t storage_2_ser = clique_total_entries_ser + clique_entries[sn];
12891290 int64_t storage_2_par = clique_total_entries_par + clique_entries[sn];
12901291
1291- stack_size_serial_ [sn] = std::max (storage_1_ser, storage_2_ser);
1292- stack_size_parallel_ [sn] = std::max (storage_1_par, storage_2_par);
1292+ stack_subtree_serial_ [sn] = std::max (storage_1_ser, storage_2_ser);
1293+ stack_subtree_parallel_ [sn] = std::max (storage_1_par, storage_2_par);
12931294 }
12941295}
12951296
@@ -1342,8 +1343,8 @@ void Analyse::generateParallelLayer(Int threads) {
13421343 // - subtrees not added because too small
13431344
13441345 std::vector<Int> layer;
1345- std::set<Int> above_layer ;
1346- std::set<Int> small_subtrees ;
1346+ aboveLayer_. clear () ;
1347+ smallSubtrees_. clear () ;
13471348
13481349 // insert roots in layer
13491350 for (Int sn = 0 ; sn < sn_count_; ++sn) {
@@ -1365,7 +1366,7 @@ void Analyse::generateParallelLayer(Int threads) {
13651366 subtree_ops[*std::next (layer.rbegin (), 1 )];
13661367
13671368 // printf("iter %d,layer %d, above %d, small %d, ratio %f\n", iter,
1368- // layer.size(), above_layer .size(), small_subtrees .size(),
1369+ // layer.size(), aboveLayer_ .size(), smallSubtrees_ .size(),
13691370 // ratio_first_two);
13701371
13711372 // if there are enough subtrees and they are somewhat balanced, stop
@@ -1390,7 +1391,7 @@ void Analyse::generateParallelLayer(Int threads) {
13901391 auto it = layer.begin ();
13911392 std::advance (it, index_to_remove);
13921393 layer.erase (it);
1393- above_layer .insert (node_to_remove);
1394+ aboveLayer_ .insert (node_to_remove);
13941395
13951396 // find child with most operations
13961397 Int child_most_ops = -1 ;
@@ -1422,7 +1423,7 @@ void Analyse::generateParallelLayer(Int threads) {
14221423 child == child_most_ops) {
14231424 layer.push_back (child);
14241425 } else {
1425- small_subtrees .insert (child);
1426+ smallSubtrees_ .insert (child);
14261427 }
14271428 child = next[child];
14281429 }
@@ -1435,7 +1436,7 @@ void Analyse::generateParallelLayer(Int threads) {
14351436 // in the layer.
14361437 Int index = 0 ;
14371438 for (auto it = layer.begin (); it != layer.end (); ++it) {
1438- layerIndex .insert ({*it, index});
1439+ layerIndex_ .insert ({*it, index});
14391440 ++index;
14401441 }
14411442 }
@@ -1454,7 +1455,8 @@ void Analyse::generateParallelLayer(Int threads) {
14541455 serial_stack_size_ = 0 ;
14551456 for (Int sn = 0 ; sn < sn_count_; ++sn) {
14561457 if (sn_parent_[sn] == -1 )
1457- serial_stack_size_ = std::max (serial_stack_size_, stack_size_serial_[sn]);
1458+ serial_stack_size_ =
1459+ std::max (serial_stack_size_, stack_subtree_serial_[sn]);
14581460 }
14591461
14601462 // number of entries for stacks in parallel: max stack_size of any root, plus
@@ -1463,10 +1465,23 @@ void Analyse::generateParallelLayer(Int threads) {
14631465 for (Int sn = 0 ; sn < sn_count_; ++sn) {
14641466 if (sn_parent_[sn] == -1 )
14651467 parallel_stack_size_ =
1466- std::max (parallel_stack_size_, stack_size_parallel_[sn]);
1468+ std::max (parallel_stack_size_, stack_subtree_parallel_[sn]);
1469+ }
1470+ root_stack_entries_ = parallel_stack_size_;
1471+ for (auto & subtree : layerIndex_)
1472+ parallel_stack_size_ += stack_subtree_parallel_[subtree.first ];
1473+
1474+ // generate info about subtrees in the layer
1475+ std::vector<Int> first_desc;
1476+ firstDescendant (sn_parent_, first_desc);
1477+ layerSubtrees_.resize (layerIndex_.size ());
1478+ for (auto & subtree : layerIndex_) {
1479+ Int node = subtree.first ;
1480+ Int index = subtree.second ;
1481+ layerSubtrees_[index].start = first_desc[node];
1482+ layerSubtrees_[index].end = node + 1 ;
1483+ layerSubtrees_[index].stack = stack_subtree_parallel_[node];
14671484 }
1468- for (auto a : layerIndex)
1469- parallel_stack_size_ += stack_size_parallel_[a.first ];
14701485}
14711486
14721487Int Analyse::run (Symbolic& S) {
@@ -1566,6 +1581,7 @@ Int Analyse::run(Symbolic& S) {
15661581 S.serial_stack_size_ = serial_stack_size_;
15671582 S.parallel_stack_size_ = parallel_stack_size_;
15681583 S.factors_total_entries_ = factors_total_entries_;
1584+ S.root_stack_entries_ = root_stack_entries_;
15691585
15701586 // compute largest supernode
15711587 std::vector<Int> sn_size (sn_start_.begin () + 1 , sn_start_.end ());
@@ -1600,6 +1616,10 @@ Int Analyse::run(Symbolic& S) {
16001616 S.relind_clique_ = std::move (relind_clique_);
16011617 S.consecutive_sums_ = std::move (consecutive_sums_);
16021618 S.clique_block_start_ = std::move (clique_block_start_);
1619+ S.layerIndex_ = std::move (layerIndex_);
1620+ S.layerSubtrees_ = std::move (layerSubtrees_);
1621+ S.aboveLayer_ = std::move (aboveLayer_);
1622+ S.smallSubtrees_ = std::move (smallSubtrees_);
16031623
16041624#if HIPO_TIMING_LEVEL >= 1
16051625 data_.sumTime (kTimeAnalyse , clock_total.stop ());
0 commit comments