@@ -191,19 +191,19 @@ void Factorise::processSupernode(Int sn, const bool should_parallelise) {
191191 if (do_parallelise) {
192192 // if there is only one child, do not parallelise
193193 if (first_child_[sn] != -1 && next_child_[first_child_[sn]] == -1 ) {
194- spawnNode (first_child_[sn], tg, false );
194+ spawn (first_child_[sn], tg, false );
195195 do_parallelise = false ;
196196 } else {
197197 // spawn children of this supernode in reverse order
198198 Int child_to_spawn = first_child_reverse_[sn];
199199 while (child_to_spawn != -1 ) {
200- spawnNode (child_to_spawn, tg);
200+ spawn (child_to_spawn, tg);
201201 child_to_spawn = next_child_reverse_[child_to_spawn];
202202 }
203203
204204 // wait for first child to finish, before starting the parent (if there is
205205 // a first child)
206- if (first_child_reverse_[sn] != -1 ) syncNode (first_child_[sn], tg);
206+ if (first_child_reverse_[sn] != -1 ) sync (first_child_[sn], tg);
207207 }
208208 }
209209
@@ -262,13 +262,18 @@ void Factorise::processSupernode(Int sn, const bool should_parallelise) {
262262 // Child contribution is found:
263263 // - in cliquestack, if we are processing the tree in serial.
264264 // - in schur_contribution_ if we are processing the tree in parallel.
265- // Children are always summed from last to first.
265+ // Children are summed:
266+ // - in reverse order in serial, so that the correct child is found on top
267+ // of the CliqueStack.
268+ // - in forward order otherwise, so that if a small subtree is synced, and
269+ // it is not the first in its group, it will already have synced when it
270+ // is needed.
266271
267272 const double * child_clique;
268273
269274 if (do_parallelise) {
270275 // sync with spawned child, apart from the first one
271- if (child_sn != first_child_[sn]) syncNode (child_sn, tg);
276+ if (child_sn != first_child_[sn]) sync (child_sn, tg);
272277 if (flag_stop_.load (std::memory_order_relaxed)) return ;
273278 }
274279
@@ -379,7 +384,7 @@ void Factorise::processSupernode(Int sn, const bool should_parallelise) {
379384 HIPO_CLOCK_STOP (2 , data_, kTimeFactoriseTerminate );
380385}
381386
382- void Factorise::spawnNode (Int sn, const TaskGroupSpecial& tg, bool do_spawn) {
387+ void Factorise::spawn (Int sn, const TaskGroupSpecial& tg, bool do_spawn) {
383388 // If do_spawn is true, a task is actually spawned, otherwise it is executed
384389 // immediately. This avoids the overhead of spawning a task if a supernode has
385390 // a single child.
@@ -388,7 +393,7 @@ void Factorise::spawnNode(Int sn, const TaskGroupSpecial& tg, bool do_spawn) {
388393
389394 if (!data) {
390395 // sn is head of small subtree, but not the first subtree in the group.
391- // It will be processed in another task.
396+ // It is processed in another task.
392397 return ;
393398 }
394399
@@ -423,8 +428,8 @@ void Factorise::spawnNode(Int sn, const TaskGroupSpecial& tg, bool do_spawn) {
423428 }
424429}
425430
426- void Factorise::syncNode (Int sn, const TaskGroupSpecial& tg) {
427- // If spawnNode (sn,tg) created a task, then sync it.
431+ void Factorise::sync (Int sn, const TaskGroupSpecial& tg) {
432+ // If spawn (sn,tg) created a task, then sync it.
428433 // This happens only if sn is found in the treeSplitting data structure.
429434 if (S_.treeSplitting ().belong (sn)) tg.sync ();
430435}
@@ -448,7 +453,7 @@ bool Factorise::run(Numeric& num) {
448453 if (S_.parTree ()) {
449454 // spawn tasks for root supernodes
450455 for (Int sn = 0 ; sn < S_.sn (); ++sn) {
451- if (S_.snParent (sn) == -1 ) (spawnNode (sn, tg));
456+ if (S_.snParent (sn) == -1 ) (spawn (sn, tg));
452457 }
453458
454459 // sync tasks for root supernodes
0 commit comments