@@ -20,7 +20,12 @@ Factorise::Factorise(const Symbolic& S, const std::vector<Int>& rowsA,
2020 const std::vector<double >& valA, const Regul& regul,
2121 const Log* log, DataCollector& data,
2222 std::vector<std::vector<double >>& sn_columns)
23- : S_{S}, sn_columns_{sn_columns}, regul_{regul}, log_{log}, data_{data} {
23+ : S_{S},
24+ child_left_ (S_.sn()),
25+ sn_columns_{sn_columns},
26+ regul_{regul},
27+ log_{log},
28+ data_{data} {
2429 // Input the symmetric matrix to be factorised in CSC format and the symbolic
2530 // factorisation coming from Analyse.
2631 // Only the lower triangular part of the matrix is used.
@@ -63,6 +68,18 @@ Factorise::Factorise(const Symbolic& S, const std::vector<Int>& rowsA,
6368 first_child_reverse_ = first_child_;
6469 next_child_reverse_ = next_child_;
6570 reverseLinkedList (first_child_reverse_, next_child_reverse_);
71+
72+ // count the children of each supernode by walking its linked list of children
73+ for (Int sn = 0 ; sn < S_.sn (); ++sn) {
74+ Int child = first_child_[sn];
75+ while (child != -1 ) {
76+ child_left_[sn]++;
77+ child = next_child_[child];
78+ }
79+
80+ // count number of roots
81+ if (S_.snParent (sn) == -1 ) roots_left_++;
82+ }
6683 }
6784
6885 // compute largest diagonal entry in absolute value
@@ -172,23 +189,8 @@ void Factorise::processSupernode(Int sn) {
172189 // Assemble frontal matrix for supernode sn, perform partial factorisation and
173190 // store the result.
174191
175- highs::parallel::TaskGroup tg;
176-
177192 if (flag_stop_) return ;
178193
179- if (S_.parTree ()) {
180- // spawn children of this supernode in reverse order
181- Int child_to_spawn = first_child_reverse_[sn];
182- while (child_to_spawn != -1 ) {
183- tg.spawn ([=]() { processSupernode (child_to_spawn); });
184- child_to_spawn = next_child_reverse_[child_to_spawn];
185- }
186-
187- // wait for first child to finish, before starting the parent (if there is a
188- // first child)
189- if (first_child_reverse_[sn] != -1 ) tg.sync ();
190- }
191-
192194#if HIPO_TIMING_LEVEL >= 2
193195 Clock clock;
194196#endif
@@ -240,17 +242,10 @@ void Factorise::processSupernode(Int sn) {
240242 // Schur contribution of the current child
241243 std::vector<double >& child_clique = schur_contribution_[child_sn];
242244
243- if (S_.parTree ()) {
244- // sync with spawned child, apart from the first one
245- if (child_sn != first_child_[sn]) tg.sync ();
246-
247- if (flag_stop_) return ;
248-
249- if (child_clique.size () == 0 ) {
250- if (log_) log_->printDevInfo (" Missing child supernode contribution\n " );
251- flag_stop_ = true ;
252- return ;
253- }
245+ if (child_clique.size () == 0 ) {
246+ if (log_) log_->printDevInfo (" Missing child supernode contribution\n " );
247+ flag_stop_ = true ;
248+ return ;
254249 }
255250
256251 // determine size of clique of child
@@ -351,6 +346,22 @@ void Factorise::processSupernode(Int sn) {
351346#if HIPO_TIMING_LEVEL >= 2
352347 data_.sumTime (kTimeFactoriseTerminate , clock.stop ());
353348#endif
349+
350+ // Finished processing this supernode. If S_.parTree() holds, decrement the
351+ // parent's count of pending children and spawn the parent when it reaches zero.
352+ highs::parallel::TaskGroup tg;
353+ if (S_.parTree ()) {
354+ Int parent = S_.snParent (sn);
355+ if (parent != -1 ) {
356+ Int left = child_left_[parent].fetch_sub (1 ) - 1 ;
357+ if (left == 0 ) {
358+ tg.spawn ([this , parent]() { processSupernode (parent); });
359+ }
360+ } else {
361+ roots_left_.fetch_sub (1 );
362+ }
363+ }
364+ tg.taskWait ();
354365}
355366
356367bool Factorise::run (Numeric& num) {
@@ -372,16 +383,14 @@ bool Factorise::run(Numeric& num) {
372383 sn_columns_.resize (S_.sn ());
373384
374385 if (S_.parTree ()) {
375- Int spawned_roots{};
376- // spawn tasks for root supernodes
377386 for (Int sn = 0 ; sn < S_.sn (); ++sn) {
378- if (S_. snParent (sn) == - 1 ) {
379- tg. spawn ([=]() { processSupernode (sn); });
380- ++spawned_roots ;
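387+ // spawn only the leaves (supernodes with no children); each parent is spawned from processSupernode by the child that brings its pending-children count to zero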
388+ if (first_child_[sn] == - 1 ) {
389+ tg. spawn ([ this , sn]() { processSupernode (sn); }) ;
381390 }
382391 }
383392
384- // sync tasks for root supernodes
393+ // sync all spawned tasks
385394 tg.taskWait ();
386395 } else {
387396 // go through each supernode serially
0 commit comments