Skip to content

Commit 924fe4e

Browse files
committed
Tree explored leaves-up rather than roots-down
1 parent 82198ae commit 924fe4e

File tree

2 files changed

+49
-33
lines changed

2 files changed

+49
-33
lines changed

highs/ipm/hipo/factorhighs/Factorise.cpp

Lines changed: 42 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,12 @@ Factorise::Factorise(const Symbolic& S, const std::vector<Int>& rowsA,
2020
const std::vector<double>& valA, const Regul& regul,
2121
const Log* log, DataCollector& data,
2222
std::vector<std::vector<double>>& sn_columns)
23-
: S_{S}, sn_columns_{sn_columns}, regul_{regul}, log_{log}, data_{data} {
23+
: S_{S},
24+
child_left_(S_.sn()),
25+
sn_columns_{sn_columns},
26+
regul_{regul},
27+
log_{log},
28+
data_{data} {
2429
// Input the symmetric matrix to be factorised in CSC format and the symbolic
2530
// factorisation coming from Analyse.
2631
// Only the lower triangular part of the matrix is used.
@@ -63,6 +68,18 @@ Factorise::Factorise(const Symbolic& S, const std::vector<Int>& rowsA,
6368
first_child_reverse_ = first_child_;
6469
next_child_reverse_ = next_child_;
6570
reverseLinkedList(first_child_reverse_, next_child_reverse_);
71+
72+
// fill child_left_ with the number of children of each supernode
73+
for (Int sn = 0; sn < S_.sn(); ++sn) {
74+
Int child = first_child_[sn];
75+
while (child != -1) {
76+
child_left_[sn]++;
77+
child = next_child_[child];
78+
}
79+
80+
// count number of roots
81+
if (S_.snParent(sn) == -1) roots_left_++;
82+
}
6683
}
6784

6885
// compute largest diagonal entry in absolute value
@@ -172,23 +189,8 @@ void Factorise::processSupernode(Int sn) {
172189
// Assemble frontal matrix for supernode sn, perform partial factorisation and
173190
// store the result.
174191

175-
highs::parallel::TaskGroup tg;
176-
177192
if (flag_stop_) return;
178193

179-
if (S_.parTree()) {
180-
// spawn children of this supernode in reverse order
181-
Int child_to_spawn = first_child_reverse_[sn];
182-
while (child_to_spawn != -1) {
183-
tg.spawn([=]() { processSupernode(child_to_spawn); });
184-
child_to_spawn = next_child_reverse_[child_to_spawn];
185-
}
186-
187-
// wait for first child to finish, before starting the parent (if there is a
188-
// first child)
189-
if (first_child_reverse_[sn] != -1) tg.sync();
190-
}
191-
192194
#if HIPO_TIMING_LEVEL >= 2
193195
Clock clock;
194196
#endif
@@ -240,17 +242,10 @@ void Factorise::processSupernode(Int sn) {
240242
// Schur contribution of the current child
241243
std::vector<double>& child_clique = schur_contribution_[child_sn];
242244

243-
if (S_.parTree()) {
244-
// sync with spawned child, apart from the first one
245-
if (child_sn != first_child_[sn]) tg.sync();
246-
247-
if (flag_stop_) return;
248-
249-
if (child_clique.size() == 0) {
250-
if (log_) log_->printDevInfo("Missing child supernode contribution\n");
251-
flag_stop_ = true;
252-
return;
253-
}
245+
if (child_clique.size() == 0) {
246+
if (log_) log_->printDevInfo("Missing child supernode contribution\n");
247+
flag_stop_ = true;
248+
return;
254249
}
255250

256251
// determine size of clique of child
@@ -351,6 +346,22 @@ void Factorise::processSupernode(Int sn) {
351346
#if HIPO_TIMING_LEVEL >= 2
352347
data_.sumTime(kTimeFactoriseTerminate, clock.stop());
353348
#endif
349+
350+
// finished processing this supernode.
351+
// check if the parent should be spawned
352+
highs::parallel::TaskGroup tg;
353+
if (S_.parTree()) {
354+
Int parent = S_.snParent(sn);
355+
if (parent != -1) {
356+
Int left = child_left_[parent].fetch_sub(1) - 1;
357+
if (left == 0) {
358+
tg.spawn([this, parent]() { processSupernode(parent); });
359+
}
360+
} else {
361+
roots_left_.fetch_sub(1);
362+
}
363+
}
364+
tg.taskWait();
354365
}
355366

356367
bool Factorise::run(Numeric& num) {
@@ -372,16 +383,14 @@ bool Factorise::run(Numeric& num) {
372383
sn_columns_.resize(S_.sn());
373384

374385
if (S_.parTree()) {
375-
Int spawned_roots{};
376-
// spawn tasks for root supernodes
377386
for (Int sn = 0; sn < S_.sn(); ++sn) {
378-
if (S_.snParent(sn) == -1) {
379-
tg.spawn([=]() { processSupernode(sn); });
380-
++spawned_roots;
387+
// spawn only the leaves
388+
if (first_child_[sn] == -1) {
389+
tg.spawn([this, sn]() { processSupernode(sn); });
381390
}
382391
}
383392

384-
// sync tasks for root supernodes
393+
// sync all spawned tasks
385394
tg.taskWait();
386395
} else {
387396
// go through each supernode serially

highs/ipm/hipo/factorhighs/Factorise.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
#ifndef FACTORHIGHS_FACTORISE_H
22
#define FACTORHIGHS_FACTORISE_H
33

4+
#include <atomic>
45
#include <cmath>
56

67
#include "Numeric.h"
@@ -30,6 +31,12 @@ class Factorise {
3031
std::vector<Int> first_child_reverse_{};
3132
std::vector<Int> next_child_reverse_{};
3233

34+
// Number of children left to process for each supernode.
35+
// This vector's size must be determined at construction and cannot be
36+
// resized, because std::atomic<Int> is neither copyable nor movable.
37+
std::vector<std::atomic<Int>> child_left_;
38+
std::atomic<Int> roots_left_{};
39+
3340
// generated elements, aka Schur complements.
3441
std::vector<std::vector<double>> schur_contribution_{};
3542

0 commit comments

Comments
 (0)