Skip to content

Commit c5265e8

Browse files
committed
Split tree in single nodes and subtrees for parallelisation
1 parent 3d19148 commit c5265e8

File tree

9 files changed

+168
-25
lines changed

9 files changed

+168
-25
lines changed

highs/ipm/hipo/auxiliary/Auxiliary.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -265,6 +265,23 @@ double getDiagStart(Int n, Int k, Int nb, Int n_blocks, std::vector<Int>& start,
265265
return result;
266266
}
267267

268+
void firstDescendant(const std::vector<Int>& parent, std::vector<Int>& first) {
269+
// Given an elimination tree, find the first descendant of each node, i.e.
270+
// given a node j, first[j] is the descendant of j with smallest number.
271+
// Taken from Tim Davis "Direct Methods for Sparse Linear Systems".
272+
273+
const Int n = parent.size();
274+
first.assign(n, -1);
275+
276+
for (Int i = 0; i < n; ++i) {
277+
Int j = i;
278+
while (j != -1 && first[j] == -1) {
279+
first[j] = i;
280+
j = parent[j];
281+
}
282+
}
283+
}
284+
268285
Clock::Clock() { start(); }
269286
void Clock::start() { t0 = std::chrono::high_resolution_clock::now(); }
270287
double Clock::stop() const {

highs/ipm/hipo/auxiliary/Auxiliary.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ void processEdge(Int j, Int i, const std::vector<Int>& first,
3737
std::vector<Int>& prevleaf, std::vector<Int>& ancestor);
3838
double getDiagStart(Int n, Int k, Int nb, Int n_blocks, std::vector<Int>& start,
3939
bool triang = false);
40+
void firstDescendant(const std::vector<Int>& parent, std::vector<Int>& first);
4041

4142
template <typename T>
4243
void permuteVector(std::vector<T>& v, const std::vector<Int>& perm) {

highs/ipm/hipo/factorhighs/Analyse.cpp

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
#include <fstream>
44
#include <iostream>
5+
#include <map>
56
#include <random>
67
#include <stack>
78

@@ -1284,6 +1285,72 @@ void Analyse::computeBlockStart() {
12841285
}
12851286
}
12861287

1288+
void Analyse::findTreeSplitting() {
1289+
// Split the tree into single nodes and subtrees.
1290+
// The subtrees have at most 1% of total operations.
1291+
// The tree is parallelised by creating a task for each single node and a task
1292+
// for each subtree.
1293+
1294+
// linked lists of children
1295+
std::vector<Int> head, next;
1296+
childrenLinkedList(sn_parent_, head, next);
1297+
1298+
// compute number of operations for each supernode
1299+
std::vector<double> sn_ops(sn_count_);
1300+
double total_ops = dense_ops_;
1301+
for (Int sn = 0; sn < sn_count_; ++sn) {
1302+
// supernode size
1303+
const Int sz = sn_start_[sn + 1] - sn_start_[sn];
1304+
1305+
// frontal size
1306+
const Int fr = ptr_sn_[sn + 1] - ptr_sn_[sn];
1307+
1308+
// number of operations for this supernode
1309+
for (Int i = 0; i < sz; ++i) {
1310+
sn_ops[sn] += (double)(fr - i - 1) * (fr - i - 1);
1311+
}
1312+
1313+
// add assembly operations times spops_weight to the parent
1314+
if (sn_parent_[sn] != -1) {
1315+
const Int ldc = fr - sz;
1316+
sn_ops[sn_parent_[sn]] += ldc * (ldc + 1) / 2 * spops_weight;
1317+
total_ops += ldc * (ldc + 1) / 2 * spops_weight;
1318+
}
1319+
}
1320+
1321+
// compute number of operations to process each subtree
1322+
std::vector<double> subtree_ops(sn_count_, 0.0);
1323+
for (Int sn = 0; sn < sn_count_; ++sn) {
1324+
subtree_ops[sn] += sn_ops[sn];
1325+
if (sn_parent_[sn] != -1) {
1326+
subtree_ops[sn_parent_[sn]] += subtree_ops[sn];
1327+
}
1328+
}
1329+
1330+
// Find first descendant of each supernode
1331+
std::vector<Int> first_desc;
1332+
firstDescendant(sn_parent_, first_desc);
1333+
1334+
// Divide the tree into single nodes and subtrees, such that each subtree has
1335+
// at most small_thresh operations overall.
1336+
const double small_thresh = small_thresh_coeff * total_ops;
1337+
for (Int sn = 0; sn < sn_count_; ++sn) {
1338+
if (subtree_ops[sn] > small_thresh) {
1339+
// sn is a single node
1340+
tree_splitting_.insert({sn, {NodeType::single, -1}});
1341+
1342+
} else if (sn_parent_[sn] == -1 ||
1343+
subtree_ops[sn_parent_[sn]] > small_thresh) {
1344+
// sn is head of a subtree
1345+
tree_splitting_.insert({sn, {NodeType::subtree, first_desc[sn]}});
1346+
1347+
} else {
1348+
// sn is part of a subtree, but not the head
1349+
continue;
1350+
}
1351+
}
1352+
}
1353+
12871354
Int Analyse::run(Symbolic& S) {
12881355
// Perform analyse phase and store the result into the symbolic object S.
12891356
// After Run returns, the Analyse object is not valid.
@@ -1359,6 +1426,8 @@ Int Analyse::run(Symbolic& S) {
13591426
computeBlockStart();
13601427
computeCriticalPath();
13611428

1429+
findTreeSplitting();
1430+
13621431
// move relevant stuff into S
13631432
S.n_ = n_;
13641433
S.sn_ = sn_count_;
@@ -1406,6 +1475,7 @@ Int Analyse::run(Symbolic& S) {
14061475
S.relind_clique_ = std::move(relind_clique_);
14071476
S.consecutive_sums_ = std::move(consecutive_sums_);
14081477
S.clique_block_start_ = std::move(clique_block_start_);
1478+
S.tree_splitting_ = std::move(tree_splitting_);
14091479

14101480
#if HIPO_TIMING_LEVEL >= 1
14111481
data_.sumTime(kTimeAnalyse, clock_total.stop());

highs/ipm/hipo/factorhighs/Analyse.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,8 @@ class Analyse {
7777

7878
std::vector<std::vector<Int>> clique_block_start_{};
7979

80+
std::map<Int, NodeData> tree_splitting_;
81+
8082
// block size
8183
Int nb_{};
8284

@@ -103,6 +105,8 @@ class Analyse {
103105
void computeCriticalPath();
104106
void computeBlockStart();
105107

108+
void findTreeSplitting();
109+
106110
public:
107111
// Constructor: matrix must be in lower triangular format
108112
Analyse(const std::vector<Int>& rows, const std::vector<Int>& ptr,

highs/ipm/hipo/factorhighs/FactorHiGHSSettings.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,10 @@ const double kDynamicDiagCoeff = 1e-24;
5151
const Int kMaxRefinementIter = 3;
5252
const double kRefinementTolerance = 1e-12;
5353

54+
// tree splitting
55+
const double small_thresh_coeff = 0.01;
56+
const double spops_weight = 30.0;
57+
5458
struct Regul {
5559
double primal{};
5660
double dual{};

highs/ipm/hipo/factorhighs/Factorise.cpp

Lines changed: 48 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
#include "SymScaling.h"
1212
#include "ipm/hipo/auxiliary/Auxiliary.h"
1313
#include "ipm/hipo/auxiliary/Log.h"
14-
#include "parallel/HighsParallel.h"
1514

1615
namespace hipo {
1716

@@ -168,42 +167,40 @@ void Factorise::permute(const std::vector<Int>& iperm) {
168167
valA_ = std::move(new_val);
169168
}
170169

171-
class TaskGroupSpecial : public highs::parallel::TaskGroup {
170+
TaskGroupSpecial::~TaskGroupSpecial() {
172171
// Using TaskGroup may throw an exception when tasks are cancelled. Not sure
173172
// exactly why this happens, but for now this fix seems to work.
174173

175-
public:
176-
~TaskGroupSpecial() {
177-
// No virtual destructor in TaskGroup. Do not call this class via pointer to
178-
// the base!
174+
// No virtual destructor in TaskGroup. Do not call this class via pointer to
175+
// the base!
179176

180-
cancel();
177+
cancel();
181178

182-
// re-call taskWait if it throws, until it succeeds
183-
while (true) {
184-
try {
185-
taskWait();
186-
break;
187-
} catch (HighsTask::Interrupt) {
188-
continue;
189-
}
179+
// re-call taskWait if it throws, until it succeeds
180+
while (true) {
181+
try {
182+
taskWait();
183+
break;
184+
} catch (HighsTask::Interrupt) {
185+
continue;
190186
}
191187
}
192-
};
188+
}
193189

194-
void Factorise::processSupernode(Int sn) {
190+
void Factorise::processSupernode(Int sn, bool parallelise) {
195191
// Assemble frontal matrix for supernode sn, perform partial factorisation and
196192
// store the result.
197193

198194
TaskGroupSpecial tg;
199195

200196
if (flag_stop_) return;
201197

202-
if (S_.parTree()) {
198+
if (parallelise) {
203199
// spawn children of this supernode in reverse order
204200
Int child_to_spawn = first_child_reverse_[sn];
205201
while (child_to_spawn != -1) {
206-
tg.spawn([=]() { processSupernode(child_to_spawn); });
202+
if (spawnNode(child_to_spawn, tg)) return;
203+
207204
child_to_spawn = next_child_reverse_[child_to_spawn];
208205
}
209206

@@ -263,7 +260,7 @@ void Factorise::processSupernode(Int sn) {
263260
// Schur contribution of the current child
264261
std::vector<double>& child_clique = schur_contribution_[child_sn];
265262

266-
if (S_.parTree()) {
263+
if (parallelise) {
267264
// sync with spawned child, apart from the first one
268265
if (child_sn != first_child_[sn]) tg.sync();
269266

@@ -376,6 +373,35 @@ void Factorise::processSupernode(Int sn) {
376373
#endif
377374
}
378375

376+
void Factorise::processSubtree(Int start, Int end) {
377+
for (Int sn = start; sn < end; ++sn) {
378+
processSupernode(sn, false);
379+
}
380+
}
381+
382+
bool Factorise::spawnNode(Int sn, const TaskGroupSpecial& tg) {
383+
auto it = S_.treeSplitting().find(sn);
384+
385+
if (it == S_.treeSplitting().end()) {
386+
log_->printDevInfo("Missing supernode from tree splitting\n");
387+
flag_stop_ = true;
388+
return true;
389+
}
390+
391+
if (it->second.type == NodeType::single) {
392+
// sn is single node, spawn only that
393+
tg.spawn([=]() { processSupernode(sn, true); });
394+
395+
} else {
396+
// sn is subtree, spawn the whole subtree
397+
Int start = it->second.first;
398+
Int end = sn + 1;
399+
tg.spawn([=]() { processSubtree(start, end); });
400+
}
401+
402+
return false;
403+
}
404+
379405
bool Factorise::run(Numeric& num) {
380406
#if HIPO_TIMING_LEVEL >= 1
381407
Clock clock;
@@ -395,12 +421,10 @@ bool Factorise::run(Numeric& num) {
395421
sn_columns_.resize(S_.sn());
396422

397423
if (S_.parTree()) {
398-
Int spawned_roots{};
399424
// spawn tasks for root supernodes
400425
for (Int sn = 0; sn < S_.sn(); ++sn) {
401426
if (S_.snParent(sn) == -1) {
402-
tg.spawn([=]() { processSupernode(sn); });
403-
++spawned_roots;
427+
if (spawnNode(sn, tg)) return true;
404428
}
405429
}
406430

@@ -409,7 +433,7 @@ bool Factorise::run(Numeric& num) {
409433
} else {
410434
// go through each supernode serially
411435
for (Int sn = 0; sn < S_.sn(); ++sn) {
412-
processSupernode(sn);
436+
processSupernode(sn, false);
413437
}
414438
}
415439

highs/ipm/hipo/factorhighs/Factorise.h

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -7,9 +7,15 @@
77
#include "Symbolic.h"
88
#include "ipm/hipo/auxiliary/IntConfig.h"
99
#include "ipm/hipo/auxiliary/Log.h"
10+
#include "parallel/HighsParallel.h"
1011

1112
namespace hipo {
1213

14+
class TaskGroupSpecial : public highs::parallel::TaskGroup {
15+
public:
16+
~TaskGroupSpecial();
17+
};
18+
1319
class Factorise {
1420
public:
1521
// matrix to factorise
@@ -68,7 +74,9 @@ class Factorise {
6874

6975
public:
7076
void permute(const std::vector<Int>& iperm);
71-
void processSupernode(Int sn);
77+
void processSupernode(Int sn, bool parallelise);
78+
void processSubtree(Int start, Int end);
79+
bool spawnNode(Int sn, const TaskGroupSpecial& tg);
7280

7381
public:
7482
Factorise(const Symbolic& S, const std::vector<Int>& rowsA,

highs/ipm/hipo/factorhighs/Symbolic.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ const std::vector<Int>& Symbolic::iperm() const { return iperm_; }
4444
const std::vector<Int>& Symbolic::snParent() const { return sn_parent_; }
4545
const std::vector<Int>& Symbolic::snStart() const { return sn_start_; }
4646
const std::vector<Int>& Symbolic::pivotSign() const { return pivot_sign_; }
47+
const std::map<Int, NodeData>& Symbolic::treeSplitting() const {
48+
return tree_splitting_;
49+
}
4750

4851
std::string memoryString(double mem) {
4952
std::stringstream ss;
@@ -74,6 +77,8 @@ void Symbolic::print(const Log& log, bool verbose) const {
7477
log_stream << textline("Critical ops:") << sci(critops_, 0, 1) << '\n';
7578
log_stream << textline("Max tree speedup:") << fix(flops_ / critops_, 0, 2)
7679
<< '\n';
80+
log_stream << textline("Number of tasks:") << integer(tree_splitting_.size())
81+
<< '\n';
7782
log_stream << textline("Artificial nz:") << sci(artificial_nz_, 0, 1)
7883
<< '\n';
7984
log_stream << textline("Artificial ops:") << sci(artificial_ops_, 0, 1)

highs/ipm/hipo/factorhighs/Symbolic.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,20 @@
11
#ifndef FACTORHIGHS_SYMBOLIC_H
22
#define FACTORHIGHS_SYMBOLIC_H
33

4+
#include <map>
45
#include <vector>
56

67
#include "ipm/hipo/auxiliary/IntConfig.h"
78
#include "ipm/hipo/auxiliary/Log.h"
89

910
namespace hipo {
1011

12+
enum NodeType { single, subtree };
13+
struct NodeData {
14+
NodeType type;
15+
Int first;
16+
};
17+
1118
// Symbolic factorisation object
1219
class Symbolic {
1320
// Options for parallelism
@@ -94,6 +101,8 @@ class Symbolic {
94101
// Starting position of diagonal blocks for hybrid formats
95102
std::vector<std::vector<Int>> clique_block_start_{};
96103

104+
std::map<Int, NodeData> tree_splitting_;
105+
97106
friend class Analyse;
98107

99108
public:
@@ -124,6 +133,7 @@ class Symbolic {
124133
const std::vector<Int>& snParent() const;
125134
const std::vector<Int>& snStart() const;
126135
const std::vector<Int>& pivotSign() const;
136+
const std::map<Int, NodeData>& treeSplitting() const;
127137

128138
void print(const Log& log, bool verbose = false) const;
129139
};

0 commit comments

Comments
 (0)