Skip to content

Commit 1e6967f

Browse files
committed
Modified tree parallelism to use partition of the elimination tree
1 parent 4653677 commit 1e6967f

File tree

7 files changed

+78
-62
lines changed

7 files changed

+78
-62
lines changed

highs/ipm/hipo/factorhighs/Analyse.cpp

Lines changed: 20 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1474,13 +1474,27 @@ void Analyse::generateParallelLayer(Int threads) {
14741474
// generate info about subtrees in the layer
14751475
std::vector<Int> first_desc;
14761476
firstDescendant(sn_parent_, first_desc);
1477-
layerSubtrees_.resize(layerIndex_.size());
1477+
layerSubtreesInfo_.resize(layerIndex_.size());
14781478
for (auto& subtree : layerIndex_) {
14791479
Int node = subtree.first;
14801480
Int index = subtree.second;
1481-
layerSubtrees_[index].start = first_desc[node];
1482-
layerSubtrees_[index].end = node + 1;
1483-
layerSubtrees_[index].stack = stack_subtree_parallel_[node];
1481+
layerSubtreesInfo_[index].start = first_desc[node];
1482+
layerSubtreesInfo_[index].end = node + 1;
1483+
layerSubtreesInfo_[index].stack = stack_subtree_parallel_[node];
1484+
}
1485+
1486+
smallSubtreesInfo_.resize(smallSubtrees_.size());
1487+
Int index = 0;
1488+
for (auto it = smallSubtrees_.begin(); it != smallSubtrees_.end(); ++it) {
1489+
Int node = *it;
1490+
1491+
smallSubtreesInfo_[index].start = first_desc[node];
1492+
smallSubtreesInfo_[index].end = node + 1;
1493+
1494+
// no stack needed for small subtrees
1495+
smallSubtreesInfo_[index].stack = -1;
1496+
1497+
++index;
14841498
}
14851499
}
14861500

@@ -1617,7 +1631,8 @@ Int Analyse::run(Symbolic& S) {
16171631
S.consecutive_sums_ = std::move(consecutive_sums_);
16181632
S.clique_block_start_ = std::move(clique_block_start_);
16191633
S.layerIndex_ = std::move(layerIndex_);
1620-
S.layerSubtrees_ = std::move(layerSubtrees_);
1634+
S.layerSubtreesInfo_ = std::move(layerSubtreesInfo_);
1635+
S.smallSubtreesInfo_ = std::move(smallSubtreesInfo_);
16211636
S.aboveLayer_ = std::move(aboveLayer_);
16221637
S.smallSubtrees_ = std::move(smallSubtrees_);
16231638

highs/ipm/hipo/factorhighs/Analyse.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ class Analyse {
8080

8181
// Parallel info
8282
std::map<Int, Int> layerIndex_;
83-
std::vector<SubtreeInfo> layerSubtrees_;
83+
std::vector<SubtreeInfo> layerSubtreesInfo_, smallSubtreesInfo_;
8484
std::set<Int> aboveLayer_, smallSubtrees_;
8585
std::vector<int64_t> stack_subtree_serial_;
8686
std::vector<int64_t> stack_subtree_parallel_;

highs/ipm/hipo/factorhighs/Factorise.cpp

Lines changed: 31 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -58,13 +58,6 @@ Factorise::Factorise(const Symbolic& S, const std::vector<Int>& rowsA,
5858
// create linked lists of children in supernodal elimination tree
5959
childrenLinkedList(S_.snParent(), first_child_, next_child_);
6060

61-
if (S_.parTree()) {
62-
// create reverse linked lists of children
63-
first_child_reverse_ = first_child_;
64-
next_child_reverse_ = next_child_;
65-
reverseLinkedList(first_child_reverse_, next_child_reverse_);
66-
}
67-
6861
// compute largest diagonal entry in absolute value
6962
max_diag_ = 0.0;
7063
min_diag_ = kHighsInf;
@@ -174,19 +167,6 @@ void Factorise::processSupernode(Int sn) {
174167

175168
if (flag_stop_) return;
176169

177-
if (S_.parTree()) {
178-
// spawn children of this supernode in reverse order
179-
Int child_to_spawn = first_child_reverse_[sn];
180-
while (child_to_spawn != -1) {
181-
highs::parallel::spawn([=]() { processSupernode(child_to_spawn); });
182-
child_to_spawn = next_child_reverse_[child_to_spawn];
183-
}
184-
185-
// wait for first child to finish, before starting the parent (if there is a
186-
// first child)
187-
if (first_child_reverse_[sn] != -1) highs::parallel::sync();
188-
}
189-
190170
#if HIPO_TIMING_LEVEL >= 2
191171
Clock clock;
192172
#endif
@@ -238,17 +218,10 @@ void Factorise::processSupernode(Int sn) {
238218
// Schur contribution of the current child
239219
std::vector<double>& child_clique = schur_contribution_[child_sn];
240220

241-
if (S_.parTree()) {
242-
// sync with spawned child, apart from the first one
243-
if (child_sn != first_child_[sn]) highs::parallel::sync();
244-
245-
if (flag_stop_) return;
246-
247-
if (child_clique.size() == 0) {
248-
if (log_) log_->printDevInfo("Missing child supernode contribution\n");
249-
flag_stop_ = true;
250-
return;
251-
}
221+
if (child_clique.size() == 0) {
222+
if (log_) log_->printDevInfo("Missing child supernode contribution\n");
223+
flag_stop_ = true;
224+
return;
252225
}
253226

254227
// determine size of clique of child
@@ -351,6 +324,12 @@ void Factorise::processSupernode(Int sn) {
351324
#endif
352325
}
353326

327+
void Factorise::processSupernodes(Int start, Int end) {
328+
for (Int sn = start; sn < end; ++sn) {
329+
processSupernode(sn);
330+
}
331+
}
332+
354333
bool Factorise::run(Numeric& num) {
355334
#if HIPO_TIMING_LEVEL >= 1
356335
Clock clock;
@@ -368,24 +347,33 @@ bool Factorise::run(Numeric& num) {
368347
sn_columns_.resize(S_.sn());
369348

370349
if (S_.parTree()) {
371-
Int spawned_roots{};
372-
// spawn tasks for root supernodes
373-
for (Int sn = 0; sn < S_.sn(); ++sn) {
374-
if (S_.snParent(sn) == -1) {
375-
highs::parallel::spawn([=]() { processSupernode(sn); });
376-
++spawned_roots;
377-
}
350+
// process subtrees in the layer
351+
for (Int i = 0; i < S_.layerIndex().size(); ++i) {
352+
Int start = S_.layerSubtreeInfo(i).start;
353+
Int end = S_.layerSubtreeInfo(i).end;
354+
highs::parallel::spawn([=]() { processSupernodes(start, end); });
355+
}
356+
357+
// process small subtrees
358+
for (Int i = 0; i < S_.smallSubtrees().size(); ++i) {
359+
Int start = S_.smallSubtreeInfo(i).start;
360+
Int end = S_.smallSubtreeInfo(i).end;
361+
processSupernodes(start, end);
378362
}
379363

380-
// sync tasks for root supernodes
381-
for (Int root = 0; root < spawned_roots; ++root) {
364+
// wait for subtrees in the layer to complete
365+
for (Int i = 0; i < S_.layerIndex().size(); ++i) {
382366
highs::parallel::sync();
383367
}
368+
369+
// process nodes above layer
370+
for (auto it = S_.aboveLayer().begin(); it != S_.aboveLayer().end(); ++it) {
371+
processSupernode(*it);
372+
}
373+
384374
} else {
385375
// go through each supernode serially
386-
for (Int sn = 0; sn < S_.sn(); ++sn) {
387-
processSupernode(sn);
388-
}
376+
processSupernodes(0, S_.sn());
389377
}
390378

391379
if (flag_stop_) return true;

highs/ipm/hipo/factorhighs/Factorise.h

Lines changed: 1 addition & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -26,10 +26,6 @@ class Factorise {
2626
std::vector<Int> first_child_{};
2727
std::vector<Int> next_child_{};
2828

29-
// reverse linked lists of chidlren
30-
std::vector<Int> first_child_reverse_{};
31-
std::vector<Int> next_child_reverse_{};
32-
3329
// generated elements, aka Schur complements.
3430
std::vector<std::vector<double>> schur_contribution_{};
3531

@@ -69,6 +65,7 @@ class Factorise {
6965
public:
7066
void permute(const std::vector<Int>& iperm);
7167
void processSupernode(Int sn);
68+
void processSupernodes(Int start, Int end);
7269

7370
public:
7471
Factorise(const Symbolic& S, const std::vector<Int>& rowsA,

highs/ipm/hipo/factorhighs/HybridHybridFormatHandler.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,7 @@ void HybridHybridFormatHandler::assembleClique(const std::vector<double>& child,
154154
}
155155

156156
void HybridHybridFormatHandler::extremeEntries() {
157+
#ifdef HIPO_COLLECT_EXPENSIVE_DATA
157158
double minD = 1e100;
158159
double maxD = 0.0;
159160
double minoffD = 1e100;
@@ -199,6 +200,7 @@ void HybridHybridFormatHandler::extremeEntries() {
199200
}
200201

201202
data_.setExtremeEntries(minD, maxD, minoffD, maxoffD);
203+
#endif
202204
}
203205

204206
} // namespace hipo

highs/ipm/hipo/factorhighs/Symbolic.cpp

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,15 @@ const std::vector<Int>& Symbolic::iperm() const { return iperm_; }
4444
const std::vector<Int>& Symbolic::snParent() const { return sn_parent_; }
4545
const std::vector<Int>& Symbolic::snStart() const { return sn_start_; }
4646
const std::vector<Int>& Symbolic::pivotSign() const { return pivot_sign_; }
47+
const SubtreeInfo& Symbolic::layerSubtreeInfo(Int i) const {
48+
return layerSubtreesInfo_[i];
49+
}
50+
const SubtreeInfo& Symbolic::smallSubtreeInfo(Int i) const {
51+
return smallSubtreesInfo_[i];
52+
}
53+
const std::set<Int>& Symbolic::aboveLayer() const { return aboveLayer_; }
54+
const std::set<Int>& Symbolic::smallSubtrees() const { return smallSubtrees_; }
55+
const std::map<Int, Int>& Symbolic::layerIndex() const { return layerIndex_; }
4756

4857
std::string memoryString(double mem) {
4958
std::stringstream ss;
@@ -68,10 +77,10 @@ void Symbolic::print(const Log& log, bool verbose) const {
6877
log_stream << textline("Fill-in:") << fix(fillin_, 0, 2) << '\n';
6978
log_stream << textline("Flops:") << sci(flops_, 0, 2) << '\n';
7079
if (verbose) {
71-
log_stream << textline("serial stack entries:") << sci(serial_stack_size_, 0, 1)
72-
<< '\n';
73-
log_stream << textline("parallel stacks entries:") << sci(parallel_stack_size_, 0, 1)
74-
<< '\n';
80+
log_stream << textline("serial stack entries:")
81+
<< sci(serial_stack_size_, 0, 1) << '\n';
82+
log_stream << textline("parallel stacks entries:")
83+
<< sci(parallel_stack_size_, 0, 1) << '\n';
7584
log_stream << textline("factor entries:")
7685
<< sci(factors_total_entries_, 0, 1) << '\n';
7786
log_stream << textline("serial memory:")

highs/ipm/hipo/factorhighs/Symbolic.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
#define FACTORHIGHS_SYMBOLIC_H
33

44
#include <map>
5-
#include <vector>
65
#include <set>
6+
#include <vector>
77

88
#include "ipm/hipo/auxiliary/IntConfig.h"
99
#include "ipm/hipo/auxiliary/Log.h"
@@ -107,13 +107,13 @@ class Symbolic {
107107
std::map<Int, Int> layerIndex_;
108108

109109
// Information about subtrees in the layer
110-
// - layerSubtrees_[j] contains the following information about the j-th
111-
// subtree
112-
// in the layer (according to the numbering in layerIndex_):
110+
// - layerSubtreesInfo_[j] contains the following information about the j-th
111+
// subtree in the layer (according to the numbering in layerIndex_):
113112
// . start: first node in the subtree
114113
// . end: first later node not in the subtree
115114
// . stack: minimum amount of stack space required to process the subtree
116-
std::vector<SubtreeInfo> layerSubtrees_;
115+
std::vector<SubtreeInfo> layerSubtreesInfo_;
116+
std::vector<SubtreeInfo> smallSubtreesInfo_;
117117

118118
// Set containing the nodes that appear above the parallel layer
119119
std::set<Int> aboveLayer_;
@@ -156,6 +156,11 @@ class Symbolic {
156156
const std::vector<Int>& snParent() const;
157157
const std::vector<Int>& snStart() const;
158158
const std::vector<Int>& pivotSign() const;
159+
const SubtreeInfo& layerSubtreeInfo(Int i) const;
160+
const SubtreeInfo& smallSubtreeInfo(Int i) const;
161+
const std::set<Int>& aboveLayer() const;
162+
const std::set<Int>& smallSubtrees() const;
163+
const std::map<Int, Int>& layerIndex() const;
159164

160165
void print(const Log& log, bool verbose = false) const;
161166
};

0 commit comments

Comments
 (0)