Skip to content

Commit ad3eec3

Browse files
committed
Set up tree splitting for solve
1 parent 1fe40aa commit ad3eec3

File tree

6 files changed

+169
-0
lines changed

6 files changed

+169
-0
lines changed

highs/ipm/hipo/auxiliary/Auxiliary.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,23 @@ double getDiagStart(Int n, Int k, Int nb, Int n_blocks, std::vector<Int>& start,
223223
return result;
224224
}
225225

226+
void firstDescendant(const std::vector<Int>& parent, std::vector<Int>& first) {
227+
// Given an elimination tree, find the first descendant of each node, i.e.
228+
// given a node j, first[j] is the descendant of j with smallest number.
229+
// Taken from Tim Davis "Direct Methods for Sparse Linear Systems".
230+
231+
const Int n = parent.size();
232+
first.assign(n, -1);
233+
234+
for (Int i = 0; i < n; ++i) {
235+
Int j = i;
236+
while (j != -1 && first[j] == -1) {
237+
first[j] = i;
238+
j = parent[j];
239+
}
240+
}
241+
}
242+
226243
Clock::Clock() { start(); }
227244
void Clock::start() { t0 = std::chrono::high_resolution_clock::now(); }
228245
double Clock::stop() const {

highs/ipm/hipo/auxiliary/Auxiliary.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ void processEdge(Int j, Int i, const std::vector<Int>& first,
2929
std::vector<Int>& prevleaf, std::vector<Int>& ancestor);
3030
double getDiagStart(Int n, Int k, Int nb, Int n_blocks, std::vector<Int>& start,
3131
bool triang = false);
32+
void firstDescendant(const std::vector<Int>& parent, std::vector<Int>& first);
3233

3334
template <typename T>
3435
void permuteVector(std::vector<T>& v, const std::vector<Int>& perm) {

highs/ipm/hipo/factorhighs/Analyse.cpp

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1226,6 +1226,118 @@ void Analyse::computeBlockStart() {
12261226
}
12271227
}
12281228

1229+
void Analyse::findTreeSplittingSolve() {
1230+
// Split the tree into single nodes and subtrees for solve.
1231+
// The subtrees have at most 1% of total operations. They are grouped together
1232+
// so that each group of subtrees has enough operations.
1233+
// The tree is parallelised by creating a task for each single node and a task
1234+
// for each group of subtrees.
1235+
1236+
// compute number of operations for each supernode
1237+
std::vector<double> sn_ops(sn_count_);
1238+
double total_ops = 0;
1239+
for (Int sn = 0; sn < sn_count_; ++sn) {
1240+
// supernode size
1241+
const Int sz = sn_start_[sn + 1] - sn_start_[sn];
1242+
1243+
// frontal size
1244+
const Int fr = ptr_sn_[sn + 1] - ptr_sn_[sn];
1245+
1246+
// number of operations for this supernode for the solve
1247+
double ops_to_add = (double)sz * sz / 2 + (double)sz * fr;
1248+
sn_ops[sn] += ops_to_add;
1249+
total_ops += ops_to_add;
1250+
1251+
// add assembly operations
1252+
if (sn_parent_[sn] != -1) {
1253+
const Int ldc = fr - sz;
1254+
sn_ops[sn_parent_[sn]] += ldc;
1255+
total_ops += ldc;
1256+
}
1257+
}
1258+
1259+
// compute number of operations to process each subtree
1260+
std::vector<double> subtree_ops(sn_count_, 0.0);
1261+
for (Int sn = 0; sn < sn_count_; ++sn) {
1262+
subtree_ops[sn] += sn_ops[sn];
1263+
if (sn_parent_[sn] != -1) {
1264+
subtree_ops[sn_parent_[sn]] += subtree_ops[sn];
1265+
}
1266+
}
1267+
1268+
// Find first descendant of each supernode
1269+
std::vector<Int> first_desc;
1270+
firstDescendant(sn_parent_, first_desc);
1271+
1272+
// linked lists of children
1273+
std::vector<Int> head, next;
1274+
childrenLinkedList(sn_parent_, head, next);
1275+
1276+
node_data_ptr_.assign(sn_count_, nullptr);
1277+
1278+
// Divide the tree into single nodes and subtrees, such that each subtree has
1279+
// at most small_thresh operations overall. Group subtrees together, so that
1280+
// groups have enough operations.
1281+
const double small_thresh = 0.05 * total_ops;
1282+
for (Int sn = 0; sn < sn_count_; ++sn) {
1283+
if (subtree_ops[sn] > small_thresh) {
1284+
// sn is a single node
1285+
auto res_insert = tree_splitting_.insert({sn, {}});
1286+
node_data_ptr_[sn] = &res_insert.first->second;
1287+
res_insert.first->second.type = NodeType::single;
1288+
num_single_++;
1289+
1290+
// The children of this sn are either single nodes or head of subtrees.
1291+
// Divide the head of subtrees in groups, so that each group has enough
1292+
// operations. Each group corresponds to one task executed in parallel.
1293+
1294+
double current_ops = 0.0;
1295+
NodeData* current_nodedata = nullptr;
1296+
Int child = head[sn];
1297+
while (child != -1) {
1298+
bool is_small = subtree_ops[child] <= small_thresh;
1299+
1300+
if (is_small) {
1301+
num_subtrees_++;
1302+
1303+
if (!current_nodedata) {
1304+
auto res_insert = tree_splitting_.insert({child, {}});
1305+
current_nodedata = &res_insert.first->second;
1306+
node_data_ptr_[child] = current_nodedata;
1307+
current_nodedata->type = NodeType::subtree;
1308+
current_ops = 0.0;
1309+
}
1310+
1311+
current_ops += subtree_ops[child];
1312+
current_nodedata->group.push_back(child);
1313+
current_nodedata->firstdesc.push_back(first_desc[child]);
1314+
1315+
if (current_ops > small_thresh) current_nodedata = nullptr;
1316+
}
1317+
1318+
child = next[child];
1319+
}
1320+
1321+
} else if (sn_parent_[sn] == -1) {
1322+
// sn is small root: single task with whole subtree
1323+
auto res_insert = tree_splitting_.insert({sn, {}});
1324+
node_data_ptr_[sn] = &res_insert.first->second;
1325+
res_insert.first->second.type = NodeType::subtree;
1326+
res_insert.first->second.group.push_back(sn);
1327+
res_insert.first->second.firstdesc.push_back(first_desc[sn]);
1328+
}
1329+
/*
1330+
else if (subtree_ops[sn_parent_[sn]] > small_thresh) {
1331+
// sn is head of a subtree, processed as part of a group of subtrees
1332+
continue;
1333+
} else {
1334+
// sn is part of a subtree, but not the head
1335+
continue;
1336+
}
1337+
*/
1338+
}
1339+
}
1340+
12291341
Int Analyse::run(Symbolic& S) {
12301342
// Perform analyse phase and store the result into the symbolic object S.
12311343
// After Run returns, the Analyse object is not valid.
@@ -1300,6 +1412,7 @@ Int Analyse::run(Symbolic& S) {
13001412
computeStorage();
13011413
computeBlockStart();
13021414
computeCriticalPath();
1415+
findTreeSplittingSolve();
13031416

13041417
// move relevant stuff into S
13051418
S.n_ = n_;
@@ -1314,6 +1427,8 @@ Int Analyse::run(Symbolic& S) {
13141427
S.serial_storage_ = serial_storage_;
13151428
S.flops_ = dense_ops_;
13161429
S.block_size_ = nb_;
1430+
S.num_single_ = num_single_;
1431+
S.num_subtrees_ = num_subtrees_;
13171432

13181433
// compute largest supernode
13191434
std::vector<Int> sn_size(sn_start_.begin() + 1, sn_start_.end());
@@ -1348,6 +1463,8 @@ Int Analyse::run(Symbolic& S) {
13481463
S.relind_clique_ = std::move(relind_clique_);
13491464
S.consecutive_sums_ = std::move(consecutive_sums_);
13501465
S.clique_block_start_ = std::move(clique_block_start_);
1466+
S.tree_splitting_solve_ = std::move(tree_splitting_);
1467+
S.node_data_ptr_ = std::move(node_data_ptr_);
13511468

13521469
#if HIPO_TIMING_LEVEL >= 1
13531470
data_.sumTime(kTimeAnalyse, clock_total.stop());

highs/ipm/hipo/factorhighs/Analyse.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33

44
#include <algorithm>
55
#include <vector>
6+
#include <map>
67

78
#include "DataCollector.h"
89
#include "Symbolic.h"
@@ -77,6 +78,11 @@ class Analyse {
7778

7879
std::vector<std::vector<Int>> clique_block_start_{};
7980

81+
std::map<Int, NodeData> tree_splitting_;
82+
std::vector<NodeData*> node_data_ptr_;
83+
Int num_single_{};
84+
Int num_subtrees_{};
85+
8086
// block size
8187
Int nb_{};
8288

@@ -102,6 +108,7 @@ class Analyse {
102108
double& cl_entries) const;
103109
void computeCriticalPath();
104110
void computeBlockStart();
111+
void findTreeSplittingSolve();
105112

106113
public:
107114
// Constructor: matrix must be in lower triangular format

highs/ipm/hipo/factorhighs/Symbolic.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ Int Symbolic::cliqueSize(Int sn) const {
4444
bool Symbolic::parTree() const { return parallel_tree_; }
4545
bool Symbolic::parNode() const { return parallel_node_; }
4646
bool Symbolic::metisNo2hop() const { return metis_no2hop_; }
47+
const NodeData* Symbolic::nodeDataPtr(Int sn) const {
  // Return the entry of node_data_ptr_ for supernode sn exactly as stored.
  // The index is not range-checked.
  const NodeData* const entry = node_data_ptr_[sn];
  return entry;
}
4750

4851
const std::vector<Int>& Symbolic::ptr() const { return ptr_; }
4952
const std::vector<Int>& Symbolic::iperm() const { return iperm_; }

highs/ipm/hipo/factorhighs/Symbolic.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,21 @@
11
#ifndef FACTORHIGHS_SYMBOLIC_H
22
#define FACTORHIGHS_SYMBOLIC_H
33

4+
#include <map>
45
#include <vector>
56

67
#include "ipm/hipo/auxiliary/IntConfig.h"
78
#include "ipm/hipo/auxiliary/Log.h"
89

910
namespace hipo {
1011

12+
// Kind of task in the tree splitting used for the solve.
enum NodeType {
  single,  // the task processes one supernode only
  subtree  // the task processes one or more whole subtrees
};
13+
struct NodeData {
14+
NodeType type;
15+
std::vector<Int> firstdesc;
16+
std::vector<Int> group;
17+
};
18+
1119
// Symbolic factorisation object
1220
class Symbolic {
1321
// Options for parallelism
@@ -36,6 +44,7 @@ class Symbolic {
3644
Int sn_size_1_{};
3745
Int sn_size_10_{};
3846
Int sn_size_100_{};
47+
// Counters of the tree splitting for the solve, copied from Analyse:
// number of single-node tasks and number of small subtrees.
Int num_single_{};
Int num_subtrees_{};
3948

4049
// Inverse permutation
4150
std::vector<Int> iperm_{};
@@ -97,6 +106,20 @@ class Symbolic {
97106
// Starting position of diagonal blocks for hybrid formats
98107
std::vector<std::vector<Int>> clique_block_start_{};
99108

109+
// Information to split the elimination tree for solve. Each entry in
110+
// tree_splitting_solve_ corresponds to a task that is executed in parallel.
111+
// tree_splitting_solve_ contains pairs (sn, data):
112+
// - If data.type is single, then the task processes only the supernode sn.
113+
// - If data.type is subtree, then the task processes each subtree rooted at
114+
// data.group[i]. Each subtree requires processing supernodes j,
115+
// data.firstdesc[i] <= j <= data.group[i].
116+
std::map<Int, NodeData> tree_splitting_solve_;
117+
118+
// For each supernode, provide the pointer to the NodeData information, if the
119+
// supernode is found in the tree splitting data structure. Otherwise,
120+
// contains nullptr. Avoids too many lookups into the map.
121+
std::vector<NodeData*> node_data_ptr_;
122+
100123
friend class Analyse;
101124

102125
public:
@@ -125,6 +148,7 @@ class Symbolic {
125148
bool parTree() const;
126149
bool parNode() const;
127150
bool metisNo2hop() const;
151+
const NodeData* nodeDataPtr(Int sn) const;
128152
const std::vector<Int>& ptr() const;
129153
const std::vector<Int>& iperm() const;
130154
const std::vector<Int>& snParent() const;

0 commit comments

Comments
 (0)