Skip to content

Commit 751abcb

Browse files
committed
Added stack of cliques for subtrees
1 parent 26eba55 commit 751abcb

File tree

15 files changed

+349
-59
lines changed

15 files changed

+349
-59
lines changed

cmake/sources.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -204,6 +204,7 @@ set(hipo_headers
204204
set(factor_highs_sources
205205
ipm/hipo/factorhighs/Analyse.cpp
206206
ipm/hipo/factorhighs/CallAndTimeBlas.cpp
207+
ipm/hipo/factorhighs/CliqueStack.cpp
207208
ipm/hipo/factorhighs/DataCollector.cpp
208209
ipm/hipo/factorhighs/DenseFactHybrid.cpp
209210
ipm/hipo/factorhighs/DenseFactKernel.cpp
@@ -223,6 +224,7 @@ set(factor_highs_sources
223224
set(factor_highs_headers
224225
ipm/hipo/factorhighs/Analyse.h
225226
ipm/hipo/factorhighs/CallAndTimeBlas.h
227+
ipm/hipo/factorhighs/CliqueStack.h
226228
ipm/hipo/factorhighs/DataCollector.h
227229
ipm/hipo/factorhighs/DenseFact.h
228230
ipm/hipo/factorhighs/DgemmParallel.h

highs/ipm/hipo/auxiliary/Auxiliary.cpp

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -250,8 +250,8 @@ void processEdge(Int j, Int i, const std::vector<Int>& first,
250250
prevleaf[i] = j;
251251
}
252252

253-
double getDiagStart(Int n, Int k, Int nb, Int n_blocks, std::vector<Int>& start,
254-
bool triang) {
253+
int64_t getDiagStart(Int n, Int k, Int nb, Int n_blocks,
254+
std::vector<Int>& start, bool triang) {
255255
// start position of diagonal blocks for blocked dense formats
256256
start.assign(n_blocks, 0);
257257
for (Int i = 1; i < n_blocks; ++i) {
@@ -260,8 +260,9 @@ double getDiagStart(Int n, Int k, Int nb, Int n_blocks, std::vector<Int>& start,
260260
}
261261

262262
Int jb = std::min(nb, k - (n_blocks - 1) * nb);
263-
double result = (double)start.back() + (double)(n - (n_blocks - 1) * nb) * jb;
264-
if (triang) result -= (double)jb * (jb - 1) / 2;
263+
int64_t result =
264+
(int64_t)start.back() + (int64_t)(n - (n_blocks - 1) * nb) * jb;
265+
if (triang) result -= (int64_t)jb * (jb - 1) / 2;
265266
return result;
266267
}
267268

highs/ipm/hipo/auxiliary/Auxiliary.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,8 @@ void dfsPostorder(Int node, Int& start, std::vector<Int>& head,
3535
void processEdge(Int j, Int i, const std::vector<Int>& first,
3636
std::vector<Int>& maxfirst, std::vector<Int>& delta,
3737
std::vector<Int>& prevleaf, std::vector<Int>& ancestor);
38-
double getDiagStart(Int n, Int k, Int nb, Int n_blocks, std::vector<Int>& start,
39-
bool triang = false);
38+
int64_t getDiagStart(Int n, Int k, Int nb, Int n_blocks,
39+
std::vector<Int>& start, bool triang = false);
4040
void firstDescendant(const std::vector<Int>& parent, std::vector<Int>& first);
4141

4242
template <typename T>

highs/ipm/hipo/factorhighs/Analyse.cpp

Lines changed: 70 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -908,8 +908,8 @@ void Analyse::relativeIndClique() {
908908
}
909909
}
910910

911-
void Analyse::computeStorage(Int fr, Int sz, double& fr_entries,
912-
double& cl_entries) const {
911+
void Analyse::computeStorage(Int fr, Int sz, int64_t& fr_entries,
912+
int64_t& cl_entries) const {
913913
// compute storage required by frontal and clique, based on the format used
914914

915915
const Int cl = fr - sz;
@@ -920,17 +920,17 @@ void Analyse::computeStorage(Int fr, Int sz, double& fr_entries,
920920

921921
// clique is stored as a collection of rectangles
922922
n_blocks = (cl - 1) / nb_ + 1;
923-
double schur_size{};
923+
int64_t schur_size{};
924924
for (Int j = 0; j < n_blocks; ++j) {
925925
const Int jb = std::min(nb_, cl - j * nb_);
926-
schur_size += (double)(cl - j * nb_) * jb;
926+
schur_size += (ino64_t)(cl - j * nb_) * jb;
927927
}
928928
cl_entries = schur_size;
929929
}
930930

931931
void Analyse::computeStorage() {
932-
std::vector<double> clique_entries(sn_count_);
933-
std::vector<double> frontal_entries(sn_count_);
932+
std::vector<int64_t> clique_entries(sn_count_);
933+
std::vector<int64_t> frontal_entries(sn_count_);
934934
std::vector<double> storage(sn_count_);
935935
std::vector<double> storage_factors(sn_count_);
936936

@@ -1050,8 +1050,8 @@ void Analyse::computeCriticalPath() {
10501050
}
10511051

10521052
void Analyse::reorderChildren() {
1053-
std::vector<double> clique_entries(sn_count_);
1054-
std::vector<double> frontal_entries(sn_count_);
1053+
std::vector<int64_t> clique_entries(sn_count_);
1054+
std::vector<int64_t> frontal_entries(sn_count_);
10551055
std::vector<double> storage(sn_count_);
10561056
std::vector<double> storage_factors(sn_count_);
10571057

@@ -1304,12 +1304,15 @@ void Analyse::findTreeSplitting() {
13041304
is_in_tree_splitting_[child] = true;
13051305
current_nodedata = &res_insert.first->second;
13061306
current_nodedata->type = NodeType::subtree;
1307+
current_nodedata->stack_size = 0;
13071308
current_ops = 0.0;
13081309
}
13091310

13101311
current_ops += subtree_ops[child];
13111312
current_nodedata->group.push_back(child);
13121313
current_nodedata->firstdesc.push_back(first_desc[child]);
1314+
current_nodedata->stack_size =
1315+
std::max(current_nodedata->stack_size, stack_subtrees_[child]);
13131316

13141317
if (current_ops > small_thresh) current_nodedata = nullptr;
13151318
}
@@ -1324,6 +1327,7 @@ void Analyse::findTreeSplitting() {
13241327
res_insert.first->second.type = NodeType::subtree;
13251328
res_insert.first->second.group.push_back(sn);
13261329
res_insert.first->second.firstdesc.push_back(first_desc[sn]);
1330+
res_insert.first->second.stack_size = stack_subtrees_[sn];
13271331
}
13281332
/*
13291333
else if (subtree_ops[sn_parent_[sn]] > small_thresh) {
@@ -1337,6 +1341,62 @@ void Analyse::findTreeSplitting() {
13371341
}
13381342
}
13391343

1344+
void Analyse::computeStackSize() {
1345+
// Compute the minimum size of the stack to process each subtree.
1346+
1347+
std::vector<int64_t> clique_entries(sn_count_);
1348+
std::vector<int64_t> frontal_entries(sn_count_);
1349+
stack_subtrees_.assign(sn_count_, 0);
1350+
1351+
// initialise data of supernodes
1352+
for (Int sn = 0; sn < sn_count_; ++sn) {
1353+
// supernode size
1354+
const Int sz = sn_start_[sn + 1] - sn_start_[sn];
1355+
1356+
// frontal size
1357+
const Int fr = ptr_sn_[sn + 1] - ptr_sn_[sn];
1358+
1359+
// compute storage based on format used
1360+
computeStorage(fr, sz, frontal_entries[sn], clique_entries[sn]);
1361+
}
1362+
1363+
// linked lists of children
1364+
std::vector<Int> head, next;
1365+
childrenLinkedList(sn_parent_, head, next);
1366+
1367+
// go through the supernodes
1368+
for (Int sn = 0; sn < sn_count_; ++sn) {
1369+
// leaf node
1370+
if (head[sn] == -1) {
1371+
stack_subtrees_[sn] = clique_entries[sn];
1372+
continue;
1373+
}
1374+
1375+
// Compute storage
1376+
// storage is found as max(storage_1,storage_2), where
1377+
// storage_1 = max_j stack_size[j] + \sum_{k up to j-1} clique_entries[k]
1378+
// storage_2 = clique_total_entries (including node itself)
1379+
1380+
int64_t clique_partial_entries{};
1381+
int64_t storage_1{};
1382+
1383+
Int child = head[sn];
1384+
while (child != -1) {
1385+
int64_t current = stack_subtrees_[child] + clique_partial_entries;
1386+
1387+
clique_partial_entries += clique_entries[child];
1388+
storage_1 = std::max(storage_1, current);
1389+
1390+
child = next[child];
1391+
}
1392+
1393+
int64_t storage_2 = clique_partial_entries + clique_entries[sn];
1394+
1395+
stack_subtrees_[sn] = std::max(storage_1, storage_2);
1396+
max_stack_size_ = std::max(max_stack_size_, stack_subtrees_[sn]);
1397+
}
1398+
}
1399+
13401400
Int Analyse::run(Symbolic& S) {
13411401
// Perform analyse phase and store the result into the symbolic object S.
13421402
// After Run returns, the Analyse object is not valid.
@@ -1411,6 +1471,7 @@ Int Analyse::run(Symbolic& S) {
14111471
computeStorage();
14121472
computeBlockStart();
14131473
computeCriticalPath();
1474+
computeStackSize();
14141475

14151476
findTreeSplitting();
14161477

@@ -1427,6 +1488,7 @@ Int Analyse::run(Symbolic& S) {
14271488
S.serial_storage_ = serial_storage_;
14281489
S.flops_ = dense_ops_;
14291490
S.block_size_ = nb_;
1491+
S.max_stack_size_ = max_stack_size_;
14301492

14311493
// compute largest supernode
14321494
std::vector<Int> sn_size(sn_start_.begin() + 1, sn_start_.end());

highs/ipm/hipo/factorhighs/Analyse.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,9 @@ class Analyse {
8080
std::map<Int, NodeData> tree_splitting_;
8181
std::vector<bool> is_in_tree_splitting_;
8282

83+
std::vector<int64_t> stack_subtrees_{};
84+
int64_t max_stack_size_{};
85+
8386
// block size
8487
Int nb_{};
8588

@@ -101,10 +104,11 @@ class Analyse {
101104
void relativeIndClique();
102105
void reorderChildren();
103106
void computeStorage();
104-
void computeStorage(Int fr, Int sz, double& fr_entries,
105-
double& cl_entries) const;
107+
void computeStorage(Int fr, Int sz, int64_t& fr_entries,
108+
int64_t& cl_entries) const;
106109
void computeCriticalPath();
107110
void computeBlockStart();
111+
void computeStackSize();
108112

109113
void findTreeSplitting();
110114

Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#include "CliqueStack.h"
2+
3+
#include <cassert>
4+
#include <cstring>
5+
6+
namespace hipo {
7+
8+
CliqueStack::CliqueStack(Int stack_size) {
9+
stack_.assign(stack_size, 0.0);
10+
top_ = 0;
11+
workspace_ = nullptr;
12+
worksize_ = 0;
13+
}
14+
15+
double* CliqueStack::setup(Int clique_size) {
16+
// Clear workspace
17+
18+
assert(!workspace_ && !worksize_);
19+
20+
// This should not trigger reallocation, because the reserve in init is done
21+
// with the maximum possible size of a clique.
22+
if (top_ + clique_size > stack_.size()) {
23+
printf("=== Warning, reallocation of workspace\n");
24+
stack_.resize(top_ + clique_size, 0.0);
25+
}
26+
27+
workspace_ = &stack_[top_];
28+
worksize_ = clique_size;
29+
30+
// initialize workspace to zero
31+
std::memset(workspace_, 0, worksize_ * sizeof(double));
32+
33+
return workspace_;
34+
}
35+
36+
const double* CliqueStack::get() const { return stack_.data(); }
37+
Int CliqueStack::getTop() const { return top_; }
38+
39+
const double* CliqueStack::getChild(Int& child_sn) const {
40+
// Get the top of the stack, in terms of supernode ID of the child and pointer
41+
// to its data.
42+
43+
child_sn = sn_pushed_.top().first;
44+
Int child_size = sn_pushed_.top().second;
45+
const double* child = &stack_[top_ - child_size];
46+
47+
return child;
48+
}
49+
50+
void CliqueStack::popChild() {
51+
// Remove top child from the stack
52+
53+
Int child_size = sn_pushed_.top().second;
54+
sn_pushed_.pop();
55+
56+
top_ -= child_size;
57+
}
58+
59+
void CliqueStack::pushWork(Int sn) {
60+
// Put the content of the workspace at the top of the stack
61+
62+
// stack_[top_] has lower address than workspace, so no need to resize.
63+
// workspace_ and stack_[top_] do not overlap, so use memcpy
64+
std::memcpy(&stack_[top_], workspace_, worksize_ * sizeof(double));
65+
66+
top_ += worksize_;
67+
68+
// keep track of supernodes pushed
69+
sn_pushed_.push({sn, worksize_});
70+
71+
worksize_ = 0;
72+
workspace_ = nullptr;
73+
}
74+
75+
} // namespace hipo
Lines changed: 82 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,82 @@
1+
#ifndef FACTORHIGHS_CLIQUE_STACK_H
2+
#define FACTORHIGHS_CLIQUE_STACK_H
3+
4+
#include <stack>
5+
#include <vector>
6+
7+
#include "ipm/hipo/auxiliary/IntConfig.h"
8+
9+
// Class to manage the stack of cliques.
10+
//
11+
// The stack is used as follows:
12+
// - use init to initialize the stack, providing the max stack size. If the
13+
// parameter is correct, there will be no reallocation of stack during its
14+
// operation.
15+
// - use setup, with the size of the clique that is being computed, to
16+
// initialize enough elements in the workspace. This also returns a pointer to
17+
// write the new clique. The workspace is simply a chunk of space at the top
18+
// of the stack where the new clique is being computed before being pushed in
19+
// the right position.
20+
// - use getChild to obtain information about the child clique that is at the
21+
// top of the stack. This also returns a pointer to read the data of the child
22+
// clique.
23+
// - use popChild when the top child clique is no longer needed, to move on to
24+
// the next one.
25+
// - use pushWork, with the ID of supernode that is being processed, when all
26+
// children clique have been assembled, the dense factorization is completed,
27+
// and the clique in the workspace is ready to be pushed onto the stack.
28+
29+
namespace hipo {
30+
31+
class CliqueStack {
32+
std::vector<double> stack_;
33+
double* workspace_;
34+
Int worksize_;
35+
36+
// pairs (sn, size) of supernodes that got pushed
37+
std::stack<std::pair<Int, Int>> sn_pushed_{};
38+
39+
Int top_{};
40+
41+
public:
42+
CliqueStack(Int size);
43+
double* setup(Int clique_size);
44+
const double* getChild(Int& child_sn) const;
45+
void popChild();
46+
void pushWork(Int sn);
47+
const double* get() const;
48+
Int getTop() const;
49+
};
50+
51+
} // namespace hipo
52+
53+
// Example: sn 7 has children 5,6 and the stack looks like this:
54+
//
55+
// | - - - | - - - - - - - | - | - - - - - - - ...
56+
// | 4 | 5 | 6 | t
57+
// | - - - | - - - - - - - | - | - - - - - - - ...
58+
//
59+
// where t points to the top of the stack.
60+
// The information about the supernodes pushed (in sn_pushed_) is:
61+
// (sn 4, size 7), (sn 5, size 15), (sn 6, size 3)
62+
//
63+
// The workspace starts at t and fits in the stack.
64+
// The clique of sn 7 is constructed in the workspace. After a child clique is
65+
// assembled, it is popped from the stack, by moving t back. After all children
66+
// are assembled and the dense factorization is done, the stack looks like this:
67+
//
68+
// | - - - | - - - - - - - - - | - - - - - - - - - - - | - - ...
69+
// | 4 | t free | 7 |
70+
// | - - - | - - - - - - - - - | - - - - - - - - - - - | - - ...
71+
//
72+
// The clique 7 can then be pushed onto the stack by copying it at position t.
73+
// At the end, the stack looks like this:
74+
//
75+
// | - - - | - - - - - - - - - - - | - - - - - ...
76+
// | 4 | 7 | t
77+
// | - - - | - - - - - - - - - - - | - - - - - ...
78+
//
79+
// The information in sn_pushed_ is:
80+
// (sn 4, size 7), (sn 7, size 23)
81+
82+
#endif

0 commit comments

Comments
 (0)