Skip to content

Commit 2dc2b85

Browse files
committed
Add CliqueStack for serial tree
1 parent 57b26fb commit 2dc2b85

15 files changed

+343
-143
lines changed

cmake/sources.cmake

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -205,6 +205,7 @@ set(hipo_headers
205205
set(factor_highs_sources
206206
ipm/hipo/factorhighs/Analyse.cpp
207207
ipm/hipo/factorhighs/CallAndTimeBlas.cpp
208+
ipm/hipo/factorhighs/CliqueStack.cpp
208209
ipm/hipo/factorhighs/DataCollector.cpp
209210
ipm/hipo/factorhighs/DenseFactHybrid.cpp
210211
ipm/hipo/factorhighs/DenseFactKernel.cpp
@@ -223,6 +224,7 @@ set(factor_highs_sources
223224
set(factor_highs_headers
224225
ipm/hipo/factorhighs/Analyse.h
225226
ipm/hipo/factorhighs/CallAndTimeBlas.h
227+
ipm/hipo/factorhighs/CliqueStack.h
226228
ipm/hipo/factorhighs/DataCollector.h
227229
ipm/hipo/factorhighs/DenseFact.h
228230
ipm/hipo/factorhighs/DgemmParallel.h

highs/ipm/hipo/factorhighs/Analyse.cpp

Lines changed: 74 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -922,8 +922,8 @@ void Analyse::relativeIndClique() {
922922
}
923923
}
924924

925-
void Analyse::computeStorage(Int fr, Int sz, double& fr_entries,
926-
double& cl_entries) const {
925+
void Analyse::computeStorage(Int fr, Int sz, Int64& fr_entries,
926+
Int64& cl_entries) const {
927927
// compute storage required by frontal and clique, based on the format used
928928

929929
const Int cl = fr - sz;
@@ -934,91 +934,14 @@ void Analyse::computeStorage(Int fr, Int sz, double& fr_entries,
934934

935935
// clique is stored as a collection of rectangles
936936
n_blocks = (cl - 1) / nb_ + 1;
937-
double schur_size{};
937+
Int64 schur_size{};
938938
for (Int j = 0; j < n_blocks; ++j) {
939939
const Int jb = std::min(nb_, cl - j * nb_);
940-
schur_size += (double)(cl - j * nb_) * jb;
940+
schur_size += (Int64)(cl - j * nb_) * jb;
941941
}
942942
cl_entries = schur_size;
943943
}
944944

945-
void Analyse::computeStorage() {
946-
std::vector<double> clique_entries(sn_count_);
947-
std::vector<double> frontal_entries(sn_count_);
948-
std::vector<double> storage(sn_count_);
949-
std::vector<double> storage_factors(sn_count_);
950-
951-
// initialise data of supernodes
952-
for (Int sn = 0; sn < sn_count_; ++sn) {
953-
// supernode size
954-
const Int sz = sn_start_[sn + 1] - sn_start_[sn];
955-
956-
// frontal size
957-
const Int fr = ptr_sn_[sn + 1] - ptr_sn_[sn];
958-
959-
// compute storage based on format used
960-
computeStorage(fr, sz, frontal_entries[sn], clique_entries[sn]);
961-
962-
// compute number of entries in factors within the subtree
963-
storage_factors[sn] += frontal_entries[sn];
964-
if (sn_parent_[sn] != -1)
965-
storage_factors[sn_parent_[sn]] += storage_factors[sn];
966-
}
967-
968-
// linked lists of children
969-
std::vector<Int> head, next;
970-
childrenLinkedList(sn_parent_, head, next);
971-
972-
// go through the supernodes
973-
for (Int sn = 0; sn < sn_count_; ++sn) {
974-
// leaf node
975-
if (head[sn] == -1) {
976-
storage[sn] = frontal_entries[sn] + clique_entries[sn];
977-
continue;
978-
}
979-
980-
double clique_total_entries{};
981-
double factors_total_entries{};
982-
Int child = head[sn];
983-
while (child != -1) {
984-
clique_total_entries += clique_entries[child];
985-
factors_total_entries += storage_factors[child];
986-
child = next[child];
987-
}
988-
989-
// Compute storage
990-
// storage is found as max(storage_1,storage_2), where
991-
// storage_1 = max_j storage[j] + \sum_{k up to j-1} clique_entries[k] +
992-
// storage_factors[k]
993-
// storage_2 = frontal_entries + clique_entries + clique_total_entries +
994-
// factors_total_entries
995-
const double storage_2 = frontal_entries[sn] + clique_entries[sn] +
996-
clique_total_entries + factors_total_entries;
997-
998-
double clique_partial_entries{};
999-
double factors_partial_entries{};
1000-
double storage_1{};
1001-
1002-
child = head[sn];
1003-
while (child != -1) {
1004-
double current =
1005-
storage[child] + clique_partial_entries + factors_partial_entries;
1006-
1007-
clique_partial_entries += clique_entries[child];
1008-
factors_partial_entries += storage_factors[child];
1009-
storage_1 = std::max(storage_1, current);
1010-
1011-
child = next[child];
1012-
}
1013-
storage[sn] = std::max(storage_1, storage_2);
1014-
}
1015-
1016-
for (Int sn = 0; sn < sn_count_; ++sn) {
1017-
// save max storage needed, multiply by 8 because double needs 8 bytes
1018-
serial_storage_ = std::max(serial_storage_, 8 * storage[sn]);
1019-
}
1020-
}
1021-
1022945
void Analyse::computeCriticalPath() {
1023946
// Compute the critical path within the supernodal elimination tree, and the
1024947
// number of operations along the path. This is the number of operations that
@@ -1064,8 +987,8 @@ void Analyse::computeCriticalPath() {
1064987
}
1065988

1066989
void Analyse::reorderChildren() {
1067-
std::vector<double> clique_entries(sn_count_);
1068-
std::vector<double> frontal_entries(sn_count_);
990+
std::vector<Int64> clique_entries(sn_count_);
991+
std::vector<Int64> frontal_entries(sn_count_);
1069992
std::vector<double> storage(sn_count_);
1070993
std::vector<double> storage_factors(sn_count_);
1071994

@@ -1266,6 +1189,72 @@ Int Analyse::checkOverflow() const {
12661189
return 0;
12671190
}
12681191

1192+
void Analyse::computeStackSize() {
1193+
// Compute the minimum size of the stack to process the elimination tree
1194+
// serially.
1195+
1196+
std::vector<Int64> clique_entries(sn_count_);
1197+
std::vector<Int64> stack_subtrees(sn_count_);
1198+
Int64 total_frontal{};
1199+
1200+
// initialise data of supernodes
1201+
for (Int sn = 0; sn < sn_count_; ++sn) {
1202+
// supernode size
1203+
const Int sz = sn_start_[sn + 1] - sn_start_[sn];
1204+
1205+
// frontal size
1206+
const Int fr = ptr_sn_[sn + 1] - ptr_sn_[sn];
1207+
1208+
Int64 frontal_entries{};
1209+
1210+
// compute storage based on format used
1211+
computeStorage(fr, sz, frontal_entries, clique_entries[sn]);
1212+
1213+
total_frontal += frontal_entries;
1214+
}
1215+
1216+
// linked lists of children
1217+
std::vector<Int> head, next;
1218+
childrenLinkedList(sn_parent_, head, next);
1219+
1220+
// go through the supernodes
1221+
for (Int sn = 0; sn < sn_count_; ++sn) {
1222+
// leaf node
1223+
if (head[sn] == -1) {
1224+
stack_subtrees[sn] = clique_entries[sn];
1225+
continue;
1226+
}
1227+
1228+
// Compute storage
1229+
// storage is found as max(storage_1,storage_2), where
1230+
// storage_1 = max_j stack_size[j] + \sum_{k up to j-1} clique_entries[k]
1231+
// storage_2 = clique_total_entries (including node itself)
1232+
1233+
Int64 clique_partial_entries{};
1234+
Int64 storage_1{};
1235+
1236+
Int child = head[sn];
1237+
while (child != -1) {
1238+
Int64 current = stack_subtrees[child] + clique_partial_entries;
1239+
1240+
clique_partial_entries += clique_entries[child];
1241+
storage_1 = std::max(storage_1, current);
1242+
1243+
child = next[child];
1244+
}
1245+
1246+
Int64 storage_2 = clique_partial_entries + clique_entries[sn];
1247+
1248+
stack_subtrees[sn] = std::max(storage_1, storage_2);
1249+
max_stack_size_ = std::max(max_stack_size_, stack_subtrees[sn]);
1250+
}
1251+
1252+
// minimum storage in serial is equal to the space needed to store the
1253+
// factorisation and the maximum size of the stack. Times 8 to obtain the
1254+
// number of bytes.
1255+
serial_storage_ = (total_frontal + max_stack_size_) * 8;
1256+
}
1257+
12691258
Int Analyse::run(Symbolic& S) {
12701259
// Perform analyse phase and store the result into the symbolic object S.
12711260
// After Run returns, the Analyse object is not valid.
@@ -1337,9 +1326,9 @@ Int Analyse::run(Symbolic& S) {
13371326
data_.sumTime(kTimeAnalyseRelInd, clock_items.stop());
13381327
#endif
13391328

1340-
computeStorage();
13411329
computeBlockStart();
13421330
computeCriticalPath();
1331+
computeStackSize();
13431332

13441333
// move relevant stuff into S
13451334
S.n_ = n_;
@@ -1354,6 +1343,7 @@ Int Analyse::run(Symbolic& S) {
13541343
S.serial_storage_ = serial_storage_;
13551344
S.flops_ = dense_ops_;
13561345
S.block_size_ = nb_;
1346+
S.max_stack_size_ = max_stack_size_;
13571347

13581348
// compute largest supernode
13591349
std::vector<Int> sn_size(sn_start_.begin() + 1, sn_start_.end());

highs/ipm/hipo/factorhighs/Analyse.h

Lines changed: 5 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,8 @@ class Analyse {
7575
// estimate of maximum storage
7676
double serial_storage_{};
7777

78+
Int64 max_stack_size_{};
79+
7880
std::vector<std::vector<Int64>> clique_block_start_{};
7981

8082
// block size
@@ -97,12 +99,11 @@ class Analyse {
9799
void relativeIndCols();
98100
void relativeIndClique();
99101
void reorderChildren();
100-
void computeStorage();
101-
void computeStorage(Int fr, Int sz, double& fr_entries,
102-
double& cl_entries) const;
102+
void computeStorage(Int fr, Int sz, Int64& fr_entries,
103+
Int64& cl_entries) const;
103104
void computeCriticalPath();
104105
void computeBlockStart();
105-
106+
void computeStackSize();
106107
Int checkOverflow() const;
107108

108109
public:
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#include "CliqueStack.h"
2+
3+
#include <cassert>
4+
#include <cstring>
5+
6+
namespace hipo {
7+
8+
void CliqueStack::init(Int64 stack_size) {
  // Size the underlying storage to stack_size entries (newly created entries
  // are zero) and reset the bookkeeping: nothing is stored on the stack and no
  // workspace is active.
  stack_.resize(stack_size, 0.0);

  workspace_ = nullptr;
  worksize_ = 0;
  top_ = 0;
}
14+
15+
double* CliqueStack::setup(Int64 clique_size, bool& reallocation) {
  // Reserve a workspace of clique_size entries starting at the current top of
  // the stack, set it to zero and return a pointer to it.
  //
  // clique_size:  number of entries (doubles) required by the workspace.
  // reallocation: set to true if the storage had to grow to fit the
  //               workspace, false otherwise. When the storage grows,
  //               pointers previously obtained from the stack may no longer
  //               be valid.
  //
  // No workspace must be active when this is called, i.e. the previous
  // workspace must have been pushed with pushWork.

  assert(!workspace_ && !worksize_);
  assert(clique_size >= 0);
  reallocation = false;

  // This should not trigger reallocation, because the resize in init is done
  // with the maximum possible size of the stack.
  const Int64 required = top_ + clique_size;
  if (required > static_cast<Int64>(stack_.size())) {
    reallocation = true;
    stack_.resize(required, 0.0);
  }

  // Use data() + offset rather than &stack_[top_]: when the workspace is empty
  // and the stack is full, top_ == stack_.size() and operator[] would index
  // past the end of the vector.
  workspace_ = stack_.data() + top_;
  worksize_ = clique_size;

  // initialize workspace to zero
  if (worksize_ > 0) {
    std::memset(workspace_, 0, worksize_ * sizeof(double));
  }

  return workspace_;
}
36+
37+
// Returns true when the underlying storage vector holds no entries.
// NOTE(review): this tests the storage, not the pushed cliques, so it stays
// false after init() with a positive size even when nothing has been pushed.
// Presumably it is meant to tell whether the stack was initialised at all;
// confirm against the callers.
bool CliqueStack::empty() const { return stack_.size() == 0; }
38+
39+
const double* CliqueStack::getChild(Int& child_sn) const {
  // Look at the entry on top of the stack without removing it.
  // child_sn is set to the supernode ID of that entry; the returned pointer
  // refers to the start of its data, which occupies the last child_size
  // entries below top_.
  // At least one entry must have been pushed.

  const auto& top_entry = sn_pushed_.top();

  child_sn = top_entry.first;
  const Int64 child_size = top_entry.second;

  return &stack_[top_ - child_size];
}
49+
50+
void CliqueStack::popChild() {
51+
// Remove top child from the stack
52+
53+
Int64 child_size = sn_pushed_.top().second;
54+
sn_pushed_.pop();
55+
56+
top_ -= child_size;
57+
}
58+
59+
void CliqueStack::pushWork(Int sn) {
  // Move the content of the active workspace to the top of the stack, record
  // it as the clique of supernode sn, and deactivate the workspace.
  //
  // sn: ID of the supernode that owns the data in the workspace.

  if (worksize_ > 0) {
    // The workspace was placed at the top of the stack by setup, and top_ can
    // only have decreased since then (popChild), so the destination starts at
    // or below the workspace and the data always fits without resizing.
    //
    // Source and destination can overlap: they coincide if no child was
    // popped after setup, and they partially overlap if the popped children
    // occupied fewer entries than the workspace. memcpy is undefined for
    // overlapping ranges, so memmove is required here.
    std::memmove(stack_.data() + top_, workspace_, worksize_ * sizeof(double));

    top_ += worksize_;
  }

  // keep track of supernodes pushed (also when the workspace is empty, so
  // that getChild/popChild see one record per supernode)
  sn_pushed_.push({sn, worksize_});

  worksize_ = 0;
  workspace_ = nullptr;
}
74+
75+
} // namespace hipo

0 commit comments

Comments
 (0)