Skip to content

Commit b5be8ad

Browse files
committed
Reverse order of children, to obtain same result as in serial
1 parent 93fae3e commit b5be8ad

File tree

3 files changed

+33
-35
lines changed

3 files changed

+33
-35
lines changed

highs/ipm/hipo/factorhighs/Analyse.cpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1297,9 +1297,10 @@ void Analyse::findTreeSplitting() {
12971297
// The tree is parallelised by creating a task for each single node and a task
12981298
// for each group of subtrees.
12991299

1300-
// linked lists of children
1301-
std::vector<Int> head, next;
1302-
childrenLinkedList(sn_parent_, head, next);
1300+
// reverse linked lists of children
1301+
std::vector<Int> head_reverse, next_reverse;
1302+
childrenLinkedList(sn_parent_, head_reverse, next_reverse);
1303+
reverseLinkedList(head_reverse, next_reverse);
13031304

13041305
// compute number of operations for each supernode
13051306
std::vector<double> sn_ops(sn_count_);
@@ -1352,9 +1353,13 @@ void Analyse::findTreeSplitting() {
13521353
// count is in the interval [small_thresh, 2*small_thresh]. Each group
13531354
// corresponds to one task executed in parallel.
13541355

1356+
// The children are explored in reverse order, so that they can be synced
1357+
// in reverse order during the factorisation. In this way, the parallel
1358+
// and serial code obtain the exact same answer.
1359+
13551360
double current_ops = 0.0;
13561361
NodeData* current_nodedata = nullptr;
1357-
Int child = head[sn];
1362+
Int child = head_reverse[sn];
13581363
while (child != -1) {
13591364
bool is_small = subtree_ops[child] <= small_thresh;
13601365

@@ -1371,7 +1376,7 @@ void Analyse::findTreeSplitting() {
13711376
if (current_ops > small_thresh) current_nodedata = nullptr;
13721377
}
13731378

1374-
child = next[child];
1379+
child = next_reverse[child];
13751380
}
13761381

13771382
} else if (sn_parent_[sn] == -1) {

highs/ipm/hipo/factorhighs/Factorise.cpp

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -194,16 +194,16 @@ void Factorise::processSupernode(Int sn, const bool should_parallelise) {
194194
spawn(first_child_[sn], tg, false);
195195
do_parallelise = false;
196196
} else {
197-
// spawn children of this supernode in reverse order
198-
Int child_to_spawn = first_child_reverse_[sn];
197+
// spawn children of this supernode in forward order
198+
Int child_to_spawn = first_child_[sn];
199199
while (child_to_spawn != -1) {
200200
spawn(child_to_spawn, tg);
201-
child_to_spawn = next_child_reverse_[child_to_spawn];
201+
child_to_spawn = next_child_[child_to_spawn];
202202
}
203203

204204
// wait for first child to finish, before starting the parent (if there is
205205
// a first child)
206-
if (first_child_reverse_[sn] != -1) sync(first_child_[sn], tg);
206+
if (first_child_reverse_[sn] != -1) sync(first_child_reverse_[sn], tg);
207207
}
208208
}
209209

@@ -255,28 +255,20 @@ void Factorise::processSupernode(Int sn, const bool should_parallelise) {
255255
// ===================================================
256256
// Assemble frontal matrices of children
257257
// ===================================================
258-
Int child_sn = first_child_[sn];
259-
if (serial) child_sn = first_child_reverse_[sn];
260-
258+
Int child_sn = first_child_reverse_[sn];
261259
while (child_sn != -1) {
262260
// Child contribution is found:
263261
// - in cliquestack, if we are processing the tree in serial.
264262
// - in schur_contribution_ if we are processing the tree in parallel.
265-
// Children are summed:
266-
// - in reverse order in serial, so that the correct child is found on top
267-
// of the CliqueStack.
268-
// - in forward order otherwise, so that if a small subtree is synced, and
269-
// it is not the first in its group, it will already have synced when it
270-
// is needed.
271-
272-
const double* child_clique;
263+
// Children are summed always in reverse order.
273264

274265
if (do_parallelise) {
275266
// sync with spawned child, apart from the first one
276-
if (child_sn != first_child_[sn]) sync(child_sn, tg);
267+
if (child_sn != first_child_reverse_[sn]) sync(child_sn, tg);
277268
if (flag_stop_.load(std::memory_order_relaxed)) return;
278269
}
279270

271+
const double* child_clique;
280272
if (!serial) {
281273
child_clique = schur_contribution_[child_sn].data();
282274
if (!child_clique) {
@@ -341,10 +333,7 @@ void Factorise::processSupernode(Int sn, const bool should_parallelise) {
341333
}
342334

343335
// move on to the next child
344-
if (!serial)
345-
child_sn = next_child_[child_sn];
346-
else
347-
child_sn = next_child_reverse_[child_sn];
336+
child_sn = next_child_reverse_[child_sn];
348337
}
349338

350339
if (flag_stop_.load(std::memory_order_relaxed)) return;

highs/ipm/hipo/factorhighs/TreeSplitting.h

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -60,25 +60,29 @@ class TreeSplitting {
6060
// where Fj is the first descendant of node j.
6161
// belong_[j] is true for j=p,a,c,e,f and false for j=b,d,g.
6262
//
63-
// - When a is ran, a task is executed that executes the whole subtree of nodes
63+
// - When a is run, a task is executed that executes the whole subtree of nodes
6464
// a, b and d.
65-
// - When b is ran, nothing happens, since it was already executed as part of
65+
// - When b is run, nothing happens, since it was already executed as part of
6666
// the task that executed a.
67-
// - When c is ran, a task is created that executes only that supernode.
68-
// - When d is ran, nothing happens, since it was already executed as part of
67+
// - When c is run, a task is created that executes only that supernode.
68+
// - When d is run, nothing happens, since it was already executed as part of
6969
// the task that executed a.
70-
// - When e is ran, a task is executed that executes the whole subtree of nodes
70+
// - When e is run, a task is executed that executes the whole subtree of nodes
7171
// e and g.
72-
// - When f is ran, a task is created that executes only that supernode.
73-
// - When g is ran, nothing happens, since it was already executed as part of
72+
// - When f is run, a task is created that executes only that supernode.
73+
// - When g is run, nothing happens, since it was already executed as part of
7474
// the task that executed e.
7575
//
7676
// It is important that node a is processed before b and d, so that when b or d
7777
// are synced, their operations are already performed. Otherwise, syncing node b
7878
// would complete even though the operations for node b have not been performed.
79-
// In other words, children should be synced in forward order, and thus spawned
80-
// in reverse order.
81-
//
79+
// When the factorisation is run in serial using a CliqueStack, the children are
80+
// processed in reverse order. To obtain the same result when running the code
81+
// in parallel, the children must be synced in reverse order, and so spawned in
82+
// forward order. To make the TreeSplitting work, the groups of supernodes
83+
// should be formed in reverse order, so that in the group {a,b,d}, d has the
84+
// lowest ordering and a the highest one. In this way, syncing in reverse order
85+
// produces the correct result.
8286

8387
} // namespace hipo
8488
#endif

0 commit comments

Comments
 (0)