Skip to content

Commit 64838ce

Browse files
committed
Parallel ratio considers small subtrees
1 parent bf2bec1 commit 64838ce

File tree

2 files changed

+30
-15
lines changed

2 files changed

+30
-15
lines changed

highs/ipm/hipo/factorhighs/Analyse.cpp

Lines changed: 25 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1368,15 +1368,18 @@ void Analyse::generateParallelLayer(Int threads) {
13681368
// How the layer is found:
13691369
// assignToBins returns the largest number of operations in any thread with
13701370
// a given layer L, called f(L).
1371+
// Subtrees that are too small to be included in the layer, according to a
1372+
// threshold, belong to the set of small subtrees S.
13711373
// The parallelisability ratio of a given layer is measured as
1372-
// total_ops / (ops_above + f(L))
1374+
// total_ops / (ops_above + ops_small + f(L))
13731375
// We want this number to be as large as possible. Equivalently, we want the
1374-
// score = ops_above + f(L) to be as small as possible.
1376+
// score = ops_above + ops_small + f(L) to be as small as possible.
13751377
// For each node p in the layer, compute the score when the layer is
13761378
// L' = L \ {p} U {children of p}.
13771379
// The improvement to the score brought by this layer compared to the
13781380
// previous one is measured by
1379-
// f(L) - f(L') - ops_of_node
1381+
// f(L) - f(L') - S' - ops_of_node
1382+
// where S' is the number of operations newly added to S due to the new layer.
13801383
// If this quantity is positive, there is an improvement when choosing L'
13811384
// over L. If there are nodes with positive improvement, take the best one,
13821385
// remove that node from the layer and add its children. If no node brings
@@ -1408,26 +1411,30 @@ void Analyse::generateParallelLayer(Int threads) {
14081411
std::vector<Int>::iterator best_it;
14091412
double best_largest_bin;
14101413

1411-
bool any_node_with_large_children = false;
1414+
bool any_node_with_children = false;
14121415

14131416
// loop over all nodes in the current layer
14141417
for (auto it = layer.begin(); it != layer.end(); ++it) {
14151418
// build layer obtained adding children
14161419
std::vector<Int> local_layer = layer;
1420+
double local_small{};
14171421
Int child = head[*it];
14181422
while (child != -1) {
14191423
if (subtree_ops[child] > small_thresh) {
14201424
local_layer.push_back(child);
1421-
any_node_with_large_children = true;
1425+
} else {
1426+
local_small += subtree_ops[child];
14221427
}
1428+
any_node_with_children = true;
14231429
child = next[child];
14241430
}
14251431

14261432
// compute largest bin with this new layer
14271433
double largest_bin =
14281434
assignToBins(local_layer, subtree_ops, *it, threads);
14291435

1430-
double score = current_largest_bin - largest_bin - sn_ops[*it];
1436+
double score =
1437+
current_largest_bin - largest_bin - sn_ops[*it] - local_small;
14311438

14321439
if (score > best_score) {
14331440
best_score = score;
@@ -1436,15 +1443,17 @@ void Analyse::generateParallelLayer(Int threads) {
14361443
}
14371444

14381445
log_stream << "\t"
1439-
<< fix(total_ops / (ops_above + sn_ops[*it] + largest_bin),
1446+
<< fix(total_ops / (ops_above + sn_ops[*it] + largest_bin +
1447+
ops_small + local_small),
14401448
0, 2)
1441-
<< " (" << sci(score, 0, 1) << ")\n";
1449+
<< " (" << sci(score, 0, 1) << ") <== " << integer(*it)
1450+
<< "\n";
14421451
}
14431452

14441453
log_stream << "Iter " << integer(iter) << ": ";
14451454

14461455
// no node brings a benefit
1447-
if (best_score < 0 || !any_node_with_large_children) {
1456+
if (best_score < 0 || !any_node_with_children) {
14481457
log_stream << "fail\n";
14491458
break;
14501459
} else {
@@ -1482,21 +1491,27 @@ void Analyse::generateParallelLayer(Int threads) {
14821491
}
14831492

14841493
log_stream << "ratio "
1485-
<< fix(total_ops / (ops_above + best_largest_bin), 0, 2)
1494+
<< fix(total_ops /
1495+
(ops_above + best_largest_bin + ops_small),
1496+
0, 2)
14861497
<< ", layer " << integer(layer.size()) << '\n';
14871498
}
14881499

14891500
++iter;
14901501
}
14911502
// layer has been decided
14921503

1504+
double ratio = total_ops / (ops_above + ops_small +
1505+
assignToBins(layer, subtree_ops, -1, threads));
1506+
14931507
log_stream << "\nLayer " << integer(layer.size()) << ": ";
14941508
for (Int i : layer)
14951509
log_stream << fix(subtree_ops[i] / total_ops * 100, 0, 1) << " ";
14961510
log_stream << "\nAbove " << fix(ops_above / total_ops * 100, 0, 1) << "% ("
14971511
<< integer(aboveLayer_.size()) << ")\n";
14981512
log_stream << "Small " << fix(ops_small / total_ops * 100, 0, 1) << "% ("
14991513
<< integer(smallSubtrees_.size()) << ")\n";
1514+
log_stream << "Parallel ratio " << fix(ratio, 0, 2) << "\n";
15001515

15011516
log_->printDevDetailed(log_stream);
15021517

highs/ipm/hipo/factorhighs/Factorise.cpp

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -354,18 +354,18 @@ bool Factorise::run(Numeric& num) {
354354
highs::parallel::spawn([=]() { processSupernodes(start, end); });
355355
}
356356

357+
// wait for subtrees in the layer to complete
358+
for (Int i = 0; i < S_.layerIndex().size(); ++i) {
359+
highs::parallel::sync();
360+
}
361+
357362
// process small subtrees
358363
for (Int i = 0; i < S_.smallSubtrees().size(); ++i) {
359364
Int start = S_.smallSubtreeInfo(i).start;
360365
Int end = S_.smallSubtreeInfo(i).end;
361366
processSupernodes(start, end);
362367
}
363368

364-
// wait for subtrees in the layer to complete
365-
for (Int i = 0; i < S_.layerIndex().size(); ++i) {
366-
highs::parallel::sync();
367-
}
368-
369369
// process nodes above layer
370370
for (auto it = S_.aboveLayer().begin(); it != S_.aboveLayer().end(); ++it) {
371371
processSupernode(*it);

0 commit comments

Comments
 (0)