Commit 29eca3a

Attempt to improve parallel layer
1 parent: 1e6967f

File tree: 1 file changed (+43, −9)

highs/ipm/hipo/factorhighs/Analyse.cpp

Lines changed: 43 additions & 9 deletions
@@ -114,8 +114,8 @@ Int Analyse::getPermutation() {

   // set logging of Metis depending on debug level
   options[METIS_OPTION_DBGLVL] = 0;
-  if (log_->debug(2))
-    options[METIS_OPTION_DBGLVL] = METIS_DBG_INFO | METIS_DBG_COARSEN;
+  if (log_->debug(2)) options[METIS_OPTION_DBGLVL] |= METIS_DBG_INFO;
+  if (log_->debug(3)) options[METIS_OPTION_DBGLVL] |= METIS_DBG_COARSEN;

   if (log_) log_->printDevInfo("Running Metis\n");
   Int status = METIS_NodeND(&n_, temp_ptr.data(), temp_rows.data(), NULL,
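
A minimal standalone sketch, not from the commit, of how the debug flags now compose; it assumes log_->debug(n) reports whether the logger's verbosity is at least n, so level 2 enables Metis' summary output and level 3 additionally traces coarsening:

#include <metis.h>

// Hypothetical helper mirroring the change above: build the Metis debug
// bitmask from an integer verbosity level (assumed semantics, not the repo's API).
void setMetisDebug(idx_t* options, int verbosity) {
  options[METIS_OPTION_DBGLVL] = 0;
  if (verbosity >= 2) options[METIS_OPTION_DBGLVL] |= METIS_DBG_INFO;
  if (verbosity >= 3) options[METIS_OPTION_DBGLVL] |= METIS_DBG_COARSEN;
}

The previous code set both flags at debug level 2; splitting them keeps the noisier coarsening trace off unless explicitly requested.
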
@@ -1295,7 +1295,16 @@ void Analyse::computeStackSize() {
 }

 void Analyse::generateParallelLayer(Int threads) {
+  // Look for a layer that splits the tree such that there are at least "pieces"
+  // subtrees in the layer, with the largest being no more than "ratio_thresh"
+  // times more expensive than the second largest.
+  const Int pieces = 2;
+  const double ratio_thresh = 4;
+
   if (threads > 1) {
+    std::stringstream log_stream;
+    log_stream << "Searching parallel layer\n";
+
     // linked lists of children
     std::vector<Int> head, next;
     childrenLinkedList(sn_parent_, head, next);
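
The acceptance test described in the new comment can be read in isolation; here is a self-contained sketch, with illustrative names that are not the repo's:

#include <algorithm>
#include <vector>

// Would a candidate layer be accepted under the new criterion? A layer needs
// at least `pieces` subtrees, and the most expensive subtree must cost less
// than `ratio_thresh` times the second most expensive one.
bool acceptLayer(std::vector<double> subtree_cost, std::size_t pieces = 2,
                 double ratio_thresh = 4.0) {
  if (subtree_cost.size() < pieces || subtree_cost.size() < 2) return false;
  std::sort(subtree_cost.begin(), subtree_cost.end());  // cheapest first
  const double largest = subtree_cost[subtree_cost.size() - 1];
  const double second = subtree_cost[subtree_cost.size() - 2];
  return largest < ratio_thresh * second;
}

With the defaults above, two subtrees where the larger does three times the work of the smaller would be accepted, while a five-fold imbalance would trigger another refinement pass.
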
@@ -1351,6 +1360,8 @@ void Analyse::generateParallelLayer(Int threads) {
       if (sn_parent_[sn] == -1) layer.push_back(sn);
     }

+    double ops_above{};
+    double ops_small{};
     Int iter = 0;
     while (true) {
       // sort layer so that nodes with high subtree_ops appear last
@@ -1365,15 +1376,31 @@ void Analyse::generateParallelLayer(Int threads) {
       ratio_first_two = subtree_ops[*layer.rbegin()] /
                         subtree_ops[*std::next(layer.rbegin(), 1)];

-      // printf("iter %d,layer %d, above %d, small %d, ratio %f\n", iter,
-      //        layer.size(), aboveLayer_.size(), smallSubtrees_.size(),
-      //        ratio_first_two);
+      // log layer info
+      log_stream << " iter " << iter << ": "
+                 << "L " << layer.size() << ", A " << aboveLayer_.size() << "("
+                 << fix(ops_above / total_ops * 100, 0, 1) << "%), S "
+                 << smallSubtrees_.size() << "("
+                 << fix(ops_small / total_ops * 100, 0, 1) << "%), ratio "
+                 << (layer.size() > 1 ? fix(ratio_first_two, 0, 1) : "-")
+                 << "\n ";
+      for (Int i : layer) {
+        log_stream << " " << fix(sn_ops[i] / total_ops * 100, 0, 1) << "("
+                   << fix(subtree_ops[i] / total_ops * 100, 0, 1) << ")";
+      }
+      log_stream << "\n";

       // if there are enough subtrees and they are somewhat balanced, stop
-      if (layer.size() >= threads && ratio_first_two < 2) break;
+      if (layer.size() >= pieces && ratio_first_two < ratio_thresh) {
+        log_stream << " Accept layer\n";
+        break;
+      }

       // don't allow too many iterations
-      if (iter > sn_count_ / 10) break;
+      if (iter > sn_count_ / 10) {
+        log_stream << " Too many iterations\n";
+        break;
+      }

       // find most expensive node in layer which have children
       Int node_to_remove = -1;
@@ -1385,13 +1412,17 @@ void Analyse::generateParallelLayer(Int threads) {
           break;
         }
       }
-      if (node_to_remove == -1) break;
+      if (node_to_remove == -1) {
+        log_stream << " No candidate left\n";
+        break;
+      }

       // remove node from layer
       auto it = layer.begin();
       std::advance(it, index_to_remove);
       layer.erase(it);
       aboveLayer_.insert(node_to_remove);
+      ops_above += sn_ops[node_to_remove];

       // find child with most operations
       Int child_most_ops = -1;
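
For orientation, a toy version of the refinement step performed here, stripped of the small-subtree handling and the operation counters; the Node struct and function names are illustrative only:

#include <algorithm>
#include <vector>

struct Node {
  double subtree_cost;        // cost of the whole subtree rooted at this node
  std::vector<int> children;  // child node indices
};

// One refinement pass: move the most expensive layer node that still has
// children above the layer and replace it by its children, so the layer
// becomes finer and, ideally, better balanced.
void refineOnce(const std::vector<Node>& tree, std::vector<int>& layer,
                std::vector<int>& above_layer) {
  int pick = -1;
  for (int n : layer)
    if (!tree[n].children.empty() &&
        (pick == -1 || tree[n].subtree_cost > tree[pick].subtree_cost))
      pick = n;
  if (pick == -1) return;  // every layer node is a leaf: nothing left to split

  layer.erase(std::find(layer.begin(), layer.end(), pick));
  above_layer.push_back(pick);
  for (int c : tree[pick].children) layer.push_back(c);
}

The commit's version additionally keeps the layer sorted by subtree cost, accumulates ops_above for the removed node, and appears to park children it considers too small (relative to small_subtree_thresh) in smallSubtrees_ rather than in the layer.
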
@@ -1405,7 +1436,7 @@ void Analyse::generateParallelLayer(Int threads) {
         child = next[child];
       }

-      const double small_subtree_thresh = 0.001;
+      const double small_subtree_thresh = 0.01;

       // If child with most operations is large enough, ignore.
       // Otherwise, force at least this child to be added to layer.
@@ -1424,6 +1455,7 @@ void Analyse::generateParallelLayer(Int threads) {
           layer.push_back(child);
         } else {
           smallSubtrees_.insert(child);
+          ops_small += subtree_ops[child];
         }
         child = next[child];
       }
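
The two new accumulators suggest a simple bookkeeping invariant; below is a hedged sanity check, assuming a subtree's cost equals the node's own cost plus its children's subtree costs (illustrative names, not the repo's):

#include <cassert>
#include <cmath>
#include <vector>

// The work moved above the layer, the work parked in small subtrees, and the
// work left in the layer's subtrees should together account for all of the
// operations in the tree.
void checkWorkPartition(double ops_above, double ops_small,
                        const std::vector<double>& layer_subtree_ops,
                        double total_ops) {
  double ops_layer = 0.0;
  for (double c : layer_subtree_ops) ops_layer += c;
  assert(std::fabs(ops_above + ops_small + ops_layer - total_ops) <=
         1e-8 * total_ops);
}
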
@@ -1439,6 +1471,8 @@ void Analyse::generateParallelLayer(Int threads) {
     layerIndex_.insert({*it, index});
     ++index;
   }
+
+  log_->printDevDetailed(log_stream);
 }

 // Compute the size of the stack needed to process the tree in serial. This is

Comments (0)