@@ -1226,6 +1226,118 @@ void Analyse::computeBlockStart() {
12261226 }
12271227}
12281228
1229+ void Analyse::findTreeSplittingSolve () {
1230+ // Split the tree into single nodes and subtrees for solve.
1231+ // The subtrees have at most 1% of total operations. They are grouped together
1232+ // so that each group of subtrees has enough operations.
1233+ // The tree is parallelised by creating a task for each single node and a task
1234+ // for each group of subtrees.
1235+
1236+ // compute number of operations for each supernode
1237+ std::vector<double > sn_ops (sn_count_);
1238+ double total_ops = 0 ;
1239+ for (Int sn = 0 ; sn < sn_count_; ++sn) {
1240+ // supernode size
1241+ const Int sz = sn_start_[sn + 1 ] - sn_start_[sn];
1242+
1243+ // frontal size
1244+ const Int fr = ptr_sn_[sn + 1 ] - ptr_sn_[sn];
1245+
1246+ // number of operations for this supernode for the solve
1247+ double ops_to_add = (double )sz * sz / 2 + (double )sz * fr;
1248+ sn_ops[sn] += ops_to_add;
1249+ total_ops += ops_to_add;
1250+
1251+ // add assembly operations
1252+ if (sn_parent_[sn] != -1 ) {
1253+ const Int ldc = fr - sz;
1254+ sn_ops[sn_parent_[sn]] += ldc;
1255+ total_ops += ldc;
1256+ }
1257+ }
1258+
1259+ // compute number of operations to process each subtree
1260+ std::vector<double > subtree_ops (sn_count_, 0.0 );
1261+ for (Int sn = 0 ; sn < sn_count_; ++sn) {
1262+ subtree_ops[sn] += sn_ops[sn];
1263+ if (sn_parent_[sn] != -1 ) {
1264+ subtree_ops[sn_parent_[sn]] += subtree_ops[sn];
1265+ }
1266+ }
1267+
1268+ // Find first descendant of each supernode
1269+ std::vector<Int> first_desc;
1270+ firstDescendant (sn_parent_, first_desc);
1271+
1272+ // linked lists of children
1273+ std::vector<Int> head, next;
1274+ childrenLinkedList (sn_parent_, head, next);
1275+
1276+ node_data_ptr_.assign (sn_count_, nullptr );
1277+
1278+ // Divide the tree into single nodes and subtrees, such that each subtree has
1279+ // at most small_thresh operations overall. Group subtrees together, so that
1280+ // groups have enough operations.
1281+ const double small_thresh = 0.05 * total_ops;
1282+ for (Int sn = 0 ; sn < sn_count_; ++sn) {
1283+ if (subtree_ops[sn] > small_thresh) {
1284+ // sn is a single node
1285+ auto res_insert = tree_splitting_.insert ({sn, {}});
1286+ node_data_ptr_[sn] = &res_insert.first ->second ;
1287+ res_insert.first ->second .type = NodeType::single;
1288+ num_single_++;
1289+
1290+ // The children of this sn are either single nodes or head of subtrees.
1291+ // Divide the head of subtrees in groups, so that each group has enough
1292+ // operations. Each group corresponds to one task executed in parallel.
1293+
1294+ double current_ops = 0.0 ;
1295+ NodeData* current_nodedata = nullptr ;
1296+ Int child = head[sn];
1297+ while (child != -1 ) {
1298+ bool is_small = subtree_ops[child] <= small_thresh;
1299+
1300+ if (is_small) {
1301+ num_subtrees_++;
1302+
1303+ if (!current_nodedata) {
1304+ auto res_insert = tree_splitting_.insert ({child, {}});
1305+ current_nodedata = &res_insert.first ->second ;
1306+ node_data_ptr_[child] = current_nodedata;
1307+ current_nodedata->type = NodeType::subtree;
1308+ current_ops = 0.0 ;
1309+ }
1310+
1311+ current_ops += subtree_ops[child];
1312+ current_nodedata->group .push_back (child);
1313+ current_nodedata->firstdesc .push_back (first_desc[child]);
1314+
1315+ if (current_ops > small_thresh) current_nodedata = nullptr ;
1316+ }
1317+
1318+ child = next[child];
1319+ }
1320+
1321+ } else if (sn_parent_[sn] == -1 ) {
1322+ // sn is small root: single task with whole subtree
1323+ auto res_insert = tree_splitting_.insert ({sn, {}});
1324+ node_data_ptr_[sn] = &res_insert.first ->second ;
1325+ res_insert.first ->second .type = NodeType::subtree;
1326+ res_insert.first ->second .group .push_back (sn);
1327+ res_insert.first ->second .firstdesc .push_back (first_desc[sn]);
1328+ }
1329+ /*
1330+ else if (subtree_ops[sn_parent_[sn]] > small_thresh) {
1331+ // sn is head of a subtree, processed as part of a group of subtrees
1332+ continue;
1333+ } else {
1334+ // sn is part of a subtree, but not the head
1335+ continue;
1336+ }
1337+ */
1338+ }
1339+ }
1340+
12291341Int Analyse::run (Symbolic& S) {
12301342 // Perform analyse phase and store the result into the symbolic object S.
12311343 // After Run returns, the Analyse object is not valid.
@@ -1300,6 +1412,7 @@ Int Analyse::run(Symbolic& S) {
13001412 computeStorage ();
13011413 computeBlockStart ();
13021414 computeCriticalPath ();
1415+ findTreeSplittingSolve ();
13031416
13041417 // move relevant stuff into S
13051418 S.n_ = n_;
@@ -1314,6 +1427,8 @@ Int Analyse::run(Symbolic& S) {
13141427 S.serial_storage_ = serial_storage_;
13151428 S.flops_ = dense_ops_;
13161429 S.block_size_ = nb_;
1430+ S.num_single_ = num_single_;
1431+ S.num_subtrees_ = num_subtrees_;
13171432
13181433 // compute largest supernode
13191434 std::vector<Int> sn_size (sn_start_.begin () + 1 , sn_start_.end ());
@@ -1348,6 +1463,8 @@ Int Analyse::run(Symbolic& S) {
13481463 S.relind_clique_ = std::move (relind_clique_);
13491464 S.consecutive_sums_ = std::move (consecutive_sums_);
13501465 S.clique_block_start_ = std::move (clique_block_start_);
1466+ S.tree_splitting_solve_ = std::move (tree_splitting_);
1467+ S.node_data_ptr_ = std::move (node_data_ptr_);
13511468
13521469#if HIPO_TIMING_LEVEL >= 1
13531470 data_.sumTime (kTimeAnalyse , clock_total.stop ());
0 commit comments