@@ -28,6 +28,7 @@ limitations under the License.
2828
2929#include " osp/auxiliary/datastructures/union_find.hpp"
3030#include " osp/auxiliary/hash_util.hpp"
31+ #include " osp/auxiliary/math/divisors.hpp"
3132#include " osp/coarser/Coarser.hpp"
3233#include " osp/graph_algorithms/directed_graph_path_util.hpp"
3334
@@ -36,7 +37,7 @@ namespace osp {
3637
3738namespace SarkarParams {
3839
39- enum class Mode { LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER };
40+ enum class Mode { LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER, HOMOGENEOUS_BUFFER };
4041
4142template <typename commCostType>
4243struct Parameters {
@@ -66,9 +67,13 @@ class Sarkar : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
6667 vertex_idx_t <Graph_t_in> someParentsContraction (v_workw_t <Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t <Graph_t_in>>> &expansionMapOutput) const ;
6768 vertex_idx_t <Graph_t_in> levelContraction (v_workw_t <Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t <Graph_t_in>>> &expansionMapOutput) const ;
6869
70+ vertex_idx_t <Graph_t_in> homogeneous_buffer_merge (v_workw_t <Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t <Graph_t_in>>> &expansionMapOutput) const ;
6971 vertex_idx_t <Graph_t_in> out_buffer_merge (v_workw_t <Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t <Graph_t_in>>> &expansionMapOutput) const ;
7072 vertex_idx_t <Graph_t_in> in_buffer_merge (v_workw_t <Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t <Graph_t_in>>> &expansionMapOutput) const ;
7173
74+ std::vector<std::size_t > homogeneousMerge (const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const ;
75+ std::vector<std::size_t > simpleMerge (const std::vector<std::size_t > &sizes, const std::size_t minSize, const std::size_t maxSize) const ;
76+
7277 std::vector<std::size_t > computeNodeHashes (const Graph_t_in &graph, const std::vector< vertex_idx_t <Graph_t_in> > &vertexPoset, const std::vector< v_workw_t <Graph_t_in> > &dist) const ;
7378
7479 public:
@@ -598,6 +603,12 @@ std::vector<std::vector<vertex_idx_t<Graph_t_in>>> Sarkar<Graph_t_in, Graph_t_ou
598603 diff = out_buffer_merge (params.commCost , dag_in, expansionMap);
599604 }
600605 break ;
606+
607+ case SarkarParams::Mode::HOMOGENEOUS_BUFFER:
608+ {
609+ diff = homogeneous_buffer_merge (params.commCost , dag_in, expansionMap);
610+ }
611+ break ;
601612 }
602613
603614 // std::cout << " Diff: " << diff << '\n';
@@ -1144,6 +1155,198 @@ std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::computeNodeHashes(cons
11441155 return hashes;
11451156}
11461157
1158+ template <typename Graph_t_in, typename Graph_t_out>
1159+ std::vector<std::size_t > Sarkar<Graph_t_in, Graph_t_out>::homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const {
1160+ assert (minSize <= maxSize);
1161+ assert (number > 0 );
1162+
1163+ std::size_t bestDiv = 1U ;
1164+ for (std::size_t div : divisorsList (number)) {
1165+ if (div > maxSize) continue ;
1166+
1167+ if (div < minSize && bestDiv < div) {
1168+ bestDiv = div;
1169+ }
1170+ if (div >= minSize && ((bestDiv < minSize) || (div < bestDiv))) {
1171+ bestDiv = div;
1172+ }
1173+ }
1174+
1175+ if (bestDiv != 1U ) {
1176+ return std::vector<std::size_t >(number / bestDiv, bestDiv);
1177+ }
1178+
1179+ std::size_t bestScore = 0U ;
1180+ std::size_t bestBins = number / minSize;
1181+ for (std::size_t bins = std::max ( number / maxSize, static_cast <std::size_t >(2U )); bins <= number / minSize; ++bins) {
1182+ if (number % bins == 0U ) {
1183+ return std::vector<std::size_t >(bins, number / bins);
1184+ }
1185+
1186+ std::size_t score = std::min ( divisorsList (number / bins).size (), divisorsList ((number / bins) + 1 ).size () );
1187+ if (score >= bestScore) {
1188+ bestScore = score;
1189+ bestBins = bins;
1190+ }
1191+ }
1192+
1193+ std::size_t remainder = number % bestBins;
1194+ std::size_t size = number / bestBins;
1195+
1196+ std::vector<std::size_t > groups;
1197+ for (std::size_t i = 0U ; i < bestBins; ++i) {
1198+ if (remainder != 0U ) {
1199+ groups.emplace_back (size + 1U );
1200+ --remainder;
1201+ } else {
1202+ groups.emplace_back (size);
1203+ }
1204+ }
1205+
1206+ return groups;
1207+ }
1208+
1209+ template <typename Graph_t_in, typename Graph_t_out>
1210+ std::vector<std::size_t > Sarkar<Graph_t_in, Graph_t_out>::simpleMerge(const std::vector<std::size_t > &sizes, const std::size_t minSize, const std::size_t maxSize) const {
1211+ std::map<std::size_t , std::size_t > countOfSize;
1212+ for (std::size_t size : sizes) {
1213+ auto it = countOfSize.find (size);
1214+ if (it == countOfSize.end ()) {
1215+ countOfSize.emplace (size, 1U );
1216+ } else {
1217+ ++(it->second );
1218+ }
1219+ }
1220+
1221+ // todo homogeneous
1222+
1223+ return std::vector<std::size_t >(sizes.size (), minSize + maxSize);
1224+ }
1225+
1226+
1227+ template <typename Graph_t_in, typename Graph_t_out>
1228+ vertex_idx_t <Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_merge(v_workw_t <Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t <Graph_t_in>>> &expansionMapOutput) const {
1229+ using VertexType = vertex_idx_t <Graph_t_in>;
1230+ assert (expansionMapOutput.size () == 0 );
1231+
1232+ const std::vector< vertex_idx_t <Graph_t_in> > vertexTopPoset = get_top_node_distance<Graph_t_in, vertex_idx_t <Graph_t_in>>(graph);
1233+ const std::vector< vertex_idx_t <Graph_t_in> > vertexBotPoset = getBotPosetMap (graph);
1234+ const std::vector< v_workw_t <Graph_t_in> > topDist = getTopDistance (commCost, graph);
1235+ const std::vector< v_workw_t <Graph_t_in> > botDist = getBotDistance (commCost, graph);
1236+
1237+ std::vector<std::size_t > hashValues = computeNodeHashes (graph, vertexTopPoset, topDist);
1238+ std::vector<std::size_t > hashValuesWithParents = hashValues;
1239+ for (const VertexType &par : graph.vertices ()) {
1240+ for (const VertexType &chld : graph.children (par)) {
1241+ hash_combine (hashValuesWithParents[chld], hashValues[par]);
1242+ }
1243+ }
1244+ hashValues = computeNodeHashes (graph, vertexBotPoset, botDist);
1245+ std::vector<std::size_t > hashValuesWithChildren = hashValues;
1246+ for (const VertexType &chld : graph.vertices ()) {
1247+ for (const VertexType &par : graph.parents (chld)) {
1248+ hash_combine (hashValuesWithChildren[par], hashValues[chld]);
1249+ }
1250+ }
1251+ for (const VertexType &vert : graph.vertices ()) {
1252+ hash_combine (hashValuesWithParents[vert], hashValuesWithChildren[vert]);
1253+ }
1254+ const std::vector<std::size_t > &hashValuesCombined = hashValuesWithParents;
1255+
1256+ std::unordered_map<std::size_t , std::set<VertexType>> orbits;
1257+ for (const VertexType &vert : graph.vertices ()) {
1258+ if (graph.vertex_work_weight (vert) > params.smallWeightThreshold ) continue ;
1259+
1260+ const std::size_t hash = hashValuesCombined[vert];
1261+ auto found_iter = orbits.find (hash);
1262+ if (found_iter == orbits.end ()) {
1263+ orbits.emplace (std::piecewise_construct, std::forward_as_tuple (hash), std::forward_as_tuple (std::initializer_list< vertex_idx_t <Graph_t_in> >{vert}));
1264+ } else {
1265+ found_iter->second .emplace (vert);
1266+ }
1267+ }
1268+
1269+ vertex_idx_t <Graph_t_in> counter = 0 ;
1270+ std::vector<bool > partitionedFlag (graph.num_vertices (), false );
1271+
1272+ for (const VertexType &vert : graph.vertices ()) {
1273+ if (graph.vertex_work_weight (vert) > params.smallWeightThreshold ) continue ;
1274+ if (partitionedFlag[vert]) continue ;
1275+
1276+ const std::set<VertexType> &orb = orbits.at (hashValuesCombined[vert]);
1277+ if (orb.size () <= 1U ) continue ;
1278+
1279+ std::set<VertexType> parents;
1280+ for (const VertexType &par : graph.parents (vert)) {
1281+ parents.emplace (par);
1282+ }
1283+ std::set<VertexType> children;
1284+ for (const VertexType &chld : graph.children (vert)) {
1285+ children.emplace (chld);
1286+ }
1287+
1288+ std::set<VertexType> secureOrb;
1289+ for (const VertexType &vertCandidate : orb) {
1290+ if (vertexTopPoset[vertCandidate] != vertexTopPoset[vert]) continue ;
1291+ if (vertexBotPoset[vertCandidate] != vertexBotPoset[vert]) continue ;
1292+ if (graph.vertex_work_weight (vertCandidate) != graph.vertex_work_weight (vert)) continue ;
1293+ if (topDist[vertCandidate] != topDist[vert]) continue ;
1294+ if (botDist[vertCandidate] != botDist[vert]) continue ;
1295+ if constexpr (has_typed_vertices_v<Graph_t_in>) {
1296+ if (graph.vertex_type (vertCandidate) != graph.vertex_type (vert)) continue ;
1297+ }
1298+
1299+ std::set<VertexType> candidateParents;
1300+ for (const VertexType &par : graph.parents (vertCandidate)) {
1301+ candidateParents.emplace (par);
1302+ }
1303+ if (candidateParents != parents) continue ;
1304+
1305+ std::set<VertexType> candidateChildren;
1306+ for (const VertexType &chld : graph.children (vertCandidate)) {
1307+ candidateChildren.emplace (chld);
1308+ }
1309+ if (candidateChildren != children) continue ;
1310+
1311+ secureOrb.emplace (vertCandidate);
1312+ }
1313+ if (secureOrb.size () <= 1U ) continue ;
1314+
1315+ const v_workw_t <Graph_t_in> desiredVerticesInGroup = graph.vertex_work_weight (vert) == 0 ? std::numeric_limits<v_workw_t <Graph_t_in>>::lowest () : params.smallWeightThreshold / graph.vertex_work_weight (vert);
1316+ const v_workw_t <Graph_t_in> maxVerticesInGroup = graph.vertex_work_weight (vert) == 0 ? std::numeric_limits<v_workw_t <Graph_t_in>>::max () : params.maxWeight / graph.vertex_work_weight (vert);
1317+
1318+ const std::size_t minDesiredSize = desiredVerticesInGroup < 2 ? 2U : static_cast <std::size_t >(desiredVerticesInGroup);
1319+ const std::size_t maxDesiredSize = std::max (minDesiredSize, std::min (minDesiredSize * 2U , static_cast <std::size_t >(maxVerticesInGroup)));
1320+
1321+ std::vector<std::size_t > groups = homogeneousMerge (secureOrb.size (), minDesiredSize, maxDesiredSize);
1322+
1323+ auto secureOrbIter = secureOrb.begin ();
1324+ for (std::size_t groupSize : groups) {
1325+ std::vector<VertexType> cluster;
1326+ for (std::size_t i = 0 ; i < groupSize; ++i) {
1327+ cluster.emplace_back (*secureOrbIter);
1328+ ++secureOrbIter;
1329+ }
1330+ expansionMapOutput.emplace_back ( std::move (cluster) );
1331+ counter += static_cast <VertexType>(groupSize) - 1 ;
1332+ }
1333+
1334+ for (const VertexType &touchedVertex : secureOrb) {
1335+ partitionedFlag[touchedVertex] = true ;
1336+ }
1337+ }
1338+
1339+ for (const VertexType &vert : graph.vertices ()) {
1340+ if (partitionedFlag[vert]) continue ;
1341+ expansionMapOutput.emplace_back (std::initializer_list<VertexType>{vert});
1342+ }
1343+
1344+ return counter;
1345+ };
1346+
1347+
1348+
1349+
11471350template <typename Graph_t_in, typename Graph_t_out>
11481351vertex_idx_t <Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::out_buffer_merge(v_workw_t <Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t <Graph_t_in>>> &expansionMapOutput) const {
11491352 using VertexType = vertex_idx_t <Graph_t_in>;
@@ -1153,11 +1356,11 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::out_buffer_merge(v_wor
11531356 const std::vector< v_workw_t <Graph_t_in> > topDist = getTopDistance (commCost, graph);
11541357 const std::vector< v_workw_t <Graph_t_in> > botDist = getBotDistance (commCost, graph);
11551358
1156- auto cmp = [](const std::pair<long , std::vector<VertexType>> &lhs, const std::pair<long , std::vector<VertexType>> &rhs) {
1157- return (lhs.first > rhs.first )
1158- || ((lhs.first == rhs.first ) && (lhs.second < rhs.second ));
1159- };
1160- std::set<std::pair<long , std::vector<VertexType>>, decltype (cmp)> vertPriority (cmp);
1359+ // auto cmp = [](const std::pair<long, std::vector<VertexType>> &lhs, const std::pair<long, std::vector<VertexType>> &rhs) {
1360+ // return (lhs.first > rhs.first)
1361+ // || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
1362+ // };
1363+ // std::set<std::pair<long, std::vector<VertexType>>, decltype(cmp)> vertPriority(cmp);
11611364
11621365 std::vector<std::size_t > hashValues = computeNodeHashes (graph, vertexPoset, topDist);
11631366 std::vector<std::size_t > hashValuesWithParents = hashValues;
@@ -1292,6 +1495,7 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::out_buffer_merge(v_wor
12921495 counter += static_cast <VertexType>( similarGroup.size () ) - 1 ;
12931496
12941497 } else {
1498+ // todo do ordering based on vertex index
12951499 std::map<VertexType, std::vector<VertexType>> prevGrouping;
12961500 for (const VertexType &simVert : similarGroup) {
12971501 const VertexType prevGroupIndx = similarityGroupingsPrevious.find_origin_by_name (simVert);
@@ -1309,8 +1513,22 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::out_buffer_merge(v_wor
13091513 prevGroupSizes.emplace_back (prevGroup.second .size ());
13101514 }
13111515
1516+ std::vector<std::size_t > allocs = simpleMerge (prevGroupSizes, minDesiredSize, maxDesiredSize);
1517+
1518+ std::vector< std::vector<VertexType> > groupsToMerge ( *std::max_element (allocs.begin (), allocs.end ()) );
1519+ std::size_t cntr = 0U ;
1520+ for (const auto &prevGroup : prevGrouping) {
1521+ groupsToMerge[ allocs[cntr] ].insert (groupsToMerge[ allocs[cntr] ].end (), prevGroup.second .begin (), prevGroup.second .end ());
1522+ ++cntr;
1523+ }
13121524
1313- // look at previous step and figure out merging
1525+ for (const auto &groupToMerge : groupsToMerge) {
1526+ expansionMapOutput.emplace_back (groupToMerge);
1527+ for (const VertexType &mergedVert : groupToMerge) {
1528+ partitionedFlag[mergedVert] = true ;
1529+ }
1530+ counter += static_cast <VertexType>( groupToMerge.size () ) - 1 ;
1531+ }
13141532 }
13151533 }
13161534 // deal with non-merged
0 commit comments