Skip to content

Commit d63c5ae

Browse files
homogeneous buffer merges
1 parent 8c60508 commit d63c5ae

File tree

4 files changed

+246
-22
lines changed

4 files changed

+246
-22
lines changed

apps/coarser_plotter.cpp

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,9 @@ int main(int argc, char *argv[]) {
5151
params.commCostVec = std::vector<v_workw_t<Graph_t>>({1, 2, 5, 10, 20, 50, 100, 200, 500, 1000});
5252
params.max_num_iteration_without_changes = 3;
5353
params.leniency = 0.005;
54-
// params.maxWeight = 15000;
54+
params.maxWeight = 15000;
55+
params.smallWeightThreshold = 4000;
56+
params.use_buffer_merge = true;
5557

5658
SarkarMul<Graph_t, Graph_t> coarser;
5759
coarser.setParameters(params);

include/osp/coarser/Sarkar/Sarkar.hpp

Lines changed: 225 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ limitations under the License.
2828

2929
#include "osp/auxiliary/datastructures/union_find.hpp"
3030
#include "osp/auxiliary/hash_util.hpp"
31+
#include "osp/auxiliary/math/divisors.hpp"
3132
#include "osp/coarser/Coarser.hpp"
3233
#include "osp/graph_algorithms/directed_graph_path_util.hpp"
3334

@@ -36,7 +37,7 @@ namespace osp {
3637

3738
namespace SarkarParams {
3839

39-
enum class Mode { LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER };
40+
// Contraction strategy stored in the coarser parameters (`params.mode`); HOMOGENEOUS_BUFFER is dispatched to Sarkar::homogeneous_buffer_merge.
enum class Mode { LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER, HOMOGENEOUS_BUFFER };
4041

4142
template<typename commCostType>
4243
struct Parameters {
@@ -66,9 +67,13 @@ class Sarkar : public CoarserGenExpansionMap<Graph_t_in, Graph_t_out> {
6667
vertex_idx_t<Graph_t_in> someParentsContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
6768
vertex_idx_t<Graph_t_in> levelContraction(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
6869

70+
vertex_idx_t<Graph_t_in> homogeneous_buffer_merge(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
6971
vertex_idx_t<Graph_t_in> out_buffer_merge(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
7072
vertex_idx_t<Graph_t_in> in_buffer_merge(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const;
7173

74+
std::vector<std::size_t> homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const;
75+
std::vector<std::size_t> simpleMerge(const std::vector<std::size_t> &sizes, const std::size_t minSize, const std::size_t maxSize) const;
76+
7277
std::vector<std::size_t> computeNodeHashes(const Graph_t_in &graph, const std::vector< vertex_idx_t<Graph_t_in> > &vertexPoset, const std::vector< v_workw_t<Graph_t_in> > &dist) const;
7378

7479
public:
@@ -598,6 +603,12 @@ std::vector<std::vector<vertex_idx_t<Graph_t_in>>> Sarkar<Graph_t_in, Graph_t_ou
598603
diff = out_buffer_merge(params.commCost, dag_in, expansionMap);
599604
}
600605
break;
606+
607+
case SarkarParams::Mode::HOMOGENEOUS_BUFFER:
608+
{
609+
diff = homogeneous_buffer_merge(params.commCost, dag_in, expansionMap);
610+
}
611+
break;
601612
}
602613

603614
// std::cout << " Diff: " << diff << '\n';
@@ -1144,6 +1155,198 @@ std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::computeNodeHashes(cons
11441155
return hashes;
11451156
}
11461157

1158+
template<typename Graph_t_in, typename Graph_t_out>
1159+
std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const {
1160+
assert(minSize <= maxSize);
1161+
assert(number > 0);
1162+
1163+
std::size_t bestDiv = 1U;
1164+
for (std::size_t div : divisorsList(number)) {
1165+
if (div > maxSize) continue;
1166+
1167+
if (div < minSize && bestDiv < div) {
1168+
bestDiv = div;
1169+
}
1170+
if (div >= minSize && ((bestDiv < minSize) || (div < bestDiv))) {
1171+
bestDiv = div;
1172+
}
1173+
}
1174+
1175+
if (bestDiv != 1U) {
1176+
return std::vector<std::size_t>(number / bestDiv, bestDiv);
1177+
}
1178+
1179+
std::size_t bestScore = 0U;
1180+
std::size_t bestBins = number / minSize;
1181+
for (std::size_t bins = std::max( number / maxSize, static_cast<std::size_t>(2U)); bins <= number / minSize; ++bins) {
1182+
if (number % bins == 0U) {
1183+
return std::vector<std::size_t>(bins, number / bins);
1184+
}
1185+
1186+
std::size_t score = std::min( divisorsList(number / bins).size(), divisorsList((number / bins) + 1).size() );
1187+
if (score >= bestScore) {
1188+
bestScore = score;
1189+
bestBins = bins;
1190+
}
1191+
}
1192+
1193+
std::size_t remainder = number % bestBins;
1194+
std::size_t size = number / bestBins;
1195+
1196+
std::vector<std::size_t> groups;
1197+
for (std::size_t i = 0U; i < bestBins; ++i) {
1198+
if (remainder != 0U) {
1199+
groups.emplace_back(size + 1U);
1200+
--remainder;
1201+
} else {
1202+
groups.emplace_back(size);
1203+
}
1204+
}
1205+
1206+
return groups;
1207+
}
1208+
1209+
template<typename Graph_t_in, typename Graph_t_out>
1210+
std::vector<std::size_t> Sarkar<Graph_t_in, Graph_t_out>::simpleMerge(const std::vector<std::size_t> &sizes, const std::size_t minSize, const std::size_t maxSize) const {
1211+
std::map<std::size_t, std::size_t> countOfSize;
1212+
for (std::size_t size : sizes) {
1213+
auto it = countOfSize.find(size);
1214+
if (it == countOfSize.end()) {
1215+
countOfSize.emplace(size, 1U);
1216+
} else {
1217+
++(it->second);
1218+
}
1219+
}
1220+
1221+
// todo homogeneous
1222+
1223+
return std::vector<std::size_t>(sizes.size(), minSize + maxSize);
1224+
}
1225+
1226+
1227+
template<typename Graph_t_in, typename Graph_t_out>
1228+
vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::homogeneous_buffer_merge(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
1229+
using VertexType = vertex_idx_t<Graph_t_in>;
1230+
assert(expansionMapOutput.size() == 0);
1231+
1232+
const std::vector< vertex_idx_t<Graph_t_in> > vertexTopPoset = get_top_node_distance<Graph_t_in, vertex_idx_t<Graph_t_in>>(graph);
1233+
const std::vector< vertex_idx_t<Graph_t_in> > vertexBotPoset = getBotPosetMap(graph);
1234+
const std::vector< v_workw_t<Graph_t_in> > topDist = getTopDistance(commCost, graph);
1235+
const std::vector< v_workw_t<Graph_t_in> > botDist = getBotDistance(commCost, graph);
1236+
1237+
std::vector<std::size_t> hashValues = computeNodeHashes(graph, vertexTopPoset, topDist);
1238+
std::vector<std::size_t> hashValuesWithParents = hashValues;
1239+
for (const VertexType &par : graph.vertices()) {
1240+
for (const VertexType &chld : graph.children(par)) {
1241+
hash_combine(hashValuesWithParents[chld], hashValues[par]);
1242+
}
1243+
}
1244+
hashValues = computeNodeHashes(graph, vertexBotPoset, botDist);
1245+
std::vector<std::size_t> hashValuesWithChildren = hashValues;
1246+
for (const VertexType &chld : graph.vertices()) {
1247+
for (const VertexType &par : graph.parents(chld)) {
1248+
hash_combine(hashValuesWithChildren[par], hashValues[chld]);
1249+
}
1250+
}
1251+
for (const VertexType &vert : graph.vertices()) {
1252+
hash_combine(hashValuesWithParents[vert], hashValuesWithChildren[vert]);
1253+
}
1254+
const std::vector<std::size_t> &hashValuesCombined = hashValuesWithParents;
1255+
1256+
std::unordered_map<std::size_t, std::set<VertexType>> orbits;
1257+
for (const VertexType &vert : graph.vertices()) {
1258+
if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) continue;
1259+
1260+
const std::size_t hash = hashValuesCombined[vert];
1261+
auto found_iter = orbits.find(hash);
1262+
if (found_iter == orbits.end()) {
1263+
orbits.emplace(std::piecewise_construct, std::forward_as_tuple(hash), std::forward_as_tuple(std::initializer_list< vertex_idx_t<Graph_t_in> >{vert}));
1264+
} else {
1265+
found_iter->second.emplace(vert);
1266+
}
1267+
}
1268+
1269+
vertex_idx_t<Graph_t_in> counter = 0;
1270+
std::vector<bool> partitionedFlag(graph.num_vertices(), false);
1271+
1272+
for (const VertexType &vert : graph.vertices()) {
1273+
if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) continue;
1274+
if (partitionedFlag[vert]) continue;
1275+
1276+
const std::set<VertexType> &orb = orbits.at(hashValuesCombined[vert]);
1277+
if (orb.size() <= 1U) continue;
1278+
1279+
std::set<VertexType> parents;
1280+
for (const VertexType &par : graph.parents(vert)) {
1281+
parents.emplace(par);
1282+
}
1283+
std::set<VertexType> children;
1284+
for (const VertexType &chld : graph.children(vert)) {
1285+
children.emplace(chld);
1286+
}
1287+
1288+
std::set<VertexType> secureOrb;
1289+
for (const VertexType &vertCandidate : orb) {
1290+
if (vertexTopPoset[vertCandidate] != vertexTopPoset[vert]) continue;
1291+
if (vertexBotPoset[vertCandidate] != vertexBotPoset[vert]) continue;
1292+
if (graph.vertex_work_weight(vertCandidate) != graph.vertex_work_weight(vert)) continue;
1293+
if (topDist[vertCandidate] != topDist[vert]) continue;
1294+
if (botDist[vertCandidate] != botDist[vert]) continue;
1295+
if constexpr (has_typed_vertices_v<Graph_t_in>) {
1296+
if (graph.vertex_type(vertCandidate) != graph.vertex_type(vert)) continue;
1297+
}
1298+
1299+
std::set<VertexType> candidateParents;
1300+
for (const VertexType &par : graph.parents(vertCandidate)) {
1301+
candidateParents.emplace(par);
1302+
}
1303+
if (candidateParents != parents) continue;
1304+
1305+
std::set<VertexType> candidateChildren;
1306+
for (const VertexType &chld : graph.children(vertCandidate)) {
1307+
candidateChildren.emplace(chld);
1308+
}
1309+
if (candidateChildren != children) continue;
1310+
1311+
secureOrb.emplace(vertCandidate);
1312+
}
1313+
if (secureOrb.size() <= 1U) continue;
1314+
1315+
const v_workw_t<Graph_t_in> desiredVerticesInGroup = graph.vertex_work_weight(vert) == 0 ? std::numeric_limits<v_workw_t<Graph_t_in>>::lowest() : params.smallWeightThreshold / graph.vertex_work_weight(vert);
1316+
const v_workw_t<Graph_t_in> maxVerticesInGroup = graph.vertex_work_weight(vert) == 0 ? std::numeric_limits<v_workw_t<Graph_t_in>>::max() : params.maxWeight / graph.vertex_work_weight(vert);
1317+
1318+
const std::size_t minDesiredSize = desiredVerticesInGroup < 2 ? 2U : static_cast<std::size_t>(desiredVerticesInGroup);
1319+
const std::size_t maxDesiredSize = std::max(minDesiredSize, std::min(minDesiredSize * 2U, static_cast<std::size_t>(maxVerticesInGroup)));
1320+
1321+
std::vector<std::size_t> groups = homogeneousMerge(secureOrb.size(), minDesiredSize, maxDesiredSize);
1322+
1323+
auto secureOrbIter = secureOrb.begin();
1324+
for (std::size_t groupSize : groups) {
1325+
std::vector<VertexType> cluster;
1326+
for (std::size_t i = 0; i < groupSize; ++i) {
1327+
cluster.emplace_back(*secureOrbIter);
1328+
++secureOrbIter;
1329+
}
1330+
expansionMapOutput.emplace_back( std::move(cluster) );
1331+
counter += static_cast<VertexType>(groupSize) - 1;
1332+
}
1333+
1334+
for (const VertexType &touchedVertex : secureOrb) {
1335+
partitionedFlag[touchedVertex] = true;
1336+
}
1337+
}
1338+
1339+
for (const VertexType &vert : graph.vertices()) {
1340+
if (partitionedFlag[vert]) continue;
1341+
expansionMapOutput.emplace_back(std::initializer_list<VertexType>{vert});
1342+
}
1343+
1344+
return counter;
1345+
};
1346+
1347+
1348+
1349+
11471350
template<typename Graph_t_in, typename Graph_t_out>
11481351
vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::out_buffer_merge(v_workw_t<Graph_t_in> commCost, const Graph_t_in &graph, std::vector<std::vector<vertex_idx_t<Graph_t_in>>> &expansionMapOutput) const {
11491352
using VertexType = vertex_idx_t<Graph_t_in>;
@@ -1153,11 +1356,11 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::out_buffer_merge(v_wor
11531356
const std::vector< v_workw_t<Graph_t_in> > topDist = getTopDistance(commCost, graph);
11541357
const std::vector< v_workw_t<Graph_t_in> > botDist = getBotDistance(commCost, graph);
11551358

1156-
auto cmp = [](const std::pair<long, std::vector<VertexType>> &lhs, const std::pair<long, std::vector<VertexType>> &rhs) {
1157-
return (lhs.first > rhs.first)
1158-
|| ((lhs.first == rhs.first) && (lhs.second < rhs.second));
1159-
};
1160-
std::set<std::pair<long, std::vector<VertexType>>, decltype(cmp)> vertPriority(cmp);
1359+
// auto cmp = [](const std::pair<long, std::vector<VertexType>> &lhs, const std::pair<long, std::vector<VertexType>> &rhs) {
1360+
// return (lhs.first > rhs.first)
1361+
// || ((lhs.first == rhs.first) && (lhs.second < rhs.second));
1362+
// };
1363+
// std::set<std::pair<long, std::vector<VertexType>>, decltype(cmp)> vertPriority(cmp);
11611364

11621365
std::vector<std::size_t> hashValues = computeNodeHashes(graph, vertexPoset, topDist);
11631366
std::vector<std::size_t> hashValuesWithParents = hashValues;
@@ -1292,6 +1495,7 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::out_buffer_merge(v_wor
12921495
counter += static_cast<VertexType>( similarGroup.size() ) - 1;
12931496

12941497
} else {
1498+
// todo do ordering based on vertex index
12951499
std::map<VertexType, std::vector<VertexType>> prevGrouping;
12961500
for (const VertexType &simVert : similarGroup) {
12971501
const VertexType prevGroupIndx = similarityGroupingsPrevious.find_origin_by_name(simVert);
@@ -1309,8 +1513,22 @@ vertex_idx_t<Graph_t_in> Sarkar<Graph_t_in, Graph_t_out>::out_buffer_merge(v_wor
13091513
prevGroupSizes.emplace_back(prevGroup.second.size());
13101514
}
13111515

1516+
std::vector<std::size_t> allocs = simpleMerge(prevGroupSizes, minDesiredSize, maxDesiredSize);
1517+
1518+
std::vector< std::vector<VertexType> > groupsToMerge( *std::max_element(allocs.begin(), allocs.end()) );
1519+
std::size_t cntr = 0U;
1520+
for (const auto &prevGroup : prevGrouping) {
1521+
groupsToMerge[ allocs[cntr] ].insert(groupsToMerge[ allocs[cntr] ].end(), prevGroup.second.begin(), prevGroup.second.end());
1522+
++cntr;
1523+
}
13121524

1313-
// look at previous step and figure out merging
1525+
for (const auto &groupToMerge : groupsToMerge) {
1526+
expansionMapOutput.emplace_back(groupToMerge);
1527+
for (const VertexType &mergedVert : groupToMerge) {
1528+
partitionedFlag[mergedVert] = true;
1529+
}
1530+
counter += static_cast<VertexType>( groupToMerge.size() ) - 1;
1531+
}
13141532
}
13151533
}
13161534
// deal with non-merged

include/osp/coarser/Sarkar/SarkarMul.hpp

Lines changed: 13 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -242,24 +242,23 @@ RETURN_STATUS SarkarMul<Graph_t, Graph_t_coarse>::run_contractions() {
242242
status = std::max(status, run_contractions(commCost));
243243
}
244244

245-
if constexpr (has_typed_vertices_v<Graph_t>) {
246-
if (ml_params.use_buffer_merge) {
247-
unsigned no_change = 0;
245+
if (ml_params.use_buffer_merge) {
246+
unsigned no_change = 0;
248247

249-
while (no_change < ml_params.max_num_iteration_without_changes) {
250-
params.mode = thue_coin.get_flip()? SarkarParams::Mode::FAN_IN_BUFFER : SarkarParams::Mode::FAN_OUT_BUFFER;
251-
updateParams();
252-
253-
status = std::max(status, run_single_contraction_mode(diff));
248+
while (no_change < ml_params.max_num_iteration_without_changes) {
249+
params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER;
250+
// params.mode = thue_coin.get_flip()? SarkarParams::Mode::FAN_IN_BUFFER : SarkarParams::Mode::FAN_OUT_BUFFER;
251+
updateParams();
254252

255-
if (diff > 0) {
256-
no_change = 0;
257-
} else {
258-
no_change++;
259-
}
253+
status = std::max(status, run_single_contraction_mode(diff));
260254

261-
status = std::max(status, run_contractions( ml_params.commCostVec.back() ));
255+
if (diff > 0) {
256+
no_change = 0;
257+
} else {
258+
no_change++;
262259
}
260+
261+
status = std::max(status, run_contractions( ml_params.commCostVec.back() ));
263262
}
264263
}
265264

tests/coarser.cpp

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -682,6 +682,11 @@ BOOST_AUTO_TEST_CASE(Sarkar_test) {
682682
params.mode = SarkarParams::Mode::FAN_OUT_BUFFER;
683683
coarser.setParameters(params);
684684
test_coarser_same_graph<graph_t>(coarser);
685+
686+
687+
params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER;
688+
coarser.setParameters(params);
689+
test_coarser_same_graph<graph_t>(coarser);
685690
}
686691

687692

0 commit comments

Comments
 (0)