diff --git a/.clang-tidy b/.clang-tidy index 7299225d..d65480a5 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1,24 +1,101 @@ Checks: '-*,readability-identifier-naming' CheckOptions: - # 1. Member Variables: camelBack with a trailing underscore (e.g., memberVariableTest_) - - key: readability-identifier-naming.MemberCase + - key: readability-identifier-naming.AggressiveDependentMemberLookup + value: 1 + + - key: readability-identifier-naming.AbstractClassCase + value: CamelCase + + - key: readability-identifier-naming.ClassCase + value: CamelCase + + - key: readability-identifier-naming.ClassConstexprCase value: camelBack - - key: readability-identifier-naming.MemberSuffix + - key: readability-identifier-naming.ClassConstexprSuffix + value: _ + + - key: readability-identifier-naming.ClassConstantCase + value: camelBack + - key: readability-identifier-naming.ClassConstantSuffix + value: _ + + - key: readability-identifier-naming.ClassMemberCase + value: camelBack + - key: readability-identifier-naming.ClassMemberSuffix value: _ - # 2. Functions: CamelCase (e.g., CalculateTotal) + - key: readability-identifier-naming.ClassMethodCase + value: CamelCase + - key: readability-identifier-naming.ClassMethodIgnoredRegexp + value: "^(begin|cbegin|end|cend|rbegin|crbegin|rend|crend|size|empty)$" + + - key: readability-identifier-naming.EnumCase + value: CamelCase + + - key: readability-identifier-naming.EnumConstantCase + value: UPPER_CASE + - key: readability-identifier-naming.FunctionCase value: CamelCase - # 3. Local Variables/Parameters: camelBack (e.g., totalValue) - - key: readability-identifier-naming.VariableCase + - key: readability-identifier-naming.GlobalConstexprVariableCase + value: UPPER_CASE + + - key: readability-identifier-naming.GlobalVariableCase value: camelBack - - key: readability-identifier-naming.ParameterCase + + - key: readability-identifier-naming.LocalVariableCase value: camelBack - # 4. 
Classes/Structs: CamelCase (e.g., MyClass) - - key: readability-identifier-naming.ClassCase + - key: readability-identifier-naming.MacroDefinitionCase + value: UPPER_CASE + + - key: readability-identifier-naming.MacroDefinitionIgnoredRegexp + value: '^[A-Z]+(_[A-Z]+)*_$' + + - key: readability-identifier-naming.MemberCase + value: camelBack + - key: readability-identifier-naming.MemberSuffix + value: _ + + - key: readability-identifier-naming.MethodCase value: CamelCase + - key: readability-identifier-naming.MethodIgnoredRegexp + value: "^(begin|cbegin|end|cend|rbegin|crbegin|rend|crend|size|empty)$" + + - key: readability-identifier-naming.NamespaceCase + value: lower_case + + - key: readability-identifier-naming.ParameterCase + value: camelBack + + - key: readability-identifier-naming.ScopedEnumConstantCase + value: UPPER_CASE + - key: readability-identifier-naming.StructCase - value: CamelCase \ No newline at end of file + value: CamelCase + + - key: readability-identifier-naming.TemplateParameterCase + value: CamelCase + + # Includes the fix for std::iterator_traits compatibility + - key: readability-identifier-naming.TypeAliasCase + value: CamelCase + - key: readability-identifier-naming.TypeAliasIgnoredRegexp + value: "^(iterator_category|difference_type|value_type|pointer|reference|const_pointer|const_reference)$" + + - key: readability-identifier-naming.TypedefCase + value: CamelCase + + - key: readability-identifier-naming.TypeTemplateParameterCase + value: CamelCase + + - key: readability-identifier-naming.ValueTemplateParameterCase + value: camelBack + + - key: readability-identifier-naming.VariableCase + value: camelBack + + - key: readability-identifier-naming.IgnoreMainLikeFunctions + value: 1 \ No newline at end of file diff --git a/apps/bsp_test_suite.cpp b/apps/bsp_test_suite.cpp index f31c1972..c2ade522 100644 --- a/apps/bsp_test_suite.cpp +++ b/apps/bsp_test_suite.cpp @@ -23,11 +23,11 @@ limitations under the License. 
#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp" -using graph_t = osp::computational_dag_edge_idx_vector_impl_def_int_t; +using GraphT = osp::ComputationalDagEdgeIdxVectorImplDefIntT; int main(int argc, char *argv[]) { - osp::BspScheduleRecompTestSuiteRunner runner; - return runner.run(argc, argv); + osp::BspScheduleRecompTestSuiteRunner runner; + return runner.Run(argc, argv); return 0; } diff --git a/apps/coarser_plotter.cpp b/apps/coarser_plotter.cpp index 93cfae2c..745e1850 100644 --- a/apps/coarser_plotter.cpp +++ b/apps/coarser_plotter.cpp @@ -26,7 +26,7 @@ limitations under the License. #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" using namespace osp; -using Graph_t = computational_dag_edge_idx_vector_impl_def_int_t; +using GraphT = ComputationalDagEdgeIdxVectorImplDefIntT; int main(int argc, char *argv[]) { if (argc < 3) { @@ -34,66 +34,66 @@ int main(int argc, char *argv[]) { return 1; } - std::string graph_file = argv[1]; - std::string graph_name = graph_file.substr(graph_file.rfind("/") + 1, graph_file.rfind(".") - graph_file.rfind("/") - 1); + std::string graphFile = argv[1]; + std::string graphName = graphFile.substr(graphFile.rfind("/") + 1, graphFile.rfind(".") - graphFile.rfind("/") - 1); - Graph_t graph; - bool status = file_reader::readGraph(graph_file, graph); + GraphT graph; + bool status = file_reader::ReadGraph(graphFile, graph); if (!status) { std::cout << "Failed to read graph\n"; return 1; } - SarkarParams::MulParameters> params; - params.commCostVec = std::vector>({1, 2, 5, 10, 20, 50, 100, 200, 500, 1000}); - params.max_num_iteration_without_changes = 3; - params.leniency = 0.005; - params.maxWeight = 15000; - params.smallWeightThreshold = 4000; - params.buffer_merge_mode = SarkarParams::BufferMergeMode::FULL; + sarkar_params::MulParameters> params; + params.commCostVec_ = 
std::vector>({1, 2, 5, 10, 20, 50, 100, 200, 500, 1000}); + params.maxNumIterationWithoutChanges_ = 3; + params.leniency_ = 0.005; + params.maxWeight_ = 15000; + params.smallWeightThreshold_ = 4000; + params.bufferMergeMode_ = sarkar_params::BufferMergeMode::FULL; - SarkarMul coarser; - coarser.setParameters(params); + SarkarMul coarser; + coarser.SetParameters(params); - Graph_t coarse_graph; - std::vector> contraction_map; + GraphT coarseGraph; + std::vector> contractionMap; - Graph_t graph_copy = graph; - bool ignore_vertex_types = false; + GraphT graphCopy = graph; + bool ignoreVertexTypes = false; - if (ignore_vertex_types) { - for (const auto &vert : graph_copy.vertices()) { - graph_copy.set_vertex_type(vert, 0); + if (ignoreVertexTypes) { + for (const auto &vert : graphCopy.Vertices()) { + graphCopy.SetVertexType(vert, 0); } } - coarser.coarsenDag(graph_copy, coarse_graph, contraction_map); + coarser.CoarsenDag(graphCopy, coarseGraph, contractionMap); - std::vector colours(contraction_map.size()); - for (std::size_t i = 0; i < contraction_map.size(); ++i) { - colours[i] = static_cast(contraction_map[i]); + std::vector colours(contractionMap.size()); + for (std::size_t i = 0; i < contractionMap.size(); ++i) { + colours[i] = static_cast(contractionMap[i]); } - std::ofstream out_dot(argv[2]); - if (!out_dot.is_open()) { + std::ofstream outDot(argv[2]); + if (!outDot.is_open()) { std::cout << "Unable to write/open output file.\n"; return 1; } DotFileWriter writer; - writer.write_colored_graph(out_dot, graph, colours); + writer.WriteColoredGraph(outDot, graph, colours); if (argc >= 4) { - std::ofstream coarse_out_dot(argv[3]); - if (!coarse_out_dot.is_open()) { + std::ofstream coarseOutDot(argv[3]); + if (!coarseOutDot.is_open()) { std::cout << "Unable to write/open output file.\n"; return 1; } - std::vector coarse_colours(coarse_graph.num_vertices()); - std::iota(coarse_colours.begin(), coarse_colours.end(), 0); + std::vector 
coarseColours(coarseGraph.NumVertices()); + std::iota(coarseColours.begin(), coarseColours.end(), 0); - writer.write_colored_graph(coarse_out_dot, coarse_graph, coarse_colours); + writer.WriteColoredGraph(coarseOutDot, coarseGraph, coarseColours); } return 0; diff --git a/apps/graph_analyser.cpp b/apps/graph_analyser.cpp index 48e4f3ee..d5789900 100644 --- a/apps/graph_analyser.cpp +++ b/apps/graph_analyser.cpp @@ -31,73 +31,73 @@ limitations under the License. using namespace osp; -using ComputationalDag = computational_dag_edge_idx_vector_impl_def_int_t; +using ComputationalDag = ComputationalDagEdgeIdxVectorImplDefIntT; -void add_graph_stats(const ComputationalDag &graph, std::ofstream &outfile) { +void AddGraphStats(const ComputationalDag &graph, std::ofstream &outfile) { // Short and Average Edges - unsigned short_edges = 0; - float avg_edge_length = 0; - size_t sum_edge_length = 0; + unsigned shortEdges = 0; + float avgEdgeLength = 0; + size_t sumEdgeLength = 0; - std::vector top_level = get_top_node_distance(graph); - std::multiset edge_lengths; - for (const auto &edge : edges(graph)) { - unsigned diff = top_level[target(edge, graph)] - top_level[source(edge, graph)]; + std::vector topLevel = GetTopNodeDistance(graph); + std::multiset edgeLengths; + for (const auto &edge : Edges(graph)) { + unsigned diff = topLevel[Target(edge, graph)] - topLevel[Source(edge, graph)]; - edge_lengths.emplace(diff); - sum_edge_length += diff; + edgeLengths.emplace(diff); + sumEdgeLength += diff; if (diff == 1) { - short_edges += 1; + shortEdges += 1; } } - unsigned median_edge_length = 0; - if (!edge_lengths.empty()) { - median_edge_length = Get_Median(edge_lengths); + unsigned medianEdgeLength = 0; + if (!edgeLengths.empty()) { + medianEdgeLength = GetMedian(edgeLengths); } - Get_Median(edge_lengths); + GetMedian(edgeLengths); - if (graph.num_edges() != 0) { - avg_edge_length = static_cast(sum_edge_length) / static_cast(graph.num_edges()); + if (graph.NumEdges() != 0) { + 
avgEdgeLength = static_cast(sumEdgeLength) / static_cast(graph.NumEdges()); } // Longest Path - unsigned longest_path = 1; + unsigned longestPath = 1; // std::map wavefront; - for (size_t i = 0; i < top_level.size(); i++) { - longest_path = std::max(longest_path, top_level[i]); + for (size_t i = 0; i < topLevel.size(); i++) { + longestPath = std::max(longestPath, topLevel[i]); // if (wavefront.find(top_level[i]) != wavefront.cend()) { // wavefront[top_level[i]] += 1; // } else { // wavefront[top_level[i]] = 1; // } } - float avg_wavefront = static_cast(graph.num_vertices()) / static_cast(longest_path); + float avgWavefront = static_cast(graph.NumVertices()) / static_cast(longestPath); // Average bottom distance - std::vector bot_level = get_bottom_node_distance(graph); - size_t bot_level_sum = 0; - for (size_t i = 0; i < bot_level.size(); i++) { - bot_level_sum += bot_level[i]; + std::vector botLevel = GetBottomNodeDistance(graph); + size_t botLevelSum = 0; + for (size_t i = 0; i < botLevel.size(); i++) { + botLevelSum += botLevel[i]; } - float avg_bot_level = static_cast(bot_level_sum) / static_cast(bot_level.size()); + float avgBotLevel = static_cast(botLevelSum) / static_cast(botLevel.size()); // // Number of Triangles // size_t number_triangles = 0; - // for (const auto& edge : edges(graph)) { + // for (const auto& edge : Edges(graph)) { // std::set neighbour_src; // std::set neighbour_tgt; - // for (const auto& in_edge : graph.in_edges(edge.m_source)) { + // for (const auto& in_edge : graph.InEdges(edge.m_source)) { // neighbour_src.emplace(in_edge.m_source); // } - // for (const auto& in_edge : graph.out_edges(edge.m_source)) { + // for (const auto& in_edge : graph.OutEdges(edge.m_source)) { // neighbour_src.emplace(in_edge.m_target); // } - // for (const auto& in_edge : graph.in_edges(edge.m_target)) { + // for (const auto& in_edge : graph.InEdges(edge.m_target)) { // neighbour_tgt.emplace(in_edge.m_source); // } - // for (const auto& in_edge : 
graph.out_edges(edge.m_target)) { + // for (const auto& in_edge : graph.OutEdges(edge.m_target)) { // neighbour_tgt.emplace(in_edge.m_target); // } @@ -121,14 +121,14 @@ void add_graph_stats(const ComputationalDag &graph, std::ofstream &outfile) { // number_triangles /= 3; // Adding statistics - outfile << graph.num_vertices() << ","; - outfile << graph.num_edges() << ","; - outfile << longest_path << ","; - outfile << avg_wavefront << ","; - outfile << short_edges << ","; - outfile << median_edge_length << ","; - outfile << avg_edge_length << ","; - outfile << avg_bot_level; + outfile << graph.NumVertices() << ","; + outfile << graph.NumEdges() << ","; + outfile << longestPath << ","; + outfile << avgWavefront << ","; + outfile << shortEdges << ","; + outfile << medianEdgeLength << ","; + outfile << avgEdgeLength << ","; + outfile << avgBotLevel; } int main(int argc, char *argv[]) { @@ -137,30 +137,30 @@ int main(int argc, char *argv[]) { return 1; } - std::filesystem::path graph_dir = argv[1]; - std::ofstream graph_stats_stream(argv[2]); + std::filesystem::path graphDir = argv[1]; + std::ofstream graphStatsStream(argv[2]); - if (!graph_stats_stream.is_open()) { + if (!graphStatsStream.is_open()) { std::cout << "Unable to write/open output file.\n"; return 1; } // Generating Header - graph_stats_stream << "Graph,Vertices,Edges,Longest_Path,Average_Wavefront_Size,Short_Edges,Median_Edge_Length," - "Average_Edge_Length,Average_Bottom_Level" - << std::endl; + graphStatsStream << "Graph,Vertices,Edges,Longest_Path,Average_Wavefront_Size,Short_Edges,Median_Edge_Length," + "Average_Edge_Length,Average_Bottom_Level" + << std::endl; - for (const auto &dirEntry : std::filesystem::recursive_directory_iterator(graph_dir)) { + for (const auto &dirEntry : std::filesystem::recursive_directory_iterator(graphDir)) { if (std::filesystem::is_directory(dirEntry)) { continue; } std::cout << "Processing: " << dirEntry << std::endl; - std::string path_str = dirEntry.path(); + 
std::string pathStr = dirEntry.path(); ComputationalDag graph; - bool status = file_reader::readGraph(dirEntry.path(), graph); + bool status = file_reader::ReadGraph(dirEntry.path(), graph); if (!status) { std::cout << "Failed to read graph\n"; return 1; @@ -170,12 +170,12 @@ int main(int argc, char *argv[]) { continue; } - std::string graph_name = path_str.substr(path_str.rfind("/") + 1); - graph_name = graph_name.substr(0, graph_name.rfind(".")); + std::string graphName = pathStr.substr(pathStr.rfind("/") + 1); + graphName = graphName.substr(0, graphName.rfind(".")); - graph_stats_stream << graph_name << ","; - add_graph_stats(graph, graph_stats_stream); - graph_stats_stream << std::endl; + graphStatsStream << graphName << ","; + AddGraphStats(graph, graphStatsStream); + graphStatsStream << std::endl; } return 0; diff --git a/apps/graph_converter.cpp b/apps/graph_converter.cpp index 3ffedd4f..ae757773 100644 --- a/apps/graph_converter.cpp +++ b/apps/graph_converter.cpp @@ -27,14 +27,14 @@ limitations under the License. using namespace osp; -using ComputationalDag = computational_dag_edge_idx_vector_impl_def_int_t; +using ComputationalDag = ComputationalDagEdgeIdxVectorImplDefIntT; -void print_usage(const char *prog_name) { +void PrintUsage(const char *progName) { std::cerr << "Graph Format Converter" << std::endl; std::cerr << "----------------------" << std::endl; std::cerr << "This tool converts a directed graph from one file format to another. The desired output" << std::endl; std::cerr << "format is determined by the file extension of the output file." << std::endl << std::endl; - std::cerr << "Usage: " << prog_name << " " << std::endl << std::endl; + std::cerr << "Usage: " << progName << " " << std::endl << std::endl; std::cerr << "Arguments:" << std::endl; std::cerr << " Path to the input graph file." << std::endl << std::endl; std::cerr << " Path for the output graph file. 
Special values of '.dot' or '.hdag' can be" << std::endl; @@ -47,71 +47,71 @@ void print_usage(const char *prog_name) { std::cerr << "The .hdag format is the HyperdagDB format. A detailed description can be found at:" << std::endl; std::cerr << "https://github.com/Algebraic-Programming/HyperDAG_DB" << std::endl << std::endl; std::cerr << "Examples:" << std::endl; - std::cerr << " " << prog_name << " my_graph.mtx my_graph.hdag" << std::endl; - std::cerr << " " << prog_name << " my_graph.hdag my_graph.dot" << std::endl; - std::cerr << " " << prog_name << " my_graph.mtx .dot # Creates my_graph.dot" << std::endl; - std::cerr << " " << prog_name << " my_graph.dot .hdag # Creates my_graph.hdag" << std::endl; + std::cerr << " " << progName << " my_graph.mtx my_graph.hdag" << std::endl; + std::cerr << " " << progName << " my_graph.hdag my_graph.dot" << std::endl; + std::cerr << " " << progName << " my_graph.mtx .dot # Creates my_graph.dot" << std::endl; + std::cerr << " " << progName << " my_graph.dot .hdag # Creates my_graph.hdag" << std::endl; } int main(int argc, char *argv[]) { if (argc != 3) { - print_usage(argv[0]); + PrintUsage(argv[0]); return 1; } - std::string input_filename = argv[1]; - std::string output_filename_arg = argv[2]; + std::string inputFilename = argv[1]; + std::string outputFilenameArg = argv[2]; - std::filesystem::path input_path(input_filename); - std::string input_ext = input_path.extension().string(); - std::string output_filename; + std::filesystem::path inputPath(inputFilename); + std::string inputExt = inputPath.extension().string(); + std::string outputFilename; - if (output_filename_arg == ".dot") { - if (input_ext == ".dot") { + if (outputFilenameArg == ".dot") { + if (inputExt == ".dot") { std::cerr << "Error: Input file is already a .dot file. Cannot use '.dot' as the output file argument in " "this case." 
<< std::endl; return 1; } - output_filename = std::filesystem::path(input_filename).replace_extension(".dot").string(); - } else if (output_filename_arg == ".hdag") { - if (input_ext == ".hdag") { + outputFilename = std::filesystem::path(inputFilename).replace_extension(".dot").string(); + } else if (outputFilenameArg == ".hdag") { + if (inputExt == ".hdag") { std::cerr << "Error: Input file is already a .hdag file. Cannot use '.hdag' as the output file argument in " "this case." << std::endl; return 1; } - output_filename = std::filesystem::path(input_filename).replace_extension(".hdag").string(); + outputFilename = std::filesystem::path(inputFilename).replace_extension(".hdag").string(); } else { - output_filename = output_filename_arg; + outputFilename = outputFilenameArg; } ComputationalDag graph; - std::cout << "Attempting to read graph from " << input_filename << "..." << std::endl; - bool status = file_reader::readGraph(input_filename, graph); + std::cout << "Attempting to read graph from " << inputFilename << "..." << std::endl; + bool status = file_reader::ReadGraph(inputFilename, graph); if (!status) { std::cout << "Failed to read graph\n"; return 1; } - std::cout << "Successfully read graph with " << graph.num_vertices() << " vertices and " << graph.num_edges() << " edges." + std::cout << "Successfully read graph with " << graph.NumVertices() << " vertices and " << graph.NumEdges() << " edges." 
<< std::endl; - std::filesystem::path output_path(output_filename); - std::string output_ext = output_path.extension().string(); + std::filesystem::path outputPath(outputFilename); + std::string outputExt = outputPath.extension().string(); - if (output_ext == ".dot") { + if (outputExt == ".dot") { DotFileWriter writer; - writer.write_graph(output_filename, graph); - } else if (output_ext == ".hdag") { - file_writer::writeComputationalDagHyperdagFormatDB(output_filename, graph); + writer.WriteGraph(outputFilename, graph); + } else if (outputExt == ".hdag") { + file_writer::WriteComputationalDagHyperdagFormatDb(outputFilename, graph); } else { - std::cerr << "Error: Unsupported output file format: " << output_ext << std::endl; - print_usage(argv[0]); + std::cerr << "Error: Unsupported output file format: " << outputExt << std::endl; + PrintUsage(argv[0]); return 1; } - std::cout << "Successfully wrote graph to " << output_filename << std::endl; + std::cout << "Successfully wrote graph to " << outputFilename << std::endl; return 0; } diff --git a/apps/graph_generator/gen_Erdos-Renyi_graph.cpp b/apps/graph_generator/gen_Erdos-Renyi_graph.cpp index 7c816302..54773fb8 100644 --- a/apps/graph_generator/gen_Erdos-Renyi_graph.cpp +++ b/apps/graph_generator/gen_Erdos-Renyi_graph.cpp @@ -27,8 +27,8 @@ limitations under the License. 
using namespace osp; -using ComputationalDag = computational_dag_vector_impl_def_int_t; -using VertexType = vertex_idx_t; +using ComputationalDag = ComputationalDagVectorImplDefIntT; +using VertexType = VertexIdxT; int main(int argc, char *argv[]) { if (argc < 3) { @@ -37,49 +37,49 @@ int main(int argc, char *argv[]) { return 1; } - size_t num_vert = static_cast(std::stoul(argv[1])); + size_t numVert = static_cast(std::stoul(argv[1])); double chance = 2 * std::atof(argv[2]); - unsigned num_graphs = 1; + unsigned numGraphs = 1; if (argc > 3) { - num_graphs = static_cast(std::stoul(argv[3])); + numGraphs = static_cast(std::stoul(argv[3])); } // Initiating random values - double lower_bound = -2; - double upper_bound = 2; - std::uniform_real_distribution unif(lower_bound, upper_bound); + double lowerBound = -2; + double upperBound = 2; + std::uniform_real_distribution unif(lowerBound, upperBound); - std::uniform_real_distribution unif_log(-std::log(upper_bound), std::log(upper_bound)); + std::uniform_real_distribution unifLog(-std::log(upperBound), std::log(upperBound)); std::default_random_engine re; - for (size_t j = 0; j < num_graphs; j++) { + for (size_t j = 0; j < numGraphs; j++) { // Generating the graph ComputationalDag graph; - erdos_renyi_graph_gen(graph, num_vert, chance); + ErdosRenyiGraphGen(graph, numVert, chance); // Generating graph name - std::string graph_name = "ErdosRenyi_"; - std::string graph_size_name; - if (graph.num_vertices() < 1000) { - graph_size_name = std::to_string(graph.num_vertices()) + "_"; + std::string graphName = "ErdosRenyi_"; + std::string graphSizeName; + if (graph.NumVertices() < 1000) { + graphSizeName = std::to_string(graph.NumVertices()) + "_"; } else { - graph_size_name = std::to_string(graph.num_vertices() / 1000) + "k_"; + graphSizeName = std::to_string(graph.NumVertices() / 1000) + "k_"; } - graph_name += graph_size_name; + graphName += graphSizeName; - std::string graph_edge_size; - if (graph.num_edges() < 1000) { - 
graph_edge_size = std::to_string(graph.num_edges()) + "_"; - } else if (graph.num_edges() < 1000000) { - graph_edge_size = std::to_string(graph.num_edges() / 1000) + "k_"; + std::string graphEdgeSize; + if (graph.NumEdges() < 1000) { + graphEdgeSize = std::to_string(graph.NumEdges()) + "_"; + } else if (graph.NumEdges() < 1000000) { + graphEdgeSize = std::to_string(graph.NumEdges() / 1000) + "k_"; } else { - graph_edge_size = std::to_string(graph.num_edges() / 1000000) + "m_"; + graphEdgeSize = std::to_string(graph.NumEdges() / 1000000) + "m_"; } - graph_name += graph_edge_size; + graphName += graphEdgeSize; - graph_name += std::to_string(j); + graphName += std::to_string(j); - graph_name += ".mtx"; + graphName += ".mtx"; // Graph header std::string header = "%" @@ -93,20 +93,20 @@ int main(int argc, char *argv[]) { "%-------------------------------------------------------------------------------\n"; // Writing the graph to file - std::ofstream graph_write; - graph_write.open(graph_name); - graph_write << header; - graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " " - + std::to_string(graph.num_edges() + graph.num_vertices()) + "\n"; - for (VertexType i = 0; i < num_vert; i++) { - double val = (1 - 2 * randInt(2)) * std::exp(unif_log(re)); - graph_write << std::to_string(i + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) + "\n"; - for (const auto &chld : graph.children(i)) { + std::ofstream graphWrite; + graphWrite.open(graphName); + graphWrite << header; + graphWrite << std::to_string(graph.NumVertices()) + " " + std::to_string(graph.NumVertices()) + " " + + std::to_string(graph.NumEdges() + graph.NumVertices()) + "\n"; + for (VertexType i = 0; i < numVert; i++) { + double val = (1 - 2 * RandInt(2)) * std::exp(unifLog(re)); + graphWrite << std::to_string(i + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) + "\n"; + for (const auto &chld : graph.Children(i)) { val = unif(re); - 
graph_write << std::to_string(chld + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) + "\n"; + graphWrite << std::to_string(chld + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) + "\n"; } } - graph_write.close(); + graphWrite.close(); } return 0; diff --git a/apps/graph_generator/gen_near_diag_random_graph.cpp b/apps/graph_generator/gen_near_diag_random_graph.cpp index ede87c5c..5031615f 100644 --- a/apps/graph_generator/gen_near_diag_random_graph.cpp +++ b/apps/graph_generator/gen_near_diag_random_graph.cpp @@ -27,8 +27,8 @@ limitations under the License. using namespace osp; -using ComputationalDag = computational_dag_vector_impl_def_int_t; -using VertexType = vertex_idx_t; +using ComputationalDag = ComputationalDagVectorImplDefIntT; +using VertexType = VertexIdxT; int main(int argc, char *argv[]) { if (argc < 4) { @@ -37,55 +37,55 @@ int main(int argc, char *argv[]) { return 1; } - size_t num_vert = static_cast(std::stoul(argv[1])); + size_t numVert = static_cast(std::stoul(argv[1])); double prob = std::atof(argv[2]); double bandwidth = std::atof(argv[3]); - unsigned num_graphs = 1; + unsigned numGraphs = 1; if (argc > 4) { - num_graphs = static_cast(std::stoul(argv[3])); + numGraphs = static_cast(std::stoul(argv[3])); } // std::cout << "Vert: " << num_vert << " prob: " << prob << " bandwidth: " << bandwidth << " graphs: " << // num_graphs << std::endl; // Initiating random values - double lower_bound = -2; - double upper_bound = 2; - std::uniform_real_distribution unif(lower_bound, upper_bound); + double lowerBound = -2; + double upperBound = 2; + std::uniform_real_distribution unif(lowerBound, upperBound); - std::uniform_real_distribution unif_log(-std::log(upper_bound), std::log(upper_bound)); + std::uniform_real_distribution unifLog(-std::log(upperBound), std::log(upperBound)); std::default_random_engine re; - for (size_t i = 0; i < num_graphs; i++) { + for (size_t i = 0; i < numGraphs; i++) { // Generating the graph 
ComputationalDag graph; - near_diag_random_graph(graph, num_vert, bandwidth, prob); + NearDiagRandomGraph(graph, numVert, bandwidth, prob); // Generating graph name - std::string graph_name = "RandomBand_"; - graph_name += "p" + std::to_string(static_cast(100 * prob)) + "_"; - graph_name += "b" + std::to_string(static_cast(bandwidth)) + "_"; - std::string graph_size_name; - if (graph.num_vertices() < 1000) { - graph_size_name = std::to_string(graph.num_vertices()) + "_"; + std::string graphName = "RandomBand_"; + graphName += "p" + std::to_string(static_cast(100 * prob)) + "_"; + graphName += "b" + std::to_string(static_cast(bandwidth)) + "_"; + std::string graphSizeName; + if (graph.NumVertices() < 1000) { + graphSizeName = std::to_string(graph.NumVertices()) + "_"; } else { - graph_size_name = std::to_string(graph.num_vertices() / 1000) + "k_"; + graphSizeName = std::to_string(graph.NumVertices() / 1000) + "k_"; } - graph_name += graph_size_name; + graphName += graphSizeName; - std::string graph_edge_size; - if (graph.num_edges() < 1000) { - graph_edge_size = std::to_string(graph.num_edges()) + "_"; - } else if (graph.num_edges() < 1000000) { - graph_edge_size = std::to_string(graph.num_edges() / 1000) + "k_"; + std::string graphEdgeSize; + if (graph.NumEdges() < 1000) { + graphEdgeSize = std::to_string(graph.NumEdges()) + "_"; + } else if (graph.NumEdges() < 1000000) { + graphEdgeSize = std::to_string(graph.NumEdges() / 1000) + "k_"; } else { - graph_edge_size = std::to_string(graph.num_edges() / 1000000) + "m_"; + graphEdgeSize = std::to_string(graph.NumEdges() / 1000000) + "m_"; } - graph_name += graph_edge_size; + graphName += graphEdgeSize; - graph_name += std::to_string(i); + graphName += std::to_string(i); - graph_name += ".mtx"; + graphName += ".mtx"; // Graph header std::string header = "%" @@ -99,20 +99,20 @@ int main(int argc, char *argv[]) { "%-------------------------------------------------------------------------------\n"; // Writing the graph to 
file - std::ofstream graph_write; - graph_write.open(graph_name); - graph_write << header; - graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " " - + std::to_string(graph.num_edges() + graph.num_vertices()) + "\n"; - for (VertexType j = 0; j < num_vert; j++) { - double val = (1 - 2 * randInt(2)) * std::exp(unif_log(re)); - graph_write << std::to_string(j + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) + "\n"; - for (const auto &chld : graph.children(j)) { + std::ofstream graphWrite; + graphWrite.open(graphName); + graphWrite << header; + graphWrite << std::to_string(graph.NumVertices()) + " " + std::to_string(graph.NumVertices()) + " " + + std::to_string(graph.NumEdges() + graph.NumVertices()) + "\n"; + for (VertexType j = 0; j < numVert; j++) { + double val = (1 - 2 * RandInt(2)) * std::exp(unifLog(re)); + graphWrite << std::to_string(j + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) + "\n"; + for (const auto &chld : graph.Children(j)) { val = unif(re); - graph_write << std::to_string(chld + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) + "\n"; + graphWrite << std::to_string(chld + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) + "\n"; } } - graph_write.close(); + graphWrite.close(); } return 0; diff --git a/apps/graph_generator/post_incomplete_cholesky.cpp b/apps/graph_generator/post_incomplete_cholesky.cpp index 757b569d..9ef8c0f6 100644 --- a/apps/graph_generator/post_incomplete_cholesky.cpp +++ b/apps/graph_generator/post_incomplete_cholesky.cpp @@ -31,28 +31,28 @@ int main(int argc, char *argv[]) { return 1; } - std::string filename_graph = argv[1]; + std::string filenameGraph = argv[1]; - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = 
nameGraph.substr(0, nameGraph.find_last_of(".")); - std::cout << "Graph: " << name_graph << std::endl; + std::cout << "Graph: " << nameGraph << std::endl; - using SM_csc = Eigen::SparseMatrix; // Compressed Sparse Column format - using SM_csr = Eigen::SparseMatrix; // Compressed Sparse Row format + using SmCsc = Eigen::SparseMatrix; // Compressed Sparse Column format + using SmCsr = Eigen::SparseMatrix; // Compressed Sparse Row format - SM_csc L_csc; // Initialize a sparse matrix in CSC format + SmCsc lCsc; // Initialize a sparse matrix in CSC format - Eigen::loadMarket(L_csc, filename_graph); + Eigen::loadMarket(lCsc, filenameGraph); - SM_csr L_csr = L_csc; // Reformat the sparse matrix from CSC to CSR format + SmCsr lCsr = lCsc; // Reformat the sparse matrix from CSC to CSR format - Eigen::IncompleteCholesky> ichol(L_csc); + Eigen::IncompleteCholesky> ichol(lCsc); - SM_csc LChol_csc = ichol.matrixL(); + SmCsc lCholCsc = ichol.matrixL(); Eigen::saveMarket( - LChol_csc, filename_graph.substr(0, filename_graph.find_last_of(".")) + "_postChol.mtx", Eigen::UpLoType::Symmetric); + lCholCsc, filenameGraph.substr(0, filenameGraph.find_last_of(".")) + "_postChol.mtx", Eigen::UpLoType::Symmetric); return 0; } diff --git a/apps/ilp_bsp_scheduler.cpp b/apps/ilp_bsp_scheduler.cpp index 90fe30f7..9abde065 100644 --- a/apps/ilp_bsp_scheduler.cpp +++ b/apps/ilp_bsp_scheduler.cpp @@ -34,7 +34,7 @@ limitations under the License. 
using namespace osp; -using ComputationalDag = computational_dag_edge_idx_vector_impl_def_int_t; +using ComputationalDag = ComputationalDagEdgeIdxVectorImplDefIntT; int main(int argc, char *argv[]) { if (argc < 4) { @@ -42,18 +42,18 @@ int main(int argc, char *argv[]) { return 1; } - std::string filename_graph = argv[1]; - std::string name_graph = filename_graph.substr(0, filename_graph.rfind(".")); + std::string filenameGraph = argv[1]; + std::string nameGraph = filenameGraph.substr(0, filenameGraph.rfind(".")); - std::cout << name_graph << std::endl; + std::cout << nameGraph << std::endl; - std::string filename_machine = argv[2]; - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + std::string filenameMachine = argv[2]; + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - int step_int = std::stoi(argv[3]); - if (step_int < 1) { - std::cerr << "Argument max_number_step must be a positive integer: " << step_int << std::endl; + int stepInt = std::stoi(argv[3]); + if (stepInt < 1) { + std::cerr << "Argument max_number_step must be a positive integer: " << stepInt << std::endl; return 1; } @@ -66,45 +66,45 @@ int main(int argc, char *argv[]) { return 1; } - unsigned steps = static_cast(step_int); + unsigned steps = static_cast(stepInt); BspInstance instance; - ComputationalDag &graph = instance.getComputationalDag(); + ComputationalDag &graph = instance.GetComputationalDag(); - bool status_graph = file_reader::readGraph(filename_graph, graph); - bool status_arch = file_reader::readBspArchitecture(filename_machine, instance.getArchitecture()); - // instance.setDiagonalCompatibilityMatrix(graph.num_vertex_types()); - // instance.getArchitecture().setProcessorsWithTypes({0,0,1,1,1,1}); + bool statusGraph = file_reader::ReadGraph(filenameGraph, graph); + bool 
statusArch = file_reader::ReadBspArchitecture(filenameMachine, instance.GetArchitecture()); + // instance.SetDiagonalCompatibilityMatrix(graph.NumVertexTypes()); + // instance.GetArchitecture().SetProcessorsWithTypes({0,0,1,1,1,1}); - if (!status_graph || !status_arch) { + if (!statusGraph || !statusArch) { std::cout << "Reading files failed." << std::endl; return 1; } - // for (const auto &vertex : graph.vertices()) { + // for (const auto &vertex : graph.Vertices()) { - // graph.set_vertex_work_weight(vertex, graph.vertex_work_weight(vertex) * 80); + // graph.SetVertexWorkWeight(vertex, graph.VertexWorkWeight(vertex) * 80); // } CoptFullScheduler scheduler; - scheduler.setMaxNumberOfSupersteps(steps); + scheduler.SetMaxNumberOfSupersteps(steps); if (recomp) { BspScheduleRecomp schedule(instance); - auto status_schedule = scheduler.computeScheduleRecomp(schedule); + auto statusSchedule = scheduler.ComputeScheduleRecomp(schedule); - if (status_schedule == RETURN_STATUS::OSP_SUCCESS || status_schedule == RETURN_STATUS::BEST_FOUND) { - DotFileWriter dot_writer; - dot_writer.write_schedule_recomp(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" - + scheduler.getScheduleName() + "_recomp_schedule.dot", - schedule); + if (statusSchedule == ReturnStatus::OSP_SUCCESS || statusSchedule == ReturnStatus::BEST_FOUND) { + DotFileWriter dotWriter; + dotWriter.WriteScheduleRecomp(nameGraph + "_" + nameMachine + "_maxS_" + std::to_string(steps) + "_" + + scheduler.GetScheduleName() + "_recomp_schedule.dot", + schedule); - dot_writer.write_schedule_recomp_duplicate(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" - + scheduler.getScheduleName() + "_duplicate_recomp_schedule.dot", - schedule); + dotWriter.WriteScheduleRecompDuplicate(nameGraph + "_" + nameMachine + "_maxS_" + std::to_string(steps) + "_" + + scheduler.GetScheduleName() + "_duplicate_recomp_schedule.dot", + schedule); - std::cout << "Recomp Schedule computed with costs: 
" << schedule.computeCosts() << std::endl; + std::cout << "Recomp Schedule computed with costs: " << schedule.ComputeCosts() << std::endl; } else { std::cout << "Computing schedule failed." << std::endl; @@ -114,15 +114,15 @@ int main(int argc, char *argv[]) { } else { BspSchedule schedule(instance); - auto status_schedule = scheduler.computeSchedule(schedule); + auto statusSchedule = scheduler.ComputeSchedule(schedule); - if (status_schedule == RETURN_STATUS::OSP_SUCCESS || status_schedule == RETURN_STATUS::BEST_FOUND) { - DotFileWriter dot_writer; - dot_writer.write_schedule(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" - + scheduler.getScheduleName() + "_schedule.dot", - schedule); + if (statusSchedule == ReturnStatus::OSP_SUCCESS || statusSchedule == ReturnStatus::BEST_FOUND) { + DotFileWriter dotWriter; + dotWriter.WriteSchedule(nameGraph + "_" + nameMachine + "_maxS_" + std::to_string(steps) + "_" + + scheduler.GetScheduleName() + "_schedule.dot", + schedule); - std::cout << "Schedule computed with costs: " << schedule.computeCosts() << std::endl; + std::cout << "Schedule computed with costs: " << schedule.ComputeCosts() << std::endl; } else { std::cout << "Computing schedule failed." << std::endl; diff --git a/apps/ilp_hypergraph_partitioner.cpp b/apps/ilp_hypergraph_partitioner.cpp index 78b02a47..4aa2faed 100644 --- a/apps/ilp_hypergraph_partitioner.cpp +++ b/apps/ilp_hypergraph_partitioner.cpp @@ -30,6 +30,7 @@ limitations under the License. #include "osp/auxiliary/misc.hpp" #include "osp/graph_algorithms/directed_graph_path_util.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "osp/partitioning/model/hypergraph.hpp" #include "osp/partitioning/model/hypergraph_utility.hpp" #include "osp/partitioning/partitioners/generic_FM.hpp" #include "osp/partitioning/partitioners/partitioning_ILP.hpp" @@ -37,8 +38,7 @@ limitations under the License. 
using namespace osp; -using graph = computational_dag_vector_impl_def_int_t; -using hypergraph = Hypergraph_def_t; +using Graph = ComputationalDagVectorImplDefIntT; int main(int argc, char *argv[]) { if (argc < 4) { @@ -46,19 +46,19 @@ int main(int argc, char *argv[]) { return 1; } - std::string filename_hgraph = argv[1]; - std::string name_hgraph = filename_hgraph.substr(0, filename_hgraph.rfind(".")); - std::string file_ending = filename_hgraph.substr(filename_hgraph.rfind(".") + 1); - if (!file_reader::isPathSafe(filename_hgraph)) { + std::string filenameHgraph = argv[1]; + std::string nameHgraph = filenameHgraph.substr(0, filenameHgraph.rfind(".")); + std::string fileEnding = filenameHgraph.substr(filenameHgraph.rfind(".") + 1); + if (!file_reader::IsPathSafe(filenameHgraph)) { std::cerr << "Error: Unsafe file path (possible traversal or invalid type).\n"; return 1; } - std::cout << name_hgraph << std::endl; + std::cout << nameHgraph << std::endl; - int nr_parts = std::stoi(argv[2]); - if (nr_parts < 2 || nr_parts > 32) { - std::cerr << "Argument nr_parts must be an integer between 2 and 32: " << nr_parts << std::endl; + int nrParts = std::stoi(argv[2]); + if (nrParts < 2 || nrParts > 32) { + std::cerr << "Argument nr_parts must be an integer between 2 and 32: " << nrParts << std::endl; return 1; } @@ -79,65 +79,65 @@ int main(int argc, char *argv[]) { return 1; } - PartitioningProblem instance; + PartitioningProblem instance; - bool file_status = true; - if (file_ending == "hdag") { - graph dag; - file_status = file_reader::readComputationalDagHyperdagFormatDB(filename_hgraph, dag); - if (file_status) { - instance.getHypergraph() = convert_from_cdag_as_hyperdag(dag); + bool fileStatus = true; + if (fileEnding == "hdag") { + Graph dag; + fileStatus = file_reader::ReadComputationalDagHyperdagFormatDB(filenameHgraph, dag); + if (fileStatus) { + instance.GetHypergraph() = ConvertFromCdagAsHyperdag(dag); } - } else if (file_ending == "mtx") { - file_status = 
file_reader::readHypergraphMartixMarketFormat(filename_hgraph, instance.getHypergraph()); + } else if (fileEnding == "mtx") { + fileStatus = file_reader::ReadHypergraphMartixMarketFormat(filenameHgraph, instance.GetHypergraph()); } else { std::cout << "Unknown file extension." << std::endl; return 1; } - if (!file_status) { + if (!fileStatus) { std::cout << "Reading input file failed." << std::endl; return 1; } - instance.setNumberOfPartitions(static_cast(nr_parts)); - instance.setMaxWorkWeightViaImbalanceFactor(imbalance); + instance.SetNumberOfPartitions(static_cast(nrParts)); + instance.SetMaxWorkWeightViaImbalanceFactor(imbalance); - Partitioning initial_partition(instance); - GenericFM fm; - for (size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) { - initial_partition.setAssignedPartition(node, static_cast(node % static_cast(nr_parts))); + Partitioning initialPartition(instance); + GenericFM fm; + for (size_t node = 0; node < instance.GetHypergraph().NumVertices(); ++node) { + initialPartition.SetAssignedPartition(node, static_cast(node % static_cast(nrParts))); } - if (nr_parts == 2) { - fm.ImprovePartitioning(initial_partition); + if (nrParts == 2) { + fm.ImprovePartitioning(initialPartition); } - if (nr_parts == 4 || nr_parts == 8 || nr_parts == 16 || nr_parts == 32) { - fm.RecursiveFM(initial_partition); + if (nrParts == 4 || nrParts == 8 || nrParts == 16 || nrParts == 32) { + fm.RecursiveFM(initialPartition); } if (replicate > 0) { - PartitioningWithReplication partition(instance); - HypergraphPartitioningILPWithReplication partitioner; + PartitioningWithReplication partition(instance); + HypergraphPartitioningILPWithReplication partitioner; - for (size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) { - partition.setAssignedPartitions(node, {initial_partition.assignedPartition(node)}); + for (size_t node = 0; node < instance.GetHypergraph().NumVertices(); ++node) { + partition.SetAssignedPartitions(node, 
{initialPartition.AssignedPartition(node)}); } - if (partition.satisfiesBalanceConstraint()) { - partitioner.setUseInitialSolution(true); + if (partition.SatisfiesBalanceConstraint()) { + partitioner.SetUseInitialSolution(true); } - partitioner.setTimeLimitSeconds(600); + partitioner.SetTimeLimitSeconds(600); if (replicate == 2) { - partitioner.setReplicationModel( - HypergraphPartitioningILPWithReplication::REPLICATION_MODEL_IN_ILP::GENERAL); + partitioner.SetReplicationModel( + HypergraphPartitioningILPWithReplication::ReplicationModelInIlp::GENERAL); } - auto solve_status = partitioner.computePartitioning(partition); + auto solveStatus = partitioner.ComputePartitioning(partition); - if (solve_status == RETURN_STATUS::OSP_SUCCESS || solve_status == RETURN_STATUS::BEST_FOUND) { - file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) + "_ILP_rep" - + std::to_string(replicate) + ".txt", - partition); - std::cout << "Partitioning (with replicaiton) computed with costs: " << partition.computeConnectivityCost() + if (solveStatus == ReturnStatus::OSP_SUCCESS || solveStatus == ReturnStatus::BEST_FOUND) { + file_writer::WriteTxt(nameHgraph + "_" + std::to_string(nrParts) + "_" + std::to_string(imbalance) + "_ILP_rep" + + std::to_string(replicate) + ".txt", + partition); + std::cout << "Partitioning (with replicaiton) computed with costs: " << partition.ComputeConnectivityCost() << std::endl; } else { std::cout << "Computing partition failed." 
<< std::endl; @@ -145,25 +145,25 @@ int main(int argc, char *argv[]) { } } else { - Partitioning partition(instance); - HypergraphPartitioningILP partitioner; + Partitioning partition(instance); + HypergraphPartitioningILP partitioner; - for (size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) { - partition.setAssignedPartition(node, initial_partition.assignedPartition(node)); + for (size_t node = 0; node < instance.GetHypergraph().NumVertices(); ++node) { + partition.SetAssignedPartition(node, initialPartition.AssignedPartition(node)); } - if (partition.satisfiesBalanceConstraint()) { - partitioner.setUseInitialSolution(true); + if (partition.SatisfiesBalanceConstraint()) { + partitioner.SetUseInitialSolution(true); } - partitioner.setTimeLimitSeconds(600); + partitioner.SetTimeLimitSeconds(600); - auto solve_status = partitioner.computePartitioning(partition); + auto solveStatus = partitioner.ComputePartitioning(partition); - if (solve_status == RETURN_STATUS::OSP_SUCCESS || solve_status == RETURN_STATUS::BEST_FOUND) { - file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) + "_ILP_rep" - + std::to_string(replicate) + ".txt", - partition); - std::cout << "Partitioning computed with costs: " << partition.computeConnectivityCost() << std::endl; + if (solveStatus == ReturnStatus::OSP_SUCCESS || solveStatus == ReturnStatus::BEST_FOUND) { + file_writer::WriteTxt(nameHgraph + "_" + std::to_string(nrParts) + "_" + std::to_string(imbalance) + "_ILP_rep" + + std::to_string(replicate) + ".txt", + partition); + std::cout << "Partitioning computed with costs: " << partition.ComputeConnectivityCost() << std::endl; } else { std::cout << "Computing partition failed." << std::endl; return 1; diff --git a/apps/osp.cpp b/apps/osp.cpp index 7c66224b..6f651163 100644 --- a/apps/osp.cpp +++ b/apps/osp.cpp @@ -39,87 +39,87 @@ limitations under the License. 
namespace pt = boost::property_tree; using namespace osp; -using graph_t = computational_dag_edge_idx_vector_impl_def_int_t; +using GraphT = ComputationalDagEdgeIdxVectorImplDefIntT; -std::filesystem::path getExecutablePath() { return std::filesystem::canonical("/proc/self/exe"); } +std::filesystem::path GetExecutablePath() { return std::filesystem::canonical("/proc/self/exe"); } // invoked upon program call int main(int argc, char *argv[]) { - ConfigParser parser(getExecutablePath().remove_filename().string() += "osp_config.json"); + ConfigParser parser(GetExecutablePath().remove_filename().string() += "osp_config.json"); try { - parser.parse_args(argc, argv); + parser.ParseArgs(argc, argv); } catch (std::invalid_argument &e) { std::cerr << e.what() << std::endl; return 1; } - for (auto &instance : parser.instances) { - BspInstance bsp_instance; + for (auto &instance : parser.instances_) { + BspInstance bspInstance; - std::string filename_graph = instance.second.get_child("graphFile").get_value(); - std::string name_graph - = filename_graph.substr(filename_graph.rfind("/") + 1, filename_graph.rfind(".") - filename_graph.rfind("/") - 1); + std::string filenameGraph = instance.second.get_child("graphFile").get_value(); + std::string nameGraph + = filenameGraph.substr(filenameGraph.rfind("/") + 1, filenameGraph.rfind(".") - filenameGraph.rfind("/") - 1); - std::string filename_machine = instance.second.get_child("machineParamsFile").get_value(); + std::string filenameMachine = instance.second.get_child("machineParamsFile").get_value(); - std::string name_machine = filename_machine.substr(filename_machine.rfind("/") + 1, - filename_machine.rfind(".") - filename_machine.rfind("/") - 1); + std::string nameMachine + = filenameMachine.substr(filenameMachine.rfind("/") + 1, filenameMachine.rfind(".") - filenameMachine.rfind("/") - 1); - bool status_architecture = file_reader::readBspArchitecture(filename_machine, bsp_instance.getArchitecture()); + bool statusArchitecture = 
file_reader::ReadBspArchitecture(filenameMachine, bspInstance.GetArchitecture()); - if (!status_architecture) { - std::cerr << "Reading architecture files " + filename_machine << " failed." << std::endl; + if (!statusArchitecture) { + std::cerr << "Reading architecture files " + filenameMachine << " failed." << std::endl; continue; } - bool status_graph = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); - if (!status_graph) { - std::cerr << "Reading graph files " + filename_graph << " failed." << std::endl; + bool statusGraph = file_reader::ReadGraph(filenameGraph, bspInstance.GetComputationalDag()); + if (!statusGraph) { + std::cerr << "Reading graph files " + filenameGraph << " failed." << std::endl; continue; } std::cout << "Warning: assuming all node types can be scheduled on all processor types!\n"; - bsp_instance.setAllOnesCompatibilityMatrix(); + bspInstance.SetAllOnesCompatibilityMatrix(); - std::vector schedulers_name(parser.scheduler.size(), ""); - std::vector schedulers_failed(parser.scheduler.size(), false); - std::vector> schedulers_costs(parser.scheduler.size(), 0); - std::vector> schedulers_work_costs(parser.scheduler.size(), 0); - std::vector schedulers_supersteps(parser.scheduler.size(), 0); - std::vector schedulers_compute_time(parser.scheduler.size(), 0); + std::vector schedulersName(parser.scheduler_.size(), ""); + std::vector schedulersFailed(parser.scheduler_.size(), false); + std::vector> schedulersCosts(parser.scheduler_.size(), 0); + std::vector> schedulersWorkCosts(parser.scheduler_.size(), 0); + std::vector schedulersSupersteps(parser.scheduler_.size(), 0); + std::vector schedulersComputeTime(parser.scheduler_.size(), 0); - size_t algorithm_counter = 0; - for (auto &algorithm : parser.scheduler) { - schedulers_name[algorithm_counter] = algorithm.second.get_child("name").get_value(); + size_t algorithmCounter = 0; + for (auto &algorithm : parser.scheduler_) { + schedulersName[algorithmCounter] = 
algorithm.second.get_child("name").get_value(); - const auto start_time = std::chrono::high_resolution_clock::now(); + const auto startTime = std::chrono::high_resolution_clock::now(); - RETURN_STATUS return_status; - BspSchedule schedule(bsp_instance); + ReturnStatus returnStatus; + BspSchedule schedule(bspInstance); try { - return_status = run_bsp_scheduler(parser, algorithm.second, schedule); + returnStatus = RunBspScheduler(parser, algorithm.second, schedule); } catch (...) { - schedulers_failed[algorithm_counter] = true; + schedulersFailed[algorithmCounter] = true; std::cerr << "Error during execution of Scheduler " + algorithm.second.get_child("name").get_value() + "." << std::endl; continue; } - const auto finish_time = std::chrono::high_resolution_clock::now(); + const auto finishTime = std::chrono::high_resolution_clock::now(); - schedulers_compute_time[algorithm_counter] - = std::chrono::duration_cast(finish_time - start_time).count(); + schedulersComputeTime[algorithmCounter] + = std::chrono::duration_cast(finishTime - startTime).count(); - if (return_status != RETURN_STATUS::OSP_SUCCESS && return_status != RETURN_STATUS::BEST_FOUND) { - schedulers_failed[algorithm_counter] = true; - if (return_status == RETURN_STATUS::ERROR) { + if (returnStatus != ReturnStatus::OSP_SUCCESS && returnStatus != ReturnStatus::BEST_FOUND) { + schedulersFailed[algorithmCounter] = true; + if (returnStatus == ReturnStatus::ERROR) { std::cerr << "Error while computing schedule " + algorithm.second.get_child("name").get_value() + "." << std::endl; - } else if (return_status == RETURN_STATUS::TIMEOUT) { + } else if (returnStatus == ReturnStatus::TIMEOUT) { std::cerr << "Timeout while computing schedule " + algorithm.second.get_child("name").get_value() + "." 
<< std::endl; @@ -129,89 +129,89 @@ int main(int argc, char *argv[]) { << std::endl; } } else { - schedulers_costs[algorithm_counter] = BspScheduleCS(schedule).computeCosts(); - schedulers_work_costs[algorithm_counter] = schedule.computeWorkCosts(); - schedulers_supersteps[algorithm_counter] = schedule.numberOfSupersteps(); + schedulersCosts[algorithmCounter] = BspScheduleCS(schedule).ComputeCosts(); + schedulersWorkCosts[algorithmCounter] = schedule.ComputeWorkCosts(); + schedulersSupersteps[algorithmCounter] = schedule.NumberOfSupersteps(); - if (parser.global_params.get_child("outputSchedule").get_value()) { + if (parser.globalParams_.get_child("outputSchedule").get_value()) { try { - file_writer::write_txt(name_graph + "_" + name_machine + "_" - + algorithm.second.get_child("name").get_value() + "_schedule.txt", - schedule); + file_writer::WriteTxt(nameGraph + "_" + nameMachine + "_" + + algorithm.second.get_child("name").get_value() + "_schedule.txt", + schedule); } catch (std::exception &e) { - std::cerr << "Writing schedule file for " + name_graph + ", " + name_machine + ", " - + schedulers_name[algorithm_counter] + " has failed." + std::cerr << "Writing schedule file for " + nameGraph + ", " + nameMachine + ", " + + schedulersName[algorithmCounter] + " has failed." 
<< std::endl; std::cerr << e.what() << std::endl; } } - if (parser.global_params.get_child("outputSankeySchedule").get_value()) { + if (parser.globalParams_.get_child("outputSankeySchedule").get_value()) { try { - file_writer::write_sankey(name_graph + "_" + name_machine + "_" - + algorithm.second.get_child("name").get_value() - + "_sankey.sankey", - BspScheduleCS(schedule)); + file_writer::WriteSankey(nameGraph + "_" + nameMachine + "_" + + algorithm.second.get_child("name").get_value() + + "_sankey.sankey", + BspScheduleCS(schedule)); } catch (std::exception &e) { - std::cerr << "Writing sankey file for " + name_graph + ", " + name_machine + ", " - + schedulers_name[algorithm_counter] + " has failed." + std::cerr << "Writing sankey file for " + nameGraph + ", " + nameMachine + ", " + + schedulersName[algorithmCounter] + " has failed." << std::endl; std::cerr << e.what() << std::endl; } } - if (parser.global_params.get_child("outputDotSchedule").get_value()) { + if (parser.globalParams_.get_child("outputDotSchedule").get_value()) { try { - DotFileWriter sched_writer; - sched_writer.write_schedule(name_graph + "_" + name_machine + "_" - + algorithm.second.get_child("name").get_value() - + "_schedule.dot", - schedule); + DotFileWriter schedWriter; + schedWriter.WriteSchedule(nameGraph + "_" + nameMachine + "_" + + algorithm.second.get_child("name").get_value() + + "_schedule.dot", + schedule); } catch (std::exception &e) { - std::cerr << "Writing dot file for " + name_graph + ", " + name_machine + ", " - + schedulers_name[algorithm_counter] + " has failed." + std::cerr << "Writing dot file for " + nameGraph + ", " + nameMachine + ", " + + schedulersName[algorithmCounter] + " has failed." 
<< std::endl; std::cerr << e.what() << std::endl; } } } - algorithm_counter++; + algorithmCounter++; } int tw = 1, ww = 1, cw = 1, nsw = 1, ct = 1; - for (size_t i = 0; i < parser.scheduler.size(); i++) { - if (schedulers_failed[i]) { + for (size_t i = 0; i < parser.scheduler_.size(); i++) { + if (schedulersFailed[i]) { continue; } - tw = std::max(tw, 1 + int(std::log10(schedulers_costs[i]))); - ww = std::max(ww, 1 + int(std::log10(schedulers_work_costs[i]))); - cw = std::max(cw, 1 + int(std::log10(schedulers_costs[i] - schedulers_work_costs[i]))); - nsw = std::max(nsw, 1 + int(std::log10(schedulers_supersteps[i]))); - ct = std::max(ct, 1 + int(std::log10(schedulers_compute_time[i]))); + tw = std::max(tw, 1 + int(std::log10(schedulersCosts[i]))); + ww = std::max(ww, 1 + int(std::log10(schedulersWorkCosts[i]))); + cw = std::max(cw, 1 + int(std::log10(schedulersCosts[i] - schedulersWorkCosts[i]))); + nsw = std::max(nsw, 1 + int(std::log10(schedulersSupersteps[i]))); + ct = std::max(ct, 1 + int(std::log10(schedulersComputeTime[i]))); } - std::vector ordering = sorting_arrangement(schedulers_costs); + std::vector ordering = SortingArrangement(schedulersCosts); - std::cout << std::endl << name_graph << " - " << name_machine << std::endl; - std::cout << "Number of Vertices: " + std::to_string(bsp_instance.getComputationalDag().num_vertices()) - + " Number of Edges: " + std::to_string(bsp_instance.getComputationalDag().num_edges()) + std::cout << std::endl << nameGraph << " - " << nameMachine << std::endl; + std::cout << "Number of Vertices: " + std::to_string(bspInstance.GetComputationalDag().NumVertices()) + + " Number of Edges: " + std::to_string(bspInstance.GetComputationalDag().NumEdges()) << std::endl; - for (size_t j = 0; j < parser.scheduler.size(); j++) { + for (size_t j = 0; j < parser.scheduler_.size(); j++) { size_t i = j; i = ordering[j]; - if (schedulers_failed[i]) { - std::cout << "scheduler " << schedulers_name[i] << " failed." 
<< std::endl; + if (schedulersFailed[i]) { + std::cout << "scheduler " << schedulersName[i] << " failed." << std::endl; } else { - std::cout << "total costs: " << std::right << std::setw(tw) << schedulers_costs[i] - << " work costs: " << std::right << std::setw(ww) << schedulers_work_costs[i] - << " comm costs: " << std::right << std::setw(cw) << schedulers_costs[i] - schedulers_work_costs[i] - << " number of supersteps: " << std::right << std::setw(nsw) << schedulers_supersteps[i] - << " compute time: " << std::right << std::setw(ct) << schedulers_compute_time[i] << "ms" - << " scheduler: " << schedulers_name[i] << std::endl; + std::cout << "total costs: " << std::right << std::setw(tw) << schedulersCosts[i] + << " work costs: " << std::right << std::setw(ww) << schedulersWorkCosts[i] + << " comm costs: " << std::right << std::setw(cw) << schedulersCosts[i] - schedulersWorkCosts[i] + << " number of supersteps: " << std::right << std::setw(nsw) << schedulersSupersteps[i] + << " compute time: " << std::right << std::setw(ct) << schedulersComputeTime[i] << "ms" + << " scheduler: " << schedulersName[i] << std::endl; } } } diff --git a/apps/osp_turnus.cpp b/apps/osp_turnus.cpp index ea5f114d..00fb434d 100644 --- a/apps/osp_turnus.cpp +++ b/apps/osp_turnus.cpp @@ -29,8 +29,8 @@ limitations under the License. 
using namespace osp; -using graph_t = computational_dag_edge_idx_vector_impl_def_int_t; -using mem_constr = persistent_transient_memory_constraint; +using GraphT = ComputationalDagEdgeIdxVectorImplDefIntT; +using MemConstr = PersistentTransientMemoryConstraint; // invoked upon program call int main(int argc, char *argv[]) { @@ -40,70 +40,69 @@ int main(int argc, char *argv[]) { return 1; } - BspInstance bsp_instance; + BspInstance bspInstance; - bsp_instance.getArchitecture().setNumberOfProcessors(static_cast(std::stoul(argv[2]))); - bsp_instance.getArchitecture().setMemoryBound(std::atoi(argv[3])); - bsp_instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT); + bspInstance.GetArchitecture().SetNumberOfProcessors(static_cast(std::stoul(argv[2]))); + bspInstance.GetArchitecture().SetMemoryBound(std::atoi(argv[3])); + bspInstance.GetArchitecture().SetMemoryConstraintType(MemoryConstraintType::PERSISTENT_AND_TRANSIENT); - std::string algorithm_name = argv[4]; + std::string algorithmName = argv[4]; - std::string filename_graph = argv[1]; + std::string filenameGraph = argv[1]; - bool status_graph = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); + bool statusGraph = file_reader::ReadGraph(filenameGraph, bspInstance.GetComputationalDag()); - if (!status_graph) { - std::cout << "Error while reading the graph from file: " << filename_graph << std::endl; + if (!statusGraph) { + std::cout << "Error while reading the graph from file: " << filenameGraph << std::endl; return 1; } - if (bsp_instance.getComputationalDag().num_vertex_types() > 1) { + if (bspInstance.GetComputationalDag().NumVertexTypes() > 1) { std::cout << "The graph has more than one vertex type, which is not supported by this scheduler." 
<< std::endl; return 1; } - boost::algorithm::to_lower(algorithm_name); // modifies str + boost::algorithm::to_lower(algorithmName); // modifies str - BspSchedule bsp_schedule(bsp_instance); - Scheduler *scheduler = nullptr; + BspSchedule bspSchedule(bspInstance); + Scheduler *scheduler = nullptr; - if (algorithm_name == "bsp") { - float max_percent_idle_processors = 0.2f; - bool increase_parallelism_in_new_superstep = true; + if (algorithmName == "bsp") { + float maxPercentIdleProcessors = 0.2f; + bool increaseParallelismInNewSuperstep = true; - scheduler = new GreedyBspScheduler(max_percent_idle_processors, increase_parallelism_in_new_superstep); + scheduler = new GreedyBspScheduler(maxPercentIdleProcessors, increaseParallelismInNewSuperstep); - } else if (algorithm_name == "etf") { - scheduler = new EtfScheduler(BL_EST); + } else if (algorithmName == "etf") { + scheduler = new EtfScheduler(BL_EST); - } else if (algorithm_name == "variance") { - const double max_percent_idle_processors = 0.0; - const bool increase_parallelism_in_new_superstep = true; - const double variance_power = 6.0; - const float max_priority_difference_percent = 0.34f; - const double heavy_is_x_times_median = 3.0; - const double min_percent_components_retained = 0.25; - const float bound_component_weight_percent = 4.0f; + } else if (algorithmName == "variance") { + const double maxPercentIdleProcessors = 0.0; + const bool increaseParallelismInNewSuperstep = true; + const double variancePower = 6.0; + const float maxPriorityDifferencePercent = 0.34f; + const double heavyIsXTimesMedian = 3.0; + const double minPercentComponentsRetained = 0.25; + const float boundComponentWeightPercent = 4.0f; const float slack = 0.0f; - scheduler = new LightEdgeVariancePartitioner( - max_percent_idle_processors, - variance_power, - heavy_is_x_times_median, - min_percent_components_retained, - bound_component_weight_percent, - increase_parallelism_in_new_superstep, - max_priority_difference_percent, - slack); + 
scheduler = new LightEdgeVariancePartitioner(maxPercentIdleProcessors, + variancePower, + heavyIsXTimesMedian, + minPercentComponentsRetained, + boundComponentWeightPercent, + increaseParallelismInNewSuperstep, + maxPriorityDifferencePercent, + slack); } else { - std::cout << "Unknown algorithm: " << algorithm_name << std::endl; + std::cout << "Unknown algorithm: " << algorithmName << std::endl; return 1; } - auto scheduler_status = scheduler->computeSchedule(bsp_schedule); + auto schedulerStatus = scheduler->ComputeSchedule(bspSchedule); - if (scheduler_status == RETURN_STATUS::ERROR) { + if (schedulerStatus == ReturnStatus::ERROR) { std::cout << "Error while scheduling!" << std::endl; delete scheduler; return 1; @@ -111,7 +110,7 @@ int main(int argc, char *argv[]) { delete scheduler; - file_writer::write_txt(filename_graph + "_" + algorithm_name + "_schedule.shed", bsp_schedule); + file_writer::WriteTxt(filenameGraph + "_" + algorithmName + "_schedule.shed", bspSchedule); std::cout << "OSP Success" << std::endl; return 0; diff --git a/apps/sptrsv_test_suite.cpp b/apps/sptrsv_test_suite.cpp index 47643a9b..222a14bb 100644 --- a/apps/sptrsv_test_suite.cpp +++ b/apps/sptrsv_test_suite.cpp @@ -25,11 +25,11 @@ limitations under the License. 
#ifdef EIGEN_FOUND -using graph_t = osp::sparse_matrix_graph_int32_t; +using GraphT = osp::SparseMatrixGraphInt32T; int main(int argc, char *argv[]) { - osp::BspScheduleTestSuiteRunner runner; - return runner.run(argc, argv); + osp::BspScheduleTestSuiteRunner runner; + return runner.Run(argc, argv); return 0; } diff --git a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp index 01d07714..7b4f249c 100644 --- a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp +++ b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp @@ -53,54 +53,53 @@ namespace pt = boost::property_tree; template class AbstractTestSuiteRunner { protected: - std::string executable_dir; - ConfigParser parser; - std::ofstream log_stream; - std::ofstream stats_out_stream; - std::vector all_csv_headers; - std::vector>> active_stats_modules; + std::string executableDir_; + ConfigParser parser_; + std::ofstream logStream_; + std::ofstream statsOutStream_; + std::vector allCsvHeaders_; + std::vector>> activeStatsModules_; - std::string graph_dir_path, machine_dir_path, output_target_object_dir_path, log_file_path, statistics_output_file_path; - bool write_target_object_to_file = false; - unsigned time_limit_seconds = 0; + std::string graphDirPath_, machineDirPath_, outputTargetObjectDirPath_, logFilePath_, statisticsOutputFilePath_; + bool writeTargetObjectToFile_ = false; + unsigned timeLimitSeconds_ = 0; - virtual std::filesystem::path getExecutablePath() const { return std::filesystem::canonical("/proc/self/exe"); } + virtual std::filesystem::path GetExecutablePath() const { return std::filesystem::canonical("/proc/self/exe"); } - virtual bool parse_common_config() { + virtual bool ParseCommonConfig() { try { - executable_dir = getExecutablePath().remove_filename().string(); - time_limit_seconds = parser.global_params.get_child("timeLimit").get_value(); - write_target_object_to_file - = 
parser.global_params.get_child("outputSchedule").get_value_optional().value_or(false); - - graph_dir_path = parser.global_params.get_child("graphDirectory").get_value(); - if (graph_dir_path.substr(0, 1) != "/") { - graph_dir_path = executable_dir + graph_dir_path; + executableDir_ = GetExecutablePath().remove_filename().string(); + timeLimitSeconds_ = parser_.globalParams_.get_child("timeLimit").get_value(); + writeTargetObjectToFile_ = parser_.globalParams_.get_child("outputSchedule").get_value_optional().value_or(false); + + graphDirPath_ = parser_.globalParams_.get_child("graphDirectory").get_value(); + if (graphDirPath_.substr(0, 1) != "/") { + graphDirPath_ = executableDir_ + graphDirPath_; } - machine_dir_path = parser.global_params.get_child("archDirectory").get_value(); - if (machine_dir_path.substr(0, 1) != "/") { - machine_dir_path = executable_dir + machine_dir_path; + machineDirPath_ = parser_.globalParams_.get_child("archDirectory").get_value(); + if (machineDirPath_.substr(0, 1) != "/") { + machineDirPath_ = executableDir_ + machineDirPath_; } - if (write_target_object_to_file) { - output_target_object_dir_path = parser.global_params.get_child("scheduleDirectory").get_value(); - if (output_target_object_dir_path.substr(0, 1) != "/") { - output_target_object_dir_path = executable_dir + output_target_object_dir_path; + if (writeTargetObjectToFile_) { + outputTargetObjectDirPath_ = parser_.globalParams_.get_child("scheduleDirectory").get_value(); + if (outputTargetObjectDirPath_.substr(0, 1) != "/") { + outputTargetObjectDirPath_ = executableDir_ + outputTargetObjectDirPath_; } - if (!output_target_object_dir_path.empty() && !std::filesystem::exists(output_target_object_dir_path)) { - std::filesystem::create_directories(output_target_object_dir_path); + if (!outputTargetObjectDirPath_.empty() && !std::filesystem::exists(outputTargetObjectDirPath_)) { + std::filesystem::create_directories(outputTargetObjectDirPath_); } } - log_file_path = 
parser.global_params.get_child("outputLogFile").get_value(); - if (log_file_path.substr(0, 1) != "/") { - log_file_path = executable_dir + log_file_path; + logFilePath_ = parser_.globalParams_.get_child("outputLogFile").get_value(); + if (logFilePath_.substr(0, 1) != "/") { + logFilePath_ = executableDir_ + logFilePath_; } - statistics_output_file_path = parser.global_params.get_child("outputStatsFile").get_value(); - if (statistics_output_file_path.substr(0, 1) != "/") { - statistics_output_file_path = executable_dir + statistics_output_file_path; + statisticsOutputFilePath_ = parser_.globalParams_.get_child("outputStatsFile").get_value(); + if (statisticsOutputFilePath_.substr(0, 1) != "/") { + statisticsOutputFilePath_ = executableDir_ + statisticsOutputFilePath_; } return true; @@ -110,256 +109,254 @@ class AbstractTestSuiteRunner { } } - virtual void setup_log_file() { - log_stream.open(log_file_path, std::ios_base::app); - if (!log_stream.is_open()) { - std::cerr << "Error: Could not open log file: " << log_file_path << std::endl; + virtual void SetupLogFile() { + logStream_.open(logFilePath_, std::ios_base::app); + if (!logStream_.is_open()) { + std::cerr << "Error: Could not open log file: " << logFilePath_ << std::endl; } } - virtual void setup_statistics_file() { - all_csv_headers = {"Graph", "Machine", "Algorithm", "TimeToCompute(ms)"}; + virtual void SetupStatisticsFile() { + allCsvHeaders_ = {"Graph", "Machine", "Algorithm", "TimeToCompute(ms)"}; - std::set unique_module_metric_headers; - for (const auto &mod : active_stats_modules) { - for (const auto &header : mod->get_metric_headers()) { - auto pair = unique_module_metric_headers.insert(header); + std::set uniqueModuleMetricHeaders; + for (const auto &mod : activeStatsModules_) { + for (const auto &header : mod->GetMetricHeaders()) { + auto pair = uniqueModuleMetricHeaders.insert(header); if (!pair.second) { - log_stream << "Warning: Duplicate metric header '" << header + logStream_ << "Warning: 
Duplicate metric header '" << header << "' found across statistic modules. Using the first one encountered." << std::endl; } } } - all_csv_headers.insert(all_csv_headers.end(), unique_module_metric_headers.begin(), unique_module_metric_headers.end()); + allCsvHeaders_.insert(allCsvHeaders_.end(), uniqueModuleMetricHeaders.begin(), uniqueModuleMetricHeaders.end()); - std::filesystem::path stats_p(statistics_output_file_path); - if (stats_p.has_parent_path() && !std::filesystem::exists(stats_p.parent_path())) { - std::filesystem::create_directories(stats_p.parent_path()); + std::filesystem::path statsP(statisticsOutputFilePath_); + if (statsP.has_parent_path() && !std::filesystem::exists(statsP.parent_path())) { + std::filesystem::create_directories(statsP.parent_path()); } - bool file_exists_and_has_header = false; - std::ifstream stats_file_check(statistics_output_file_path); - if (stats_file_check.is_open()) { - std::string first_line_in_file; - getline(stats_file_check, first_line_in_file); - std::string expected_header_line; - for (size_t i = 0; i < all_csv_headers.size(); ++i) { - expected_header_line += all_csv_headers[i] + (i == all_csv_headers.size() - 1 ? "" : ","); + bool fileExistsAndHasHeader = false; + std::ifstream statsFileCheck(statisticsOutputFilePath_); + if (statsFileCheck.is_open()) { + std::string firstLineInFile; + getline(statsFileCheck, firstLineInFile); + std::string expectedHeaderLine; + for (size_t i = 0; i < allCsvHeaders_.size(); ++i) { + expectedHeaderLine += allCsvHeaders_[i] + (i == allCsvHeaders_.size() - 1 ? 
"" : ","); } - if (first_line_in_file == expected_header_line) { - file_exists_and_has_header = true; + if (firstLineInFile == expectedHeaderLine) { + fileExistsAndHasHeader = true; } - stats_file_check.close(); + statsFileCheck.close(); } - stats_out_stream.open(statistics_output_file_path, std::ios_base::app); - if (!stats_out_stream.is_open()) { - log_stream << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path << std::endl; - std::cerr << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path << std::endl; - } else if (!file_exists_and_has_header) { - for (size_t i = 0; i < all_csv_headers.size(); ++i) { - stats_out_stream << all_csv_headers[i] << (i == all_csv_headers.size() - 1 ? "" : ","); + statsOutStream_.open(statisticsOutputFilePath_, std::ios_base::app); + if (!statsOutStream_.is_open()) { + logStream_ << "CRITICAL ERROR: Could not open statistics output file: " << statisticsOutputFilePath_ << std::endl; + std::cerr << "CRITICAL ERROR: Could not open statistics output file: " << statisticsOutputFilePath_ << std::endl; + } else if (!fileExistsAndHasHeader) { + for (size_t i = 0; i < allCsvHeaders_.size(); ++i) { + statsOutStream_ << allCsvHeaders_[i] << (i == allCsvHeaders_.size() - 1 ? "" : ","); } - stats_out_stream << "\n"; - log_stream << "Initialized statistics file " << statistics_output_file_path << " with header." << std::endl; + statsOutStream_ << "\n"; + logStream_ << "Initialized statistics file " << statisticsOutputFilePath_ << " with header." 
<< std::endl; } } - virtual RETURN_STATUS compute_target_object_impl(const BspInstance &instance, - std::unique_ptr &target_object, - const pt::ptree &algo_config, - long long &computation_time_ms) + virtual ReturnStatus ComputeTargetObjectImpl(const BspInstance &instance, + std::unique_ptr &targetObject, + const pt::ptree &algoConfig, + long long &computationTimeMs) = 0; - virtual void create_and_register_statistic_modules(const std::string &module_name) = 0; + virtual void CreateAndRegisterStatisticModules(const std::string &moduleName) = 0; - virtual void write_target_object_hook(const TargetObjectType &, const std::string &, const std::string &, const std::string &) { + virtual void WriteTargetObjectHook(const TargetObjectType &, const std::string &, const std::string &, const std::string &) { } // default in case TargetObjectType cannot be written to file public: AbstractTestSuiteRunner() {} virtual ~AbstractTestSuiteRunner() { - if (log_stream.is_open()) { - log_stream.close(); + if (logStream_.is_open()) { + logStream_.close(); } - if (stats_out_stream.is_open()) { - stats_out_stream.close(); + if (statsOutStream_.is_open()) { + statsOutStream_.close(); } } - int run(int argc, char *argv[]) { + int Run(int argc, char *argv[]) { try { - parser.parse_args(argc, argv); + parser_.ParseArgs(argc, argv); } catch (const std::exception &e) { std::cerr << "Error parsing command line arguments: " << e.what() << std::endl; return 1; } - if (!parse_common_config()) { + if (!ParseCommonConfig()) { return 1; } - setup_log_file(); + SetupLogFile(); - std::vector active_module_names_from_config; + std::vector activeModuleNamesFromConfig; try { - for (const auto &item : parser.global_params.get_child("activeStatisticModules")) { - active_module_names_from_config.push_back(item.second.get_value()); + for (const auto &item : parser_.globalParams_.get_child("activeStatisticModules")) { + activeModuleNamesFromConfig.push_back(item.second.get_value()); } } catch (const 
pt::ptree_bad_path &e) { - log_stream << "Warning: 'activeStatisticModules' not found. No statistics modules will be run. " << e.what() + logStream_ << "Warning: 'activeStatisticModules' not found. No statistics modules will be run. " << e.what() << std::endl; } - for (const std::string &module_name : active_module_names_from_config) { - create_and_register_statistic_modules(module_name); + for (const std::string &moduleName : activeModuleNamesFromConfig) { + CreateAndRegisterStatisticModules(moduleName); } - if (active_stats_modules.empty()) { - log_stream << "No active statistic modules configured or loaded." << std::endl; + if (activeStatsModules_.empty()) { + logStream_ << "No active statistic modules configured or loaded." << std::endl; } - setup_statistics_file(); + SetupStatisticsFile(); - for (const auto &machine_entry : std::filesystem::recursive_directory_iterator(machine_dir_path)) { - if (std::filesystem::is_directory(machine_entry)) { - log_stream << "Skipping directory " << machine_entry.path().string() << std::endl; + for (const auto &machineEntry : std::filesystem::recursive_directory_iterator(machineDirPath_)) { + if (std::filesystem::is_directory(machineEntry)) { + logStream_ << "Skipping directory " << machineEntry.path().string() << std::endl; continue; } - std::string filename_machine = machine_entry.path().string(); - std::string name_machine = filename_machine.substr(filename_machine.rfind('/') + 1); - if (name_machine.rfind('.') != std::string::npos) { - name_machine = name_machine.substr(0, name_machine.rfind('.')); + std::string filenameMachine = machineEntry.path().string(); + std::string nameMachine = filenameMachine.substr(filenameMachine.rfind('/') + 1); + if (nameMachine.rfind('.') != std::string::npos) { + nameMachine = nameMachine.substr(0, nameMachine.rfind('.')); } BspArchitecture arch; - if (!file_reader::readBspArchitecture(filename_machine, arch)) { - log_stream << "Reading architecture file " << filename_machine << " failed." 
<< std::endl; + if (!file_reader::ReadBspArchitecture(filenameMachine, arch)) { + logStream_ << "Reading architecture file " << filenameMachine << " failed." << std::endl; continue; } - log_stream << "Start Machine: " + filename_machine + "\n"; + logStream_ << "Start Machine: " + filenameMachine + "\n"; - for (const auto &graph_entry : std::filesystem::recursive_directory_iterator(graph_dir_path)) { - if (std::filesystem::is_directory(graph_entry)) { - log_stream << "Skipping directory " << graph_entry.path().string() << std::endl; + for (const auto &graphEntry : std::filesystem::recursive_directory_iterator(graphDirPath_)) { + if (std::filesystem::is_directory(graphEntry)) { + logStream_ << "Skipping directory " << graphEntry.path().string() << std::endl; continue; } - std::string filename_graph = graph_entry.path().string(); - std::string name_graph = filename_graph.substr(filename_graph.rfind('/') + 1); - if (name_graph.rfind('.') != std::string::npos) { - name_graph = name_graph.substr(0, name_graph.rfind('.')); + std::string filenameGraph = graphEntry.path().string(); + std::string nameGraph = filenameGraph.substr(filenameGraph.rfind('/') + 1); + if (nameGraph.rfind('.') != std::string::npos) { + nameGraph = nameGraph.substr(0, nameGraph.rfind('.')); } - log_stream << "Start Graph: " + filename_graph + "\n"; + logStream_ << "Start Graph: " + filenameGraph + "\n"; - BspInstance bsp_instance; - bsp_instance.getArchitecture() = arch; - bool graph_status = false; + BspInstance bspInstance; + bspInstance.GetArchitecture() = arch; + bool graphStatus = false; std::string ext; - if (filename_graph.rfind('.') != std::string::npos) { - ext = filename_graph.substr(filename_graph.rfind('.') + 1); + if (filenameGraph.rfind('.') != std::string::npos) { + ext = filenameGraph.substr(filenameGraph.rfind('.') + 1); } #ifdef EIGEN_FOUND - using SM_csr_int32 = Eigen::SparseMatrix; - using SM_csc_int32 = Eigen::SparseMatrix; - using SM_csr_int64 = Eigen::SparseMatrix; - using 
SM_csc_int64 = Eigen::SparseMatrix; - SM_csr_int32 L_csr_int32; - SM_csr_int64 L_csr_int64; - SM_csc_int32 L_csc_int32{}; - SM_csc_int64 L_csc_int64{}; - - if constexpr (std::is_same_v - || std::is_same_v) { + using SmCsrInt32 = Eigen::SparseMatrix; + using SmCscInt32 = Eigen::SparseMatrix; + using SmCsrInt64 = Eigen::SparseMatrix; + using SmCscInt64 = Eigen::SparseMatrix; + SmCsrInt32 lCsrInt32; + SmCsrInt64 lCsrInt64; + SmCscInt32 lCscInt32{}; + SmCscInt64 lCscInt64{}; + + if constexpr (std::is_same_v + || std::is_same_v) { if (ext != "mtx") { - log_stream << "Error: Only .mtx file is accepted for SpTRSV" << std::endl; + logStream_ << "Error: Only .mtx file is accepted for SpTRSV" << std::endl; return 0; } - if constexpr (std::is_same_v) { - graph_status = Eigen::loadMarket(L_csr_int32, filename_graph); - if (!graph_status) { - std::cerr << "Failed to read matrix from " << filename_graph << std::endl; + if constexpr (std::is_same_v) { + graphStatus = Eigen::loadMarket(lCsrInt32, filenameGraph); + if (!graphStatus) { + std::cerr << "Failed to read matrix from " << filenameGraph << std::endl; return -1; } - bsp_instance.getComputationalDag().setCSR(&L_csr_int32); - L_csc_int32 = L_csr_int32; - bsp_instance.getComputationalDag().setCSC(&L_csc_int32); + bspInstance.GetComputationalDag().SetCsr(&lCsrInt32); + lCscInt32 = lCsrInt32; + bspInstance.GetComputationalDag().SetCsc(&lCscInt32); } else { - graph_status = Eigen::loadMarket(L_csr_int64, filename_graph); - if (!graph_status) { - std::cerr << "Failed to read matrix from " << filename_graph << std::endl; + graphStatus = Eigen::loadMarket(lCsrInt64, filenameGraph); + if (!graphStatus) { + std::cerr << "Failed to read matrix from " << filenameGraph << std::endl; return -1; } - bsp_instance.getComputationalDag().setCSR(&L_csr_int64); - L_csc_int64 = L_csr_int64; - bsp_instance.getComputationalDag().setCSC(&L_csc_int64); + bspInstance.GetComputationalDag().SetCsr(&lCsrInt64); + lCscInt64 = lCsrInt64; + 
bspInstance.GetComputationalDag().SetCsc(&lCscInt64); } } else { #endif - graph_status = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); + graphStatus = file_reader::ReadGraph(filenameGraph, bspInstance.GetComputationalDag()); #ifdef EIGEN_FOUND } #endif - if (!graph_status) { - log_stream << "Reading graph file " << filename_graph << " failed." << std::endl; + if (!graphStatus) { + logStream_ << "Reading graph file " << filenameGraph << " failed." << std::endl; continue; } - for (auto &algorithm_config_pair : parser.scheduler) { - const pt::ptree &algo_config = algorithm_config_pair.second; + for (auto &algorithmConfigPair : parser_.scheduler_) { + const pt::ptree &algoConfig = algorithmConfigPair.second; - std::string current_algo_name = algo_config.get_child("name").get_value(); - log_stream << "Start Algorithm " + current_algo_name + "\n"; + std::string currentAlgoName = algoConfig.get_child("name").get_value(); + logStream_ << "Start Algorithm " + currentAlgoName + "\n"; - long long computation_time_ms; - std::unique_ptr target_object; + long long computationTimeMs; + std::unique_ptr targetObject; - RETURN_STATUS exec_status - = compute_target_object_impl(bsp_instance, target_object, algo_config, computation_time_ms); + ReturnStatus execStatus = ComputeTargetObjectImpl(bspInstance, targetObject, algoConfig, computationTimeMs); - if (exec_status != RETURN_STATUS::OSP_SUCCESS && exec_status != RETURN_STATUS::BEST_FOUND) { - if (exec_status == RETURN_STATUS::ERROR) { - log_stream << "Error computing with " << current_algo_name << "." << std::endl; - } else if (exec_status == RETURN_STATUS::TIMEOUT) { - log_stream << "Scheduler " << current_algo_name << " timed out." << std::endl; + if (execStatus != ReturnStatus::OSP_SUCCESS && execStatus != ReturnStatus::BEST_FOUND) { + if (execStatus == ReturnStatus::ERROR) { + logStream_ << "Error computing with " << currentAlgoName << "." 
<< std::endl; + } else if (execStatus == ReturnStatus::TIMEOUT) { + logStream_ << "Scheduler " << currentAlgoName << " timed out." << std::endl; } continue; } - if (write_target_object_to_file) { + if (writeTargetObjectToFile_) { try { - write_target_object_hook(*target_object, name_graph, name_machine, current_algo_name); + WriteTargetObjectHook(*targetObject, nameGraph, nameMachine, currentAlgoName); } catch (const std::exception &e) { - log_stream << "Writing target object file for " << name_graph << ", " << name_machine << ", " - << current_algo_name << " has failed: " << e.what() << std::endl; + logStream_ << "Writing target object file for " << nameGraph << ", " << nameMachine << ", " + << currentAlgoName << " has failed: " << e.what() << std::endl; } } - if (stats_out_stream.is_open()) { - std::map current_row_values; - current_row_values["Graph"] = name_graph; - current_row_values["Machine"] = name_machine; - current_row_values["Algorithm"] = current_algo_name; - current_row_values["TimeToCompute(ms)"] = std::to_string(computation_time_ms); + if (statsOutStream_.is_open()) { + std::map currentRowValues; + currentRowValues["Graph"] = nameGraph; + currentRowValues["Machine"] = nameMachine; + currentRowValues["Algorithm"] = currentAlgoName; + currentRowValues["TimeToCompute(ms)"] = std::to_string(computationTimeMs); - for (auto &stat_module : active_stats_modules) { - auto module_metrics = stat_module->record_statistics(*target_object, log_stream); - current_row_values.insert(module_metrics.begin(), module_metrics.end()); + for (auto &statModule : activeStatsModules_) { + auto moduleMetrics = statModule->RecordStatistics(*targetObject, logStream_); + currentRowValues.insert(moduleMetrics.begin(), moduleMetrics.end()); } - for (size_t i = 0; i < all_csv_headers.size(); ++i) { - stats_out_stream << current_row_values[all_csv_headers[i]] - << (i == all_csv_headers.size() - 1 ? 
"" : ","); + for (size_t i = 0; i < allCsvHeaders_.size(); ++i) { + statsOutStream_ << currentRowValues[allCsvHeaders_[i]] << (i == allCsvHeaders_.size() - 1 ? "" : ","); } - stats_out_stream << "\n"; + statsOutStream_ << "\n"; } } } diff --git a/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp b/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp index dd161798..533deaab 100644 --- a/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp +++ b/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp @@ -32,58 +32,58 @@ limitations under the License. namespace osp { -template -class BspScheduleRecompTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t> { +template +class BspScheduleRecompTestSuiteRunner : public AbstractTestSuiteRunner, ConcreteGraphT> { private: - bool use_memory_constraint_for_bsp; + bool useMemoryConstraintForBsp_; protected: - RETURN_STATUS compute_target_object_impl(const BspInstance &instance, - std::unique_ptr> &schedule, - const pt::ptree &algo_config, - long long &computation_time_ms) override { - std::string algo_name = algo_config.get_child("id").get_value(); - const std::set scheduler_names = get_available_bsp_scheduler_names(); - const std::set scheduler_recomp_names = get_available_bsp_recomp_scheduler_names(); + ReturnStatus ComputeTargetObjectImpl(const BspInstance &instance, + std::unique_ptr> &schedule, + const pt::ptree &algoConfig, + long long &computationTimeMs) override { + std::string algoName = algoConfig.get_child("id").get_value(); + const std::set schedulerNames = GetAvailableBspSchedulerNames(); + const std::set schedulerRecompNames = GetAvailableBspRecompSchedulerNames(); - if (scheduler_names.find(algo_name) != scheduler_names.end()) { - auto bsp_schedule = std::make_unique>(instance); + if (schedulerNames.find(algoName) != schedulerNames.end()) { + auto bspSchedule = std::make_unique>(instance); - const auto start_time = std::chrono::high_resolution_clock::now(); + const auto 
startTime = std::chrono::high_resolution_clock::now(); - RETURN_STATUS status = run_bsp_scheduler(this->parser, algo_config, *bsp_schedule); + ReturnStatus status = RunBspScheduler(this->parser_, algoConfig, *bspSchedule); - const auto finish_time = std::chrono::high_resolution_clock::now(); - computation_time_ms = std::chrono::duration_cast(finish_time - start_time).count(); + const auto finishTime = std::chrono::high_resolution_clock::now(); + computationTimeMs = std::chrono::duration_cast(finishTime - startTime).count(); - schedule = std::move(bsp_schedule); + schedule = std::move(bspSchedule); return status; - } else if (scheduler_recomp_names.find(algo_name) != scheduler_recomp_names.end()) { - auto bsp_recomp_schedule = std::make_unique>(instance); + } else if (schedulerRecompNames.find(algoName) != schedulerRecompNames.end()) { + auto bspRecompSchedule = std::make_unique>(instance); - const auto start_time = std::chrono::high_resolution_clock::now(); + const auto startTime = std::chrono::high_resolution_clock::now(); - RETURN_STATUS status = run_bsp_recomp_scheduler(this->parser, algo_config, *bsp_recomp_schedule); + ReturnStatus status = RunBspRecompScheduler(this->parser_, algoConfig, *bspRecompSchedule); - const auto finish_time = std::chrono::high_resolution_clock::now(); - computation_time_ms = std::chrono::duration_cast(finish_time - start_time).count(); + const auto finishTime = std::chrono::high_resolution_clock::now(); + computationTimeMs = std::chrono::duration_cast(finishTime - startTime).count(); - schedule = std::move(bsp_recomp_schedule); + schedule = std::move(bspRecompSchedule); return status; } else { std::cerr << "No matching category found for algorithm" << std::endl; - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } } - void create_and_register_statistic_modules(const std::string &module_name) override { - if (module_name == "BasicBspStats") { - this->active_stats_modules.push_back(std::make_unique>>()); - } else if 
(module_name == "GraphStats") { - this->active_stats_modules.push_back(std::make_unique>>()); + void CreateAndRegisterStatisticModules(const std::string &moduleName) override { + if (moduleName == "BasicBspStats") { + this->activeStatsModules_.push_back(std::make_unique>>()); + } else if (moduleName == "GraphStats") { + this->activeStatsModules_.push_back(std::make_unique>>()); } } @@ -97,7 +97,7 @@ class BspScheduleRecompTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t>() {} + BspScheduleRecompTestSuiteRunner() : AbstractTestSuiteRunner, ConcreteGraphT>() {} }; } // namespace osp diff --git a/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp b/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp index d1338e4c..a19cf416 100644 --- a/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp +++ b/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp @@ -29,43 +29,43 @@ limitations under the License. namespace osp { -template -class BspScheduleTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t> { +template +class BspScheduleTestSuiteRunner : public AbstractTestSuiteRunner, ConcreteGraphT> { private: protected: - RETURN_STATUS compute_target_object_impl(const BspInstance &instance, - std::unique_ptr> &schedule, - const pt::ptree &algo_config, - long long &computation_time_ms) override { - schedule = std::make_unique>(instance); + ReturnStatus ComputeTargetObjectImpl(const BspInstance &instance, + std::unique_ptr> &schedule, + const pt::ptree &algoConfig, + long long &computationTimeMs) override { + schedule = std::make_unique>(instance); - const auto start_time = std::chrono::high_resolution_clock::now(); + const auto startTime = std::chrono::high_resolution_clock::now(); - RETURN_STATUS status = run_bsp_scheduler(this->parser, algo_config, *schedule); + ReturnStatus status = RunBspScheduler(this->parser_, algoConfig, *schedule); - const auto finish_time = std::chrono::high_resolution_clock::now(); - computation_time_ms = 
std::chrono::duration_cast(finish_time - start_time).count(); + const auto finishTime = std::chrono::high_resolution_clock::now(); + computationTimeMs = std::chrono::duration_cast(finishTime - startTime).count(); return status; } - void create_and_register_statistic_modules(const std::string &module_name) override { - if (module_name == "BasicBspStats") { - this->active_stats_modules.push_back(std::make_unique>>()); - } else if (module_name == "BspCommStats") { - this->active_stats_modules.push_back(std::make_unique>()); + void CreateAndRegisterStatisticModules(const std::string &moduleName) override { + if (moduleName == "BasicBspStats") { + this->activeStatsModules_.push_back(std::make_unique>>()); + } else if (moduleName == "BspCommStats") { + this->activeStatsModules_.push_back(std::make_unique>()); #ifdef EIGEN_FOUND - } else if (module_name == "BspSptrsvStats") { - this->active_stats_modules.push_back(std::make_unique>>(NO_PERMUTE)); - } else if (module_name == "BspSptrsvPermLoopProcessorsStats") { - this->active_stats_modules.push_back( - std::make_unique>>(LOOP_PROCESSORS)); - } else if (module_name == "BspSptrsvPermSnakeProcessorsStats") { - this->active_stats_modules.push_back( - std::make_unique>>(SNAKE_PROCESSORS)); + } else if (moduleName == "BspSptrsvStats") { + this->activeStatsModules_.push_back(std::make_unique>>(NO_PERMUTE)); + } else if (moduleName == "BspSptrsvPermLoopProcessorsStats") { + this->activeStatsModules_.push_back( + std::make_unique>>(LOOP_PROCESSORS)); + } else if (moduleName == "BspSptrsvPermSnakeProcessorsStats") { + this->activeStatsModules_.push_back( + std::make_unique>>(SNAKE_PROCESSORS)); #endif - } else if (module_name == "GraphStats") { - this->active_stats_modules.push_back(std::make_unique>>()); + } else if (moduleName == "GraphStats") { + this->activeStatsModules_.push_back(std::make_unique>>()); } } @@ -78,7 +78,7 @@ class BspScheduleTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t>() {} + 
BspScheduleTestSuiteRunner() : AbstractTestSuiteRunner, ConcreteGraphT>() {} }; } // namespace osp diff --git a/apps/test_suite_runner/ConfigParser.hpp b/apps/test_suite_runner/ConfigParser.hpp index 1750ff9f..588397b7 100644 --- a/apps/test_suite_runner/ConfigParser.hpp +++ b/apps/test_suite_runner/ConfigParser.hpp @@ -32,17 +32,17 @@ namespace pt = boost::property_tree; // main parameters for running simple_schedulers.cpp struct ConfigParser { public: - pt::ptree global_params; - pt::ptree scheduler; - pt::ptree instances; + pt::ptree globalParams_; + pt::ptree scheduler_; + pt::ptree instances_; private: - std::string main_config_file = ""; - bool has_config_file = false; + std::string mainConfigFile_ = ""; + bool hasConfigFile_ = false; - pt::ptree scheduler_config; + pt::ptree schedulerConfig_; - void usage() { + void Usage() { std::cout << "Usage: Either read config file: \n" << " --config *.json \t\tSpecify config .json file.\n" << " Or specify command line options:\n" @@ -56,51 +56,51 @@ struct ConfigParser { << " Available scheduler: \n"; pt::ptree loadPtreeRoot; - pt::read_json(main_config_file, loadPtreeRoot); - pt::ptree scheduler_config_usage = loadPtreeRoot.get_child("algorithms"); + pt::read_json(mainConfigFile_, loadPtreeRoot); + pt::ptree schedulerConfigUsage = loadPtreeRoot.get_child("algorithms"); - for (auto &algorithm : scheduler_config_usage) { + for (auto &algorithm : schedulerConfigUsage) { std::cout << " --" << algorithm.second.get_child("name").get_value() << "\t\t" << algorithm.second.get_child("description").get_value() << "\n"; } } - void add_algorithm(std::string name) { - bool algorithm_found = false; - std::string algorithm_identifier = name; + void AddAlgorithm(std::string name) { + bool algorithmFound = false; + std::string algorithmIdentifier = name; - while (algorithm_identifier.find("-") == 0) { - algorithm_identifier = algorithm_identifier.substr(1); + while (algorithmIdentifier.find("-") == 0) { + algorithmIdentifier = 
algorithmIdentifier.substr(1); } - for (auto &algorithm : scheduler_config) { - std::string alg_name = algorithm.second.get_child("name").get_value(); + for (auto &algorithm : schedulerConfig_) { + std::string algName = algorithm.second.get_child("name").get_value(); - if (alg_name == algorithm_identifier) { - scheduler.push_back(algorithm); - algorithm_found = true; + if (algName == algorithmIdentifier) { + scheduler_.push_back(algorithm); + algorithmFound = true; } } - if (!algorithm_found) { + if (!algorithmFound) { throw std::invalid_argument("Parameter error: wrong input or unknown algorithm \"" + name + "\".\n"); } } - void parse_config_file(std::string filename) { + void ParseConfigFile(std::string filename) { pt::ptree loadPtreeRoot; pt::read_json(filename, loadPtreeRoot); - global_params = loadPtreeRoot.get_child("globalParameters"); + globalParams_ = loadPtreeRoot.get_child("globalParameters"); try { - instances = loadPtreeRoot.get_child("inputInstances"); + instances_ = loadPtreeRoot.get_child("inputInstances"); } catch (const pt::ptree_bad_path &e) {} - pt::ptree scheduler_config_parse = loadPtreeRoot.get_child("algorithms"); - for (auto &algorithm : scheduler_config_parse) { + pt::ptree schedulerConfigParse = loadPtreeRoot.get_child("algorithms"); + for (auto &algorithm : schedulerConfigParse) { if (algorithm.second.get_child("run").get_value()) { - scheduler.push_back(algorithm); + scheduler_.push_back(algorithm); } } } @@ -108,58 +108,58 @@ struct ConfigParser { public: ConfigParser() = default; - ConfigParser(std::string main_config_file_) : main_config_file(main_config_file_), has_config_file(true) {} + ConfigParser(std::string mainConfigFile) : mainConfigFile_(mainConfigFile), hasConfigFile_(true) {} - void parse_args(const int argc, const char *const argv[]) { - if (has_config_file) { + void ParseArgs(const int argc, const char *const argv[]) { + if (hasConfigFile_) { if (argc < 3) { - usage(); + Usage(); throw std::invalid_argument("Parameter 
error: not enough parameters specified.\n"); } else if (std::string(argv[1]) == "--config") { - std::string config_file = argv[2]; - if (config_file.empty() || config_file.substr(config_file.size() - 5) != ".json") { + std::string configFile = argv[2]; + if (configFile.empty() || configFile.substr(configFile.size() - 5) != ".json") { throw std::invalid_argument("Parameter error: config file ending is not \".json\".\n"); } - parse_config_file(config_file); - if (scheduler.empty()) { + ParseConfigFile(configFile); + if (scheduler_.empty()) { throw std::invalid_argument("Parameter error: config file does not specify scheduler to run!\n"); } - if (instances.empty()) { + if (instances_.empty()) { throw std::invalid_argument("Parameter error: config file does not specify input instances!\n"); } - if (global_params.empty()) { + if (globalParams_.empty()) { throw std::invalid_argument("Parameter error: config file does not specify global parameters!\n"); } } else { - const std::set parameters_requiring_value({"--config", - "--inputDag", - "--g", - "-inputDag", - "-g", - "--timeLimit", - "--t", - "-timeLimit", - "-t", - "--inputMachine", - "--m", - "-inputMachine", - "-m"}); + const std::set parametersRequiringValue({"--config", + "--inputDag", + "--g", + "-inputDag", + "-g", + "--timeLimit", + "--t", + "-timeLimit", + "-t", + "--inputMachine", + "--m", + "-inputMachine", + "-m"}); pt::ptree loadPtreeRoot; - pt::read_json(main_config_file, loadPtreeRoot); + pt::read_json(mainConfigFile_, loadPtreeRoot); - global_params = loadPtreeRoot.get_child("globalParameters"); - scheduler_config = loadPtreeRoot.get_child("algorithms"); + globalParams_ = loadPtreeRoot.get_child("globalParameters"); + schedulerConfig_ = loadPtreeRoot.get_child("algorithms"); pt::ptree instance; - bool graph_specified = false; - bool machine_specified = false; + bool graphSpecified = false; + bool machineSpecified = false; // PROCESS COMMAND LINE ARGUMENTS for (int i = 1; i < argc; ++i) { // Check 
parameters that require an argument afterwards - if (parameters_requiring_value.count(argv[i]) == 1 && i + 1 >= argc) { + if (parametersRequiringValue.count(argv[i]) == 1 && i + 1 >= argc) { throw std::invalid_argument("Parameter error: no parameter value after the \"" + std::string(argv[i]) + "\" option.\n"); } @@ -167,48 +167,48 @@ struct ConfigParser { std::string flag = argv[i]; if (std::string(flag) == "--config") { - usage(); + Usage(); throw std::invalid_argument("Parameter error: usage \"" + std::string(argv[i]) + "\".\n"); } else if (std::string(flag) == "--timelimit" || std::string(flag) == "--t" || std::string(flag) == "-t" || std::string(flag) == "-timelimit") { - global_params.put("timeLimit", std::stoi(argv[++i])); + globalParams_.put("timeLimit", std::stoi(argv[++i])); } else if (std::string(flag) == "--sankey" || std::string(flag) == "--s" || std::string(flag) == "-s" || std::string(flag) == "-sankey") { - global_params.put("outputSankeySchedule", true); + globalParams_.put("outputSankeySchedule", true); } else if (std::string(flag) == "--dot" || std::string(flag) == "--d" || std::string(flag) == "-d" || std::string(flag) == "-dot") { - global_params.put("outputDotSchedule", true); + globalParams_.put("outputDotSchedule", true); } else if (std::string(flag) == "--inputDag" || std::string(flag) == "--g" || std::string(flag) == "-inputDag" || std::string(flag) == "-g") { instance.put("graphFile", argv[++i]); - graph_specified = true; + graphSpecified = true; } else if (std::string(flag) == "--inputMachine" || std::string(flag) == "--m" || std::string(flag) == "-inputMachine" || std::string(flag) == "-m") { instance.put("machineParamsFile", argv[++i]); - machine_specified = true; + machineSpecified = true; } else if (std::string(flag) == "--output" || std::string(flag) == "--o" || std::string(flag) == "-output" || std::string(flag) == "-o") { - global_params.put("outputSchedule", true); + globalParams_.put("outputSchedule", true); } else { - 
add_algorithm(flag); + AddAlgorithm(flag); } } - if (!machine_specified || !graph_specified) { - usage(); + if (!machineSpecified || !graphSpecified) { + Usage(); throw std::invalid_argument("Parameter error: no graph or machine parameters were specified!\n"); - } else if (scheduler.empty()) { - usage(); + } else if (scheduler_.empty()) { + Usage(); throw std::invalid_argument("Parameter error: no algorithm was specified!\n"); } - instances.push_back(std::make_pair("", instance)); + instances_.push_back(std::make_pair("", instance)); } } else { if (argc < 3 || std::string(argv[1]) != "--config") { @@ -218,16 +218,16 @@ struct ConfigParser { throw std::invalid_argument("Parameter error: not enough parameters specified.\n"); } else { - std::string config_file = argv[2]; - if (config_file.empty() || config_file.substr(config_file.size() - 5) != ".json") { + std::string configFile = argv[2]; + if (configFile.empty() || configFile.substr(configFile.size() - 5) != ".json") { throw std::invalid_argument("Parameter error: config file ending is not \".json\".\n"); } - parse_config_file(config_file); - if (scheduler.empty()) { + ParseConfigFile(configFile); + if (scheduler_.empty()) { throw std::invalid_argument("Parameter error: config file does not specify scheduler to run!\n"); } - if (global_params.empty()) { + if (globalParams_.empty()) { throw std::invalid_argument("Parameter error: config file does not specify global parameters!\n"); } } diff --git a/apps/test_suite_runner/PebblingTestSuiteRunner.hpp b/apps/test_suite_runner/PebblingTestSuiteRunner.hpp index 2cbcfc5d..6b13abc2 100644 --- a/apps/test_suite_runner/PebblingTestSuiteRunner.hpp +++ b/apps/test_suite_runner/PebblingTestSuiteRunner.hpp @@ -25,50 +25,50 @@ limitations under the License. 
namespace osp { -template -class BasicPebblingStatsModule : public IStatisticModule> { +template +class BasicPebblingStatsModule : public IStatisticModule> { public: private: - const std::vector metric_headers = {"PebblingCost", "AsynchronousPebblingCost", "Supersteps"}; + const std::vector metricHeaders_ = {"PebblingCost", "AsynchronousPebblingCost", "Supersteps"}; public: std::vector get_metric_headers() const override { return metric_headers; } - std::map record_statistics(const PebblingSchedule &schedule, + std::map record_statistics(const PebblingSchedule &schedule, std::ofstream & /*log_stream*/) const override { std::map stats; - stats["PebblingCost"] = std::to_string(schedule.computeCosts()); + stats["PebblingCost"] = std::to_string(schedule.ComputeCosts()); stats["AsynchronousPebblingCost"] = std::to_string(computeAsynchronousCost()); - stats["Supersteps"] = std::to_string(schedule.numberOfSupersteps()); + stats["Supersteps"] = std::to_string(schedule.NumberOfSupersteps()); return stats; } }; -template -class PebblingTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t> { +template +class PebblingTestSuiteRunner : public AbstractTestSuiteRunner, ConcreteGraphT> { private: - bool use_memory_constraint; + bool useMemoryConstraint_; protected: - RETURN_STATUS compute_target_object_impl(const BspInstance &instance, - std::unique_ptr> &schedule, - const pt::ptree &algo_config, - long long &computation_time_ms) override { - schedule = std::make_unique>(instance); + ReturnStatus compute_target_object_impl(const BspInstance &instance, + std::unique_ptr> &schedule, + const pt::ptree &algoConfig, + long long &computationTimeMs) override { + schedule = std::make_unique>(instance); - const auto start_time = std::chrono::high_resolution_clock::now(); + const auto startTime = std::chrono::high_resolution_clock::now(); - RETURN_STATUS status = run_pebbler(this->parser, algo_config, *schedule); + ReturnStatus status = run_pebbler(this->parser, algoConfig, 
*schedule); - const auto finish_time = std::chrono::high_resolution_clock::now(); - computation_time_ms = std::chrono::duration_cast(finish_time - start_time).count(); + const auto finishTime = std::chrono::high_resolution_clock::now(); + computationTimeMs = std::chrono::duration_cast(finishTime - startTime).count(); return status; } - void create_and_register_statistic_modules(const std::string &module_name) override { - if (module_name == "BasicPebblingStats") { - this->active_stats_modules.push_back(std::make_unique>()); + void create_and_register_statistic_modules(const std::string &moduleName) override { + if (moduleName == "BasicPebblingStats") { + this->active_stats_modules.push_back(std::make_unique>()); } } @@ -82,7 +82,7 @@ class PebblingTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t>() {} + PebblingTestSuiteRunner() : AbstractTestSuiteRunner, ConcreteGraphT>() {} }; } // namespace osp diff --git a/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp b/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp index 21229567..08d2a6ff 100644 --- a/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp @@ -32,20 +32,20 @@ template class BasicBspStatsModule : public IStatisticModule { public: private: - const std::vector metric_headers = {"BspCost", "WorkCost", "CommCost", "Supersteps"}; + const std::vector metricHeaders_ = {"BspCost", "WorkCost", "CommCost", "Supersteps"}; public: - std::vector get_metric_headers() const override { return metric_headers; } + std::vector GetMetricHeaders() const override { return metricHeaders_; } - std::map record_statistics(const TargetObjectType &schedule, - std::ofstream & /*log_stream*/) const override { + std::map RecordStatistics(const TargetObjectType &schedule, + std::ofstream & /*log_stream*/) const override { std::map stats; - const auto bsp_cost = schedule.computeCosts(); - const auto work_cost = 
schedule.computeWorkCosts(); - stats["BspCost"] = std::to_string(bsp_cost); - stats["WorkCost"] = std::to_string(work_cost); - stats["CommCost"] = std::to_string(bsp_cost - work_cost); - stats["Supersteps"] = std::to_string(schedule.numberOfSupersteps()); + const auto bspCost = schedule.ComputeCosts(); + const auto workCost = schedule.ComputeWorkCosts(); + stats["BspCost"] = std::to_string(bspCost); + stats["WorkCost"] = std::to_string(workCost); + stats["CommCost"] = std::to_string(bspCost - workCost); + stats["Supersteps"] = std::to_string(schedule.NumberOfSupersteps()); return stats; } }; diff --git a/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp b/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp index 83f6f1b9..8bfee79a 100644 --- a/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp @@ -30,21 +30,21 @@ limitations under the License. namespace osp { -template -class BspCommStatsModule : public IStatisticModule> { +template +class BspCommStatsModule : public IStatisticModule> { public: private: - const std::vector metric_headers = {"TotalCommCost", "TotalLambdaCommCost", "BufferedSendingCosts"}; + const std::vector metricHeaders_ = {"TotalCommCost", "TotalLambdaCommCost", "BufferedSendingCosts"}; public: - std::vector get_metric_headers() const override { return metric_headers; } + std::vector GetMetricHeaders() const override { return metricHeaders_; } - std::map record_statistics(const BspSchedule &schedule, - std::ofstream & /*log_stream*/) const override { + std::map RecordStatistics(const BspSchedule &schedule, + std::ofstream & /*log_stream*/) const override { std::map stats; - stats["TotalCommCost"] = std::to_string(TotalCommunicationCost()(schedule)); - stats["TotalLambdaCommCost"] = std::to_string(TotalLambdaCommunicationCost()(schedule)); - stats["BufferedSendingCosts"] = std::to_string(BufferedSendingCost()(schedule)); + stats["TotalCommCost"] = 
std::to_string(TotalCommunicationCost()(schedule)); + stats["TotalLambdaCommCost"] = std::to_string(TotalLambdaCommunicationCost()(schedule)); + stats["BufferedSendingCosts"] = std::to_string(BufferedSendingCost()(schedule)); return stats; } }; diff --git a/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp b/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp index e2b650d2..969bc114 100644 --- a/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp @@ -41,7 +41,7 @@ limitations under the License. namespace osp { // Turn permutation mode into a human-readable prefix used in metric names -inline const char *mode_tag(SCHEDULE_NODE_PERMUTATION_MODES m) { +inline const char *ModeTag(ScheduleNodePermutationModes m) { switch (m) { case NO_PERMUTE: return "NoPermute_"; @@ -54,7 +54,7 @@ inline const char *mode_tag(SCHEDULE_NODE_PERMUTATION_MODES m) { } } -bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { +bool CompareVectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { std::cout << std::fixed; std::cout << std::setprecision(15); @@ -76,111 +76,111 @@ bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { template class BspSptrsvStatsModule : public IStatisticModule { public: - explicit BspSptrsvStatsModule(SCHEDULE_NODE_PERMUTATION_MODES _mode = NO_PERMUTE) : mode(_mode) {} + explicit BspSptrsvStatsModule(ScheduleNodePermutationModes mode = NO_PERMUTE) : mode_(mode) {} - std::vector get_metric_headers() const override { - const std::string prefix = mode_tag(mode); + std::vector GetMetricHeaders() const override { + const std::string prefix = ModeTag(mode_); return {prefix + "SpTrSV_Runtime_Geomean(ns)", prefix + "SpTrSV_Runtime_Stddev", prefix + "SpTrSV_Runtime_Q25(ns)", prefix + "SpTrSV_Runtime_Q75(ns)"}; } - std::map record_statistics(const TargetObjectType &schedule, std::ofstream &) const override { + std::map RecordStatistics(const TargetObjectType 
&schedule, std::ofstream &) const override { std::map stats; if constexpr (std::is_same_v>> || std::is_same_v>>) { - using index_t + using IndexT = std::conditional_t>>, int32_t, int64_t>; - auto instance = schedule.getInstance(); - Sptrsv sim{instance}; + auto instance = schedule.GetInstance(); + Sptrsv sim{instance}; std::vector perm; - if (mode == NO_PERMUTE) { - sim.setup_csr_no_permutation(schedule); - } else if (mode == LOOP_PROCESSORS) { - perm = schedule_node_permuter_basic(schedule, LOOP_PROCESSORS); - sim.setup_csr_with_permutation(schedule, perm); - } else if (mode == SNAKE_PROCESSORS) { - perm = schedule_node_permuter_basic(schedule, SNAKE_PROCESSORS); - sim.setup_csr_with_permutation(schedule, perm); + if (mode_ == NO_PERMUTE) { + sim.SetupCsrNoPermutation(schedule); + } else if (mode_ == LOOP_PROCESSORS) { + perm = ScheduleNodePermuterBasic(schedule, LOOP_PROCESSORS); + sim.SetupCsrWithPermutation(schedule, perm); + } else if (mode_ == SNAKE_PROCESSORS) { + perm = ScheduleNodePermuterBasic(schedule, SNAKE_PROCESSORS); + sim.SetupCsrWithPermutation(schedule, perm); } else { std::cout << "Wrong type of permutation provided" << std::endl; } - Eigen::VectorXd L_b_ref, L_x_ref; - auto n = instance.getComputationalDag().getCSC()->cols(); - L_x_ref.resize(n); - L_b_ref.resize(n); - auto L_view = (*instance.getComputationalDag().getCSR()).template triangularView(); - L_b_ref.setOnes(); - L_x_ref.setZero(); - L_x_ref = L_view.solve(L_b_ref); - - std::vector times_ns; - Eigen::VectorXd L_x_osp = L_x_ref, L_b_osp = L_b_ref; - - for (int i = 0; i < runs; ++i) { - L_b_osp.setOnes(); - L_x_osp.setZero(); - sim.x = &L_x_osp[0]; - sim.b = &L_b_osp[0]; + Eigen::VectorXd lBRef, lXRef; + auto n = instance.GetComputationalDag().GetCSC()->cols(); + lXRef.resize(n); + lBRef.resize(n); + auto lView = (*instance.GetComputationalDag().GetCSR()).template triangularView(); + lBRef.setOnes(); + lXRef.setZero(); + lXRef = lView.solve(lBRef); + + std::vector timesNs; + 
Eigen::VectorXd lXOsp = lXRef, lBOsp = lBRef; + + for (int i = 0; i < runs_; ++i) { + lBOsp.setOnes(); + lXOsp.setZero(); + sim.x_ = &lXOsp[0]; + sim.b_ = &lBOsp[0]; std::chrono::_V2::system_clock::time_point start, end; - if (mode == NO_PERMUTE) { + if (mode_ == NO_PERMUTE) { start = std::chrono::high_resolution_clock::now(); - sim.lsolve_no_permutation(); + sim.LsolveNoPermutation(); end = std::chrono::high_resolution_clock::now(); } else { start = std::chrono::high_resolution_clock::now(); - sim.lsolve_with_permutation(); + sim.LsolveWithPermutation(); end = std::chrono::high_resolution_clock::now(); } long long elapsed = std::chrono::duration_cast(end - start).count(); - times_ns.push_back(elapsed); + timesNs.push_back(elapsed); } // Geometric mean (requires conversion to double) - double total_log = std::accumulate(times_ns.begin(), times_ns.end(), 0.0, [](double sum, long long val) { + double totalLog = std::accumulate(timesNs.begin(), timesNs.end(), 0.0, [](double sum, long long val) { return sum + std::log(static_cast(val)); }); - long long geom_mean = static_cast(std::exp(total_log / runs)); + long long geomMean = static_cast(std::exp(totalLog / runs_)); // Standard deviation - double mean = std::accumulate(times_ns.begin(), times_ns.end(), 0.0) / runs; - double sq_sum = std::accumulate(times_ns.begin(), times_ns.end(), 0.0, [mean](double acc, long long val) { + double mean = std::accumulate(timesNs.begin(), timesNs.end(), 0.0) / runs_; + double sqSum = std::accumulate(timesNs.begin(), timesNs.end(), 0.0, [mean](double acc, long long val) { double diff = static_cast(val) - mean; return acc + diff * diff; }); - long long stddev = static_cast(std::sqrt(sq_sum / runs)); + long long stddev = static_cast(std::sqrt(sqSum / runs_)); // Quartiles - std::sort(times_ns.begin(), times_ns.end()); - long long q25 = times_ns[runs / 4]; - long long q75 = times_ns[3 * runs / 4]; + std::sort(timesNs.begin(), timesNs.end()); + long long q25 = timesNs[runs_ / 4]; + long long 
q75 = timesNs[3 * runs_ / 4]; - auto to_str = [](long long value) { + auto toStr = [](long long value) { return std::to_string(value); // no decimal points }; // Permute back if needed - if (mode != NO_PERMUTE) { - sim.permute_x_vector(perm); + if (mode_ != NO_PERMUTE) { + sim.PermuteXVector(perm); } - if (!compare_vectors(L_x_ref, L_x_osp)) { + if (!CompareVectors(lXRef, lXOsp)) { std::cout << "Output is not equal" << std::endl; } - const std::string prefix = mode_tag(mode); - stats[prefix + "SpTrSV_Runtime_Geomean(ns)"] = to_str(geom_mean); - stats[prefix + "SpTrSV_Runtime_Stddev"] = to_str(stddev); - stats[prefix + "SpTrSV_Runtime_Q25(ns)"] = to_str(q25); - stats[prefix + "SpTrSV_Runtime_Q75(ns)"] = to_str(q75); + const std::string prefix = ModeTag(mode_); + stats[prefix + "SpTrSV_Runtime_Geomean(ns)"] = toStr(geomMean); + stats[prefix + "SpTrSV_Runtime_Stddev"] = toStr(stddev); + stats[prefix + "SpTrSV_Runtime_Q25(ns)"] = toStr(q25); + stats[prefix + "SpTrSV_Runtime_Q75(ns)"] = toStr(q75); } else { std::cout << "Simulation is not available without the SparseMatrix type" << std::endl; @@ -190,8 +190,8 @@ class BspSptrsvStatsModule : public IStatisticModule { } private: - SCHEDULE_NODE_PERMUTATION_MODES mode; - static constexpr int runs = 100; // number of runs for benchmarking + ScheduleNodePermutationModes mode_; + static constexpr int runs_ = 100; // number of runs for benchmarking }; } // namespace osp diff --git a/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp b/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp index 09a3953c..6d7e1fe4 100644 --- a/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp @@ -32,21 +32,21 @@ template class GraphStatsModule : public IStatisticModule { public: private: - const std::vector metric_headers = {"num_vertices", "num_edges", "avg_wavefront_size"}; + const std::vector metricHeaders_ = {"num_vertices", "num_edges", "avg_wavefront_size"}; 
public: - std::vector get_metric_headers() const override { return metric_headers; } + std::vector GetMetricHeaders() const override { return metricHeaders_; } - std::map record_statistics(const TargetObjectType &schedule, - std::ofstream & /*log_stream*/) const override { + std::map RecordStatistics(const TargetObjectType &schedule, + std::ofstream & /*log_stream*/) const override { std::map stats; - const auto &graph = schedule.getInstance().getComputationalDag(); + const auto &graph = schedule.GetInstance().GetComputationalDag(); - stats["num_vertices"] = std::to_string(graph.num_vertices()); - stats["num_edges"] = std::to_string(graph.num_edges()); + stats["num_vertices"] = std::to_string(graph.NumVertices()); + stats["num_edges"] = std::to_string(graph.NumEdges()); stats["avg_wavefront_size"] - = std::to_string(static_cast(graph.num_vertices()) / static_cast(longestPath(graph))); + = std::to_string(static_cast(graph.NumVertices()) / static_cast(LongestPath(graph))); return stats; } }; diff --git a/apps/test_suite_runner/StatsModules/IStatsModule.hpp b/apps/test_suite_runner/StatsModules/IStatsModule.hpp index 46544af7..c9101d10 100644 --- a/apps/test_suite_runner/StatsModules/IStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/IStatsModule.hpp @@ -37,12 +37,12 @@ class IStatisticModule { // Changed from Graph_t_ to TargetObjectType virtual ~IStatisticModule() = default; // Returns a list of column headers this module provides. - virtual std::vector get_metric_headers() const = 0; + virtual std::vector GetMetricHeaders() const = 0; // Called for each generated target_object. // Returns a map of {header_name: value_string}. 
- virtual std::map record_statistics(const TargetObjectType &target_object, // Changed parameter - std::ofstream &log_stream) const + virtual std::map RecordStatistics(const TargetObjectType &targetObject, // Changed parameter + std::ofstream &logStream) const = 0; }; diff --git a/apps/test_suite_runner/StringToScheduler/get_coarser.hpp b/apps/test_suite_runner/StringToScheduler/get_coarser.hpp index 4020a08e..e0455870 100644 --- a/apps/test_suite_runner/StringToScheduler/get_coarser.hpp +++ b/apps/test_suite_runner/StringToScheduler/get_coarser.hpp @@ -42,257 +42,254 @@ limitations under the License. namespace osp { -template -std::unique_ptr> get_coarser_by_name(const ConfigParser &, - const boost::property_tree::ptree &coarser_algorithm) { - const std::string coarser_name = coarser_algorithm.get_child("name").get_value(); +template +std::unique_ptr> GetCoarserByName(const ConfigParser &, + const boost::property_tree::ptree &coarserAlgorithm) { + const std::string coarserName = coarserAlgorithm.get_child("name").get_value(); - if (coarser_name == "funnel") { - typename FunnelBfs::FunnelBfs_parameters funnel_parameters; - if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { - const auto ¶ms_pt = params_opt.get(); - funnel_parameters.funnel_incoming - = params_pt.get_optional("funnel_incoming").value_or(funnel_parameters.funnel_incoming); - funnel_parameters.use_approx_transitive_reduction = params_pt.get_optional("use_approx_transitive_reduction") - .value_or(funnel_parameters.use_approx_transitive_reduction); + if (coarserName == "funnel") { + typename FunnelBfs::FunnelBfsParameters funnelParameters; + if (auto paramsOpt = coarserAlgorithm.get_child_optional("parameters")) { + const auto ¶msPt = paramsOpt.get(); + funnelParameters.funnelIncoming_ + = paramsPt.get_optional("funnel_incoming").value_or(funnelParameters.funnelIncoming_); + funnelParameters.useApproxTransitiveReduction_ = paramsPt.get_optional("use_approx_transitive_reduction") 
+ .value_or(funnelParameters.useApproxTransitiveReduction_); } - return std::make_unique>(funnel_parameters); + return std::make_unique>(funnelParameters); - } else if (coarser_name == "hdagg") { - auto coarser = std::make_unique>(); - if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { - const auto ¶ms_pt = params_opt.get(); - coarser->set_work_threshold(params_pt.get_optional>("max_work_weight") - .value_or(std::numeric_limits>::max())); - coarser->set_memory_threshold(params_pt.get_optional>("max_memory_weight") - .value_or(std::numeric_limits>::max())); - coarser->set_communication_threshold(params_pt.get_optional>("max_communication_weight") - .value_or(std::numeric_limits>::max())); - coarser->set_super_node_size_threshold( - params_pt.get_optional("max_super_node_size").value_or(std::numeric_limits::max())); + } else if (coarserName == "hdagg") { + auto coarser = std::make_unique>(); + if (auto paramsOpt = coarserAlgorithm.get_child_optional("parameters")) { + const auto ¶msPt = paramsOpt.get(); + coarser->SetWorkThreshold( + paramsPt.get_optional>("max_work_weight").value_or(std::numeric_limits>::max())); + coarser->SetMemoryThreshold( + paramsPt.get_optional>("max_memory_weight").value_or(std::numeric_limits>::max())); + coarser->SetCommunicationThreshold(paramsPt.get_optional>("max_communication_weight") + .value_or(std::numeric_limits>::max())); + coarser->SetSuperNodeSizeThreshold( + paramsPt.get_optional("max_super_node_size").value_or(std::numeric_limits::max())); } return coarser; - } else if (coarser_name == "top_order") { - std::string top_order_strategy = "default"; - if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { - top_order_strategy = params_opt.get().get("strategy", "default"); + } else if (coarserName == "top_order") { + std::string topOrderStrategy = "default"; + if (auto paramsOpt = coarserAlgorithm.get_child_optional("parameters")) { + topOrderStrategy = paramsOpt.get().get("strategy", 
"default"); } - auto set_params = [&](auto &coarser_ptr) { - if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { - const auto ¶ms_pt = params_opt.get(); - coarser_ptr->set_work_threshold(params_pt.get_optional>("work_threshold") - .value_or(std::numeric_limits>::max())); - coarser_ptr->set_memory_threshold(params_pt.get_optional>("memory_threshold") - .value_or(std::numeric_limits>::max())); - coarser_ptr->set_communication_threshold(params_pt.get_optional>("communication_threshold") - .value_or(std::numeric_limits>::max())); - coarser_ptr->set_super_node_size_threshold( - params_pt.get_optional("super_node_size_threshold").value_or(10)); - coarser_ptr->set_node_dist_threshold(params_pt.get_optional("node_dist_threshold").value_or(10)); + auto setParams = [&](auto &coarserPtr) { + if (auto paramsOpt = coarserAlgorithm.get_child_optional("parameters")) { + const auto ¶msPt = paramsOpt.get(); + coarserPtr->SetWorkThreshold(paramsPt.get_optional>("work_threshold") + .value_or(std::numeric_limits>::max())); + coarserPtr->SetMemoryThreshold(paramsPt.get_optional>("memory_threshold") + .value_or(std::numeric_limits>::max())); + coarserPtr->SetCommunicationThreshold(paramsPt.get_optional>("communication_threshold") + .value_or(std::numeric_limits>::max())); + coarserPtr->SetSuperNodeSizeThreshold(paramsPt.get_optional("super_node_size_threshold").value_or(10)); + coarserPtr->SetNodeDistThreshold(paramsPt.get_optional("node_dist_threshold").value_or(10)); } }; - if (top_order_strategy == "bfs" || top_order_strategy == "default") { - auto coarser = std::make_unique>(); - set_params(coarser); + if (topOrderStrategy == "bfs" || topOrderStrategy == "default") { + auto coarser = std::make_unique>(); + setParams(coarser); return coarser; - } else if (top_order_strategy == "dfs") { - auto coarser = std::make_unique>(); - set_params(coarser); + } else if (topOrderStrategy == "dfs") { + auto coarser = std::make_unique>(); + setParams(coarser); return 
coarser; - } else if (top_order_strategy == "locality") { - auto coarser = std::make_unique>(); - set_params(coarser); + } else if (topOrderStrategy == "locality") { + auto coarser = std::make_unique>(); + setParams(coarser); return coarser; - } else if (top_order_strategy == "max_children") { - auto coarser = std::make_unique>(); - set_params(coarser); + } else if (topOrderStrategy == "max_children") { + auto coarser = std::make_unique>(); + setParams(coarser); return coarser; - } else if (top_order_strategy == "random") { - auto coarser = std::make_unique>(); - set_params(coarser); + } else if (topOrderStrategy == "random") { + auto coarser = std::make_unique>(); + setParams(coarser); return coarser; - } else if (top_order_strategy == "gorder") { - auto coarser = std::make_unique>(); - set_params(coarser); + } else if (topOrderStrategy == "gorder") { + auto coarser = std::make_unique>(); + setParams(coarser); return coarser; - } else if (top_order_strategy == "cuthill_mckee_wavefront") { - auto coarser = std::make_unique>(); - set_params(coarser); + } else if (topOrderStrategy == "cuthill_mckee_wavefront") { + auto coarser = std::make_unique>(); + setParams(coarser); return coarser; - } else if (top_order_strategy == "cuthill_mckee_undirected") { - auto coarser = std::make_unique>(); - set_params(coarser); + } else if (topOrderStrategy == "cuthill_mckee_undirected") { + auto coarser = std::make_unique>(); + setParams(coarser); return coarser; } else { - std::cerr << "Warning: Unknown top_order strategy '" << top_order_strategy << "'. Falling back to default (bfs)." + std::cerr << "Warning: Unknown top_order strategy '" << topOrderStrategy << "'. Falling back to default (bfs)." 
<< std::endl; - auto coarser = std::make_unique>(); - set_params(coarser); + auto coarser = std::make_unique>(); + setParams(coarser); return coarser; } - } else if (coarser_name == "Sarkar") { - SarkarParams::Parameters> params; - if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { - const auto ¶ms_pt = params_opt.get(); - params.commCost = params_pt.get_optional>("commCost").value_or(params.commCost); - params.maxWeight = params_pt.get_optional>("maxWeight").value_or(params.maxWeight); - params.smallWeightThreshold - = params_pt.get_optional>("smallWeightThreshold").value_or(params.smallWeightThreshold); - params.useTopPoset = params_pt.get_optional("useTopPoset").value_or(params.useTopPoset); - params.geomDecay = params_pt.get_optional("geomDecay").value_or(params.geomDecay); - params.leniency = params_pt.get_optional("leniency").value_or(params.leniency); + } else if (coarserName == "Sarkar") { + sarkar_params::Parameters> params; + if (auto paramsOpt = coarserAlgorithm.get_child_optional("parameters")) { + const auto ¶msPt = paramsOpt.get(); + params.commCost_ = paramsPt.get_optional>("commCost").value_or(params.commCost_); + params.maxWeight_ = paramsPt.get_optional>("maxWeight").value_or(params.maxWeight_); + params.smallWeightThreshold_ + = paramsPt.get_optional>("smallWeightThreshold").value_or(params.smallWeightThreshold_); + params.useTopPoset_ = paramsPt.get_optional("useTopPoset").value_or(params.useTopPoset_); + params.geomDecay_ = paramsPt.get_optional("geomDecay").value_or(params.geomDecay_); + params.leniency_ = paramsPt.get_optional("leniency").value_or(params.leniency_); - if (auto mode_str_opt = params_pt.get_optional("mode")) { - const std::string &mode_str = mode_str_opt.get(); - if (mode_str == "LINES") { - params.mode = SarkarParams::Mode::LINES; - } else if (mode_str == "FAN_IN_FULL") { - params.mode = SarkarParams::Mode::FAN_IN_FULL; - } else if (mode_str == "FAN_IN_PARTIAL") { - params.mode = 
SarkarParams::Mode::FAN_IN_PARTIAL; - } else if (mode_str == "FAN_OUT_FULL") { - params.mode = SarkarParams::Mode::FAN_OUT_FULL; - } else if (mode_str == "FAN_OUT_PARTIAL") { - params.mode = SarkarParams::Mode::FAN_OUT_PARTIAL; - } else if (mode_str == "LEVEL_EVEN") { - params.mode = SarkarParams::Mode::LEVEL_EVEN; - } else if (mode_str == "LEVEL_ODD") { - params.mode = SarkarParams::Mode::LEVEL_ODD; - } else if (mode_str == "FAN_IN_BUFFER") { - params.mode = SarkarParams::Mode::FAN_IN_BUFFER; - } else if (mode_str == "FAN_OUT_BUFFER") { - params.mode = SarkarParams::Mode::FAN_OUT_BUFFER; - } else if (mode_str == "HOMOGENEOUS_BUFFER") { - params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER; + if (auto modeStrOpt = paramsPt.get_optional("mode")) { + const std::string &modeStr = modeStrOpt.get(); + if (modeStr == "LINES") { + params.mode_ = sarkar_params::Mode::LINES; + } else if (modeStr == "FAN_IN_FULL") { + params.mode_ = sarkar_params::Mode::FAN_IN_FULL; + } else if (modeStr == "FAN_IN_PARTIAL") { + params.mode_ = sarkar_params::Mode::FAN_IN_PARTIAL; + } else if (modeStr == "FAN_OUT_FULL") { + params.mode_ = sarkar_params::Mode::FAN_OUT_FULL; + } else if (modeStr == "FAN_OUT_PARTIAL") { + params.mode_ = sarkar_params::Mode::FAN_OUT_PARTIAL; + } else if (modeStr == "LEVEL_EVEN") { + params.mode_ = sarkar_params::Mode::LEVEL_EVEN; + } else if (modeStr == "LEVEL_ODD") { + params.mode_ = sarkar_params::Mode::LEVEL_ODD; + } else if (modeStr == "FAN_IN_BUFFER") { + params.mode_ = sarkar_params::Mode::FAN_IN_BUFFER; + } else if (modeStr == "FAN_OUT_BUFFER") { + params.mode_ = sarkar_params::Mode::FAN_OUT_BUFFER; + } else if (modeStr == "HOMOGENEOUS_BUFFER") { + params.mode_ = sarkar_params::Mode::HOMOGENEOUS_BUFFER; } else { throw std::invalid_argument( - "Invalid Sarkar mode: " + mode_str + "Invalid Sarkar mode: " + modeStr + "!\nChoose from: LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, " "LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER, 
HOMOGENEOUS_BUFFER."); } } } - return std::make_unique>(params); + return std::make_unique>(params); - } else if (coarser_name == "SquashA") { - SquashAParams::Parameters params; - auto coarser = std::make_unique>(params); - if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { - const auto ¶ms_pt = params_opt.get(); - params.use_structured_poset - = params_pt.get_optional("use_structured_poset").value_or(params.use_structured_poset); - params.use_top_poset = params_pt.get_optional("use_top_poset").value_or(params.use_top_poset); - if (auto mode_str_opt = params_pt.get_optional("mode")) { - if (mode_str_opt.get() == "EDGE_WEIGHT") { - params.mode = SquashAParams::Mode::EDGE_WEIGHT; - } else if (mode_str_opt.get() == "TRIANGLES") { - params.mode = SquashAParams::Mode::TRIANGLES; + } else if (coarserName == "SquashA") { + squash_a_params::Parameters params; + auto coarser = std::make_unique>(params); + if (auto paramsOpt = coarserAlgorithm.get_child_optional("parameters")) { + const auto ¶msPt = paramsOpt.get(); + params.useStructuredPoset_ = paramsPt.get_optional("use_structured_poset").value_or(params.useStructuredPoset_); + params.useTopPoset_ = paramsPt.get_optional("use_top_poset").value_or(params.useTopPoset_); + if (auto modeStrOpt = paramsPt.get_optional("mode")) { + if (modeStrOpt.get() == "EDGE_WEIGHT") { + params.mode_ = squash_a_params::Mode::EDGE_WEIGHT; + } else if (modeStrOpt.get() == "TRIANGLES") { + params.mode_ = squash_a_params::Mode::TRIANGLES; } else { - throw std::invalid_argument("Invalid Squash mode: " + mode_str_opt.get() + throw std::invalid_argument("Invalid Squash mode: " + modeStrOpt.get() + "!\nChoose from: EDGE_WEIGHT, TRIANGLES."); } } } - coarser->setParams(params); + coarser->SetParams(params); return coarser; - } else if (coarser_name == "BspScheduleCoarser") { + } else if (coarserName == "BspScheduleCoarser") { // This coarser requires an initial schedule and must be handled specially by the caller. 
return nullptr; } - throw std::invalid_argument("Invalid coarser name: " + coarser_name); + throw std::invalid_argument("Invalid coarser name: " + coarserName); } -template -std::unique_ptr> get_multilevel_coarser_by_name( - const ConfigParser &, const boost::property_tree::ptree &coarser_algorithm) { - const std::string coarser_name = coarser_algorithm.get_child("name").get_value(); +template +std::unique_ptr> GetMultilevelCoarserByName( + const ConfigParser &, const boost::property_tree::ptree &coarserAlgorithm) { + const std::string coarserName = coarserAlgorithm.get_child("name").get_value(); - if (coarser_name == "Sarkar") { - auto coarser = std::make_unique>(); - SarkarParams::MulParameters> ml_params; + if (coarserName == "Sarkar") { + auto coarser = std::make_unique>(); + sarkar_params::MulParameters> mlParams; - if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { - const auto ¶ms_pt = params_opt.get(); - ml_params.seed = params_pt.get_optional("seed").value_or(ml_params.seed); - ml_params.geomDecay = params_pt.get_optional("geomDecay").value_or(ml_params.geomDecay); - ml_params.leniency = params_pt.get_optional("leniency").value_or(ml_params.leniency); - if (params_pt.get_child_optional("commCostVec")) { - ml_params.commCostVec.clear(); - for (const auto &item : params_pt.get_child("commCostVec")) { - ml_params.commCostVec.push_back(item.second.get_value>()); + if (auto paramsOpt = coarserAlgorithm.get_child_optional("parameters")) { + const auto ¶msPt = paramsOpt.get(); + mlParams.seed_ = paramsPt.get_optional("seed").value_or(mlParams.seed_); + mlParams.geomDecay_ = paramsPt.get_optional("geomDecay").value_or(mlParams.geomDecay_); + mlParams.leniency_ = paramsPt.get_optional("leniency").value_or(mlParams.leniency_); + if (paramsPt.get_child_optional("commCostVec")) { + mlParams.commCostVec_.clear(); + for (const auto &item : paramsPt.get_child("commCostVec")) { + mlParams.commCostVec_.push_back(item.second.get_value>()); } - 
std::sort(ml_params.commCostVec.begin(), ml_params.commCostVec.end()); + std::sort(mlParams.commCostVec_.begin(), mlParams.commCostVec_.end()); } - ml_params.maxWeight = params_pt.get_optional>("maxWeight").value_or(ml_params.maxWeight); - ml_params.smallWeightThreshold - = params_pt.get_optional>("smallWeightThreshold").value_or(ml_params.smallWeightThreshold); - ml_params.max_num_iteration_without_changes = params_pt.get_optional("max_num_iteration_without_changes") - .value_or(ml_params.max_num_iteration_without_changes); + mlParams.maxWeight_ = paramsPt.get_optional>("maxWeight").value_or(mlParams.maxWeight_); + mlParams.smallWeightThreshold_ + = paramsPt.get_optional>("smallWeightThreshold").value_or(mlParams.smallWeightThreshold_); + mlParams.maxNumIterationWithoutChanges_ = paramsPt.get_optional("max_num_iteration_without_changes") + .value_or(mlParams.maxNumIterationWithoutChanges_); - if (auto mode_str_opt = params_pt.get_optional("buffer_merge_mode")) { - const std::string &mode_str = mode_str_opt.get(); - if (mode_str == "OFF") { - ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::OFF; - } else if (mode_str == "FAN_IN") { - ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_IN; - } else if (mode_str == "FAN_OUT") { - ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_OUT; - } else if (mode_str == "HOMOGENEOUS") { - ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::HOMOGENEOUS; - } else if (mode_str == "FULL") { - ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FULL; + if (auto modeStrOpt = paramsPt.get_optional("buffer_merge_mode")) { + const std::string &modeStr = modeStrOpt.get(); + if (modeStr == "OFF") { + mlParams.bufferMergeMode_ = sarkar_params::BufferMergeMode::OFF; + } else if (modeStr == "FAN_IN") { + mlParams.bufferMergeMode_ = sarkar_params::BufferMergeMode::FAN_IN; + } else if (modeStr == "FAN_OUT") { + mlParams.bufferMergeMode_ = sarkar_params::BufferMergeMode::FAN_OUT; + } else 
if (modeStr == "HOMOGENEOUS") { + mlParams.bufferMergeMode_ = sarkar_params::BufferMergeMode::HOMOGENEOUS; + } else if (modeStr == "FULL") { + mlParams.bufferMergeMode_ = sarkar_params::BufferMergeMode::FULL; } else { - throw std::invalid_argument("Invalid Sarkar Buffer Merge mode: " + mode_str + throw std::invalid_argument("Invalid Sarkar Buffer Merge mode: " + modeStr + "!\nChoose from: OFF, FAN_IN, FAN_OUT, HOMOGENEOUS, FULL."); } } } - coarser->setParameters(ml_params); + coarser->SetParameters(mlParams); return coarser; - } else if (coarser_name == "SquashA") { - auto coarser = std::make_unique>(); - SquashAParams::Parameters params; + } else if (coarserName == "SquashA") { + auto coarser = std::make_unique>(); + squash_a_params::Parameters params; - if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { - const auto ¶ms_pt = params_opt.get(); - params.geom_decay_num_nodes - = params_pt.get_optional("geom_decay_num_nodes").value_or(params.geom_decay_num_nodes); - params.poisson_par = params_pt.get_optional("poisson_par").value_or(params.poisson_par); - params.noise = params_pt.get_optional("noise").value_or(params.noise); - params.num_rep_without_node_decrease - = params_pt.get_optional("num_rep_without_node_decrease").value_or(params.num_rep_without_node_decrease); - params.temperature_multiplier - = params_pt.get_optional("temperature_multiplier").value_or(params.temperature_multiplier); - params.number_of_temperature_increases = params_pt.get_optional("number_of_temperature_increases") - .value_or(params.number_of_temperature_increases); + if (auto paramsOpt = coarserAlgorithm.get_child_optional("parameters")) { + const auto ¶msPt = paramsOpt.get(); + params.geomDecayNumNodes_ = paramsPt.get_optional("geom_decay_num_nodes").value_or(params.geomDecayNumNodes_); + params.poissonPar_ = paramsPt.get_optional("poisson_par").value_or(params.poissonPar_); + params.noise_ = paramsPt.get_optional("noise").value_or(params.noise_); + 
params.numRepWithoutNodeDecrease_ + = paramsPt.get_optional("num_rep_without_node_decrease").value_or(params.numRepWithoutNodeDecrease_); + params.temperatureMultiplier_ + = paramsPt.get_optional("temperature_multiplier").value_or(params.temperatureMultiplier_); + params.numberOfTemperatureIncreases_ + = paramsPt.get_optional("number_of_temperature_increases").value_or(params.numberOfTemperatureIncreases_); - if (auto mode_str_opt = params_pt.get_optional("mode")) { - if (mode_str_opt.get() == "EDGE_WEIGHT") { - params.mode = SquashAParams::Mode::EDGE_WEIGHT; - } else if (mode_str_opt.get() == "TRIANGLES") { - params.mode = SquashAParams::Mode::TRIANGLES; + if (auto modeStrOpt = paramsPt.get_optional("mode")) { + if (modeStrOpt.get() == "EDGE_WEIGHT") { + params.mode_ = squash_a_params::Mode::EDGE_WEIGHT; + } else if (modeStrOpt.get() == "TRIANGLES") { + params.mode_ = squash_a_params::Mode::TRIANGLES; } else { - throw std::invalid_argument("Invalid Squash mode: " + mode_str_opt.get() + throw std::invalid_argument("Invalid Squash mode: " + modeStrOpt.get() + "!\nChoose from: EDGE_WEIGHT, TRIANGLES."); } } - coarser->setMinimumNumberVertices(params_pt.get_optional("min_nodes").value_or(1)); + coarser->SetMinimumNumberVertices(paramsPt.get_optional("min_nodes").value_or(1)); } - coarser->setParams(params); + coarser->SetParams(params); return coarser; } - throw std::invalid_argument("Invalid multilevel coarser name: " + coarser_name); + throw std::invalid_argument("Invalid multilevel coarser name: " + coarserName); } } // namespace osp diff --git a/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp b/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp index 3ce4f75a..4d8fa110 100644 --- a/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp +++ b/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp @@ -34,31 +34,31 @@ limitations under the License. 
namespace osp { -const std::set get_available_bsp_recomp_scheduler_names() { return {"GreedyRecomputer"}; } +const std::set GetAvailableBspRecompSchedulerNames() { return {"GreedyRecomputer"}; } -template -RETURN_STATUS run_bsp_recomp_scheduler(const ConfigParser &parser, - const boost::property_tree::ptree &algorithm, - BspScheduleRecomp &schedule) { +template +ReturnStatus RunBspRecompScheduler(const ConfigParser &parser, + const boost::property_tree::ptree &algorithm, + BspScheduleRecomp &schedule) { // const unsigned timeLimit = parser.global_params.get_child("timeLimit").get_value(); // const bool use_memory_constraint = parser.global_params.get_child("use_memory_constraints").get_value(); std::cout << "Running algorithm: " << algorithm.get_child("name").get_value() << std::endl; if (algorithm.get_child("name").get_value() == "GreedyRecomputer") { - BspSchedule bsp_schedule(schedule.getInstance()); + BspSchedule bspSchedule(schedule.GetInstance()); - RETURN_STATUS status = run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), bsp_schedule); + ReturnStatus status = RunBspScheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), bspSchedule); - BspScheduleCS initial_schedule(std::move(bsp_schedule)); + BspScheduleCS initialSchedule(std::move(bspSchedule)); - if (status == RETURN_STATUS::ERROR) { - return RETURN_STATUS::ERROR; + if (status == ReturnStatus::ERROR) { + return ReturnStatus::ERROR; } - GreedyRecomputer scheduler; + GreedyRecomputer scheduler; - return scheduler.computeRecompSchedule(initial_schedule, schedule); + return scheduler.ComputeRecompSchedule(initialSchedule, schedule); } else { throw std::invalid_argument("Parameter error: Unknown algorithm.\n"); diff --git a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp index 996faa08..067d1360 100644 --- a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp +++ 
b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp @@ -40,8 +40,8 @@ limitations under the License. #include "osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp" #include "osp/bsp/scheduler/ImprovementScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp" #include "osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/bsp/scheduler/Serial.hpp" @@ -56,7 +56,7 @@ limitations under the License. namespace osp { -const std::set get_available_bsp_scheduler_names() { +const std::set GetAvailableBspSchedulerNames() { return {"Serial", "GreedyBsp", "GrowLocal", @@ -73,104 +73,102 @@ const std::set get_available_bsp_scheduler_names() { "MultiLevel"}; } -template -std::unique_ptr> get_bsp_improver_by_name(const ConfigParser &, - const boost::property_tree::ptree &algorithm) { - const std::string improver_name = algorithm.get_child("name").get_value(); - - if (improver_name == "kl_total_comm") { - return std::make_unique>(); - } else if (improver_name == "kl_total_lambda_comm") { - return std::make_unique>(); - } else if (improver_name == "hill_climb") { - return std::make_unique>(); +template +std::unique_ptr> GetBspImproverByName(const ConfigParser &, + const boost::property_tree::ptree &algorithm) { + const std::string improverName = algorithm.get_child("name").get_value(); + + if (improverName == "kl_total_comm") { + return std::make_unique>(); + } else if (improverName == "kl_total_lambda_comm") { + return std::make_unique>(); + } else if (improverName == "hill_climb") { + return std::make_unique>(); } - throw std::invalid_argument("Invalid improver name: " + improver_name); + throw 
std::invalid_argument("Invalid improver name: " + improverName); } -template -std::unique_ptr> get_base_bsp_scheduler_by_name(const ConfigParser &parser, - const boost::property_tree::ptree &algorithm) { +template +std::unique_ptr> GetBaseBspSchedulerByName(const ConfigParser &parser, + const boost::property_tree::ptree &algorithm) { const std::string id = algorithm.get_child("id").get_value(); if (id == "Serial") { - auto scheduler = std::make_unique>(); + auto scheduler = std::make_unique>(); return scheduler; } else if (id == "GreedyBsp") { - float max_percent_idle_processors + float maxPercentIdleProcessors = algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value(); - bool increase_parallelism_in_new_superstep + bool increaseParallelismInNewSuperstep = algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value(); - auto scheduler - = std::make_unique>(max_percent_idle_processors, increase_parallelism_in_new_superstep); + auto scheduler = std::make_unique>(maxPercentIdleProcessors, increaseParallelismInNewSuperstep); return scheduler; } else if (id == "GrowLocal") { - GrowLocalAutoCores_Params> params; - params.minSuperstepSize = algorithm.get_child("parameters").get_child("minSuperstepSize").get_value(); - params.syncCostMultiplierMinSuperstepWeight - = algorithm.get_child("parameters").get_child("syncCostMultiplierMinSuperstepWeight").get_value>(); - params.syncCostMultiplierParallelCheck - = algorithm.get_child("parameters").get_child("syncCostMultiplierParallelCheck").get_value>(); + GrowLocalAutoCoresParams> params; + params.minSuperstepSize_ = algorithm.get_child("parameters").get_child("minSuperstepSize").get_value(); + params.syncCostMultiplierMinSuperstepWeight_ + = algorithm.get_child("parameters").get_child("syncCostMultiplierMinSuperstepWeight").get_value>(); + params.syncCostMultiplierParallelCheck_ + = 
algorithm.get_child("parameters").get_child("syncCostMultiplierParallelCheck").get_value>(); - return std::make_unique>(params); + return std::make_unique>(params); } else if (id == "BspLocking") { - float max_percent_idle_processors + float maxPercentIdleProcessors = algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value(); - bool increase_parallelism_in_new_superstep + bool increaseParallelismInNewSuperstep = algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value(); - auto scheduler = std::make_unique>(max_percent_idle_processors, increase_parallelism_in_new_superstep); + auto scheduler = std::make_unique>(maxPercentIdleProcessors, increaseParallelismInNewSuperstep); return scheduler; } else if (id == "Cilk") { - auto scheduler = std::make_unique>(); - algorithm.get_child("parameters").get_child("mode").get_value() == "SJF" ? scheduler->setMode(CilkMode::SJF) - : scheduler->setMode(CilkMode::CILK); + auto scheduler = std::make_unique>(); + algorithm.get_child("parameters").get_child("mode").get_value() == "SJF" ? scheduler->SetMode(CilkMode::SJF) + : scheduler->SetMode(CilkMode::CILK); return scheduler; } else if (id == "Etf") { - auto scheduler = std::make_unique>(); + auto scheduler = std::make_unique>(); algorithm.get_child("parameters").get_child("mode").get_value() == "BL_EST" - ? scheduler->setMode(EtfMode::BL_EST) - : scheduler->setMode(EtfMode::ETF); + ? 
scheduler->SetMode(EtfMode::BL_EST) + : scheduler->SetMode(EtfMode::ETF); return scheduler; } else if (id == "GreedyRandom") { - auto scheduler = std::make_unique>(); + auto scheduler = std::make_unique>(); return scheduler; } else if (id == "GreedyChildren") { - auto scheduler = std::make_unique>(); + auto scheduler = std::make_unique>(); return scheduler; } else if (id == "Variance") { - float max_percent_idle_processors + float maxPercentIdleProcessors = algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value(); - bool increase_parallelism_in_new_superstep + bool increaseParallelismInNewSuperstep = algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value(); - auto scheduler - = std::make_unique>(max_percent_idle_processors, increase_parallelism_in_new_superstep); + auto scheduler = std::make_unique>(maxPercentIdleProcessors, increaseParallelismInNewSuperstep); return scheduler; } - if constexpr (is_constructable_cdag_v || is_direct_constructable_cdag_v) { + if constexpr (isConstructableCdagV || isDirectConstructableCdagV) { if (id == "MultiHC") { - auto scheduler = std::make_unique>(); - const unsigned timeLimit = parser.global_params.get_child("timeLimit").get_value(); + auto scheduler = std::make_unique>(); + const unsigned timeLimit = parser.globalParams_.get_child("timeLimit").get_value(); unsigned step = algorithm.get_child("parameters").get_child("hill_climbing_steps").get_value(); - scheduler->setNumberOfHcSteps(step); + scheduler->SetNumberOfHcSteps(step); - const double contraction_rate = algorithm.get_child("parameters").get_child("contraction_rate").get_value(); - scheduler->setContractionRate(contraction_rate); - scheduler->useLinearRefinementSteps(20U); - scheduler->setMinTargetNrOfNodes(100U); + const double contractionRate = algorithm.get_child("parameters").get_child("contraction_rate").get_value(); + scheduler->SetContractionRate(contractionRate); + 
scheduler->UseLinearRefinementSteps(20U); + scheduler->SetMinTargetNrOfNodes(100U); return scheduler; } } @@ -178,110 +176,105 @@ std::unique_ptr> get_base_bsp_scheduler_by_name(const ConfigP throw std::invalid_argument("Invalid base scheduler name: " + id); } -template -RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, - const boost::property_tree::ptree &algorithm, - BspSchedule &schedule) { - using vertex_type_t_or_default - = std::conditional_t, v_type_t, unsigned>; - using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; - using boost_graph_t - = boost_graph, v_commw_t, v_memw_t, vertex_type_t_or_default, edge_commw_t_or_default>; +template +ReturnStatus RunBspScheduler(const ConfigParser &parser, + const boost::property_tree::ptree &algorithm, + BspSchedule &schedule) { + using VertexTypeTOrDefault = std::conditional_t, VTypeT, unsigned>; + using EdgeCommwTOrDefault = std::conditional_t, ECommwT, VCommwT>; + using BoostGraphT = BoostGraph, VCommwT, VMemwT, VertexTypeTOrDefault, EdgeCommwTOrDefault>; const std::string id = algorithm.get_child("id").get_value(); std::cout << "Running algorithm: " << id << std::endl; if (id == "LocalSearch") { - RETURN_STATUS status = run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule); - if (status == RETURN_STATUS::ERROR) { - return RETURN_STATUS::ERROR; + ReturnStatus status = RunBspScheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule); + if (status == ReturnStatus::ERROR) { + return ReturnStatus::ERROR; } - std::unique_ptr> improver - = get_bsp_improver_by_name(parser, algorithm.get_child("parameters").get_child("improver")); - return improver->improveSchedule(schedule); + std::unique_ptr> improver + = GetBspImproverByName(parser, algorithm.get_child("parameters").get_child("improver")); + return improver->ImproveSchedule(schedule); #ifdef COPT } else if (id == "FullILP") { - CoptFullScheduler scheduler; - const unsigned 
timeLimit = parser.global_params.get_child("timeLimit").get_value(); + CoptFullScheduler scheduler; + const unsigned timeLimit = parser.globalParams_.get_child("timeLimit").get_value(); // max supersteps - scheduler.setMaxNumberOfSupersteps( + scheduler.SetMaxNumberOfSupersteps( algorithm.get_child("parameters").get_child("max_number_of_supersteps").get_value()); // initial solution if (algorithm.get_child("parameters").get_child("use_initial_solution").get_value()) { - std::string init_sched + std::string initSched = algorithm.get_child("parameters").get_child("initial_solution_scheduler").get_value(); - if (init_sched == "FullILP") { + if (initSched == "FullILP") { throw std::invalid_argument("Parameter error: Initial solution cannot be FullILP.\n"); } - BspSchedule initial_schedule(schedule.getInstance()); + BspSchedule initialSchedule(schedule.GetInstance()); - RETURN_STATUS status = run_bsp_scheduler( - parser, algorithm.get_child("parameters").get_child("initial_solution_scheduler"), initial_schedule); + ReturnStatus status = RunBspScheduler( + parser, algorithm.get_child("parameters").get_child("initial_solution_scheduler"), initialSchedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + if (status != ReturnStatus::OSP_SUCCESS && status != ReturnStatus::BEST_FOUND) { throw std::invalid_argument("Error while computing initial solution.\n"); } - BspScheduleCS initial_schedule_cs(initial_schedule); - scheduler.setInitialSolutionFromBspSchedule(initial_schedule_cs); + BspScheduleCS initialScheduleCs(initialSchedule); + scheduler.SetInitialSolutionFromBspSchedule(initialScheduleCs); } // intermediate solutions if (algorithm.get_child("parameters").get_child("write_intermediate_solutions").get_value()) { - scheduler.enableWriteIntermediateSol( + scheduler.EnableWriteIntermediateSol( algorithm.get_child("parameters").get_child("intermediate_solutions_directory").get_value(), 
algorithm.get_child("parameters").get_child("intermediate_solutions_prefix").get_value()); } - return scheduler.computeScheduleWithTimeLimit(schedule, timeLimit); + return scheduler.ComputeScheduleWithTimeLimit(schedule, timeLimit); #endif } else if (id == "Coarser") { - std::unique_ptr> coarser - = get_coarser_by_name(parser, algorithm.get_child("parameters").get_child("coarser")); - const auto &instance = schedule.getInstance(); - BspInstance instance_coarse; - std::vector> reverse_vertex_map; - bool status - = coarser->coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), reverse_vertex_map); + std::unique_ptr> coarser + = GetCoarserByName(parser, algorithm.get_child("parameters").get_child("coarser")); + const auto &instance = schedule.GetInstance(); + BspInstance instanceCoarse; + std::vector> reverseVertexMap; + bool status = coarser->CoarsenDag(instance.GetComputationalDag(), instanceCoarse.GetComputationalDag(), reverseVertexMap); if (!status) { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } - instance_coarse.getArchitecture() = instance.getArchitecture(); - instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); - BspSchedule schedule_coarse(instance_coarse); + instanceCoarse.GetArchitecture() = instance.GetArchitecture(); + instanceCoarse.SetNodeProcessorCompatibility(instance.GetProcessorCompatibilityMatrix()); + BspSchedule scheduleCoarse(instanceCoarse); - const auto status_coarse - = run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule_coarse); - if (status_coarse != RETURN_STATUS::OSP_SUCCESS and status_coarse != RETURN_STATUS::BEST_FOUND) { - return status_coarse; + const auto statusCoarse = RunBspScheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), scheduleCoarse); + if (statusCoarse != ReturnStatus::OSP_SUCCESS and statusCoarse != ReturnStatus::BEST_FOUND) { + return statusCoarse; } - status = 
coarser_util::pull_back_schedule(schedule_coarse, reverse_vertex_map, schedule); + status = coarser_util::PullBackSchedule(scheduleCoarse, reverseVertexMap, schedule); if (!status) { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } else if (id == "MultiLevel") { - std::unique_ptr> ml_coarser - = get_multilevel_coarser_by_name(parser, - algorithm.get_child("parameters").get_child("coarser")); - std::unique_ptr> improver - = get_bsp_improver_by_name(parser, algorithm.get_child("parameters").get_child("improver")); - std::unique_ptr> scheduler - = get_base_bsp_scheduler_by_name(parser, algorithm.get_child("parameters").get_child("scheduler")); - - MultilevelCoarseAndSchedule coarse_and_schedule(*scheduler, *improver, *ml_coarser); - return coarse_and_schedule.computeSchedule(schedule); + std::unique_ptr> mlCoarser + = GetMultilevelCoarserByName(parser, algorithm.get_child("parameters").get_child("coarser")); + std::unique_ptr> improver + = GetBspImproverByName(parser, algorithm.get_child("parameters").get_child("improver")); + std::unique_ptr> scheduler + = GetBaseBspSchedulerByName(parser, algorithm.get_child("parameters").get_child("scheduler")); + + MultilevelCoarseAndSchedule coarseAndSchedule(*scheduler, *improver, *mlCoarser); + return coarseAndSchedule.ComputeSchedule(schedule); } else { - auto scheduler = get_base_bsp_scheduler_by_name(parser, algorithm); - return scheduler->computeSchedule(schedule); + auto scheduler = GetBaseBspSchedulerByName(parser, algorithm); + return scheduler->ComputeSchedule(schedule); } } diff --git a/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp b/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp index a03bcc67..6179c4cc 100644 --- a/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp +++ b/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp @@ -30,12 +30,12 @@ limitations under the License. 
namespace osp { -const std::set get_available_pebbler_names() { return {"Pebbler"}; } +const std::set GetAvailablePebblerNames() { return {"Pebbler"}; } -template -RETURN_STATUS run_pebbler(const ConfigParser &parser, - const boost::property_tree::ptree &algorithm, - PebblingSchedule &schedule) { +template +ReturnStatus RunPebbler(const ConfigParser &parser, + const boost::property_tree::ptree &algorithm, + PebblingSchedule &schedule) { // const unsigned timeLimit = parser.global_params.get_child("timeLimit").get_value(); // const bool use_memory_constraint = parser.global_params.get_child("use_memory_constraints").get_value(); diff --git a/include/osp/auxiliary/Balanced_Coin_Flips.hpp b/include/osp/auxiliary/Balanced_Coin_Flips.hpp index 44676800..dacbbc19 100644 --- a/include/osp/auxiliary/Balanced_Coin_Flips.hpp +++ b/include/osp/auxiliary/Balanced_Coin_Flips.hpp @@ -26,141 +26,141 @@ limitations under the License. namespace osp { -enum CoinType { Thue_Morse, Biased_Randomly }; +enum CoinType { THUE_MORSE, BIASED_RANDOMLY }; class BalancedCoinFlips { public: /// @brief Returns true/false in a pseudo-random balanced manner /// @return true/false - virtual bool get_flip() = 0; + virtual bool GetFlip() = 0; virtual ~BalancedCoinFlips() = default; }; -class Biased_Random : public BalancedCoinFlips { +class BiasedRandom : public BalancedCoinFlips { public: - bool get_flip() override { - int genuine_random_size = 3; - int die_size = 2 * genuine_random_size + abs(true_bias); - std::uniform_int_distribution distrib(0, die_size - 1); - int flip = distrib(gen); - if (true_bias >= 0) { - if (flip >= genuine_random_size) { - true_bias--; + bool GetFlip() override { + int genuineRandomSize = 3; + int dieSize = 2 * genuineRandomSize + abs(trueBias_); + std::uniform_int_distribution distrib(0, dieSize - 1); + int flip = distrib(gen_); + if (trueBias_ >= 0) { + if (flip >= genuineRandomSize) { + trueBias_--; return true; } else { - true_bias++; + trueBias_++; return false; } } 
else { - if (flip >= genuine_random_size) { - true_bias++; + if (flip >= genuineRandomSize) { + trueBias_++; return false; } else { - true_bias--; + trueBias_--; return true; } } throw std::runtime_error("Coin landed on its side!"); } - Biased_Random(std::size_t seed = 1729U) : gen(seed), true_bias(0) {}; + BiasedRandom(std::size_t seed = 1729U) : gen_(seed), trueBias_(0) {}; private: /// @brief Random number generator - std::mt19937 gen; + std::mt19937 gen_; /// @brief Biases the coin towards true - int true_bias; + int trueBias_; }; /// @brief Generates the Thue Morse Sequence /// @param shift Starting point in the sequence -class Thue_Morse_Sequence : public BalancedCoinFlips { +class ThueMorseSequence : public BalancedCoinFlips { public: - Thue_Morse_Sequence() { - next = static_cast(randInt(1024)); - sequence.emplace_back(false); + ThueMorseSequence() { + next_ = static_cast(RandInt(1024)); + sequence_.emplace_back(false); } - Thue_Morse_Sequence(long unsigned int shift) : next(shift) { sequence.emplace_back(false); } + ThueMorseSequence(long unsigned int shift) : next_(shift) { sequence_.emplace_back(false); } - bool get_flip() override { - for (long unsigned int i = sequence.size(); i <= next; i++) { + bool GetFlip() override { + for (long unsigned int i = sequence_.size(); i <= next_; i++) { if (i % 2 == 0) { - sequence.emplace_back(sequence[i / 2]); + sequence_.emplace_back(sequence_[i / 2]); } else { - sequence.emplace_back(!sequence[i / 2]); + sequence_.emplace_back(!sequence_[i / 2]); } } - return sequence[next++]; + return sequence_[next_++]; } private: - long unsigned int next; - std::vector sequence; + long unsigned int next_; + std::vector sequence_; }; /// @brief Coin flip with 1/3 chance to return previous toss otherwise fair toss -class Repeat_Chance : public BalancedCoinFlips { +class RepeatChance : public BalancedCoinFlips { public: - bool get_flip() override { - if (randInt(3) > 0) { - previous = (randInt(2) == 0); + bool GetFlip() override { 
+ if (RandInt(3) > 0) { + previous_ = (RandInt(2) == 0); } - return previous; + return previous_; } - Repeat_Chance() { previous = (randInt(2) == 0); }; + RepeatChance() { previous_ = (RandInt(2) == 0); }; private: - bool previous; + bool previous_; }; -class Biased_Random_with_side_bias : public BalancedCoinFlips { +class BiasedRandomWithSideBias : public BalancedCoinFlips { public: - bool get_flip() override { - unsigned genuine_random_size = 3; + bool GetFlip() override { + unsigned genuineRandomSize = 3; - const long long abs_true_bias = std::abs(true_bias); - if (abs_true_bias > std::numeric_limits::max()) { + const long long absTrueBias = std::abs(trueBias_); + if (absTrueBias > std::numeric_limits::max()) { throw std::runtime_error("true_bias is too large!"); } - unsigned die_size = (side_ratio.first + side_ratio.second) * genuine_random_size + static_cast(abs_true_bias); + unsigned dieSize = (sideRatio_.first + sideRatio_.second) * genuineRandomSize + static_cast(absTrueBias); - if (die_size > static_cast(std::numeric_limits::max())) { + if (dieSize > static_cast(std::numeric_limits::max())) { throw std::runtime_error("die_size is too large!"); } - unsigned flip = static_cast(randInt(static_cast(die_size))); - if (true_bias >= 0) { - if (flip >= side_ratio.second * genuine_random_size) { - true_bias -= side_ratio.second; + unsigned flip = static_cast(RandInt(static_cast(dieSize))); + if (trueBias_ >= 0) { + if (flip >= sideRatio_.second * genuineRandomSize) { + trueBias_ -= sideRatio_.second; return true; } else { - true_bias += side_ratio.first; + trueBias_ += sideRatio_.first; return false; } } else { - if (flip >= side_ratio.first * genuine_random_size) { - true_bias += side_ratio.first; + if (flip >= sideRatio_.first * genuineRandomSize) { + trueBias_ += sideRatio_.first; return false; } else { - true_bias -= side_ratio.second; + trueBias_ -= sideRatio_.second; return true; } } throw std::runtime_error("Coin landed on its side!"); } - 
Biased_Random_with_side_bias(const std::pair side_ratio_ = std::make_pair(1, 1)) - : true_bias(0), side_ratio(side_ratio_) {}; + BiasedRandomWithSideBias(const std::pair sideRatio = std::make_pair(1, 1)) + : trueBias_(0), sideRatio_(sideRatio) {}; private: /// @brief Biases the coin towards true - long long int true_bias; + long long int trueBias_; /// @brief ratio true : false - const std::pair side_ratio; + const std::pair sideRatio_; }; } // namespace osp diff --git a/include/osp/auxiliary/datastructures/bit_mask.hpp b/include/osp/auxiliary/datastructures/bit_mask.hpp index a60c44b9..4526adca 100644 --- a/include/osp/auxiliary/datastructures/bit_mask.hpp +++ b/include/osp/auxiliary/datastructures/bit_mask.hpp @@ -23,16 +23,16 @@ limitations under the License. namespace osp { struct BitMask { - std::vector mask; + std::vector mask_; - BitMask(std::size_t n) : mask(std::vector(n, false)) {}; + BitMask(std::size_t n) : mask_(std::vector(n, false)) {}; BitMask &operator++() { - for (std::size_t i = 0; i < mask.size(); ++i) { - if (mask[i]) { - mask[i] = false; + for (std::size_t i = 0; i < mask_.size(); ++i) { + if (mask_[i]) { + mask_[i] = false; } else { - mask[i] = true; + mask_[i] = true; break; } } diff --git a/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp b/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp index 212c28ab..7b49a9af 100644 --- a/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp +++ b/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp @@ -39,197 +39,197 @@ namespace osp { * @tparam D The number of children for each node (the 'd' in d-ary). Must be >= 2. * @tparam Compare The comparison function object type. 
*/ -template +template class DaryHeap { - static_assert(D >= 2, "D-ary heap must have at least 2 children per node."); + static_assert(d >= 2, "D-ary heap must have at least 2 children per node."); private: struct NodeInfo { - Value value; - size_t position; + Value value_; + size_t position_; }; public: - bool is_empty() const noexcept { return heap.empty(); } + bool IsEmpty() const noexcept { return heap_.empty(); } - size_t size() const noexcept { return heap.size(); } + size_t size() const noexcept { return heap_.size(); } - bool contains(const Key &key) const { return node_info.count(key); } + bool Contains(const Key &key) const { return nodeInfo_.count(key); } - void push(const Key &key, const Value &value) { - // emplace and check for success to avoid a separate lookup with contains() - auto [it, success] = node_info.emplace(key, NodeInfo{value, heap.size()}); + void Push(const Key &key, const Value &value) { + // emplace and check for success to avoid a separate lookup with Contains() + auto [it, success] = nodeInfo_.emplace(key, NodeInfo{value, heap_.size()}); if (!success) { throw std::invalid_argument("Key already exists in the heap."); } - heap.push_back(key); - sift_up(it->second.position); + heap_.push_back(key); + SiftUp(it->second.position_); } - const Key &top() const { - if (is_empty()) { + const Key &Top() const { + if (IsEmpty()) { throw std::out_of_range("Heap is empty."); } - return heap.front(); + return heap_.front(); } - Key pop() { - if (is_empty()) { + Key Pop() { + if (IsEmpty()) { throw std::out_of_range("Heap is empty."); } - Key top_key = std::move(heap.front()); + Key topKey = std::move(heap_.front()); - node_info.erase(top_key); + nodeInfo_.erase(topKey); - if (heap.size() > 1) { - heap[0] = std::move(heap.back()); - heap.pop_back(); - node_info.at(heap[0]).position = 0; - sift_down(0); + if (heap_.size() > 1) { + heap_[0] = std::move(heap_.back()); + heap_.pop_back(); + nodeInfo_.at(heap_[0]).position_ = 0; + SiftDown(0); } else { 
- heap.pop_back(); + heap_.pop_back(); } - return top_key; + return topKey; } - void update(const Key &key, const Value &new_value) { - auto it = node_info.find(key); - if (it == node_info.end()) { + void Update(const Key &key, const Value &newValue) { + auto it = nodeInfo_.find(key); + if (it == nodeInfo_.end()) { throw std::invalid_argument("Key does not exist in the heap."); } auto &info = it->second; - const Value old_value = info.value; - - if (comp(new_value, old_value)) { - info.value = new_value; - sift_up(info.position); - } else if (comp(old_value, new_value)) { - info.value = new_value; - sift_down(info.position); + const Value oldValue = info.value_; + + if (comp_(newValue, oldValue)) { + info.value_ = newValue; + SiftUp(info.position_); + } else if (comp_(oldValue, newValue)) { + info.value_ = newValue; + SiftDown(info.position_); } } - void erase(const Key &key) { - auto it = node_info.find(key); - if (it == node_info.end()) { + void Erase(const Key &key) { + auto it = nodeInfo_.find(key); + if (it == nodeInfo_.end()) { throw std::invalid_argument("Key does not exist in the heap."); } - size_t index = it->second.position; - size_t last_index = heap.size() - 1; + size_t index = it->second.position_; + size_t lastIndex = heap_.size() - 1; - if (index != last_index) { - swap_nodes(index, last_index); - heap.pop_back(); - node_info.erase(it); + if (index != lastIndex) { + SwapNodes(index, lastIndex); + heap_.pop_back(); + nodeInfo_.erase(it); - const Key &moved_key = heap[index]; - if (index > 0 && comp(node_info.at(moved_key).value, node_info.at(heap[parent(index)]).value)) { - sift_up(index); + const Key &movedKey = heap_[index]; + if (index > 0 && comp_(nodeInfo_.at(movedKey).value_, nodeInfo_.at(heap_[Parent(index)]).value_)) { + SiftUp(index); } else { - sift_down(index); + SiftDown(index); } } else { - heap.pop_back(); - node_info.erase(it); + heap_.pop_back(); + nodeInfo_.erase(it); } } - const Value &get_value(const Key &key) const { - auto it = 
node_info.find(key); - if (it == node_info.end()) { + const Value &GetValue(const Key &key) const { + auto it = nodeInfo_.find(key); + if (it == nodeInfo_.end()) { throw std::out_of_range("Key does not exist in the heap."); } - return it->second.value; + return it->second.value_; } /** * @brief Removes all elements from the heap. */ - void clear() noexcept { - heap.clear(); - node_info.clear(); + void Clear() noexcept { + heap_.clear(); + nodeInfo_.clear(); } private: - std::vector heap; - std::unordered_map node_info; - Compare comp; + std::vector heap_; + std::unordered_map nodeInfo_; + Compare comp_; - inline size_t parent(size_t i) const noexcept { return (i - 1) / D; } + inline size_t Parent(size_t i) const noexcept { return (i - 1) / d; } - inline size_t first_child(size_t i) const noexcept { return D * i + 1; } + inline size_t FirstChild(size_t i) const noexcept { return d * i + 1; } - inline void swap_nodes(size_t i, size_t j) { - node_info.at(heap[i]).position = j; - node_info.at(heap[j]).position = i; - std::swap(heap[i], heap[j]); + inline void SwapNodes(size_t i, size_t j) { + nodeInfo_.at(heap_[i]).position_ = j; + nodeInfo_.at(heap_[j]).position_ = i; + std::swap(heap_[i], heap_[j]); } - void sift_up(size_t index) { + void SiftUp(size_t index) { if (index == 0) { return; } - Key key_to_sift = std::move(heap[index]); - const Value &value_to_sift = node_info.at(key_to_sift).value; + Key keyToSift = std::move(heap_[index]); + const Value &valueToSift = nodeInfo_.at(keyToSift).value_; while (index > 0) { - size_t p_idx = parent(index); - if (comp(value_to_sift, node_info.at(heap[p_idx]).value)) { - heap[index] = std::move(heap[p_idx]); - node_info.at(heap[index]).position = index; - index = p_idx; + size_t pIdx = Parent(index); + if (comp_(valueToSift, nodeInfo_.at(heap_[pIdx]).value_)) { + heap_[index] = std::move(heap_[pIdx]); + nodeInfo_.at(heap_[index]).position_ = index; + index = pIdx; } else { break; } } - heap[index] = std::move(key_to_sift); - 
node_info.at(heap[index]).position = index; + heap_[index] = std::move(keyToSift); + nodeInfo_.at(heap_[index]).position_ = index; } - void sift_down(size_t index) { - Key key_to_sift = std::move(heap[index]); - const Value &value_to_sift = node_info.at(key_to_sift).value; - size_t size = heap.size(); + void SiftDown(size_t index) { + Key keyToSift = std::move(heap_[index]); + const Value &valueToSift = nodeInfo_.at(keyToSift).value_; + size_t size = heap_.size(); - while (first_child(index) < size) { - size_t best_child_idx = first_child(index); - const size_t last_child_idx = std::min(best_child_idx + D, size); + while (FirstChild(index) < size) { + size_t bestChildIdx = FirstChild(index); + const size_t lastChildIdx = std::min(bestChildIdx + d, size); // Find the best child among the D children - const Value *best_child_value = &node_info.at(heap[best_child_idx]).value; - for (size_t i = best_child_idx + 1; i < last_child_idx; ++i) { - const Value ¤t_child_value = node_info.at(heap[i]).value; - if (comp(current_child_value, *best_child_value)) { - best_child_idx = i; - best_child_value = ¤t_child_value; + const Value *bestChildValue = &nodeInfo_.at(heap_[bestChildIdx]).value_; + for (size_t i = bestChildIdx + 1; i < lastChildIdx; ++i) { + const Value ¤tChildValue = nodeInfo_.at(heap_[i]).value_; + if (comp_(currentChildValue, *bestChildValue)) { + bestChildIdx = i; + bestChildValue = ¤tChildValue; } } // After finding the best child, compare with the sifting element - if (comp(value_to_sift, *best_child_value)) { + if (comp_(valueToSift, *bestChildValue)) { break; } // Move hole down - heap[index] = std::move(heap[best_child_idx]); - node_info.at(heap[index]).position = index; - index = best_child_idx; + heap_[index] = std::move(heap_[bestChildIdx]); + nodeInfo_.at(heap_[index]).position_ = index; + index = bestChildIdx; } - heap[index] = std::move(key_to_sift); - node_info.at(heap[index]).position = index; + heap_[index] = std::move(keyToSift); + 
nodeInfo_.at(heap_[index]).position_ = index; } }; -template -using MaxDaryHeap = DaryHeap>; +template +using MaxDaryHeap = DaryHeap>; -template -using MinDaryHeap = DaryHeap>; +template +using MinDaryHeap = DaryHeap>; template using IndexedHeap = DaryHeap; diff --git a/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp b/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp index fe505ab1..b903e8fb 100644 --- a/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp +++ b/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp @@ -30,20 +30,20 @@ template class PairingHeap { private: struct Node { - Key key; - Value value; - Node *child = nullptr; // Leftmost child - Node *next_sibling = nullptr; // Sibling to the right - Node *prev_or_parent = nullptr; // If leftmost child, parent; otherwise, left sibling. + Key key_; + Value value_; + Node *child_ = nullptr; // Leftmost child + Node *nextSibling_ = nullptr; // Sibling to the right + Node *prevOrParent_ = nullptr; // If leftmost child, parent; otherwise, left sibling. }; - Node *root = nullptr; - std::unordered_map node_map; - size_t num_elements = 0; - Compare comp; + Node *root_ = nullptr; + std::unordered_map nodeMap_; + size_t numElements_ = 0; + Compare comp_; // Melds two heaps together. 
- Node *meld(Node *heap1, Node *heap2) { + Node *Meld(Node *heap1, Node *heap2) { if (!heap1) { return heap2; } @@ -51,129 +51,129 @@ class PairingHeap { return heap1; } - if (comp(heap2->value, heap1->value)) { + if (comp_(heap2->value_, heap1->value_)) { std::swap(heap1, heap2); } // heap2 becomes the new leftmost child of heap1 - heap2->next_sibling = heap1->child; - if (heap1->child) { - heap1->child->prev_or_parent = heap2; + heap2->nextSibling_ = heap1->child_; + if (heap1->child_) { + heap1->child_->prevOrParent_ = heap2; } - heap1->child = heap2; - heap2->prev_or_parent = heap1; + heap1->child_ = heap2; + heap2->prevOrParent_ = heap1; return heap1; } // Merges a list of sibling heaps using a two-pass strategy. - Node *multipass_merge(Node *first_sibling) { - if (!first_sibling) { + Node *MultipassMerge(Node *firstSibling) { + if (!firstSibling) { return nullptr; } - std::vector heap_list; - Node *current = first_sibling; + std::vector heapList; + Node *current = firstSibling; while (current) { - Node *next = current->next_sibling; - current->next_sibling = nullptr; - current->prev_or_parent = nullptr; - heap_list.push_back(current); + Node *next = current->nextSibling_; + current->nextSibling_ = nullptr; + current->prevOrParent_ = nullptr; + heapList.push_back(current); current = next; } - if (heap_list.size() <= 1) { - return heap_list.empty() ? nullptr : heap_list[0]; + if (heapList.size() <= 1) { + return heapList.empty() ? 
nullptr : heapList[0]; } // Merge pairs from left to right - std::vector merged_heaps; - merged_heaps.reserve((heap_list.size() + 1) / 2); - for (size_t i = 0; i + 1 < heap_list.size(); i += 2) { - merged_heaps.push_back(meld(heap_list[i], heap_list[i + 1])); + std::vector mergedHeaps; + mergedHeaps.reserve((heapList.size() + 1) / 2); + for (size_t i = 0; i + 1 < heapList.size(); i += 2) { + mergedHeaps.push_back(Meld(heapList[i], heapList[i + 1])); } - if (heap_list.size() % 2 == 1) { - merged_heaps.push_back(heap_list.back()); + if (heapList.size() % 2 == 1) { + mergedHeaps.push_back(heapList.back()); } // Merge resulting heaps from right to left - Node *final_heap = merged_heaps.back(); - for (auto it = merged_heaps.rbegin() + 1; it != merged_heaps.rend(); ++it) { - final_heap = meld(final_heap, *it); + Node *finalHeap = mergedHeaps.back(); + for (auto it = mergedHeaps.rbegin() + 1; it != mergedHeaps.rend(); ++it) { + finalHeap = Meld(finalHeap, *it); } - return final_heap; + return finalHeap; } // Cuts a node from its parent and siblings. 
- void cut(Node *node) { - if (node == root) { + void Cut(Node *node) { + if (node == root_) { return; } - if (node->prev_or_parent->child == node) { // is leftmost child - node->prev_or_parent->child = node->next_sibling; + if (node->prevOrParent_->child_ == node) { // is leftmost child + node->prevOrParent_->child_ = node->nextSibling_; } else { // is not leftmost child - node->prev_or_parent->next_sibling = node->next_sibling; + node->prevOrParent_->nextSibling_ = node->nextSibling_; } - if (node->next_sibling) { - node->next_sibling->prev_or_parent = node->prev_or_parent; + if (node->nextSibling_) { + node->nextSibling_->prevOrParent_ = node->prevOrParent_; } - node->next_sibling = nullptr; - node->prev_or_parent = nullptr; + node->nextSibling_ = nullptr; + node->prevOrParent_ = nullptr; } public: PairingHeap() = default; - ~PairingHeap() { clear(); } + ~PairingHeap() { Clear(); } - PairingHeap(const PairingHeap &other) : num_elements(other.num_elements), comp(other.comp) { - root = nullptr; - if (!other.root) { + PairingHeap(const PairingHeap &other) : numElements_(other.numElements_), comp_(other.comp_) { + root_ = nullptr; + if (!other.root_) { return; } - std::unordered_map old_to_new; + std::unordered_map oldToNew; std::vector q; - q.reserve(other.num_elements); + q.reserve(other.numElements_); // Create root - root = new Node{other.root->key, other.root->value}; - node_map[root->key] = root; - old_to_new[other.root] = root; - q.push_back(other.root); + root_ = new Node{other.root_->key_, other.root_->value_}; + nodeMap_[root_->key_] = root_; + oldToNew[other.root_] = root_; + q.push_back(other.root_); size_t head = 0; while (head < q.size()) { - const Node *old_parent = q[head++]; - Node *new_parent = old_to_new[old_parent]; + const Node *oldParent = q[head++]; + Node *newParent = oldToNew[oldParent]; - if (old_parent->child) { - const Node *old_child = old_parent->child; + if (oldParent->child_) { + const Node *oldChild = oldParent->child_; // First 
child - Node *new_child = new Node{old_child->key, old_child->value}; - new_parent->child = new_child; - new_child->prev_or_parent = new_parent; - node_map[new_child->key] = new_child; - old_to_new[old_child] = new_child; - q.push_back(old_child); + Node *newChild = new Node{oldChild->key_, oldChild->value_}; + newParent->child_ = newChild; + newChild->prevOrParent_ = newParent; + nodeMap_[newChild->key_] = newChild; + oldToNew[oldChild] = newChild; + q.push_back(oldChild); // Siblings - Node *prev_new_sibling = new_child; - while (old_child->next_sibling) { - old_child = old_child->next_sibling; - new_child = new Node{old_child->key, old_child->value}; + Node *prevNewSibling = newChild; + while (oldChild->nextSibling_) { + oldChild = oldChild->nextSibling_; + newChild = new Node{oldChild->key_, oldChild->value_}; - prev_new_sibling->next_sibling = new_child; - new_child->prev_or_parent = prev_new_sibling; + prevNewSibling->nextSibling_ = newChild; + newChild->prevOrParent_ = prevNewSibling; - node_map[new_child->key] = new_child; - old_to_new[old_child] = new_child; - q.push_back(old_child); + nodeMap_[newChild->key_] = newChild; + oldToNew[oldChild] = newChild; + q.push_back(oldChild); - prev_new_sibling = new_child; + prevNewSibling = newChild; } } } @@ -182,10 +182,10 @@ class PairingHeap { PairingHeap &operator=(const PairingHeap &other) { if (this != &other) { PairingHeap temp(other); - std::swap(root, temp.root); - std::swap(node_map, temp.node_map); - std::swap(num_elements, temp.num_elements); - std::swap(comp, temp.comp); + std::swap(root_, temp.root_); + std::swap(nodeMap_, temp.nodeMap_); + std::swap(numElements_, temp.numElements_); + std::swap(comp_, temp.comp_); } return *this; } @@ -194,194 +194,194 @@ class PairingHeap { PairingHeap &operator=(PairingHeap &&) = default; // Checks if the heap is empty. 
- bool is_empty() const { return root == nullptr; } + bool IsEmpty() const { return root_ == nullptr; } // Returns the number of elements in the heap. - size_t size() const { return num_elements; } + size_t size() const { return numElements_; } // Checks if a key exists in the heap. - bool contains(const Key &key) const { return node_map.count(key); } + bool Contains(const Key &key) const { return nodeMap_.count(key); } // Inserts a new key-value pair into the heap. - void push(const Key &key, const Value &value) { - Node *new_node = new Node{key, value}; + void Push(const Key &key, const Value &value) { + Node *newNode = new Node{key, value}; // emplace and check for success to avoid a separate lookup with contains() - const auto pair = node_map.emplace(key, new_node); + const auto pair = nodeMap_.emplace(key, newNode); const bool &success = pair.second; if (!success) { - delete new_node; // Avoid memory leak if key already exists + delete newNode; // Avoid memory leak if key already exists throw std::invalid_argument("Key already exists in the heap."); } - root = meld(root, new_node); - num_elements++; + root_ = Meld(root_, newNode); + numElements_++; } // Returns the key with the minimum value without removing it. - const Key &top() const { - if (is_empty()) { + const Key &Top() const { + if (IsEmpty()) { throw std::out_of_range("Heap is empty."); } - return root->key; + return root_->key_; } // Removes and returns the key with the minimum value. - Key pop() { - if (is_empty()) { + Key Pop() { + if (IsEmpty()) { throw std::out_of_range("Heap is empty."); } - Node *old_root = root; - Key top_key = old_root->key; + Node *oldRoot = root_; + Key topKey = oldRoot->key_; - root = multipass_merge(old_root->child); + root_ = MultipassMerge(oldRoot->child_); - node_map.erase(top_key); - delete old_root; - num_elements--; + nodeMap_.erase(topKey); + delete oldRoot; + numElements_--; - return top_key; + return topKey; } // Updates the value of an existing key. 
- void update(const Key &key, const Value &new_value) { - auto it = node_map.find(key); - if (it == node_map.end()) { + void Update(const Key &key, const Value &newValue) { + auto it = nodeMap_.find(key); + if (it == nodeMap_.end()) { throw std::invalid_argument("Key does not exist in the heap."); } Node *node = it->second; - const Value old_value = node->value; + const Value oldValue = node->value_; - if (comp(new_value, old_value)) { // Decrease key - node->value = new_value; - if (node != root) { - cut(node); - root = meld(root, node); + if (comp_(newValue, oldValue)) { // Decrease key + node->value_ = newValue; + if (node != root_) { + Cut(node); + root_ = Meld(root_, node); } - } else if (comp(old_value, new_value)) { // Increase key - node->value = new_value; - if (node != root) { - cut(node); - if (node->child) { - root = meld(root, multipass_merge(node->child)); - node->child = nullptr; + } else if (comp_(oldValue, newValue)) { // Increase key + node->value_ = newValue; + if (node != root_) { + Cut(node); + if (node->child_) { + root_ = Meld(root_, MultipassMerge(node->child_)); + node->child_ = nullptr; } - root = meld(root, node); + root_ = Meld(root_, node); } else { // The root's value increased, it might not be the minimum anymore. // We can treat it as if we popped it and re-inserted it, without the delete/new. - Node *old_root = root; - root = multipass_merge(old_root->child); - old_root->child = nullptr; - root = meld(root, old_root); + Node *oldRoot = root_; + root_ = MultipassMerge(oldRoot->child_); + oldRoot->child_ = nullptr; + root_ = Meld(root_, oldRoot); } } else { - node->value = new_value; + node->value_ = newValue; } // If values are equal, do nothing. } // Removes an arbitrary key from the heap. 
- void erase(const Key &key) { - auto it = node_map.find(key); - if (it == node_map.end()) { + void Erase(const Key &key) { + auto it = nodeMap_.find(key); + if (it == nodeMap_.end()) { throw std::invalid_argument("Key does not exist in the heap."); } - Node *node_to_erase = it->second; + Node *nodeToErase = it->second; - if (node_to_erase == root) { - pop(); + if (nodeToErase == root_) { + Pop(); return; } - cut(node_to_erase); + Cut(nodeToErase); // Merge its children into the main heap - if (node_to_erase->child) { - root = meld(root, multipass_merge(node_to_erase->child)); - node_to_erase->child = nullptr; + if (nodeToErase->child_) { + root_ = Meld(root_, MultipassMerge(nodeToErase->child_)); + nodeToErase->child_ = nullptr; } - node_map.erase(key); - delete node_to_erase; - num_elements--; + nodeMap_.erase(key); + delete nodeToErase; + numElements_--; } // Gets the value for a given key. - const Value &get_value(const Key &key) const { - auto it = node_map.find(key); - if (it == node_map.end()) { + const Value &GetValue(const Key &key) const { + auto it = nodeMap_.find(key); + if (it == nodeMap_.end()) { throw std::out_of_range("Key does not exist in the heap."); } - return it->second->value; + return it->second->value_; } // Removes all elements from the heap. 
- void clear() { - if (!root) { + void Clear() { + if (!root_) { return; } // Iterative post-order traversal to delete all nodes - std::vector to_visit; - if (num_elements > 0) { - to_visit.reserve(num_elements); + std::vector toVisit; + if (numElements_ > 0) { + toVisit.reserve(numElements_); } - to_visit.push_back(root); + toVisit.push_back(root_); - while (!to_visit.empty()) { - Node *current = to_visit.back(); - to_visit.pop_back(); + while (!toVisit.empty()) { + Node *current = toVisit.back(); + toVisit.pop_back(); - Node *child = current->child; + Node *child = current->child_; while (child) { - to_visit.push_back(child); - child = child->next_sibling; + toVisit.push_back(child); + child = child->nextSibling_; } delete current; } - root = nullptr; - node_map.clear(); - num_elements = 0; + root_ = nullptr; + nodeMap_.clear(); + numElements_ = 0; } // Retrieves keys with the top value, up to a specified limit. // If limit is 0, all keys with the top value are returned. - std::vector get_top_keys(size_t limit = 0) const { - std::vector top_keys; - if (is_empty()) { - return top_keys; + std::vector GetTopKeys(size_t limit = 0) const { + std::vector topKeys; + if (IsEmpty()) { + return topKeys; } if (limit > 0) { - top_keys.reserve(limit); + topKeys.reserve(limit); } - const Value &top_value = root->value; + const Value &topValue = root_->value_; std::vector q; - q.push_back(root); + q.push_back(root_); size_t head = 0; while (head < q.size()) { const Node *current = q[head++]; - if (comp(top_value, current->value)) { + if (comp_(topValue, current->value_)) { continue; } - top_keys.push_back(current->key); - if (limit > 0 && top_keys.size() >= limit) { - return top_keys; + topKeys.push_back(current->key_); + if (limit > 0 && topKeys.size() >= limit) { + return topKeys; } - Node *child = current->child; + Node *child = current->child_; while (child) { q.push_back(child); - child = child->next_sibling; + child = child->nextSibling_; } } - return top_keys; + return 
topKeys; } }; diff --git a/include/osp/auxiliary/datastructures/union_find.hpp b/include/osp/auxiliary/datastructures/union_find.hpp index f955eddc..6de0bda9 100644 --- a/include/osp/auxiliary/datastructures/union_find.hpp +++ b/include/osp/auxiliary/datastructures/union_find.hpp @@ -29,308 +29,308 @@ limitations under the License. namespace osp { /// @brief Structure to execute a union-find algorithm -template -struct union_find_object { - const T name; // unique identifier - index_t parent_index; - unsigned rank; - workw_t weight; - memw_t memory; - - explicit union_find_object(const T &name_, index_t parent_index_, workw_t weight_ = 0, memw_t memory_ = 0) - : name(name_), parent_index(parent_index_), weight(weight_), memory(memory_) { - rank = 1; +template +struct UnionFindObject { + const T name_; // unique identifier + IndexT parentIndex_; + unsigned rank_; + WorkwT weight_; + MemwT memory_; + + explicit UnionFindObject(const T &name, IndexT parentIndex, WorkwT weight = 0, MemwT memory = 0) + : name_(name), parentIndex_(parentIndex), weight_(weight), memory_(memory) { + rank_ = 1; } - union_find_object(const union_find_object &other) = default; - union_find_object &operator=(const union_find_object &other) = default; + UnionFindObject(const UnionFindObject &other) = default; + UnionFindObject &operator=(const UnionFindObject &other) = default; }; /// @brief Class to execute a union-find algorithm -template -class Union_Find_Universe { +template +class UnionFindUniverse { private: - std::vector> universe; - std::unordered_map names_to_indices; - std::set component_indices; - - index_t find_origin(index_t index) { - while (index != universe[index].parent_index) { - universe[index].parent_index = universe[universe[index].parent_index].parent_index; - index = universe[index].parent_index; + std::vector> universe_; + std::unordered_map namesToIndices_; + std::set componentIndices_; + + IndexT FindOrigin(IndexT index) { + while (index != 
universe_[index].parentIndex_) { + universe_[index].parentIndex_ = universe_[universe_[index].parentIndex_].parentIndex_; + index = universe_[index].parentIndex_; } return index; } - int join(index_t index, index_t other_index) { - index = find_origin(index); - other_index = find_origin(other_index); + int Join(IndexT index, IndexT otherIndex) { + index = FindOrigin(index); + otherIndex = FindOrigin(otherIndex); - if (index == other_index) { + if (index == otherIndex) { return 0; } - if (universe[index].rank >= universe[other_index].rank) { - universe[other_index].parent_index = index; - universe[index].weight += universe[other_index].weight; - universe[index].memory += universe[other_index].memory; - component_indices.erase(other_index); + if (universe_[index].rank_ >= universe_[otherIndex].rank_) { + universe_[otherIndex].parentIndex_ = index; + universe_[index].weight_ += universe_[otherIndex].weight_; + universe_[index].memory_ += universe_[otherIndex].memory_; + componentIndices_.erase(otherIndex); - if (universe[index].rank == universe[other_index].rank) { - universe[index].rank++; + if (universe_[index].rank_ == universe_[otherIndex].rank_) { + universe_[index].rank_++; } } else { - universe[index].parent_index = other_index; - universe[other_index].weight += universe[index].weight; - universe[other_index].memory += universe[index].memory; - component_indices.erase(index); + universe_[index].parentIndex_ = otherIndex; + universe_[otherIndex].weight_ += universe_[index].weight_; + universe_[otherIndex].memory_ += universe_[index].memory_; + componentIndices_.erase(index); } return -1; } - index_t get_index_from_name(const T &name) const { return names_to_indices.at(name); } + IndexT GetIndexFromName(const T &name) const { return namesToIndices_.at(name); } public: - void reset() { - universe.clear(); - names_to_indices.clear(); - component_indices.clear(); + void Reset() { + universe_.clear(); + namesToIndices_.clear(); + componentIndices_.clear(); } - bool 
is_in_universe(const T &name) const { return names_to_indices.find(name) != names_to_indices.end(); } + bool IsInUniverse(const T &name) const { return namesToIndices_.find(name) != namesToIndices_.end(); } /// @brief Loops till object is its own parent /// @param name of object /// @return returns (current) name of component - T find_origin_by_name(const T &name) { return universe[find_origin(names_to_indices.at(name))].name; } + T FindOriginByName(const T &name) { return universe_[FindOrigin(namesToIndices_.at(name))].name_; } /// @brief Joins two components /// @param name of object to join /// @param other_name of object to join - void join_by_name(const T &name, const T &other_name) { join(names_to_indices.at(name), names_to_indices.at(other_name)); } + void JoinByName(const T &name, const T &otherName) { Join(namesToIndices_.at(name), namesToIndices_.at(otherName)); } /// @brief Retrieves the current number of connected components - std::size_t get_number_of_connected_components() const { return component_indices.size(); } + std::size_t GetNumberOfConnectedComponents() const { return componentIndices_.size(); } /// @brief Retrieves the (current) names of components - std::vector get_component_names() const { - std::vector component_names; - component_names.reserve(component_indices.size()); - for (auto &indx : component_indices) { - component_names.emplace_back(universe[indx].name); + std::vector GetComponentNames() const { + std::vector componentNames; + componentNames.reserve(componentIndices_.size()); + for (auto &indx : componentIndices_) { + componentNames.emplace_back(universe_[indx].name_); } - return component_names; + return componentNames; } /// @brief Retrieves the (current) names of components together with their weight - std::vector> get_component_names_and_weights() const { - std::vector> component_names_and_weights; - component_names_and_weights.reserve(component_indices.size()); - for (auto &indx : component_indices) { - 
component_names_and_weights.emplace_back({universe[indx].name, universe[indx].weight}); + std::vector> GetComponentNamesAndWeights() const { + std::vector> componentNamesAndWeights; + componentNamesAndWeights.reserve(componentIndices_.size()); + for (auto &indx : componentIndices_) { + componentNamesAndWeights.emplace_back({universe_[indx].name_, universe_[indx].weight}); } - return component_names_and_weights; + return componentNamesAndWeights; } /// @brief Retrieves the (current) names of components together with their weight and memory - std::vector> get_component_names_weights_and_memory() const { - std::vector> component_names_weights_and_memory; - component_names_weights_and_memory.reserve(component_indices.size()); - for (auto &indx : component_indices) { - component_names_weights_and_memory.emplace_back({universe[indx].name, universe[indx].weight, universe[indx].memory}); + std::vector> GetComponentNamesWeightsAndMemory() const { + std::vector> componentNamesWeightsAndMemory; + componentNamesWeightsAndMemory.reserve(componentIndices_.size()); + for (auto &indx : componentIndices_) { + componentNamesWeightsAndMemory.emplace_back({universe_[indx].name_, universe_[indx].weight, universe_[indx].memory}); } - return component_names_weights_and_memory; + return componentNamesWeightsAndMemory; } /// @brief Retrieves the weight of the component containing the given object /// @param name of object - workw_t get_weight_of_component_by_name(const T &name) { - index_t index = get_index_from_name(name); - index = find_origin(index); - return universe[index].weight; + WorkwT GetWeightOfComponentByName(const T &name) { + IndexT index = GetIndexFromName(name); + index = FindOrigin(index); + return universe_[index].weight_; } /// @brief Retrieves the memory of the component containing the given object /// @param name of object - memw_t get_memory_of_component_by_name(const T &name) { - index_t index = get_index_from_name(name); - index = find_origin(index); - return 
universe[index].memory; + MemwT GetMemoryOfComponentByName(const T &name) { + IndexT index = GetIndexFromName(name); + index = FindOrigin(index); + return universe_[index].memory_; } /// @brief Retrieves the connected components /// @return Partition of the names of objects according to the connected components - std::vector> get_connected_components() { - std::vector> connected_components_by_index; - connected_components_by_index.resize(universe.size()); - for (index_t i = 0; i < static_cast(universe.size()); i++) { - connected_components_by_index[find_origin(i)].emplace_back(i); + std::vector> GetConnectedComponents() { + std::vector> connectedComponentsByIndex; + connectedComponentsByIndex.resize(universe_.size()); + for (IndexT i = 0; i < static_cast(universe_.size()); i++) { + connectedComponentsByIndex[FindOrigin(i)].emplace_back(i); } - std::vector> connected_components_by_name; - for (auto &comp : connected_components_by_index) { + std::vector> connectedComponentsByName; + for (auto &comp : connectedComponentsByIndex) { if (comp.empty()) { continue; } - std::vector names_in_comp; - names_in_comp.reserve(comp.size()); + std::vector namesInComp; + namesInComp.reserve(comp.size()); for (const auto &indx : comp) { - names_in_comp.emplace_back(universe[indx].name); + namesInComp.emplace_back(universe_[indx].name_); } - connected_components_by_name.push_back(names_in_comp); + connectedComponentsByName.push_back(namesInComp); } - return connected_components_by_name; + return connectedComponentsByName; } /// @brief Retrieves the connected components and their respective weights /// @return Partition of the names of objects according to the connected components together with their respective /// weight - std::vector, workw_t>> get_connected_components_and_weights() { - std::vector> connected_components_by_index; - connected_components_by_index.resize(universe.size()); - for (index_t i = 0; i < static_cast(universe.size()); i++) { - 
connected_components_by_index[find_origin(i)].emplace_back(i); + std::vector, WorkwT>> GetConnectedComponentsAndWeights() { + std::vector> connectedComponentsByIndex; + connectedComponentsByIndex.resize(universe_.size()); + for (IndexT i = 0; i < static_cast(universe_.size()); i++) { + connectedComponentsByIndex[FindOrigin(i)].emplace_back(i); } - std::vector, workw_t>> connected_components_by_name_incl_weight; - for (auto &comp : connected_components_by_index) { + std::vector, WorkwT>> connectedComponentsByNameInclWeight; + for (auto &comp : connectedComponentsByIndex) { if (comp.empty()) { continue; } - workw_t comp_weight = universe[find_origin(comp[0])].weight; + WorkwT compWeight = universe_[FindOrigin(comp[0])].weight_; - std::vector names_in_comp; - names_in_comp.reserve(comp.size()); + std::vector namesInComp; + namesInComp.reserve(comp.size()); for (auto &indx : comp) { - names_in_comp.emplace_back(universe[indx].name); + namesInComp.emplace_back(universe_[indx].name_); } - connected_components_by_name_incl_weight.emplace_back(names_in_comp, comp_weight); + connectedComponentsByNameInclWeight.emplace_back(namesInComp, compWeight); } - return connected_components_by_name_incl_weight; + return connectedComponentsByNameInclWeight; } /// @brief Retrieves the connected components and their respective weights and memories /// @return Partition of the names of objects according to the connected components together with their respective /// weight and memory - std::vector, workw_t, memw_t>> get_connected_components_weights_and_memories() { - std::vector> connected_components_by_index; - connected_components_by_index.resize(universe.size()); - for (index_t i = 0; i < static_cast(universe.size()); i++) { - connected_components_by_index[find_origin(i)].emplace_back(i); + std::vector, WorkwT, MemwT>> GetConnectedComponentsWeightsAndMemories() { + std::vector> connectedComponentsByIndex; + connectedComponentsByIndex.resize(universe_.size()); + for (IndexT i = 0; i < 
static_cast(universe_.size()); i++) { + connectedComponentsByIndex[FindOrigin(i)].emplace_back(i); } - std::vector, workw_t, memw_t>> connected_components_by_name_incl_weight_memory; - for (auto &comp : connected_components_by_index) { + std::vector, WorkwT, MemwT>> connectedComponentsByNameInclWeightMemory; + for (auto &comp : connectedComponentsByIndex) { if (comp.empty()) { continue; } - workw_t comp_weight = universe[find_origin(comp[0])].weight; - memw_t comp_memory = universe[find_origin(comp[0])].memory; + WorkwT compWeight = universe_[FindOrigin(comp[0])].weight_; + MemwT compMemory = universe_[FindOrigin(comp[0])].memory_; - std::vector names_in_comp; - names_in_comp.reserve(comp.size()); + std::vector namesInComp; + namesInComp.reserve(comp.size()); for (auto &indx : comp) { - names_in_comp.emplace_back(universe[indx].name); + namesInComp.emplace_back(universe_[indx].name_); } - connected_components_by_name_incl_weight_memory.emplace_back(names_in_comp, comp_weight, comp_memory); + connectedComponentsByNameInclWeightMemory.emplace_back(namesInComp, compWeight, compMemory); } - return connected_components_by_name_incl_weight_memory; + return connectedComponentsByNameInclWeightMemory; } /// @brief Adds object to the union-find structure /// @param name of object - void add_object(const T &name) { - if (names_to_indices.find(name) != names_to_indices.end()) { + void AddObject(const T &name) { + if (namesToIndices_.find(name) != namesToIndices_.end()) { throw std::runtime_error("This name already exists in the universe."); } - index_t new_index = static_cast(universe.size()); - universe.emplace_back(name, new_index); - names_to_indices[name] = new_index; - component_indices.emplace(new_index); + IndexT newIndex = static_cast(universe_.size()); + universe_.emplace_back(name, newIndex); + namesToIndices_[name] = newIndex; + componentIndices_.emplace(newIndex); } /// @brief Adds object to the union-find structure with given weight /// @param name of object /// 
@param weight of object - void add_object(const T &name, const workw_t weight) { - if (names_to_indices.find(name) != names_to_indices.end()) { + void AddObject(const T &name, const WorkwT weight) { + if (namesToIndices_.find(name) != namesToIndices_.end()) { throw std::runtime_error("This name already exists in the universe."); } - index_t new_index = static_cast(universe.size()); - universe.emplace_back(name, new_index, weight); - names_to_indices[name] = new_index; - component_indices.emplace(new_index); + IndexT newIndex = static_cast(universe_.size()); + universe_.emplace_back(name, newIndex, weight); + namesToIndices_[name] = newIndex; + componentIndices_.emplace(newIndex); } /// @brief Adds object to the union-find structure with given weight and memory /// @param name of object /// @param weight of object /// @param memory of object - void add_object(const T &name, const workw_t weight, const memw_t memory) { - if (names_to_indices.find(name) != names_to_indices.end()) { + void AddObject(const T &name, const WorkwT weight, const MemwT memory) { + if (namesToIndices_.find(name) != namesToIndices_.end()) { throw std::runtime_error("This name already exists in the universe."); } - index_t new_index = static_cast(universe.size()); - universe.emplace_back(name, new_index, weight, memory); - names_to_indices[name] = new_index; - component_indices.emplace(new_index); + IndexT newIndex = static_cast(universe_.size()); + universe_.emplace_back(name, newIndex, weight, memory); + namesToIndices_[name] = newIndex; + componentIndices_.emplace(newIndex); } /// @brief Adds objects to the union-find structure /// @param names of objects - void add_object(const std::vector &names) { + void AddObject(const std::vector &names) { // adjusting universe capacity - index_t additional_size = static_cast(names.size()); - index_t current_size = static_cast(universe.size()); - index_t current_capacity = static_cast(universe.capacity()); - if (additional_size + current_size > 
current_capacity) { - index_t new_min_capacity = std::max((current_capacity + 1) / 2 * 3, current_size + additional_size); - universe.reserve(new_min_capacity); + IndexT additionalSize = static_cast(names.size()); + IndexT currentSize = static_cast(universe_.size()); + IndexT currentCapacity = static_cast(universe_.capacity()); + if (additionalSize + currentSize > currentCapacity) { + IndexT newMinCapacity = std::max((currentCapacity + 1) / 2 * 3, currentSize + additionalSize); + universe_.reserve(newMinCapacity); } // adjusting names_to_indices capacity - current_size = static_cast(names_to_indices.size()); - if (additional_size + current_size > current_capacity) { - index_t new_min_capacity = std::max((current_capacity + 1) / 2 * 3, current_size + additional_size); - names_to_indices.reserve(new_min_capacity); + currentSize = static_cast(namesToIndices_.size()); + if (additionalSize + currentSize > currentCapacity) { + IndexT newMinCapacity = std::max((currentCapacity + 1) / 2 * 3, currentSize + additionalSize); + namesToIndices_.reserve(newMinCapacity); } for (auto &name : names) { - add_object(name); + AddObject(name); } } /// @brief Adds objects to the union-find structure /// @param names of objects /// @param weights of objects - void add_object(const std::vector &names, const std::vector &weights) { + void AddObject(const std::vector &names, const std::vector &weights) { if (names.size() != weights.size()) { throw std::runtime_error("Vectors of names and weights must be of equal length."); } // adjusting universe capacity - index_t additional_size = static_cast(names.size()); - index_t current_size = static_cast(universe.size()); - index_t current_capacity = static_cast(universe.capacity()); - if (additional_size + current_size > current_capacity) { - index_t new_min_capacity = std::max((current_capacity + 1) / 2 * 3, current_size + additional_size); - universe.reserve(new_min_capacity); + IndexT additionalSize = static_cast(names.size()); + IndexT 
currentSize = static_cast(universe_.size()); + IndexT currentCapacity = static_cast(universe_.capacity()); + if (additionalSize + currentSize > currentCapacity) { + IndexT newMinCapacity = std::max((currentCapacity + 1) / 2 * 3, currentSize + additionalSize); + universe_.reserve(newMinCapacity); } // adjusting names_to_indices capacity - current_size = static_cast(names_to_indices.size()); - if (additional_size + current_size > current_capacity) { - index_t new_min_capacity = std::max((current_capacity + 1) / 2 * 3, current_size + additional_size); - names_to_indices.reserve(new_min_capacity); + currentSize = static_cast(namesToIndices_.size()); + if (additionalSize + currentSize > currentCapacity) { + IndexT newMinCapacity = std::max((currentCapacity + 1) / 2 * 3, currentSize + additionalSize); + namesToIndices_.reserve(newMinCapacity); } for (std::size_t i = 0; i < names.size(); i++) { - add_object(names[i], weights[i]); + AddObject(names[i], weights[i]); } } @@ -338,59 +338,56 @@ class Union_Find_Universe { /// @param names of objects /// @param weights of objects /// @param memories of objects - void add_object(const std::vector &names, const std::vector &weights, const std::vector &memories) { + void AddObject(const std::vector &names, const std::vector &weights, const std::vector &memories) { if (names.size() != weights.size()) { throw std::runtime_error("Vectors of names and weights must be of equal length."); } // adjusting universe capacity - index_t additional_size = static_cast(names.size()); - index_t current_size = static_cast(universe.size()); - index_t current_capacity = static_cast(universe.capacity()); - if (additional_size + current_size > current_capacity) { - unsigned new_min_capacity = std::max((current_capacity + 1) / 2 * 3, current_size + additional_size); - universe.reserve(new_min_capacity); + IndexT additionalSize = static_cast(names.size()); + IndexT currentSize = static_cast(universe_.size()); + IndexT currentCapacity = 
static_cast(universe_.capacity()); + if (additionalSize + currentSize > currentCapacity) { + unsigned newMinCapacity = std::max((currentCapacity + 1) / 2 * 3, currentSize + additionalSize); + universe_.reserve(newMinCapacity); } // adjusting names_to_indices capacity - current_size = static_cast(names_to_indices.size()); - if (additional_size + current_size > current_capacity) { - index_t new_min_capacity = std::max((current_capacity + 1) / 2 * 3, current_size + additional_size); - names_to_indices.reserve(new_min_capacity); + currentSize = static_cast(namesToIndices_.size()); + if (additionalSize + currentSize > currentCapacity) { + IndexT newMinCapacity = std::max((currentCapacity + 1) / 2 * 3, currentSize + additionalSize); + namesToIndices_.reserve(newMinCapacity); } for (size_t i = 0; i < names.size(); i++) { - add_object(names[i], weights[i], memories[i]); + AddObject(names[i], weights[i], memories[i]); } } /// @brief Initiates a union-find structure - explicit Union_Find_Universe() {} + explicit UnionFindUniverse() {} /// @brief Initiates a union-find structure /// @param names of objects - explicit Union_Find_Universe(const std::vector &names) { add_object(names); } + explicit UnionFindUniverse(const std::vector &names) { AddObject(names); } /// @brief Initiates a union-find structure /// @param names of objects /// @param weights of objects - explicit Union_Find_Universe(const std::vector &names, const std::vector &weights) { add_object(names, weights); } + explicit UnionFindUniverse(const std::vector &names, const std::vector &weights) { AddObject(names, weights); } /// @brief Initiates a union-find structure /// @param names of objects /// @param weights of objects - explicit Union_Find_Universe(const std::vector &names, - const std::vector &weights, - const std::vector &memories) { - add_object(names, weights, memories); + explicit UnionFindUniverse(const std::vector &names, const std::vector &weights, const std::vector &memories) { + AddObject(names, 
weights, memories); } - Union_Find_Universe(const Union_Find_Universe &other) = default; - Union_Find_Universe &operator=(const Union_Find_Universe &other) = default; + UnionFindUniverse(const UnionFindUniverse &other) = default; + UnionFindUniverse &operator=(const UnionFindUniverse &other) = default; }; -template -using union_find_universe_t - = Union_Find_Universe, vertex_idx_t, v_workw_t, v_memw_t>; +template +using UnionFindUniverseT = UnionFindUniverse, VertexIdxT, VWorkwT, VMemwT>; } // namespace osp diff --git a/include/osp/auxiliary/hash_util.hpp b/include/osp/auxiliary/hash_util.hpp index 825e5ba5..dee2eb95 100644 --- a/include/osp/auxiliary/hash_util.hpp +++ b/include/osp/auxiliary/hash_util.hpp @@ -23,30 +23,30 @@ limitations under the License. namespace osp { template -struct uniform_node_hash_func { - using result_type = std::size_t; +struct UniformNodeHashFunc { + using ResultType = std::size_t; - result_type operator()(const VertexType &) { return defautlVal; } + ResultType operator()(const VertexType &) { return defautlVal; } }; template -struct vector_node_hash_func { - const std::vector &node_hashes_; +struct VectorNodeHashFunc { + const std::vector &nodeHashes_; - vector_node_hash_func(const std::vector &node_hashes) : node_hashes_(node_hashes) {} + VectorNodeHashFunc(const std::vector &nodeHashes) : nodeHashes_(nodeHashes) {} - using result_type = std::size_t; + using ResultType = std::size_t; - result_type operator()(const VertexType &v) const { return node_hashes_[v]; } + ResultType operator()(const VertexType &v) const { return nodeHashes_[v]; } }; template -void hash_combine(std::size_t &seed, const T &v) { +void HashCombine(std::size_t &seed, const T &v) { std::hash hasher; seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); } -struct pair_hash { +struct PairHash { template std::size_t operator()(const std::pair &p) const { std::size_t h1 = std::hash{}(p.first); @@ -55,7 +55,7 @@ struct pair_hash { // Mainly for demonstration 
purposes, i.e. works but is overly simple // In the real world, use sth. like boost.hash_combine - hash_combine(h1, h2); + HashCombine(h1, h2); return h1; } }; diff --git a/include/osp/auxiliary/io/DotFileWriter.hpp b/include/osp/auxiliary/io/DotFileWriter.hpp index ef75c39f..8feb58ad 100644 --- a/include/osp/auxiliary/io/DotFileWriter.hpp +++ b/include/osp/auxiliary/io/DotFileWriter.hpp @@ -30,72 +30,73 @@ namespace osp { class DotFileWriter { private: - template + template - struct EdgeWriter_DOT { - const Graph_t &graph; + struct EdgeWriterDot { + const GraphT &graph_; - EdgeWriter_DOT(const Graph_t &graph_) : graph(graph_) {} + EdgeWriterDot(const GraphT &graph) : graph_(graph) {} - void operator()(std::ostream &out, const edge_desc_t &i) const { - out << source(i, graph) << "->" << target(i, graph) << " [" - << "comm_weight=\"" << graph.edge_comm_weight(i) << "\";" + void operator()(std::ostream &out, const EdgeDescT &i) const { + out << Source(i, graph_) << "->" << Target(i, graph_) << " [" + << "comm_weight=\"" << graph_.EdgeCommWeight(i) << "\";" << "]"; } }; - template - struct VertexWriterSchedule_DOT { - const BspSchedule &schedule; + template + struct VertexWriterScheduleDot { + const BspSchedule &schedule_; - VertexWriterSchedule_DOT(const BspSchedule &schedule_) : schedule(schedule_) {} + VertexWriterScheduleDot(const BspSchedule &schedule) : schedule_(schedule) {} - void operator()(std::ostream &out, const vertex_idx_t &i) const { + void operator()(std::ostream &out, const VertexIdxT &i) const { out << i << " [" - << "work_weight=\"" << schedule.getInstance().getComputationalDag().vertex_work_weight(i) << "\";" - << "comm_weight=\"" << schedule.getInstance().getComputationalDag().vertex_comm_weight(i) << "\";" - << "mem_weight=\"" << schedule.getInstance().getComputationalDag().vertex_mem_weight(i) << "\";"; + << "work_weight=\"" << schedule_.GetInstance().GetComputationalDag().VertexWorkWeight(i) << "\";" + << "comm_weight=\"" << 
schedule_.GetInstance().GetComputationalDag().VertexCommWeight(i) << "\";" + << "mem_weight=\"" << schedule_.GetInstance().GetComputationalDag().VertexMemWeight(i) << "\";"; - if constexpr (has_typed_vertices_v) { - out << "type=\"" << schedule.getInstance().getComputationalDag().vertex_type(i) << "\";"; + if constexpr (hasTypedVerticesV) { + out << "type=\"" << schedule_.GetInstance().GetComputationalDag().VertexType(i) << "\";"; } - out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\"" << schedule.assignedSuperstep(i) << "\";"; + out << "proc=\"" << schedule_.AssignedProcessor(i) << "\";" << "superstep=\"" << schedule_.AssignedSuperstep(i) + << "\";"; out << "]"; } }; - template - struct VertexWriterScheduleRecomp_DOT { - const BspScheduleRecomp &schedule; + template + struct VertexWriterScheduleRecompDot { + const BspScheduleRecomp &schedule_; - VertexWriterScheduleRecomp_DOT(const BspScheduleRecomp &schedule_) : schedule(schedule_) {} + VertexWriterScheduleRecompDot(const BspScheduleRecomp &schedule) : schedule_(schedule) {} - void operator()(std::ostream &out, const vertex_idx_t &i) const { + void operator()(std::ostream &out, const VertexIdxT &i) const { out << i << " [" - << "work_weight=\"" << schedule.getInstance().getComputationalDag().vertex_work_weight(i) << "\";" - << "comm_weight=\"" << schedule.getInstance().getComputationalDag().vertex_comm_weight(i) << "\";" - << "mem_weight=\"" << schedule.getInstance().getComputationalDag().vertex_mem_weight(i) << "\";"; + << "work_weight=\"" << schedule_.GetInstance().GetComputationalDag().VertexWorkWeight(i) << "\";" + << "comm_weight=\"" << schedule_.GetInstance().GetComputationalDag().VertexCommWeight(i) << "\";" + << "mem_weight=\"" << schedule_.GetInstance().GetComputationalDag().VertexMemWeight(i) << "\";"; - if constexpr (has_typed_vertices_v) { - out << "type=\"" << schedule.getInstance().getComputationalDag().vertex_type(i) << "\";"; + if constexpr (hasTypedVerticesV) { + out << 
"type=\"" << schedule_.GetInstance().GetComputationalDag().VertexType(i) << "\";"; } out << "proc=\"("; - for (size_t j = 0; j < schedule.assignments(i).size() - 1; ++j) { - out << schedule.assignments(i)[j].first << ","; + for (size_t j = 0; j < schedule_.Assignments(i).size() - 1; ++j) { + out << schedule_.Assignments(i)[j].first << ","; } - out << schedule.assignments(i)[schedule.assignments(i).size() - 1].first << ")\";" + out << schedule_.Assignments(i)[schedule_.Assignments(i).size() - 1].first << ")\";" << "superstep=\"("; - for (size_t j = 0; j < schedule.assignments(i).size() - 1; ++j) { - out << schedule.assignments(i)[j].second << ","; + for (size_t j = 0; j < schedule_.Assignments(i).size() - 1; ++j) { + out << schedule_.Assignments(i)[j].second << ","; } - out << schedule.assignments(i)[schedule.assignments(i).size() - 1].second << ")\";"; + out << schedule_.Assignments(i)[schedule_.Assignments(i).size() - 1].second << ")\";"; bool found = false; - for (const auto &[key, val] : schedule.getCommunicationSchedule()) { + for (const auto &[key, val] : schedule_.GetCommunicationSchedule()) { if (std::get<0>(key) == i) { if (!found) { out << "cs=\"["; @@ -116,50 +117,51 @@ class DotFileWriter { } }; - template - struct VertexWriterDuplicateRecompSchedule_DOT { - const Graph_t &graph; - const std::vector name; - const std::vector node_to_proc; - const std::vector node_to_superstep; + template + struct VertexWriterDuplicateRecompScheduleDot { + const GraphT &graph_; + const std::vector name_; + const std::vector nodeToProc_; + const std::vector nodeToSuperstep_; - VertexWriterDuplicateRecompSchedule_DOT(const Graph_t &graph_, - const std::vector &name_, - std::vector &node_to_proc_, - std::vector &node_to_superstep_) - : graph(graph_), name(name_), node_to_proc(node_to_proc_), node_to_superstep(node_to_superstep_) {} + VertexWriterDuplicateRecompScheduleDot(const GraphT &graph, + const std::vector &name, + std::vector &nodeToProc, + std::vector 
&nodeToSuperstep) + : graph_(graph), name_(name), nodeToProc_(nodeToProc), nodeToSuperstep_(nodeToSuperstep) {} template void operator()(std::ostream &out, const VertexOrEdge &i) const { - out << i << " [" << "label=\"" << name[i] << "\";" << "work_weight=\"" << graph.vertex_work_weight(i) << "\";" - << "comm_weight=\"" << graph.vertex_comm_weight(i) << "\";" << "mem_weight=\"" << graph.vertex_mem_weight(i) - << "\";" << "proc=\"" << node_to_proc[i] << "\";" << "superstep=\"" << node_to_superstep[i] << "\";"; + out << i << " [" << "label=\"" << name_[i] << "\";" << "work_weight=\"" << graph_.VertexWorkWeight(i) << "\";" + << "comm_weight=\"" << graph_.VertexCommWeight(i) << "\";" << "mem_weight=\"" << graph_.VertexMemWeight(i) + << "\";" << "proc=\"" << nodeToProc_[i] << "\";" << "superstep=\"" << nodeToSuperstep_[i] << "\";"; out << "]"; } }; - template - struct VertexWriterScheduleCS_DOT { - const BspScheduleCS &schedule; + template + struct VertexWriterScheduleCsDot { + const BspScheduleCS &schedule_; - VertexWriterScheduleCS_DOT(const BspScheduleCS &schedule_) : schedule(schedule_) {} + VertexWriterScheduleCsDot(const BspScheduleCS &schedule) : schedule_(schedule) {} - void operator()(std::ostream &out, const vertex_idx_t &i) const { + void operator()(std::ostream &out, const VertexIdxT &i) const { out << i << " [" - << "work_weight=\"" << schedule.getInstance().getComputationalDag().vertex_work_weight(i) << "\";" - << "comm_weight=\"" << schedule.getInstance().getComputationalDag().vertex_comm_weight(i) << "\";" - << "mem_weight=\"" << schedule.getInstance().getComputationalDag().vertex_mem_weight(i) << "\";"; + << "work_weight=\"" << schedule_.GetInstance().GetComputationalDag().VertexWorkWeight(i) << "\";" + << "comm_weight=\"" << schedule_.GetInstance().GetComputationalDag().VertexCommWeight(i) << "\";" + << "mem_weight=\"" << schedule_.GetInstance().GetComputationalDag().VertexMemWeight(i) << "\";"; - if constexpr (has_typed_vertices_v) { - out << 
"type=\"" << schedule.getInstance().getComputationalDag().vertex_type(i) << "\";"; + if constexpr (hasTypedVerticesV) { + out << "type=\"" << schedule_.GetInstance().GetComputationalDag().VertexType(i) << "\";"; } - out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\"" << schedule.assignedSuperstep(i) << "\";"; + out << "proc=\"" << schedule_.AssignedProcessor(i) << "\";" << "superstep=\"" << schedule_.AssignedSuperstep(i) + << "\";"; bool found = false; - for (const auto &[key, val] : schedule.getCommunicationSchedule()) { + for (const auto &[key, val] : schedule_.GetCommunicationSchedule()) { if (std::get<0>(key) == i) { if (!found) { out << "cs=\"["; @@ -180,35 +182,35 @@ class DotFileWriter { } }; - template - struct VertexWriterGraph_DOT { - const Graph_t &graph; + template + struct VertexWriterGraphDot { + const GraphT &graph_; - VertexWriterGraph_DOT(const Graph_t &graph_) : graph(graph_) {} + VertexWriterGraphDot(const GraphT &graph) : graph_(graph) {} - void operator()(std::ostream &out, const vertex_idx_t &i) const { + void operator()(std::ostream &out, const VertexIdxT &i) const { out << i << " [" - << "work_weight=\"" << graph.vertex_work_weight(i) << "\";" - << "comm_weight=\"" << graph.vertex_comm_weight(i) << "\";" - << "mem_weight=\"" << graph.vertex_mem_weight(i) << "\";"; + << "work_weight=\"" << graph_.VertexWorkWeight(i) << "\";" + << "comm_weight=\"" << graph_.VertexCommWeight(i) << "\";" + << "mem_weight=\"" << graph_.VertexMemWeight(i) << "\";"; - if constexpr (has_typed_vertices_v) { - out << "type=\"" << graph.vertex_type(i) << "\";"; + if constexpr (hasTypedVerticesV) { + out << "type=\"" << graph_.VertexType(i) << "\";"; } out << "]"; } }; - template - struct ColoredVertexWriterGraph_DOT { - const Graph_t &graph; - const color_container_t &colors; - std::vector color_strings; - std::vector shape_strings; + template + struct ColoredVertexWriterGraphDot { + const GraphT &graph_; + const ColorContainerT &colors_; + 
std::vector colorStrings_; + std::vector shapeStrings_; - ColoredVertexWriterGraph_DOT(const Graph_t &graph_, const color_container_t &colors_) : graph(graph_), colors(colors_) { - color_strings = {"lightcoral", "palegreen", "lightblue", "gold", + ColoredVertexWriterGraphDot(const GraphT &graph, const ColorContainerT &colors) : graph_(graph), colors_(colors) { + colorStrings_ = {"lightcoral", "palegreen", "lightblue", "gold", "orchid", "sandybrown", "aquamarine", "burlywood", "hotpink", "yellowgreen", "skyblue", "khaki", "violet", "salmon", "turquoise", "tan", @@ -227,52 +229,52 @@ class DotFileWriter { "darkred", "darkgreen", "mediumblue", "ivory", "indigo", "orange", "darkcyan", "antiquewhite"}; - shape_strings = {"oval", "rect", "hexagon", "parallelogram"}; + shapeStrings_ = {"oval", "rect", "hexagon", "parallelogram"}; } - void operator()(std::ostream &out, const vertex_idx_t &i) const { - if (i >= static_cast>(colors.size())) { + void operator()(std::ostream &out, const VertexIdxT &i) const { + if (i >= static_cast>(colors_.size())) { // Fallback for safety: print without color if colors vector is mismatched or palette is empty. out << i << " ["; } else { // Use modulo operator to cycle through the fixed palette if there are more color // groups than available colors. 
- const std::string &color = color_strings[colors[i] % color_strings.size()]; + const std::string &color = colorStrings_[colors_[i] % colorStrings_.size()]; out << i << " [style=filled;fillcolor=" << color << ";"; } - out << "work_weight=\"" << graph.vertex_work_weight(i) << "\";" - << "comm_weight=\"" << graph.vertex_comm_weight(i) << "\";" - << "mem_weight=\"" << graph.vertex_mem_weight(i) << "\";"; + out << "work_weight=\"" << graph_.VertexWorkWeight(i) << "\";" + << "comm_weight=\"" << graph_.VertexCommWeight(i) << "\";" + << "mem_weight=\"" << graph_.VertexMemWeight(i) << "\";"; - if constexpr (has_typed_vertices_v) { - out << "type=\"" << graph.vertex_type(i) << "\";shape=\"" - << shape_strings[graph.vertex_type(i) % shape_strings.size()] << "\";"; + if constexpr (hasTypedVerticesV) { + out << "type=\"" << graph_.VertexType(i) << "\";shape=\"" + << shapeStrings_[graph_.VertexType(i) % shapeStrings_.size()] << "\";"; } out << "]"; } }; - template - void write_graph_structure(std::ostream &os, const Graph_t &graph, const vertex_writer_t &vertex_writer) const { + template + void WriteGraphStructure(std::ostream &os, const GraphT &graph, const VertexWriterT &vertexWriter) const { os << "digraph G {\n"; - for (const auto &v : graph.vertices()) { - vertex_writer(os, v); + for (const auto &v : graph.Vertices()) { + vertexWriter(os, v); os << "\n"; } - if constexpr (has_edge_weights_v) { - EdgeWriter_DOT edge_writer(graph); + if constexpr (hasEdgeWeightsV) { + EdgeWriterDot edgeWriter(graph); - for (const auto &e : edges(graph)) { - edge_writer(os, e); + for (const auto &e : Edges(graph)) { + edgeWriter(os, e); os << "\n"; } } else { - for (const auto &v : graph.vertices()) { - for (const auto &child : graph.children(v)) { + for (const auto &v : graph.Vertices()) { + for (const auto &child : graph.Children(v)) { os << v << "->" << child << "\n"; } } @@ -298,9 +300,9 @@ class DotFileWriter { * * @param os The output stream to write the DOT representation of the 
computational DAG. */ - template - void write_schedule(std::ostream &os, const BspSchedule &schedule) const { - write_graph_structure(os, schedule.getInstance().getComputationalDag(), VertexWriterSchedule_DOT(schedule)); + template + void WriteSchedule(std::ostream &os, const BspSchedule &schedule) const { + WriteGraphStructure(os, schedule.GetInstance().GetComputationalDag(), VertexWriterScheduleDot(schedule)); } /** @@ -317,109 +319,106 @@ class DotFileWriter { * * @param filename The name of the file to write the DOT representation of the computational DAG. */ - template - void write_schedule(const std::string &filename, const BspSchedule &schedule) const { + template + void WriteSchedule(const std::string &filename, const BspSchedule &schedule) const { std::ofstream os(filename); - write_schedule(os, schedule); + WriteSchedule(os, schedule); } - template - void write_schedule_cs(std::ostream &os, const BspScheduleCS &schedule) const { - write_graph_structure(os, schedule.getInstance().getComputationalDag(), VertexWriterScheduleCS_DOT(schedule)); + template + void WriteScheduleCS(std::ostream &os, const BspScheduleCS &schedule) const { + WriteGraphStructure(os, schedule.GetInstance().GetComputationalDag(), VertexWriterScheduleCsDot(schedule)); } - template - void write_schedule_cs(const std::string &filename, const BspScheduleCS &schedule) const { + template + void WriteScheduleCS(const std::string &filename, const BspScheduleCS &schedule) const { std::ofstream os(filename); - write_schedule_cs(os, schedule); + WriteScheduleCS(os, schedule); } - template - void write_schedule_recomp(std::ostream &os, const BspScheduleRecomp &schedule) const { - write_graph_structure(os, schedule.getInstance().getComputationalDag(), VertexWriterScheduleRecomp_DOT(schedule)); + template + void WriteScheduleRecomp(std::ostream &os, const BspScheduleRecomp &schedule) const { + WriteGraphStructure(os, schedule.GetInstance().GetComputationalDag(), 
VertexWriterScheduleRecompDot(schedule)); } - template - void write_schedule_recomp(const std::string &filename, const BspScheduleRecomp &schedule) const { + template + void WriteScheduleRecomp(const std::string &filename, const BspScheduleRecomp &schedule) const { std::ofstream os(filename); - write_schedule_recomp(os, schedule); + WriteScheduleRecomp(os, schedule); } - template - void write_schedule_recomp_duplicate(std::ostream &os, const BspScheduleRecomp &schedule) const { - const auto &g = schedule.getInstance().getComputationalDag(); + template + void WriteScheduleRecompDuplicate(std::ostream &os, const BspScheduleRecomp &schedule) const { + const auto &g = schedule.GetInstance().GetComputationalDag(); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::vector names(schedule.getTotalAssignments()); - std::vector node_to_proc(schedule.getTotalAssignments()); - std::vector node_to_superstep(schedule.getTotalAssignments()); + std::vector names(schedule.GetTotalAssignments()); + std::vector nodeToProc(schedule.GetTotalAssignments()); + std::vector nodeToSuperstep(schedule.GetTotalAssignments()); - std::unordered_map> vertex_to_idx; + std::unordered_map> vertexToIdx; - using vertex_type_t_or_default - = std::conditional_t, v_type_t, unsigned>; - using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; + using VertexTypeTOrDefault = std::conditional_t, VTypeT, unsigned>; + using EdgeCommwTOrDefault = std::conditional_t, ECommwT, VCommwT>; - using cdag_vertex_impl_t - = cdag_vertex_impl, v_workw_t, v_commw_t, v_memw_t, vertex_type_t_or_default>; - using cdag_edge_impl_t = cdag_edge_impl; + using CDagVertexImplT + = CDagVertexImpl, VWorkwT, VCommwT, VMemwT, VertexTypeTOrDefault>; + using CDagEdgeImplT = CDagEdgeImpl; - using graph_t = computational_dag_edge_idx_vector_impl; + using GraphT2 = ComputationalDagEdgeIdxVectorImpl; - graph_t g2; + GraphT2 g2; - size_t idx_new = 0; + size_t idxNew = 0; - for (const auto &node : 
g.vertices()) { - if (schedule.assignments(node).size() == 1) { - g2.add_vertex( - g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node), g.vertex_type(node)); + for (const auto &node : g.Vertices()) { + if (schedule.Assignments(node).size() == 1) { + g2.AddVertex(g.VertexWorkWeight(node), g.VertexCommWeight(node), g.VertexMemWeight(node), g.VertexType(node)); - names[idx_new] = std::to_string(node); - node_to_proc[idx_new] = schedule.assignments(node)[0].first; - node_to_superstep[idx_new] = schedule.assignments(node)[0].second; + names[idxNew] = std::to_string(node); + nodeToProc[idxNew] = schedule.Assignments(node)[0].first; + nodeToSuperstep[idxNew] = schedule.Assignments(node)[0].second; - vertex_to_idx.insert({node, {idx_new}}); - idx_new++; + vertexToIdx.insert({node, {idxNew}}); + idxNew++; } else { std::vector idxs; - for (unsigned i = 0; i < schedule.assignments(node).size(); ++i) { - g2.add_vertex( - g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node), g.vertex_type(node)); + for (unsigned i = 0; i < schedule.Assignments(node).size(); ++i) { + g2.AddVertex(g.VertexWorkWeight(node), g.VertexCommWeight(node), g.VertexMemWeight(node), g.VertexType(node)); - names[idx_new] = std::to_string(node).append("_").append(std::to_string(i)); - node_to_proc[idx_new] = schedule.assignments(node)[i].first; - node_to_superstep[idx_new] = schedule.assignments(node)[i].second; + names[idxNew] = std::to_string(node).append("_").append(std::to_string(i)); + nodeToProc[idxNew] = schedule.Assignments(node)[i].first; + nodeToSuperstep[idxNew] = schedule.Assignments(node)[i].second; - idxs.push_back(idx_new++); + idxs.push_back(idxNew++); } - vertex_to_idx.insert({node, idxs}); + vertexToIdx.insert({node, idxs}); } } - for (const auto &[key, val] : vertex_to_idx) { + for (const auto &[key, val] : vertexToIdx) { if (val.size() == 1) { - for (const auto &target : g.children(key)) { - for (const auto &new_node_target : 
vertex_to_idx[target]) { - g2.add_edge(val[0], new_node_target); + for (const auto &target : g.Children(key)) { + for (const auto &newNodeTarget : vertexToIdx[target]) { + g2.AddEdge(val[0], newNodeTarget); } } } else { - std::unordered_set assigned_processors; + std::unordered_set assignedProcessors; - for (const auto &assignment : schedule.assignments(key)) { - assigned_processors.insert(assignment.first); + for (const auto &assignment : schedule.Assignments(key)) { + assignedProcessors.insert(assignment.first); } for (unsigned i = 0; i < val.size(); i++) { - for (const auto &target : g.children(key)) { - for (size_t j = 0; j < vertex_to_idx[target].size(); j++) { - if (assigned_processors.find(node_to_proc[vertex_to_idx[target][j]]) == assigned_processors.end() - || node_to_proc[val[i]] == node_to_proc[vertex_to_idx[target][j]]) { - g2.add_edge(val[i], vertex_to_idx[target][j]); + for (const auto &target : g.Children(key)) { + for (size_t j = 0; j < vertexToIdx[target].size(); j++) { + if (assignedProcessors.find(nodeToProc[vertexToIdx[target][j]]) == assignedProcessors.end() + || nodeToProc[val[i]] == nodeToProc[vertexToIdx[target][j]]) { + g2.AddEdge(val[i], vertexToIdx[target][j]); } } } @@ -427,43 +426,43 @@ class DotFileWriter { } } - write_graph_structure(os, g2, VertexWriterDuplicateRecompSchedule_DOT(g2, names, node_to_proc, node_to_superstep)); + WriteGraphStructure(os, g2, VertexWriterDuplicateRecompScheduleDot(g2, names, nodeToProc, nodeToSuperstep)); } - template - void write_schedule_recomp_duplicate(const std::string &filename, const BspScheduleRecomp &schedule) const { + template + void WriteScheduleRecompDuplicate(const std::string &filename, const BspScheduleRecomp &schedule) const { std::ofstream os(filename); - write_schedule_recomp_duplicate(os, schedule); + WriteScheduleRecompDuplicate(os, schedule); } - template - void write_colored_graph(std::ostream &os, const Graph_t &graph, const color_container_t &colors) const { - 
static_assert(is_computational_dag_v, "Graph_t must be a computational DAG"); + template + void WriteColoredGraph(std::ostream &os, const GraphT &graph, const ColorContainerT &colors) const { + static_assert(isComputationalDagV, "GraphT must be a computational DAG"); - write_graph_structure(os, graph, ColoredVertexWriterGraph_DOT(graph, colors)); + WriteGraphStructure(os, graph, ColoredVertexWriterGraphDot(graph, colors)); } - template - void write_colored_graph(const std::string &filename, const Graph_t &graph, const color_container_t &colors) const { - static_assert(is_computational_dag_v, "Graph_t must be a computational DAG"); + template + void WriteColoredGraph(const std::string &filename, const GraphT &graph, const ColorContainerT &colors) const { + static_assert(isComputationalDagV, "GraphT must be a computational DAG"); std::ofstream os(filename); - write_colored_graph(os, graph, colors); + WriteColoredGraph(os, graph, colors); } - template - void write_graph(std::ostream &os, const Graph_t &graph) const { - static_assert(is_computational_dag_v, "Graph_t must be a computational DAG"); + template + void WriteGraph(std::ostream &os, const GraphT &graph) const { + static_assert(isComputationalDagV, "GraphT must be a computational DAG"); - write_graph_structure(os, graph, VertexWriterGraph_DOT(graph)); + WriteGraphStructure(os, graph, VertexWriterGraphDot(graph)); } - template - void write_graph(const std::string &filename, const Graph_t &graph) const { - static_assert(is_computational_dag_v, "Graph_t must be a computational DAG"); + template + void WriteGraph(const std::string &filename, const GraphT &graph) const { + static_assert(isComputationalDagV, "GraphT must be a computational DAG"); std::ofstream os(filename); - write_graph(os, graph); + WriteGraph(os, graph); } }; diff --git a/include/osp/auxiliary/io/arch_file_reader.hpp b/include/osp/auxiliary/io/arch_file_reader.hpp index 68a269c2..e97952b5 100644 --- a/include/osp/auxiliary/io/arch_file_reader.hpp 
+++ b/include/osp/auxiliary/io/arch_file_reader.hpp @@ -27,8 +27,8 @@ limitations under the License. namespace osp { namespace file_reader { -template -bool readBspArchitecture(std::ifstream &infile, BspArchitecture &architecture) { +template +bool ReadBspArchitecture(std::ifstream &infile, BspArchitecture &architecture) { std::string line; // Skip comment lines @@ -40,50 +40,50 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit // Parse architecture parameters unsigned p = 0; - int g = 0, L = 0; - int mem_type = -1; - int M = 0; + int g = 0, l = 0; + int memType = -1; + int m = 0; std::istringstream iss(line); - if (!(iss >> p >> g >> L)) { + if (!(iss >> p >> g >> l)) { std::cerr << "Error: Failed to parse p, g, L.\n"; return false; } // Try to read optional mem_type and M - if (!(iss >> mem_type >> M)) { - mem_type = -1; // Memory info not present + if (!(iss >> memType >> m)) { + memType = -1; // Memory info not present } - architecture.setNumberOfProcessors(p); - architecture.setCommunicationCosts(static_cast>(g)); - architecture.setSynchronisationCosts(static_cast>(L)); + architecture.SetNumberOfProcessors(p); + architecture.SetCommunicationCosts(static_cast>(g)); + architecture.SetSynchronisationCosts(static_cast>(l)); - if (0 <= mem_type && mem_type <= 3) { - using memw_t = v_memw_t; - switch (mem_type) { + if (0 <= memType && memType <= 3) { + using MemwT = VMemwT; + switch (memType) { case 0: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::NONE); + architecture.SetMemoryConstraintType(MemoryConstraintType::NONE); break; case 1: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL); - architecture.setMemoryBound(static_cast(M)); + architecture.SetMemoryConstraintType(MemoryConstraintType::LOCAL); + architecture.SetMemoryBound(static_cast(m)); break; case 2: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::GLOBAL); - architecture.setMemoryBound(static_cast(M)); + 
architecture.SetMemoryConstraintType(MemoryConstraintType::GLOBAL); + architecture.SetMemoryBound(static_cast(m)); break; case 3: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT); - architecture.setMemoryBound(static_cast(M)); + architecture.SetMemoryConstraintType(MemoryConstraintType::PERSISTENT_AND_TRANSIENT); + architecture.SetMemoryBound(static_cast(m)); break; default: std::cerr << "Invalid memory type.\n"; return false; } - } else if (mem_type == -1) { + } else if (memType == -1) { std::cout << "No memory type specified. Assuming \"NONE\".\n"; - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::NONE); + architecture.SetMemoryConstraintType(MemoryConstraintType::NONE); } else { std::cerr << "Invalid memory type.\n"; return false; @@ -120,7 +120,7 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit return false; } - architecture.SetSendCosts(fromProc, toProc, static_cast>(value)); + architecture.SetSendCosts(fromProc, toProc, static_cast>(value)); } // Ensure there are no remaining non-comment lines @@ -134,15 +134,15 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit return true; } -template -bool readBspArchitecture(const std::string &filename, BspArchitecture &architecture) { +template +bool ReadBspArchitecture(const std::string &filename, BspArchitecture &architecture) { std::ifstream infile(filename); if (!infile.is_open()) { std::cerr << "Unable to open machine parameter file: " << filename << '\n'; return false; } - return readBspArchitecture(infile, architecture); + return ReadBspArchitecture(infile, architecture); } } // namespace file_reader diff --git a/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp b/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp index 41062016..24b02dac 100644 --- a/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp +++ b/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp @@ -27,31 +27,31 @@ limitations under the 
License. namespace osp { namespace file_writer { -template -void write_txt(std::ostream &os, const BspSchedule &schedule) { - os << "%% BspSchedule for " << schedule.getInstance().numberOfProcessors() << " processors and " - << schedule.numberOfSupersteps() << " supersteps." << std::endl; - os << schedule.getInstance().numberOfVertices() << " " << schedule.getInstance().numberOfProcessors() << " " - << schedule.numberOfSupersteps() << std::endl; - - for (const auto &vertex : schedule.getInstance().getComputationalDag().vertices()) { - os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex) << std::endl; +template +void WriteTxt(std::ostream &os, const BspSchedule &schedule) { + os << "%% BspSchedule for " << schedule.GetInstance().NumberOfProcessors() << " processors and " + << schedule.NumberOfSupersteps() << " supersteps." << std::endl; + os << schedule.GetInstance().NumberOfVertices() << " " << schedule.GetInstance().NumberOfProcessors() << " " + << schedule.NumberOfSupersteps() << std::endl; + + for (const auto &vertex : schedule.GetInstance().GetComputationalDag().Vertices()) { + os << vertex << " " << schedule.AssignedProcessor(vertex) << " " << schedule.AssignedSuperstep(vertex) << std::endl; } } -template -void write_txt(const std::string &filename, const BspSchedule &schedule) { +template +void WriteTxt(const std::string &filename, const BspSchedule &schedule) { std::ofstream os(filename); - write_txt(os, schedule); + WriteTxt(os, schedule); } -template -void write_txt(std::ostream &os, const BspScheduleCS &schedule) { - os << "%% BspSchedule for " << schedule.getInstance().numberOfProcessors() << " processors and " - << schedule.numberOfSupersteps() << " supersteps." 
<< std::endl; - os << schedule.getInstance().numberOfVertices() << " " << schedule.getInstance().numberOfProcessors() << " " - << schedule.numberOfSupersteps() << " "; - if (schedule.getCommunicationSchedule().empty()) { +template +void WriteTxt(std::ostream &os, const BspScheduleCS &schedule) { + os << "%% BspSchedule for " << schedule.GetInstance().NumberOfProcessors() << " processors and " + << schedule.NumberOfSupersteps() << " supersteps." << std::endl; + os << schedule.GetInstance().NumberOfVertices() << " " << schedule.GetInstance().NumberOfProcessors() << " " + << schedule.NumberOfSupersteps() << " "; + if (schedule.GetCommunicationSchedule().empty()) { os << 0 << " "; } else { os << 1 << " "; @@ -59,80 +59,79 @@ void write_txt(std::ostream &os, const BspScheduleCS &schedule) { os << std::endl; - for (const auto &vertex : schedule.getInstance().getComputationalDag().vertices()) { - os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex) << std::endl; + for (const auto &vertex : schedule.GetInstance().GetComputationalDag().Vertices()) { + os << vertex << " " << schedule.AssignedProcessor(vertex) << " " << schedule.AssignedSuperstep(vertex) << std::endl; } - if (schedule.getCommunicationSchedule().empty()) { + if (schedule.GetCommunicationSchedule().empty()) { os << "%% No communication schedule available." << std::endl; } else { os << "%% Communication schedule available." 
<< std::endl; - for (const auto &[key, val] : schedule.getCommunicationSchedule()) { + for (const auto &[key, val] : schedule.GetCommunicationSchedule()) { os << std::get<0>(key) << " " << std::get<1>(key) << " " << std::get<2>(key) << " " << val << std::endl; } } } -template -void write_txt(const std::string &filename, const BspScheduleCS &schedule) { +template +void WriteTxt(const std::string &filename, const BspScheduleCS &schedule) { std::ofstream os(filename); - write_txt(os, schedule); + WriteTxt(os, schedule); } -template -void write_sankey(std::ostream &os, const BspScheduleCS &schedule) { +template +void WriteSankey(std::ostream &os, const BspScheduleCS &schedule) { // Computing workloads - std::vector>> proc_workloads( - schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors(), 0)); + std::vector>> procWorkloads( + schedule.NumberOfSupersteps(), std::vector>(schedule.GetInstance().NumberOfProcessors(), 0)); - for (size_t node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - proc_workloads[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] - += schedule.getInstance().getComputationalDag().vertex_work_weight(node); + for (size_t node = 0; node < schedule.GetInstance().NumberOfVertices(); node++) { + procWorkloads[schedule.AssignedSuperstep(node)][schedule.AssignedProcessor(node)] + += schedule.GetInstance().GetComputationalDag().VertexWorkWeight(node); } // Computing communicationloads - std::vector>>> commloads( - schedule.numberOfSupersteps() - 1, - std::vector>>( - schedule.getInstance().numberOfProcessors(), - std::vector>(schedule.getInstance().numberOfProcessors(), 0))); - - for (const auto &[comm_triple, sstep] : schedule.getCommunicationSchedule()) { - commloads[sstep][std::get<1>(comm_triple)][std::get<2>(comm_triple)] - += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(comm_triple)); + std::vector>>> commloads( + schedule.NumberOfSupersteps() - 1, + 
std::vector>>(schedule.GetInstance().NumberOfProcessors(), + std::vector>(schedule.GetInstance().NumberOfProcessors(), 0))); + + for (const auto &[commTriple, sstep] : schedule.GetCommunicationSchedule()) { + commloads[sstep][std::get<1>(commTriple)][std::get<2>(commTriple)] + += schedule.GetInstance().GetComputationalDag().VertexCommWeight(std::get<0>(commTriple)); } os << "BspSchedule: Number of Processors, Number of Supersteps" << std::endl; - os << schedule.getInstance().numberOfProcessors() << "," << schedule.numberOfSupersteps() << std::endl; + os << schedule.GetInstance().NumberOfProcessors() << "," << schedule.NumberOfSupersteps() << std::endl; os << "Processor workloads in Superstep" << std::endl; - for (const auto &sstep : proc_workloads) { - for (size_t proc_ind = 0; proc_ind < sstep.size(); proc_ind++) { - if (proc_ind != 0) { + for (const auto &sstep : procWorkloads) { + for (size_t procInd = 0; procInd < sstep.size(); procInd++) { + if (procInd != 0) { os << ","; } - os << sstep[proc_ind]; + os << sstep[procInd]; } os << std::endl; } os << "Communication between Processors in Supersteps" << std::endl; for (size_t sstep = 0; sstep < commloads.size(); sstep++) { - for (size_t send_proc = 0; send_proc < schedule.getInstance().numberOfProcessors(); send_proc++) { - for (size_t receive_proc = 0; receive_proc < schedule.getInstance().numberOfProcessors(); receive_proc++) { + for (size_t sendProc = 0; sendProc < schedule.GetInstance().NumberOfProcessors(); sendProc++) { + for (size_t receiveProc = 0; receiveProc < schedule.GetInstance().NumberOfProcessors(); receiveProc++) { // if (commloads[ sstep ][ send_proc ][ receive_proc ] == 0) continue; - os << sstep + 1 << "," << send_proc + 1 << "," << receive_proc + 1 << "," - << commloads[sstep][send_proc][receive_proc] << std::endl; + os << sstep + 1 << "," << sendProc + 1 << "," << receiveProc + 1 << "," << commloads[sstep][sendProc][receiveProc] + << std::endl; } } } } -template -void write_sankey(const 
std::string &filename, const BspScheduleCS &schedule) { +template +void WriteSankey(const std::string &filename, const BspScheduleCS &schedule) { std::ofstream os(filename); - write_sankey(os, schedule); + WriteSankey(os, schedule); } } // namespace file_writer diff --git a/include/osp/auxiliary/io/dot_graph_file_reader.hpp b/include/osp/auxiliary/io/dot_graph_file_reader.hpp index 2b4b7b58..53f3a2e6 100644 --- a/include/osp/auxiliary/io/dot_graph_file_reader.hpp +++ b/include/osp/auxiliary/io/dot_graph_file_reader.hpp @@ -33,7 +33,7 @@ limitations under the License. namespace osp { namespace file_reader { -std::vector split(const std::string &s, char delimiter) { +std::vector Split(const std::string &s, char delimiter) { std::vector tokens; std::string token; std::istringstream tokenStream(s); @@ -43,7 +43,7 @@ std::vector split(const std::string &s, char delimiter) { return tokens; } -std::string removeLeadingAndTrailingQuotes(const std::string &str) { +std::string RemoveLeadingAndTrailingQuotes(const std::string &str) { if (str.empty()) { return str; } @@ -62,27 +62,27 @@ std::string removeLeadingAndTrailingQuotes(const std::string &str) { return str.substr(start, end - start); } -template -void parseDotNode(const std::string &line, Graph_t &G) { +template +void ParseDotNode(const std::string &line, GraphT &g) { std::size_t pos = line.find('['); if (pos == std::string::npos) { return; } - std::size_t end_pos = line.find(']'); - if (end_pos == std::string::npos) { + std::size_t endPos = line.find(']'); + if (endPos == std::string::npos) { return; } - std::string properties = line.substr(pos + 1, end_pos - pos - 1); - std::vector keyValuePairs = split(properties, ';'); + std::string properties = line.substr(pos + 1, endPos - pos - 1); + std::vector keyValuePairs = Split(properties, ';'); - v_workw_t work_weight = 0; - v_memw_t mem_weight = 0; - v_commw_t comm_weight = 0; - v_type_t type = 0; + VWorkwT workWeight = 0; + VMemwT memWeight = 0; + VCommwT commWeight = 
0; + VTypeT type = 0; for (const std::string &keyValuePair : keyValuePairs) { - std::vector keyValue = split(keyValuePair, '='); + std::vector keyValue = Split(keyValuePair, '='); if (keyValue.size() != 2) { continue; } @@ -96,68 +96,68 @@ void parseDotNode(const std::string &line, Graph_t &G) { continue; } - std::string value = removeLeadingAndTrailingQuotes(keyValue[1]); + std::string value = RemoveLeadingAndTrailingQuotes(keyValue[1]); try { if (key == "work_weight") { - work_weight = static_cast>(std::stoll(value)); + workWeight = static_cast>(std::stoll(value)); } else if (key == "mem_weight") { - mem_weight = static_cast>(std::stoll(value)); + memWeight = static_cast>(std::stoll(value)); } else if (key == "comm_weight") { - comm_weight = static_cast>(std::stoll(value)); + commWeight = static_cast>(std::stoll(value)); } else if (key == "type") { - type = static_cast>(std::stoll(value)); + type = static_cast>(std::stoll(value)); } } catch (...) { std::cerr << "Warning: Failed to parse property value: " << value << "\n"; } } - if constexpr (is_constructable_cdag_typed_vertex_v) { - G.add_vertex(work_weight, comm_weight, mem_weight, type); + if constexpr (isConstructableCdagTypedVertexV) { + g.AddVertex(workWeight, commWeight, memWeight, type); } else { - G.add_vertex(work_weight, comm_weight, mem_weight); + g.AddVertex(workWeight, commWeight, memWeight); } } -template -void parseDotEdge(const std::string &line, Graph_t &G) { - using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; +template +void ParseDotEdge(const std::string &line, GraphT &g) { + using EdgeCommwTOrDefault = std::conditional_t, ECommwT, VCommwT>; - std::size_t arrow_pos = line.find("->"); - if (arrow_pos == std::string::npos) { + std::size_t arrowPos = line.find("->"); + if (arrowPos == std::string::npos) { return; } - std::string source_str = line.substr(0, arrow_pos); - source_str.erase(source_str.find_last_not_of(" \t\n\r\f\v") + 1); + std::string sourceStr = 
line.substr(0, arrowPos); + sourceStr.erase(sourceStr.find_last_not_of(" \t\n\r\f\v") + 1); - std::string target_str; - std::size_t bracket_pos = line.find('[', arrow_pos); - if (bracket_pos != std::string::npos) { - target_str = line.substr(arrow_pos + 2, bracket_pos - (arrow_pos + 2)); + std::string targetStr; + std::size_t bracketPos = line.find('[', arrowPos); + if (bracketPos != std::string::npos) { + targetStr = line.substr(arrowPos + 2, bracketPos - (arrowPos + 2)); } else { - target_str = line.substr(arrow_pos + 2); + targetStr = line.substr(arrowPos + 2); } - target_str.erase(0, target_str.find_first_not_of(" \t\n\r\f\v")); - target_str.erase(target_str.find_last_not_of(" \t\n\r\f\v") + 1); + targetStr.erase(0, targetStr.find_first_not_of(" \t\n\r\f\v")); + targetStr.erase(targetStr.find_last_not_of(" \t\n\r\f\v") + 1); try { - vertex_idx_t source_node = static_cast>(std::stoll(source_str)); - vertex_idx_t target_node = static_cast>(std::stoll(target_str)); + VertexIdxT sourceNode = static_cast>(std::stoll(sourceStr)); + VertexIdxT targetNode = static_cast>(std::stoll(targetStr)); - if constexpr (is_constructable_cdag_comm_edge_v) { - edge_commw_t_or_default comm_weight = 0; + if constexpr (isConstructableCdagCommEdgeV) { + EdgeCommwTOrDefault commWeight = 0; - if (bracket_pos != std::string::npos) { - std::size_t end_bracket_pos = line.find(']', bracket_pos); - if (end_bracket_pos != std::string::npos) { - std::string properties = line.substr(bracket_pos + 1, end_bracket_pos - bracket_pos - 1); - std::vector keyValuePairs = split(properties, ';'); + if (bracketPos != std::string::npos) { + std::size_t endBracketPos = line.find(']', bracketPos); + if (endBracketPos != std::string::npos) { + std::string properties = line.substr(bracketPos + 1, endBracketPos - bracketPos - 1); + std::vector keyValuePairs = Split(properties, ';'); for (const auto &keyValuePair : keyValuePairs) { - std::vector keyValue = split(keyValuePair, '='); + std::vector keyValue = 
Split(keyValuePair, '='); if (keyValue.size() != 2) { continue; } @@ -169,26 +169,26 @@ void parseDotEdge(const std::string &line, Graph_t &G) { continue; } - std::string value = removeLeadingAndTrailingQuotes(keyValue[1]); + std::string value = RemoveLeadingAndTrailingQuotes(keyValue[1]); if (key == "comm_weight") { - comm_weight = static_cast(std::stoll(value)); + commWeight = static_cast(std::stoll(value)); } } } } - G.add_edge(source_node, target_node, comm_weight); + g.AddEdge(sourceNode, targetNode, commWeight); } else { - G.add_edge(source_node, target_node); + g.AddEdge(sourceNode, targetNode); } } catch (...) { std::cerr << "Warning: Failed to parse edge nodes from line: " << line << "\n"; } } -template -bool readComputationalDagDotFormat(std::ifstream &infile, Graph_t &graph) { +template +bool ReadComputationalDagDotFormat(std::ifstream &infile, GraphT &graph) { std::string line; while (std::getline(infile, line)) { if (line.length() > MAX_LINE_LENGTH) { @@ -204,24 +204,24 @@ bool readComputationalDagDotFormat(std::ifstream &infile, Graph_t &graph) { if (line.find("->") != std::string::npos) { // This is an edge - parseDotEdge(line, graph); + ParseDotEdge(line, graph); } else if (line.find('[') != std::string::npos) { // This is a node - parseDotNode(line, graph); + ParseDotNode(line, graph); } } return true; } -template -bool readComputationalDagDotFormat(const std::string &filename, Graph_t &graph) { +template +bool ReadComputationalDagDotFormat(const std::string &filename, GraphT &graph) { if (std::filesystem::path(filename).extension() != ".dot") { std::cerr << "Error: Only .dot files are accepted.\n"; return false; } - if (!isPathSafe(filename)) { + if (!IsPathSafe(filename)) { std::cerr << "Error: Unsafe file path.\n"; return false; } @@ -232,7 +232,7 @@ bool readComputationalDagDotFormat(const std::string &filename, Graph_t &graph) return false; } - return readComputationalDagDotFormat(infile, graph); + return ReadComputationalDagDotFormat(infile, 
graph); } } // namespace file_reader diff --git a/include/osp/auxiliary/io/filepath_checker.hpp b/include/osp/auxiliary/io/filepath_checker.hpp index f3ac03a4..e27f9abf 100644 --- a/include/osp/auxiliary/io/filepath_checker.hpp +++ b/include/osp/auxiliary/io/filepath_checker.hpp @@ -29,10 +29,10 @@ limitations under the License. namespace osp { namespace file_reader { -constexpr std::size_t MAX_LINE_LENGTH = 1 << 14; // 16 KB +static constexpr std::size_t MAX_LINE_LENGTH = 1 << 14; // 16 KB // Path safety to avoid symlink, traversal or malicious file types -inline bool isPathSafe(const std::string &path) { +inline bool IsPathSafe(const std::string &path) { try { std::filesystem::path resolved = std::filesystem::weakly_canonical(path); if (std::filesystem::is_symlink(resolved)) { diff --git a/include/osp/auxiliary/io/general_file_reader.hpp b/include/osp/auxiliary/io/general_file_reader.hpp index e05e5277..e847220e 100644 --- a/include/osp/auxiliary/io/general_file_reader.hpp +++ b/include/osp/auxiliary/io/general_file_reader.hpp @@ -26,9 +26,9 @@ limitations under the License. 
namespace osp { namespace file_reader { -template -bool readGraph(const std::string &filename, Graph_t &graph) { - if (!isPathSafe(filename)) { +template +bool ReadGraph(const std::string &filename, GraphT &graph) { + if (!IsPathSafe(filename)) { std::cerr << "Error: Unsafe file path (possible traversal or invalid type).\n"; return false; } @@ -40,18 +40,18 @@ bool readGraph(const std::string &filename, Graph_t &graph) { } bool status; - std::string file_ending = filename.substr(filename.rfind(".") + 1); - if (file_ending == "lhdag") { - status = file_reader::readComputationalDagHyperdagFormat(infile, graph); - } else if (file_ending == "hdag") { - status = file_reader::readComputationalDagHyperdagFormatDB(infile, graph); - } else if (file_ending == "mtx") { - status = file_reader::readComputationalDagMartixMarketFormat(infile, graph); - } else if (file_ending == "dot") { - status = file_reader::readComputationalDagDotFormat(infile, graph); + std::string fileEnding = filename.substr(filename.rfind(".") + 1); + if (fileEnding == "lhdag") { + status = file_reader::ReadComputationalDagHyperdagFormat(infile, graph); + } else if (fileEnding == "hdag") { + status = file_reader::ReadComputationalDagHyperdagFormatDB(infile, graph); + } else if (fileEnding == "mtx") { + status = file_reader::ReadComputationalDagMartixMarketFormat(infile, graph); + } else if (fileEnding == "dot") { + status = file_reader::ReadComputationalDagDotFormat(infile, graph); } else { - std::cout << "Unknown file ending: ." << file_ending << " ...assuming hyperDag format." << std::endl; - status = file_reader::readComputationalDagHyperdagFormatDB(infile, graph); + std::cout << "Unknown file ending: ." << fileEnding << " ...assuming hyperDag format." 
<< std::endl; + status = file_reader::ReadComputationalDagHyperdagFormatDB(infile, graph); } return status; diff --git a/include/osp/auxiliary/io/hdag_graph_file_reader.hpp b/include/osp/auxiliary/io/hdag_graph_file_reader.hpp index b96c86ea..20c751a1 100644 --- a/include/osp/auxiliary/io/hdag_graph_file_reader.hpp +++ b/include/osp/auxiliary/io/hdag_graph_file_reader.hpp @@ -34,8 +34,8 @@ limitations under the License. namespace osp { namespace file_reader { -template -bool readComputationalDagHyperdagFormat(std::ifstream &infile, Graph_t &graph) { +template +bool ReadComputationalDagHyperdagFormat(std::ifstream &infile, GraphT &graph) { std::string line; // Skip comment lines starting with '%' @@ -46,16 +46,16 @@ bool readComputationalDagHyperdagFormat(std::ifstream &infile, Graph_t &graph) { return false; } - int hEdges, pins, N; + int hEdges, pins, n; std::istringstream headerStream(line); - if (!(headerStream >> hEdges >> N >> pins) || N <= 0 || hEdges <= 0 || pins <= 0) { + if (!(headerStream >> hEdges >> n >> pins) || n <= 0 || hEdges <= 0 || pins <= 0) { std::cerr << "Incorrect input file format (invalid or non-positive sizes).\n"; return false; } - const vertex_idx_t num_nodes = static_cast>(N); - for (vertex_idx_t i = 0; i < num_nodes; i++) { - graph.add_vertex(1, 1, 1); + const VertexIdxT numNodes = static_cast>(n); + for (VertexIdxT i = 0; i < numNodes; i++) { + graph.AddVertex(1, 1, 1); } std::vector edgeSource(static_cast(hEdges), -1); @@ -70,12 +70,12 @@ bool readComputationalDagHyperdagFormat(std::ifstream &infile, Graph_t &graph) { std::istringstream pinStream(line); int hEdge, node; - if (!(pinStream >> hEdge >> node) || hEdge < 0 || node < 0 || hEdge >= hEdges || node >= N) { + if (!(pinStream >> hEdge >> node) || hEdge < 0 || node < 0 || hEdge >= hEdges || node >= n) { std::cerr << "Incorrect input file format (invalid pin line or out-of-range index).\n"; return false; } - const std::size_t edgeIdx = static_cast>(hEdge); + const std::size_t 
edgeIdx = static_cast>(hEdge); if (edgeIdx >= edgeSource.size()) { std::cerr << "Error: hEdge out of bounds.\n"; return false; @@ -84,12 +84,12 @@ bool readComputationalDagHyperdagFormat(std::ifstream &infile, Graph_t &graph) { if (edgeSource[edgeIdx] == -1) { edgeSource[edgeIdx] = node; } else { - graph.add_edge(static_cast>(edgeSource[edgeIdx]), static_cast>(node)); + graph.AddEdge(static_cast>(edgeSource[edgeIdx]), static_cast>(node)); } } // Read node weights - for (int i = 0; i < N; ++i) { + for (int i = 0; i < n; ++i) { while (std::getline(infile, line) && line[0] == '%') {} if (line.empty() || line.length() > MAX_LINE_LENGTH) { std::cerr << "Incorrect input file format (invalid or long line).\n"; @@ -98,16 +98,16 @@ bool readComputationalDagHyperdagFormat(std::ifstream &infile, Graph_t &graph) { std::istringstream weightStream(line); int node; - v_workw_t work; - v_commw_t comm; + VWorkwT work; + VCommwT comm; - if (!(weightStream >> node >> work >> comm) || node < 0 || node >= N) { + if (!(weightStream >> node >> work >> comm) || node < 0 || node >= n) { std::cerr << "Incorrect input file format (invalid node or weights).\n"; return false; } - graph.set_vertex_comm_weight(static_cast>(node), comm); - graph.set_vertex_work_weight(static_cast>(node), work); + graph.SetVertexCommWeight(static_cast>(node), comm); + graph.SetVertexWorkWeight(static_cast>(node), work); } // Check for unexpected trailing lines @@ -120,7 +120,7 @@ bool readComputationalDagHyperdagFormat(std::ifstream &infile, Graph_t &graph) { } */ - if (!is_acyclic(graph)) { + if (!IsAcyclic(graph)) { std::cerr << "Error: DAG is not acyclic.\n"; return false; } @@ -128,9 +128,9 @@ bool readComputationalDagHyperdagFormat(std::ifstream &infile, Graph_t &graph) { return true; } -template -bool readComputationalDagHyperdagFormat(const std::string &filename, Graph_t &graph) { - if (!isPathSafe(filename)) { +template +bool ReadComputationalDagHyperdagFormat(const std::string &filename, GraphT &graph) { 
+ if (!IsPathSafe(filename)) { std::cerr << "Error: Unsafe file path (possible traversal or invalid type).\n"; return false; } @@ -141,11 +141,11 @@ bool readComputationalDagHyperdagFormat(const std::string &filename, Graph_t &gr return false; } - return readComputationalDagHyperdagFormat(infile, graph); + return ReadComputationalDagHyperdagFormat(infile, graph); } -template -bool readComputationalDagHyperdagFormatDB(std::ifstream &infile, Graph_t &graph) { +template +bool ReadComputationalDagHyperdagFormatDB(std::ifstream &infile, GraphT &graph) { std::string line; // Skip comment lines @@ -156,15 +156,15 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream &infile, Graph_t &graph) return false; } - int hEdges = 0, pins = 0, N = 0; + int hEdges = 0, pins = 0, n = 0; std::istringstream headerStream(line); - if (!(headerStream >> hEdges >> N >> pins) || N <= 0 || hEdges <= 0 || pins <= 0) { + if (!(headerStream >> hEdges >> n >> pins) || n <= 0 || hEdges <= 0 || pins <= 0) { std::cerr << "Incorrect input file format (invalid or non-positive sizes).\n"; return false; } - std::vector> hyperedge_comm_weights(static_cast(hEdges), 1); - std::vector> hyperedge_mem_weights(static_cast(hEdges), 1); + std::vector> hyperedgeCommWeights(static_cast(hEdges), 1); + std::vector> hyperedgeMemWeights(static_cast(hEdges), 1); // Read hyperedges for (int i = 0; i < hEdges; ++i) { @@ -175,25 +175,25 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream &infile, Graph_t &graph) } std::istringstream edgeStream(line); - int hEdge = -1, comm_weight = 1, mem_weight = 1; + int hEdge = -1, commWeight = 1, memWeight = 1; if (!(edgeStream >> hEdge)) { std::cerr << "Warning: Could not read hyperedge ID for hyperedge " << i << ".\n"; continue; } - edgeStream >> comm_weight >> mem_weight; // optional + edgeStream >> commWeight >> memWeight; // optional if (hEdge < 0 || hEdge >= hEdges) { std::cerr << "Error: Hyperedge ID " << hEdge << " is out of range (0 to " << hEdges - 1 << ").\n"; 
continue; } - hyperedge_comm_weights[static_cast(hEdge)] = static_cast>(comm_weight); - hyperedge_mem_weights[static_cast(hEdge)] = static_cast>(mem_weight); + hyperedgeCommWeights[static_cast(hEdge)] = static_cast>(commWeight); + hyperedgeMemWeights[static_cast(hEdge)] = static_cast>(memWeight); } - graph = Graph_t(static_cast>(N)); + graph = GraphT(static_cast>(n)); // Read vertices - for (int i = 0; i < N; ++i) { + for (int i = 0; i < n; ++i) { while (std::getline(infile, line) && line[0] == '%') {} if (line.empty() || line.length() > MAX_LINE_LENGTH) { std::cerr << "Warning: Skipping invalid or overly long line for vertex " << i << ".\n"; @@ -208,15 +208,15 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream &infile, Graph_t &graph) } vertexStream >> work >> type; - if (node < 0 || node >= N) { - std::cerr << "Error: Vertex ID " << node << " is out of range (0 to " << N - 1 << ").\n"; + if (node < 0 || node >= n) { + std::cerr << "Error: Vertex ID " << node << " is out of range (0 to " << n - 1 << ").\n"; continue; } - graph.set_vertex_work_weight(static_cast>(node), static_cast>(work)); + graph.SetVertexWorkWeight(static_cast>(node), static_cast>(work)); - if constexpr (has_typed_vertices_v) { - graph.set_vertex_type(static_cast>(node), static_cast>(type)); + if constexpr (hasTypedVerticesV) { + graph.SetVertexType(static_cast>(node), static_cast>(type)); } } @@ -238,7 +238,7 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream &infile, Graph_t &graph) continue; } - if (hEdge < 0 || hEdge >= hEdges || node < 0 || node >= N) { + if (hEdge < 0 || hEdge >= hEdges || node < 0 || node >= n) { std::cerr << "Error: Invalid pin indices at line " << i << ".\n"; continue; } @@ -248,23 +248,22 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream &infile, Graph_t &graph) if (edgeSource[edgeIdx] == -1) { edgeSource[edgeIdx] = node; - graph.set_vertex_comm_weight(static_cast>(node), hyperedge_comm_weights[edgeIdx]); - 
graph.set_vertex_mem_weight(static_cast>(node), hyperedge_mem_weights[edgeIdx]); + graph.SetVertexCommWeight(static_cast>(node), hyperedgeCommWeights[edgeIdx]); + graph.SetVertexMemWeight(static_cast>(node), hyperedgeMemWeights[edgeIdx]); } else { - if constexpr (is_modifiable_cdag_comm_edge_v) { - auto edge = graph.add_edge(static_cast>(edgeSource[edgeIdx]), - static_cast>(nodeIdx)); + if constexpr (isModifiableCdagCommEdgeV) { + auto edge = graph.AddEdge(static_cast>(edgeSource[edgeIdx]), + static_cast>(nodeIdx)); - graph.set_edge_comm_weight(edge.first, static_cast>(hyperedge_comm_weights[edgeIdx])); + graph.SetEdgeCommWeight(edge.first, static_cast>(hyperedgeCommWeights[edgeIdx])); } else { - graph.add_edge(static_cast>(edgeSource[edgeIdx]), - static_cast>(nodeIdx)); + graph.AddEdge(static_cast>(edgeSource[edgeIdx]), static_cast>(nodeIdx)); } } } - if (!is_acyclic(graph)) { + if (!IsAcyclic(graph)) { std::cerr << "Error: Constructed DAG is not acyclic.\n"; return false; } @@ -272,15 +271,15 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream &infile, Graph_t &graph) return true; } -template -bool readComputationalDagHyperdagFormatDB(const std::string &filename, Graph_t &graph) { +template +bool ReadComputationalDagHyperdagFormatDB(const std::string &filename, GraphT &graph) { // Optional: limit file extension for safety if (std::filesystem::path(filename).extension() != ".hdag") { std::cerr << "Error: Only .hdag files are accepted.\n"; return false; } - if (!isPathSafe(filename)) { + if (!IsPathSafe(filename)) { std::cerr << "Error: Unsafe file path (potential traversal or invalid type).\n"; return false; } @@ -291,7 +290,7 @@ bool readComputationalDagHyperdagFormatDB(const std::string &filename, Graph_t & return false; } - return readComputationalDagHyperdagFormatDB(infile, graph); + return ReadComputationalDagHyperdagFormatDB(infile, graph); } } // namespace file_reader diff --git a/include/osp/auxiliary/io/hdag_graph_file_writer.hpp 
b/include/osp/auxiliary/io/hdag_graph_file_writer.hpp index be0638ac..eb3fd9f1 100644 --- a/include/osp/auxiliary/io/hdag_graph_file_writer.hpp +++ b/include/osp/auxiliary/io/hdag_graph_file_writer.hpp @@ -32,50 +32,50 @@ namespace file_writer { * * This function converts a given graph into a hypergraph representation where each node * with outgoing edges becomes a hyperedge source. The format is compatible with the - * `readComputationalDagHyperdagFormatDB` reader. + * `ReadComputationalDagHyperdagFormatDB` reader. * - * @tparam Graph_t The type of the graph, which must satisfy the ComputationalDag concept. + * @tparam GraphT The type of the graph, which must satisfy the ComputationalDag concept. * @param os The output stream to write to. * @param graph The computational DAG to write. */ -template -void writeComputationalDagHyperdagFormatDB(std::ostream &os, const Graph_t &graph, const bool write_comment_lines = false) { - static_assert(is_computational_dag_v, "Graph_t must be a computational DAG"); - - const auto num_vertices = graph.num_vertices(); - unsigned num_hyperedges = 0; - vertex_idx_t num_pins = 0; - std::vector> hyperedge_idx_to_node; - - for (const auto &u : graph.vertices()) { - if (graph.out_degree(u) > 0) { - hyperedge_idx_to_node.push_back(u); - num_hyperedges++; - num_pins += (graph.out_degree(u) + 1); +template +void WriteComputationalDagHyperdagFormatDb(std::ostream &os, const GraphT &graph, const bool writeCommentLines = false) { + static_assert(isComputationalDagV, "GraphT must be a computational DAG"); + + const auto numVertices = graph.NumVertices(); + unsigned numHyperedges = 0; + VertexIdxT numPins = 0; + std::vector> hyperedgeIdxToNode; + + for (const auto &u : graph.Vertices()) { + if (graph.OutDegree(u) > 0) { + hyperedgeIdxToNode.push_back(u); + numHyperedges++; + numPins += (graph.OutDegree(u) + 1); } } // Header os << "%% HyperdagDB format written by OneStopParallel\n"; - os << num_hyperedges << " " << num_vertices << " " << 
num_pins << "\n"; + os << numHyperedges << " " << numVertices << " " << numPins << "\n"; // Hyperedges - if (write_comment_lines) { + if (writeCommentLines) { os << "%% Hyperedges: ID comm_weight mem_weight\n"; } - for (unsigned i = 0; i < num_hyperedges; ++i) { - const auto u = hyperedge_idx_to_node[i]; - os << i << " " << graph.vertex_comm_weight(u) << " " << graph.vertex_mem_weight(u) << "\n"; + for (unsigned i = 0; i < numHyperedges; ++i) { + const auto u = hyperedgeIdxToNode[i]; + os << i << " " << graph.VertexCommWeight(u) << " " << graph.VertexMemWeight(u) << "\n"; } // Vertices - if (write_comment_lines) { + if (writeCommentLines) { os << "%% Vertices: ID work_weight type\n"; } - for (const auto &u : graph.vertices()) { - os << u << " " << graph.vertex_work_weight(u); - if constexpr (has_typed_vertices_v) { - os << " " << graph.vertex_type(u); + for (const auto &u : graph.Vertices()) { + os << u << " " << graph.VertexWorkWeight(u); + if constexpr (hasTypedVerticesV) { + os << " " << graph.VertexType(u); } else { os << " " << 0; } @@ -83,13 +83,13 @@ void writeComputationalDagHyperdagFormatDB(std::ostream &os, const Graph_t &grap } // Pins - if (write_comment_lines) { + if (writeCommentLines) { os << "%% Pins: HyperedgeID NodeID\n"; } - for (unsigned i = 0; i < num_hyperedges; ++i) { - const auto u = hyperedge_idx_to_node[i]; + for (unsigned i = 0; i < numHyperedges; ++i) { + const auto u = hyperedgeIdxToNode[i]; os << i << " " << u << "\n"; // Source pin - for (const auto &v : graph.children(u)) { + for (const auto &v : graph.Children(u)) { os << i << " " << v << "\n"; // Target pins } } @@ -98,21 +98,19 @@ void writeComputationalDagHyperdagFormatDB(std::ostream &os, const Graph_t &grap /** * @brief Writes a computational DAG to a file in the HyperdagDB format. * - * @tparam Graph_t The type of the graph, which must satisfy the ComputationalDag concept. + * @tparam GraphT The type of the graph, which must satisfy the ComputationalDag concept. 
* @param filename The path to the output file. * @param graph The computational DAG to write. * @return true if writing was successful, false otherwise. */ -template -bool writeComputationalDagHyperdagFormatDB(const std::string &filename, - const Graph_t &graph, - const bool write_comment_lines = false) { +template +bool WriteComputationalDagHyperdagFormatDb(const std::string &filename, const GraphT &graph, const bool writeCommentLines = false) { std::ofstream os(filename); if (!os.is_open()) { std::cerr << "Error: Failed to open file for writing: " << filename << "\n"; return false; } - writeComputationalDagHyperdagFormatDB(os, graph, write_comment_lines); + WriteComputationalDagHyperdagFormatDb(os, graph, writeCommentLines); return true; } diff --git a/include/osp/auxiliary/io/mtx_graph_file_reader.hpp b/include/osp/auxiliary/io/mtx_graph_file_reader.hpp index 5a98721e..63b20519 100644 --- a/include/osp/auxiliary/io/mtx_graph_file_reader.hpp +++ b/include/osp/auxiliary/io/mtx_graph_file_reader.hpp @@ -32,9 +32,9 @@ limitations under the License. 
namespace osp { namespace file_reader { -template -bool readComputationalDagMartixMarketFormat(std::ifstream &infile, Graph_t &graph) { - using vertex_t = vertex_idx_t; +template +bool ReadComputationalDagMartixMarketFormat(std::ifstream &infile, GraphT &graph) { + using VertexT = VertexIdxT; std::string line; @@ -62,29 +62,29 @@ bool readComputationalDagMartixMarketFormat(std::ifstream &infile, Graph_t &grap return false; } - int M_row = 0, M_col = 0, nEntries = 0; + int mRow = 0, mCol = 0, nEntries = 0; - std::istringstream header_stream(line); - if (!(header_stream >> M_row >> M_col >> nEntries) || M_row <= 0 || M_col <= 0 || M_row != M_col) { + std::istringstream headerStream(line); + if (!(headerStream >> mRow >> mCol >> nEntries) || mRow <= 0 || mCol <= 0 || mRow != mCol) { std::cerr << "Error: Invalid header or non-square matrix.\n"; return false; } - if (static_cast(M_row) > std::numeric_limits::max()) { + if (static_cast(mRow) > std::numeric_limits::max()) { std::cerr << "Error: Matrix dimension too large for vertex type.\n"; return false; } - const vertex_t num_nodes = static_cast(M_row); - std::vector node_work_wts(num_nodes, 0); - std::vector node_comm_wts(num_nodes, 1); + const VertexT numNodes = static_cast(mRow); + std::vector nodeWorkWts(numNodes, 0); + std::vector nodeCommWts(numNodes, 1); - for (vertex_t i = 0; i < num_nodes; ++i) { - graph.add_vertex(1, 1, 1); + for (VertexT i = 0; i < numNodes; ++i) { + graph.AddVertex(1, 1, 1); } - int entries_read = 0; - while (entries_read < nEntries && std::getline(infile, line)) { + int entriesRead = 0; + while (entriesRead < nEntries && std::getline(infile, line)) { if (line.empty() || line[0] == '%') { continue; } @@ -93,11 +93,11 @@ bool readComputationalDagMartixMarketFormat(std::ifstream &infile, Graph_t &grap return false; } - std::istringstream entry_stream(line); + std::istringstream entryStream(line); int row = -1, col = -1; double val = 0.0; - if (!(entry_stream >> row >> col >> val)) { + if 
(!(entryStream >> row >> col >> val)) { std::cerr << "Error: Malformed matrix entry.\n"; return false; } @@ -105,12 +105,12 @@ bool readComputationalDagMartixMarketFormat(std::ifstream &infile, Graph_t &grap row -= 1; col -= 1; // Convert to 0-based - if (row < 0 || col < 0 || row >= M_row || col >= M_col) { + if (row < 0 || col < 0 || row >= mRow || col >= mCol) { std::cerr << "Error: Matrix entry out of bounds.\n"; return false; } - if (static_cast(row) >= num_nodes || static_cast(col) >= num_nodes) { + if (static_cast(row) >= numNodes || static_cast(col) >= numNodes) { std::cerr << "Error: Index exceeds vertex type limit.\n"; return false; } @@ -121,22 +121,22 @@ bool readComputationalDagMartixMarketFormat(std::ifstream &infile, Graph_t &grap } if (row != col) { - graph.add_edge(static_cast(col), static_cast(row)); - node_work_wts[static_cast(row)] += 1; + graph.AddEdge(static_cast(col), static_cast(row)); + nodeWorkWts[static_cast(row)] += 1; } - ++entries_read; + ++entriesRead; } - if (entries_read != nEntries) { + if (entriesRead != nEntries) { std::cerr << "Error: Incomplete matrix entries.\n"; return false; } - for (vertex_t i = 0; i < num_nodes; ++i) { - graph.set_vertex_work_weight(i, static_cast>(node_work_wts[i])); - graph.set_vertex_comm_weight(i, static_cast>(node_comm_wts[i])); - graph.set_vertex_mem_weight(i, static_cast>(node_work_wts[i])); + for (VertexT i = 0; i < numNodes; ++i) { + graph.SetVertexWorkWeight(i, static_cast>(nodeWorkWts[i])); + graph.SetVertexCommWeight(i, static_cast>(nodeCommWts[i])); + graph.SetVertexMemWeight(i, static_cast>(nodeWorkWts[i])); } while (std::getline(infile, line)) { @@ -149,15 +149,15 @@ bool readComputationalDagMartixMarketFormat(std::ifstream &infile, Graph_t &grap return true; } -template -bool readComputationalDagMartixMarketFormat(const std::string &filename, Graph_t &graph) { +template +bool ReadComputationalDagMartixMarketFormat(const std::string &filename, GraphT &graph) { // Ensure the file is .mtx 
format if (std::filesystem::path(filename).extension() != ".mtx") { std::cerr << "Error: Only .mtx files are accepted.\n"; return false; } - if (!isPathSafe(filename)) { + if (!IsPathSafe(filename)) { std::cerr << "Error: Unsafe file path (potential traversal attack).\n"; return false; } @@ -178,7 +178,7 @@ bool readComputationalDagMartixMarketFormat(const std::string &filename, Graph_t return false; } - return readComputationalDagMartixMarketFormat(infile, graph); + return ReadComputationalDagMartixMarketFormat(infile, graph); } // bool readProblem(const std::string &filename, DAG &G, BSPproblem ¶ms, bool NoNUMA = true); @@ -186,16 +186,16 @@ bool readComputationalDagMartixMarketFormat(const std::string &filename, Graph_t // std::pair readBspInstance(const std::string &filename); // std::pair -// readComputationalDagMartixMarketFormat(const std::string &filename, +// ReadComputationalDagMartixMarketFormat(const std::string &filename, // std::unordered_map, double, pair_hash> &mtx); // std::pair -// readComputationalDagMartixMarketFormat(std::ifstream &infile, +// ReadComputationalDagMartixMarketFormat(std::ifstream &infile, // std::unordered_map, double, pair_hash> &mtx); -// std::pair readComputationalDagMartixMarketFormat(const std::string &filename); +// std::pair ReadComputationalDagMartixMarketFormat(const std::string &filename); -// std::pair readComputationalDagMartixMarketFormat(std::ifstream &infile); +// std::pair ReadComputationalDagMartixMarketFormat(std::ifstream &infile); // std::pair readCombinedSptrsvSpmvDagMartixMarket(const std::string &firstFilename, const std::string &secondFilename); @@ -203,9 +203,9 @@ bool readComputationalDagMartixMarketFormat(const std::string &filename, Graph_t // std::pair readComputationalDagMartixMarketFormat_csr(std::ifstream &infile); -// std::pair readBspArchitecture(const std::string &filename); +// std::pair ReadBspArchitecture(const std::string &filename); -// std::pair readBspArchitecture(std::ifstream &infile); 
+// std::pair ReadBspArchitecture(std::ifstream &infile); // std::pair readBspSchdeuleTxtFormat(const BspInstance &instance, const std::string &filename); diff --git a/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp b/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp index d7f64c9b..42acbd8d 100644 --- a/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp +++ b/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp @@ -33,8 +33,8 @@ namespace osp { namespace file_reader { // reads a matrix into Hypergraph format, where nonzeros are vertices, and rows/columns are hyperedges -template -bool readHypergraphMartixMarketFormat(std::ifstream &infile, Hypergraph &hgraph) { +template +bool ReadHypergraphMartixMarketFormat(std::ifstream &infile, Hypergraph &hgraph) { std::string line; // Skip comments or empty lines (robustly) @@ -61,27 +61,27 @@ bool readHypergraphMartixMarketFormat(std::ifstream &infile, Hypergraph> M_row >> M_col >> nEntries) || M_row <= 0 || M_col <= 0) { + std::istringstream headerStream(line); + if (!(headerStream >> mRow >> mCol >> nEntries) || mRow <= 0 || mCol <= 0) { std::cerr << "Error: Invalid header.\n"; return false; } - const index_type num_nodes = static_cast(nEntries); + const IndexType numNodes = static_cast(nEntries); - hgraph.reset(num_nodes, 0); - for (index_type node = 0; node < num_nodes; ++node) { - hgraph.set_vertex_work_weight(node, static_cast(1)); - hgraph.set_vertex_memory_weight(node, static_cast(1)); + hgraph.Reset(numNodes, 0); + for (IndexType node = 0; node < numNodes; ++node) { + hgraph.SetVertexWorkWeight(node, static_cast(1)); + hgraph.SetVertexMemoryWeight(node, static_cast(1)); } - std::vector> row_hyperedges(static_cast(M_row)); - std::vector> column_hyperedges(static_cast(M_col)); + std::vector> rowHyperedges(static_cast(mRow)); + std::vector> columnHyperedges(static_cast(mCol)); - int entries_read = 0; - while (entries_read < nEntries && std::getline(infile, line)) { + int entriesRead = 0; + while 
(entriesRead < nEntries && std::getline(infile, line)) { if (line.empty() || line[0] == '%') { continue; } @@ -90,11 +90,11 @@ bool readHypergraphMartixMarketFormat(std::ifstream &infile, Hypergraph> row >> col >> val)) { + if (!(entryStream >> row >> col >> val)) { std::cerr << "Error: Malformed matrix entry.\n"; return false; } @@ -102,23 +102,23 @@ bool readHypergraphMartixMarketFormat(std::ifstream &infile, Hypergraph= M_row || col >= M_col) { + if (row < 0 || col < 0 || row >= mRow || col >= mCol) { std::cerr << "Error: Matrix entry out of bounds.\n"; return false; } - if (static_cast(row) >= num_nodes || static_cast(col) >= num_nodes) { + if (static_cast(row) >= numNodes || static_cast(col) >= numNodes) { std::cerr << "Error: Index exceeds vertex type limit.\n"; return false; } - row_hyperedges[static_cast(row)].push_back(static_cast(entries_read)); - column_hyperedges[static_cast(col)].push_back(static_cast(entries_read)); + rowHyperedges[static_cast(row)].push_back(static_cast(entriesRead)); + columnHyperedges[static_cast(col)].push_back(static_cast(entriesRead)); - ++entries_read; + ++entriesRead; } - if (entries_read != nEntries) { + if (entriesRead != nEntries) { std::cerr << "Error: Incomplete matrix entries.\n"; return false; } @@ -130,31 +130,30 @@ bool readHypergraphMartixMarketFormat(std::ifstream &infile, Hypergraph(M_row); ++row) { - if (!row_hyperedges[row].empty()) { - hgraph.add_hyperedge(row_hyperedges[row]); + for (IndexType row = 0; row < static_cast(mRow); ++row) { + if (!rowHyperedges[row].empty()) { + hgraph.AddHyperedge(rowHyperedges[row]); } } - for (index_type col = 0; col < static_cast(M_col); ++col) { - if (!column_hyperedges[col].empty()) { - hgraph.add_hyperedge(column_hyperedges[col]); + for (IndexType col = 0; col < static_cast(mCol); ++col) { + if (!columnHyperedges[col].empty()) { + hgraph.AddHyperedge(columnHyperedges[col]); } } return true; } -template -bool readHypergraphMartixMarketFormat(const std::string &filename, - 
Hypergraph &hgraph) { +template +bool ReadHypergraphMartixMarketFormat(const std::string &filename, Hypergraph &hgraph) { // Ensure the file is .mtx format if (std::filesystem::path(filename).extension() != ".mtx") { std::cerr << "Error: Only .mtx files are accepted.\n"; return false; } - if (!isPathSafe(filename)) { + if (!IsPathSafe(filename)) { std::cerr << "Error: Unsafe file path (potential traversal attack).\n"; return false; } @@ -175,7 +174,7 @@ bool readHypergraphMartixMarketFormat(const std::string &filename, return false; } - return readHypergraphMartixMarketFormat(infile, hgraph); + return ReadHypergraphMartixMarketFormat(infile, hgraph); } } // namespace file_reader diff --git a/include/osp/auxiliary/io/partitioning_file_writer.hpp b/include/osp/auxiliary/io/partitioning_file_writer.hpp index b2dd2953..82f1eabc 100644 --- a/include/osp/auxiliary/io/partitioning_file_writer.hpp +++ b/include/osp/auxiliary/io/partitioning_file_writer.hpp @@ -27,42 +27,42 @@ limitations under the License. namespace osp { namespace file_writer { -template -void write_txt(std::ostream &os, const Partitioning &partition) { - using index_type = typename hypergraph_t::vertex_idx; +template +void WriteTxt(std::ostream &os, const Partitioning &partition) { + using IndexType = typename HypergraphT::VertexIdx; - os << "%% Partitioning for " << partition.getInstance().getNumberOfPartitions() << " parts." << std::endl; + os << "%% Partitioning for " << partition.GetInstance().GetNumberOfPartitions() << " parts." 
<< std::endl; - for (index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node) { - os << node << " " << partition.assignedPartition(node) << std::endl; + for (IndexType node = 0; node < partition.GetInstance().GetHypergraph().NumVertices(); ++node) { + os << node << " " << partition.AssignedPartition(node) << std::endl; } } -template -void write_txt(const std::string &filename, const Partitioning &partition) { +template +void WriteTxt(const std::string &filename, const Partitioning &partition) { std::ofstream os(filename); - write_txt(os, partition); + WriteTxt(os, partition); } -template -void write_txt(std::ostream &os, const PartitioningWithReplication &partition) { - using index_type = typename hypergraph_t::vertex_idx; +template +void WriteTxt(std::ostream &os, const PartitioningWithReplication &partition) { + using IndexType = typename HypergraphT::VertexIdx; - os << "%% Partitioning for " << partition.getInstance().getNumberOfPartitions() << " parts with replication." << std::endl; + os << "%% Partitioning for " << partition.GetInstance().GetNumberOfPartitions() << " parts with replication." 
<< std::endl; - for (index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node) { + for (IndexType node = 0; node < partition.GetInstance().GetHypergraph().NumVertices(); ++node) { os << node; - for (unsigned part : partition.assignedPartitions(node)) { + for (unsigned part : partition.AssignedPartitions(node)) { os << " " << part; } os << std::endl; } } -template -void write_txt(const std::string &filename, const PartitioningWithReplication &partition) { +template +void WriteTxt(const std::string &filename, const PartitioningWithReplication &partition) { std::ofstream os(filename); - write_txt(os, partition); + WriteTxt(os, partition); } } // namespace file_writer diff --git a/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp b/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp index e3849668..2317b9ce 100644 --- a/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp +++ b/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp @@ -26,48 +26,48 @@ limitations under the License. namespace osp { namespace file_writer { -template -void write_txt(std::ostream &os, const PebblingSchedule &schedule) { - using vertex_idx = vertex_idx_t; +template +void WriteTxt(std::ostream &os, const PebblingSchedule &schedule) { + using VertexIdx = VertexIdxT; - os << "%% PebblingSchedule for " << schedule.getInstance().numberOfProcessors() << " processors and " - << schedule.numberOfSupersteps() << " supersteps." << std::endl; + os << "%% PebblingSchedule for " << schedule.GetInstance().NumberOfProcessors() << " processors and " + << schedule.NumberOfSupersteps() << " supersteps." 
<< std::endl; - for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) { - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { + for (unsigned step = 0; step < schedule.NumberOfSupersteps(); ++step) { + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { const auto &computeSteps = schedule.GetComputeStepsForProcSuperstep(proc, step); for (const auto &computeStep : computeSteps) { - os << "Compute " << computeStep.node << " on proc " << proc << " in superstep " << step << std::endl; - for (vertex_idx to_evict : computeStep.nodes_evicted_after) { - os << "Evict " << to_evict << " from proc " << proc << " in superstep " << step << std::endl; + os << "Compute " << computeStep.node_ << " on proc " << proc << " in superstep " << step << std::endl; + for (VertexIdx toEvict : computeStep.nodesEvictedAfter_) { + os << "Evict " << toEvict << " from proc " << proc << " in superstep " << step << std::endl; } } } - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - const std::vector &nodesSentUp = schedule.GetNodesSentUp(proc, step); - for (vertex_idx node : nodesSentUp) { + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + const std::vector &nodesSentUp = schedule.GetNodesSentUp(proc, step); + for (VertexIdx node : nodesSentUp) { os << "Send up " << node << " from proc " << proc << " in superstep " << step << std::endl; } } - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - const std::vector &nodesEvictedInComm = schedule.GetNodesEvictedInComm(proc, step); - for (vertex_idx node : nodesEvictedInComm) { + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + const std::vector &nodesEvictedInComm = schedule.GetNodesEvictedInComm(proc, step); + for (VertexIdx node : nodesEvictedInComm) { os << "Evict " << node << " from proc " << proc << " in superstep 
" << step << std::endl; } } - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - const std::vector &nodesSentDown = schedule.GetNodesSentDown(proc, step); - for (vertex_idx node : nodesSentDown) { + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + const std::vector &nodesSentDown = schedule.GetNodesSentDown(proc, step); + for (VertexIdx node : nodesSentDown) { os << "Send down " << node << " to proc " << proc << " in superstep " << step << std::endl; } } } } -template -void write_txt(const std::string &filename, const PebblingSchedule &schedule) { +template +void WriteTxt(const std::string &filename, const PebblingSchedule &schedule) { std::ofstream os(filename); - write_txt(os, schedule); + WriteTxt(os, schedule); } } // namespace file_writer diff --git a/include/osp/auxiliary/math/divisors.hpp b/include/osp/auxiliary/math/divisors.hpp index fe268506..973485c2 100644 --- a/include/osp/auxiliary/math/divisors.hpp +++ b/include/osp/auxiliary/math/divisors.hpp @@ -25,20 +25,20 @@ limitations under the License. 
namespace osp { -template -integral_type intSqrtFloor(integral_type num) { - static_assert(std::is_integral_v); +template +IntegralType IntSqrtFloor(IntegralType num) { + static_assert(std::is_integral_v); assert(num > 0); - integral_type sqrt = 1; - integral_type numCopy = num; + IntegralType sqrt = 1; + IntegralType numCopy = num; while (numCopy >= 4) { sqrt *= 2; numCopy /= 4; } - integral_type power2 = sqrt / 2; + IntegralType power2 = sqrt / 2; while (power2 > 0) { - integral_type sum = sqrt + power2; + IntegralType sum = sqrt + power2; if (sum * sum <= num) { sqrt = sum; } @@ -48,22 +48,21 @@ integral_type intSqrtFloor(integral_type num) { return sqrt; } -template -std::vector divisorsList(integral_type num) { - static_assert(std::is_integral_v); +template +std::vector DivisorsList(IntegralType num) { + static_assert(std::is_integral_v); assert(num > 0); - std::vector divs; + std::vector divs; - integral_type ub = intSqrtFloor(num); - for (integral_type div = 1; div <= ub; ++div) { + IntegralType ub = IntSqrtFloor(num); + for (IntegralType div = 1; div <= ub; ++div) { if (num % div == 0) { divs.emplace_back(div); } } - for (std::size_t indx = divs.back() * divs.back() == num ? divs.size() - 2U : divs.size() - 1U; - indx != std::numeric_limits::max(); - --indx) { + const std::size_t beginIndx = divs.back() * divs.back() == num ? divs.size() - 2U : divs.size() - 1U; + for (std::size_t indx = beginIndx; indx != std::numeric_limits::max(); --indx) { divs.emplace_back(num / divs[indx]); } diff --git a/include/osp/auxiliary/math/math_helper.hpp b/include/osp/auxiliary/math/math_helper.hpp index b5227d43..2619d1a5 100644 --- a/include/osp/auxiliary/math/math_helper.hpp +++ b/include/osp/auxiliary/math/math_helper.hpp @@ -21,13 +21,13 @@ limitations under the License. 
namespace osp { -template -float_type log_sum_exp(float_type lhs, float_type rhs) { - static_assert(std::is_floating_point_v); +template +FloatType LogSumExp(FloatType lhs, FloatType rhs) { + static_assert(std::is_floating_point_v); - const float_type max = std::max(lhs, rhs); + const FloatType max = std::max(lhs, rhs); - float_type result = max; + FloatType result = max; result += std::log2(std::exp2(lhs - max) + std::exp2(rhs - max)); return result; } diff --git a/include/osp/auxiliary/misc.hpp b/include/osp/auxiliary/misc.hpp index 1b269f27..95c5e49f 100644 --- a/include/osp/auxiliary/misc.hpp +++ b/include/osp/auxiliary/misc.hpp @@ -35,7 +35,7 @@ limitations under the License. namespace osp { // unbiased random int generator -inline int randInt(int lim) { +inline int RandInt(int lim) { int rnd = std::rand(); while (rnd >= RAND_MAX - RAND_MAX % lim) { rnd = std::rand(); @@ -47,40 +47,39 @@ inline int randInt(int lim) { // pair of integers template struct Pair { - int a, b; + int a_, b_; - explicit Pair(const T1 a_ = T1(), const T2 b_ = T2()) : a(a_), b(b_) {} + explicit Pair(const T1 a = T1(), const T2 b = T2()) : a_(a), b_(b) {} - template - bool operator<(const Pair &other) const { - return (a < other.a || (a == other.a && b < other.b)); - } + bool operator<(const Pair &other) const { return (a_ < other.a_ || (a_ == other.a_ && b_ < other.b_)); } - std::ostream &operator<<(std::ostream &os) const { return os << ("(" + std::to_string(a) + ", " + std::to_string(b) + ")"); } + std::ostream &operator<<(std::ostream &os) const { + return os << ("(" + std::to_string(a_) + ", " + std::to_string(b_) + ")"); + } }; -using intPair = Pair; +using IntPair = Pair; // triple of integers template struct Triple { - T1 a; - T2 b; - T3 c; + T1 a_; + T2 b_; + T3 c_; - explicit Triple(const T1 a_ = T1(), const int b_ = T2(), const int c_ = T3()) : a(a_), b(b_), c(c_) {} + explicit Triple(const T1 a = T1(), const int b = T2(), const int c = T3()) : a_(a), b_(b), c_(c) {} 
std::ostream &operator<<(std::ostream &os) const { - return os << "(" << std::to_string(a) << ", " << std::to_string(b) << ", " << std::to_string(c) << ")"; + return os << "(" << std::to_string(a_) << ", " << std::to_string(b_) << ", " << std::to_string(c_) << ")"; } }; -using intTriple = Triple; +using IntTriple = Triple; -inline bool isDisjoint(std::vector &intervals) { +inline bool IsDisjoint(std::vector &intervals) { sort(intervals.begin(), intervals.end()); for (size_t i = 0; i + 1 < intervals.size(); ++i) { - if (intervals[i].b > intervals[i + 1].a) { + if (intervals[i].b_ > intervals[i + 1].a_) { return false; } } @@ -90,7 +89,7 @@ inline bool isDisjoint(std::vector &intervals) { // computes power of an integer template -constexpr T intpow(T base, unsigned exp) { +constexpr T Intpow(T base, unsigned exp) { static_assert(std::is_integral::value); if (exp == 0U) { @@ -100,23 +99,23 @@ constexpr T intpow(T base, unsigned exp) { return base; } - T tmp = intpow(base, exp / 2U); + T tmp = Intpow(base, exp / 2U); if (exp % 2U == 0U) { return tmp * tmp; } return base * tmp * tmp; } -struct contractionEdge { - intPair edge; - int nodeW; - int edgeW; +struct ContractionEdge { + IntPair edge_; + int nodeW_; + int edgeW_; - contractionEdge(const int from, const int to, const int Wnode, const int Wedge) - : edge(from, to), nodeW(Wnode), edgeW(Wedge) {} + ContractionEdge(const int from, const int to, const int wnode, const int wedge) + : edge_(from, to), nodeW_(wnode), edgeW_(wedge) {} - bool operator<(const contractionEdge &other) const { - return (nodeW < other.nodeW || (nodeW == other.nodeW && edgeW < other.edgeW)); + bool operator<(const ContractionEdge &other) const { + return (nodeW_ < other.nodeW_ || (nodeW_ == other.nodeW_ && edgeW_ < other.edgeW_)); } }; @@ -125,7 +124,7 @@ static const std::vector possibleModes{ "random", "SJF", "cilk", "BSPg", "ETF", "BL-EST", "ETF-NUMA", "BL-EST-NUMA", "Layers"}; // modify problem filename by adding substring at the right 
place -inline std::string editFilename(const std::string &filename, const std::string &toInsert) { +inline std::string EditFilename(const std::string &filename, const std::string &toInsert) { auto pos = filename.find("_coarse"); if (pos == std::string::npos) { pos = filename.find("_instance"); @@ -139,7 +138,7 @@ inline std::string editFilename(const std::string &filename, const std::string & // unordered set intersection template -std::unordered_set get_intersection(const std::unordered_set &a, const std::unordered_set &b) { +std::unordered_set GetIntersection(const std::unordered_set &a, const std::unordered_set &b) { std::vector result; const auto &larger = a.size() > b.size() ? a : b; const auto &smaller = a.size() <= b.size() ? a : b; @@ -153,7 +152,7 @@ std::unordered_set get_intersection(const std::unordered_set &a, const std // unordered set union template -std::unordered_set get_union(const std::unordered_set &a, const std::unordered_set &b) { +std::unordered_set GetUnion(const std::unordered_set &a, const std::unordered_set &b) { std::unordered_set larger = a.size() > b.size() ? a : b; std::unordered_set smaller = a.size() <= b.size() ? 
a : b; for (auto &elem : smaller) { @@ -162,9 +161,9 @@ std::unordered_set get_union(const std::unordered_set &a, const std::unord return larger; } -// zip two vectors of equal length +// Zip two vectors of equal length template -std::vector> zip(const std::vector &a, const std::vector &b) { +std::vector> Zip(const std::vector &a, const std::vector &b) { assert(a.size() == b.size()); std::vector> result; @@ -177,7 +176,7 @@ std::vector> zip(const std::vector &a, const std::vector & } template -void unzip(std::vector> &zipped, std::vector &a, std::vector &b) { +void Unzip(std::vector> &zipped, std::vector &a, std::vector &b) { a.resize(zipped.size()); b.resize(zipped.size()); @@ -188,26 +187,26 @@ void unzip(std::vector> &zipped, std::vector &a, std::vector< } template -std::vector sort_and_sorting_arrangement(std::vector &a) { +std::vector SortAndSortingArrangement(std::vector &a) { std::vector rearrangement; rearrangement.resize(a.size()); std::iota(rearrangement.begin(), rearrangement.end(), 0); - std::vector> zipped = zip(a, rearrangement); + std::vector> zipped = Zip(a, rearrangement); std::sort(zipped.begin(), zipped.end()); - unzip(zipped, a, rearrangement); + Unzip(zipped, a, rearrangement); return rearrangement; } -template -std::vector sorting_arrangement(const std::vector &a, bool increasing = true) { - std::vector rearrangement; +template +std::vector SortingArrangement(const std::vector &a, bool increasing = true) { + std::vector rearrangement; rearrangement.resize(a.size()); std::iota(rearrangement.begin(), rearrangement.end(), 0); - std::vector> zipped = zip(a, rearrangement); + std::vector> zipped = Zip(a, rearrangement); std::sort(zipped.begin(), zipped.end()); if (!increasing) { std::reverse(zipped.begin(), zipped.end()); @@ -221,7 +220,7 @@ std::vector sorting_arrangement(const std::vector &a, bool increasing = } // checks if a vector is rearrangement of 0... 
N-1 -inline bool check_vector_is_rearrangement_of_0_to_N(const std::vector &a) { +inline bool CheckVectorIsRearrangementOf0ToN(const std::vector &a) { std::vector contained(a.size(), false); for (auto &val : a) { if (val >= a.size()) { @@ -237,36 +236,36 @@ inline bool check_vector_is_rearrangement_of_0_to_N(const std::vector &a // sorts a vector like the arrangement template -void sort_like_arrangement(std::vector &a, const std::vector &arrangement) { +void SortLikeArrangement(std::vector &a, const std::vector &arrangement) { assert(a.size() == arrangement.size()); - assert(check_vector_is_rearrangement_of_0_to_N(arrangement)); + assert(CheckVectorIsRearrangementOf0ToN(arrangement)); std::vector moved(a.size(), false); for (size_t i = 0; i < a.size(); i++) { if (moved[i]) { continue; } - T i_val = a[i]; - size_t prev_j = i; + T iVal = a[i]; + size_t prevJ = i; size_t j = arrangement[i]; while (i != j) { - a[prev_j] = a[j]; - moved[prev_j] = true; - prev_j = j; + a[prevJ] = a[j]; + moved[prevJ] = true; + prevJ = j; j = arrangement[j]; } - a[prev_j] = i_val; // j == i - moved[prev_j] = true; + a[prevJ] = iVal; // j == i + moved[prevJ] = true; } } // sorts vector according to values in second vector w/o changing second vector template -void sort_like(std::vector &a, const std::vector &b) { +void SortLike(std::vector &a, const std::vector &b) { assert(a.size() == b.size()); - std::vector arrangement = sorting_arrangement(b); - sort_like_arrangement(a, arrangement); + std::vector arrangement = SortingArrangement(b); + SortLikeArrangement(a, arrangement); } /** @@ -278,14 +277,14 @@ void sort_like(std::vector &a, const std::vector &b) { * @return T KeyType of SetType */ template -T Get_Median(SetType ordered_set) { - assert(ordered_set.size() != 0); - typename SetType::iterator it = ordered_set.begin(); - if (ordered_set.size() % 2 == 1) { - std::advance(it, ordered_set.size() / 2); +T GetMedian(SetType orderedSet) { + assert(orderedSet.size() != 0); + typename 
SetType::iterator it = orderedSet.begin(); + if (orderedSet.size() % 2 == 1) { + std::advance(it, orderedSet.size() / 2); return *it; } else { - std::advance(it, (ordered_set.size() - 1) / 2); + std::advance(it, (orderedSet.size() - 1) / 2); T val1 = *it; T val2 = *(++it); return val1 + (val2 - val1) / 2; @@ -301,11 +300,11 @@ T Get_Median(SetType ordered_set) { * @return T KeyType of SetType */ template -T Get_Lower_Median(SetType ordered_set) { - assert(ordered_set.size() != 0); - typename SetType::iterator it = ordered_set.begin(); +T GetLowerMedian(SetType orderedSet) { + assert(orderedSet.size() != 0); + typename SetType::iterator it = orderedSet.begin(); - std::advance(it, (ordered_set.size() - 1) / 2); + std::advance(it, (orderedSet.size() - 1) / 2); return *it; } @@ -318,11 +317,11 @@ T Get_Lower_Median(SetType ordered_set) { * @return T KeyType of SetType */ template -T Get_upper_third_percentile(SetType ordered_set) { - assert(ordered_set.size() != 0); - typename SetType::iterator it = ordered_set.begin(); +T GetUpperThirdPercentile(SetType orderedSet) { + assert(orderedSet.size() != 0); + typename SetType::iterator it = orderedSet.begin(); - std::advance(it, (ordered_set.size() / 3) + ((ordered_set.size() + 1) / 3)); + std::advance(it, (orderedSet.size() / 3) + ((orderedSet.size() + 1) / 3)); return *it; } @@ -335,11 +334,11 @@ T Get_upper_third_percentile(SetType ordered_set) { * @return T KeyType of SetType */ template -T Get_lower_third_percentile(SetType ordered_set) { - assert(ordered_set.size() != 0); - typename SetType::iterator it = ordered_set.begin(); +T GetLowerThirdPercentile(SetType orderedSet) { + assert(orderedSet.size() != 0); + typename SetType::iterator it = orderedSet.begin(); - std::advance(it, (ordered_set.size() / 3)); + std::advance(it, (orderedSet.size() / 3)); return *it; } diff --git a/include/osp/auxiliary/permute.hpp b/include/osp/auxiliary/permute.hpp index 31ae7857..7815cc04 100644 --- a/include/osp/auxiliary/permute.hpp +++ 
b/include/osp/auxiliary/permute.hpp @@ -26,7 +26,7 @@ limitations under the License. namespace osp { template -void permute_inplace(std::vector &vec, std::vector &perm) { +void PermuteInplace(std::vector &vec, std::vector &perm) { static_assert(std::is_integral_v); static_assert(std::is_unsigned_v); @@ -58,7 +58,7 @@ void permute_inplace(std::vector &vec, std::vector &perm) { } template -void inverse_permute_inplace(std::vector &vec, std::vector &perm) { +void InversePermuteInplace(std::vector &vec, std::vector &perm) { static_assert(std::is_integral_v); static_assert(std::is_unsigned_v); diff --git a/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp b/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp index 692415c3..c809c921 100644 --- a/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp +++ b/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp @@ -30,38 +30,38 @@ namespace osp { /** * @brief Generates a Erdos Renyi random directed graph * - * @param num_vertices Number of vertices of the graph - * @param chance chance/num_vertices is the probability of edge inclusion + * @param numVertices Number of vertices of the graph + * @param chance chance/numVertices is the probability of edge inclusion * @return DAG */ -template -void erdos_renyi_graph_gen(Graph_t &dag_out, vertex_idx_t num_vertices, double chance) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG type"); +template +void ErdosRenyiGraphGen(GraphT &dagOut, VertexIdxT numVertices, double chance) { + static_assert(isConstructableCdagV, "GraphT must be a constructable computational DAG type"); - dag_out = Graph_t(num_vertices); + dagOut = GraphT(numVertices); std::random_device rd; std::mt19937 gen(rd()); - for (const auto &v : dag_out.vertices()) { - const auto one = static_cast>(1); - std::binomial_distribution> bino_dist(num_vertices - one - v, chance / double(num_vertices)); - auto out_edges_num = 
bino_dist(gen); + for (const auto &v : dagOut.Vertices()) { + const auto one = static_cast>(1); + std::binomial_distribution> binoDist(numVertices - one - v, chance / double(numVertices)); + auto outEdgesNum = binoDist(gen); - std::unordered_set> out_edges; - while (out_edges.size() < static_cast(out_edges_num)) { - std::uniform_int_distribution> dist(0, num_vertices - one - v); - vertex_idx_t edge = v + one + dist(gen); + std::unordered_set> outEdges; + while (outEdges.size() < static_cast(outEdgesNum)) { + std::uniform_int_distribution> dist(0, numVertices - one - v); + VertexIdxT edge = v + one + dist(gen); - if (out_edges.find(edge) != out_edges.cend()) { + if (outEdges.find(edge) != outEdges.cend()) { continue; } - out_edges.emplace(edge); + outEdges.emplace(edge); } - for (auto &j : out_edges) { - dag_out.add_edge(v, j); + for (auto &j : outEdges) { + dagOut.AddEdge(v, j); } } } diff --git a/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp b/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp index 67728ad0..072c7e1c 100644 --- a/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp +++ b/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp @@ -29,33 +29,33 @@ namespace osp { * @brief Generates a random graph where an edge (i,j), with i -void near_diag_random_graph(Graph_t &dag_out, vertex_idx_t num_vertices, double bandwidth, double prob) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG type"); +template +void NearDiagRandomGraph(GraphT &dagOut, VertexIdxT numVertices, double bandwidth, double prob) { + static_assert(isConstructableCdagV, "GraphT must be a constructable computational DAG type"); - dag_out = Graph_t(num_vertices); + dagOut = GraphT(numVertices); std::random_device rd; std::mt19937 gen(rd()); - for (vertex_idx_t v = 1; v < num_vertices; ++v) { - std::binomial_distribution> 
bino_dist(vertex_idx_t(num_vertices - v), - prob * std::exp(1.0 - static_cast(v) / bandwidth)); - vertex_idx_t off_diag_edges_num = bino_dist(gen); + for (VertexIdxT v = 1; v < numVertices; ++v) { + std::binomial_distribution> binoDist(VertexIdxT(numVertices - v), + prob * std::exp(1.0 - static_cast(v) / bandwidth)); + VertexIdxT offDiagEdgesNum = binoDist(gen); - std::vector> range(num_vertices - v, 0); + std::vector> range(numVertices - v, 0); std::iota(range.begin(), range.end(), 0); - std::vector> sampled; + std::vector> sampled; - std::sample(range.begin(), range.end(), std::back_inserter(sampled), off_diag_edges_num, gen); + std::sample(range.begin(), range.end(), std::back_inserter(sampled), offDiagEdgesNum, gen); for (const auto &j : sampled) { - dag_out.add_edge(j, j + 1); + dagOut.AddEdge(j, j + 1); } } } diff --git a/include/osp/auxiliary/return_status.hpp b/include/osp/auxiliary/return_status.hpp index 3aa23889..d0bb0332 100644 --- a/include/osp/auxiliary/return_status.hpp +++ b/include/osp/auxiliary/return_status.hpp @@ -22,21 +22,21 @@ limitations under the License. namespace osp { -enum class RETURN_STATUS { OSP_SUCCESS, BEST_FOUND, TIMEOUT, ERROR }; +enum class ReturnStatus { OSP_SUCCESS, BEST_FOUND, TIMEOUT, ERROR }; /** * @brief Converts the enum to a string literal. * Returns const char* to avoid std::string allocation overhead. 
*/ -inline const char *to_string(const RETURN_STATUS status) { +inline const char *ToString(const ReturnStatus status) { switch (status) { - case RETURN_STATUS::OSP_SUCCESS: + case ReturnStatus::OSP_SUCCESS: return "SUCCESS"; - case RETURN_STATUS::BEST_FOUND: + case ReturnStatus::BEST_FOUND: return "BEST FOUND"; - case RETURN_STATUS::TIMEOUT: + case ReturnStatus::TIMEOUT: return "TIMEOUT"; - case RETURN_STATUS::ERROR: + case ReturnStatus::ERROR: return "ERROR"; default: return "UNKNOWN"; @@ -46,6 +46,6 @@ inline const char *to_string(const RETURN_STATUS status) { /** * @brief Stream operator overload using the helper function. */ -inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) { return os << to_string(status); } +inline std::ostream &operator<<(std::ostream &os, ReturnStatus status) { return os << ToString(status); } } // namespace osp diff --git a/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp b/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp index 2594267f..3378c5b9 100644 --- a/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp +++ b/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp @@ -27,7 +27,7 @@ limitations under the License. 
namespace osp { -enum SCHEDULE_NODE_PERMUTATION_MODES { LOOP_PROCESSORS, SNAKE_PROCESSORS, PROCESSOR_FIRST, NO_PERMUTE }; +enum ScheduleNodePermutationModes { LOOP_PROCESSORS, SNAKE_PROCESSORS, PROCESSOR_FIRST, NO_PERMUTE }; /** * @brief Computes a permutation to improve locality of a schedule, looping through processors @@ -36,36 +36,36 @@ enum SCHEDULE_NODE_PERMUTATION_MODES { LOOP_PROCESSORS, SNAKE_PROCESSORS, PROCES * @param mode ordering of processors * @return std::vector vec[prev_node_name] = new_node_name(location) */ -template -std::vector schedule_node_permuter_basic(const BspSchedule &sched, - const SCHEDULE_NODE_PERMUTATION_MODES mode = LOOP_PROCESSORS) { +template +std::vector ScheduleNodePermuterBasic(const BspSchedule &sched, + const ScheduleNodePermutationModes mode = LOOP_PROCESSORS) { // superstep, processor, nodes std::vector>> allocation( - sched.numberOfSupersteps(), - std::vector>(sched.getInstance().numberOfProcessors(), std::vector({}))); - for (size_t node = 0; node < sched.getInstance().numberOfVertices(); node++) { - allocation[sched.assignedSuperstep(node)][sched.assignedProcessor(node)].emplace_back(node); + sched.NumberOfSupersteps(), + std::vector>(sched.GetInstance().NumberOfProcessors(), std::vector({}))); + for (size_t node = 0; node < sched.GetInstance().NumberOfVertices(); node++) { + allocation[sched.AssignedSuperstep(node)][sched.AssignedProcessor(node)].emplace_back(node); } // reordering and allocating into permutation - std::vector permutation(sched.getInstance().numberOfVertices()); + std::vector permutation(sched.GetInstance().NumberOfVertices()); if (mode == LOOP_PROCESSORS || mode == SNAKE_PROCESSORS) { bool forward = true; size_t counter = 0; - for (auto step_it = allocation.begin(); step_it != allocation.cend(); step_it++) { + for (auto stepIt = allocation.begin(); stepIt != allocation.cend(); stepIt++) { if (forward) { - for (auto proc_it = step_it->begin(); proc_it != step_it->cend(); proc_it++) { + for (auto procIt 
= stepIt->begin(); procIt != stepIt->cend(); procIt++) { // topological_sort_for_data_locality_interior_basic(*proc_it, sched); - for (const auto &node : *proc_it) { + for (const auto &node : *procIt) { permutation[node] = counter; counter++; } } } else { - for (auto proc_it = step_it->rbegin(); proc_it != step_it->crend(); proc_it++) { + for (auto procIt = stepIt->rbegin(); procIt != stepIt->crend(); procIt++) { // topological_sort_for_data_locality_interior_basic(*proc_it, sched); - for (const auto &node : *proc_it) { + for (const auto &node : *procIt) { permutation[node] = counter; counter++; } diff --git a/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp b/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp index 86f4e0f3..436e3dd4 100644 --- a/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp +++ b/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp @@ -37,84 +37,84 @@ limitations under the License. namespace osp { -template +template class Sptrsv { - using uVertType = typename SparseMatrixImp::vertex_idx; + using UVertType = typename SparseMatrixImp::VertexIdx; private: - const BspInstance> *instance; + const BspInstance> *instance_; public: - std::vector val; - std::vector csc_val; + std::vector val_; + std::vector cscVal_; - std::vector col_idx; - std::vector row_ptr; + std::vector colIdx_; + std::vector rowPtr_; - std::vector row_idx; - std::vector col_ptr; + std::vector rowIdx_; + std::vector colPtr_; - std::vector> step_proc_ptr; - std::vector> step_proc_num; + std::vector> stepProcPtr_; + std::vector> stepProcNum_; - double *x; - const double *b; + double *x_; + const double *b_; - unsigned num_supersteps; + unsigned numSupersteps_; - std::vector>> vector_step_processor_vertices; - std::vector>> vector_step_processor_vertices_u; - std::vector ready; + std::vector>> vectorStepProcessorVertices_; + std::vector>> vectorStepProcessorVerticesU_; + std::vector ready_; - std::vector>> bounds_array_l; - std::vector>> bounds_array_u; + std::vector>> boundsArrayL_; 
+ std::vector>> boundsArrayU_; Sptrsv() = default; - Sptrsv(BspInstance> &inst) : instance(&inst) {}; + Sptrsv(BspInstance> &inst) : instance_(&inst) {}; - void setup_csr_no_permutation(const BspSchedule> &schedule) { - vector_step_processor_vertices = std::vector>>( - schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors())); + void SetupCsrNoPermutation(const BspSchedule> &schedule) { + vectorStepProcessorVertices_ = std::vector>>( + schedule.NumberOfSupersteps(), std::vector>(schedule.GetInstance().NumberOfProcessors())); - vector_step_processor_vertices_u = std::vector>>( - schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors())); + vectorStepProcessorVerticesU_ = std::vector>>( + schedule.NumberOfSupersteps(), std::vector>(schedule.GetInstance().NumberOfProcessors())); - bounds_array_l = std::vector>>( - schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors())); - bounds_array_u = std::vector>>( - schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors())); + boundsArrayL_ = std::vector>>( + schedule.NumberOfSupersteps(), std::vector>(schedule.GetInstance().NumberOfProcessors())); + boundsArrayU_ = std::vector>>( + schedule.NumberOfSupersteps(), std::vector>(schedule.GetInstance().NumberOfProcessors())); - num_supersteps = schedule.numberOfSupersteps(); - size_t number_of_vertices = instance->getComputationalDag().num_vertices(); + numSupersteps_ = schedule.NumberOfSupersteps(); + size_t numberOfVertices = instance_->GetComputationalDag().NumVertices(); # pragma omp parallel num_threads(2) { int id = omp_get_thread_num(); switch (id) { case 0: { - for (size_t node = 0; node < number_of_vertices; ++node) { - vector_step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].push_back( - static_cast(node)); + for (size_t node = 0; node < numberOfVertices; ++node) { + 
vectorStepProcessorVertices_[schedule.AssignedSuperstep(node)][schedule.AssignedProcessor(node)].push_back( + static_cast(node)); } - for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) { - for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if (!vector_step_processor_vertices[step][proc].empty()) { - eigen_idx_type start = vector_step_processor_vertices[step][proc][0]; - eigen_idx_type prev = vector_step_processor_vertices[step][proc][0]; - - for (size_t i = 1; i < vector_step_processor_vertices[step][proc].size(); ++i) { - if (vector_step_processor_vertices[step][proc][i] != prev + 1) { - bounds_array_l[step][proc].push_back(start); - bounds_array_l[step][proc].push_back(prev); - start = vector_step_processor_vertices[step][proc][i]; + for (unsigned int step = 0; step < schedule.NumberOfSupersteps(); ++step) { + for (unsigned int proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + if (!vectorStepProcessorVertices_[step][proc].empty()) { + EigenIdxType start = vectorStepProcessorVertices_[step][proc][0]; + EigenIdxType prev = vectorStepProcessorVertices_[step][proc][0]; + + for (size_t i = 1; i < vectorStepProcessorVertices_[step][proc].size(); ++i) { + if (vectorStepProcessorVertices_[step][proc][i] != prev + 1) { + boundsArrayL_[step][proc].push_back(start); + boundsArrayL_[step][proc].push_back(prev); + start = vectorStepProcessorVertices_[step][proc][i]; } - prev = vector_step_processor_vertices[step][proc][i]; + prev = vectorStepProcessorVertices_[step][proc][i]; } - bounds_array_l[step][proc].push_back(start); - bounds_array_l[step][proc].push_back(prev); + boundsArrayL_[step][proc].push_back(start); + boundsArrayL_[step][proc].push_back(prev); } } } @@ -122,32 +122,30 @@ class Sptrsv { break; } case 1: { - size_t node = number_of_vertices; + size_t node = numberOfVertices; do { node--; - vector_step_processor_vertices_u[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] - 
.push_back(static_cast(node)); + vectorStepProcessorVerticesU_[schedule.AssignedSuperstep(node)][schedule.AssignedProcessor(node)].push_back( + static_cast(node)); } while (node > 0); - for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) { - for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if (!vector_step_processor_vertices_u[step][proc].empty()) { - eigen_idx_type start_u - = static_cast(vector_step_processor_vertices_u[step][proc][0]); - eigen_idx_type prev_u - = static_cast(vector_step_processor_vertices_u[step][proc][0]); - - for (size_t i = 1; i < vector_step_processor_vertices_u[step][proc].size(); ++i) { - if (static_cast(vector_step_processor_vertices_u[step][proc][i]) != prev_u - 1) { - bounds_array_u[step][proc].push_back(start_u); - bounds_array_u[step][proc].push_back(prev_u); - start_u = static_cast(vector_step_processor_vertices_u[step][proc][i]); + for (unsigned int step = 0; step < schedule.NumberOfSupersteps(); ++step) { + for (unsigned int proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + if (!vectorStepProcessorVerticesU_[step][proc].empty()) { + EigenIdxType startU = static_cast(vectorStepProcessorVerticesU_[step][proc][0]); + EigenIdxType prevU = static_cast(vectorStepProcessorVerticesU_[step][proc][0]); + + for (size_t i = 1; i < vectorStepProcessorVerticesU_[step][proc].size(); ++i) { + if (static_cast(vectorStepProcessorVerticesU_[step][proc][i]) != prevU - 1) { + boundsArrayU_[step][proc].push_back(startU); + boundsArrayU_[step][proc].push_back(prevU); + startU = static_cast(vectorStepProcessorVerticesU_[step][proc][i]); } - prev_u = static_cast(vector_step_processor_vertices_u[step][proc][i]); + prevU = static_cast(vectorStepProcessorVerticesU_[step][proc][i]); } - bounds_array_u[step][proc].push_back(start_u); - bounds_array_u[step][proc].push_back(prev_u); + boundsArrayU_[step][proc].push_back(startU); + boundsArrayU_[step][proc].push_back(prevU); } } } @@ -161,133 +159,132 
@@ class Sptrsv { } } - void setup_csr_with_permutation(const BspSchedule> &schedule, std::vector &perm) { - std::vector perm_inv(perm.size()); + void SetupCsrWithPermutation(const BspSchedule> &schedule, std::vector &perm) { + std::vector permInv(perm.size()); for (size_t i = 0; i < perm.size(); i++) { - perm_inv[perm[i]] = i; + permInv[perm[i]] = i; } - num_supersteps = schedule.numberOfSupersteps(); + numSupersteps_ = schedule.NumberOfSupersteps(); - val.clear(); - val.reserve(static_cast(instance->getComputationalDag().getCSR()->nonZeros())); + val_.clear(); + val_.reserve(static_cast(instance_->GetComputationalDag().GetCSR()->nonZeros())); - col_idx.clear(); - col_idx.reserve(static_cast(instance->getComputationalDag().getCSR()->nonZeros())); + colIdx_.clear(); + colIdx_.reserve(static_cast(instance_->GetComputationalDag().GetCSR()->nonZeros())); - row_ptr.clear(); - row_ptr.reserve(instance->numberOfVertices() + 1); + rowPtr_.clear(); + rowPtr_.reserve(instance_->NumberOfVertices() + 1); - step_proc_ptr - = std::vector>(num_supersteps, std::vector(instance->numberOfProcessors(), 0)); + stepProcPtr_ + = std::vector>(numSupersteps_, std::vector(instance_->NumberOfProcessors(), 0)); - step_proc_num = schedule.numAssignedNodesPerSuperstepProcessor(); + stepProcNum_ = schedule.NumAssignedNodesPerSuperstepProcessor(); - unsigned current_step = 0; - unsigned current_processor = 0; + unsigned currentStep = 0; + unsigned currentProcessor = 0; - step_proc_ptr[current_step][current_processor] = 0; + stepProcPtr_[currentStep][currentProcessor] = 0; - for (const uVertType &node : perm_inv) { - if (schedule.assignedProcessor(node) != current_processor || schedule.assignedSuperstep(node) != current_step) { - while (schedule.assignedProcessor(node) != current_processor || schedule.assignedSuperstep(node) != current_step) { - if (current_processor < instance->numberOfProcessors() - 1) { - current_processor++; + for (const UVertType &node : permInv) { + if 
(schedule.AssignedProcessor(node) != currentProcessor || schedule.AssignedSuperstep(node) != currentStep) { + while (schedule.AssignedProcessor(node) != currentProcessor || schedule.AssignedSuperstep(node) != currentStep) { + if (currentProcessor < instance_->NumberOfProcessors() - 1) { + currentProcessor++; } else { - current_processor = 0; - current_step++; + currentProcessor = 0; + currentStep++; } } - step_proc_ptr[current_step][current_processor] = static_cast(row_ptr.size()); + stepProcPtr_[currentStep][currentProcessor] = static_cast(rowPtr_.size()); } - row_ptr.push_back(col_idx.size()); + rowPtr_.push_back(colIdx_.size()); - std::set parents; + std::set parents; - for (uVertType par : instance->getComputationalDag().parents(node)) { + for (UVertType par : instance_->GetComputationalDag().Parents(node)) { parents.insert(perm[par]); } - for (const uVertType &par : parents) { - col_idx.push_back(par); + for (const UVertType &par : parents) { + colIdx_.push_back(par); unsigned found = 0; - const auto *outer = instance->getComputationalDag().getCSR()->outerIndexPtr(); - for (uVertType par_ind = static_cast(outer[node]); - par_ind < static_cast(outer[node + 1] - 1); - ++par_ind) { - if (static_cast(instance->getComputationalDag().getCSR()->innerIndexPtr()[par_ind]) == perm_inv[par]) { - val.push_back(instance->getComputationalDag().getCSR()->valuePtr()[par_ind]); + const auto *outer = instance_->GetComputationalDag().GetCSR()->outerIndexPtr(); + for (UVertType parInd = static_cast(outer[node]); parInd < static_cast(outer[node + 1] - 1); + ++parInd) { + if (static_cast(instance_->GetComputationalDag().GetCSR()->innerIndexPtr()[parInd]) == permInv[par]) { + val_.push_back(instance_->GetComputationalDag().GetCSR()->valuePtr()[parInd]); found++; } } assert(found == 1); } - col_idx.push_back(perm[node]); - val.push_back(instance->getComputationalDag() - .getCSR() - ->valuePtr()[instance->getComputationalDag().getCSR()->outerIndexPtr()[node + 1] - 1]); + 
colIdx_.push_back(perm[node]); + val_.push_back(instance_->GetComputationalDag() + .GetCSR() + ->valuePtr()[instance_->GetComputationalDag().GetCSR()->outerIndexPtr()[node + 1] - 1]); } - row_ptr.push_back(col_idx.size()); + rowPtr_.push_back(colIdx_.size()); } - void lsolve_serial() { - eigen_idx_type number_of_vertices = static_cast(instance->numberOfVertices()); - for (eigen_idx_type i = 0; i < number_of_vertices; ++i) { - x[i] = b[i]; - for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i]; - j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1; + void LsolveSerial() { + EigenIdxType numberOfVertices = static_cast(instance_->NumberOfVertices()); + for (EigenIdxType i = 0; i < numberOfVertices; ++i) { + x_[i] = b_[i]; + for (EigenIdxType j = (*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[i]; + j < (*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[i + 1] - 1; ++j) { - x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j] - * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]]; + x_[i] -= (*(instance_->GetComputationalDag().GetCSR())).valuePtr()[j] + * x_[(*(instance_->GetComputationalDag().GetCSR())).innerIndexPtr()[j]]; } - x[i] /= (*(instance->getComputationalDag().getCSR())) - .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1]; + x_[i] /= (*(instance_->GetComputationalDag().GetCSR())) + .valuePtr()[(*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[i + 1] - 1]; } } - void usolve_serial() { - eigen_idx_type number_of_vertices = static_cast(instance->numberOfVertices()); + void UsolveSerial() { + EigenIdxType numberOfVertices = static_cast(instance_->NumberOfVertices()); - eigen_idx_type i = number_of_vertices; + EigenIdxType i = numberOfVertices; do { i--; - x[i] = b[i]; - for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1; - j < 
(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1]; + x_[i] = b_[i]; + for (EigenIdxType j = (*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[i] + 1; + j < (*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[i + 1]; ++j) { - x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j] - * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]]; + x_[i] -= (*(instance_->GetComputationalDag().GetCSC())).valuePtr()[j] + * x_[(*(instance_->GetComputationalDag().GetCSC())).innerIndexPtr()[j]]; } - x[i] /= (*(instance->getComputationalDag().getCSC())) - .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]]; + x_[i] /= (*(instance_->GetComputationalDag().GetCSC())) + .valuePtr()[(*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[i]]; } while (i != 0); } - void lsolve_no_permutation_in_place() { -# pragma omp parallel num_threads(instance->numberOfProcessors()) + void LsolveNoPermutationInPlace() { +# pragma omp parallel num_threads(instance_->NumberOfProcessors()) { const size_t proc = static_cast(omp_get_thread_num()); - for (unsigned step = 0; step < num_supersteps; ++step) { - const size_t bounds_str_size = bounds_array_l[step][proc].size(); + for (unsigned step = 0; step < numSupersteps_; ++step) { + const size_t boundsStrSize = boundsArrayL_[step][proc].size(); - for (size_t index = 0; index < bounds_str_size; index += 2) { - eigen_idx_type lower_b = bounds_array_l[step][proc][index]; - const eigen_idx_type upper_b = bounds_array_l[step][proc][index + 1]; + for (size_t index = 0; index < boundsStrSize; index += 2) { + EigenIdxType lowerB = boundsArrayL_[step][proc][index]; + const EigenIdxType upperB = boundsArrayL_[step][proc][index + 1]; - for (eigen_idx_type node = lower_b; node <= upper_b; ++node) { - for (eigen_idx_type i = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node]; - i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 
1] - 1; + for (EigenIdxType node = lowerB; node <= upperB; ++node) { + for (EigenIdxType i = (*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[node]; + i < (*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[node + 1] - 1; ++i) { - x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i] - * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]]; + x_[node] -= (*(instance_->GetComputationalDag().GetCSR())).valuePtr()[i] + * x_[(*(instance_->GetComputationalDag().GetCSR())).innerIndexPtr()[i]]; } - x[node] /= (*(instance->getComputationalDag().getCSR())) - .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1]; + x_[node] /= (*(instance_->GetComputationalDag().GetCSR())) + .valuePtr()[(*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[node + 1] - 1]; } } # pragma omp barrier @@ -295,57 +292,57 @@ class Sptrsv { } } - void usolve_no_permutation_in_place() { -# pragma omp parallel num_threads(instance->numberOfProcessors()) + void UsolveNoPermutationInPlace() { +# pragma omp parallel num_threads(instance_->NumberOfProcessors()) { // Process each superstep starting from the last one (opposite of lsolve) const size_t proc = static_cast(omp_get_thread_num()); - unsigned step = num_supersteps; + unsigned step = numSupersteps_; do { step--; - const size_t bounds_str_size = bounds_array_u[step][proc].size(); - for (size_t index = 0; index < bounds_str_size; index += 2) { - eigen_idx_type node = bounds_array_u[step][proc][index] + 1; - const eigen_idx_type lower_b = bounds_array_u[step][proc][index + 1]; + const size_t boundsStrSize = boundsArrayU_[step][proc].size(); + for (size_t index = 0; index < boundsStrSize; index += 2) { + EigenIdxType node = boundsArrayU_[step][proc][index] + 1; + const EigenIdxType lowerB = boundsArrayU_[step][proc][index + 1]; do { node--; - for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1; - i < 
(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1]; + for (EigenIdxType i = (*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[node] + 1; + i < (*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[node + 1]; ++i) { - x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i] - * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]]; + x_[node] -= (*(instance_->GetComputationalDag().GetCSC())).valuePtr()[i] + * x_[(*(instance_->GetComputationalDag().GetCSC())).innerIndexPtr()[i]]; } - x[node] /= (*(instance->getComputationalDag().getCSC())) - .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]]; - } while (node != lower_b); + x_[node] /= (*(instance_->GetComputationalDag().GetCSC())) + .valuePtr()[(*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[node]]; + } while (node != lowerB); } # pragma omp barrier } while (step != 0); } } - void lsolve_no_permutation() { -# pragma omp parallel num_threads(instance->numberOfProcessors()) + void LsolveNoPermutation() { +# pragma omp parallel num_threads(instance_->NumberOfProcessors()) { const size_t proc = static_cast(omp_get_thread_num()); - for (unsigned step = 0; step < num_supersteps; ++step) { - const size_t bounds_str_size = bounds_array_l[step][proc].size(); + for (unsigned step = 0; step < numSupersteps_; ++step) { + const size_t boundsStrSize = boundsArrayL_[step][proc].size(); - for (size_t index = 0; index < bounds_str_size; index += 2) { - eigen_idx_type lower_b = bounds_array_l[step][proc][index]; - const eigen_idx_type upper_b = bounds_array_l[step][proc][index + 1]; + for (size_t index = 0; index < boundsStrSize; index += 2) { + EigenIdxType lowerB = boundsArrayL_[step][proc][index]; + const EigenIdxType upperB = boundsArrayL_[step][proc][index + 1]; - for (eigen_idx_type node = lower_b; node <= upper_b; ++node) { - x[node] = b[node]; - for (eigen_idx_type i = 
(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node]; - i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1; + for (EigenIdxType node = lowerB; node <= upperB; ++node) { + x_[node] = b_[node]; + for (EigenIdxType i = (*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[node]; + i < (*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[node + 1] - 1; ++i) { - x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i] - * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]]; + x_[node] -= (*(instance_->GetComputationalDag().GetCSR())).valuePtr()[i] + * x_[(*(instance_->GetComputationalDag().GetCSR())).innerIndexPtr()[i]]; } - x[node] /= (*(instance->getComputationalDag().getCSR())) - .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1]; + x_[node] /= (*(instance_->GetComputationalDag().GetCSR())) + .valuePtr()[(*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[node + 1] - 1]; } } # pragma omp barrier @@ -353,79 +350,79 @@ class Sptrsv { } } - void usolve_no_permutation() { -# pragma omp parallel num_threads(instance->numberOfProcessors()) + void UsolveNoPermutation() { +# pragma omp parallel num_threads(instance_->NumberOfProcessors()) { // Process each superstep starting from the last one (opposite of lsolve) const size_t proc = static_cast(omp_get_thread_num()); - unsigned step = num_supersteps; + unsigned step = numSupersteps_; do { step--; - const size_t bounds_str_size = bounds_array_u[step][proc].size(); - for (size_t index = 0; index < bounds_str_size; index += 2) { - eigen_idx_type node = bounds_array_u[step][proc][index] + 1; - const eigen_idx_type lower_b = bounds_array_u[step][proc][index + 1]; + const size_t boundsStrSize = boundsArrayU_[step][proc].size(); + for (size_t index = 0; index < boundsStrSize; index += 2) { + EigenIdxType node = boundsArrayU_[step][proc][index] + 1; + const EigenIdxType lowerB = 
boundsArrayU_[step][proc][index + 1]; do { node--; - x[node] = b[node]; - for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1; - i < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1]; + x_[node] = b_[node]; + for (EigenIdxType i = (*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[node] + 1; + i < (*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[node + 1]; ++i) { - x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i] - * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]]; + x_[node] -= (*(instance_->GetComputationalDag().GetCSC())).valuePtr()[i] + * x_[(*(instance_->GetComputationalDag().GetCSC())).innerIndexPtr()[i]]; } - x[node] /= (*(instance->getComputationalDag().getCSC())) - .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]]; - } while (node != lower_b); + x_[node] /= (*(instance_->GetComputationalDag().GetCSC())) + .valuePtr()[(*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[node]]; + } while (node != lowerB); } # pragma omp barrier } while (step != 0); } } - void lsolve_serial_in_place() { - eigen_idx_type number_of_vertices = static_cast(instance->numberOfVertices()); - for (eigen_idx_type i = 0; i < number_of_vertices; ++i) { - for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i]; - j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1; + void LsolveSerialInPlace() { + EigenIdxType numberOfVertices = static_cast(instance_->NumberOfVertices()); + for (EigenIdxType i = 0; i < numberOfVertices; ++i) { + for (EigenIdxType j = (*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[i]; + j < (*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[i + 1] - 1; ++j) { - x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j] - * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]]; + x_[i] -= 
(*(instance_->GetComputationalDag().GetCSR())).valuePtr()[j] + * x_[(*(instance_->GetComputationalDag().GetCSR())).innerIndexPtr()[j]]; } - x[i] /= (*(instance->getComputationalDag().getCSR())) - .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1]; + x_[i] /= (*(instance_->GetComputationalDag().GetCSR())) + .valuePtr()[(*(instance_->GetComputationalDag().GetCSR())).outerIndexPtr()[i + 1] - 1]; } } - void usolve_serial_in_place() { - eigen_idx_type number_of_vertices = static_cast(instance->numberOfVertices()); - eigen_idx_type i = number_of_vertices; + void UsolveSerialInPlace() { + EigenIdxType numberOfVertices = static_cast(instance_->NumberOfVertices()); + EigenIdxType i = numberOfVertices; do { i--; - for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1; - j < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1]; + for (EigenIdxType j = (*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[i] + 1; + j < (*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[i + 1]; ++j) { - x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j] - * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]]; + x_[i] -= (*(instance_->GetComputationalDag().GetCSC())).valuePtr()[j] + * x_[(*(instance_->GetComputationalDag().GetCSC())).innerIndexPtr()[j]]; } - x[i] /= (*(instance->getComputationalDag().getCSC())) - .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]]; + x_[i] /= (*(instance_->GetComputationalDag().GetCSC())) + .valuePtr()[(*(instance_->GetComputationalDag().GetCSC())).outerIndexPtr()[i]]; } while (i != 0); } - void lsolve_with_permutation_in_place() { -# pragma omp parallel num_threads(instance->numberOfProcessors()) + void LsolveWithPermutationInPlace() { +# pragma omp parallel num_threads(instance_->NumberOfProcessors()) { - for (unsigned step = 0; step < num_supersteps; step++) { + for (unsigned step = 0; step < 
numSupersteps_; step++) { const size_t proc = static_cast(omp_get_thread_num()); - const uVertType upper_limit = step_proc_ptr[step][proc] + step_proc_num[step][proc]; - for (uVertType _row_idx = step_proc_ptr[step][proc]; _row_idx < upper_limit; _row_idx++) { - for (uVertType i = row_ptr[_row_idx]; i < row_ptr[_row_idx + 1] - 1; i++) { - x[_row_idx] -= val[i] * x[col_idx[i]]; + const UVertType upperLimit = stepProcPtr_[step][proc] + stepProcNum_[step][proc]; + for (UVertType rowIdx = stepProcPtr_[step][proc]; rowIdx < upperLimit; rowIdx++) { + for (UVertType i = rowPtr_[rowIdx]; i < rowPtr_[rowIdx + 1] - 1; i++) { + x_[rowIdx] -= val_[i] * x_[colIdx_[i]]; } - x[_row_idx] /= val[row_ptr[_row_idx + 1] - 1]; + x_[rowIdx] /= val_[rowPtr_[rowIdx + 1] - 1]; } # pragma omp barrier @@ -433,19 +430,19 @@ class Sptrsv { } } - void lsolve_with_permutation() { -# pragma omp parallel num_threads(instance->numberOfProcessors()) + void LsolveWithPermutation() { +# pragma omp parallel num_threads(instance_->NumberOfProcessors()) { - for (unsigned step = 0; step < num_supersteps; step++) { + for (unsigned step = 0; step < numSupersteps_; step++) { const size_t proc = static_cast(omp_get_thread_num()); - const uVertType upper_limit = step_proc_ptr[step][proc] + step_proc_num[step][proc]; - for (uVertType _row_idx = step_proc_ptr[step][proc]; _row_idx < upper_limit; _row_idx++) { - x[_row_idx] = b[_row_idx]; - for (uVertType i = row_ptr[_row_idx]; i < row_ptr[_row_idx + 1] - 1; i++) { - x[_row_idx] -= val[i] * x[col_idx[i]]; + const UVertType upperLimit = stepProcPtr_[step][proc] + stepProcNum_[step][proc]; + for (UVertType rowIdx = stepProcPtr_[step][proc]; rowIdx < upperLimit; rowIdx++) { + x_[rowIdx] = b_[rowIdx]; + for (UVertType i = rowPtr_[rowIdx]; i < rowPtr_[rowIdx + 1] - 1; i++) { + x_[rowIdx] -= val_[i] * x_[colIdx_[i]]; } - x[_row_idx] /= val[row_ptr[_row_idx + 1] - 1]; + x_[rowIdx] /= val_[rowPtr_[rowIdx + 1] - 1]; } # pragma omp barrier @@ -453,34 +450,34 @@ class 
Sptrsv { } } - void reset_x() { - eigen_idx_type number_of_vertices = static_cast(instance->numberOfVertices()); - for (eigen_idx_type i = 0; i < number_of_vertices; i++) { - x[i] = 1.0; + void ResetX() { + EigenIdxType numberOfVertices = static_cast(instance_->NumberOfVertices()); + for (EigenIdxType i = 0; i < numberOfVertices; i++) { + x_[i] = 1.0; } } - void permute_x_vector(const std::vector &perm) { - std::vector vec_perm(perm.size()); + void PermuteXVector(const std::vector &perm) { + std::vector vecPerm(perm.size()); for (size_t i = 0; i < perm.size(); i++) { - vec_perm[i] = x[perm[i]]; + vecPerm[i] = x_[perm[i]]; } for (size_t i = 0; i < perm.size(); i++) { - x[i] = vec_perm[i]; + x_[i] = vecPerm[i]; } } - void permute_x_vector_inverse(const std::vector &perm) { - std::vector vec_unperm(perm.size()); + void PermuteXVectorInverse(const std::vector &perm) { + std::vector vecUnperm(perm.size()); for (size_t i = 0; i < perm.size(); i++) { - vec_unperm[perm[i]] = x[i]; + vecUnperm[perm[i]] = x_[i]; } for (size_t i = 0; i < perm.size(); i++) { - x[i] = vec_unperm[i]; + x_[i] = vecUnperm[i]; } } - std::size_t get_number_of_vertices() { return instance->numberOfVertices(); } + std::size_t GetNumberOfVertices() { return instance_->NumberOfVertices(); } virtual ~Sptrsv() = default; }; diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp index 535f0d98..8ff71be5 100644 --- a/include/osp/bsp/model/BspArchitecture.hpp +++ b/include/osp/bsp/model/BspArchitecture.hpp @@ -35,12 +35,12 @@ limitations under the License. namespace osp { /** - * @enum MEMORY_CONSTRAINT_TYPE + * @enum MemoryConstraintType * @brief Enumerates the different types of memory constraints. * Memory bounds are set per processor and apply to aggregated memory weights of nodes according to the different types of memory * constraints. */ -enum class MEMORY_CONSTRAINT_TYPE { +enum class MemoryConstraintType { NONE, /** No memory constraints. 
*/ LOCAL, /** The memory bounds apply to the sum of memory weights of nodes assigned to the same processor and superstep. */ GLOBAL, /** The memory bounds apply to the sum of memory weights of the nodes assigned to the same processor. */ @@ -55,21 +55,21 @@ enum class MEMORY_CONSTRAINT_TYPE { * @brief Converts the enum to a string literal. * Returns const char* to avoid std::string allocation overhead. */ -inline const char *to_string(MEMORY_CONSTRAINT_TYPE type) { +inline const char *ToString(MemoryConstraintType type) { switch (type) { - case MEMORY_CONSTRAINT_TYPE::NONE: + case MemoryConstraintType::NONE: return "NONE"; - case MEMORY_CONSTRAINT_TYPE::LOCAL: + case MemoryConstraintType::LOCAL: return "LOCAL"; - case MEMORY_CONSTRAINT_TYPE::GLOBAL: + case MemoryConstraintType::GLOBAL: return "GLOBAL"; - case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT: + case MemoryConstraintType::PERSISTENT_AND_TRANSIENT: return "PERSISTENT_AND_TRANSIENT"; - case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT: + case MemoryConstraintType::LOCAL_IN_OUT: return "LOCAL_IN_OUT"; - case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES: + case MemoryConstraintType::LOCAL_INC_EDGES: return "LOCAL_INC_EDGES"; - case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES: + case MemoryConstraintType::LOCAL_SOURCES_INC_EDGES: return "LOCAL_SOURCES_INC_EDGES"; default: return "UNKNOWN"; @@ -79,7 +79,7 @@ inline const char *to_string(MEMORY_CONSTRAINT_TYPE type) { /** * @brief Stream operator overload using the helper function. */ -inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { return os << to_string(type); } +inline std::ostream &operator<<(std::ostream &os, MemoryConstraintType type) { return os << ToString(type); } /** * @class BspArchitecture @@ -107,12 +107,12 @@ inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { * By default, send costs are uniform (1 for distinct processors, 0 for self). 
* * **Memory Constraints:** - * Each processor has a memory bound. The `MEMORY_CONSTRAINT_TYPE` determines how these bounds are applied + * Each processor has a memory bound. The `MemoryConstraintType` determines how these bounds are applied * (e.g., local per superstep, global per processor). */ -template +template class BspArchitecture { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); + static_assert(isComputationalDagV, "BspSchedule can only be used with computational DAGs."); private: /** @brief The number of processors in the architecture. Must be at least 1. */ @@ -122,13 +122,13 @@ class BspArchitecture { unsigned numberOfProcessorTypes_; /** @brief The communication costs, typically denoted 'g' for the BSP model. */ - v_commw_t communicationCosts_; + VCommwT communicationCosts_; /** @brief The synchronisation costs, typically denoted 'L' for the BSP model. */ - v_commw_t synchronisationCosts_; + VCommwT synchronisationCosts_; /** @brief The architecture allows to specify memory bounds per processor. */ - std::vector> memoryBound_; + std::vector> memoryBound_; /** @brief Flag to indicate whether the architecture is NUMA , i.e., whether the send costs are different for different pairs of processors. */ bool isNuma_; @@ -138,10 +138,10 @@ class BspArchitecture { std::vector processorTypes_; /** @brief A flattened p x p matrix of send costs. Access via index [i * numberOfProcessors_ + j]. */ - std::vector> sendCosts_; + std::vector> sendCosts_; /** @brief The memory constraint type. */ - MEMORY_CONSTRAINT_TYPE memoryConstraintType_ = MEMORY_CONSTRAINT_TYPE::NONE; + MemoryConstraintType memoryConstraintType_ = MemoryConstraintType::NONE; /** @brief Helper function to calculate the index of a flattened p x p matrix. 
*/ std::size_t FlatIndex(const unsigned row, const unsigned col) const { @@ -153,7 +153,7 @@ class BspArchitecture { return false; } - const v_commw_t val = sendCosts_[1U]; + const VCommwT val = sendCosts_[1U]; for (unsigned p1 = 0U; p1 < numberOfProcessors_; p1++) { for (unsigned p2 = 0U; p2 < numberOfProcessors_; p2++) { if (p1 == p2) { @@ -200,36 +200,36 @@ class BspArchitecture { * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal * entries are forced to zero. Default: empty (uniform costs). */ - BspArchitecture(const unsigned NumberOfProcessors = 2U, - const v_commw_t CommunicationCost = 1U, - const v_commw_t SynchronisationCost = 2U, - const v_memw_t MemoryBound = 100U, - const std::vector>> &SendCosts = {}) - : numberOfProcessors_(NumberOfProcessors), + BspArchitecture(const unsigned numberOfProcessors = 2U, + const VCommwT communicationCost = 1U, + const VCommwT synchronisationCost = 2U, + const VMemwT memoryBound = 100U, + const std::vector>> &sendCosts = {}) + : numberOfProcessors_(numberOfProcessors), numberOfProcessorTypes_(1U), - communicationCosts_(CommunicationCost), - synchronisationCosts_(SynchronisationCost), - memoryBound_(NumberOfProcessors, MemoryBound), + communicationCosts_(communicationCost), + synchronisationCosts_(synchronisationCost), + memoryBound_(numberOfProcessors, memoryBound), isNuma_(false), - processorTypes_(NumberOfProcessors, 0U) { - if (NumberOfProcessors == 0U) { + processorTypes_(numberOfProcessors, 0U) { + if (numberOfProcessors == 0U) { throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0."); } - if (SendCosts.empty()) { + if (sendCosts.empty()) { InitializeUniformSendCosts(); } else { - if (NumberOfProcessors != SendCosts.size()) { + if (numberOfProcessors != sendCosts.size()) { throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); } - if (std::any_of(SendCosts.begin(), SendCosts.end(), 
[NumberOfProcessors](const auto &thing) { - return thing.size() != NumberOfProcessors; + if (std::any_of(sendCosts.begin(), sendCosts.end(), [numberOfProcessors](const auto &thing) { + return thing.size() != numberOfProcessors; })) { throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); } - sendCosts_.reserve(NumberOfProcessors * NumberOfProcessors); - for (const auto &row : SendCosts) { + sendCosts_.reserve(numberOfProcessors * numberOfProcessors); + for (const auto &row : sendCosts) { sendCosts_.insert(sendCosts_.end(), row.begin(), row.end()); } @@ -250,24 +250,24 @@ class BspArchitecture { * @tparam Graph_t_other The graph type of the other BspArchitecture. * @param other The other BspArchitecture object. */ - template - BspArchitecture(const BspArchitecture &other) - : numberOfProcessors_(other.numberOfProcessors()), - numberOfProcessorTypes_(other.getNumberOfProcessorTypes()), - communicationCosts_(other.communicationCosts()), - synchronisationCosts_(other.synchronisationCosts()), - memoryBound_(other.memoryBound()), - isNuma_(other.isNumaArchitecture()), - processorTypes_(other.processorTypes()), - sendCosts_(other.sendCostsVector()) { - static_assert(std::is_same_v, v_memw_t>, - "BspArchitecture: Graph_t and Graph_t_other have the same memory weight type."); - - static_assert(std::is_same_v, v_commw_t>, - "BspArchitecture: Graph_t and Graph_t_other have the same communication weight type."); - - static_assert(std::is_same_v, v_type_t>, - "BspArchitecture: Graph_t and Graph_t_other have the same processor type."); + template + BspArchitecture(const BspArchitecture &other) + : numberOfProcessors_(other.NumberOfProcessors()), + numberOfProcessorTypes_(other.GetNumberOfProcessorTypes()), + communicationCosts_(other.CommunicationCosts()), + synchronisationCosts_(other.SynchronisationCosts()), + memoryBound_(other.MemoryBound()), + isNuma_(other.IsNumaArchitecture()), + processorTypes_(other.ProcessorTypes()), + 
sendCosts_(other.SendCostsVector()) { + static_assert(std::is_same_v, VMemwT>, + "BspArchitecture: GraphT and Graph_t_other have the same memory weight type."); + + static_assert(std::is_same_v, VCommwT>, + "BspArchitecture: GraphT and Graph_t_other have the same communication weight type."); + + static_assert(std::is_same_v, VTypeT>, + "BspArchitecture: GraphT and Graph_t_other have the same processor type."); } /** @@ -279,11 +279,11 @@ class BspArchitecture { * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal * entries are forced to zero. */ - BspArchitecture(const unsigned NumberOfProcessors, - const v_commw_t CommunicationCost, - const v_commw_t SynchronisationCost, - const std::vector>> &SendCosts) - : BspArchitecture(NumberOfProcessors, CommunicationCost, SynchronisationCost, 100U, SendCosts) {} + BspArchitecture(const unsigned numberOfProcessors, + const VCommwT communicationCost, + const VCommwT synchronisationCost, + const std::vector>> &sendCosts) + : BspArchitecture(numberOfProcessors, communicationCost, synchronisationCost, 100U, sendCosts) {} /** * @brief Sets the uniform send cost for each pair of processors. @@ -310,12 +310,12 @@ class BspArchitecture { * * @param base The base value used to calculate the send cost. */ - void SetExpSendCost(const v_commw_t base) { + void SetExpSendCost(const VCommwT base) { isNuma_ = true; unsigned maxPos = 1; constexpr unsigned two = 2; - for (; intpow(two, maxPos + 1) <= numberOfProcessors_ - 1; ++maxPos) {} + for (; Intpow(two, maxPos + 1) <= numberOfProcessors_ - 1; ++maxPos) {} for (unsigned i = 0U; i < numberOfProcessors_; ++i) { for (unsigned j = i + 1U; j < numberOfProcessors_; ++j) { @@ -334,7 +334,7 @@ class BspArchitecture { * @brief Returns a view of processor indices from 0 to numberOfProcessors_ - 1. * @return An integral view of processor indices. 
*/ - [[nodiscard]] auto processors() const { return integral_range(numberOfProcessors_); } + [[nodiscard]] auto Processors() const { return IntegralRange(numberOfProcessors_); } /** * @brief Sets the send costs for the BspArchitecture. @@ -342,7 +342,7 @@ class BspArchitecture { * @param vec A 2D vector representing the send costs between processors. * @throws std::invalid_argument if the size of the vector is invalid or diagonal elements are not 0. */ - void SetSendCosts(const std::vector>> &vec) { + void SetSendCosts(const std::vector>> &vec) { if (vec.size() != numberOfProcessors_) { throw std::invalid_argument("Invalid Argument: Vector size mismatch."); } @@ -374,8 +374,8 @@ class BspArchitecture { * @param cost The cost of sending data between the processors. * @throws std::invalid_argument if the processor indices are out of bounds. */ - void SetSendCosts(const unsigned p1, const unsigned p2, const v_commw_t cost) { - if (p1 >= numberOfProcessors_ || p2 >= numberOfProcessors_) { // Fixed condition: p2 >= number_processors + void SetSendCosts(const unsigned p1, const unsigned p2, const VCommwT cost) { + if (p1 >= numberOfProcessors_ || p2 >= numberOfProcessors_) { throw std::invalid_argument("Invalid Argument: Processor index out of bounds."); } @@ -389,18 +389,18 @@ class BspArchitecture { * @brief Sets the memory bound for all processors. * @param MemoryBound The new memory bound for all processors. */ - void setMemoryBound(const v_memw_t MemoryBound) { memoryBound_.assign(numberOfProcessors_, MemoryBound); } + void SetMemoryBound(const VMemwT memoryBound) { memoryBound_.assign(numberOfProcessors_, memoryBound); } /** * @brief Sets the memory bound for all processors using a vector. * @param MemoryBound The vector of memory bounds. * @throws std::invalid_argument if the size of the vector is invalid. 
*/ - void setMemoryBound(const std::vector> &MemoryBound) { - if (MemoryBound.size() != numberOfProcessors_) { + void SetMemoryBound(const std::vector> &memoryBound) { + if (memoryBound.size() != numberOfProcessors_) { throw std::invalid_argument("Invalid Argument: Memory bound vector size does not match number of processors."); } - memoryBound_ = MemoryBound; + memoryBound_ = memoryBound; } /** @@ -408,27 +408,27 @@ class BspArchitecture { * @param MemoryBound The new memory bound for the processor. * @param processorIndex The processor index. Must be less than numberOfProcessors_. */ - void setMemoryBound(const v_memw_t MemoryBound, const unsigned processorIndex) { - memoryBound_.at(processorIndex) = MemoryBound; + void SetMemoryBound(const VMemwT memoryBound, const unsigned processorIndex) { + memoryBound_.at(processorIndex) = memoryBound; } /** * @brief Sets the synchronization costs. * @param SynchCost The new synchronization costs. */ - void setSynchronisationCosts(const v_commw_t SynchCost) { synchronisationCosts_ = SynchCost; } + void SetSynchronisationCosts(const VCommwT synchCost) { synchronisationCosts_ = synchCost; } /** * @brief Sets the communication costs. * @param CommCost The new communication costs. */ - void setCommunicationCosts(const v_commw_t CommCost) { communicationCosts_ = CommCost; } + void SetCommunicationCosts(const VCommwT commCost) { communicationCosts_ = commCost; } /** * @brief Checks if the architecture is NUMA. * @return True if NUMA, false otherwise. */ - [[nodiscard]] bool isNumaArchitecture() const { return isNuma_; } + [[nodiscard]] bool IsNumaArchitecture() const { return isNuma_; } /** * @brief Sets the number of processors. Processor type is set to 0 for all processors. @@ -436,7 +436,7 @@ class BspArchitecture { * @param numberOfProcessors The number of processors. Must be greater than 0. * @throws std::invalid_argument if the number of processors is 0. 
*/ - void setNumberOfProcessors(const unsigned numberOfProcessors) { + void SetNumberOfProcessors(const unsigned numberOfProcessors) { if (numberOfProcessors == 0) { throw std::invalid_argument("Invalid Argument: Number of processors must be greater than 0."); } @@ -455,7 +455,7 @@ class BspArchitecture { * vector. Resets send costs to uniform (1). Resets memory bound to 100 for all processors. * @param processorTypes The types of the respective processors. */ - void setProcessorsWithTypes(const std::vector> &processorTypes) { + void SetProcessorsWithTypes(const std::vector> &processorTypes) { if (processorTypes.empty()) { throw std::invalid_argument("Invalid Argument: Processor types vector is empty."); } @@ -480,8 +480,8 @@ class BspArchitecture { * @param processorTypeCount Vector where index is type and value is count of processors of that type. * @param processorTypeMemory Vector where index is type and value is memory bound for that type. */ - void SetProcessorsConsequTypes(const std::vector> &processorTypeCount, - const std::vector> &processorTypeMemory) { + void SetProcessorsConsequTypes(const std::vector> &processorTypeCount, + const std::vector> &processorTypeMemory) { if (processorTypeCount.size() != processorTypeMemory.size()) { throw std::invalid_argument("Invalid Argument: processorTypeCount and processorTypeMemory must have the same size."); } @@ -513,22 +513,20 @@ class BspArchitecture { * @brief Returns the memory bounds of all processors. * @return Vector of memory bounds. */ - [[nodiscard]] const std::vector> &memoryBound() const { return memoryBound_; } + [[nodiscard]] const std::vector> &MemoryBound() const { return memoryBound_; } /** * @brief Returns the memory bound of a specific processor. * @param proc The processor index. * @return The memory bound. 
*/ - [[nodiscard]] v_memw_t memoryBound(const unsigned proc) const { return memoryBound_[proc]; } + [[nodiscard]] VMemwT MemoryBound(const unsigned proc) const { return memoryBound_[proc]; } /** * @brief Returns the maximum memory bound over all processors. * @return The maximum memory bound. */ - [[nodiscard]] v_memw_t maxMemoryBound() const { - return *(std::max_element(memoryBound_.begin(), memoryBound_.end())); - } + [[nodiscard]] VMemwT MaxMemoryBound() const { return *(std::max_element(memoryBound_.begin(), memoryBound_.end())); } /** * @brief Returns the maximum memory bound over all processors of a specific type. @@ -536,42 +534,41 @@ class BspArchitecture { * @param procType The processor type. * @return The maximum memory bound. */ - [[nodiscard]] v_memw_t maxMemoryBoundProcType(const v_type_t procType) const { - v_memw_t max_mem = 0U; + [[nodiscard]] VMemwT MaxMemoryBoundProcType(const VTypeT procType) const { + VMemwT maxMem = 0U; for (unsigned proc = 0U; proc < numberOfProcessors_; proc++) { if (processorTypes_[proc] == procType) { - max_mem = std::max(max_mem, memoryBound_[proc]); + maxMem = std::max(maxMem, memoryBound_[proc]); } } - return max_mem; + return maxMem; } /** * @brief Returns the number of processors. * @return The number of processors. */ - [[nodiscard]] unsigned numberOfProcessors() const { return numberOfProcessors_; } + [[nodiscard]] unsigned NumberOfProcessors() const { return numberOfProcessors_; } /** * @brief Returns the communication costs. * @return The communication costs. */ - [[nodiscard]] v_commw_t communicationCosts() const { return communicationCosts_; } + [[nodiscard]] VCommwT CommunicationCosts() const { return communicationCosts_; } /** * @brief Returns the synchronization costs. * @return The synchronization costs. 
*/ - [[nodiscard]] v_commw_t synchronisationCosts() const { return synchronisationCosts_; } + [[nodiscard]] VCommwT SynchronisationCosts() const { return synchronisationCosts_; } /** * @brief Returns a the send costs matrix. Internally the matrix is stored as a flattened matrix. The allocates, computes and * returns the matrix on the fly. * @return The send costs matrix. */ - [[nodiscard]] std::vector>> sendCost() const { - std::vector>> matrix(numberOfProcessors_, - std::vector>(numberOfProcessors_)); + [[nodiscard]] std::vector>> SendCost() const { + std::vector>> matrix(numberOfProcessors_, std::vector>(numberOfProcessors_)); for (unsigned i = 0; i < numberOfProcessors_; ++i) { for (unsigned j = 0; j < numberOfProcessors_; ++j) { matrix[i][j] = sendCosts_[FlatIndex(i, j)]; @@ -584,13 +581,13 @@ class BspArchitecture { * @brief Returns the flattened send costs vector. * @return The send costs vector. */ - [[nodiscard]] const std::vector> &sendCostsVector() const { return sendCosts_; } + [[nodiscard]] const std::vector> &SendCostsVector() const { return sendCosts_; } /** * @brief Returns the processor types. * @return Vector of processor types. */ - [[nodiscard]] const std::vector &processorTypes() const { return processorTypes_; } + [[nodiscard]] const std::vector &ProcessorTypes() const { return processorTypes_; } /** * @brief Returns the communication costs between two processors. Does not perform bounds checking. @@ -600,7 +597,7 @@ class BspArchitecture { * @param p2 The index of the second processor. * @return The communication costs between the two processors. */ - [[nodiscard]] v_commw_t communicationCosts(const unsigned p1, const unsigned p2) const { + [[nodiscard]] VCommwT CommunicationCosts(const unsigned p1, const unsigned p2) const { return communicationCosts_ * sendCosts_[FlatIndex(p1, p2)]; } @@ -612,23 +609,21 @@ class BspArchitecture { * @param p2 The index of the second processor. * @return The send costs between the two processors. 
*/ - [[nodiscard]] v_commw_t sendCosts(const unsigned p1, const unsigned p2) const { - return sendCosts_[FlatIndex(p1, p2)]; - } + [[nodiscard]] VCommwT SendCosts(const unsigned p1, const unsigned p2) const { return sendCosts_[FlatIndex(p1, p2)]; } /** * @brief Returns the type of a specific processor. Does not perform bounds checking. * @param p1 The processor index. * @return The processor type. */ - [[nodiscard]] v_type_t processorType(const unsigned p1) const { return processorTypes_[p1]; } + [[nodiscard]] VTypeT ProcessorType(const unsigned p1) const { return processorTypes_[p1]; } /** * @brief Sets the type of a specific processor. Performs bounds checking. * @param p1 The processor index. * @param type The new processor type. */ - void setProcessorType(const unsigned p1, const v_type_t type) { + void SetProcessorType(const unsigned p1, const VTypeT type) { processorTypes_.at(p1) = type; numberOfProcessorTypes_ = std::max(numberOfProcessorTypes_, type + 1U); } @@ -637,19 +632,19 @@ class BspArchitecture { * @brief Returns the count of processors for each type. * @return Vector where index is type and value is count. */ - [[nodiscard]] std::vector getProcessorTypeCount() const { - std::vector type_count(numberOfProcessorTypes_, 0U); + [[nodiscard]] std::vector GetProcessorTypeCount() const { + std::vector typeCount(numberOfProcessorTypes_, 0U); for (unsigned p = 0U; p < numberOfProcessors_; p++) { - type_count[processorTypes_[p]]++; + typeCount[processorTypes_[p]]++; } - return type_count; + return typeCount; } /** * @brief Prints the architecture details to the output stream. * @param os The output stream. 
*/ - void print(std::ostream &os) const { + void Print(std::ostream &os) const { os << "Architecture info: number of processors: " << numberOfProcessors_ << ", Number of processor types: " << numberOfProcessorTypes_ << ", Communication costs: " << communicationCosts_ << ", Synchronization costs: " << synchronisationCosts_ << "\n"; @@ -670,11 +665,11 @@ class BspArchitecture { os << "\n"; } - [[nodiscard]] unsigned getNumberOfProcessorTypes() const { return numberOfProcessorTypes_; }; + [[nodiscard]] unsigned GetNumberOfProcessorTypes() const { return numberOfProcessorTypes_; }; - [[nodiscard]] MEMORY_CONSTRAINT_TYPE getMemoryConstraintType() const { return memoryConstraintType_; } + [[nodiscard]] MemoryConstraintType GetMemoryConstraintType() const { return memoryConstraintType_; } - void setMemoryConstraintType(const MEMORY_CONSTRAINT_TYPE memoryConstraintType) { + void SetMemoryConstraintType(const MemoryConstraintType memoryConstraintType) { memoryConstraintType_ = memoryConstraintType; } }; diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index 34c17b98..ea350396 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -55,11 +55,11 @@ namespace osp { * This class provides a unified interface to access and modify these components, facilitating * the development of scheduling algorithms that need to query problem constraints and properties. * - * @tparam Graph_t The type of the computational DAG, which must satisfy the `is_computational_dag` concept. + * @tparam GraphT The type of the computational DAG, which must satisfy the `is_computational_dag` concept. 
*/ -template +template class BspInstance { - static_assert(is_computational_dag_v, "BspInstance can only be used with computational DAGs."); + static_assert(isComputationalDagV, "BspInstance can only be used with computational DAGs."); private: /** @@ -68,14 +68,14 @@ class BspInstance { * It contains the graph topology (nodes and directed edges) as well as attributes such as node types, * work weights, memory weights, and edge communication weights. */ - Graph_t cdag; + GraphT cdag_; /** * @brief The BSP architecture model. * * It defines the hardware characteristics including processor types, memory limits, * communication bandwidth/latency (send costs), and global synchronization costs. */ - BspArchitecture architecture; + BspArchitecture architecture_; /** * @brief Stores the compatibility between node types and processor types. @@ -85,15 +85,14 @@ class BspInstance { * if a node of that type can be assigned to a processor of the given type in a schedule. * @note The outer vector is indexed by node type, the inner vector is indexed by processor type. */ - std::vector> nodeProcessorCompatibility = std::vector>({{true}}); + std::vector> nodeProcessorCompatibility_ = std::vector>({{true}}); /** * @brief The type of the vectex types in the computational DAG. * If the DAG does not support vertex types, this is `unsigned`. */ - using vertex_type_t_or_default - = std::conditional_t, v_type_t, unsigned>; - using processor_type_t = unsigned; + using VertexTypeTOrDefault = std::conditional_t, VTypeT, unsigned>; + using ProcessorTypeT = unsigned; public: /** @@ -108,10 +107,10 @@ class BspInstance { * @param cdag The computational DAG for the instance. * @param architecture The BSP architecture for the instance. 
*/ - BspInstance(const Graph_t &cdag_, - const BspArchitecture &architecture_, - std::vector> nodeProcessorCompatibility_ = std::vector>({{true}})) - : cdag(cdag_), architecture(architecture_), nodeProcessorCompatibility(nodeProcessorCompatibility_) {} + BspInstance(const GraphT &cdag, + const BspArchitecture &architecture, + std::vector> nodeProcessorCompatibility = std::vector>({{true}})) + : cdag_(cdag), architecture_(architecture), nodeProcessorCompatibility_(nodeProcessorCompatibility) {} /** * @brief Constructs a BspInstance object with the specified computational DAG and BSP architecture. @@ -120,62 +119,62 @@ class BspInstance { * @param cdag The computational DAG for the instance. * @param architecture The BSP architecture for the instance. */ - BspInstance(Graph_t &&cdag_, - BspArchitecture &&architecture_, - std::vector> nodeProcessorCompatibility_ = std::vector>({{true}})) - : cdag(std::move(cdag_)), - architecture(std::move(architecture_)), - nodeProcessorCompatibility(nodeProcessorCompatibility_) {} - - template - explicit BspInstance(const BspInstance &other) - : architecture(other.getArchitecture()), nodeProcessorCompatibility(other.getNodeProcessorCompatibilityMatrix()) { - constructComputationalDag(other.getComputationalDag(), cdag); + BspInstance(GraphT &&cdag, + BspArchitecture &&architecture, + std::vector> nodeProcessorCompatibility = std::vector>({{true}})) + : cdag_(std::move(cdag)), + architecture_(std::move(architecture)), + nodeProcessorCompatibility_(nodeProcessorCompatibility) {} + + template + explicit BspInstance(const BspInstance &other) + : architecture_(other.GetArchitecture()), nodeProcessorCompatibility_(other.GetNodeProcessorCompatibilityMatrix()) { + ConstructComputationalDag(other.GetComputationalDag(), cdag_); } - BspInstance(const BspInstance &other) = default; - BspInstance(BspInstance &&other) noexcept = default; + BspInstance(const BspInstance &other) = default; + BspInstance(BspInstance &&other) noexcept = default; - 
BspInstance &operator=(const BspInstance &other) = default; - BspInstance &operator=(BspInstance &&other) noexcept = default; + BspInstance &operator=(const BspInstance &other) = default; + BspInstance &operator=(BspInstance &&other) noexcept = default; /** * @brief Returns a reference to the BSP architecture of the instance. * Assigning the BSP architecture via the reference creates a copy of the architecture. * The move operator may be used to transfer ownership of the architecture. */ - [[nodiscard]] const BspArchitecture &getArchitecture() const { return architecture; } + [[nodiscard]] const BspArchitecture &GetArchitecture() const { return architecture_; } - [[nodiscard]] BspArchitecture &getArchitecture() { return architecture; } + [[nodiscard]] BspArchitecture &GetArchitecture() { return architecture_; } /** * @brief Returns a reference to the computational DAG of the instance. * Assigning the computational DAG via the reference creates a copy of the DAG. * The move operator may be used to transfer ownership of the DAG. */ - [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; } + [[nodiscard]] const GraphT &GetComputationalDag() const { return cdag_; } - [[nodiscard]] Graph_t &getComputationalDag() { return cdag; } + [[nodiscard]] GraphT &GetComputationalDag() { return cdag_; } /** * @brief Returns the number of vertices in the computational DAG. */ - [[nodiscard]] vertex_idx_t numberOfVertices() const { return cdag.num_vertices(); } + [[nodiscard]] VertexIdxT NumberOfVertices() const { return cdag_.NumVertices(); } /** * @brief Returns a view over the vertex indices of the computational DAG. */ - [[nodiscard]] auto vertices() const { return cdag.vertices(); } + [[nodiscard]] auto Vertices() const { return cdag_.Vertices(); } /** * @brief Returns a view over the processor indices of the BSP architecture. 
*/ - [[nodiscard]] auto processors() const { return architecture.processors(); } + [[nodiscard]] auto Processors() const { return architecture_.Processors(); } /** * @brief Returns the number of processors in the BSP architecture. */ - [[nodiscard]] unsigned numberOfProcessors() const { return architecture.numberOfProcessors(); } + [[nodiscard]] unsigned NumberOfProcessors() const { return architecture_.NumberOfProcessors(); } /** * @brief Returns the communication costs between two processors. Does not perform bounds checking. @@ -184,8 +183,8 @@ class BspInstance { * @param p_send The index of the sending processor. * @param p_receive The index of the receiving processor. */ - [[nodiscard]] v_commw_t communicationCosts(const unsigned p_send, const unsigned p_receive) const { - return architecture.communicationCosts(p_send, p_receive); + [[nodiscard]] VCommwT CommunicationCosts(const unsigned pSend, const unsigned pReceive) const { + return architecture_.CommunicationCosts(pSend, pReceive); } /** @@ -195,47 +194,47 @@ class BspInstance { * @param p_send The index of the sending processor. * @param p_receive The index of the receiving processor. */ - [[nodiscard]] v_commw_t sendCosts(const unsigned p_send, const unsigned p_receive) const { - return architecture.sendCosts(p_send, p_receive); + [[nodiscard]] VCommwT SendCosts(const unsigned pSend, const unsigned pReceive) const { + return architecture_.SendCosts(pSend, pReceive); } /** * @brief Returns a copy of the send costs matrix. */ - [[nodiscard]] std::vector>> sendCosts() const { return architecture.sendCosts(); } + [[nodiscard]] std::vector>> SendCosts() const { return architecture_.SendCosts(); } /** * @brief Returns the flattened send costs vector. 
*/ - [[nodiscard]] const std::vector> &sendCostsVector() const { return architecture.sendCostsVector(); } + [[nodiscard]] const std::vector> &SendCostsVector() const { return architecture_.SendCostsVector(); } /** * @brief Returns the communication costs of the BSP architecture. */ - [[nodiscard]] v_commw_t communicationCosts() const { return architecture.communicationCosts(); } + [[nodiscard]] VCommwT CommunicationCosts() const { return architecture_.CommunicationCosts(); } /** * @brief Returns the synchronization costs of the BSP architecture. */ - [[nodiscard]] v_commw_t synchronisationCosts() const { return architecture.synchronisationCosts(); } + [[nodiscard]] VCommwT SynchronisationCosts() const { return architecture_.SynchronisationCosts(); } /** * @brief Returns the memory bound for a specific processor. * @param proc The processor index. */ - [[nodiscard]] v_memw_t memoryBound(const unsigned proc) const { return architecture.memoryBound(proc); } + [[nodiscard]] VMemwT MemoryBound(const unsigned proc) const { return architecture_.MemoryBound(proc); } /** * @brief Sets the communication costs of the BSP architecture. * @param cost The communication costs to set. */ - void setCommunicationCosts(const v_commw_t cost) { architecture.setCommunicationCosts(cost); } + void SetCommunicationCosts(const VCommwT cost) { architecture_.SetCommunicationCosts(cost); } /** * @brief Sets the synchronisation costs of the BSP architecture. * @param cost The synchronisation costs to set. */ - void setSynchronisationCosts(const v_commw_t cost) { architecture.setSynchronisationCosts(cost); } + void SetSynchronisationCosts(const VCommwT cost) { architecture_.SetSynchronisationCosts(cost); } /** * @brief Sets the number of processors. Processor type is set to 0 for all processors. @@ -243,13 +242,13 @@ class BspInstance { * @param numberOfProcessors The number of processors. Must be greater than 0. * @throws std::invalid_argument if the number of processors is 0. 
*/ - void setNumberOfProcessors(const unsigned num) { architecture.setNumberOfProcessors(num); } + void SetNumberOfProcessors(const unsigned num) { architecture_.SetNumberOfProcessors(num); } /** * @brief Returns the processor type for a given processor index. Does not perform bounds checking. * @param proc The processor index. */ - [[nodiscard]] vertex_type_t_or_default processorType(const unsigned proc) const { return architecture.processorType(proc); } + [[nodiscard]] VertexTypeTOrDefault ProcessorType(const unsigned proc) const { return architecture_.ProcessorType(proc); } /** * @brief Checks if a node is compatible with a processor. Does not perform bounds checking. @@ -258,8 +257,8 @@ class BspInstance { * @param processor_id The processor index. * @return True if the node is compatible with the processor, false otherwise. */ - [[nodiscard]] bool isCompatible(const vertex_idx_t &node, const unsigned processor_id) const { - return isCompatibleType(cdag.vertex_type(node), architecture.processorType(processor_id)); + [[nodiscard]] bool IsCompatible(const VertexIdxT &node, const unsigned processorId) const { + return IsCompatibleType(cdag_.VertexType(node), architecture_.ProcessorType(processorId)); } /** @@ -269,49 +268,49 @@ class BspInstance { * @param processorType The processor type. * @return True if the node type is compatible with the processor type, false otherwise. */ - [[nodiscard]] bool isCompatibleType(const vertex_type_t_or_default nodeType, const processor_type_t processorType) const { - return nodeProcessorCompatibility[nodeType][processorType]; + [[nodiscard]] bool IsCompatibleType(const VertexTypeTOrDefault nodeType, const ProcessorTypeT processorType) const { + return nodeProcessorCompatibility_[nodeType][processorType]; } /** * @brief Sets the node-processor compatibility matrix. The matrix is copied. Dimensions are not checked. * @param compatibility_ The compatibility matrix. 
*/ - void setNodeProcessorCompatibility(const std::vector> &compatibility_) { - nodeProcessorCompatibility = compatibility_; + void SetNodeProcessorCompatibility(const std::vector> &compatibility) { + nodeProcessorCompatibility_ = compatibility; } /** * @brief Returns the node-processor compatibility matrix. */ - [[nodiscard]] const std::vector> &getNodeProcessorCompatibilityMatrix() const { - return nodeProcessorCompatibility; + [[nodiscard]] const std::vector> &GetNodeProcessorCompatibilityMatrix() const { + return nodeProcessorCompatibility_; } /** * @brief Returns the node type - processor type compatibility matrix. */ - [[nodiscard]] const std::vector> &getProcessorCompatibilityMatrix() const { - return nodeProcessorCompatibility; + [[nodiscard]] const std::vector> &GetProcessorCompatibilityMatrix() const { + return nodeProcessorCompatibility_; } /** * @brief Sets the compatibility matrix to be diagonal. This implies that node type `i` is only compatible with processor type `i`. * @param number_of_types The number of types. */ - void setDiagonalCompatibilityMatrix(const vertex_type_t_or_default number_of_types) { - nodeProcessorCompatibility.assign(number_of_types, std::vector(number_of_types, false)); - for (vertex_type_t_or_default i = 0; i < number_of_types; ++i) { - nodeProcessorCompatibility[i][i] = true; + void SetDiagonalCompatibilityMatrix(const VertexTypeTOrDefault numberOfTypes) { + nodeProcessorCompatibility_.assign(numberOfTypes, std::vector(numberOfTypes, false)); + for (VertexTypeTOrDefault i = 0; i < numberOfTypes; ++i) { + nodeProcessorCompatibility_[i][i] = true; } } /** * @brief Sets the compatibility matrix to all ones. This implies that all node types are compatible with all processor types. 
*/ - void setAllOnesCompatibilityMatrix() { - nodeProcessorCompatibility.assign(cdag.num_vertex_types(), - std::vector(architecture.getNumberOfProcessorTypes(), true)); + void SetAllOnesCompatibilityMatrix() { + nodeProcessorCompatibility_.assign(cdag_.NumVertexTypes(), + std::vector(architecture_.GetNumberOfProcessorTypes(), true)); } /** @@ -319,19 +318,19 @@ class BspInstance { * @return True if the memory constraints are feasible, false otherwise. */ [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const { - std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0U; proc < architecture.numberOfProcessors(); proc++) { - max_memory_per_proc_type[architecture.processorType(proc)] - = std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); + std::vector> maxMemoryPerProcType(architecture_.GetNumberOfProcessorTypes(), 0); + for (unsigned proc = 0U; proc < architecture_.NumberOfProcessors(); proc++) { + maxMemoryPerProcType[architecture_.ProcessorType(proc)] + = std::max(maxMemoryPerProcType[architecture_.ProcessorType(proc)], architecture_.MemoryBound(proc)); } - for (vertex_type_t_or_default vertType = 0U; vertType < cdag.num_vertex_types(); vertType++) { - v_memw_t max_memory_of_type = max_memory_weight(vertType, cdag); + for (VertexTypeTOrDefault vertType = 0U; vertType < cdag_.NumVertexTypes(); vertType++) { + VMemwT maxMemoryOfType = MaxMemoryWeight(vertType, cdag_); bool fits = false; - for (processor_type_t proc_type = 0U; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) { - if (isCompatibleType(vertType, proc_type)) { - fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]); + for (ProcessorTypeT procType = 0U; procType < architecture_.GetNumberOfProcessorTypes(); procType++) { + if (IsCompatibleType(vertType, procType)) { + fits = fits | (maxMemoryOfType <= maxMemoryPerProcType[procType]); if (fits) { break; } @@ 
-350,14 +349,14 @@ class BspInstance { * @brief Returns a list of compatible processor types for each node type. * @return A vector where the index is the node type and the value is a vector of compatible processor types. */ - [[nodiscard]] std::vector> getProcTypesCompatibleWithNodeType() const { - vertex_type_t_or_default numberOfNodeTypes = cdag.num_vertex_types(); - processor_type_t numberOfProcTypes = architecture.getNumberOfProcessorTypes(); - std::vector> compatibleProcTypes(numberOfNodeTypes); + [[nodiscard]] std::vector> GetProcTypesCompatibleWithNodeType() const { + VertexTypeTOrDefault numberOfNodeTypes = cdag_.NumVertexTypes(); + ProcessorTypeT numberOfProcTypes = architecture_.GetNumberOfProcessorTypes(); + std::vector> compatibleProcTypes(numberOfNodeTypes); - for (vertex_type_t_or_default nodeType = 0U; nodeType < numberOfNodeTypes; ++nodeType) { - for (processor_type_t processorType = 0U; processorType < numberOfProcTypes; ++processorType) { - if (isCompatibleType(nodeType, processorType)) { + for (VertexTypeTOrDefault nodeType = 0U; nodeType < numberOfNodeTypes; ++nodeType) { + for (ProcessorTypeT processorType = 0U; processorType < numberOfProcTypes; ++processorType) { + if (IsCompatibleType(nodeType, processorType)) { compatibleProcTypes[nodeType].push_back(processorType); } } diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp index 23fe804e..644d660d 100644 --- a/include/osp/bsp/model/BspSchedule.hpp +++ b/include/osp/bsp/model/BspSchedule.hpp @@ -42,32 +42,32 @@ namespace osp { * - Compute costs associated with the schedule. * - Manipulate the schedule, including updating assignments and merging supersteps. * - * This class is templated on `Graph_t`, which must satisfy the `computational_dag_concept`. + * This class is templated on `GraphT`, which must satisfy the `computational_dag_concept`. 
* Moreover, the work and communication weights of the nodes must be of the same type in order to properly compute the cost. * * It interacts closely with `BspInstance` to access problem-specific data and constraints. In fact, a `BspSchedule` object is * tied to a `BspInstance` object. * - * @tparam Graph_t The type of the computational DAG, which must satisfy `is_computational_dag_v`. + * @tparam GraphT The type of the computational DAG, which must satisfy `is_computational_dag_v`. * @see BspInstance * @see IBspSchedule * @see IBspScheduleEval */ -template -class BspSchedule : public IBspSchedule, public IBspScheduleEval { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, +template +class BspSchedule : public IBspSchedule, public IBspScheduleEval { + static_assert(isComputationalDagV, "BspSchedule can only be used with computational DAGs."); + static_assert(std::is_same_v, VCommwT>, "BspSchedule requires work and comm. 
weights to have the same type."); protected: - using vertex_idx = vertex_idx_t; + using VertexIdx = VertexIdxT; - const BspInstance *instance; + const BspInstance *instance_; - unsigned number_of_supersteps; + unsigned numberOfSupersteps_; - std::vector node_to_processor_assignment; - std::vector node_to_superstep_assignment; + std::vector nodeToProcessorAssignment_; + std::vector nodeToSuperstepAssignment_; public: BspSchedule() = delete; @@ -77,11 +77,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &inst) - : instance(&inst), - number_of_supersteps(1), - node_to_processor_assignment(std::vector(inst.numberOfVertices(), 0)), - node_to_superstep_assignment(std::vector(inst.numberOfVertices(), 0)) {} + explicit BspSchedule(const BspInstance &inst) + : instance_(&inst), + numberOfSupersteps_(1), + nodeToProcessorAssignment_(std::vector(inst.NumberOfVertices(), 0)), + nodeToSuperstepAssignment_(std::vector(inst.NumberOfVertices(), 0)) {} /** * @brief Constructs a BspSchedule object with the specified BspInstance, processor assignment, and superstep @@ -91,13 +91,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &inst, - const std::vector &processor_assignment_, - const std::vector &superstep_assignment_) - : instance(&inst), - node_to_processor_assignment(processor_assignment_), - node_to_superstep_assignment(superstep_assignment_) { - updateNumberOfSupersteps(); + BspSchedule(const BspInstance &inst, + const std::vector &processorAssignment, + const std::vector &superstepAssignment) + : instance_(&inst), nodeToProcessorAssignment_(processorAssignment), nodeToSuperstepAssignment_(superstepAssignment) { + UpdateNumberOfSupersteps(); } /** @@ -105,14 +103,14 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &schedule) - : instance(&schedule.getInstance()), - number_of_supersteps(schedule.numberOfSupersteps()), - node_to_processor_assignment(schedule.getInstance().numberOfVertices()), - 
node_to_superstep_assignment(schedule.getInstance().numberOfVertices()) { - for (const auto &v : schedule.getInstance().getComputationalDag().vertices()) { - node_to_processor_assignment[v] = schedule.assignedProcessor(v); - node_to_superstep_assignment[v] = schedule.assignedSuperstep(v); + explicit BspSchedule(const IBspSchedule &schedule) + : instance_(&schedule.GetInstance()), + numberOfSupersteps_(schedule.NumberOfSupersteps()), + nodeToProcessorAssignment_(schedule.GetInstance().NumberOfVertices()), + nodeToSuperstepAssignment_(schedule.GetInstance().NumberOfVertices()) { + for (const auto &v : schedule.GetInstance().GetComputationalDag().Vertices()) { + nodeToProcessorAssignment_[v] = schedule.AssignedProcessor(v); + nodeToSuperstepAssignment_[v] = schedule.AssignedSuperstep(v); } } @@ -121,11 +119,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &schedule) - : instance(schedule.instance), - number_of_supersteps(schedule.number_of_supersteps), - node_to_processor_assignment(schedule.node_to_processor_assignment), - node_to_superstep_assignment(schedule.node_to_superstep_assignment) {} + BspSchedule(const BspSchedule &schedule) + : instance_(schedule.instance_), + numberOfSupersteps_(schedule.numberOfSupersteps_), + nodeToProcessorAssignment_(schedule.nodeToProcessorAssignment_), + nodeToSuperstepAssignment_(schedule.nodeToSuperstepAssignment_) {} /** * @brief Copy assignment operator. 
@@ -133,12 +131,12 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &operator=(const BspSchedule &schedule) { + BspSchedule &operator=(const BspSchedule &schedule) { if (this != &schedule) { - instance = schedule.instance; - number_of_supersteps = schedule.number_of_supersteps; - node_to_processor_assignment = schedule.node_to_processor_assignment; - node_to_superstep_assignment = schedule.node_to_superstep_assignment; + instance_ = schedule.instance_; + numberOfSupersteps_ = schedule.numberOfSupersteps_; + nodeToProcessorAssignment_ = schedule.nodeToProcessorAssignment_; + nodeToSuperstepAssignment_ = schedule.nodeToSuperstepAssignment_; } return *this; } @@ -148,11 +146,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &&schedule) noexcept - : instance(schedule.instance), - number_of_supersteps(schedule.number_of_supersteps), - node_to_processor_assignment(std::move(schedule.node_to_processor_assignment)), - node_to_superstep_assignment(std::move(schedule.node_to_superstep_assignment)) {} + BspSchedule(BspSchedule &&schedule) noexcept + : instance_(schedule.instance_), + numberOfSupersteps_(schedule.numberOfSupersteps_), + nodeToProcessorAssignment_(std::move(schedule.nodeToProcessorAssignment_)), + nodeToSuperstepAssignment_(std::move(schedule.nodeToSuperstepAssignment_)) {} /** * @brief Move assignment operator. 
@@ -160,12 +158,12 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &operator=(BspSchedule &&schedule) noexcept { + BspSchedule &operator=(BspSchedule &&schedule) noexcept { if (this != &schedule) { - instance = schedule.instance; - number_of_supersteps = schedule.number_of_supersteps; - node_to_processor_assignment = std::move(schedule.node_to_processor_assignment); - node_to_superstep_assignment = std::move(schedule.node_to_superstep_assignment); + instance_ = schedule.instance_; + numberOfSupersteps_ = schedule.numberOfSupersteps_; + nodeToProcessorAssignment_ = std::move(schedule.nodeToProcessorAssignment_); + nodeToSuperstepAssignment_ = std::move(schedule.nodeToSuperstepAssignment_); } return *this; } @@ -177,12 +175,12 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval - BspSchedule(const BspInstance &instance_, const BspSchedule &schedule) - : instance(&instance_), - number_of_supersteps(schedule.numberOfSupersteps()), - node_to_processor_assignment(schedule.assignedProcessors()), - node_to_superstep_assignment(schedule.assignedSupersteps()) {} + template + BspSchedule(const BspInstance &instance, const BspSchedule &schedule) + : instance_(&instance), + numberOfSupersteps_(schedule.NumberOfSupersteps()), + nodeToProcessorAssignment_(schedule.AssignedProcessors()), + nodeToSuperstepAssignment_(schedule.AssignedSupersteps()) {} /** * @brief Destructor for the BspSchedule class. @@ -194,23 +192,23 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &getInstance() const override { return *instance; } + [[nodiscard]] const BspInstance &GetInstance() const override { return *instance_; } /** * @brief Returns the number of supersteps in the schedule. * * @return The number of supersteps in the schedule. 
*/ - [[nodiscard]] unsigned numberOfSupersteps() const override { return number_of_supersteps; } + [[nodiscard]] unsigned NumberOfSupersteps() const override { return numberOfSupersteps_; } /** * @brief Updates the number of supersteps based on the current assignment. */ - void updateNumberOfSupersteps() { - number_of_supersteps = 0; - for (vertex_idx_t i = 0; i < static_cast>(instance->numberOfVertices()); ++i) { - if (node_to_superstep_assignment[i] >= number_of_supersteps) { - number_of_supersteps = node_to_superstep_assignment[i] + 1; + void UpdateNumberOfSupersteps() { + numberOfSupersteps_ = 0; + for (VertexIdxT i = 0; i < static_cast>(instance_->NumberOfVertices()); ++i) { + if (nodeToSuperstepAssignment_[i] >= numberOfSupersteps_) { + numberOfSupersteps_ = nodeToSuperstepAssignment_[i] + 1; } } } @@ -221,7 +219,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval &assignedSupersteps() const { return node_to_superstep_assignment; } + [[nodiscard]] const std::vector &AssignedSupersteps() const { return nodeToSuperstepAssignment_; } - [[nodiscard]] std::vector &assignedSupersteps() { return node_to_superstep_assignment; } + [[nodiscard]] std::vector &AssignedSupersteps() { return nodeToSuperstepAssignment_; } /** * @brief Returns the processor assignment for the schedule. * * @return The processor assignment for the schedule. */ - [[nodiscard]] const std::vector &assignedProcessors() const { return node_to_processor_assignment; } + [[nodiscard]] const std::vector &AssignedProcessors() const { return nodeToProcessorAssignment_; } - [[nodiscard]] std::vector &assignedProcessors() { return node_to_processor_assignment; } + [[nodiscard]] std::vector &AssignedProcessors() { return nodeToProcessorAssignment_; } /** * @brief Returns the staleness of the schedule. 
@@ -256,7 +254,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEvalnumberOfVertices()) { - node_to_superstep_assignment[node] = superstep; + void SetAssignedSuperstep(const VertexIdx node, const unsigned superstep) { + if (node < instance_->NumberOfVertices()) { + nodeToSuperstepAssignment_[node] = superstep; - if (superstep >= number_of_supersteps) { - number_of_supersteps = superstep + 1; + if (superstep >= numberOfSupersteps_) { + numberOfSupersteps_ = superstep + 1; } } else { @@ -283,8 +281,8 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval &vec) { - if (vec.size() == static_cast(instance->numberOfVertices())) { - number_of_supersteps = 0; + void SetAssignedSupersteps(const std::vector &vec) { + if (vec.size() == static_cast(instance_->NumberOfVertices())) { + numberOfSupersteps_ = 0; - for (vertex_idx_t i = 0; i < instance->numberOfVertices(); ++i) { - if (vec[i] >= number_of_supersteps) { - number_of_supersteps = vec[i] + 1; + for (VertexIdxT i = 0; i < instance_->NumberOfVertices(); ++i) { + if (vec[i] >= numberOfSupersteps_) { + numberOfSupersteps_ = vec[i] + 1; } - node_to_superstep_assignment[i] = vec[i]; + nodeToSuperstepAssignment_[i] = vec[i]; } } else { throw std::invalid_argument("Invalid Argument while assigning supersteps: size does not match number of nodes."); @@ -323,14 +319,14 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &&vec) { - if (vec.size() == static_cast(instance->numberOfVertices())) { - node_to_superstep_assignment = std::move(vec); + void SetAssignedSupersteps(std::vector &&vec) { + if (vec.size() == static_cast(instance_->NumberOfVertices())) { + nodeToSuperstepAssignment_ = std::move(vec); } else { throw std::invalid_argument("Invalid Argument while assigning supersteps: size does not match number of nodes."); } - updateNumberOfSupersteps(); + UpdateNumberOfSupersteps(); } /** @@ -338,9 +334,9 @@ class BspSchedule : public 
IBspSchedule, public IBspScheduleEval &vec) { - if (vec.size() == static_cast(instance->numberOfVertices())) { - node_to_processor_assignment = vec; + void SetAssignedProcessors(const std::vector &vec) { + if (vec.size() == static_cast(instance_->NumberOfVertices())) { + nodeToProcessorAssignment_ = vec; } else { throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } @@ -351,9 +347,9 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &&vec) { - if (vec.size() == static_cast(instance->numberOfVertices())) { - node_to_processor_assignment = std::move(vec); + void SetAssignedProcessors(std::vector &&vec) { + if (vec.size() == static_cast(instance_->NumberOfVertices())) { + nodeToProcessorAssignment_ = std::move(vec); } else { throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } @@ -367,14 +363,14 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval computeWorkCosts() const override { return cost_helpers::compute_work_costs(*this); } + virtual VWorkwT ComputeWorkCosts() const override { return cost_helpers::ComputeWorkCosts(*this); } /** * @brief Computes the costs of the schedule accoring to lazy communication cost evaluation. * * @return The costs of the schedule. */ - virtual v_workw_t computeCosts() const override { return LazyCommunicationCost()(*this); } + virtual VWorkwT ComputeCosts() const override { return LazyCommunicationCost()(*this); } /** * @brief Checks if the schedule is valid. 
@@ -383,8 +379,8 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval>(node_to_processor_assignment.size()) != instance->numberOfVertices() - || static_cast>(node_to_superstep_assignment.size()) != instance->numberOfVertices()) { + [[nodiscard]] bool SatisfiesPrecedenceConstraints() const { + if (static_cast>(nodeToProcessorAssignment_.size()) != instance_->NumberOfVertices() + || static_cast>(nodeToSuperstepAssignment_.size()) != instance_->NumberOfVertices()) { return false; } - for (const auto &v : instance->vertices()) { - if (node_to_superstep_assignment[v] >= number_of_supersteps) { + for (const auto &v : instance_->Vertices()) { + if (nodeToSuperstepAssignment_[v] >= numberOfSupersteps_) { return false; } - if (node_to_processor_assignment[v] >= instance->numberOfProcessors()) { + if (nodeToProcessorAssignment_[v] >= instance_->NumberOfProcessors()) { return false; } - for (const auto &target : instance->getComputationalDag().children(v)) { - const unsigned different_processors - = (node_to_processor_assignment[v] == node_to_processor_assignment[target]) ? 0u : getStaleness(); - if (node_to_superstep_assignment[v] + different_processors > node_to_superstep_assignment[target]) { + for (const auto &target : instance_->GetComputationalDag().Children(v)) { + const unsigned differentProcessors + = (nodeToProcessorAssignment_[v] == nodeToProcessorAssignment_[target]) ? 
0u : GetStaleness(); + if (nodeToSuperstepAssignment_[v] + differentProcessors > nodeToSuperstepAssignment_[target]) { return false; } } @@ -429,13 +425,13 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfVertices()) { + [[nodiscard]] bool SatisfiesNodeTypeConstraints() const { + if (nodeToProcessorAssignment_.size() != instance_->NumberOfVertices()) { return false; } - for (const auto &node : instance->vertices()) { - if (!instance->isCompatible(node, node_to_processor_assignment[node])) { + for (const auto &node : instance_->Vertices()) { + if (!instance_->IsCompatible(node, nodeToProcessorAssignment_[node])) { return false; } } @@ -450,27 +446,27 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalgetArchitecture().getMemoryConstraintType()) { - case MEMORY_CONSTRAINT_TYPE::LOCAL: - return satisfiesLocalMemoryConstraints(); + [[nodiscard]] bool SatisfiesMemoryConstraints() const { + switch (instance_->GetArchitecture().GetMemoryConstraintType()) { + case MemoryConstraintType::LOCAL: + return SatisfiesLocalMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT: - return satisfiesPersistentAndTransientMemoryConstraints(); + case MemoryConstraintType::PERSISTENT_AND_TRANSIENT: + return SatisfiesPersistentAndTransientMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::GLOBAL: - return satisfiesGlobalMemoryConstraints(); + case MemoryConstraintType::GLOBAL: + return SatisfiesGlobalMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT: - return satisfiesLocalInOutMemoryConstraints(); + case MemoryConstraintType::LOCAL_IN_OUT: + return SatisfiesLocalInOutMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES: - return satisfiesLocalIncEdgesMemoryConstraints(); + case MemoryConstraintType::LOCAL_INC_EDGES: + return SatisfiesLocalIncEdgesMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES: - return satisfiesLocalSourcesIncEdgesMemoryConstraints(); + case 
MemoryConstraintType::LOCAL_SOURCES_INC_EDGES: + return SatisfiesLocalSourcesIncEdgesMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::NONE: + case MemoryConstraintType::NONE: return true; default: @@ -484,11 +480,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> getAssignedNodeVector(const unsigned processor) const { - std::vector> vec; + [[nodiscard]] std::vector> GetAssignedNodeVector(const unsigned processor) const { + std::vector> vec; - for (const auto &node : instance->vertices()) { - if (node_to_processor_assignment[node] == processor) { + for (const auto &node : instance_->Vertices()) { + if (nodeToProcessorAssignment_[node] == processor) { vec.push_back(node); } } @@ -503,12 +499,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> getAssignedNodeVector(const unsigned processor, - const unsigned superstep) const { - std::vector> vec; + [[nodiscard]] std::vector> GetAssignedNodeVector(const unsigned processor, const unsigned superstep) const { + std::vector> vec; - for (const auto &node : instance->vertices()) { - if (node_to_processor_assignment[node] == processor && node_to_superstep_assignment[node] == superstep) { + for (const auto &node : instance_->Vertices()) { + if (nodeToProcessorAssignment_[node] == processor && nodeToSuperstepAssignment_[node] == superstep) { vec.push_back(node); } } @@ -521,7 +516,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEvalvertices()) { - if (node_to_processor_assignment[node] == processor) { + for (const auto &node : instance_->Vertices()) { + if (nodeToProcessorAssignment_[node] == processor) { num++; } } @@ -546,11 +541,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval numAssignedNodesPerProcessor() const { - std::vector num(instance->numberOfProcessors(), 0); + [[nodiscard]] std::vector NumAssignedNodesPerProcessor() const { + std::vector num(instance_->NumberOfProcessors(), 0); - for (const auto &node : 
instance->vertices()) { - num[node_to_processor_assignment[node]]++; + for (const auto &node : instance_->Vertices()) { + num[nodeToProcessorAssignment_[node]]++; } return num; @@ -561,11 +556,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> numAssignedNodesPerSuperstepProcessor() const { - std::vector> num(number_of_supersteps, std::vector(instance->numberOfProcessors(), 0)); + [[nodiscard]] std::vector> NumAssignedNodesPerSuperstepProcessor() const { + std::vector> num(numberOfSupersteps_, std::vector(instance_->NumberOfProcessors(), 0)); - for (const auto &v : instance->vertices()) { - num[node_to_superstep_assignment[v]][node_to_processor_assignment[v]] += 1; + for (const auto &v : instance_->Vertices()) { + num[nodeToSuperstepAssignment_[v]][nodeToProcessorAssignment_[v]] += 1; } return num; @@ -574,30 +569,30 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval comm_phase_empty(number_of_supersteps, true); - for (const auto &node : instance->vertices()) { - for (const auto &child : instance->getComputationalDag().children(node)) { - if (node_to_processor_assignment[node] != node_to_processor_assignment[child]) { - for (unsigned offset = 1; offset <= getStaleness(); ++offset) { - comm_phase_empty[node_to_superstep_assignment[child] - offset] = false; + virtual void ShrinkByMergingSupersteps() { + std::vector commPhaseEmpty(numberOfSupersteps_, true); + for (const auto &node : instance_->Vertices()) { + for (const auto &child : instance_->GetComputationalDag().Children(node)) { + if (nodeToProcessorAssignment_[node] != nodeToProcessorAssignment_[child]) { + for (unsigned offset = 1; offset <= GetStaleness(); ++offset) { + commPhaseEmpty[nodeToSuperstepAssignment_[child] - offset] = false; } } } } - std::vector new_step_index(number_of_supersteps); - unsigned current_index = 0; - for (unsigned step = 0; step < number_of_supersteps; ++step) { - new_step_index[step] = current_index; - if (!comm_phase_empty[step]) { - 
current_index++; + std::vector newStepIndex(numberOfSupersteps_); + unsigned currentIndex = 0; + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { + newStepIndex[step] = currentIndex; + if (!commPhaseEmpty[step]) { + currentIndex++; } } - for (const auto &node : instance->vertices()) { - node_to_superstep_assignment[node] = new_step_index[node_to_superstep_assignment[node]]; + for (const auto &node : instance_->Vertices()) { + nodeToSuperstepAssignment_[node] = newStepIndex[nodeToSuperstepAssignment_[node]]; } - setNumberOfSupersteps(current_index); + SetNumberOfSupersteps(currentIndex); } private: @@ -609,17 +604,17 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - v_memw_t memory = 0; - for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { - memory += instance->getComputationalDag().vertex_mem_weight(node); + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + VMemwT memory = 0; + for (const auto &node : setSchedule.stepProcessorVertices_[step][proc]) { + memory += instance_->GetComputationalDag().VertexMemWeight(node); } - if (memory > instance->getArchitecture().memoryBound(proc)) { + if (memory > instance_->GetArchitecture().MemoryBound(proc)) { return false; } } @@ -636,18 +631,18 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> current_proc_persistent_memory(instance->numberOfProcessors(), 0); - std::vector> current_proc_transient_memory(instance->numberOfProcessors(), 0); + bool SatisfiesPersistentAndTransientMemoryConstraints() const { + std::vector> currentProcPersistentMemory(instance_->NumberOfProcessors(), 0); + std::vector> currentProcTransientMemory(instance_->NumberOfProcessors(), 0); - for (const auto &node : instance->vertices()) { - const unsigned proc = node_to_processor_assignment[node]; - current_proc_persistent_memory[proc] += 
instance->getComputationalDag().vertex_mem_weight(node); - current_proc_transient_memory[proc] - = std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(node)); + for (const auto &node : instance_->Vertices()) { + const unsigned proc = nodeToProcessorAssignment_[node]; + currentProcPersistentMemory[proc] += instance_->GetComputationalDag().VertexMemWeight(node); + currentProcTransientMemory[proc] + = std::max(currentProcTransientMemory[proc], instance_->GetComputationalDag().VertexCommWeight(node)); - if (current_proc_persistent_memory[proc] + current_proc_transient_memory[proc] - > instance->getArchitecture().memoryBound(proc)) { + if (currentProcPersistentMemory[proc] + currentProcTransientMemory[proc] + > instance_->GetArchitecture().MemoryBound(proc)) { return false; } } @@ -662,38 +657,38 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> current_proc_memory(instance->numberOfProcessors(), 0); + bool SatisfiesGlobalMemoryConstraints() const { + std::vector> currentProcMemory(instance_->NumberOfProcessors(), 0); - for (const auto &node : instance->vertices()) { - const unsigned proc = node_to_processor_assignment[node]; - current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(node); + for (const auto &node : instance_->Vertices()) { + const unsigned proc = nodeToProcessorAssignment_[node]; + currentProcMemory[proc] += instance_->GetComputationalDag().VertexMemWeight(node); - if (current_proc_memory[proc] > instance->getArchitecture().memoryBound(proc)) { + if (currentProcMemory[proc] > instance_->GetArchitecture().MemoryBound(proc)) { return false; } } return true; } - bool satisfiesLocalInOutMemoryConstraints() const { - SetSchedule set_schedule = SetSchedule(*this); + bool SatisfiesLocalInOutMemoryConstraints() const { + SetSchedule setSchedule = SetSchedule(*this); - for (unsigned step = 0; step < number_of_supersteps; step++) { - for (unsigned proc = 0; proc < 
instance->numberOfProcessors(); proc++) { - v_memw_t memory = 0; - for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { - memory += instance->getComputationalDag().vertex_mem_weight(node) - + instance->getComputationalDag().vertex_comm_weight(node); + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + VMemwT memory = 0; + for (const auto &node : setSchedule.stepProcessorVertices_[step][proc]) { + memory += instance_->GetComputationalDag().VertexMemWeight(node) + + instance_->GetComputationalDag().VertexCommWeight(node); - for (const auto &parent : instance->getComputationalDag().parents(node)) { - if (node_to_processor_assignment[parent] == proc && node_to_superstep_assignment[parent] == step) { - memory -= instance->getComputationalDag().vertex_comm_weight(parent); + for (const auto &parent : instance_->GetComputationalDag().Parents(node)) { + if (nodeToProcessorAssignment_[parent] == proc && nodeToSuperstepAssignment_[parent] == step) { + memory -= instance_->GetComputationalDag().VertexCommWeight(parent); } } } - if (memory > instance->getArchitecture().memoryBound(proc)) { + if (memory > instance_->GetArchitecture().MemoryBound(proc)) { return false; } } @@ -702,29 +697,29 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - std::unordered_set> nodes_with_incoming_edges; + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + std::unordered_set> nodesWithIncomingEdges; - v_memw_t memory = 0; - for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { - memory += instance->getComputationalDag().vertex_comm_weight(node); + VMemwT memory = 0; + for (const auto &node : setSchedule.stepProcessorVertices_[step][proc]) { + memory += instance_->GetComputationalDag().VertexCommWeight(node); - for (const auto 
&parent : instance->getComputationalDag().parents(node)) { - if (node_to_superstep_assignment[parent] != step) { - nodes_with_incoming_edges.insert(parent); + for (const auto &parent : instance_->GetComputationalDag().Parents(node)) { + if (nodeToSuperstepAssignment_[parent] != step) { + nodesWithIncomingEdges.insert(parent); } } } - for (const auto &node : nodes_with_incoming_edges) { - memory += instance->getComputationalDag().vertex_comm_weight(node); + for (const auto &node : nodesWithIncomingEdges) { + memory += instance_->GetComputationalDag().VertexCommWeight(node); } - if (memory > instance->getArchitecture().memoryBound(proc)) { + if (memory > instance_->GetArchitecture().MemoryBound(proc)) { return false; } } @@ -732,31 +727,31 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - std::unordered_set> nodes_with_incoming_edges; + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + std::unordered_set> nodesWithIncomingEdges; - v_memw_t memory = 0; - for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { - if (is_source(node, instance->getComputationalDag())) { - memory += instance->getComputationalDag().vertex_mem_weight(node); + VMemwT memory = 0; + for (const auto &node : setSchedule.stepProcessorVertices_[step][proc]) { + if (IsSource(node, instance_->GetComputationalDag())) { + memory += instance_->GetComputationalDag().VertexMemWeight(node); } - for (const auto &parent : instance->getComputationalDag().parents(node)) { - if (node_to_superstep_assignment[parent] != step) { - nodes_with_incoming_edges.insert(parent); + for (const auto &parent : instance_->GetComputationalDag().Parents(node)) { + if (nodeToSuperstepAssignment_[parent] != step) { + nodesWithIncomingEdges.insert(parent); } } } - for (const auto &node : nodes_with_incoming_edges) { - memory += 
instance->getComputationalDag().vertex_comm_weight(node); + for (const auto &node : nodesWithIncomingEdges) { + memory += instance_->GetComputationalDag().VertexCommWeight(node); } - if (memory > instance->getArchitecture().memoryBound(proc)) { + if (memory > instance_->GetArchitecture().MemoryBound(proc)) { return false; } } diff --git a/include/osp/bsp/model/BspScheduleCS.hpp b/include/osp/bsp/model/BspScheduleCS.hpp index ac906e39..e311fb2d 100644 --- a/include/osp/bsp/model/BspScheduleCS.hpp +++ b/include/osp/bsp/model/BspScheduleCS.hpp @@ -48,29 +48,29 @@ namespace osp { * * @see BspInstance */ -template -class BspScheduleCS : public BspSchedule { - static_assert(is_computational_dag_v, "BspScheduleCS can only be used with computational DAGs."); +template +class BspScheduleCS : public BspSchedule { + static_assert(isComputationalDagV, "BspScheduleCS can only be used with computational DAGs."); public: - using KeyTriple = std::tuple, unsigned int, unsigned int>; + using KeyTriple = std::tuple, unsigned int, unsigned int>; private: - using vertex_idx = vertex_idx_t; + using VertexIdx = VertexIdxT; // contains entries: (vertex, from_proc, to_proc ) : step - std::map commSchedule; + std::map commSchedule_; protected: - void compute_cs_communication_costs_helper(std::vector>> &rec, - std::vector>> &send) const { - for (auto const &[key, val] : commSchedule) { + void ComputeCsCommunicationCostsHelper(std::vector>> &rec, + std::vector>> &send) const { + for (auto const &[key, val] : commSchedule_) { send[std::get<1>(key)][val] - += BspSchedule::instance->sendCosts(std::get<1>(key), std::get<2>(key)) - * BspSchedule::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); + += BspSchedule::instance_->SendCosts(std::get<1>(key), std::get<2>(key)) + * BspSchedule::instance_->GetComputationalDag().VertexCommWeight(std::get<0>(key)); rec[std::get<2>(key)][val] - += BspSchedule::instance->sendCosts(std::get<1>(key), std::get<2>(key)) - * 
BspSchedule::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); + += BspSchedule::instance_->SendCosts(std::get<1>(key), std::get<2>(key)) + * BspSchedule::instance_->GetComputationalDag().VertexCommWeight(std::get<0>(key)); } } @@ -82,7 +82,7 @@ class BspScheduleCS : public BspSchedule { * * @param inst The BspInstance for the schedule. */ - BspScheduleCS(const BspInstance &inst) : BspSchedule(inst) {} + BspScheduleCS(const BspInstance &inst) : BspSchedule(inst) {} /** * @brief Constructs a BspSchedule object with the specified BspInstance, processor assignment, and superstep @@ -92,10 +92,10 @@ class BspScheduleCS : public BspSchedule { * @param processor_assignment_ The processor assignment for the nodes. * @param superstep_assignment_ The superstep assignment for the nodes. */ - BspScheduleCS(const BspInstance &inst, - const std::vector &processor_assignment_, - const std::vector &superstep_assignment_) - : BspSchedule(inst, processor_assignment_, superstep_assignment_) {} + BspScheduleCS(const BspInstance &inst, + const std::vector &processorAssignment, + const std::vector &superstepAssignment) + : BspSchedule(inst, processorAssignment, superstepAssignment) {} /** * @brief Constructs a BspSchedule object with the specified BspInstance, processor assignment, superstep @@ -106,30 +106,30 @@ class BspScheduleCS : public BspSchedule { * @param superstep_assignment_ The superstep assignment for the nodes. * @param comm_ The communication schedule for the nodes. 
*/ - BspScheduleCS(const BspInstance &inst, - const std::vector &processor_assignment_, - const std::vector &superstep_assignment_, - const std::map &comm_) - : BspSchedule(inst, processor_assignment_, superstep_assignment_), commSchedule(comm_) {} - - explicit BspScheduleCS(BspSchedule &&schedule) : BspSchedule(std::move(schedule)) { - setAutoCommunicationSchedule(); + BspScheduleCS(const BspInstance &inst, + const std::vector &processorAssignment, + const std::vector &superstepAssignment, + const std::map &comm) + : BspSchedule(inst, processorAssignment, superstepAssignment), commSchedule_(comm) {} + + explicit BspScheduleCS(BspSchedule &&schedule) : BspSchedule(std::move(schedule)) { + SetAutoCommunicationSchedule(); } - BspScheduleCS(BspSchedule &&schedule, const std::map &comm_) - : BspSchedule(std::move(schedule)), commSchedule(comm_) {} + BspScheduleCS(BspSchedule &&schedule, const std::map &comm) + : BspSchedule(std::move(schedule)), commSchedule_(comm) {} - BspScheduleCS(BspSchedule &&schedule, std::map &&comm_) - : BspSchedule(std::move(schedule)), commSchedule(std::move(comm_)) { - comm_.clear(); + BspScheduleCS(BspSchedule &&schedule, std::map &&comm) + : BspSchedule(std::move(schedule)), commSchedule_(std::move(comm)) { + comm.clear(); } - explicit BspScheduleCS(const BspSchedule &schedule) : BspSchedule(schedule) { - setAutoCommunicationSchedule(); + explicit BspScheduleCS(const BspSchedule &schedule) : BspSchedule(schedule) { + SetAutoCommunicationSchedule(); } - BspScheduleCS(const BspSchedule &schedule, const std::map &comm_) - : BspSchedule(schedule), commSchedule(comm_) {} + BspScheduleCS(const BspSchedule &schedule, const std::map &comm) + : BspSchedule(schedule), commSchedule_(comm) {} BspScheduleCS(const BspScheduleCS &other) = default; BspScheduleCS(BspScheduleCS &&other) = default; @@ -137,31 +137,31 @@ class BspScheduleCS : public BspSchedule { BspScheduleCS &operator=(BspScheduleCS &&other) = default; virtual ~BspScheduleCS() = default; - 
inline const std::map &getCommunicationSchedule() const { return commSchedule; } + inline const std::map &GetCommunicationSchedule() const { return commSchedule_; } - inline std::map &getCommunicationSchedule() { return commSchedule; } + inline std::map &GetCommunicationSchedule() { return commSchedule_; } - inline bool hasValidCommSchedule() const { return checkCommScheduleValidity(commSchedule); } + inline bool HasValidCommSchedule() const { return CheckCommScheduleValidity(commSchedule_); } - void addCommunicationScheduleEntry(KeyTriple key, unsigned step) { - if (step >= BspSchedule::number_of_supersteps) { + void AddCommunicationScheduleEntry(KeyTriple key, unsigned step) { + if (step >= BspSchedule::numberOfSupersteps_) { throw std::invalid_argument("Invalid Argument while adding communication schedule entry: step out of range."); } - if (std::get<0>(key) >= BspSchedule::instance->numberOfVertices()) { + if (std::get<0>(key) >= BspSchedule::instance_->NumberOfVertices()) { throw std::invalid_argument("Invalid Argument while adding communication schedule entry: node out of range."); } - if (std::get<1>(key) >= BspSchedule::instance->numberOfProcessors()) { + if (std::get<1>(key) >= BspSchedule::instance_->NumberOfProcessors()) { throw std::invalid_argument( "Invalid Argument while adding communication schedule entry: from processor out of range."); } - if (std::get<2>(key) >= BspSchedule::instance->numberOfProcessors()) { + if (std::get<2>(key) >= BspSchedule::instance_->NumberOfProcessors()) { throw std::invalid_argument("Invalid Argument while adding communication schedule entry: to processor out of range."); } - commSchedule[key] = step; + commSchedule_[key] = step; } /** @@ -172,8 +172,8 @@ class BspScheduleCS : public BspSchedule { * @param to_proc The processor to which the data is sent. * @param step The superstep in which the data is sent. 
*/ - void addCommunicationScheduleEntry(vertex_idx node, unsigned from_proc, unsigned to_proc, unsigned step) { - addCommunicationScheduleEntry(std::make_tuple(node, from_proc, to_proc), step); + void AddCommunicationScheduleEntry(VertexIdx node, unsigned fromProc, unsigned toProc, unsigned step) { + AddCommunicationScheduleEntry(std::make_tuple(node, fromProc, toProc), step); } /** @@ -181,58 +181,56 @@ class BspScheduleCS : public BspSchedule { * * @param cs The communication schedule to set. */ - void setCommunicationSchedule(const std::map &cs) { - if (checkCommScheduleValidity(cs)) { - commSchedule = cs; + void SetCommunicationSchedule(const std::map &cs) { + if (CheckCommScheduleValidity(cs)) { + commSchedule_ = cs; } else { throw std::invalid_argument("Given communication schedule is not valid for instance"); } } - bool checkCommScheduleValidity(const std::map &cs) const { - std::vector> first_at - = std::vector>(BspSchedule::instance->numberOfVertices(), - std::vector(BspSchedule::instance->numberOfProcessors(), - BspSchedule::number_of_supersteps)); + bool CheckCommScheduleValidity(const std::map &cs) const { + std::vector> firstAt = std::vector>( + BspSchedule::instance_->NumberOfVertices(), + std::vector(BspSchedule::instance_->NumberOfProcessors(), BspSchedule::numberOfSupersteps_)); - for (const auto &node : BspSchedule::instance->vertices()) { - first_at[node][BspSchedule::node_to_processor_assignment[node]] - = BspSchedule::node_to_superstep_assignment[node]; + for (const auto &node : BspSchedule::instance_->Vertices()) { + firstAt[node][BspSchedule::nodeToProcessorAssignment_[node]] + = BspSchedule::nodeToSuperstepAssignment_[node]; } for (auto const &[key, val] : cs) { - if (val >= BspSchedule::number_of_supersteps) { + if (val >= BspSchedule::numberOfSupersteps_) { return false; } - if (std::get<0>(key) >= BspSchedule::instance->numberOfVertices()) { + if (std::get<0>(key) >= BspSchedule::instance_->NumberOfVertices()) { return false; } - if 
(std::get<1>(key) >= BspSchedule::instance->numberOfProcessors()) { + if (std::get<1>(key) >= BspSchedule::instance_->NumberOfProcessors()) { return false; } - if (std::get<2>(key) >= BspSchedule::instance->numberOfProcessors()) { + if (std::get<2>(key) >= BspSchedule::instance_->NumberOfProcessors()) { return false; } - first_at[std::get<0>(key)][std::get<2>(key)] - = std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + this->getStaleness()); + firstAt[std::get<0>(key)][std::get<2>(key)] + = std::min(firstAt[std::get<0>(key)][std::get<2>(key)], val + this->GetStaleness()); } for (auto const &[key, val] : cs) { - if (val < first_at[std::get<0>(key)][std::get<1>(key)]) { + if (val < firstAt[std::get<0>(key)][std::get<1>(key)]) { return false; } } - for (const auto &v : BspSchedule::instance->getComputationalDag().vertices()) { - for (const auto &target : BspSchedule::instance->getComputationalDag().children(v)) { - if (BspSchedule::node_to_processor_assignment[v] - != BspSchedule::node_to_processor_assignment[target]) { - if (first_at[v][BspSchedule::node_to_processor_assignment[target]] - > BspSchedule::node_to_superstep_assignment[target]) { + for (const auto &v : BspSchedule::instance_->GetComputationalDag().Vertices()) { + for (const auto &target : BspSchedule::instance_->GetComputationalDag().Children(v)) { + if (BspSchedule::nodeToProcessorAssignment_[v] != BspSchedule::nodeToProcessorAssignment_[target]) { + if (firstAt[v][BspSchedule::nodeToProcessorAssignment_[target]] + > BspSchedule::nodeToSuperstepAssignment_[target]) { return false; } } @@ -242,166 +240,161 @@ class BspScheduleCS : public BspSchedule { return true; } - v_commw_t compute_cs_communication_costs() const { - std::vector>> rec(this->instance->numberOfProcessors(), - std::vector>(this->number_of_supersteps, 0)); - std::vector>> send(this->instance->numberOfProcessors(), - std::vector>(this->number_of_supersteps, 0)); + VCommwT ComputeCsCommunicationCosts() const { + std::vector>> 
rec(this->instance_->NumberOfProcessors(), + std::vector>(this->numberOfSupersteps_, 0)); + std::vector>> send(this->instance_->NumberOfProcessors(), + std::vector>(this->numberOfSupersteps_, 0)); - compute_cs_communication_costs_helper(rec, send); - const std::vector> max_comm_per_step = cost_helpers::compute_max_comm_per_step(*this, rec, send); + ComputeCsCommunicationCostsHelper(rec, send); + const std::vector> maxCommPerStep = cost_helpers::ComputeMaxCommPerStep(*this, rec, send); - v_commw_t costs = 0; - for (unsigned step = 0; step < this->number_of_supersteps; step++) { - const auto step_comm_cost = max_comm_per_step[step]; - costs += step_comm_cost; + VCommwT costs = 0; + for (unsigned step = 0; step < this->numberOfSupersteps_; step++) { + const auto stepCommCost = maxCommPerStep[step]; + costs += stepCommCost; - if (step_comm_cost > 0) { - costs += this->instance->synchronisationCosts(); + if (stepCommCost > 0) { + costs += this->instance_->SynchronisationCosts(); } } return costs; } - virtual v_workw_t computeCosts() const override { - return compute_cs_communication_costs() + this->computeWorkCosts(); - } + virtual VWorkwT ComputeCosts() const override { return ComputeCsCommunicationCosts() + this->ComputeWorkCosts(); } - void setAutoCommunicationSchedule() { - std::map best_comm_schedule; - v_workw_t best_comm_cost - = std::numeric_limits>::max(); // computeCosts retunrs v_workw_t + void SetAutoCommunicationSchedule() { + std::map bestCommSchedule; + VWorkwT bestCommCost = std::numeric_limits>::max(); // ComputeCosts retunrs VWorkwT - if (hasValidCommSchedule()) { - v_workw_t costs_com = BspSchedule::computeCosts(); - if (costs_com < best_comm_cost) { - best_comm_schedule = commSchedule; - best_comm_cost = costs_com; + if (HasValidCommSchedule()) { + VWorkwT costsCom = BspSchedule::ComputeCosts(); + if (costsCom < bestCommCost) { + bestCommSchedule = commSchedule_; + bestCommCost = costsCom; } } - setImprovedLazyCommunicationSchedule(); - v_workw_t 
costs_com = BspSchedule::computeCosts(); + SetImprovedLazyCommunicationSchedule(); + VWorkwT costsCom = BspSchedule::ComputeCosts(); // std::cout << "Improved Lazy: " << costs_com << std::endl; - if (costs_com < best_comm_cost) { - best_comm_schedule = commSchedule; - best_comm_cost = costs_com; + if (costsCom < bestCommCost) { + bestCommSchedule = commSchedule_; + bestCommCost = costsCom; } - setLazyCommunicationSchedule(); - costs_com = BspSchedule::computeCosts(); + SetLazyCommunicationSchedule(); + costsCom = BspSchedule::ComputeCosts(); // std::cout << "Lazy: " << costs_com << std::endl; - if (costs_com < best_comm_cost) { - best_comm_schedule = commSchedule; - best_comm_cost = costs_com; + if (costsCom < bestCommCost) { + bestCommSchedule = commSchedule_; + bestCommCost = costsCom; } - setEagerCommunicationSchedule(); - costs_com = BspSchedule::computeCosts(); + SetEagerCommunicationSchedule(); + costsCom = BspSchedule::ComputeCosts(); // std::cout << "Eager: " << costs_com << std::endl; - if (costs_com < best_comm_cost) { - best_comm_schedule = commSchedule; - best_comm_cost = costs_com; + if (costsCom < bestCommCost) { + bestCommSchedule = commSchedule_; + bestCommCost = costsCom; } - commSchedule = best_comm_schedule; + commSchedule_ = bestCommSchedule; } - void setImprovedLazyCommunicationSchedule() { - commSchedule.clear(); - if (BspSchedule::instance->getComputationalDag().num_vertices() <= 1 - || BspSchedule::number_of_supersteps <= 1) { + void SetImprovedLazyCommunicationSchedule() { + commSchedule_.clear(); + if (BspSchedule::instance_->GetComputationalDag().NumVertices() <= 1 + || BspSchedule::numberOfSupersteps_ <= 1) { return; } - std::vector>>> step_proc_node_list( - BspSchedule::number_of_supersteps, - std::vector>>(BspSchedule::instance->numberOfProcessors(), - std::vector>())); - std::vector> node_to_proc_been_sent( - BspSchedule::instance->numberOfVertices(), - std::vector(BspSchedule::instance->numberOfProcessors(), false)); + std::vector>>> 
stepProcNodeList( + BspSchedule::numberOfSupersteps_, + std::vector>>(BspSchedule::instance_->NumberOfProcessors(), + std::vector>())); + std::vector> nodeToProcBeenSent( + BspSchedule::instance_->NumberOfVertices(), + std::vector(BspSchedule::instance_->NumberOfProcessors(), false)); - for (vertex_idx_t node = 0; node < BspSchedule::instance->numberOfVertices(); node++) { - step_proc_node_list[BspSchedule::node_to_superstep_assignment[node]] - [BspSchedule::node_to_processor_assignment[node]] - .push_back(node); - node_to_proc_been_sent[node][BspSchedule::node_to_processor_assignment[node]] = true; + for (VertexIdxT node = 0; node < BspSchedule::instance_->NumberOfVertices(); node++) { + stepProcNodeList[BspSchedule::nodeToSuperstepAssignment_[node]] + [BspSchedule::nodeToProcessorAssignment_[node]] + .push_back(node); + nodeToProcBeenSent[node][BspSchedule::nodeToProcessorAssignment_[node]] = true; } // The data structure stores for each processor a set of tuples representing required sends. // Each tuple is (communication_cost, source_node, destination_processor). 
- std::vector, vertex_idx_t, unsigned>, std::greater<>>> require_sending( - BspSchedule::instance->numberOfProcessors()); - - for (unsigned proc = 0; proc < BspSchedule::instance->numberOfProcessors(); proc++) { - for (const auto &node : step_proc_node_list[0][proc]) { - for (const auto &target : BspSchedule::instance->getComputationalDag().children(node)) { - if (proc != BspSchedule::assignedProcessor(target)) { - require_sending[proc].insert({BspSchedule::instance->getComputationalDag().vertex_comm_weight(node) - * BspSchedule::instance->getArchitecture().sendCosts( - proc, BspSchedule::node_to_processor_assignment[target]), - node, - BspSchedule::node_to_processor_assignment[target]}); + std::vector, VertexIdxT, unsigned>, std::greater<>>> requireSending( + BspSchedule::instance_->NumberOfProcessors()); + + for (unsigned proc = 0; proc < BspSchedule::instance_->NumberOfProcessors(); proc++) { + for (const auto &node : stepProcNodeList[0][proc]) { + for (const auto &target : BspSchedule::instance_->GetComputationalDag().Children(node)) { + if (proc != BspSchedule::AssignedProcessor(target)) { + requireSending[proc].insert({BspSchedule::instance_->GetComputationalDag().VertexCommWeight(node) + * BspSchedule::instance_->GetArchitecture().SendCosts( + proc, BspSchedule::nodeToProcessorAssignment_[target]), + node, + BspSchedule::nodeToProcessorAssignment_[target]}); } } } } - for (unsigned step = 1; step < BspSchedule::number_of_supersteps; step++) { - std::vector> send_cost(BspSchedule::instance->numberOfProcessors(), 0); - std::vector> receive_cost(BspSchedule::instance->numberOfProcessors(), 0); + for (unsigned step = 1; step < BspSchedule::numberOfSupersteps_; step++) { + std::vector> sendCost(BspSchedule::instance_->NumberOfProcessors(), 0); + std::vector> receiveCost(BspSchedule::instance_->NumberOfProcessors(), 0); // must send in superstep step-1 - for (unsigned proc = 0; proc < BspSchedule::instance->numberOfProcessors(); proc++) { - for (const auto &node : 
step_proc_node_list[step][proc]) { - for (const auto &source : BspSchedule::instance->getComputationalDag().parents(node)) { - if (!node_to_proc_been_sent[source][proc]) { - assert(BspSchedule::node_to_superstep_assignment[source] < step + 1 - this->getStaleness()); - commSchedule.emplace( - std::make_tuple(source, BspSchedule::node_to_processor_assignment[source], proc), - step - this->getStaleness()); - node_to_proc_been_sent[source][proc] = true; - v_commw_t comm_cost - = BspSchedule::instance->getComputationalDag().vertex_comm_weight(source) - * BspSchedule::instance->getArchitecture().sendCosts( - BspSchedule::node_to_processor_assignment[source], proc); - require_sending[BspSchedule::node_to_processor_assignment[source]].erase( - {comm_cost, source, proc}); - send_cost[BspSchedule::node_to_processor_assignment[source]] += comm_cost; - receive_cost[proc] += comm_cost; + for (unsigned proc = 0; proc < BspSchedule::instance_->NumberOfProcessors(); proc++) { + for (const auto &node : stepProcNodeList[step][proc]) { + for (const auto &source : BspSchedule::instance_->GetComputationalDag().Parents(node)) { + if (!nodeToProcBeenSent[source][proc]) { + assert(BspSchedule::nodeToSuperstepAssignment_[source] < step + 1 - this->GetStaleness()); + commSchedule_.emplace( + std::make_tuple(source, BspSchedule::nodeToProcessorAssignment_[source], proc), + step - this->GetStaleness()); + nodeToProcBeenSent[source][proc] = true; + VCommwT commCost + = BspSchedule::instance_->GetComputationalDag().VertexCommWeight(source) + * BspSchedule::instance_->GetArchitecture().SendCosts( + BspSchedule::nodeToProcessorAssignment_[source], proc); + requireSending[BspSchedule::nodeToProcessorAssignment_[source]].erase({commCost, source, proc}); + sendCost[BspSchedule::nodeToProcessorAssignment_[source]] += commCost; + receiveCost[proc] += commCost; } } } } // getting max costs - v_commw_t max_comm_cost = 0; - for (size_t proc = 0; proc < BspSchedule::instance->numberOfProcessors(); proc++) 
{ - max_comm_cost = std::max(max_comm_cost, send_cost[proc]); - max_comm_cost = std::max(max_comm_cost, receive_cost[proc]); + VCommwT maxCommCost = 0; + for (size_t proc = 0; proc < BspSchedule::instance_->NumberOfProcessors(); proc++) { + maxCommCost = std::max(maxCommCost, sendCost[proc]); + maxCommCost = std::max(maxCommCost, receiveCost[proc]); } // extra sends // TODO: permute the order of processors - for (size_t proc = 0; proc < BspSchedule::instance->numberOfProcessors(); proc++) { - if (require_sending[proc].empty() - || std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] > max_comm_cost) { + for (size_t proc = 0; proc < BspSchedule::instance_->NumberOfProcessors(); proc++) { + if (requireSending[proc].empty() || std::get<0>(*requireSending[proc].rbegin()) + sendCost[proc] > maxCommCost) { continue; } - auto iter = require_sending[proc].begin(); - while (iter != require_sending[proc].end()) { - const auto &[comm_cost, node_to_send, dest_proc] = *iter; - if (comm_cost + send_cost[proc] > max_comm_cost || comm_cost + receive_cost[dest_proc] > max_comm_cost) { + auto iter = requireSending[proc].begin(); + while (iter != requireSending[proc].end()) { + const auto &[commCost, node_to_send, dest_proc] = *iter; + if (commCost + sendCost[proc] > maxCommCost || commCost + receiveCost[dest_proc] > maxCommCost) { iter++; } else { - commSchedule.emplace(std::make_tuple(node_to_send, proc, dest_proc), step - this->getStaleness()); - node_to_proc_been_sent[node_to_send][dest_proc] = true; - send_cost[proc] += comm_cost; - receive_cost[dest_proc] += comm_cost; - iter = require_sending[proc].erase(iter); - if (require_sending[proc].empty() - || std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] > max_comm_cost) { + commSchedule_.emplace(std::make_tuple(node_to_send, proc, dest_proc), step - this->GetStaleness()); + nodeToProcBeenSent[node_to_send][dest_proc] = true; + sendCost[proc] += commCost; + receiveCost[dest_proc] += commCost; + iter = 
requireSending[proc].erase(iter); + if (requireSending[proc].empty() + || std::get<0>(*requireSending[proc].rbegin()) + sendCost[proc] > maxCommCost) { break; // Exit if no more sends can possibly fit. } } @@ -409,16 +402,16 @@ class BspScheduleCS : public BspSchedule { } // updating require_sending - for (unsigned proc = 0; proc < BspSchedule::instance->numberOfProcessors(); proc++) { - for (const auto &node : step_proc_node_list[step][proc]) { - for (const auto &target : BspSchedule::instance->getComputationalDag().children(node)) { - if (proc != BspSchedule::assignedProcessor(target)) { - require_sending[proc].insert( - {BspSchedule::instance->getComputationalDag().vertex_comm_weight(node) - * BspSchedule::instance->getArchitecture().sendCosts( - proc, BspSchedule::node_to_processor_assignment[target]), + for (unsigned proc = 0; proc < BspSchedule::instance_->NumberOfProcessors(); proc++) { + for (const auto &node : stepProcNodeList[step][proc]) { + for (const auto &target : BspSchedule::instance_->GetComputationalDag().Children(node)) { + if (proc != BspSchedule::AssignedProcessor(target)) { + requireSending[proc].insert( + {BspSchedule::instance_->GetComputationalDag().VertexCommWeight(node) + * BspSchedule::instance_->GetArchitecture().SendCosts( + proc, BspSchedule::nodeToProcessorAssignment_[target]), node, - BspSchedule::node_to_processor_assignment[target]}); + BspSchedule::nodeToProcessorAssignment_[target]}); } } } @@ -426,177 +419,176 @@ class BspScheduleCS : public BspSchedule { } } - void setLazyCommunicationSchedule() { - commSchedule.clear(); + void SetLazyCommunicationSchedule() { + commSchedule_.clear(); - for (const auto &source : BspSchedule::instance->getComputationalDag().vertices()) { - for (const auto &target : BspSchedule::instance->getComputationalDag().children(source)) { - if (BspSchedule::node_to_processor_assignment[source] - != BspSchedule::node_to_processor_assignment[target]) { + for (const auto &source : 
BspSchedule::instance_->GetComputationalDag().Vertices()) { + for (const auto &target : BspSchedule::instance_->GetComputationalDag().Children(source)) { + if (BspSchedule::nodeToProcessorAssignment_[source] + != BspSchedule::nodeToProcessorAssignment_[target]) { const auto tmp = std::make_tuple(source, - BspSchedule::node_to_processor_assignment[source], - BspSchedule::node_to_processor_assignment[target]); - if (commSchedule.find(tmp) == commSchedule.end()) { - commSchedule[tmp] = BspSchedule::node_to_superstep_assignment[target] - this->getStaleness(); + BspSchedule::nodeToProcessorAssignment_[source], + BspSchedule::nodeToProcessorAssignment_[target]); + if (commSchedule_.find(tmp) == commSchedule_.end()) { + commSchedule_[tmp] = BspSchedule::nodeToSuperstepAssignment_[target] - this->GetStaleness(); } else { - commSchedule[tmp] = std::min( - BspSchedule::node_to_superstep_assignment[target] - this->getStaleness(), commSchedule[tmp]); + commSchedule_[tmp] = std::min( + BspSchedule::nodeToSuperstepAssignment_[target] - this->GetStaleness(), commSchedule_[tmp]); } } } } } - void setEagerCommunicationSchedule() { - commSchedule.clear(); - - for (const auto &source : BspSchedule::instance->getComputationalDag().vertices()) { - for (const auto &target : BspSchedule::instance->getComputationalDag().children(source)) { - if (BspSchedule::node_to_processor_assignment[source] - != BspSchedule::node_to_processor_assignment[target]) { - commSchedule[std::make_tuple(source, - BspSchedule::node_to_processor_assignment[source], - BspSchedule::node_to_processor_assignment[target])] - = BspSchedule::node_to_superstep_assignment[source]; + void SetEagerCommunicationSchedule() { + commSchedule_.clear(); + + for (const auto &source : BspSchedule::instance_->GetComputationalDag().Vertices()) { + for (const auto &target : BspSchedule::instance_->GetComputationalDag().Children(source)) { + if (BspSchedule::nodeToProcessorAssignment_[source] + != 
BspSchedule::nodeToProcessorAssignment_[target]) { + commSchedule_[std::make_tuple(source, + BspSchedule::nodeToProcessorAssignment_[source], + BspSchedule::nodeToProcessorAssignment_[target])] + = BspSchedule::nodeToSuperstepAssignment_[source]; } } } } - virtual void shrinkByMergingSupersteps() override { - std::vector superstep_latest_dependency(this->number_of_supersteps, 0); - std::vector> first_at = getFirstPresence(); + virtual void ShrinkByMergingSupersteps() override { + std::vector superstepLatestDependency(this->numberOfSupersteps_, 0); + std::vector> firstAt = GetFirstPresence(); - for (auto const &[key, val] : commSchedule) { - if (this->assignedProcessor(std::get<0>(key)) != std::get<1>(key)) { - superstep_latest_dependency[val] - = std::max(superstep_latest_dependency[val], first_at[std::get<0>(key)][std::get<1>(key)]); + for (auto const &[key, val] : commSchedule_) { + if (this->AssignedProcessor(std::get<0>(key)) != std::get<1>(key)) { + superstepLatestDependency[val] + = std::max(superstepLatestDependency[val], firstAt[std::get<0>(key)][std::get<1>(key)]); } } - for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) { - for (const auto &child : BspSchedule::instance->getComputationalDag().children(node)) { - if (this->assignedProcessor(node) != this->assignedProcessor(child)) { - superstep_latest_dependency[this->assignedSuperstep(child)] - = std::max(superstep_latest_dependency[this->assignedSuperstep(child)], - first_at[node][this->assignedProcessor(child)]); + for (const auto &node : BspSchedule::instance_->GetComputationalDag().Vertices()) { + for (const auto &child : BspSchedule::instance_->GetComputationalDag().Children(node)) { + if (this->AssignedProcessor(node) != this->AssignedProcessor(child)) { + superstepLatestDependency[this->AssignedSuperstep(child)] = std::max( + superstepLatestDependency[this->AssignedSuperstep(child)], firstAt[node][this->AssignedProcessor(child)]); } } } - std::vector 
merge_with_previous(this->number_of_supersteps, false); - for (unsigned step = this->number_of_supersteps - 1; step < this->number_of_supersteps; --step) { + std::vector mergeWithPrevious(this->numberOfSupersteps_, false); + for (unsigned step = this->numberOfSupersteps_ - 1; step < this->numberOfSupersteps_; --step) { unsigned limit = 0; while (step > limit) { - limit = std::max(limit, superstep_latest_dependency[step]); + limit = std::max(limit, superstepLatestDependency[step]); if (step > limit) { - merge_with_previous[step] = true; + mergeWithPrevious[step] = true; --step; } } } - std::vector new_step_index(this->number_of_supersteps); - unsigned current_index = std::numeric_limits::max(); - for (unsigned step = 0; step < this->number_of_supersteps; ++step) { - if (!merge_with_previous[step]) { - current_index++; + std::vector newStepIndex(this->numberOfSupersteps_); + unsigned currentIndex = std::numeric_limits::max(); + for (unsigned step = 0; step < this->numberOfSupersteps_; ++step) { + if (!mergeWithPrevious[step]) { + currentIndex++; } - new_step_index[step] = current_index; + newStepIndex[step] = currentIndex; } - for (const auto &node : this->instance->vertices()) { - this->node_to_superstep_assignment[node] = new_step_index[this->node_to_superstep_assignment[node]]; + for (const auto &node : this->instance_->Vertices()) { + this->nodeToSuperstepAssignment_[node] = newStepIndex[this->nodeToSuperstepAssignment_[node]]; } - for (auto &[key, val] : commSchedule) { - val = new_step_index[val]; + for (auto &[key, val] : commSchedule_) { + val = newStepIndex[val]; } - this->setNumberOfSupersteps(current_index + 1); + this->SetNumberOfSupersteps(currentIndex + 1); } // for each vertex v and processor p, find the first superstep where v is present on p by the end of the compute phase - std::vector> getFirstPresence() const { - std::vector> first_at( - BspSchedule::instance->numberOfVertices(), - std::vector(BspSchedule::instance->numberOfProcessors(), 
std::numeric_limits::max())); + std::vector> GetFirstPresence() const { + std::vector> firstAt( + BspSchedule::instance_->NumberOfVertices(), + std::vector(BspSchedule::instance_->NumberOfProcessors(), std::numeric_limits::max())); - for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) { - first_at[node][this->assignedProcessor(node)] = this->assignedSuperstep(node); + for (const auto &node : BspSchedule::instance_->GetComputationalDag().Vertices()) { + firstAt[node][this->AssignedProcessor(node)] = this->AssignedSuperstep(node); } - for (auto const &[key, val] : commSchedule) { - first_at[std::get<0>(key)][std::get<2>(key)] - = std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + 1); // TODO: replace by staleness after merge + for (auto const &[key, val] : commSchedule_) { + firstAt[std::get<0>(key)][std::get<2>(key)] + = std::min(firstAt[std::get<0>(key)][std::get<2>(key)], val + 1); // TODO: replace by staleness after merge } - return first_at; + return firstAt; } // remove unneeded comm. schedule entries - these can happen in ILPs, partial ILPs, etc. 
- void cleanCommSchedule() { + void CleanCommSchedule() { // data that is already present before it arrives - std::vector>> arrives_at( - BspSchedule::instance->numberOfVertices(), - std::vector>(BspSchedule::instance->numberOfProcessors())); - for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) { - arrives_at[node][this->assignedProcessor(node)].insert(this->assignedSuperstep(node)); + std::vector>> arrivesAt( + BspSchedule::instance_->NumberOfVertices(), + std::vector>(BspSchedule::instance_->NumberOfProcessors())); + for (const auto &node : BspSchedule::instance_->GetComputationalDag().Vertices()) { + arrivesAt[node][this->AssignedProcessor(node)].insert(this->AssignedSuperstep(node)); } - for (auto const &[key, val] : commSchedule) { - arrives_at[std::get<0>(key)][std::get<2>(key)].insert(val); + for (auto const &[key, val] : commSchedule_) { + arrivesAt[std::get<0>(key)][std::get<2>(key)].insert(val); } std::vector toErase; - for (auto const &[key, val] : commSchedule) { - auto itr = arrives_at[std::get<0>(key)][std::get<2>(key)].begin(); + for (auto const &[key, val] : commSchedule_) { + auto itr = arrivesAt[std::get<0>(key)][std::get<2>(key)].begin(); if (*itr < val) { toErase.push_back(key); - } else if (*itr == val && ++itr != arrives_at[std::get<0>(key)][std::get<2>(key)].end() && *itr == val) { + } else if (*itr == val && ++itr != arrivesAt[std::get<0>(key)][std::get<2>(key)].end() && *itr == val) { toErase.push_back(key); - arrives_at[std::get<0>(key)][std::get<2>(key)].erase(itr); + arrivesAt[std::get<0>(key)][std::get<2>(key)].erase(itr); } } for (const KeyTriple &key : toErase) { - commSchedule.erase(key); + commSchedule_.erase(key); } // data that is not used after being sent - std::vector>> used_at( - BspSchedule::instance->numberOfVertices(), - std::vector>(BspSchedule::instance->numberOfProcessors())); - for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) { - for (const auto &child : 
BspSchedule::instance->getComputationalDag().children(node)) { - used_at[node][this->assignedProcessor(child)].insert(this->assignedSuperstep(child)); + std::vector>> usedAt( + BspSchedule::instance_->NumberOfVertices(), + std::vector>(BspSchedule::instance_->NumberOfProcessors())); + for (const auto &node : BspSchedule::instance_->GetComputationalDag().Vertices()) { + for (const auto &child : BspSchedule::instance_->GetComputationalDag().Children(node)) { + usedAt[node][this->AssignedProcessor(child)].insert(this->AssignedSuperstep(child)); } } - for (auto const &[key, val] : commSchedule) { - used_at[std::get<0>(key)][std::get<1>(key)].insert(val); + for (auto const &[key, val] : commSchedule_) { + usedAt[std::get<0>(key)][std::get<1>(key)].insert(val); } // (need to visit cs entries in reverse superstep order here) - std::vector> entries(this->number_of_supersteps); - for (auto const &[key, val] : commSchedule) { + std::vector> entries(this->numberOfSupersteps_); + for (auto const &[key, val] : commSchedule_) { entries[val].push_back(key); } toErase.clear(); - for (unsigned step = this->number_of_supersteps - 1; step < this->number_of_supersteps; --step) { + for (unsigned step = this->numberOfSupersteps_ - 1; step < this->numberOfSupersteps_; --step) { for (const KeyTriple &key : entries[step]) { - if (used_at[std::get<0>(key)][std::get<2>(key)].empty() - || *used_at[std::get<0>(key)][std::get<2>(key)].rbegin() <= step) { + if (usedAt[std::get<0>(key)][std::get<2>(key)].empty() + || *usedAt[std::get<0>(key)][std::get<2>(key)].rbegin() <= step) { toErase.push_back(key); - auto itr = used_at[std::get<0>(key)][std::get<1>(key)].find(step); - used_at[std::get<0>(key)][std::get<1>(key)].erase(itr); + auto itr = usedAt[std::get<0>(key)][std::get<1>(key)].find(step); + usedAt[std::get<0>(key)][std::get<1>(key)].erase(itr); } } } for (const KeyTriple &key : toErase) { - commSchedule.erase(key); + commSchedule_.erase(key); } } }; diff --git 
a/include/osp/bsp/model/BspScheduleRecomp.hpp b/include/osp/bsp/model/BspScheduleRecomp.hpp index 7f3f233c..14477a1f 100644 --- a/include/osp/bsp/model/BspScheduleRecomp.hpp +++ b/include/osp/bsp/model/BspScheduleRecomp.hpp @@ -24,57 +24,57 @@ limitations under the License. namespace osp { -template -class BspScheduleRecomp : public IBspScheduleEval { +template +class BspScheduleRecomp : public IBspScheduleEval { public: - using vertex_idx = vertex_idx_t; - using cost_type = v_workw_t; + using VertexIdx = VertexIdxT; + using CostType = VWorkwT; - using KeyTriple = std::tuple, unsigned int, unsigned int>; + using KeyTriple = std::tuple; - static_assert(is_computational_dag_v, "BspScheduleRecomp can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, + static_assert(isComputationalDagV, "BspScheduleRecomp can only be used with computational DAGs."); + static_assert(std::is_same_v, VCommwT>, "BspScheduleRecomp requires work and comm. weights to have the same type."); private: - const BspInstance *instance; + const BspInstance *instance_; - unsigned int number_of_supersteps = 0; + unsigned int numberOfSupersteps_ = 0; - std::vector>> node_to_processor_and_supertep_assignment; + std::vector>> nodeToProcessorAndSupertepAssignment_; - std::map commSchedule; + std::map commSchedule_; public: BspScheduleRecomp() = default; - BspScheduleRecomp(const BspInstance &inst) : instance(&inst) { - node_to_processor_and_supertep_assignment.resize(inst.numberOfVertices()); + BspScheduleRecomp(const BspInstance &inst) : instance_(&inst) { + nodeToProcessorAndSupertepAssignment_.resize(inst.NumberOfVertices()); } - BspScheduleRecomp(const BspScheduleCS &schedule); + BspScheduleRecomp(const BspScheduleCS &schedule); - BspScheduleRecomp(const BspSchedule &schedule) : BspScheduleRecomp(BspScheduleCS(schedule)) {} + BspScheduleRecomp(const BspSchedule &schedule) : BspScheduleRecomp(BspScheduleCS(schedule)) {} virtual ~BspScheduleRecomp() = default; - const 
BspInstance &getInstance() const { return *instance; } + const BspInstance &GetInstance() const { return *instance_; } /** * @brief Returns the number of supersteps in the schedule. * * @return The number of supersteps in the schedule. */ - virtual unsigned numberOfSupersteps() const override { return number_of_supersteps; } + virtual unsigned NumberOfSupersteps() const override { return numberOfSupersteps_; } - void setNumberOfSupersteps(unsigned number_of_supersteps_) { number_of_supersteps = number_of_supersteps_; } + void SetNumberOfSupersteps(unsigned numberOfSupersteps) { numberOfSupersteps_ = numberOfSupersteps; } - std::vector> &assignments(vertex_idx node) { - return node_to_processor_and_supertep_assignment[node]; + std::vector> &Assignments(VertexIdx node) { + return nodeToProcessorAndSupertepAssignment_[node]; } - const std::vector> &assignments(vertex_idx node) const { - return node_to_processor_and_supertep_assignment[node]; + const std::vector> &Assignments(VertexIdx node) const { + return nodeToProcessorAndSupertepAssignment_[node]; } /** @@ -82,7 +82,7 @@ class BspScheduleRecomp : public IBspScheduleEval { * * @param cs The communication schedule to set. */ - void setCommunicationSchedule(const std::map &cs); + void SetCommunicationSchedule(const std::map &cs); /** * @brief Adds an entry to the communication schedule. @@ -90,7 +90,7 @@ class BspScheduleRecomp : public IBspScheduleEval { * @param key The key for the communication schedule entry. * @param step The superstep for the communication schedule entry. */ - void addCommunicationScheduleEntry(KeyTriple key, unsigned step); + void AddCommunicationScheduleEntry(KeyTriple key, unsigned step); /** * @brief Adds an entry to the communication schedule. @@ -100,20 +100,20 @@ class BspScheduleRecomp : public IBspScheduleEval { * @param to_proc The processor to which the data is sent. * @param step The superstep in which the data is sent. 
*/ - void addCommunicationScheduleEntry(unsigned node, unsigned from_proc, unsigned to_proc, unsigned step); + void AddCommunicationScheduleEntry(unsigned node, unsigned fromProc, unsigned toProc, unsigned step); /** * @brief Returns the communication schedule for the schedule. * * @return The communication schedule for the schedule. */ - const std::map &getCommunicationSchedule() const { return commSchedule; } + const std::map &GetCommunicationSchedule() const { return commSchedule_; } - std::map &getCommunicationSchedule() { return commSchedule; } + std::map &GetCommunicationSchedule() { return commSchedule_; } - virtual cost_type computeWorkCosts() const override; + virtual CostType ComputeWorkCosts() const override; - virtual cost_type computeCosts() const override; + virtual CostType ComputeCosts() const override; /** * @brief Returns true if the schedule is valid, i.e. if every time we compute a node, all its parents are already available @@ -121,93 +121,94 @@ class BspScheduleRecomp : public IBspScheduleEval { * * @return True if the schedule is valid, false otherwise. 
*/ - bool satisfiesConstraints() const; + bool SatisfiesConstraints() const; - vertex_idx getTotalAssignments() const; + VertexIdx GetTotalAssignments() const; - void mergeSupersteps(); + void MergeSupersteps(); }; -template -BspScheduleRecomp::BspScheduleRecomp(const BspScheduleCS &schedule) : instance(&schedule.getInstance()) { - node_to_processor_and_supertep_assignment.clear(); - node_to_processor_and_supertep_assignment.resize(instance->numberOfVertices()); - number_of_supersteps = schedule.numberOfSupersteps(); +template +BspScheduleRecomp::BspScheduleRecomp(const BspScheduleCS &schedule) : instance_(&schedule.GetInstance()) { + nodeToProcessorAndSupertepAssignment_.clear(); + nodeToProcessorAndSupertepAssignment_.resize(instance_->NumberOfVertices()); + numberOfSupersteps_ = schedule.NumberOfSupersteps(); - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - node_to_processor_and_supertep_assignment[node].emplace_back(schedule.assignedProcessor(node), - schedule.assignedSuperstep(node)); + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + nodeToProcessorAndSupertepAssignment_[node].emplace_back(schedule.AssignedProcessor(node), + schedule.AssignedSuperstep(node)); } - commSchedule = schedule.getCommunicationSchedule(); + commSchedule_ = schedule.GetCommunicationSchedule(); } -template -void BspScheduleRecomp::addCommunicationScheduleEntry(unsigned node, unsigned from_proc, unsigned to_proc, unsigned step) { - addCommunicationScheduleEntry(std::make_tuple(node, from_proc, to_proc), step); +template +void BspScheduleRecomp::AddCommunicationScheduleEntry(unsigned node, unsigned fromProc, unsigned toProc, unsigned step) { + AddCommunicationScheduleEntry(std::make_tuple(node, fromProc, toProc), step); } -template -void BspScheduleRecomp::addCommunicationScheduleEntry(KeyTriple key, unsigned step) { - if (step >= number_of_supersteps) { +template +void BspScheduleRecomp::AddCommunicationScheduleEntry(KeyTriple key, 
unsigned step) { + if (step >= numberOfSupersteps_) { throw std::invalid_argument("Invalid Argument while adding communication schedule entry: step out of range."); } - if (std::get<0>(key) >= instance->numberOfVertices()) { + if (std::get<0>(key) >= instance_->NumberOfVertices()) { throw std::invalid_argument("Invalid Argument while adding communication schedule entry: node out of range."); } - if (std::get<1>(key) >= instance->numberOfProcessors()) { + if (std::get<1>(key) >= instance_->NumberOfProcessors()) { throw std::invalid_argument("Invalid Argument while adding communication schedule entry: from processor out of range."); } - if (std::get<2>(key) >= instance->numberOfProcessors()) { + if (std::get<2>(key) >= instance_->NumberOfProcessors()) { throw std::invalid_argument("Invalid Argument while adding communication schedule entry: to processor out of range."); } - commSchedule[key] = step; + commSchedule_[key] = step; } -template -bool BspScheduleRecomp::satisfiesConstraints() const { +template +bool BspScheduleRecomp::SatisfiesConstraints() const { // find first availability - std::vector> node_first_available_on_proc( - instance->numberOfVertices(), std::vector(instance->numberOfProcessors(), std::numeric_limits::max())); + std::vector> nodeFirstAvailableOnProc( + instance_->NumberOfVertices(), + std::vector(instance_->NumberOfProcessors(), std::numeric_limits::max())); - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - for (const std::pair &compute_step : node_to_processor_and_supertep_assignment[node]) { - node_first_available_on_proc[node][compute_step.first] - = std::min(node_first_available_on_proc[node][compute_step.first], compute_step.second); + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + for (const std::pair &computeStep : nodeToProcessorAndSupertepAssignment_[node]) { + nodeFirstAvailableOnProc[node][computeStep.first] + = std::min(nodeFirstAvailableOnProc[node][computeStep.first], 
computeStep.second); } } - for (auto const &[key, val] : commSchedule) { - const vertex_idx &node = std::get<0>(key); - const unsigned &to_proc = std::get<2>(key); + for (auto const &[key, val] : commSchedule_) { + const VertexIdx &node = std::get<0>(key); + const unsigned &toProc = std::get<2>(key); - node_first_available_on_proc[node][to_proc] = std::min(node_first_available_on_proc[node][to_proc], val + 1); + nodeFirstAvailableOnProc[node][toProc] = std::min(nodeFirstAvailableOnProc[node][toProc], val + 1); } // check validity - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - for (vertex_idx pred : instance->getComputationalDag().parents(node)) { - for (const std::pair &compute_step : node_to_processor_and_supertep_assignment[node]) { - if (node_first_available_on_proc[pred][compute_step.first] > compute_step.second) { + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + for (VertexIdx pred : instance_->GetComputationalDag().Parents(node)) { + for (const std::pair &computeStep : nodeToProcessorAndSupertepAssignment_[node]) { + if (nodeFirstAvailableOnProc[pred][computeStep.first] > computeStep.second) { // std::cout << "Not a valid schedule: parent " << pred << " of node "<< node << - //" not yet available on processor " << compute_step.first << " in superstep "<< compute_step.second <<"." << std::endl; + //" not yet available on processor " << computeStep.first << " in superstep "<< computeStep.second <<"." 
<< std::endl; return false; } } } } - for (auto const &[key, val] : commSchedule) { - const vertex_idx &node = std::get<0>(key); - const unsigned &from_proc = std::get<1>(key); + for (auto const &[key, val] : commSchedule_) { + const VertexIdx &node = std::get<0>(key); + const unsigned &fromProc = std::get<1>(key); - if (node_first_available_on_proc[node][from_proc] > val) { + if (nodeFirstAvailableOnProc[node][fromProc] > val) { // std::cout << "Not a valid schedule: node " << node << " not yet available for sending from processor " // << from_proc << " in superstep "<< val <<"." << std::endl; return false; @@ -217,111 +218,110 @@ bool BspScheduleRecomp::satisfiesConstraints() const { return true; } -template -v_workw_t BspScheduleRecomp::computeWorkCosts() const { - assert(satisfiesConstraints()); +template +VWorkwT BspScheduleRecomp::ComputeWorkCosts() const { + assert(SatisfiesConstraints()); - std::vector> step_proc_work(number_of_supersteps, - std::vector(instance->numberOfProcessors(), 0)); + std::vector> stepProcWork(numberOfSupersteps_, std::vector(instance_->NumberOfProcessors(), 0)); - for (vertex_idx node = 0; node < instance->numberOfVertices(); node++) { - for (const std::pair &processor_superstep : node_to_processor_and_supertep_assignment[node]) { - step_proc_work[processor_superstep.second][processor_superstep.first] - += instance->getComputationalDag().vertex_work_weight(node); + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); node++) { + for (const std::pair &processorSuperstep : nodeToProcessorAndSupertepAssignment_[node]) { + stepProcWork[processorSuperstep.second][processorSuperstep.first] + += instance_->GetComputationalDag().VertexWorkWeight(node); } } - cost_type total_costs = 0; - for (unsigned step = 0; step < number_of_supersteps; step++) { - cost_type max_work = 0; + CostType totalCosts = 0; + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + CostType maxWork = 0; - for (unsigned proc = 0; proc < 
instance->numberOfProcessors(); proc++) { - if (max_work < step_proc_work[step][proc]) { - max_work = step_proc_work[step][proc]; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + if (maxWork < stepProcWork[step][proc]) { + maxWork = stepProcWork[step][proc]; } } - total_costs += max_work; + totalCosts += maxWork; } - return total_costs; + return totalCosts; } -template -v_workw_t BspScheduleRecomp::computeCosts() const { - assert(satisfiesConstraints()); +template +VWorkwT BspScheduleRecomp::ComputeCosts() const { + assert(SatisfiesConstraints()); - std::vector> rec(number_of_supersteps, std::vector(instance->numberOfProcessors(), 0)); - std::vector> send(number_of_supersteps, std::vector(instance->numberOfProcessors(), 0)); + std::vector> rec(numberOfSupersteps_, std::vector(instance_->NumberOfProcessors(), 0)); + std::vector> send(numberOfSupersteps_, std::vector(instance_->NumberOfProcessors(), 0)); - for (auto const &[key, val] : commSchedule) { - send[val][std::get<1>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key)) - * instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); - rec[val][std::get<2>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key)) - * instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); + for (auto const &[key, val] : commSchedule_) { + send[val][std::get<1>(key)] += instance_->SendCosts(std::get<1>(key), std::get<2>(key)) + * instance_->GetComputationalDag().VertexCommWeight(std::get<0>(key)); + rec[val][std::get<2>(key)] += instance_->SendCosts(std::get<1>(key), std::get<2>(key)) + * instance_->GetComputationalDag().VertexCommWeight(std::get<0>(key)); } - cost_type total_costs = 0; - for (unsigned step = 0; step < number_of_supersteps; step++) { - cost_type max_comm = 0; + CostType totalCosts = 0; + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + CostType maxComm = 0; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); 
proc++) { - if (max_comm < send[step][proc]) { - max_comm = send[step][proc]; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + if (maxComm < send[step][proc]) { + maxComm = send[step][proc]; } - if (max_comm < rec[step][proc]) { - max_comm = rec[step][proc]; + if (maxComm < rec[step][proc]) { + maxComm = rec[step][proc]; } } - if (max_comm > 0) { - total_costs += instance->synchronisationCosts() + max_comm * instance->communicationCosts(); + if (maxComm > 0) { + totalCosts += instance_->SynchronisationCosts() + maxComm * instance_->CommunicationCosts(); } } - total_costs += computeWorkCosts(); + totalCosts += ComputeWorkCosts(); - return total_costs; + return totalCosts; } -template -vertex_idx_t BspScheduleRecomp::getTotalAssignments() const { - vertex_idx total = 0; - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - total += node_to_processor_and_supertep_assignment[node].size(); +template +VertexIdxT BspScheduleRecomp::GetTotalAssignments() const { + VertexIdx total = 0; + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + total += nodeToProcessorAndSupertepAssignment_[node].size(); } return total; } -template -void BspScheduleRecomp::mergeSupersteps() { - std::vector new_step_idx(number_of_supersteps); - std::vector comm_phase_empty(number_of_supersteps, true); +template +void BspScheduleRecomp::MergeSupersteps() { + std::vector newStepIdx(numberOfSupersteps_); + std::vector commPhaseEmpty(numberOfSupersteps_, true); - for (auto const &[key, val] : commSchedule) { - comm_phase_empty[val] = false; + for (auto const &[key, val] : commSchedule_) { + commPhaseEmpty[val] = false; } - unsigned current_step_idx = 0; - for (unsigned step = 0; step < number_of_supersteps; ++step) { - new_step_idx[step] = current_step_idx; - if (!comm_phase_empty[step] || step == number_of_supersteps - 1) { - ++current_step_idx; + unsigned currentStepIdx = 0; + for (unsigned step = 0; step < 
numberOfSupersteps_; ++step) { + newStepIdx[step] = currentStepIdx; + if (!commPhaseEmpty[step] || step == numberOfSupersteps_ - 1) { + ++currentStepIdx; } } - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - std::vector> new_assignment; - for (const std::pair &entry : node_to_processor_and_supertep_assignment[node]) { - new_assignment.emplace_back(entry.first, new_step_idx[entry.second]); + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + std::vector> newAssignment; + for (const std::pair &entry : nodeToProcessorAndSupertepAssignment_[node]) { + newAssignment.emplace_back(entry.first, newStepIdx[entry.second]); } - node_to_processor_and_supertep_assignment[node] = new_assignment; + nodeToProcessorAndSupertepAssignment_[node] = newAssignment; } - for (auto &key_step_pair : commSchedule) { - auto &step = key_step_pair.second; - step = new_step_idx[step]; + for (auto &keyStepPair : commSchedule_) { + auto &step = keyStepPair.second; + step = newStepIdx[step]; } - number_of_supersteps = current_step_idx; + numberOfSupersteps_ = currentStepIdx; } } // namespace osp diff --git a/include/osp/bsp/model/IBspSchedule.hpp b/include/osp/bsp/model/IBspSchedule.hpp index 0a4a3d7e..431e31f0 100644 --- a/include/osp/bsp/model/IBspSchedule.hpp +++ b/include/osp/bsp/model/IBspSchedule.hpp @@ -24,41 +24,41 @@ namespace osp { /// @class IBspSchedule /// @brief Interface for a BSP (Bulk Synchronous Parallel) schedule. -template +template class IBspSchedule { - using vertex_idx = vertex_idx_t; + using VertexIdx = VertexIdxT; public: /// @brief Destructor. virtual ~IBspSchedule() = default; - virtual const BspInstance &getInstance() const = 0; + virtual const BspInstance &GetInstance() const = 0; /// @brief Set the assigned superstep for a node. /// @param node The node index. /// @param superstep The assigned superstep. 
- virtual void setAssignedSuperstep(vertex_idx node, unsigned int superstep) = 0; + virtual void SetAssignedSuperstep(VertexIdx node, unsigned int superstep) = 0; /// @brief Set the assigned processor for a node. /// @param node The node index. /// @param processor The assigned processor. - virtual void setAssignedProcessor(vertex_idx node, unsigned int processor) = 0; + virtual void SetAssignedProcessor(VertexIdx node, unsigned int processor) = 0; /// @brief Get the assigned superstep of a node. /// @param node The node index. /// @return The assigned superstep of the node. - /// If the node is not assigned to a superstep, this.numberOfSupersteps() is returned. - virtual unsigned assignedSuperstep(vertex_idx node) const = 0; + /// If the node is not assigned to a superstep, this.NumberOfSupersteps() is returned. + virtual unsigned AssignedSuperstep(VertexIdx node) const = 0; /// @brief Get the assigned processor of a node. /// @param node The node index. /// @return The assigned processor of the node. - /// If the node is not assigned to a processor, this.getInstance().numberOfProcessors() is returned. - virtual unsigned assignedProcessor(vertex_idx node) const = 0; + /// If the node is not assigned to a processor, this.GetInstance().NumberOfProcessors() is returned. + virtual unsigned AssignedProcessor(VertexIdx node) const = 0; /// @brief Get the number of supersteps in the schedule. /// @return The number of supersteps in the schedule. - virtual unsigned numberOfSupersteps() const = 0; + virtual unsigned NumberOfSupersteps() const = 0; }; } // namespace osp diff --git a/include/osp/bsp/model/IBspScheduleEval.hpp b/include/osp/bsp/model/IBspScheduleEval.hpp index 6e0f7a51..c9ceb81e 100644 --- a/include/osp/bsp/model/IBspScheduleEval.hpp +++ b/include/osp/bsp/model/IBspScheduleEval.hpp @@ -24,18 +24,18 @@ namespace osp { /// @class IBspSchedule /// @brief Interface for a BSP (Bulk Synchronous Parallel) schedule. 
-template +template class IBspScheduleEval { - using vertex_idx = vertex_idx_t; + using VertexIdx = VertexIdxT; public: /// @brief Destructor. virtual ~IBspScheduleEval() = default; - virtual v_workw_t computeCosts() const = 0; - virtual v_workw_t computeWorkCosts() const = 0; - virtual unsigned numberOfSupersteps() const = 0; - virtual const BspInstance &getInstance() const = 0; + virtual VWorkwT ComputeCosts() const = 0; + virtual VWorkwT ComputeWorkCosts() const = 0; + virtual unsigned NumberOfSupersteps() const = 0; + virtual const BspInstance &GetInstance() const = 0; }; } // namespace osp diff --git a/include/osp/bsp/model/MaxBspSchedule.hpp b/include/osp/bsp/model/MaxBspSchedule.hpp index d35024d2..93289861 100644 --- a/include/osp/bsp/model/MaxBspSchedule.hpp +++ b/include/osp/bsp/model/MaxBspSchedule.hpp @@ -38,14 +38,14 @@ namespace osp { * * @see BspInstance */ -template -class MaxBspSchedule : public BspSchedule { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, +template +class MaxBspSchedule : public BspSchedule { + static_assert(isComputationalDagV, "BspSchedule can only be used with computational DAGs."); + static_assert(std::is_same_v, VCommwT>, "BspSchedule requires work and comm. weights to have the same type."); protected: - using vertex_idx = vertex_idx_t; + using VertexIdx = VertexIdxT; public: MaxBspSchedule() = delete; @@ -55,7 +55,7 @@ class MaxBspSchedule : public BspSchedule { * * @param inst The BspInstance for the schedule. */ - MaxBspSchedule(const BspInstance &inst) : BspSchedule(inst) {} + MaxBspSchedule(const BspInstance &inst) : BspSchedule(inst) {} /** * @brief Constructs a BspSchedule object with the specified BspInstance, processor assignment, and superstep @@ -65,56 +65,55 @@ class MaxBspSchedule : public BspSchedule { * @param processor_assignment_ The processor assignment for the nodes. 
* @param superstep_assignment_ The superstep assignment for the nodes. */ - MaxBspSchedule(const BspInstance &inst, - const std::vector &processor_assignment_, - const std::vector &superstep_assignment_) - : BspSchedule(inst, processor_assignment_, superstep_assignment_) {} + MaxBspSchedule(const BspInstance &inst, + const std::vector &processorAssignment, + const std::vector &superstepAssignment) + : BspSchedule(inst, processorAssignment, superstepAssignment) {} - MaxBspSchedule(const IBspSchedule &schedule) : BspSchedule(schedule) {} + MaxBspSchedule(const IBspSchedule &schedule) : BspSchedule(schedule) {} - MaxBspSchedule(IBspSchedule &&schedule) : BspSchedule(std::move(schedule)) {} + MaxBspSchedule(IBspSchedule &&schedule) : BspSchedule(std::move(schedule)) {} - MaxBspSchedule(const MaxBspSchedule &schedule) = default; + MaxBspSchedule(const MaxBspSchedule &schedule) = default; - MaxBspSchedule &operator=(const MaxBspSchedule &schedule) = default; + MaxBspSchedule &operator=(const MaxBspSchedule &schedule) = default; - MaxBspSchedule(MaxBspSchedule &&schedule) noexcept = default; + MaxBspSchedule(MaxBspSchedule &&schedule) noexcept = default; - MaxBspSchedule &operator=(MaxBspSchedule &&schedule) noexcept = default; + MaxBspSchedule &operator=(MaxBspSchedule &&schedule) noexcept = default; - template - MaxBspSchedule(const BspInstance &instance_, const MaxBspSchedule &schedule) - : BspSchedule(instance_, schedule) {} + template + MaxBspSchedule(const BspInstance &instance, const MaxBspSchedule &schedule) + : BspSchedule(instance, schedule) {} /** * @brief Destructor for the BspSchedule class. 
*/ virtual ~MaxBspSchedule() = default; - virtual v_workw_t computeCosts() const override { - std::vector>> rec(this->instance->numberOfProcessors(), - std::vector>(this->number_of_supersteps, 0)); - std::vector>> send(this->instance->numberOfProcessors(), - std::vector>(this->number_of_supersteps, 0)); + virtual VWorkwT ComputeCosts() const override { + std::vector>> rec(this->instance_->NumberOfProcessors(), + std::vector>(this->NumberOfSupersteps(), 0)); + std::vector>> send(this->instance_->NumberOfProcessors(), + std::vector>(this->NumberOfSupersteps(), 0)); - compute_lazy_communication_costs(*this, rec, send); - const std::vector> max_comm_per_step = cost_helpers::compute_max_comm_per_step(*this, rec, send); - const std::vector> max_work_per_step = cost_helpers::compute_max_work_per_step(*this); + ComputeLazyCommunicationCosts(*this, rec, send); + const std::vector> maxCommPerStep = cost_helpers::ComputeMaxCommPerStep(*this, rec, send); + const std::vector> maxWorkPerStep = cost_helpers::ComputeMaxWorkPerStep(*this); - v_workw_t costs = 0U; - for (unsigned step = 0U; step < this->number_of_supersteps; step++) { - const v_commw_t step_comm_cost = (step == 0U) ? static_cast>(0) - : max_comm_per_step[step - 1U]; - costs += std::max(step_comm_cost, max_work_per_step[step]); + VWorkwT costs = 0U; + for (unsigned step = 0U; step < this->NumberOfSupersteps(); step++) { + const VCommwT stepCommCost = (step == 0U) ? 
static_cast>(0) : maxCommPerStep[step - 1U]; + costs += std::max(stepCommCost, maxWorkPerStep[step]); - if (step_comm_cost > static_cast>(0)) { - costs += this->instance->synchronisationCosts(); + if (stepCommCost > static_cast>(0)) { + costs += this->instance_->SynchronisationCosts(); } } return costs; } - unsigned virtual getStaleness() const override { return 2; } + unsigned virtual GetStaleness() const override { return 2; } }; } // namespace osp diff --git a/include/osp/bsp/model/MaxBspScheduleCS.hpp b/include/osp/bsp/model/MaxBspScheduleCS.hpp index 7027ea53..80cc9e9a 100644 --- a/include/osp/bsp/model/MaxBspScheduleCS.hpp +++ b/include/osp/bsp/model/MaxBspScheduleCS.hpp @@ -31,14 +31,14 @@ limitations under the License. namespace osp { -template -class MaxBspScheduleCS : public BspScheduleCS { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, +template +class MaxBspScheduleCS : public BspScheduleCS { + static_assert(isComputationalDagV, "BspSchedule can only be used with computational DAGs."); + static_assert(std::is_same_v, VCommwT>, "BspSchedule requires work and comm. weights to have the same type."); protected: - using vertex_idx = vertex_idx_t; + using VertexIdx = VertexIdxT; public: MaxBspScheduleCS() = delete; @@ -48,7 +48,7 @@ class MaxBspScheduleCS : public BspScheduleCS { * * @param inst The BspInstance for the schedule. */ - MaxBspScheduleCS(const BspInstance &inst) : BspScheduleCS(inst) {} + MaxBspScheduleCS(const BspInstance &inst) : BspScheduleCS(inst) {} /** * @brief Constructs a BspSchedule object with the specified BspInstance, processor assignment, and superstep @@ -58,62 +58,62 @@ class MaxBspScheduleCS : public BspScheduleCS { * @param processor_assignment_ The processor assignment for the nodes. * @param superstep_assignment_ The superstep assignment for the nodes. 
*/ - MaxBspScheduleCS(const BspInstance &inst, - const std::vector &processor_assignment_, - const std::vector &superstep_assignment_) - : BspScheduleCS(inst, processor_assignment_, superstep_assignment_) {} + MaxBspScheduleCS(const BspInstance &inst, + const std::vector &processorAssignment, + const std::vector &superstepAssignment) + : BspScheduleCS(inst, processorAssignment, superstepAssignment) {} - MaxBspScheduleCS(const BspScheduleCS &schedule) : BspScheduleCS(schedule) {} + MaxBspScheduleCS(const BspScheduleCS &schedule) : BspScheduleCS(schedule) {} - MaxBspScheduleCS(BspScheduleCS &&schedule) : BspScheduleCS(std::move(schedule)) {} + MaxBspScheduleCS(BspScheduleCS &&schedule) : BspScheduleCS(std::move(schedule)) {} - MaxBspScheduleCS(const MaxBspSchedule &schedule) : BspScheduleCS(schedule) { - this->setAutoCommunicationSchedule(); + MaxBspScheduleCS(const MaxBspSchedule &schedule) : BspScheduleCS(schedule) { + this->SetAutoCommunicationSchedule(); } - MaxBspScheduleCS(MaxBspSchedule &&schedule) : BspScheduleCS(std::move(schedule)) { - this->setAutoCommunicationSchedule(); + MaxBspScheduleCS(MaxBspSchedule &&schedule) : BspScheduleCS(std::move(schedule)) { + this->SetAutoCommunicationSchedule(); } - MaxBspScheduleCS(const MaxBspScheduleCS &schedule) = default; - MaxBspScheduleCS(MaxBspScheduleCS &&schedule) = default; + MaxBspScheduleCS(const MaxBspScheduleCS &schedule) = default; + MaxBspScheduleCS(MaxBspScheduleCS &&schedule) = default; - MaxBspScheduleCS &operator=(const MaxBspScheduleCS &schedule) = default; - MaxBspScheduleCS &operator=(MaxBspScheduleCS &&schedule) = default; + MaxBspScheduleCS &operator=(const MaxBspScheduleCS &schedule) = default; + MaxBspScheduleCS &operator=(MaxBspScheduleCS &&schedule) = default; - template - MaxBspScheduleCS(const BspInstance &instance_, const MaxBspScheduleCS &schedule) - : BspScheduleCS(instance_, schedule) {} + template + MaxBspScheduleCS(const BspInstance &instance, const MaxBspScheduleCS &schedule) + : 
BspScheduleCS(instance, schedule) {} /** * @brief Destructor for the BspSchedule class. */ virtual ~MaxBspScheduleCS() = default; - virtual v_workw_t computeCosts() const override { - std::vector>> rec(this->getInstance().numberOfProcessors(), - std::vector>(this->number_of_supersteps, 0)); + virtual VWorkwT ComputeCosts() const override { + std::vector>> rec(this->instance_->NumberOfProcessors(), + std::vector>(this->NumberOfSupersteps(), 0)); - std::vector>> send(this->getInstance().numberOfProcessors(), - std::vector>(this->number_of_supersteps, 0)); + std::vector>> send(this->instance_->NumberOfProcessors(), + std::vector>(this->NumberOfSupersteps(), 0)); - this->compute_cs_communication_costs_helper(rec, send); - const std::vector> max_comm_per_step = cost_helpers::compute_max_comm_per_step(*this, rec, send); - const std::vector> max_work_per_step = cost_helpers::compute_max_work_per_step(*this); + this->ComputeCsCommunicationCostsHelper(rec, send); + const std::vector> maxCommPerStep = cost_helpers::ComputeMaxCommPerStep(*this, rec, send); + const std::vector> maxWorkPerStep = cost_helpers::ComputeMaxWorkPerStep(*this); - v_workw_t costs = 0U; - for (unsigned step = 0U; step < this->number_of_supersteps; step++) { - const auto step_comm_cost = (step == 0U) ? static_cast>(0) : max_comm_per_step[step - 1U]; - costs += std::max(step_comm_cost, max_work_per_step[step]); + VWorkwT costs = 0U; + for (unsigned step = 0U; step < this->NumberOfSupersteps(); step++) { + const auto stepCommCost = (step == 0U) ? 
static_cast>(0) : maxCommPerStep[step - 1U]; + costs += std::max(stepCommCost, maxWorkPerStep[step]); - if (step_comm_cost > static_cast>(0)) { - costs += this->instance->synchronisationCosts(); + if (stepCommCost > static_cast>(0)) { + costs += this->instance_->SynchronisationCosts(); } } return costs; } - unsigned virtual getStaleness() const override { return 2; } + unsigned virtual GetStaleness() const override { return 2; } }; } // namespace osp diff --git a/include/osp/bsp/model/cost/BufferedSendingCost.hpp b/include/osp/bsp/model/cost/BufferedSendingCost.hpp index 747174d9..c18f3ce5 100644 --- a/include/osp/bsp/model/cost/BufferedSendingCost.hpp +++ b/include/osp/bsp/model/cost/BufferedSendingCost.hpp @@ -30,57 +30,57 @@ namespace osp { * @struct BufferedSendingCost * @brief Implements the buffered sending cost model. */ -template +template struct BufferedSendingCost { - using cost_type = v_commw_t; - - cost_type operator()(const BspSchedule &schedule) const { - const auto &instance = schedule.getInstance(); - unsigned number_of_supersteps = schedule.numberOfSupersteps(); - const auto &node_to_processor_assignment = schedule.assignedProcessors(); - const auto &node_to_superstep_assignment = schedule.assignedSupersteps(); - const auto staleness = schedule.getStaleness(); - - std::vector>> rec(instance.numberOfProcessors(), - std::vector>(number_of_supersteps, 0)); - std::vector>> send(instance.numberOfProcessors(), - std::vector>(number_of_supersteps, 0)); - - for (vertex_idx_t node = 0; node < instance.numberOfVertices(); node++) { - std::vector step_needed(instance.numberOfProcessors(), number_of_supersteps); - for (const auto &target : instance.getComputationalDag().children(node)) { - if (node_to_processor_assignment[node] != node_to_processor_assignment[target]) { - step_needed[node_to_processor_assignment[target]] - = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]); + using CostType = VCommwT; + + 
CostType operator()(const BspSchedule &schedule) const { + const auto &instance = schedule.GetInstance(); + unsigned numberOfSupersteps = schedule.NumberOfSupersteps(); + const auto &nodeToProcessorAssignment = schedule.AssignedProcessors(); + const auto &nodeToSuperstepAssignment = schedule.AssignedSupersteps(); + const auto staleness = schedule.GetStaleness(); + + std::vector>> rec(instance.NumberOfProcessors(), + std::vector>(numberOfSupersteps, 0)); + std::vector>> send(instance.NumberOfProcessors(), + std::vector>(numberOfSupersteps, 0)); + + for (VertexIdxT node = 0; node < instance.NumberOfVertices(); node++) { + std::vector stepNeeded(instance.NumberOfProcessors(), numberOfSupersteps); + for (const auto &target : instance.GetComputationalDag().Children(node)) { + if (nodeToProcessorAssignment[node] != nodeToProcessorAssignment[target]) { + stepNeeded[nodeToProcessorAssignment[target]] + = std::min(stepNeeded[nodeToProcessorAssignment[target]], nodeToSuperstepAssignment[target]); } } - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - if (step_needed[proc] < number_of_supersteps) { - send[node_to_processor_assignment[node]][node_to_superstep_assignment[node]] - += instance.sendCosts(node_to_processor_assignment[node], proc) - * instance.getComputationalDag().vertex_comm_weight(node); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + if (stepNeeded[proc] < numberOfSupersteps) { + send[nodeToProcessorAssignment[node]][nodeToSuperstepAssignment[node]] + += instance.SendCosts(nodeToProcessorAssignment[node], proc) + * instance.GetComputationalDag().VertexCommWeight(node); - if (step_needed[proc] >= staleness) { - rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) - * instance.getComputationalDag().vertex_comm_weight(node); + if (stepNeeded[proc] >= staleness) { + rec[proc][stepNeeded[proc] - staleness] += instance.SendCosts(nodeToProcessorAssignment[node], 
proc) + * instance.GetComputationalDag().VertexCommWeight(node); } } } } - const auto max_comm_per_step = cost_helpers::compute_max_comm_per_step(schedule, rec, send); - v_commw_t comm_costs = 0; - for (unsigned step = 0; step < number_of_supersteps; step++) { - const auto step_comm_cost = max_comm_per_step[step]; - comm_costs += step_comm_cost; + const auto maxCommPerStep = cost_helpers::ComputeMaxCommPerStep(schedule, rec, send); + VCommwT commCosts = 0; + for (unsigned step = 0; step < numberOfSupersteps; step++) { + const auto stepCommCost = maxCommPerStep[step]; + commCosts += stepCommCost; - if (step_comm_cost > 0) { - comm_costs += instance.synchronisationCosts(); + if (stepCommCost > 0) { + commCosts += instance.SynchronisationCosts(); } } - return comm_costs + cost_helpers::compute_work_costs(schedule); + return commCosts + cost_helpers::ComputeWorkCosts(schedule); } }; diff --git a/include/osp/bsp/model/cost/CostModelHelpers.hpp b/include/osp/bsp/model/cost/CostModelHelpers.hpp index fe9b269f..fa3307f9 100644 --- a/include/osp/bsp/model/cost/CostModelHelpers.hpp +++ b/include/osp/bsp/model/cost/CostModelHelpers.hpp @@ -25,89 +25,89 @@ limitations under the License. 
namespace osp { -template +template class BspSchedule; namespace cost_helpers { -template -std::vector> compute_max_comm_per_step(const BspInstance &instance, - unsigned number_of_supersteps, - const std::vector>> &rec, - const std::vector>> &send) { - std::vector> max_comm_per_step(number_of_supersteps, 0); - for (unsigned step = 0; step < number_of_supersteps; step++) { - v_commw_t max_send = 0; - v_commw_t max_rec = 0; - - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - if (max_send < send[proc][step]) { - max_send = send[proc][step]; +template +std::vector> ComputeMaxCommPerStep(const BspInstance &instance, + unsigned numberOfSupersteps, + const std::vector>> &rec, + const std::vector>> &send) { + std::vector> maxCommPerStep(numberOfSupersteps, 0); + for (unsigned step = 0; step < numberOfSupersteps; step++) { + VCommwT maxSend = 0; + VCommwT maxRec = 0; + + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + if (maxSend < send[proc][step]) { + maxSend = send[proc][step]; } - if (max_rec < rec[proc][step]) { - max_rec = rec[proc][step]; + if (maxRec < rec[proc][step]) { + maxRec = rec[proc][step]; } } - max_comm_per_step[step] = std::max(max_send, max_rec) * instance.communicationCosts(); + maxCommPerStep[step] = std::max(maxSend, maxRec) * instance.CommunicationCosts(); } - return max_comm_per_step; + return maxCommPerStep; } -template -std::vector> compute_max_comm_per_step(const BspSchedule &schedule, - const std::vector>> &rec, - const std::vector>> &send) { - return compute_max_comm_per_step(schedule.getInstance(), schedule.numberOfSupersteps(), rec, send); +template +std::vector> ComputeMaxCommPerStep(const BspSchedule &schedule, + const std::vector>> &rec, + const std::vector>> &send) { + return ComputeMaxCommPerStep(schedule.GetInstance(), schedule.NumberOfSupersteps(), rec, send); } -template -std::vector> compute_max_work_per_step(const BspInstance &instance, - unsigned number_of_supersteps, - const 
std::vector &node_to_processor_assignment, - const std::vector &node_to_superstep_assignment) { - std::vector>> work = std::vector>>( - number_of_supersteps, std::vector>(instance.numberOfProcessors(), 0)); - for (const auto &node : instance.vertices()) { - work[node_to_superstep_assignment[node]][node_to_processor_assignment[node]] - += instance.getComputationalDag().vertex_work_weight(node); +template +std::vector> ComputeMaxWorkPerStep(const BspInstance &instance, + unsigned numberOfSupersteps, + const std::vector &nodeToProcessorAssignment, + const std::vector &nodeToSuperstepAssignment) { + std::vector>> work = std::vector>>( + numberOfSupersteps, std::vector>(instance.NumberOfProcessors(), 0)); + for (const auto &node : instance.Vertices()) { + work[nodeToSuperstepAssignment[node]][nodeToProcessorAssignment[node]] + += instance.GetComputationalDag().VertexWorkWeight(node); } - std::vector> max_work_per_step(number_of_supersteps, 0); - for (unsigned step = 0; step < number_of_supersteps; step++) { - v_workw_t max_work = 0; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - if (max_work < work[step][proc]) { - max_work = work[step][proc]; + std::vector> maxWorkPerStep(numberOfSupersteps, 0); + for (unsigned step = 0; step < numberOfSupersteps; step++) { + VWorkwT maxWork = 0; + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + if (maxWork < work[step][proc]) { + maxWork = work[step][proc]; } } - max_work_per_step[step] = max_work; + maxWorkPerStep[step] = maxWork; } - return max_work_per_step; + return maxWorkPerStep; } -template -std::vector> compute_max_work_per_step(const BspSchedule &schedule) { - return compute_max_work_per_step( - schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps()); +template +std::vector> ComputeMaxWorkPerStep(const BspSchedule &schedule) { + return ComputeMaxWorkPerStep( + schedule.GetInstance(), 
schedule.NumberOfSupersteps(), schedule.AssignedProcessors(), schedule.AssignedSupersteps()); } -template -v_workw_t compute_work_costs(const BspInstance &instance, - unsigned number_of_supersteps, - const std::vector &node_to_processor_assignment, - const std::vector &node_to_superstep_assignment) { - std::vector> max_work_per_step - = compute_max_work_per_step(instance, number_of_supersteps, node_to_processor_assignment, node_to_superstep_assignment); +template +VWorkwT ComputeWorkCosts(const BspInstance &instance, + unsigned numberOfSupersteps, + const std::vector &nodeToProcessorAssignment, + const std::vector &nodeToSuperstepAssignment) { + std::vector> maxWorkPerStep + = ComputeMaxWorkPerStep(instance, numberOfSupersteps, nodeToProcessorAssignment, nodeToSuperstepAssignment); - return std::accumulate(max_work_per_step.begin(), max_work_per_step.end(), static_cast>(0)); + return std::accumulate(maxWorkPerStep.begin(), maxWorkPerStep.end(), static_cast>(0)); } -template -v_workw_t compute_work_costs(const BspSchedule &schedule) { - return compute_work_costs( - schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps()); +template +VWorkwT ComputeWorkCosts(const BspSchedule &schedule) { + return ComputeWorkCosts( + schedule.GetInstance(), schedule.NumberOfSupersteps(), schedule.AssignedProcessors(), schedule.AssignedSupersteps()); } } // namespace cost_helpers diff --git a/include/osp/bsp/model/cost/LazyCommunicationCost.hpp b/include/osp/bsp/model/cost/LazyCommunicationCost.hpp index a0497174..523b2ba2 100644 --- a/include/osp/bsp/model/cost/LazyCommunicationCost.hpp +++ b/include/osp/bsp/model/cost/LazyCommunicationCost.hpp @@ -26,79 +26,77 @@ limitations under the License. 
namespace osp { -template -void compute_lazy_communication_costs(const BspInstance &instance, - unsigned number_of_supersteps, - const std::vector &node_to_processor_assignment, - const std::vector &node_to_superstep_assignment, - const unsigned staleness, - std::vector>> &rec, - std::vector>> &send) { - for (const auto &node : instance.vertices()) { - std::vector step_needed(instance.numberOfProcessors(), number_of_supersteps); - for (const auto &target : instance.getComputationalDag().children(node)) { - if (node_to_processor_assignment[node] != node_to_processor_assignment[target]) { - step_needed[node_to_processor_assignment[target]] - = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]); +template +void ComputeLazyCommunicationCosts(const BspInstance &instance, + unsigned numberOfSupersteps, + const std::vector &nodeToProcessorAssignment, + const std::vector &nodeToSuperstepAssignment, + const unsigned staleness, + std::vector>> &rec, + std::vector>> &send) { + for (const auto &node : instance.Vertices()) { + std::vector stepNeeded(instance.NumberOfProcessors(), numberOfSupersteps); + for (const auto &target : instance.GetComputationalDag().Children(node)) { + if (nodeToProcessorAssignment[node] != nodeToProcessorAssignment[target]) { + stepNeeded[nodeToProcessorAssignment[target]] + = std::min(stepNeeded[nodeToProcessorAssignment[target]], nodeToSuperstepAssignment[target]); } } - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - if (step_needed[proc] < number_of_supersteps) { - send[node_to_processor_assignment[node]][step_needed[proc] - staleness] - += instance.sendCosts(node_to_processor_assignment[node], proc) - * instance.getComputationalDag().vertex_comm_weight(node); - rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) - * instance.getComputationalDag().vertex_comm_weight(node); + for (unsigned proc = 0; proc < 
instance.NumberOfProcessors(); proc++) { + if (stepNeeded[proc] < numberOfSupersteps) { + send[nodeToProcessorAssignment[node]][stepNeeded[proc] - staleness] + += instance.SendCosts(nodeToProcessorAssignment[node], proc) + * instance.GetComputationalDag().VertexCommWeight(node); + rec[proc][stepNeeded[proc] - staleness] += instance.SendCosts(nodeToProcessorAssignment[node], proc) + * instance.GetComputationalDag().VertexCommWeight(node); } } } } -template -void compute_lazy_communication_costs(const BspSchedule &schedule, - std::vector>> &rec, - std::vector>> &send) { - compute_lazy_communication_costs(schedule.getInstance(), - schedule.numberOfSupersteps(), - schedule.assignedProcessors(), - schedule.assignedSupersteps(), - schedule.getStaleness(), - rec, - send); +template +void ComputeLazyCommunicationCosts(const BspSchedule &schedule, + std::vector>> &rec, + std::vector>> &send) { + ComputeLazyCommunicationCosts(schedule.GetInstance(), + schedule.NumberOfSupersteps(), + schedule.AssignedProcessors(), + schedule.AssignedSupersteps(), + schedule.GetStaleness(), + rec, + send); } /** * @struct LazyCommunicationCost * @brief Implements the lazy communication cost model. 
*/ -template +template struct LazyCommunicationCost { - using cost_type = v_workw_t; + using CostType = VWorkwT; - cost_type operator()(const BspSchedule &schedule) const { - const auto &number_of_processors = schedule.getInstance().numberOfProcessors(); - const auto &number_of_supersteps = schedule.numberOfSupersteps(); + CostType operator()(const BspSchedule &schedule) const { + const auto &numberOfProcessors = schedule.GetInstance().NumberOfProcessors(); + const auto &numberOfSupersteps = schedule.NumberOfSupersteps(); - std::vector>> rec(number_of_processors, - std::vector>(number_of_supersteps, 0)); - std::vector>> send(number_of_processors, - std::vector>(number_of_supersteps, 0)); + std::vector>> rec(numberOfProcessors, std::vector>(numberOfSupersteps, 0)); + std::vector>> send(numberOfProcessors, std::vector>(numberOfSupersteps, 0)); - compute_lazy_communication_costs(schedule, rec, send); - const auto max_comm_per_step = cost_helpers::compute_max_comm_per_step(schedule, rec, send); + ComputeLazyCommunicationCosts(schedule, rec, send); + const auto maxCommPerStep = cost_helpers::ComputeMaxCommPerStep(schedule, rec, send); - v_commw_t comm_costs = 0; - for (unsigned step = 0; step < number_of_supersteps; step++) { - const auto step_comm_cost = max_comm_per_step[step]; - comm_costs += step_comm_cost; + VCommwT commCosts = 0; + for (unsigned step = 0; step < numberOfSupersteps; step++) { + const auto stepCommCost = maxCommPerStep[step]; + commCosts += stepCommCost; - if (step_comm_cost > 0) { - comm_costs += schedule.getInstance().synchronisationCosts(); + if (stepCommCost > 0) { + commCosts += schedule.GetInstance().SynchronisationCosts(); } } - return comm_costs + cost_helpers::compute_work_costs(schedule); + return commCosts + cost_helpers::ComputeWorkCosts(schedule); } }; diff --git a/include/osp/bsp/model/cost/TotalCommunicationCost.hpp b/include/osp/bsp/model/cost/TotalCommunicationCost.hpp index af97e5c8..b92c2c6f 100644 --- 
a/include/osp/bsp/model/cost/TotalCommunicationCost.hpp +++ b/include/osp/bsp/model/cost/TotalCommunicationCost.hpp @@ -27,35 +27,35 @@ namespace osp { * @struct TotalCommunicationCost * @brief Implements the total communication cost model. */ -template +template struct TotalCommunicationCost { - using cost_type = double; + using CostType = double; - cost_type operator()(const BspSchedule &schedule) const { - const auto &instance = schedule.getInstance(); - const auto &node_to_processor_assignment = schedule.assignedProcessors(); + CostType operator()(const BspSchedule &schedule) const { + const auto &instance = schedule.GetInstance(); + const auto &nodeToProcessorAssignment = schedule.AssignedProcessors(); - v_commw_t total_communication = 0; + VCommwT totalCommunication = 0; - for (const auto &v : instance.vertices()) { - for (const auto &target : instance.getComputationalDag().children(v)) { - if (node_to_processor_assignment[v] != node_to_processor_assignment[target]) { - total_communication += instance.sendCosts(node_to_processor_assignment[v], node_to_processor_assignment[target]) - * instance.getComputationalDag().vertex_comm_weight(v); + for (const auto &v : instance.Vertices()) { + for (const auto &target : instance.GetComputationalDag().Children(v)) { + if (nodeToProcessorAssignment[v] != nodeToProcessorAssignment[target]) { + totalCommunication += instance.SendCosts(nodeToProcessorAssignment[v], nodeToProcessorAssignment[target]) + * instance.GetComputationalDag().VertexCommWeight(v); } } } - auto comm_cost = total_communication * static_cast(instance.communicationCosts()) - / static_cast(instance.numberOfProcessors()); + auto commCost = totalCommunication * static_cast(instance.CommunicationCosts()) + / static_cast(instance.NumberOfProcessors()); - const unsigned number_of_supersteps = schedule.numberOfSupersteps(); + const unsigned numberOfSupersteps = schedule.NumberOfSupersteps(); - auto work_cost = cost_helpers::compute_work_costs(schedule); - auto 
sync_cost = static_cast>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0) - * instance.synchronisationCosts(); + auto workCost = cost_helpers::ComputeWorkCosts(schedule); + auto syncCost + = static_cast>(numberOfSupersteps > 1 ? numberOfSupersteps - 1 : 0) * instance.SynchronisationCosts(); - return comm_cost + work_cost + sync_cost; + return commCost + workCost + syncCost; } }; diff --git a/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp b/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp index 27641937..2adb71ad 100644 --- a/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp +++ b/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp @@ -29,43 +29,43 @@ namespace osp { * @struct TotalLambdaCommunicationCost * @brief Implements the total lambda communication cost model. */ -template +template struct TotalLambdaCommunicationCost { - using cost_type = double; + using CostType = double; - cost_type operator()(const BspSchedule &schedule) const { - const auto &instance = schedule.getInstance(); - const auto &node_to_processor_assignment = schedule.assignedProcessors(); + CostType operator()(const BspSchedule &schedule) const { + const auto &instance = schedule.GetInstance(); + const auto &nodeToProcessorAssignment = schedule.AssignedProcessors(); - v_commw_t comm_costs = 0; - const double comm_multiplier = 1.0 / instance.numberOfProcessors(); + VCommwT commCosts = 0; + const double commMultiplier = 1.0 / instance.NumberOfProcessors(); - for (const auto &v : instance.vertices()) { - if (instance.getComputationalDag().out_degree(v) == 0) { + for (const auto &v : instance.Vertices()) { + if (instance.GetComputationalDag().OutDegree(v) == 0) { continue; } - std::unordered_set target_procs; - for (const auto &target : instance.getComputationalDag().children(v)) { - target_procs.insert(node_to_processor_assignment[target]); + std::unordered_set targetProcs; + for (const auto &target : instance.GetComputationalDag().Children(v)) { 
+ targetProcs.insert(nodeToProcessorAssignment[target]); } - const unsigned source_proc = node_to_processor_assignment[v]; - const auto v_comm_cost = instance.getComputationalDag().vertex_comm_weight(v); + const unsigned sourceProc = nodeToProcessorAssignment[v]; + const auto vCommCost = instance.GetComputationalDag().VertexCommWeight(v); - for (const auto &target_proc : target_procs) { - comm_costs += v_comm_cost * instance.sendCosts(source_proc, target_proc); + for (const auto &targetProc : targetProcs) { + commCosts += vCommCost * instance.SendCosts(sourceProc, targetProc); } } - const unsigned number_of_supersteps = schedule.numberOfSupersteps(); + const unsigned numberOfSupersteps = schedule.NumberOfSupersteps(); - auto comm_cost = comm_costs * comm_multiplier * static_cast(instance.communicationCosts()); - auto work_cost = cost_helpers::compute_work_costs(schedule); - auto sync_cost = static_cast>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0) - * instance.synchronisationCosts(); + auto commCost = commCosts * commMultiplier * static_cast(instance.CommunicationCosts()); + auto workCost = cost_helpers::ComputeWorkCosts(schedule); + auto syncCost + = static_cast>(numberOfSupersteps > 1 ? numberOfSupersteps - 1 : 0) * instance.SynchronisationCosts(); - return comm_cost + static_cast(work_cost) + static_cast(sync_cost); + return commCost + static_cast(workCost) + static_cast(syncCost); } }; diff --git a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp index 1cc65621..3c529b87 100644 --- a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp +++ b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp @@ -30,12 +30,12 @@ namespace osp { * * This class precomputes and stores the list of compatible processors for each node type. * - * @tparam Graph_t The type of the computational DAG. + * @tparam GraphT The type of the computational DAG. 
*/ -template +template class CompatibleProcessorRange { - std::vector> typeProcessorIdx; - const BspInstance *instance = nullptr; + std::vector> typeProcessorIdx_; + const BspInstance *instance_ = nullptr; public: /** @@ -48,23 +48,23 @@ class CompatibleProcessorRange { * * @param inst The BspInstance. */ - CompatibleProcessorRange(const BspInstance &inst) { initialize(inst); } + CompatibleProcessorRange(const BspInstance &inst) { Initialize(inst); } /** * @brief Initializes the CompatibleProcessorRange with a BspInstance. * * @param inst The BspInstance. */ - void initialize(const BspInstance &inst) { - instance = &inst; + void Initialize(const BspInstance &inst) { + instance_ = &inst; - if constexpr (has_typed_vertices_v) { - typeProcessorIdx.resize(inst.getComputationalDag().num_vertex_types()); + if constexpr (hasTypedVerticesV) { + typeProcessorIdx_.resize(inst.GetComputationalDag().NumVertexTypes()); - for (v_type_t v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) { - for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++) { - if (inst.isCompatibleType(v_type, inst.processorType(proc))) { - typeProcessorIdx[v_type].push_back(proc); + for (VTypeT vType = 0; vType < inst.GetComputationalDag().NumVertexTypes(); vType++) { + for (unsigned proc = 0; proc < inst.NumberOfProcessors(); proc++) { + if (inst.IsCompatibleType(vType, inst.ProcessorType(proc))) { + typeProcessorIdx_[vType].push_back(proc); } } } @@ -77,12 +77,12 @@ class CompatibleProcessorRange { * @param type The node type. * @return A const reference to a vector of compatible processor indices. 
*/ - [[nodiscard]] const auto &compatible_processors_type(const v_type_t type) const { - assert(instance != nullptr); - if constexpr (has_typed_vertices_v) { - return typeProcessorIdx[type]; + [[nodiscard]] const auto &CompatibleProcessorsType(const VTypeT type) const { + assert(instance_ != nullptr); + if constexpr (hasTypedVerticesV) { + return typeProcessorIdx_[type]; } else { - return instance->processors(); + return instance_->Processors(); } } @@ -92,9 +92,9 @@ class CompatibleProcessorRange { * @param vertex The vertex index. * @return A const reference to a vector of compatible processor indices. */ - [[nodiscard]] const auto &compatible_processors_vertex(const vertex_idx_t vertex) const { - assert(instance != nullptr); - return compatible_processors_type(instance->getComputationalDag().vertex_type(vertex)); + [[nodiscard]] const auto &CompatibleProcessorsVertex(const VertexIdxT vertex) const { + assert(instance_ != nullptr); + return CompatibleProcessorsType(instance_->GetComputationalDag().VertexType(vertex)); } }; diff --git a/include/osp/bsp/model/util/SetSchedule.hpp b/include/osp/bsp/model/util/SetSchedule.hpp index 2fce50d8..60277343 100644 --- a/include/osp/bsp/model/util/SetSchedule.hpp +++ b/include/osp/bsp/model/util/SetSchedule.hpp @@ -37,152 +37,148 @@ namespace osp { * * @note This class assumes that the `BspInstance` and `ICommunicationScheduler` classes are defined and accessible. 
*/ -template -class SetSchedule : public IBspSchedule { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); +template +class SetSchedule : public IBspSchedule { + static_assert(isComputationalDagV, "BspSchedule can only be used with computational DAGs."); private: - using vertex_idx = vertex_idx_t; + using VertexIdx = VertexIdxT; - const BspInstance *instance; + const BspInstance *instance_; public: - unsigned number_of_supersteps; + unsigned numberOfSupersteps_; - std::vector>> step_processor_vertices; + std::vector>> stepProcessorVertices_; SetSchedule() = default; - SetSchedule(const BspInstance &inst, unsigned num_supersteps) - : instance(&inst), number_of_supersteps(num_supersteps) { - step_processor_vertices = std::vector>>( - num_supersteps, std::vector>(inst.numberOfProcessors())); + SetSchedule(const BspInstance &inst, unsigned numSupersteps) : instance_(&inst), numberOfSupersteps_(numSupersteps) { + stepProcessorVertices_ = std::vector>>( + numSupersteps, std::vector>(inst.NumberOfProcessors())); } - SetSchedule(const IBspSchedule &schedule) - : instance(&schedule.getInstance()), number_of_supersteps(schedule.numberOfSupersteps()) { - step_processor_vertices = std::vector>>( - schedule.numberOfSupersteps(), - std::vector>(schedule.getInstance().numberOfProcessors())); + SetSchedule(const IBspSchedule &schedule) + : instance_(&schedule.GetInstance()), numberOfSupersteps_(schedule.NumberOfSupersteps()) { + stepProcessorVertices_ = std::vector>>( + schedule.NumberOfSupersteps(), std::vector>(schedule.GetInstance().NumberOfProcessors())); - for (const auto v : schedule.getInstance().vertices()) { - step_processor_vertices[schedule.assignedSuperstep(v)][schedule.assignedProcessor(v)].insert(v); + for (const auto v : schedule.GetInstance().Vertices()) { + stepProcessorVertices_[schedule.AssignedSuperstep(v)][schedule.AssignedProcessor(v)].insert(v); } } virtual ~SetSchedule() = default; - void clear() { - 
step_processor_vertices.clear(); - number_of_supersteps = 0; + void Clear() { + stepProcessorVertices_.clear(); + numberOfSupersteps_ = 0; } - const BspInstance &getInstance() const override { return *instance; } + const BspInstance &GetInstance() const override { return *instance_; } - unsigned numberOfSupersteps() const override { return number_of_supersteps; } + unsigned NumberOfSupersteps() const override { return numberOfSupersteps_; } - void setAssignedSuperstep(vertex_idx node, unsigned superstep) override { - unsigned assigned_processor = 0; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (unsigned step = 0; step < number_of_supersteps; step++) { - if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) { - assigned_processor = proc; - step_processor_vertices[step][proc].erase(node); + void SetAssignedSuperstep(VertexIdx node, unsigned superstep) override { + unsigned assignedProcessor = 0; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + if (stepProcessorVertices_[step][proc].find(node) != stepProcessorVertices_[step][proc].end()) { + assignedProcessor = proc; + stepProcessorVertices_[step][proc].erase(node); } } } - step_processor_vertices[superstep][assigned_processor].insert(node); + stepProcessorVertices_[superstep][assignedProcessor].insert(node); } - void setAssignedProcessor(vertex_idx node, unsigned processor) override { - unsigned assigned_step = 0; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (unsigned step = 0; step < number_of_supersteps; step++) { - if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) { - assigned_step = step; - step_processor_vertices[step][proc].erase(node); + void SetAssignedProcessor(VertexIdx node, unsigned processor) override { + unsigned assignedStep = 0; + for (unsigned proc = 
0; proc < instance_->NumberOfProcessors(); proc++) { + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + if (stepProcessorVertices_[step][proc].find(node) != stepProcessorVertices_[step][proc].end()) { + assignedStep = step; + stepProcessorVertices_[step][proc].erase(node); } } } - step_processor_vertices[assigned_step][processor].insert(node); + stepProcessorVertices_[assignedStep][processor].insert(node); } /// @brief returns number of supersteps if the node is not assigned /// @param node /// @return the assigned superstep - unsigned assignedSuperstep(vertex_idx node) const override { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (unsigned step = 0; step < number_of_supersteps; step++) { - if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) { + unsigned AssignedSuperstep(VertexIdx node) const override { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + if (stepProcessorVertices_[step][proc].find(node) != stepProcessorVertices_[step][proc].end()) { return step; } } } - return number_of_supersteps; + return numberOfSupersteps_; } /// @brief returns number of processors if node is not assigned /// @param node /// @return the assigned processor - unsigned assignedProcessor(vertex_idx node) const override { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (unsigned step = 0; step < number_of_supersteps; step++) { - if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) { + unsigned AssignedProcessor(VertexIdx node) const override { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + for (unsigned step = 0; step < numberOfSupersteps_; step++) { + if (stepProcessorVertices_[step][proc].find(node) != stepProcessorVertices_[step][proc].end()) { return proc; } } } - return 
instance->numberOfProcessors(); + return instance_->NumberOfProcessors(); } - void mergeSupersteps(unsigned start_step, unsigned end_step) { - unsigned step = start_step + 1; - for (; step <= end_step; step++) { - for (unsigned proc = 0; proc < getInstance().numberOfProcessors(); proc++) { - step_processor_vertices[start_step][proc].merge(step_processor_vertices[step][proc]); + void MergeSupersteps(unsigned startStep, unsigned endStep) { + unsigned step = startStep + 1; + for (; step <= endStep; step++) { + for (unsigned proc = 0; proc < GetInstance().NumberOfProcessors(); proc++) { + stepProcessorVertices_[startStep][proc].merge(stepProcessorVertices_[step][proc]); } } - for (; step < number_of_supersteps; step++) { - for (unsigned proc = 0; proc < getInstance().numberOfProcessors(); proc++) { - step_processor_vertices[step - (end_step - start_step)][proc] = std::move(step_processor_vertices[step][proc]); + for (; step < numberOfSupersteps_; step++) { + for (unsigned proc = 0; proc < GetInstance().NumberOfProcessors(); proc++) { + stepProcessorVertices_[step - (endStep - startStep)][proc] = std::move(stepProcessorVertices_[step][proc]); } } } }; -template -static void printSetScheduleWorkMemNodesGrid(std::ostream &os, - const SetSchedule &set_schedule, - bool print_detailed_node_assignment = false) { - const auto &instance = set_schedule.getInstance(); - const unsigned num_processors = instance.numberOfProcessors(); - const unsigned num_supersteps = set_schedule.numberOfSupersteps(); +template +static void PrintSetScheduleWorkMemNodesGrid(std::ostream &os, + const SetSchedule &setSchedule, + bool printDetailedNodeAssignment = false) { + const auto &instance = setSchedule.GetInstance(); + const unsigned numProcessors = instance.NumberOfProcessors(); + const unsigned numSupersteps = setSchedule.NumberOfSupersteps(); // Data structures to store aggregated work, memory, and nodes - std::vector>> total_work_per_cell(num_processors, - std::vector>(num_supersteps, 0.0)); 
- std::vector>> total_memory_per_cell(num_processors, - std::vector>(num_supersteps, 0.0)); - std::vector>>> nodes_per_cell( - num_processors, std::vector>>(num_supersteps)); + std::vector>> totalWorkPerCell(numProcessors, std::vector>(numSupersteps, 0.0)); + std::vector>> totalMemoryPerCell(numProcessors, std::vector>(numSupersteps, 0.0)); + std::vector>>> nodesPerCell( + numProcessors, std::vector>>(numSupersteps)); // Aggregate work, memory, and collect nodes // Loop order (p, s) matches total_work_per_cell[p][s] and nodes_per_cell[p][s] - for (unsigned p = 0; p < num_processors; ++p) { - for (unsigned s = 0; s < num_supersteps; ++s) { + for (unsigned p = 0; p < numProcessors; ++p) { + for (unsigned s = 0; s < numSupersteps; ++s) { // Access set_schedule.step_processor_vertices[s][p] as per the provided snippet. // Add checks for bounds as set_schedule.step_processor_vertices might not be fully initialized // for all s, p combinations if it's dynamically sized. - if (s < set_schedule.step_processor_vertices.size() && p < set_schedule.step_processor_vertices[s].size()) { - for (const auto &node_idx : set_schedule.step_processor_vertices[s][p]) { - total_work_per_cell[p][s] += instance.getComputationalDag().vertex_work_weight(node_idx); - total_memory_per_cell[p][s] += instance.getComputationalDag().vertex_mem_weight(node_idx); - nodes_per_cell[p][s].push_back(node_idx); + if (s < setSchedule.step_processor_vertices.size() && p < setSchedule.step_processor_vertices[s].size()) { + for (const auto &nodeIdx : setSchedule.step_processor_vertices[s][p]) { + totalWorkPerCell[p][s] += instance.GetComputationalDag().VertexWorkWeight(nodeIdx); + totalMemoryPerCell[p][s] += instance.GetComputationalDag().VertexMemWeight(nodeIdx); + nodesPerCell[p][s].push_back(nodeIdx); } } } @@ -190,44 +186,44 @@ static void printSetScheduleWorkMemNodesGrid(std::ostream &os, // Determine cell width for formatting // Accommodates "W:XXXXX M:XXXXX N:XXXXX" (max 5 digits for each) - const int 
cell_width = 25; + const int cellWidth = 25; // Print header row (Supersteps) - os << std::left << std::setw(cell_width) << "P\\SS"; - for (unsigned s = 0; s < num_supersteps; ++s) { - os << std::setw(cell_width) << ("SS " + std::to_string(s)); + os << std::left << std::setw(cellWidth) << "P\\SS"; + for (unsigned s = 0; s < numSupersteps; ++s) { + os << std::setw(cellWidth) << ("SS " + std::to_string(s)); } os << "\n"; // Print separator line - os << std::string(cell_width * (num_supersteps + 1), '-') << "\n"; + os << std::string(cellWidth * (numSupersteps + 1), '-') << "\n"; // Print data rows (Processors) - for (unsigned p = 0; p < num_processors; ++p) { - os << std::left << std::setw(cell_width) << ("P " + std::to_string(p)); - for (unsigned s = 0; s < num_supersteps; ++s) { - std::stringstream cell_content; - cell_content << "W:" << std::fixed << std::setprecision(0) << total_work_per_cell[p][s] << " M:" << std::fixed - << std::setprecision(0) << total_memory_per_cell[p][s] - << " N:" << nodes_per_cell[p][s].size(); // Add node count - os << std::left << std::setw(cell_width) << cell_content.str(); + for (unsigned p = 0; p < numProcessors; ++p) { + os << std::left << std::setw(cellWidth) << ("P " + std::to_string(p)); + for (unsigned s = 0; s < numSupersteps; ++s) { + std::stringstream cellContent; + cellContent << "W:" << std::fixed << std::setprecision(0) << totalWorkPerCell[p][s] << " M:" << std::fixed + << std::setprecision(0) << totalMemoryPerCell[p][s] + << " N:" << nodesPerCell[p][s].size(); // Add node count + os << std::left << std::setw(cellWidth) << cellContent.str(); } os << "\n"; } - if (print_detailed_node_assignment) { + if (printDetailedNodeAssignment) { os << "\n"; // Add a newline for separation between grid and detailed list // Print detailed node lists below the grid os << "Detailed Node Assignments:\n"; os << std::string(30, '=') << "\n"; // Separator - for (unsigned p = 0; p < num_processors; ++p) { - for (unsigned s = 0; s < 
num_supersteps; ++s) { - if (!nodes_per_cell[p][s].empty()) { + for (unsigned p = 0; p < numProcessors; ++p) { + for (unsigned s = 0; s < numSupersteps; ++s) { + if (!nodesPerCell[p][s].empty()) { os << "P" << p << " SS" << s << " Nodes: ["; - for (size_t i = 0; i < nodes_per_cell[p][s].size(); ++i) { - os << nodes_per_cell[p][s][i]; - if (i < nodes_per_cell[p][s].size() - 1) { + for (size_t i = 0; i < nodesPerCell[p][s].size(); ++i) { + os << nodesPerCell[p][s][i]; + if (i < nodesPerCell[p][s].size() - 1) { os << ", "; } } diff --git a/include/osp/bsp/model/util/VectorSchedule.hpp b/include/osp/bsp/model/util/VectorSchedule.hpp index 3c0ae212..0c0f1136 100644 --- a/include/osp/bsp/model/util/VectorSchedule.hpp +++ b/include/osp/bsp/model/util/VectorSchedule.hpp @@ -25,58 +25,58 @@ limitations under the License. namespace osp { -template -class VectorSchedule : public IBspSchedule { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); +template +class VectorSchedule : public IBspSchedule { + static_assert(isComputationalDagV, "BspSchedule can only be used with computational DAGs."); private: - const BspInstance *instance; + const BspInstance *instance_; public: - unsigned int number_of_supersteps; + unsigned int numberOfSupersteps_; - std::vector node_to_processor_assignment; - std::vector node_to_superstep_assignment; + std::vector nodeToProcessorAssignment_; + std::vector nodeToSuperstepAssignment_; /** * @brief Default constructor for VectorSchedule. 
*/ - VectorSchedule() : instance(nullptr), number_of_supersteps(0) {} + VectorSchedule() : instance_(nullptr), numberOfSupersteps_(0) {} - VectorSchedule(const BspInstance &inst) : instance(&inst), number_of_supersteps(0) { - node_to_processor_assignment = std::vector(inst.numberOfVertices(), instance->numberOfProcessors()); - node_to_superstep_assignment = std::vector(inst.numberOfVertices(), 0); + VectorSchedule(const BspInstance &inst) : instance_(&inst), numberOfSupersteps_(0) { + nodeToProcessorAssignment_ = std::vector(inst.NumberOfVertices(), instance_->NumberOfProcessors()); + nodeToSuperstepAssignment_ = std::vector(inst.NumberOfVertices(), 0); } - VectorSchedule(const IBspSchedule &schedule) - : instance(&schedule.getInstance()), number_of_supersteps(schedule.numberOfSupersteps()) { - node_to_processor_assignment - = std::vector(schedule.getInstance().numberOfVertices(), instance->numberOfProcessors()); - node_to_superstep_assignment - = std::vector(schedule.getInstance().numberOfVertices(), schedule.numberOfSupersteps()); + VectorSchedule(const IBspSchedule &schedule) + : instance_(&schedule.GetInstance()), numberOfSupersteps_(schedule.NumberOfSupersteps()) { + nodeToProcessorAssignment_ + = std::vector(schedule.GetInstance().NumberOfVertices(), instance_->NumberOfProcessors()); + nodeToSuperstepAssignment_ + = std::vector(schedule.GetInstance().NumberOfVertices(), schedule.NumberOfSupersteps()); - for (vertex_idx_t i = 0; i < schedule.getInstance().numberOfVertices(); i++) { - node_to_processor_assignment[i] = schedule.assignedProcessor(i); - node_to_superstep_assignment[i] = schedule.assignedSuperstep(i); + for (VertexIdxT i = 0; i < schedule.GetInstance().NumberOfVertices(); i++) { + nodeToProcessorAssignment_[i] = schedule.AssignedProcessor(i); + nodeToSuperstepAssignment_[i] = schedule.AssignedSuperstep(i); } } VectorSchedule(const VectorSchedule &other) - : instance(other.instance), - number_of_supersteps(other.number_of_supersteps), - 
node_to_processor_assignment(other.node_to_processor_assignment), - node_to_superstep_assignment(other.node_to_superstep_assignment) {} + : instance_(other.instance_), + numberOfSupersteps_(other.numberOfSupersteps_), + nodeToProcessorAssignment_(other.nodeToProcessorAssignment_), + nodeToSuperstepAssignment_(other.nodeToSuperstepAssignment_) {} - VectorSchedule &operator=(const IBspSchedule &other) { + VectorSchedule &operator=(const IBspSchedule &other) { if (this != &other) { - instance = &other.getInstance(); - number_of_supersteps = other.numberOfSupersteps(); - node_to_processor_assignment = std::vector(instance->numberOfVertices(), instance->numberOfProcessors()); - node_to_superstep_assignment = std::vector(instance->numberOfVertices(), number_of_supersteps); - - for (vertex_idx_t i = 0; i < instance->numberOfVertices(); i++) { - node_to_processor_assignment[i] = other.assignedProcessor(i); - node_to_superstep_assignment[i] = other.assignedSuperstep(i); + instance_ = &other.GetInstance(); + numberOfSupersteps_ = other.NumberOfSupersteps(); + nodeToProcessorAssignment_ = std::vector(instance_->NumberOfVertices(), instance_->NumberOfProcessors()); + nodeToSuperstepAssignment_ = std::vector(instance_->NumberOfVertices(), numberOfSupersteps_); + + for (VertexIdxT i = 0; i < instance_->NumberOfVertices(); i++) { + nodeToProcessorAssignment_[i] = other.AssignedProcessor(i); + nodeToSuperstepAssignment_[i] = other.AssignedSuperstep(i); } } return *this; @@ -84,66 +84,66 @@ class VectorSchedule : public IBspSchedule { VectorSchedule &operator=(const VectorSchedule &other) { if (this != &other) { - instance = other.instance; - number_of_supersteps = other.number_of_supersteps; - node_to_processor_assignment = other.node_to_processor_assignment; - node_to_superstep_assignment = other.node_to_superstep_assignment; + instance_ = other.instance_; + numberOfSupersteps_ = other.numberOfSupersteps_; + nodeToProcessorAssignment_ = other.nodeToProcessorAssignment_; + 
nodeToSuperstepAssignment_ = other.nodeToSuperstepAssignment_; } return *this; } VectorSchedule(VectorSchedule &&other) noexcept - : instance(other.instance), - number_of_supersteps(other.number_of_supersteps), - node_to_processor_assignment(std::move(other.node_to_processor_assignment)), - node_to_superstep_assignment(std::move(other.node_to_superstep_assignment)) {} + : instance_(other.instance_), + numberOfSupersteps_(other.numberOfSupersteps_), + nodeToProcessorAssignment_(std::move(other.nodeToProcessorAssignment_)), + nodeToSuperstepAssignment_(std::move(other.nodeToSuperstepAssignment_)) {} virtual ~VectorSchedule() = default; - void clear() { - node_to_processor_assignment.clear(); - node_to_superstep_assignment.clear(); - number_of_supersteps = 0; + void Clear() { + nodeToProcessorAssignment_.clear(); + nodeToSuperstepAssignment_.clear(); + numberOfSupersteps_ = 0; } - const BspInstance &getInstance() const override { return *instance; } + const BspInstance &GetInstance() const override { return *instance_; } - void setAssignedSuperstep(vertex_idx_t vertex, unsigned superstep) override { - node_to_superstep_assignment[vertex] = superstep; + void SetAssignedSuperstep(VertexIdxT vertex, unsigned superstep) override { + nodeToSuperstepAssignment_[vertex] = superstep; }; - void setAssignedProcessor(vertex_idx_t vertex, unsigned processor) override { - node_to_processor_assignment[vertex] = processor; + void SetAssignedProcessor(VertexIdxT vertex, unsigned processor) override { + nodeToProcessorAssignment_[vertex] = processor; }; - unsigned numberOfSupersteps() const override { return number_of_supersteps; } + unsigned NumberOfSupersteps() const override { return numberOfSupersteps_; } - unsigned assignedSuperstep(vertex_idx_t vertex) const override { return node_to_superstep_assignment[vertex]; } + unsigned AssignedSuperstep(VertexIdxT vertex) const override { return nodeToSuperstepAssignment_[vertex]; } - unsigned assignedProcessor(vertex_idx_t vertex) const 
override { return node_to_processor_assignment[vertex]; } + unsigned AssignedProcessor(VertexIdxT vertex) const override { return nodeToProcessorAssignment_[vertex]; } - void mergeSupersteps(unsigned start_step, unsigned end_step) { - number_of_supersteps = 0; + void MergeSupersteps(unsigned startStep, unsigned endStep) { + numberOfSupersteps_ = 0; - for (const auto &vertex : getInstance().vertices()) { - if (node_to_superstep_assignment[vertex] > start_step && node_to_superstep_assignment[vertex] <= end_step) { - node_to_superstep_assignment[vertex] = start_step; - } else if (node_to_superstep_assignment[vertex] > end_step) { - node_to_superstep_assignment[vertex] -= end_step - start_step; + for (const auto &vertex : GetInstance().Vertices()) { + if (nodeToSuperstepAssignment_[vertex] > startStep && nodeToSuperstepAssignment_[vertex] <= endStep) { + nodeToSuperstepAssignment_[vertex] = startStep; + } else if (nodeToSuperstepAssignment_[vertex] > endStep) { + nodeToSuperstepAssignment_[vertex] -= endStep - startStep; } - if (node_to_superstep_assignment[vertex] >= number_of_supersteps) { - number_of_supersteps = node_to_superstep_assignment[vertex] + 1; + if (nodeToSuperstepAssignment_[vertex] >= numberOfSupersteps_) { + numberOfSupersteps_ = nodeToSuperstepAssignment_[vertex] + 1; } } } - void insertSupersteps(const unsigned step_before, const unsigned num_new_steps) { - number_of_supersteps += num_new_steps; + void InsertSupersteps(const unsigned stepBefore, const unsigned numNewSteps) { + numberOfSupersteps_ += numNewSteps; - for (const auto &vertex : getInstance().vertices()) { - if (node_to_superstep_assignment[vertex] > step_before) { - node_to_superstep_assignment[vertex] += num_new_steps; + for (const auto &vertex : GetInstance().Vertices()) { + if (nodeToSuperstepAssignment_[vertex] > stepBefore) { + nodeToSuperstepAssignment_[vertex] += numNewSteps; } } } diff --git a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp 
b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp index ec98b977..fde65b04 100644 --- a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp +++ b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp @@ -24,47 +24,47 @@ limitations under the License. namespace osp { -template -class CoarseAndSchedule : public Scheduler { +template +class CoarseAndSchedule : public Scheduler { private: - Coarser &coarser; - Scheduler &scheduler; + Coarser &coarser_; + Scheduler &scheduler_; public: - CoarseAndSchedule(Coarser &coarser_, Scheduler &scheduler_) - : coarser(coarser_), scheduler(scheduler_) {} + CoarseAndSchedule(Coarser &coarser, Scheduler &scheduler) + : coarser_(coarser), scheduler_(scheduler) {} - std::string getScheduleName() const override { - return "Coarse(" + coarser.getCoarserName() + ")AndSchedule(" + scheduler.getScheduleName() + ")"; + std::string GetScheduleName() const override { + return "Coarse(" + coarser_.GetCoarserName() + ")AndSchedule(" + scheduler_.GetScheduleName() + ")"; } - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); - BspInstance instance_coarse; + BspInstance instanceCoarse; - std::vector> reverse_vertex_map; + std::vector> reverseVertexMap; - bool status = coarser.coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), reverse_vertex_map); + bool status = coarser_.CoarsenDag(instance.GetComputationalDag(), instanceCoarse.GetComputationalDag(), reverseVertexMap); if (!status) { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } - instance_coarse.getArchitecture() = instance.getArchitecture(); - instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); + instanceCoarse.GetArchitecture() = instance.GetArchitecture(); + 
instanceCoarse.SetNodeProcessorCompatibility(instance.GetProcessorCompatibilityMatrix()); - BspSchedule schedule_coarse(instance_coarse); + BspSchedule scheduleCoarse(instanceCoarse); - const auto status_coarse = scheduler.computeSchedule(schedule_coarse); + const auto statusCoarse = scheduler_.ComputeSchedule(scheduleCoarse); - if (status_coarse != RETURN_STATUS::OSP_SUCCESS and status_coarse != RETURN_STATUS::BEST_FOUND) { - return status_coarse; + if (statusCoarse != ReturnStatus::OSP_SUCCESS and statusCoarse != ReturnStatus::BEST_FOUND) { + return statusCoarse; } - coarser_util::pull_back_schedule(schedule_coarse, reverse_vertex_map, schedule); + coarser_util::PullBackSchedule(scheduleCoarse, reverseVertexMap, schedule); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } }; diff --git a/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp b/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp index e722989b..2b867ecd 100644 --- a/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp +++ b/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp @@ -23,198 +23,196 @@ limitations under the License. 
namespace osp { -template -class MultiLevelHillClimbingScheduler : public Scheduler { - using vertex_idx = vertex_idx_t; +template +class MultiLevelHillClimbingScheduler : public Scheduler { + using VertexIdx = VertexIdxT; - using vertex_type_t_or_default - = std::conditional_t, v_type_t, unsigned>; - using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; + using VertexTypeTOrDefault = std::conditional_t, VTypeT, unsigned>; + using EdgeCommwTOrDefault = std::conditional_t, ECommwT, VCommwT>; private: - typename StepByStepCoarser::COARSENING_STRATEGY coarsening_strategy - = StepByStepCoarser::COARSENING_STRATEGY::EDGE_BY_EDGE; - unsigned number_hc_steps; - unsigned target_nr_of_nodes = 0; - unsigned min_target_nr_of_nodes_ = 1U; - double contraction_rate_ = 0.5; - - unsigned linear_refinement_step_size_ = 20; - bool use_linear_refinement_ = true; - - double exponential_refinement_step_ratio_ = 1.1; - bool use_exponential_refinement_ = false; - - std::deque refinement_points; - - BspSchedule Refine(const BspInstance &instance, - const StepByStepCoarser &coarser, - const BspSchedule &coarse_schedule) const; - - BspSchedule ComputeUncontractedSchedule(const StepByStepCoarser &coarser, - const BspInstance &full_instance, - const BspSchedule &coarse_schedule, - vertex_idx index_until) const; - - void setLinearRefinementPoints(vertex_idx OriginalNrOfNodes, unsigned stepSize); - void setExponentialRefinementPoints(vertex_idx OriginalNrOfNodes, double stepRatio); - - void set_parameter(const size_t num_vertices) { - target_nr_of_nodes - = std::max(min_target_nr_of_nodes_, static_cast(static_cast(num_vertices) * contraction_rate_)); - target_nr_of_nodes = std::min(target_nr_of_nodes, static_cast(num_vertices)); - - if (use_linear_refinement_) { - setLinearRefinementPoints(num_vertices, linear_refinement_step_size_); - } else if (use_exponential_refinement_) { - setExponentialRefinementPoints(num_vertices, exponential_refinement_step_ratio_); + typename 
StepByStepCoarser::CoarseningStrategy coarseningStrategy_ + = StepByStepCoarser::CoarseningStrategy::EDGE_BY_EDGE; + unsigned numberHcSteps_; + unsigned targetNrOfNodes_ = 0; + unsigned minTargetNrOfNodes_ = 1U; + double contractionRate_ = 0.5; + + unsigned linearRefinementStepSize_ = 20; + bool useLinearRefinement_ = true; + + double exponentialRefinementStepRatio_ = 1.1; + bool useExponentialRefinement_ = false; + + std::deque refinementPoints_; + + BspSchedule Refine(const BspInstance &instance, + const StepByStepCoarser &coarser, + const BspSchedule &coarseSchedule) const; + + BspSchedule ComputeUncontractedSchedule(const StepByStepCoarser &coarser, + const BspInstance &fullInstance, + const BspSchedule &coarseSchedule, + VertexIdx indexUntil) const; + + void SetLinearRefinementPoints(VertexIdx originalNrOfNodes, unsigned stepSize); + void SetExponentialRefinementPoints(VertexIdx originalNrOfNodes, double stepRatio); + + void SetParameter(const size_t numVertices) { + targetNrOfNodes_ = std::max(minTargetNrOfNodes_, static_cast(static_cast(numVertices) * contractionRate_)); + targetNrOfNodes_ = std::min(targetNrOfNodes_, static_cast(numVertices)); + + if (useLinearRefinement_) { + SetLinearRefinementPoints(numVertices, linearRefinementStepSize_); + } else if (useExponentialRefinement_) { + SetExponentialRefinementPoints(numVertices, exponentialRefinementStepRatio_); } } public: virtual ~MultiLevelHillClimbingScheduler() = default; - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override; + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override; - virtual std::string getScheduleName() const override { return "MultiLevelHillClimbing"; } + virtual std::string GetScheduleName() const override { return "MultiLevelHillClimbing"; } - void setCoarseningStrategy(typename StepByStepCoarser::COARSENING_STRATEGY strategy_) { - coarsening_strategy = strategy_; + void SetCoarseningStrategy(typename StepByStepCoarser::CoarseningStrategy strategy) 
{ + coarseningStrategy_ = strategy; } - void setContractionRate(double rate_) { contraction_rate_ = rate_; } + void SetContractionRate(double rate) { contractionRate_ = rate; } - void setNumberOfHcSteps(unsigned steps_) { number_hc_steps = steps_; } + void SetNumberOfHcSteps(unsigned steps) { numberHcSteps_ = steps; } - void setMinTargetNrOfNodes(unsigned min_target_nr_of_nodes) { min_target_nr_of_nodes_ = min_target_nr_of_nodes; } + void SetMinTargetNrOfNodes(unsigned minTargetNrOfNodes) { minTargetNrOfNodes_ = minTargetNrOfNodes; } - void useLinearRefinementSteps(unsigned steps) { - use_linear_refinement_ = true; - use_exponential_refinement_ = false; - linear_refinement_step_size_ = steps; + void UseLinearRefinementSteps(unsigned steps) { + useLinearRefinement_ = true; + useExponentialRefinement_ = false; + linearRefinementStepSize_ = steps; } - void useExponentialRefinementPoints(double ratio) { - use_exponential_refinement_ = true; - use_linear_refinement_ = false; - exponential_refinement_step_ratio_ = ratio; + void UseExponentialRefinementPoints(double ratio) { + useExponentialRefinement_ = true; + useLinearRefinement_ = false; + exponentialRefinementStepRatio_ = ratio; } }; -template -RETURN_STATUS MultiLevelHillClimbingScheduler::computeSchedule(BspSchedule &schedule) { - StepByStepCoarser coarser; - Graph_t coarseDAG; - std::vector new_vertex_id; +template +ReturnStatus MultiLevelHillClimbingScheduler::ComputeSchedule(BspSchedule &schedule) { + StepByStepCoarser coarser; + GraphT coarseDAG; + std::vector newVertexId; - const auto num_verices = schedule.getInstance().numberOfVertices(); - set_parameter(num_verices); + const auto numVertices = schedule.GetInstance().NumberOfVertices(); + SetParameter(numVertices); - coarser.coarsenDag(schedule.getInstance().getComputationalDag(), coarseDAG, new_vertex_id); + newVertexId = coarser.GenerateVertexContractionMap(schedule.GetInstance().GetComputationalDag()); + coarseDAG = coarser.Contract(newVertexId); - 
BspInstance coarse_instance(coarseDAG, schedule.getInstance().getArchitecture()); + BspInstance coarseInstance(coarseDAG, schedule.GetInstance().GetArchitecture()); - GreedyBspScheduler greedy; - BspSchedule coarse_schedule(coarse_instance); - greedy.computeSchedule(coarse_schedule); + GreedyBspScheduler greedy; + BspSchedule coarseSchedule(coarseInstance); + greedy.ComputeSchedule(coarseSchedule); - HillClimbingScheduler coarse_hc; - coarse_hc.improveSchedule(coarse_schedule); + HillClimbingScheduler coarseHc; + coarseHc.ImproveSchedule(coarseSchedule); - if (refinement_points.empty()) { - setExponentialRefinementPoints(num_verices, 1.1); + if (refinementPoints_.empty()) { + SetExponentialRefinementPoints(numVertices, 1.1); } - while (!refinement_points.empty() && refinement_points.front() <= coarseDAG.num_vertices()) { - refinement_points.pop_front(); + while (!refinementPoints_.empty() && refinementPoints_.front() <= coarseDAG.NumVertices()) { + refinementPoints_.pop_front(); } - schedule = Refine(schedule.getInstance(), coarser, coarse_schedule); + schedule = Refine(schedule.GetInstance(), coarser, coarseSchedule); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } // run refinement: uncoarsify the DAG in small batches, and apply some steps of hill climbing after each iteration -template -BspSchedule MultiLevelHillClimbingScheduler::Refine(const BspInstance &full_instance, - const StepByStepCoarser &coarser, - const BspSchedule &coarse_schedule) const { - BspSchedule schedule_on_full_graph - = ComputeUncontractedSchedule(coarser, full_instance, coarse_schedule, coarser.getContractionHistory().size()); +template +BspSchedule MultiLevelHillClimbingScheduler::Refine(const BspInstance &fullInstance, + const StepByStepCoarser &coarser, + const BspSchedule &coarseSchedule) const { + BspSchedule scheduleOnFullGraph + = ComputeUncontractedSchedule(coarser, fullInstance, coarseSchedule, coarser.GetContractionHistory().size()); - for (vertex_idx 
next_size : refinement_points) { - const vertex_idx contract_steps = coarser.getOriginalDag().num_vertices() - next_size; - std::vector new_ids = coarser.GetIntermediateIDs(contract_steps); - Graph_t dag = coarser.Contract(new_ids); + for (VertexIdx nextSize : refinementPoints_) { + const VertexIdx contractSteps = coarser.GetOriginalDag().NumVertices() - nextSize; + std::vector newIds = coarser.GetIntermediateIDs(contractSteps); + GraphT dag = coarser.Contract(newIds); - BspInstance instance(dag, full_instance.getArchitecture()); - BspSchedule schedule(instance); + BspInstance instance(dag, fullInstance.GetArchitecture()); + BspSchedule schedule(instance); // Project full schedule to current graph - for (vertex_idx node = 0; node < full_instance.numberOfVertices(); ++node) { - schedule.setAssignedProcessor(new_ids[node], schedule_on_full_graph.assignedProcessor(node)); - schedule.setAssignedSuperstep(new_ids[node], schedule_on_full_graph.assignedSuperstep(node)); + for (VertexIdx node = 0; node < fullInstance.NumberOfVertices(); ++node) { + schedule.SetAssignedProcessor(newIds[node], scheduleOnFullGraph.AssignedProcessor(node)); + schedule.SetAssignedSuperstep(newIds[node], scheduleOnFullGraph.AssignedSuperstep(node)); } - HillClimbingScheduler hc; - hc.improveScheduleWithStepLimit(schedule, number_hc_steps); + HillClimbingScheduler hc; + hc.ImproveScheduleWithStepLimit(schedule, numberHcSteps_); - schedule_on_full_graph = ComputeUncontractedSchedule(coarser, full_instance, schedule, contract_steps); + scheduleOnFullGraph = ComputeUncontractedSchedule(coarser, fullInstance, schedule, contractSteps); } - std::cout << "Refined cost: " << schedule_on_full_graph.computeCosts() << std::endl; - return schedule_on_full_graph; + std::cout << "Refined cost: " << scheduleOnFullGraph.ComputeCosts() << std::endl; + return scheduleOnFullGraph; } // given an original DAG G, a schedule on the coarsified G and the contraction steps, project the coarse schedule to the entire G 
-template -BspSchedule MultiLevelHillClimbingScheduler::ComputeUncontractedSchedule( - const StepByStepCoarser &coarser, - const BspInstance &full_instance, - const BspSchedule &coarse_schedule, - vertex_idx index_until) const { - std::vector new_ids = coarser.GetIntermediateIDs(index_until); - - BspSchedule schedule(full_instance); - - for (vertex_idx node = 0; node < full_instance.numberOfVertices(); ++node) { - schedule.setAssignedProcessor(node, coarse_schedule.assignedProcessor(new_ids[node])); - schedule.setAssignedSuperstep(node, coarse_schedule.assignedSuperstep(new_ids[node])); +template +BspSchedule MultiLevelHillClimbingScheduler::ComputeUncontractedSchedule(const StepByStepCoarser &coarser, + const BspInstance &fullInstance, + const BspSchedule &coarseSchedule, + VertexIdx indexUntil) const { + std::vector newIds = coarser.GetIntermediateIDs(indexUntil); + + BspSchedule schedule(fullInstance); + + for (VertexIdx node = 0; node < fullInstance.NumberOfVertices(); ++node) { + schedule.SetAssignedProcessor(node, coarseSchedule.AssignedProcessor(newIds[node])); + schedule.SetAssignedSuperstep(node, coarseSchedule.AssignedSuperstep(newIds[node])); } return schedule; } -template -void MultiLevelHillClimbingScheduler::setLinearRefinementPoints(vertex_idx OriginalNrOfNodes, unsigned stepSize) { - refinement_points.clear(); +template +void MultiLevelHillClimbingScheduler::SetLinearRefinementPoints(VertexIdx originalNrOfNodes, unsigned stepSize) { + refinementPoints_.clear(); if (stepSize < 5) { stepSize = 5; } - for (vertex_idx nextN = target_nr_of_nodes + stepSize; nextN < OriginalNrOfNodes; nextN += stepSize) { - refinement_points.push_back(nextN); + for (VertexIdx nextN = targetNrOfNodes_ + stepSize; nextN < originalNrOfNodes; nextN += stepSize) { + refinementPoints_.push_back(nextN); } - if (!refinement_points.empty()) { - refinement_points.pop_back(); + if (!refinementPoints_.empty()) { + refinementPoints_.pop_back(); } - 
refinement_points.push_back(OriginalNrOfNodes); + refinementPoints_.push_back(originalNrOfNodes); } -template -void MultiLevelHillClimbingScheduler::setExponentialRefinementPoints(vertex_idx OriginalNrOfNodes, double stepRatio) { - refinement_points.clear(); +template +void MultiLevelHillClimbingScheduler::SetExponentialRefinementPoints(VertexIdx originalNrOfNodes, double stepRatio) { + refinementPoints_.clear(); if (stepRatio < 1.01) { stepRatio = 1.01; } - for (vertex_idx nextN = std::max(static_cast(std::round(target_nr_of_nodes * stepRatio)), target_nr_of_nodes + 5); - nextN < OriginalNrOfNodes; + for (VertexIdx nextN = std::max(static_cast(std::round(targetNrOfNodes_ * stepRatio)), targetNrOfNodes_ + 5); + nextN < originalNrOfNodes; nextN - = std::max(static_cast(std::round(static_cast(nextN) * stepRatio)), refinement_points.back() + 5)) { - refinement_points.push_back(nextN); + = std::max(static_cast(std::round(static_cast(nextN) * stepRatio)), refinementPoints_.back() + 5)) { + refinementPoints_.push_back(nextN); } - refinement_points.push_back(OriginalNrOfNodes); + refinementPoints_.push_back(originalNrOfNodes); } } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp b/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp index 08a52aa0..8f78b846 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp @@ -39,25 +39,25 @@ namespace osp { * @brief The GreedyBspLocking class represents a scheduler that uses a greedy algorithm to compute schedules for * BspInstance. * - * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods. - * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. - * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. 
+ * This class inherits from the Scheduler class and implements the ComputeSchedule() and GetScheduleName() methods. + * The ComputeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. + * The GetScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. */ -template -class BspLocking : public Scheduler { - static_assert(is_computational_dag_v, "BspLocking can only be used with computational DAGs."); +template +class BspLocking : public Scheduler { + static_assert(isComputationalDagV, "BspLocking can only be used with computational DAGs."); private: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - constexpr static bool use_memory_constraint = is_memory_constraint_v - or is_memory_constraint_schedule_v; + constexpr static bool useMemoryConstraint_ = isMemoryConstraintV + or isMemoryConstraintScheduleV; - static_assert(not use_memory_constraint or std::is_same_v, - "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); + static_assert(not useMemoryConstraint_ or std::is_same_v, + "GraphT must be the same as MemoryConstraintT::GraphImplT."); - MemoryConstraint_t memory_constraint; + MemoryConstraintT memoryConstraint_; using Priority = std::tuple; @@ -75,71 +75,71 @@ class BspLocking : public Scheduler { using MaxHeap = PairingHeap; - std::vector max_proc_score_heap; - std::vector max_all_proc_score_heap; + std::vector maxProcScoreHeap_; + std::vector maxAllProcScoreHeap_; - static std::vector> get_longest_path(const Graph_t &graph) { - std::vector> longest_path(graph.num_vertices(), 0); + static std::vector> GetLongestPath(const GraphT &graph) { + std::vector> longestPath(graph.NumVertices(), 0); - const std::vector top_order = GetTopOrder(graph); + const std::vector topOrder = GetTopOrder(graph); - for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) { - longest_path[*r_iter] = graph.vertex_work_weight(*r_iter); - if (graph.out_degree(*r_iter) 
> 0) { - v_workw_t max = 0; - for (const auto &child : graph.children(*r_iter)) { - if (max <= longest_path[child]) { - max = longest_path[child]; + for (auto rIter = topOrder.rbegin(); rIter != topOrder.crend(); rIter++) { + longestPath[*rIter] = graph.VertexWorkWeight(*rIter); + if (graph.OutDegree(*rIter) > 0) { + VWorkwT max = 0; + for (const auto &child : graph.Children(*rIter)) { + if (max <= longestPath[child]) { + max = longestPath[child]; } } - longest_path[*r_iter] += max; + longestPath[*rIter] += max; } } - return longest_path; + return longestPath; } - std::deque locked_set; - std::vector locked; - int lock_penalty = 1; - std::vector ready_phase; + std::deque lockedSet_; + std::vector locked_; + int lockPenalty_ = 1; + std::vector readyPhase_; - std::vector default_value; + std::vector defaultValue_; - double max_percent_idle_processors; - bool increase_parallelism_in_new_superstep; + double maxPercentIdleProcessors_; + bool increaseParallelismInNewSuperstep_; - int computeScore(VertexType node, unsigned proc, const BspInstance &instance) { + int ComputeScore(VertexType node, unsigned proc, const BspInstance &instance) { int score = 0; - for (const auto &succ : instance.getComputationalDag().children(node)) { - if (locked[succ] < instance.numberOfProcessors() && locked[succ] != proc) { - score -= lock_penalty; + for (const auto &succ : instance.GetComputationalDag().Children(node)) { + if (locked_[succ] < instance.NumberOfProcessors() && locked_[succ] != proc) { + score -= lockPenalty_; } } - return score + default_value[node]; + return score + defaultValue_[node]; }; - bool check_mem_feasibility(const BspInstance &instance, - const std::set &allReady, - const std::vector> &procReady) const { - if constexpr (use_memory_constraint) { - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + bool CheckMemFeasibility(const BspInstance 
&instance, + const std::set &allReady, + const std::vector> &procReady) const { + if constexpr (useMemoryConstraint_) { + if (instance.GetArchitecture().GetMemoryConstraintType() == MemoryConstraintType::PERSISTENT_AND_TRANSIENT) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (!procReady[i].empty()) { - VertexType top_node = max_proc_score_heap[i].top(); + VertexType topNode = maxProcScoreHeap_[i].Top(); - if (memory_constraint.can_add(top_node, i)) { + if (memoryConstraint_.CanAdd(topNode, i)) { return true; } } } if (!allReady.empty()) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - VertexType top_node = max_all_proc_score_heap[i].top(); + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + VertexType topNode = maxAllProcScoreHeap_[i].Top(); - if (memory_constraint.can_add(top_node, i)) { + if (memoryConstraint_.CanAdd(topNode, i)) { return true; } } @@ -152,110 +152,110 @@ class BspLocking : public Scheduler { return true; } - bool Choose(const BspInstance &instance, + bool Choose(const BspInstance &instance, std::set &allReady, std::vector> &procReady, const std::vector &procFree, VertexType &node, unsigned &p, const bool endSupStep, - const v_workw_t remaining_time) { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + const VWorkwT remainingTime) { + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { if (procFree[proc] && !procReady[proc].empty()) { // select node - VertexType top_node = max_proc_score_heap[proc].top(); + VertexType topNode = maxProcScoreHeap_[proc].Top(); // filling up - bool procready_empty = false; - while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) { - procReady[proc].erase(top_node); - ready_phase[top_node] = std::numeric_limits::max(); - max_proc_score_heap[proc].pop(); + bool procReadyEmpty = false; + while (endSupStep && (remainingTime < 
instance.GetComputationalDag().VertexWorkWeight(topNode))) { + procReady[proc].erase(topNode); + readyPhase_[topNode] = std::numeric_limits::max(); + maxProcScoreHeap_[proc].Pop(); if (!procReady[proc].empty()) { - top_node = max_proc_score_heap[proc].top(); + topNode = maxProcScoreHeap_[proc].Top(); } else { - procready_empty = true; + procReadyEmpty = true; break; } } - if (procready_empty) { + if (procReadyEmpty) { continue; } - node = top_node; + node = topNode; p = proc; } } - if (p < instance.numberOfProcessors()) { + if (p < instance.NumberOfProcessors()) { return true; } - Priority best_priority = {std::numeric_limits::min(), 0, 0}; - bool found_node = false; + Priority bestPriority = {std::numeric_limits::min(), 0, 0}; + bool foundNode = false; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - if (!procFree[proc] or max_all_proc_score_heap[proc].is_empty()) { + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + if (!procFree[proc] or maxAllProcScoreHeap_[proc].IsEmpty()) { continue; } - VertexType top_node = max_all_proc_score_heap[proc].top(); + VertexType topNode = maxAllProcScoreHeap_[proc].Top(); // filling up - bool all_procready_empty = false; - while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) { - allReady.erase(top_node); - for (unsigned proc_del = 0; proc_del < instance.numberOfProcessors(); proc_del++) { - if (proc_del == proc || !instance.isCompatible(top_node, proc_del)) { + bool allProcreadyEmpty = false; + while (endSupStep && (remainingTime < instance.GetComputationalDag().VertexWorkWeight(topNode))) { + allReady.erase(topNode); + for (unsigned procDel = 0; procDel < instance.NumberOfProcessors(); procDel++) { + if (procDel == proc || !instance.IsCompatible(topNode, procDel)) { continue; } - max_all_proc_score_heap[proc_del].erase(top_node); + maxAllProcScoreHeap_[procDel].Erase(topNode); } - max_all_proc_score_heap[proc].pop(); - 
ready_phase[top_node] = std::numeric_limits::max(); - if (!max_all_proc_score_heap[proc].is_empty()) { - top_node = max_all_proc_score_heap[proc].top(); + maxAllProcScoreHeap_[proc].Pop(); + readyPhase_[topNode] = std::numeric_limits::max(); + if (!maxAllProcScoreHeap_[proc].IsEmpty()) { + topNode = maxAllProcScoreHeap_[proc].Top(); } else { - all_procready_empty = true; + allProcreadyEmpty = true; break; } } - if (all_procready_empty) { + if (allProcreadyEmpty) { continue; } - Priority top_priority = max_all_proc_score_heap[proc].get_value(top_node); - if (!found_node || PriorityCompare{}(top_priority, best_priority)) { - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(top_node, proc)) { - best_priority = top_priority; - node = top_node; + Priority topPriority = maxAllProcScoreHeap_[proc].GetValue(topNode); + if (!foundNode || PriorityCompare{}(topPriority, bestPriority)) { + if constexpr (useMemoryConstraint_) { + if (memoryConstraint_.CanAdd(topNode, proc)) { + bestPriority = topPriority; + node = topNode; p = proc; - found_node = true; + foundNode = true; } } else { - best_priority = top_priority; - node = top_node; + bestPriority = topPriority; + node = topNode; p = proc; - found_node = true; + foundNode = true; } } } - return (found_node && std::get<0>(best_priority) > -3); + return (foundNode && std::get<0>(bestPriority) > -3); } - bool CanChooseNode(const BspInstance &instance, + bool CanChooseNode(const BspInstance &instance, const std::vector> &procReady, const std::vector &procFree) const { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (procFree[i] && !procReady[i].empty()) { return true; } } - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (procFree[i] && !max_all_proc_score_heap[i].is_empty()) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + if (procFree[i] && !maxAllProcScoreHeap_[i].IsEmpty()) { return 
true; } } @@ -263,34 +263,34 @@ class BspLocking : public Scheduler { return false; } - unsigned get_nr_parallelizable_nodes(const BspInstance &instance, - const std::vector &nr_ready_nodes_per_type, - const std::vector &nr_procs_per_type) const { - unsigned nr_nodes = 0; - - std::vector ready_nodes_per_type = nr_ready_nodes_per_type; - std::vector procs_per_type = nr_procs_per_type; - for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) { - for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { - if (instance.isCompatibleType(node_type, proc_type)) { - unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]); - nr_nodes += matched; - ready_nodes_per_type[node_type] -= matched; - procs_per_type[proc_type] -= matched; + unsigned GetNrParallelizableNodes(const BspInstance &instance, + const std::vector &nrReadyNodesPerType, + const std::vector &nrProcsPerType) const { + unsigned nrNodes = 0; + + std::vector readyNodesPerType = nrReadyNodesPerType; + std::vector procsPerType = nrProcsPerType; + for (unsigned procType = 0; procType < instance.GetArchitecture().GetNumberOfProcessorTypes(); ++procType) { + for (unsigned nodeType = 0; nodeType < instance.GetComputationalDag().NumVertexTypes(); ++nodeType) { + if (instance.IsCompatibleType(nodeType, procType)) { + unsigned matched = std::min(readyNodesPerType[nodeType], procsPerType[procType]); + nrNodes += matched; + readyNodesPerType[nodeType] -= matched; + procsPerType[procType] -= matched; } } } - return nr_nodes; + return nrNodes; } public: /** * @brief Default constructor for GreedyBspLocking. 
*/ - BspLocking(float max_percent_idle_processors_ = 0.4f, bool increase_parallelism_in_new_superstep_ = true) - : max_percent_idle_processors(max_percent_idle_processors_), - increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {} + BspLocking(float maxPercentIdleProcessors = 0.4f, bool increaseParallelismInNewSuperstep = true) + : maxPercentIdleProcessors_(maxPercentIdleProcessors), + increaseParallelismInNewSuperstep_(increaseParallelismInNewSuperstep) {} /** * @brief Default destructor for GreedyBspLocking. @@ -305,77 +305,78 @@ class BspLocking : public Scheduler { * @param instance The BspInstance object representing the instance to compute the schedule for. * @return A pair containing the return status and the computed BspSchedule. */ - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); - for (const auto &v : instance.getComputationalDag().vertices()) { - schedule.setAssignedProcessor(v, std::numeric_limits::max()); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + schedule.SetAssignedProcessor(v, std::numeric_limits::max()); } unsigned supstepIdx = 0; - if constexpr (is_memory_constraint_v) { - memory_constraint.initialize(instance); - } else if constexpr (is_memory_constraint_schedule_v) { - memory_constraint.initialize(schedule, supstepIdx); + if constexpr (isMemoryConstraintV) { + memoryConstraint_.Initialize(instance); + } else if constexpr (isMemoryConstraintScheduleV) { + memoryConstraint_.Initialize(schedule, supstepIdx); } - const auto &N = instance.numberOfVertices(); - const unsigned ¶ms_p = instance.numberOfProcessors(); - const auto &G = instance.getComputationalDag(); + const auto &n = instance.NumberOfVertices(); + const unsigned ¶msP = instance.NumberOfProcessors(); + const auto &g = 
instance.GetComputationalDag(); - const std::vector> path_length = get_longest_path(G); - v_workw_t max_path = 1; - for (const auto &i : instance.vertices()) { - if (path_length[i] > max_path) { - max_path = path_length[i]; + const std::vector> pathLength = GetLongestPath(g); + VWorkwT maxPath = 1; + for (const auto &i : instance.Vertices()) { + if (pathLength[i] > maxPath) { + maxPath = pathLength[i]; } } - default_value.clear(); - default_value.resize(N, 0); - for (const auto &i : instance.vertices()) { - // assert(path_length[i] * 20 / max_path <= std::numeric_limits::max()); - default_value[i] = static_cast(path_length[i] * static_cast>(20) / max_path); + defaultValue_.clear(); + defaultValue_.resize(n, 0); + for (const auto &i : instance.Vertices()) { + defaultValue_[i] = static_cast(pathLength[i] * static_cast>(20) / maxPath); } - max_proc_score_heap = std::vector(params_p); - max_all_proc_score_heap = std::vector(params_p); + maxProcScoreHeap_.clear(); + maxProcScoreHeap_.resize(paramsP); + maxAllProcScoreHeap_.clear(); + maxAllProcScoreHeap_.resize(paramsP); - locked_set.clear(); - locked.clear(); - locked.resize(N, std::numeric_limits::max()); + lockedSet_.clear(); + locked_.clear(); + locked_.resize(n, std::numeric_limits::max()); std::set ready; - ready_phase.clear(); - ready_phase.resize(N, std::numeric_limits::max()); + readyPhase_.clear(); + readyPhase_.resize(n, std::numeric_limits::max()); - std::vector> procReady(params_p); + std::vector> procReady(paramsP); std::set allReady; - std::vector nrPredecDone(N, 0); - std::vector procFree(params_p, true); - unsigned free = params_p; + std::vector nrPredecDone(n, 0); + std::vector procFree(paramsP, true); + unsigned free = paramsP; - std::vector nr_ready_nodes_per_type(G.num_vertex_types(), 0); - std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < params_p; ++proc) { - 
++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; + std::vector nrReadyNodesPerType(g.NumVertexTypes(), 0); + std::vector nrProcsPerType(instance.GetArchitecture().GetNumberOfProcessorTypes(), 0); + for (unsigned proc = 0; proc < paramsP; ++proc) { + ++nrProcsPerType[instance.GetArchitecture().ProcessorType(proc)]; } - std::set, VertexType>> finishTimes; + std::set, VertexType>> finishTimes; finishTimes.emplace(0, std::numeric_limits::max()); - for (const auto &v : source_vertices_view(G)) { + for (const auto &v : SourceVerticesView(g)) { ready.insert(v); allReady.insert(v); - ++nr_ready_nodes_per_type[G.vertex_type(v)]; - ready_phase[v] = params_p; + ++nrReadyNodesPerType[g.VertexType(v)]; + readyPhase_[v] = paramsP; - for (unsigned proc = 0; proc < params_p; ++proc) { - if (instance.isCompatible(v, proc)) { - Priority priority = {default_value[v], static_cast(G.out_degree(v)), v}; - max_all_proc_score_heap[proc].push(v, priority); + for (unsigned proc = 0; proc < paramsP; ++proc) { + if (instance.IsCompatible(v, proc)) { + Priority priority = {defaultValue_[v], static_cast(g.OutDegree(v)), v}; + maxAllProcScoreHeap_[proc].Push(v, priority); } } } @@ -384,36 +385,36 @@ class BspLocking : public Scheduler { while (!ready.empty() || !finishTimes.empty()) { if (finishTimes.empty() && endSupStep) { - for (unsigned proc = 0; proc < params_p; ++proc) { + for (unsigned proc = 0; proc < paramsP; ++proc) { procReady[proc].clear(); - max_proc_score_heap[proc].clear(); + maxProcScoreHeap_[proc].Clear(); - if constexpr (use_memory_constraint) { - memory_constraint.reset(proc); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.Reset(proc); } } allReady = ready; - for (const auto &node : locked_set) { - locked[node] = std::numeric_limits::max(); + for (const auto &node : lockedSet_) { + locked_[node] = std::numeric_limits::max(); } - locked_set.clear(); + lockedSet_.clear(); - for (unsigned proc = 0; proc < params_p; ++proc) { - 
max_all_proc_score_heap[proc].clear(); + for (unsigned proc = 0; proc < paramsP; ++proc) { + maxAllProcScoreHeap_[proc].Clear(); } for (const auto &v : ready) { - ready_phase[v] = params_p; - for (unsigned proc = 0; proc < params_p; ++proc) { - if (!instance.isCompatible(v, proc)) { + readyPhase_[v] = paramsP; + for (unsigned proc = 0; proc < paramsP; ++proc) { + if (!instance.IsCompatible(v, proc)) { continue; } - int score = computeScore(v, proc, instance); - Priority priority = {score, static_cast(G.out_degree(v)), v}; - max_all_proc_score_heap[proc].push(v, priority); + int score = ComputeScore(v, proc, instance); + Priority priority = {score, static_cast(g.OutDegree(v)), v}; + maxAllProcScoreHeap_[proc].Push(v, priority); } } @@ -424,8 +425,8 @@ class BspLocking : public Scheduler { finishTimes.emplace(0, std::numeric_limits::max()); } - const v_workw_t time = finishTimes.begin()->first; - const v_workw_t max_finish_time = finishTimes.rbegin()->first; + const VWorkwT time = finishTimes.begin()->first; + const VWorkwT maxFinishTime = finishTimes.rbegin()->first; // Find new ready jobs while (!finishTimes.empty() && finishTimes.begin()->first == time) { @@ -433,45 +434,45 @@ class BspLocking : public Scheduler { finishTimes.erase(finishTimes.begin()); if (node != std::numeric_limits::max()) { - for (const auto &succ : G.children(node)) { + for (const auto &succ : g.Children(node)) { ++nrPredecDone[succ]; - if (nrPredecDone[succ] == G.in_degree(succ)) { + if (nrPredecDone[succ] == g.InDegree(succ)) { ready.insert(succ); - ++nr_ready_nodes_per_type[G.vertex_type(succ)]; + ++nrReadyNodesPerType[g.VertexType(succ)]; bool canAdd = true; - for (const auto &pred : G.parents(succ)) { - if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) - && schedule.assignedSuperstep(pred) == supstepIdx) { + for (const auto &pred : g.Parents(succ)) { + if (schedule.AssignedProcessor(pred) != schedule.AssignedProcessor(node) + && schedule.AssignedSuperstep(pred) == 
supstepIdx) { canAdd = false; break; } } - if constexpr (use_memory_constraint) { + if constexpr (useMemoryConstraint_) { if (canAdd) { - if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) { + if (not memoryConstraint_.CanAdd(succ, schedule.AssignedProcessor(node))) { canAdd = false; } } } - if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) { + if (!instance.IsCompatible(succ, schedule.AssignedProcessor(node))) { canAdd = false; } if (canAdd) { - procReady[schedule.assignedProcessor(node)].insert(succ); - ready_phase[succ] = schedule.assignedProcessor(node); + procReady[schedule.AssignedProcessor(node)].insert(succ); + readyPhase_[succ] = schedule.AssignedProcessor(node); - int score = computeScore(succ, schedule.assignedProcessor(node), instance); - Priority priority = {score, static_cast(G.out_degree(succ)), succ}; + int score = ComputeScore(succ, schedule.AssignedProcessor(node), instance); + Priority priority = {score, static_cast(g.OutDegree(succ)), succ}; - max_proc_score_heap[schedule.assignedProcessor(node)].push(succ, priority); + maxProcScoreHeap_[schedule.AssignedProcessor(node)].Push(succ, priority); } } } - procFree[schedule.assignedProcessor(node)] = true; + procFree[schedule.AssignedProcessor(node)] = true; ++free; } } @@ -483,104 +484,104 @@ class BspLocking : public Scheduler { while (CanChooseNode(instance, procReady, procFree)) { VertexType nextNode = std::numeric_limits::max(); - unsigned nextProc = instance.numberOfProcessors(); - Choose(instance, allReady, procReady, procFree, nextNode, nextProc, endSupStep, max_finish_time - time); + unsigned nextProc = instance.NumberOfProcessors(); + Choose(instance, allReady, procReady, procFree, nextNode, nextProc, endSupStep, maxFinishTime - time); - if (nextNode == std::numeric_limits::max() || nextProc == instance.numberOfProcessors()) { + if (nextNode == std::numeric_limits::max() || nextProc == instance.NumberOfProcessors()) { endSupStep = true; break; } - if 
(ready_phase[nextNode] < params_p) { + if (readyPhase_[nextNode] < paramsP) { procReady[nextProc].erase(nextNode); - max_proc_score_heap[nextProc].erase(nextNode); + maxProcScoreHeap_[nextProc].Erase(nextNode); } else { allReady.erase(nextNode); - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - if (instance.isCompatible(nextNode, proc) && max_all_proc_score_heap[proc].contains(nextNode)) { - max_all_proc_score_heap[proc].erase(nextNode); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + if (instance.IsCompatible(nextNode, proc) && maxAllProcScoreHeap_[proc].Contains(nextNode)) { + maxAllProcScoreHeap_[proc].Erase(nextNode); } } } ready.erase(nextNode); - --nr_ready_nodes_per_type[G.vertex_type(nextNode)]; - schedule.setAssignedProcessor(nextNode, nextProc); - schedule.setAssignedSuperstep(nextNode, supstepIdx); + --nrReadyNodesPerType[g.VertexType(nextNode)]; + schedule.SetAssignedProcessor(nextNode, nextProc); + schedule.SetAssignedSuperstep(nextNode, supstepIdx); - ready_phase[nextNode] = std::numeric_limits::max(); + readyPhase_[nextNode] = std::numeric_limits::max(); - if constexpr (use_memory_constraint) { - memory_constraint.add(nextNode, nextProc); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.Add(nextNode, nextProc); std::vector toErase; for (const auto &node : procReady[nextProc]) { - if (not memory_constraint.can_add(node, nextProc)) { + if (not memoryConstraint_.CanAdd(node, nextProc)) { toErase.push_back(node); } } for (const auto &node : toErase) { procReady[nextProc].erase(node); - max_proc_score_heap[nextProc].erase(node); - ready_phase[node] = std::numeric_limits::max(); + maxProcScoreHeap_[nextProc].Erase(node); + readyPhase_[node] = std::numeric_limits::max(); } } - finishTimes.emplace(time + G.vertex_work_weight(nextNode), nextNode); + finishTimes.emplace(time + g.VertexWorkWeight(nextNode), nextNode); procFree[nextProc] = false; --free; // update auxiliary structures - for (const 
auto &succ : G.children(nextNode)) { - if (locked[succ] < params_p && locked[succ] != nextProc) { - for (const auto &parent : G.parents(succ)) { - if (ready_phase[parent] < std::numeric_limits::max() && ready_phase[parent] < params_p - && ready_phase[parent] != locked[succ]) { - Priority p = max_proc_score_heap[ready_phase[parent]].get_value(parent); - std::get<0>(p) += lock_penalty; - max_proc_score_heap[ready_phase[parent]].update(parent, p); + for (const auto &succ : g.Children(nextNode)) { + if (locked_[succ] < paramsP && locked_[succ] != nextProc) { + for (const auto &parent : g.Parents(succ)) { + if (readyPhase_[parent] < std::numeric_limits::max() && readyPhase_[parent] < paramsP + && readyPhase_[parent] != locked_[succ]) { + Priority p = maxProcScoreHeap_[readyPhase_[parent]].GetValue(parent); + std::get<0>(p) += lockPenalty_; + maxProcScoreHeap_[readyPhase_[parent]].Update(parent, p); } - if (ready_phase[parent] == params_p) { - for (unsigned proc = 0; proc < params_p; ++proc) { - if (proc == locked[succ] || !instance.isCompatible(parent, proc)) { + if (readyPhase_[parent] == paramsP) { + for (unsigned proc = 0; proc < paramsP; ++proc) { + if (proc == locked_[succ] || !instance.IsCompatible(parent, proc)) { continue; } - if (max_all_proc_score_heap[proc].contains(parent)) { - Priority p = max_all_proc_score_heap[proc].get_value(parent); - std::get<0>(p) += lock_penalty; - max_all_proc_score_heap[proc].update(parent, p); + if (maxAllProcScoreHeap_[proc].Contains(parent)) { + Priority p = maxAllProcScoreHeap_[proc].GetValue(parent); + std::get<0>(p) += lockPenalty_; + maxAllProcScoreHeap_[proc].Update(parent, p); } } } } - locked[succ] = params_p; - } else if (locked[succ] == std::numeric_limits::max()) { - locked_set.push_back(succ); - locked[succ] = nextProc; - - for (const auto &parent : G.parents(succ)) { - if (ready_phase[parent] < std::numeric_limits::max() && ready_phase[parent] < params_p - && ready_phase[parent] != nextProc) { - Priority p = 
max_proc_score_heap[ready_phase[parent]].get_value(parent); - std::get<0>(p) -= lock_penalty; - max_proc_score_heap[ready_phase[parent]].update(parent, p); + locked_[succ] = paramsP; + } else if (locked_[succ] == std::numeric_limits::max()) { + lockedSet_.push_back(succ); + locked_[succ] = nextProc; + + for (const auto &parent : g.Parents(succ)) { + if (readyPhase_[parent] < std::numeric_limits::max() && readyPhase_[parent] < paramsP + && readyPhase_[parent] != nextProc) { + Priority p = maxProcScoreHeap_[readyPhase_[parent]].GetValue(parent); + std::get<0>(p) -= lockPenalty_; + maxProcScoreHeap_[readyPhase_[parent]].Update(parent, p); } - if (ready_phase[parent] == params_p) { - for (unsigned proc = 0; proc < params_p; ++proc) { - if (proc == nextProc || !instance.isCompatible(parent, proc)) { + if (readyPhase_[parent] == paramsP) { + for (unsigned proc = 0; proc < paramsP; ++proc) { + if (proc == nextProc || !instance.IsCompatible(parent, proc)) { continue; } - if (max_all_proc_score_heap[proc].contains(parent)) { - Priority p = max_all_proc_score_heap[proc].get_value(parent); - std::get<0>(p) -= lock_penalty; - max_all_proc_score_heap[proc].update(parent, p); + if (maxAllProcScoreHeap_[proc].Contains(parent)) { + Priority p = maxAllProcScoreHeap_[proc].GetValue(parent); + std::get<0>(p) -= lockPenalty_; + maxAllProcScoreHeap_[proc].Update(parent, p); } } } @@ -589,24 +590,24 @@ class BspLocking : public Scheduler { } } - if constexpr (use_memory_constraint) { - if (not check_mem_feasibility(instance, allReady, procReady)) { - return RETURN_STATUS::ERROR; + if constexpr (useMemoryConstraint_) { + if (not CheckMemFeasibility(instance, allReady, procReady)) { + return ReturnStatus::ERROR; } } - if (free > params_p * max_percent_idle_processors - && ((!increase_parallelism_in_new_superstep) - || get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) - >= std::min(std::min(params_p, static_cast(1.2 * (params_p - free))), - params_p - free + 
(static_cast(0.5 * free))))) { + if (free > paramsP * maxPercentIdleProcessors_ + && ((!increaseParallelismInNewSuperstep_) + || GetNrParallelizableNodes(instance, nrReadyNodesPerType, nrProcsPerType) + >= std::min(std::min(paramsP, static_cast(1.2 * (paramsP - free))), + paramsP - free + (static_cast(0.5 * free))))) { endSupStep = true; } } - assert(schedule.satisfiesPrecedenceConstraints()); + assert(schedule.SatisfiesPrecedenceConstraints()); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } /** @@ -616,17 +617,15 @@ class BspLocking : public Scheduler { * * @return The name of the schedule. */ - virtual std::string getScheduleName() const override { - if (use_memory_constraint) { + virtual std::string GetScheduleName() const override { + if (useMemoryConstraint_) { return "BspGreedyLockingMemory"; } else { return "BspGreedyLocking"; } } - void set_max_percent_idle_processors(float max_percent_idle_processors_) { - max_percent_idle_processors = max_percent_idle_processors_; - } + void SetMaxPercentIdleProcessors(float maxPercentIdleProcessors) { maxPercentIdleProcessors_ = maxPercentIdleProcessors; } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp b/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp index 7294e974..fa93635f 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp @@ -25,412 +25,410 @@ limitations under the License. namespace osp { -template +template class GreedyBspToMaxBspConverter { - static_assert(is_computational_dag_v, "GreedyBspToMaxBspConverter can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, + static_assert(isComputationalDagV, "GreedyBspToMaxBspConverter can only be used with computational DAGs."); + static_assert(std::is_same_v, VCommwT>, "GreedyBspToMaxBspConverter requires work and comm. 
weights to have the same type."); protected: - using vertex_idx = vertex_idx_t; - using cost_type = v_workw_t; - using KeyTriple = std::tuple, unsigned int, unsigned int>; + using VertexIdx = VertexIdxT; + using CostType = VWorkwT; + using KeyTriple = std::tuple, unsigned int, unsigned int>; - double latency_coefficient = 1.25; - double decay_factor = 0.5; + double latencyCoefficient_ = 1.25; + double decayFactor_ = 0.5; - std::vector>>> createSuperstepLists(const BspScheduleCS &schedule, - std::vector &priorities) const; + std::vector>>> CreateSuperstepLists(const BspScheduleCS &schedule, + std::vector &priorities) const; public: - MaxBspSchedule Convert(const BspSchedule &schedule) const; - MaxBspScheduleCS Convert(const BspScheduleCS &schedule) const; + MaxBspSchedule Convert(const BspSchedule &schedule) const; + MaxBspScheduleCS Convert(const BspScheduleCS &schedule) const; }; -template -MaxBspSchedule GreedyBspToMaxBspConverter::Convert(const BspSchedule &schedule) const { - BspScheduleCS schedule_cs(schedule); - return Convert(schedule_cs); +template +MaxBspSchedule GreedyBspToMaxBspConverter::Convert(const BspSchedule &schedule) const { + BspScheduleCS scheduleCs(schedule); + return Convert(scheduleCs); } -template -MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const BspScheduleCS &schedule) const { - const Graph_t &dag = schedule.getInstance().getComputationalDag(); +template +MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const BspScheduleCS &schedule) const { + const GraphT &dag = schedule.GetInstance().GetComputationalDag(); // Initialize data structures std::vector priorities; - std::vector>> proc_list = createSuperstepLists(schedule, priorities); - std::vector> work_remaining_proc_superstep(schedule.getInstance().numberOfProcessors(), - std::vector(schedule.numberOfSupersteps(), 0)); - std::vector nodes_remaining_superstep(schedule.numberOfSupersteps(), 0); - - MaxBspScheduleCS schedule_max(schedule.getInstance()); - for (vertex_idx node = 
0; node < schedule.getInstance().numberOfVertices(); node++) { - work_remaining_proc_superstep[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)] - += dag.vertex_work_weight(node); - ++nodes_remaining_superstep[schedule.assignedSuperstep(node)]; - schedule_max.setAssignedProcessor(node, schedule.assignedProcessor(node)); + std::vector>> procList = CreateSuperstepLists(schedule, priorities); + std::vector> workRemainingProcSuperstep(schedule.GetInstance().NumberOfProcessors(), + std::vector(schedule.NumberOfSupersteps(), 0)); + std::vector nodesRemainingSuperstep(schedule.NumberOfSupersteps(), 0); + + MaxBspScheduleCS scheduleMax(schedule.GetInstance()); + for (VertexIdx node = 0; node < schedule.GetInstance().NumberOfVertices(); node++) { + workRemainingProcSuperstep[schedule.AssignedProcessor(node)][schedule.AssignedSuperstep(node)] + += dag.VertexWorkWeight(node); + ++nodesRemainingSuperstep[schedule.AssignedSuperstep(node)]; + scheduleMax.SetAssignedProcessor(node, schedule.AssignedProcessor(node)); } - std::vector> send_comm_remaining_proc_superstep( - schedule.getInstance().numberOfProcessors(), std::vector(schedule.numberOfSupersteps(), 0)); - std::vector> rec_comm_remaining_proc_superstep( - schedule.getInstance().numberOfProcessors(), std::vector(schedule.numberOfSupersteps(), 0)); - - std::vector>> free_comm_steps_for_superstep(schedule.numberOfSupersteps()); - std::vector>> dependent_comm_steps_for_node( - schedule.getInstance().numberOfVertices()); - for (auto const &[key, val] : schedule.getCommunicationSchedule()) { - if (schedule.assignedSuperstep(std::get<0>(key)) == val) { - dependent_comm_steps_for_node[std::get<0>(key)].emplace_back(key, val); - - cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(key)) - * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key)); - send_comm_remaining_proc_superstep[std::get<1>(key)][val] += comm_cost; - rec_comm_remaining_proc_superstep[std::get<2>(key)][val] 
+= comm_cost; + std::vector> sendCommRemainingProcSuperstep(schedule.GetInstance().NumberOfProcessors(), + std::vector(schedule.NumberOfSupersteps(), 0)); + std::vector> recCommRemainingProcSuperstep(schedule.GetInstance().NumberOfProcessors(), + std::vector(schedule.NumberOfSupersteps(), 0)); + + std::vector>> freeCommStepsForSuperstep(schedule.NumberOfSupersteps()); + std::vector>> dependentCommStepsForNode(schedule.GetInstance().NumberOfVertices()); + for (auto const &[key, val] : schedule.GetCommunicationSchedule()) { + if (schedule.AssignedSuperstep(std::get<0>(key)) == val) { + dependentCommStepsForNode[std::get<0>(key)].emplace_back(key, val); + + CostType commCost = dag.VertexCommWeight(std::get<0>(key)) + * schedule.GetInstance().GetArchitecture().SendCosts(std::get<1>(key), std::get<2>(key)); + sendCommRemainingProcSuperstep[std::get<1>(key)][val] += commCost; + recCommRemainingProcSuperstep[std::get<2>(key)][val] += commCost; } else { - free_comm_steps_for_superstep[val].emplace(key, val); + freeCommStepsForSuperstep[val].emplace(key, val); } } // Iterate through supersteps - unsigned current_step = 0; - for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) { - std::vector work_done_on_proc(schedule.getInstance().numberOfProcessors(), 0); - cost_type max_work_done = 0; - std::vector> newly_freed_comm_steps; - std::vector send_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0), - rec_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0); + unsigned currentStep = 0; + for (unsigned step = 0; step < schedule.NumberOfSupersteps(); ++step) { + std::vector workDoneOnProc(schedule.GetInstance().NumberOfProcessors(), 0); + CostType maxWorkDone = 0; + std::vector> newlyFreedCommSteps; + std::vector sendSumOfNewlyFreeOnProc(schedule.GetInstance().NumberOfProcessors(), 0), + recSumOfNewlyFreeOnProc(schedule.GetInstance().NumberOfProcessors(), 0); - std::vector> comm_in_current_step; + std::vector> 
commInCurrentStep; - std::vector send_on_proc(schedule.getInstance().numberOfProcessors(), 0), - rec_on_proc(schedule.getInstance().numberOfProcessors(), 0); - bool empty_superstep = (nodes_remaining_superstep[step] == 0); + std::vector sendOnProc(schedule.GetInstance().NumberOfProcessors(), 0), + recOnProc(schedule.GetInstance().NumberOfProcessors(), 0); + bool emptySuperstep = (nodesRemainingSuperstep[step] == 0); - while (nodes_remaining_superstep[step] > 0) { + while (nodesRemainingSuperstep[step] > 0) { // I. Select the next node (from any proc) with highest priority - unsigned chosen_proc = schedule.getInstance().numberOfProcessors(); - double best_prio = std::numeric_limits::max(); - - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - if (!proc_list[proc][step].empty() - && (chosen_proc == schedule.getInstance().numberOfProcessors() - || priorities[proc_list[proc][step].front()] < best_prio)) { - chosen_proc = proc; - best_prio = priorities[proc_list[proc][step].front()]; + unsigned chosenProc = schedule.GetInstance().NumberOfProcessors(); + double bestPrio = std::numeric_limits::max(); + + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + if (!procList[proc][step].empty() + && (chosenProc == schedule.GetInstance().NumberOfProcessors() + || priorities[procList[proc][step].front()] < bestPrio)) { + chosenProc = proc; + bestPrio = priorities[procList[proc][step].front()]; } } - if (chosen_proc == schedule.getInstance().numberOfProcessors()) { + if (chosenProc == schedule.GetInstance().NumberOfProcessors()) { break; } - vertex_idx chosen_node = proc_list[chosen_proc][step].front(); - proc_list[chosen_proc][step].pop_front(); - work_done_on_proc[chosen_proc] += dag.vertex_work_weight(chosen_node); - work_remaining_proc_superstep[chosen_proc][step] -= dag.vertex_work_weight(chosen_node); - max_work_done = std::max(max_work_done, work_done_on_proc[chosen_proc]); - 
schedule_max.setAssignedSuperstep(chosen_node, current_step); - --nodes_remaining_superstep[step]; - for (const std::pair &entry : dependent_comm_steps_for_node[chosen_node]) { - newly_freed_comm_steps.push_back(entry); - cost_type comm_cost - = dag.vertex_comm_weight(chosen_node) - * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)); - send_sum_of_newly_free_on_proc[std::get<1>(entry.first)] += comm_cost; - rec_sum_of_newly_free_on_proc[std::get<2>(entry.first)] += comm_cost; + VertexIdx chosenNode = procList[chosenProc][step].front(); + procList[chosenProc][step].pop_front(); + workDoneOnProc[chosenProc] += dag.VertexWorkWeight(chosenNode); + workRemainingProcSuperstep[chosenProc][step] -= dag.VertexWorkWeight(chosenNode); + maxWorkDone = std::max(maxWorkDone, workDoneOnProc[chosenProc]); + scheduleMax.SetAssignedSuperstep(chosenNode, currentStep); + --nodesRemainingSuperstep[step]; + for (const std::pair &entry : dependentCommStepsForNode[chosenNode]) { + newlyFreedCommSteps.push_back(entry); + CostType commCost + = dag.VertexCommWeight(chosenNode) + * schedule.GetInstance().GetArchitecture().SendCosts(std::get<1>(entry.first), std::get<2>(entry.first)); + sendSumOfNewlyFreeOnProc[std::get<1>(entry.first)] += commCost; + recSumOfNewlyFreeOnProc[std::get<2>(entry.first)] += commCost; } // II. 
Add nodes on all other processors if this doesn't increase work cost - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - if (proc == chosen_proc) { + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + if (proc == chosenProc) { continue; } - while (!proc_list[proc][step].empty() - && work_done_on_proc[proc] + dag.vertex_work_weight(proc_list[proc][step].front()) <= max_work_done) { - vertex_idx node = proc_list[proc][step].front(); - proc_list[proc][step].pop_front(); - work_done_on_proc[proc] += dag.vertex_work_weight(node); - work_remaining_proc_superstep[proc][step] -= dag.vertex_work_weight(node); - schedule_max.setAssignedSuperstep(node, current_step); - --nodes_remaining_superstep[step]; - for (const std::pair &entry : dependent_comm_steps_for_node[node]) { - newly_freed_comm_steps.push_back(entry); - cost_type comm_cost = dag.vertex_comm_weight(node) - * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), - std::get<2>(entry.first)); - send_sum_of_newly_free_on_proc[std::get<1>(entry.first)] += comm_cost; - rec_sum_of_newly_free_on_proc[std::get<2>(entry.first)] += comm_cost; + while (!procList[proc][step].empty() + && workDoneOnProc[proc] + dag.VertexWorkWeight(procList[proc][step].front()) <= maxWorkDone) { + VertexIdx node = procList[proc][step].front(); + procList[proc][step].pop_front(); + workDoneOnProc[proc] += dag.VertexWorkWeight(node); + workRemainingProcSuperstep[proc][step] -= dag.VertexWorkWeight(node); + scheduleMax.SetAssignedSuperstep(node, currentStep); + --nodesRemainingSuperstep[step]; + for (const std::pair &entry : dependentCommStepsForNode[node]) { + newlyFreedCommSteps.push_back(entry); + CostType commCost = dag.VertexCommWeight(node) + * schedule.GetInstance().GetArchitecture().SendCosts(std::get<1>(entry.first), + std::get<2>(entry.first)); + sendSumOfNewlyFreeOnProc[std::get<1>(entry.first)] += commCost; + 
recSumOfNewlyFreeOnProc[std::get<2>(entry.first)] += commCost; } } } // III. Add communication steps that are already available - for (auto itr = free_comm_steps_for_superstep[step].begin(); itr != free_comm_steps_for_superstep[step].end();) { - if (send_on_proc[std::get<1>(itr->first)] < max_work_done && rec_on_proc[std::get<2>(itr->first)] < max_work_done) { - cost_type comm_cost - = dag.vertex_comm_weight(std::get<0>(itr->first)) - * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(itr->first), std::get<2>(itr->first)) - * schedule.getInstance().getArchitecture().communicationCosts(); - send_on_proc[std::get<1>(itr->first)] += comm_cost; - rec_on_proc[std::get<2>(itr->first)] += comm_cost; - if (current_step - 1 >= schedule_max.numberOfSupersteps()) { - schedule_max.setNumberOfSupersteps(current_step); + for (auto itr = freeCommStepsForSuperstep[step].begin(); itr != freeCommStepsForSuperstep[step].end();) { + if (sendOnProc[std::get<1>(itr->first)] < maxWorkDone && recOnProc[std::get<2>(itr->first)] < maxWorkDone) { + CostType commCost + = dag.VertexCommWeight(std::get<0>(itr->first)) + * schedule.GetInstance().GetArchitecture().SendCosts(std::get<1>(itr->first), std::get<2>(itr->first)) + * schedule.GetInstance().GetArchitecture().CommunicationCosts(); + sendOnProc[std::get<1>(itr->first)] += commCost; + recOnProc[std::get<2>(itr->first)] += commCost; + if (currentStep - 1 >= scheduleMax.NumberOfSupersteps()) { + scheduleMax.SetNumberOfSupersteps(currentStep); } - schedule_max.addCommunicationScheduleEntry(itr->first, current_step - 1); - comm_in_current_step.emplace_back(*itr); - free_comm_steps_for_superstep[step].erase(itr++); + scheduleMax.AddCommunicationScheduleEntry(itr->first, currentStep - 1); + commInCurrentStep.emplace_back(*itr); + freeCommStepsForSuperstep[step].erase(itr++); } else { ++itr; } } // IV. 
Decide whether to split superstep here - if (!free_comm_steps_for_superstep[step].empty() || nodes_remaining_superstep[step] == 0) { + if (!freeCommStepsForSuperstep[step].empty() || nodesRemainingSuperstep[step] == 0) { continue; } - cost_type max_work_remaining = 0, max_comm_remaining = 0, comm_after_reduction = 0; - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - max_work_remaining = std::max(max_work_remaining, work_remaining_proc_superstep[proc][step]); - max_comm_remaining = std::max(max_comm_remaining, send_comm_remaining_proc_superstep[proc][step]); - max_comm_remaining = std::max(max_comm_remaining, rec_comm_remaining_proc_superstep[proc][step]); - comm_after_reduction = std::max( - comm_after_reduction, send_comm_remaining_proc_superstep[proc][step] - send_sum_of_newly_free_on_proc[proc]); - comm_after_reduction = std::max( - comm_after_reduction, rec_comm_remaining_proc_superstep[proc][step] - rec_sum_of_newly_free_on_proc[proc]); + CostType maxWorkRemaining = 0, maxCommRemaining = 0, commAfterReduction = 0; + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + maxWorkRemaining = std::max(maxWorkRemaining, workRemainingProcSuperstep[proc][step]); + maxCommRemaining = std::max(maxCommRemaining, sendCommRemainingProcSuperstep[proc][step]); + maxCommRemaining = std::max(maxCommRemaining, recCommRemainingProcSuperstep[proc][step]); + commAfterReduction + = std::max(commAfterReduction, sendCommRemainingProcSuperstep[proc][step] - sendSumOfNewlyFreeOnProc[proc]); + commAfterReduction + = std::max(commAfterReduction, recCommRemainingProcSuperstep[proc][step] - recSumOfNewlyFreeOnProc[proc]); } - cost_type comm_reduction - = (max_comm_remaining - comm_after_reduction) * schedule.getInstance().getArchitecture().communicationCosts(); + CostType commReduction + = (maxCommRemaining - commAfterReduction) * schedule.GetInstance().GetArchitecture().CommunicationCosts(); - cost_type gain = 
std::min(comm_reduction, max_work_remaining); + CostType gain = std::min(commReduction, maxWorkRemaining); if (gain > 0 - && static_cast(gain) >= static_cast(schedule.getInstance().getArchitecture().synchronisationCosts()) - * latency_coefficient) { + && static_cast(gain) >= static_cast(schedule.GetInstance().GetArchitecture().SynchronisationCosts()) + * latencyCoefficient_) { // Split superstep - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - work_done_on_proc[proc] = 0; - send_on_proc[proc] = 0; - rec_on_proc[proc] = 0; - send_sum_of_newly_free_on_proc[proc] = 0; - rec_sum_of_newly_free_on_proc[proc] = 0; + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + workDoneOnProc[proc] = 0; + sendOnProc[proc] = 0; + recOnProc[proc] = 0; + sendSumOfNewlyFreeOnProc[proc] = 0; + recSumOfNewlyFreeOnProc[proc] = 0; } - max_work_done = 0; - for (const std::pair &entry : newly_freed_comm_steps) { - free_comm_steps_for_superstep[step].insert(entry); - - cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(entry.first)) - * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), - std::get<2>(entry.first)); - send_comm_remaining_proc_superstep[std::get<1>(entry.first)][step] -= comm_cost; - rec_comm_remaining_proc_superstep[std::get<2>(entry.first)][step] -= comm_cost; + maxWorkDone = 0; + for (const std::pair &entry : newlyFreedCommSteps) { + freeCommStepsForSuperstep[step].insert(entry); + + CostType commCost = dag.VertexCommWeight(std::get<0>(entry.first)) + * schedule.GetInstance().GetArchitecture().SendCosts(std::get<1>(entry.first), + std::get<2>(entry.first)); + sendCommRemainingProcSuperstep[std::get<1>(entry.first)][step] -= commCost; + recCommRemainingProcSuperstep[std::get<2>(entry.first)][step] -= commCost; } - newly_freed_comm_steps.clear(); - comm_in_current_step.clear(); - ++current_step; + newlyFreedCommSteps.clear(); + commInCurrentStep.clear(); + ++currentStep; 
} } - if (!empty_superstep) { - ++current_step; + if (!emptySuperstep) { + ++currentStep; } - for (const std::pair &entry : newly_freed_comm_steps) { - free_comm_steps_for_superstep[step].insert(entry); + for (const std::pair &entry : newlyFreedCommSteps) { + freeCommStepsForSuperstep[step].insert(entry); } - if (free_comm_steps_for_superstep[step].empty()) { + if (freeCommStepsForSuperstep[step].empty()) { continue; } // Handle the remaining communication steps: creating a new superstep afterwards with no work - cost_type max_comm_current = 0; - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - max_comm_current = std::max(max_comm_current, send_on_proc[proc]); - max_comm_current = std::max(max_comm_current, rec_on_proc[proc]); + CostType maxCommCurrent = 0; + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + maxCommCurrent = std::max(maxCommCurrent, sendOnProc[proc]); + maxCommCurrent = std::max(maxCommCurrent, recOnProc[proc]); } - send_on_proc.clear(); - send_on_proc.resize(schedule.getInstance().numberOfProcessors(), 0); - rec_on_proc.clear(); - rec_on_proc.resize(schedule.getInstance().numberOfProcessors(), 0); - - std::set> late_arriving_nodes; - for (const std::pair &entry : free_comm_steps_for_superstep[step]) { - schedule_max.addCommunicationScheduleEntry(entry.first, current_step - 1); - cost_type comm_cost - = dag.vertex_comm_weight(std::get<0>(entry.first)) - * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) - * schedule.getInstance().getArchitecture().communicationCosts(); - send_on_proc[std::get<1>(entry.first)] += comm_cost; - rec_on_proc[std::get<2>(entry.first)] += comm_cost; - late_arriving_nodes.emplace(std::get<0>(entry.first), std::get<2>(entry.first)); + sendOnProc.clear(); + sendOnProc.resize(schedule.GetInstance().NumberOfProcessors(), 0); + recOnProc.clear(); + 
recOnProc.resize(schedule.GetInstance().NumberOfProcessors(), 0); + + std::set> lateArrivingNodes; + for (const std::pair &entry : freeCommStepsForSuperstep[step]) { + scheduleMax.AddCommunicationScheduleEntry(entry.first, currentStep - 1); + CostType commCost + = dag.VertexCommWeight(std::get<0>(entry.first)) + * schedule.GetInstance().GetArchitecture().SendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) + * schedule.GetInstance().GetArchitecture().CommunicationCosts(); + sendOnProc[std::get<1>(entry.first)] += commCost; + recOnProc[std::get<2>(entry.first)] += commCost; + lateArrivingNodes.emplace(std::get<0>(entry.first), std::get<2>(entry.first)); } // Edge case - check if it is worth moving all communications from the current superstep to the next one instead (thus // saving a sync cost) (for this we need to compute the h-relation-max in the current superstep, the next superstep, and // also their union) - cost_type max_comm_after = 0; - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - max_comm_after = std::max(max_comm_after, send_on_proc[proc]); - max_comm_after = std::max(max_comm_after, rec_on_proc[proc]); + CostType maxCommAfter = 0; + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + maxCommAfter = std::max(maxCommAfter, sendOnProc[proc]); + maxCommAfter = std::max(maxCommAfter, recOnProc[proc]); } - for (const std::pair &entry : comm_in_current_step) { - cost_type comm_cost - = dag.vertex_comm_weight(std::get<0>(entry.first)) - * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) - * schedule.getInstance().getArchitecture().communicationCosts(); - send_on_proc[std::get<1>(entry.first)] += comm_cost; - rec_on_proc[std::get<2>(entry.first)] += comm_cost; + for (const std::pair &entry : commInCurrentStep) { + CostType commCost + = dag.VertexCommWeight(std::get<0>(entry.first)) + * 
schedule.GetInstance().GetArchitecture().SendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) + * schedule.GetInstance().GetArchitecture().CommunicationCosts(); + sendOnProc[std::get<1>(entry.first)] += commCost; + recOnProc[std::get<2>(entry.first)] += commCost; } - cost_type max_comm_together = 0; - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - max_comm_together = std::max(max_comm_together, send_on_proc[proc]); - max_comm_together = std::max(max_comm_together, rec_on_proc[proc]); + CostType maxCommTogether = 0; + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + maxCommTogether = std::max(maxCommTogether, sendOnProc[proc]); + maxCommTogether = std::max(maxCommTogether, recOnProc[proc]); } - cost_type work_limit = max_comm_after; - if (max_comm_together + max_work_done <= max_comm_after + std::max(max_work_done, max_comm_current) - + schedule.getInstance().getArchitecture().synchronisationCosts()) { - work_limit = max_comm_together; - for (const std::pair &entry : comm_in_current_step) { - if (current_step - 1 >= schedule_max.numberOfSupersteps()) { - schedule_max.setNumberOfSupersteps(current_step); + CostType workLimit = maxCommAfter; + if (maxCommTogether + maxWorkDone <= maxCommAfter + std::max(maxWorkDone, maxCommCurrent) + + schedule.GetInstance().GetArchitecture().SynchronisationCosts()) { + workLimit = maxCommTogether; + for (const std::pair &entry : commInCurrentStep) { + if (currentStep - 1 >= scheduleMax.NumberOfSupersteps()) { + scheduleMax.SetNumberOfSupersteps(currentStep); } - schedule_max.addCommunicationScheduleEntry(entry.first, current_step - 1); - late_arriving_nodes.emplace(std::get<0>(entry.first), std::get<2>(entry.first)); + scheduleMax.AddCommunicationScheduleEntry(entry.first, currentStep - 1); + lateArrivingNodes.emplace(std::get<0>(entry.first), std::get<2>(entry.first)); } } // Bring computation steps into the extra superstep from the next 
superstep, if possible,a s long as it does not increase cost - if (step == schedule.numberOfSupersteps() - 1) { + if (step == schedule.NumberOfSupersteps() - 1) { continue; } - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - cost_type work_so_far = 0; - std::set brought_forward; - for (vertex_idx node : proc_list[proc][step + 1]) { - if (work_so_far + dag.vertex_work_weight(node) > work_limit) { + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + CostType workSoFar = 0; + std::set broughtForward; + for (VertexIdx node : procList[proc][step + 1]) { + if (workSoFar + dag.VertexWorkWeight(node) > workLimit) { continue; } - bool has_dependency = false; + bool hasDependency = false; - for (const vertex_idx &parent : dag.parents(node)) { - if (schedule.assignedProcessor(node) != schedule.assignedProcessor(parent) - && late_arriving_nodes.find(std::make_pair(parent, proc)) != late_arriving_nodes.end()) { - has_dependency = true; + for (const VertexIdx &parent : dag.Parents(node)) { + if (schedule.AssignedProcessor(node) != schedule.AssignedProcessor(parent) + && lateArrivingNodes.find(std::make_pair(parent, proc)) != lateArrivingNodes.end()) { + hasDependency = true; } - if (schedule.assignedProcessor(node) == schedule.assignedProcessor(parent) - && schedule.assignedSuperstep(parent) == step + 1 - && brought_forward.find(parent) == brought_forward.end()) { - has_dependency = true; + if (schedule.AssignedProcessor(node) == schedule.AssignedProcessor(parent) + && schedule.AssignedSuperstep(parent) == step + 1 && broughtForward.find(parent) == broughtForward.end()) { + hasDependency = true; } } - if (has_dependency) { + if (hasDependency) { continue; } - brought_forward.insert(node); - work_so_far += dag.vertex_work_weight(node); - schedule_max.setAssignedSuperstep(node, current_step); - work_remaining_proc_superstep[proc][step + 1] -= dag.vertex_work_weight(node); - 
--nodes_remaining_superstep[step + 1]; + broughtForward.insert(node); + workSoFar += dag.VertexWorkWeight(node); + scheduleMax.SetAssignedSuperstep(node, currentStep); + workRemainingProcSuperstep[proc][step + 1] -= dag.VertexWorkWeight(node); + --nodesRemainingSuperstep[step + 1]; - for (const std::pair &entry : dependent_comm_steps_for_node[node]) { - free_comm_steps_for_superstep[step + 1].insert(entry); + for (const std::pair &entry : dependentCommStepsForNode[node]) { + freeCommStepsForSuperstep[step + 1].insert(entry); } } - std::deque remaining; - for (vertex_idx node : proc_list[proc][step + 1]) { - if (brought_forward.find(node) == brought_forward.end()) { + std::deque remaining; + for (VertexIdx node : procList[proc][step + 1]) { + if (broughtForward.find(node) == broughtForward.end()) { remaining.push_back(node); } } - proc_list[proc][step + 1] = remaining; + procList[proc][step + 1] = remaining; } - ++current_step; + ++currentStep; } - return schedule_max; + return scheduleMax; } // Auxiliary function: creates a separate vectors for each proc-supstep combination, collecting the nodes in a priority-based // topological order -template -std::vector>>> GreedyBspToMaxBspConverter::createSuperstepLists( - const BspScheduleCS &schedule, std::vector &priorities) const { - const Graph_t &dag = schedule.getInstance().getComputationalDag(); - std::vector top_order = GetTopOrder(dag); +template +std::vector>>> GreedyBspToMaxBspConverter::CreateSuperstepLists( + const BspScheduleCS &schedule, std::vector &priorities) const { + const GraphT &dag = schedule.GetInstance().GetComputationalDag(); + std::vector topOrder = GetTopOrder(dag); priorities.clear(); - priorities.resize(dag.num_vertices()); - std::vector local_in_degree(dag.num_vertices(), 0); + priorities.resize(dag.NumVertices()); + std::vector localInDegree(dag.NumVertices(), 0); // compute for each node the amount of dependent send cost in the same superstep - std::vector comm_dependency(dag.num_vertices(), 
0); - for (auto const &[key, val] : schedule.getCommunicationSchedule()) { - if (schedule.assignedSuperstep(std::get<0>(key)) == val) { - comm_dependency[std::get<0>(key)] - += dag.vertex_comm_weight(std::get<0>(key)) - * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key)); + std::vector commDependency(dag.NumVertices(), 0); + for (auto const &[key, val] : schedule.GetCommunicationSchedule()) { + if (schedule.AssignedSuperstep(std::get<0>(key)) == val) { + commDependency[std::get<0>(key)] + += dag.VertexCommWeight(std::get<0>(key)) + * schedule.GetInstance().GetArchitecture().SendCosts(std::get<1>(key), std::get<2>(key)); } } // assign priority to nodes - based on their own work/comm ratio, and that of its successors in the same proc/supstep - for (auto itr = top_order.rbegin(); itr != top_order.rend(); ++itr) { - vertex_idx node = *itr; - double base = static_cast(dag.vertex_work_weight(node)); - if (comm_dependency[node] > 0) { - base /= static_cast(2 * comm_dependency[node]); + for (auto itr = topOrder.rbegin(); itr != topOrder.rend(); ++itr) { + VertexIdx node = *itr; + double base = static_cast(dag.VertexWorkWeight(node)); + if (commDependency[node] > 0) { + base /= static_cast(2 * commDependency[node]); } double successors = 0; - unsigned num_children = 0; - for (const vertex_idx &child : dag.children(node)) { - if (schedule.assignedProcessor(node) == schedule.assignedProcessor(child) - && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child)) { - ++num_children; + unsigned numChildren = 0; + for (const VertexIdx &child : dag.Children(node)) { + if (schedule.AssignedProcessor(node) == schedule.AssignedProcessor(child) + && schedule.AssignedSuperstep(node) == schedule.AssignedSuperstep(child)) { + ++numChildren; successors += priorities[child]; - ++local_in_degree[child]; + ++localInDegree[child]; } } - if (num_children > 0) { - successors = successors * decay_factor / static_cast(num_children); + if 
(numChildren > 0) { + successors = successors * decayFactor_ / static_cast(numChildren); } priorities[node] = base + successors; } // create lists for each processor-superstep pair, in a topological order, sorted by priority - std::vector>> superstep_lists( - schedule.getInstance().numberOfProcessors(), std::vector>(schedule.numberOfSupersteps())); + std::vector>> superstepLists( + schedule.GetInstance().NumberOfProcessors(), std::vector>(schedule.NumberOfSupersteps())); - std::set> free; - for (vertex_idx node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - if (local_in_degree[node] == 0) { + std::set> free; + for (VertexIdx node = 0; node < schedule.GetInstance().NumberOfVertices(); node++) { + if (localInDegree[node] == 0) { free.emplace(priorities[node], node); } } while (!free.empty()) { - vertex_idx node = free.begin()->second; + VertexIdx node = free.begin()->second; free.erase(free.begin()); - superstep_lists[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)].push_back(node); - for (const vertex_idx &child : dag.children(node)) { - if (schedule.assignedProcessor(node) == schedule.assignedProcessor(child) - && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child)) { - if (--local_in_degree[child] == 0) { + superstepLists[schedule.AssignedProcessor(node)][schedule.AssignedSuperstep(node)].push_back(node); + for (const VertexIdx &child : dag.Children(node)) { + if (schedule.AssignedProcessor(node) == schedule.AssignedProcessor(child) + && schedule.AssignedSuperstep(node) == schedule.AssignedSuperstep(child)) { + if (--localInDegree[child] == 0) { free.emplace(priorities[child], child); } } } } - return superstep_lists; + return superstepLists; } } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp index 9aeab9cc..7c0b3db2 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp +++ 
b/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp @@ -41,48 +41,40 @@ enum CilkMode { CILK, SJF }; * a greedy scheduling algorithm for Cilk-based BSP (Bulk Synchronous Parallel) systems. The scheduler * selects the next node and processor to execute a task based on a greedy strategy. */ -template -class CilkScheduler : public Scheduler { - static_assert(is_computational_dag_v, "CilkScheduler can only be used with computational DAGs."); +template +class CilkScheduler : public Scheduler { + static_assert(isComputationalDagV, "CilkScheduler can only be used with computational DAGs."); private: - using tv_pair = std::pair, vertex_idx_t>; + using TvPair = std::pair, VertexIdxT>; - CilkMode mode; /**< The mode of the Cilk scheduler. */ + CilkMode mode_; /**< The mode of the Cilk scheduler. */ - // constexpr static bool use_memory_constraint = is_memory_constraint_v; + std::mt19937 gen_; - // static_assert(not use_memory_constraint || - // std::is_same_v>, - // "CilkScheduler implements only persistent_transient_memory_constraint."); - - // MemoryConstraint_t memory_constraint; - - std::mt19937 gen; - - void Choose(const BspInstance &instance, - std::vector>> &procQueue, - const std::set> &readyNodes, + void Choose(const BspInstance &instance, + std::vector>> &procQueue, + const std::set> &readyNodes, const std::vector &procFree, - vertex_idx_t &node, + VertexIdxT &node, unsigned &p) { - if (mode == SJF) { + if (mode_ == SJF) { node = *readyNodes.begin(); for (auto &r : readyNodes) { - if (instance.getComputationalDag().vertex_work_weight(r) < instance.getComputationalDag().vertex_work_weight(node)) { + if (instance.GetComputationalDag().VertexWorkWeight(r) < instance.GetComputationalDag().VertexWorkWeight(node)) { node = r; } } p = 0; - for (; p < instance.numberOfProcessors(); ++p) { + for (; p < instance.NumberOfProcessors(); ++p) { if (procFree[p]) { break; } } - } else if (mode == CILK) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + 
} else if (mode_ == CILK) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (procFree[i] && !procQueue[i].empty()) { p = i; node = procQueue[i].back(); @@ -92,7 +84,7 @@ class CilkScheduler : public Scheduler { } // Time to steal - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (procFree[i]) { p = i; break; @@ -100,20 +92,20 @@ class CilkScheduler : public Scheduler { } std::vector canStealFrom; - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (!procQueue[i].empty()) { canStealFrom.push_back(i); } } if (canStealFrom.empty()) { - node = std::numeric_limits>::max(); + node = std::numeric_limits>::max(); return; } // Pick a random queue to steal from std::uniform_int_distribution dis(0, static_cast(canStealFrom.size() - 1)); - const unsigned chosenIndex = dis(gen); + const unsigned chosenIndex = dis(gen_); const unsigned chosenQueue = canStealFrom[chosenIndex]; node = procQueue[chosenQueue].front(); procQueue[chosenQueue].pop_front(); @@ -126,9 +118,9 @@ class CilkScheduler : public Scheduler { * * This constructor initializes a GreedyCilkScheduler object with the specified Cilk mode. * - * @param mode_ The Cilk mode for the scheduler. + * @param mode The Cilk mode for the scheduler. */ - CilkScheduler(CilkMode mode_ = CILK) : Scheduler(), mode(mode_), gen(std::random_device{}()) {} + CilkScheduler(CilkMode mode = CILK) : Scheduler(), mode_(mode), gen_(std::random_device{}()) {} /** * @brief Destroys the GreedyCilkScheduler object. @@ -141,82 +133,74 @@ class CilkScheduler : public Scheduler { * @brief Computes the schedule for the given BSP instance using the greedy scheduling algorithm. * * This member function computes the schedule for the given BSP instance using the greedy scheduling algorithm. - * It overrides the computeSchedule() function of the base Scheduler class. 
+ * It overrides the ComputeSchedule() function of the base Scheduler class. * * @param instance The BSP instance to compute the schedule for. * @return A pair containing the return status and the computed BSP schedule. */ - virtual RETURN_STATUS computeSchedule(BspSchedule &bsp_schedule) override { - // if constexpr (use_memory_constraint) { - // memory_constraint.initialize(instance); - // } + virtual ReturnStatus ComputeSchedule(BspSchedule &bspSchedule) override { + const auto &instance = bspSchedule.GetInstance(); - const auto &instance = bsp_schedule.getInstance(); + CSchedule schedule(instance.NumberOfVertices()); - CSchedule schedule(instance.numberOfVertices()); + std::set> ready; - std::set> ready; + std::vector nrPredecDone(instance.NumberOfVertices(), 0); - std::vector nrPredecDone(instance.numberOfVertices(), 0); + std::vector procFree(instance.NumberOfProcessors(), true); - std::vector procFree(instance.numberOfProcessors(), true); + unsigned nrProcFree = instance.NumberOfProcessors(); - unsigned nrProcFree = instance.numberOfProcessors(); + std::vector>> procQueue(instance.NumberOfProcessors()); + std::vector>> greedyProcLists(instance.NumberOfProcessors()); - std::vector>> procQueue(instance.numberOfProcessors()); - std::vector>> greedyProcLists(instance.numberOfProcessors()); - - std::set finishTimes; - const tv_pair start(0, std::numeric_limits>::max()); + std::set finishTimes; + const TvPair start(0, std::numeric_limits>::max()); finishTimes.insert(start); - for (const auto &v : source_vertices_view(instance.getComputationalDag())) { + for (const auto &v : SourceVerticesView(instance.GetComputationalDag())) { ready.insert(v); - if (mode == CILK) { + if (mode_ == CILK) { procQueue[0].push_front(v); } } while (!finishTimes.empty()) { - const v_workw_t time = finishTimes.begin()->first; + const VWorkwT time = finishTimes.begin()->first; // Find new ready jobs while (!finishTimes.empty() && finishTimes.begin()->first == time) { - const tv_pair ¤tPair 
= *finishTimes.begin(); + const TvPair ¤tPair = *finishTimes.begin(); finishTimes.erase(finishTimes.begin()); - const vertex_idx_t &node = currentPair.second; - if (node != std::numeric_limits>::max()) { - for (const auto &succ : instance.getComputationalDag().children(node)) { + const VertexIdxT &node = currentPair.second; + if (node != std::numeric_limits>::max()) { + for (const auto &succ : instance.GetComputationalDag().Children(node)) { ++nrPredecDone[succ]; - if (nrPredecDone[succ] == instance.getComputationalDag().in_degree(succ)) { + if (nrPredecDone[succ] == instance.GetComputationalDag().InDegree(succ)) { ready.insert(succ); - if (mode == CILK) { - procQueue[schedule.proc[node]].push_back(succ); + if (mode_ == CILK) { + procQueue[schedule.proc_[node]].push_back(succ); } } } - procFree[schedule.proc[node]] = true; + procFree[schedule.proc_[node]] = true; ++nrProcFree; } } // Assign new jobs to processors while (nrProcFree > 0 && !ready.empty()) { - unsigned nextProc = instance.numberOfProcessors(); - vertex_idx_t nextNode = std::numeric_limits>::max(); + unsigned nextProc = instance.NumberOfProcessors(); + VertexIdxT nextNode = std::numeric_limits>::max(); Choose(instance, procQueue, ready, procFree, nextNode, nextProc); ready.erase(nextNode); - schedule.proc[nextNode] = nextProc; - schedule.time[nextNode] = time; - - // if constexpr (use_memory_constraint) { - // memory_constraint.add(nextNode, nextProc); - // } + schedule.proc_[nextNode] = nextProc; + schedule.time_[nextNode] = time; - finishTimes.insert({time + instance.getComputationalDag().vertex_work_weight(nextNode), nextNode}); + finishTimes.insert({time + instance.GetComputationalDag().VertexWorkWeight(nextNode), nextNode}); procFree[nextProc] = false; if (nrProcFree > 0) { @@ -227,9 +211,9 @@ class CilkScheduler : public Scheduler { } } - schedule.convertToBspSchedule(instance, greedyProcLists, bsp_schedule); + schedule.ConvertToBspSchedule(instance, greedyProcLists, bspSchedule); - return 
RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } /** @@ -237,9 +221,9 @@ class CilkScheduler : public Scheduler { * * This member function sets the Cilk mode for the scheduler. * - * @param mode_ The Cilk mode to set. + * @param mode The Cilk mode to set. */ - inline void setMode(CilkMode mode_) { mode = mode_; } + inline void SetMode(CilkMode mode) { mode_ = mode; } /** * @brief Gets the Cilk mode of the scheduler. @@ -248,7 +232,7 @@ class CilkScheduler : public Scheduler { * * @return The Cilk mode of the scheduler. */ - inline CilkMode getMode() const { return mode; } + inline CilkMode GetMode() const { return mode_; } /** * @brief Gets the name of the schedule. @@ -257,8 +241,8 @@ class CilkScheduler : public Scheduler { * * @return The name of the schedule. */ - virtual std::string getScheduleName() const override { - switch (mode) { + virtual std::string GetScheduleName() const override { + switch (mode_) { case CILK: return "CilkGreedy"; break; diff --git a/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp b/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp index eceac14e..34c62c7c 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp @@ -32,22 +32,22 @@ namespace osp { * * This class stores the processor and time information for a schedule. */ -template +template class CSchedule { private: - using vertex_idx = vertex_idx_t; - using workw_t = v_workw_t; + using VertexIdx = VertexIdxT; + using WorkwT = VWorkwT; public: - std::vector proc; /**< The processor assigned to each task. */ - std::vector time; /**< The time at which each task starts. */ + std::vector proc_; /**< The processor assigned to each task. */ + std::vector time_; /**< The time at which each task starts. */ /** * @brief Constructs a CSchedule object with the given size. * @param size The size of the schedule. 
*/ CSchedule(std::size_t size) - : proc(std::vector(size, std::numeric_limits::max())), time(std::vector(size, 0)) {} + : proc_(std::vector(size, std::numeric_limits::max())), time_(std::vector(size, 0)) {} /** * @brief Converts the CSchedule object to a BspSchedule object. @@ -56,35 +56,35 @@ class CSchedule { * @return The converted BspSchedule object. */ - void convertToBspSchedule(const BspInstance &instance, - const std::vector> &procAssignmentLists, - BspSchedule &bsp_schedule) { - for (const auto &v : instance.vertices()) { - bsp_schedule.setAssignedProcessor(v, proc[v]); + void ConvertToBspSchedule(const BspInstance &instance, + const std::vector> &procAssignmentLists, + BspSchedule &bspSchedule) { + for (const auto &v : instance.Vertices()) { + bspSchedule.SetAssignedProcessor(v, proc_[v]); } - const vertex_idx N = instance.numberOfVertices(); - const unsigned P = instance.numberOfProcessors(); + const VertexIdx n = instance.NumberOfVertices(); + const unsigned p = instance.NumberOfProcessors(); unsigned superStepIdx = 0, totalNodesDone = 0; - std::vector processed(N, false); + std::vector processed(n, false); - std::vector done(P), limit(P); + std::vector done(p), limit(p); - for (unsigned j = 0; j < P; ++j) { + for (unsigned j = 0; j < p; ++j) { done[j] = procAssignmentLists[j].cbegin(); } - while (totalNodesDone < N) { + while (totalNodesDone < n) { // create next superstep - workw_t timeLimit = std::numeric_limits::max(); - for (unsigned j = 0; j < P; ++j) { + WorkwT timeLimit = std::numeric_limits::max(); + for (unsigned j = 0; j < p; ++j) { for (limit[j] = done[j]; limit[j] != procAssignmentLists[j].end(); ++limit[j]) { - const vertex_idx node = *limit[j]; + const VertexIdx node = *limit[j]; bool cut = false; - for (const auto &source : instance.getComputationalDag().parents(node)) { - if (!processed[source] && proc[source] != proc[node]) { + for (const auto &source : instance.GetComputationalDag().Parents(node)) { + if (!processed[source] && 
proc_[source] != proc_[node]) { cut = true; } } @@ -93,18 +93,18 @@ class CSchedule { break; } } - if (limit[j] != procAssignmentLists[j].end() && time[*limit[j]] < timeLimit) { - timeLimit = time[*limit[j]]; + if (limit[j] != procAssignmentLists[j].end() && time_[*limit[j]] < timeLimit) { + timeLimit = time_[*limit[j]]; } } - for (unsigned j = 0; j < P; ++j) { + for (unsigned j = 0; j < p; ++j) { for (; done[j] != limit[j] - && (time[*done[j]] < timeLimit - || (time[*done[j]] == timeLimit && instance.getComputationalDag().vertex_work_weight(*done[j]) == 0)); + && (time_[*done[j]] < timeLimit + || (time_[*done[j]] == timeLimit && instance.GetComputationalDag().VertexWorkWeight(*done[j]) == 0)); ++done[j]) { processed[*done[j]] = true; - bsp_schedule.setAssignedSuperstep(*done[j], superStepIdx); + bspSchedule.SetAssignedSuperstep(*done[j], superStepIdx); ++totalNodesDone; } } diff --git a/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp index 78a573c5..7d59501f 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp @@ -47,28 +47,28 @@ enum EtfMode { ETF, BL_EST }; * each processor. The algorithm selects the task with the earliest EST and assigns it to the processor with the * earliest available start time. The process is repeated until all tasks are scheduled. 
*/ -template -class EtfScheduler : public Scheduler { - static_assert(is_computational_dag_v, "EtfScheduler can only be used with computational DAGs."); +template +class EtfScheduler : public Scheduler { + static_assert(isComputationalDagV, "EtfScheduler can only be used with computational DAGs."); - static_assert(std::is_convertible_v, v_workw_t>, + static_assert(std::is_convertible_v, VWorkwT>, "EtfScheduler requires that work and communication weights are convertible."); - static_assert(not has_edge_weights_v || std::is_convertible_v, v_workw_t>, + static_assert(not hasEdgeWeightsV || std::is_convertible_v, VWorkwT>, "EtfScheduler requires that work and communication weights are convertible."); private: - using tv_pair = std::pair, vertex_idx_t>; + using TvPair = std::pair, VertexIdxT>; - EtfMode mode; // The mode of the scheduler (ETF or BL_EST) - bool use_numa; // Flag indicating whether to use NUMA-aware scheduling + EtfMode mode_; // The mode of the scheduler (ETF or BL_EST) + bool useNuma_; // Flag indicating whether to use NUMA-aware scheduling - constexpr static bool use_memory_constraint = is_memory_constraint_v; + constexpr static bool useMemoryConstraint_ = isMemoryConstraintV; - static_assert(not use_memory_constraint || std::is_same_v>, - "EtfScheduler implements only persistent_transient_memory_constraint."); + static_assert(not useMemoryConstraint_ || std::is_same_v>, + "EtfScheduler implements only PersistentTransientMemoryConstraint."); - MemoryConstraint_t memory_constraint; + MemoryConstraintT memoryConstraint_; /** * @brief Computes the bottom level of each task. @@ -77,54 +77,54 @@ class EtfScheduler : public Scheduler { * @param avg_ The average execution time of the tasks. * @return A vector containing the bottom level of each task. 
*/ - std::vector> ComputeBottomLevel(const BspInstance &instance) const { - std::vector> BL(instance.numberOfVertices(), 0); + std::vector> ComputeBottomLevel(const BspInstance &instance) const { + std::vector> bl(instance.NumberOfVertices(), 0); - const std::vector> topOrder = GetTopOrder(instance.getComputationalDag()); - auto r_iter = topOrder.rbegin(); + const std::vector> topOrder = GetTopOrder(instance.GetComputationalDag()); + auto rIter = topOrder.rbegin(); - for (; r_iter != topOrder.rend(); ++r_iter) { - const auto node = *r_iter; + for (; rIter != topOrder.rend(); ++rIter) { + const auto node = *rIter; - v_workw_t maxval = 0; + VWorkwT maxval = 0; - if constexpr (has_edge_weights_v) { - for (const auto &out_edge : out_edges(node, instance.getComputationalDag())) { - const v_workw_t tmp_val = BL[target(out_edge, instance.getComputationalDag())] - + instance.getComputationalDag().edge_comm_weight(out_edge); + if constexpr (hasEdgeWeightsV) { + for (const auto &outEdge : OutEdges(node, instance.GetComputationalDag())) { + const VWorkwT tmpVal = bl[Target(outEdge, instance.GetComputationalDag())] + + instance.GetComputationalDag().EdgeCommWeight(outEdge); - if (tmp_val > maxval) { - maxval = tmp_val; + if (tmpVal > maxval) { + maxval = tmpVal; } } } else { - for (const auto &child : instance.getComputationalDag().children(node)) { - const v_workw_t tmp_val = BL[child] + instance.getComputationalDag().vertex_comm_weight(child); + for (const auto &child : instance.GetComputationalDag().Children(node)) { + const VWorkwT tmpVal = bl[child] + instance.GetComputationalDag().VertexCommWeight(child); - if (tmp_val > maxval) { - maxval = tmp_val; + if (tmpVal > maxval) { + maxval = tmpVal; } } } - BL[node] = maxval + instance.getComputationalDag().vertex_work_weight(node); + bl[node] = maxval + instance.GetComputationalDag().VertexWorkWeight(node); } - return BL; + return bl; } - bool check_mem_feasibility(const BspInstance &instance, const std::set &ready) const { - 
if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { + bool CheckMemFeasibility(const BspInstance &instance, const std::set &ready) const { + if (instance.GetArchitecture().GetMemoryConstraintType() == MemoryConstraintType::PERSISTENT_AND_TRANSIENT) { if (ready.empty()) { return true; } - for (const auto &node_pair : ready) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - const auto node = node_pair.second; + for (const auto &nodePair : ready) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + const auto node = nodePair.second; - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(node, i)) { + if constexpr (useMemoryConstraint_) { + if (memoryConstraint_.CanAdd(node, i)) { return true; } } @@ -150,43 +150,43 @@ class EtfScheduler : public Scheduler { * @param avg_ The average execution time of the tasks. * @return The earliest start time (EST) for the task on the processor. 
*/ - v_workw_t GetESTforProc(const BspInstance &instance, - CSchedule &schedule, - vertex_idx_t node, - unsigned proc, - const v_workw_t procAvailableFrom, - std::vector> &send, - std::vector> &rec) const { - std::vector predec; - for (const auto &pred : instance.getComputationalDag().parents(node)) { - predec.emplace_back(schedule.time[pred] + instance.getComputationalDag().vertex_work_weight(pred), pred); + VWorkwT GetESTforProc(const BspInstance &instance, + CSchedule &schedule, + VertexIdxT node, + unsigned proc, + const VWorkwT procAvailableFrom, + std::vector> &send, + std::vector> &rec) const { + std::vector predec; + for (const auto &pred : instance.GetComputationalDag().Parents(node)) { + predec.emplace_back(schedule.time_[pred] + instance.GetComputationalDag().VertexWorkWeight(pred), pred); } std::sort(predec.begin(), predec.end()); - v_workw_t EST = procAvailableFrom; + VWorkwT est = procAvailableFrom; for (const auto &next : predec) { - v_workw_t t = schedule.time[next.second] + instance.getComputationalDag().vertex_work_weight(next.second); - if (schedule.proc[next.second] != proc) { - t = std::max(t, send[schedule.proc[next.second]]); + VWorkwT t = schedule.time_[next.second] + instance.GetComputationalDag().VertexWorkWeight(next.second); + if (schedule.proc_[next.second] != proc) { + t = std::max(t, send[schedule.proc_[next.second]]); t = std::max(t, rec[proc]); - if constexpr (has_edge_weights_v) { - t += instance.getComputationalDag().edge_comm_weight( - edge_desc(next.second, node, instance.getComputationalDag()).first) - * instance.sendCosts(schedule.proc[next.second], proc); + if constexpr (hasEdgeWeightsV) { + t += instance.GetComputationalDag().EdgeCommWeight( + EdgeDesc(next.second, node, instance.GetComputationalDag()).first) + * instance.SendCosts(schedule.proc_[next.second], proc); } else { - t += instance.getComputationalDag().vertex_comm_weight(next.second) - * instance.sendCosts(schedule.proc[next.second], proc); + t += 
instance.GetComputationalDag().VertexCommWeight(next.second) + * instance.SendCosts(schedule.proc_[next.second], proc); } - send[schedule.proc[next.second]] = t; + send[schedule.proc_[next.second]] = t; rec[proc] = t; } - EST = std::max(EST, t); + est = std::max(est, t); } - return EST; + return est; }; /** @@ -201,29 +201,29 @@ class EtfScheduler : public Scheduler { * @param avg_ The average execution time of the tasks. * @return A triple containing the best EST, the node index, and the processor index. */ - tv_pair GetBestESTforNodes(const BspInstance &instance, - CSchedule &schedule, - const std::vector> &nodeList, - const std::vector> &procAvailableFrom, - std::vector> &send, - std::vector> &rec, - unsigned &bestProc) const { - v_workw_t bestEST = std::numeric_limits>::max(); - vertex_idx_t bestNode = 0; - std::vector> bestSend, bestRec; + TvPair GetBestESTforNodes(const BspInstance &instance, + CSchedule &schedule, + const std::vector> &nodeList, + const std::vector> &procAvailableFrom, + std::vector> &send, + std::vector> &rec, + unsigned &bestProc) const { + VWorkwT bestEST = std::numeric_limits>::max(); + VertexIdxT bestNode = 0; + std::vector> bestSend, bestRec; for (const auto &node : nodeList) { - for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) { - if constexpr (use_memory_constraint) { - if (not memory_constraint.can_add(node, j)) { + for (unsigned j = 0; j < instance.NumberOfProcessors(); ++j) { + if constexpr (useMemoryConstraint_) { + if (not memoryConstraint_.CanAdd(node, j)) { continue; } } - std::vector> newSend = send; - std::vector> newRec = rec; - v_workw_t EST = GetESTforProc(instance, schedule, node, j, procAvailableFrom[j], newSend, newRec); - if (EST < bestEST) { - bestEST = EST; + std::vector> newSend = send; + std::vector> newRec = rec; + VWorkwT est = GetESTforProc(instance, schedule, node, j, procAvailableFrom[j], newSend, newRec); + if (est < bestEST) { + bestEST = est; bestProc = j; bestNode = node; bestSend = newSend; 
@@ -244,7 +244,7 @@ class EtfScheduler : public Scheduler { * * @param mode_ The mode of the scheduler (ETF or BL_EST). */ - EtfScheduler(EtfMode mode_ = ETF) : Scheduler(), mode(mode_), use_numa(true) {} + EtfScheduler(EtfMode mode = ETF) : Scheduler(), mode_(mode), useNuma_(true) {} /** * @brief Default destructor for the EtfScheduler class. @@ -257,82 +257,82 @@ class EtfScheduler : public Scheduler { * @param instance The BspInstance object representing the BSP instance. * @return A pair containing the return status and the computed BspSchedule object. */ - virtual RETURN_STATUS computeSchedule(BspSchedule &bsp_schedule) override { - const auto &instance = bsp_schedule.getInstance(); + virtual ReturnStatus ComputeSchedule(BspSchedule &bspSchedule) override { + const auto &instance = bspSchedule.GetInstance(); - if constexpr (use_memory_constraint) { - memory_constraint.initialize(instance); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.Initialize(instance); } - CSchedule schedule(instance.numberOfVertices()); + CSchedule schedule(instance.NumberOfVertices()); - std::vector>> greedyProcLists(instance.numberOfProcessors()); + std::vector>> greedyProcLists(instance.NumberOfProcessors()); - std::vector> predecProcessed(instance.numberOfVertices(), 0); + std::vector> predecProcessed(instance.NumberOfVertices(), 0); - std::vector> finishTimes(instance.numberOfProcessors(), 0), send(instance.numberOfProcessors(), 0), - rec(instance.numberOfProcessors(), 0); + std::vector> finishTimes(instance.NumberOfProcessors(), 0), send(instance.NumberOfProcessors(), 0), + rec(instance.NumberOfProcessors(), 0); - std::vector> BL; - if (mode == BL_EST) { - BL = ComputeBottomLevel(instance); + std::vector> bl; + if (mode_ == BL_EST) { + bl = ComputeBottomLevel(instance); } else { - BL = std::vector>(instance.numberOfVertices(), 0); + bl = std::vector>(instance.NumberOfVertices(), 0); } - std::set ready; + std::set ready; - for (const auto &v : 
source_vertices_view(instance.getComputationalDag())) { - ready.insert({BL[v], v}); + for (const auto &v : SourceVerticesView(instance.GetComputationalDag())) { + ready.insert({bl[v], v}); } while (!ready.empty()) { - tv_pair best_tv(0, 0); - unsigned best_proc = 0; + TvPair bestTv(0, 0); + unsigned bestProc = 0; - if (mode == BL_EST) { - std::vector> nodeList{ready.begin()->second}; + if (mode_ == BL_EST) { + std::vector> nodeList{ready.begin()->second}; ready.erase(ready.begin()); - best_tv = GetBestESTforNodes(instance, schedule, nodeList, finishTimes, send, rec, best_proc); + bestTv = GetBestESTforNodes(instance, schedule, nodeList, finishTimes, send, rec, bestProc); } - if (mode == ETF) { - std::vector> nodeList; + if (mode_ == ETF) { + std::vector> nodeList; for (const auto &next : ready) { nodeList.push_back(next.second); } - best_tv = GetBestESTforNodes(instance, schedule, nodeList, finishTimes, send, rec, best_proc); - ready.erase(tv_pair({0, best_tv.second})); + bestTv = GetBestESTforNodes(instance, schedule, nodeList, finishTimes, send, rec, bestProc); + ready.erase(TvPair({0, bestTv.second})); } - const auto node = best_tv.second; + const auto node = bestTv.second; - schedule.proc[node] = best_proc; - greedyProcLists[best_proc].push_back(node); + schedule.proc_[node] = bestProc; + greedyProcLists[bestProc].push_back(node); - schedule.time[node] = best_tv.first; - finishTimes[best_proc] = schedule.time[node] + instance.getComputationalDag().vertex_work_weight(node); + schedule.time_[node] = bestTv.first; + finishTimes[bestProc] = schedule.time_[node] + instance.GetComputationalDag().VertexWorkWeight(node); - if constexpr (use_memory_constraint) { - memory_constraint.add(node, best_proc); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.Add(node, bestProc); } - for (const auto &succ : instance.getComputationalDag().children(node)) { + for (const auto &succ : instance.GetComputationalDag().Children(node)) { ++predecProcessed[succ]; - if 
(predecProcessed[succ] == instance.getComputationalDag().in_degree(succ)) { - ready.insert({BL[succ], succ}); + if (predecProcessed[succ] == instance.GetComputationalDag().InDegree(succ)) { + ready.insert({bl[succ], succ}); } } - if constexpr (use_memory_constraint) { - if (not check_mem_feasibility(instance, ready)) { - return RETURN_STATUS::ERROR; + if constexpr (useMemoryConstraint_) { + if (not CheckMemFeasibility(instance, ready)) { + return ReturnStatus::ERROR; } } } - schedule.convertToBspSchedule(instance, greedyProcLists, bsp_schedule); + schedule.ConvertToBspSchedule(instance, greedyProcLists, bspSchedule); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } /** @@ -340,36 +340,36 @@ class EtfScheduler : public Scheduler { * * @param mode_ The mode of the scheduler (ETF or BL_EST). */ - inline void setMode(EtfMode mode_) { mode = mode_; } + inline void SetMode(EtfMode mode) { mode_ = mode; } /** * @brief Gets the mode of the scheduler. * * @return The mode of the scheduler (ETF or BL_EST). */ - inline EtfMode getMode() const { return mode; } + inline EtfMode GetMode() const { return mode_; } /** * @brief Sets whether to use NUMA-aware scheduling. * * @param numa Flag indicating whether to use NUMA-aware scheduling. */ - inline void setUseNuma(bool numa) { use_numa = numa; } + inline void SetUseNuma(bool numa) { useNuma_ = numa; } /** * @brief Checks if NUMA-aware scheduling is enabled. * * @return True if NUMA-aware scheduling is enabled, false otherwise. */ - inline bool useNuma() const { return use_numa; } + inline bool UseNuma() const { return useNuma_; } /** * @brief Gets the name of the schedule. * * @return The name of the schedule based on the mode. 
*/ - virtual std::string getScheduleName() const override { - switch (mode) { + virtual std::string GetScheduleName() const override { + switch (mode_) { case ETF: return "ETFGreedy"; diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp index c67389aa..25c064c1 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp @@ -36,119 +36,121 @@ namespace osp { * @brief The GreedyBspScheduler class represents a scheduler that uses a greedy algorithm to compute schedules for * BspInstance. * - * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods. - * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. - * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. + * This class inherits from the Scheduler class and implements the ComputeSchedule() and GetScheduleName() methods. + * The ComputeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. + * The GetScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. 
*/ -template -class GreedyBspScheduler : public Scheduler { - static_assert(is_computational_dag_v, "GreedyBspScheduler can only be used with computational DAGs."); +template +class GreedyBspScheduler : public Scheduler { + static_assert(isComputationalDagV, "GreedyBspScheduler can only be used with computational DAGs."); private: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - constexpr static bool use_memory_constraint = is_memory_constraint_v - or is_memory_constraint_schedule_v; + constexpr static bool useMemoryConstraint_ = isMemoryConstraintV + or isMemoryConstraintScheduleV; - static_assert(not use_memory_constraint or std::is_same_v, - "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); + static_assert(not useMemoryConstraint_ or std::is_same_v, + "GraphT must be the same as MemoryConstraintT::GraphImplT."); - MemoryConstraint_t memory_constraint; + MemoryConstraintT memoryConstraint_; - struct heap_node { - VertexType node; + struct HeapNode { + VertexType node_; - double score; + double score_; - heap_node() : node(0), score(0) {} + HeapNode() : node_(0), score_(0) {} - heap_node(VertexType node_arg, double score_arg) : node(node_arg), score(score_arg) {} + HeapNode(VertexType nodeArg, double scoreArg) : node_(nodeArg), score_(scoreArg) {} - bool operator<(heap_node const &rhs) const { return (score < rhs.score) || (score <= rhs.score and node < rhs.node); } + bool operator<(HeapNode const &rhs) const { + return (score_ < rhs.score_) || (score_ <= rhs.score_ and node_ < rhs.node_); + } }; - std::vector> max_proc_score_heap; - std::vector> max_all_proc_score_heap; + std::vector> maxProcScoreHeap_; + std::vector> maxAllProcScoreHeap_; - using heap_handle = typename boost::heap::fibonacci_heap::handle_type; + using HeapHandle = typename boost::heap::fibonacci_heap::handle_type; - std::vector> node_proc_heap_handles; - std::vector> node_all_proc_heap_handles; + std::vector> nodeProcHeapHandles_; + std::vector> 
nodeAllProcHeapHandles_; - float max_percent_idle_processors; - bool increase_parallelism_in_new_superstep; + float maxPercentIdleProcessors_; + bool increaseParallelismInNewSuperstep_; - double computeScore(VertexType node, + double ComputeScore(VertexType node, unsigned proc, const std::vector> &procInHyperedge, - const BspInstance &instance) const { + const BspInstance &instance) const { double score = 0; - for (const auto &pred : instance.getComputationalDag().parents(node)) { + for (const auto &pred : instance.GetComputationalDag().Parents(node)) { if (procInHyperedge[pred][proc]) { - score += static_cast(instance.getComputationalDag().vertex_comm_weight(pred)) - / static_cast(instance.getComputationalDag().out_degree(pred)); + score += static_cast(instance.GetComputationalDag().VertexCommWeight(pred)) + / static_cast(instance.GetComputationalDag().OutDegree(pred)); } } return score; } - void Choose(const BspInstance &instance, + void Choose(const BspInstance &instance, const std::vector> &procReady, const std::vector &procFree, VertexType &node, unsigned &p) const { - double max_score = -1.0; + double maxScore = -1.0; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { if (procFree[proc] && !procReady[proc].empty()) { // select node - heap_node top_node = max_proc_score_heap[proc].top(); + HeapNode topNode = maxProcScoreHeap_[proc].top(); - if (top_node.score > max_score) { - max_score = top_node.score; - node = top_node.node; + if (topNode.score_ > maxScore) { + maxScore = topNode.score_; + node = topNode.node_; p = proc; return; } } } - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - if (!procFree[proc] or max_all_proc_score_heap[proc].empty()) { + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + if (!procFree[proc] or maxAllProcScoreHeap_[proc].empty()) { continue; } - heap_node top_node = 
max_all_proc_score_heap[proc].top(); + HeapNode topNode = maxAllProcScoreHeap_[proc].top(); - if (top_node.score > max_score) { - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(top_node.node, proc)) { - max_score = top_node.score; - node = top_node.node; + if (topNode.score_ > maxScore) { + if constexpr (useMemoryConstraint_) { + if (memoryConstraint_.CanAdd(topNode.node_, proc)) { + maxScore = topNode.score_; + node = topNode.node_; p = proc; } } else { - max_score = top_node.score; - node = top_node.node; + maxScore = topNode.score_; + node = topNode.node_; p = proc; } } } }; - bool CanChooseNode(const BspInstance &instance, + bool CanChooseNode(const BspInstance &instance, const std::set &allReady, const std::vector> &procReady, const std::vector &procFree) const { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (procFree[i] && !procReady[i].empty()) { return true; } } if (!allReady.empty()) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (procFree[i]) { return true; } @@ -158,36 +160,36 @@ class GreedyBspScheduler : public Scheduler { return false; }; - bool check_mem_feasibility(const BspInstance &instance, - const std::set &allReady, - const std::vector> &procReady) const { - if constexpr (use_memory_constraint) { - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - unsigned num_empty_proc = 0; + bool CheckMemFeasibility(const BspInstance &instance, + const std::set &allReady, + const std::vector> &procReady) const { + if constexpr (useMemoryConstraint_) { + if (instance.GetArchitecture().GetMemoryConstraintType() == MemoryConstraintType::PERSISTENT_AND_TRANSIENT) { + unsigned numEmptyProc = 0; - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < 
instance.NumberOfProcessors(); ++i) { if (!procReady[i].empty()) { - const heap_node &top_node = max_proc_score_heap[i].top(); + const HeapNode &topNode = maxProcScoreHeap_[i].top(); // todo check if this is correct - if (memory_constraint.can_add(top_node.node, i)) { + if (memoryConstraint_.CanAdd(topNode.node_, i)) { return true; } } else { - ++num_empty_proc; + ++numEmptyProc; } } - if (num_empty_proc == instance.numberOfProcessors() && allReady.empty()) { + if (numEmptyProc == instance.NumberOfProcessors() && allReady.empty()) { return true; } if (!allReady.empty()) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - const heap_node &top_node = max_all_proc_score_heap[i].top(); + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + const HeapNode &topNode = maxAllProcScoreHeap_[i].top(); // todo check if this is correct - if (memory_constraint.can_add(top_node.node, i)) { + if (memoryConstraint_.CanAdd(topNode.node_, i)) { return true; } } @@ -199,34 +201,34 @@ class GreedyBspScheduler : public Scheduler { return true; } - unsigned get_nr_parallelizable_nodes(const BspInstance &instance, - const std::vector &nr_ready_nodes_per_type, - const std::vector &nr_procs_per_type) const { - unsigned nr_nodes = 0; - - std::vector ready_nodes_per_type = nr_ready_nodes_per_type; - std::vector procs_per_type = nr_procs_per_type; - for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) { - for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { - if (instance.isCompatibleType(node_type, proc_type)) { - unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]); - nr_nodes += matched; - ready_nodes_per_type[node_type] -= matched; - procs_per_type[proc_type] -= matched; + unsigned GetNrParallelizableNodes(const BspInstance &instance, + const std::vector &nrReadyNodesPerType, + const std::vector &nrProcsPerType) 
const { + unsigned nrNodes = 0; + + std::vector readyNodesPerType = nrReadyNodesPerType; + std::vector procsPerType = nrProcsPerType; + for (unsigned procType = 0; procType < instance.GetArchitecture().GetNumberOfProcessorTypes(); ++procType) { + for (unsigned nodeType = 0; nodeType < instance.GetComputationalDag().NumVertexTypes(); ++nodeType) { + if (instance.IsCompatibleType(nodeType, procType)) { + unsigned matched = std::min(readyNodesPerType[nodeType], procsPerType[procType]); + nrNodes += matched; + readyNodesPerType[nodeType] -= matched; + procsPerType[procType] -= matched; } } } - return nr_nodes; + return nrNodes; } public: /** * @brief Default constructor for GreedyBspScheduler. */ - GreedyBspScheduler(float max_percent_idle_processors_ = 0.2f, bool increase_parallelism_in_new_superstep_ = true) - : max_percent_idle_processors(max_percent_idle_processors_), - increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {} + GreedyBspScheduler(float maxPercentIdleProcessors = 0.2f, bool increaseParallelismInNewSuperstep = true) + : maxPercentIdleProcessors_(maxPercentIdleProcessors), + increaseParallelismInNewSuperstep_(increaseParallelismInNewSuperstep) {} /** * @brief Default destructor for GreedyBspScheduler. @@ -241,60 +243,60 @@ class GreedyBspScheduler : public Scheduler { * @param instance The BspInstance object representing the instance to compute the schedule for. * @return A pair containing the return status and the computed BspSchedule. 
*/ - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); - for (const auto &v : instance.getComputationalDag().vertices()) { - schedule.setAssignedProcessor(v, std::numeric_limits::max()); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + schedule.SetAssignedProcessor(v, std::numeric_limits::max()); } unsigned supstepIdx = 0; - if constexpr (is_memory_constraint_v) { - memory_constraint.initialize(instance); - } else if constexpr (is_memory_constraint_schedule_v) { - memory_constraint.initialize(schedule, supstepIdx); + if constexpr (isMemoryConstraintV) { + memoryConstraint_.Initialize(instance); + } else if constexpr (isMemoryConstraintScheduleV) { + memoryConstraint_.Initialize(schedule, supstepIdx); } - const std::size_t &N = instance.numberOfVertices(); - const unsigned ¶ms_p = instance.numberOfProcessors(); - const auto &G = instance.getComputationalDag(); + const std::size_t &n = instance.NumberOfVertices(); + const unsigned ¶msP = instance.NumberOfProcessors(); + const auto &g = instance.GetComputationalDag(); - max_proc_score_heap = std::vector>(params_p); - max_all_proc_score_heap = std::vector>(params_p); + maxProcScoreHeap_ = std::vector>(paramsP); + maxAllProcScoreHeap_ = std::vector>(paramsP); - node_proc_heap_handles = std::vector>(params_p); - node_all_proc_heap_handles = std::vector>(params_p); + nodeProcHeapHandles_ = std::vector>(paramsP); + nodeAllProcHeapHandles_ = std::vector>(paramsP); std::set ready; - std::vector> procInHyperedge = std::vector>(N, std::vector(params_p, false)); + std::vector> procInHyperedge = std::vector>(n, std::vector(paramsP, false)); - std::vector> procReady(params_p); + std::vector> procReady(paramsP); std::set allReady; - std::vector nrPredecDone(N, 0); - std::vector procFree(params_p, true); - unsigned free = params_p; + 
std::vector nrPredecDone(n, 0); + std::vector procFree(paramsP, true); + unsigned free = paramsP; - std::vector nr_ready_nodes_per_type(G.num_vertex_types(), 0); - std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < params_p; ++proc) { - ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; + std::vector nrReadyNodesPerType(g.NumVertexTypes(), 0); + std::vector nrProcsPerType(instance.GetArchitecture().GetNumberOfProcessorTypes(), 0); + for (unsigned proc = 0; proc < paramsP; ++proc) { + ++nrProcsPerType[instance.GetArchitecture().ProcessorType(proc)]; } - std::set, VertexType>> finishTimes; + std::set, VertexType>> finishTimes; finishTimes.emplace(0, std::numeric_limits::max()); - for (const auto &v : source_vertices_view(G)) { + for (const auto &v : SourceVerticesView(g)) { ready.insert(v); allReady.insert(v); - ++nr_ready_nodes_per_type[G.vertex_type(v)]; + ++nrReadyNodesPerType[g.VertexType(v)]; - for (unsigned proc = 0; proc < params_p; ++proc) { - if (instance.isCompatible(v, proc)) { - heap_node new_node(v, 0.0); - node_all_proc_heap_handles[proc][v] = max_all_proc_score_heap[proc].push(new_node); + for (unsigned proc = 0; proc < paramsP; ++proc) { + if (instance.IsCompatible(v, proc)) { + HeapNode newNode(v, 0.0); + nodeAllProcHeapHandles_[proc][v] = maxAllProcScoreHeap_[proc].push(newNode); } } } @@ -302,32 +304,32 @@ class GreedyBspScheduler : public Scheduler { bool endSupStep = false; while (!ready.empty() || !finishTimes.empty()) { if (finishTimes.empty() && endSupStep) { - for (unsigned proc = 0; proc < params_p; ++proc) { + for (unsigned proc = 0; proc < paramsP; ++proc) { procReady[proc].clear(); - max_proc_score_heap[proc].clear(); - node_proc_heap_handles[proc].clear(); + maxProcScoreHeap_[proc].clear(); + nodeProcHeapHandles_[proc].clear(); - if constexpr (use_memory_constraint) { - memory_constraint.reset(proc); + if constexpr (useMemoryConstraint_) { + 
memoryConstraint_.Reset(proc); } } allReady = ready; - for (unsigned proc = 0; proc < params_p; ++proc) { - max_all_proc_score_heap[proc].clear(); - node_all_proc_heap_handles[proc].clear(); + for (unsigned proc = 0; proc < paramsP; ++proc) { + maxAllProcScoreHeap_[proc].clear(); + nodeAllProcHeapHandles_[proc].clear(); } for (const auto &v : ready) { - for (unsigned proc = 0; proc < params_p; ++proc) { - if (!instance.isCompatible(v, proc)) { + for (unsigned proc = 0; proc < paramsP; ++proc) { + if (!instance.IsCompatible(v, proc)) { continue; } - double score = computeScore(v, proc, procInHyperedge, instance); - heap_node new_node(v, score); - node_all_proc_heap_handles[proc][v] = max_all_proc_score_heap[proc].push(new_node); + double score = ComputeScore(v, proc, procInHyperedge, instance); + HeapNode newNode(v, score); + nodeAllProcHeapHandles_[proc][v] = maxAllProcScoreHeap_[proc].push(newNode); } } @@ -338,7 +340,7 @@ class GreedyBspScheduler : public Scheduler { finishTimes.emplace(0, std::numeric_limits::max()); } - const v_workw_t time = finishTimes.begin()->first; + const VWorkwT time = finishTimes.begin()->first; // Find new ready jobs while (!finishTimes.empty() && finishTimes.begin()->first == time) { @@ -346,45 +348,45 @@ class GreedyBspScheduler : public Scheduler { finishTimes.erase(finishTimes.begin()); if (node != std::numeric_limits::max()) { - for (const auto &succ : G.children(node)) { + for (const auto &succ : g.Children(node)) { ++nrPredecDone[succ]; - if (nrPredecDone[succ] == G.in_degree(succ)) { + if (nrPredecDone[succ] == g.InDegree(succ)) { ready.insert(succ); - ++nr_ready_nodes_per_type[G.vertex_type(succ)]; + ++nrReadyNodesPerType[g.VertexType(succ)]; bool canAdd = true; - for (const auto &pred : G.parents(succ)) { - if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) - && schedule.assignedSuperstep(pred) == supstepIdx) { + for (const auto &pred : g.Parents(succ)) { + if (schedule.AssignedProcessor(pred) != 
schedule.AssignedProcessor(node) + && schedule.AssignedSuperstep(pred) == supstepIdx) { canAdd = false; break; } } - if constexpr (use_memory_constraint) { + if constexpr (useMemoryConstraint_) { if (canAdd) { - if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) { + if (not memoryConstraint_.CanAdd(succ, schedule.AssignedProcessor(node))) { canAdd = false; } } } - if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) { + if (!instance.IsCompatible(succ, schedule.AssignedProcessor(node))) { canAdd = false; } if (canAdd) { - procReady[schedule.assignedProcessor(node)].insert(succ); + procReady[schedule.AssignedProcessor(node)].insert(succ); - double score = computeScore(succ, schedule.assignedProcessor(node), procInHyperedge, instance); + double score = ComputeScore(succ, schedule.AssignedProcessor(node), procInHyperedge, instance); - heap_node new_node(succ, score); - node_proc_heap_handles[schedule.assignedProcessor(node)][succ] - = max_proc_score_heap[schedule.assignedProcessor(node)].push(new_node); + HeapNode newNode(succ, score); + nodeProcHeapHandles_[schedule.AssignedProcessor(node)][succ] + = maxProcScoreHeap_[schedule.AssignedProcessor(node)].push(newNode); } } } - procFree[schedule.assignedProcessor(node)] = true; + procFree[schedule.AssignedProcessor(node)] = true; ++free; } } @@ -400,10 +402,10 @@ class GreedyBspScheduler : public Scheduler { while (CanChooseNode(instance, allReady, procReady, procFree)) { VertexType nextNode = std::numeric_limits::max(); - unsigned nextProc = instance.numberOfProcessors(); + unsigned nextProc = instance.NumberOfProcessors(); Choose(instance, procReady, procFree, nextNode, nextProc); - if (nextNode == std::numeric_limits::max() || nextProc == instance.numberOfProcessors()) { + if (nextNode == std::numeric_limits::max() || nextProc == instance.NumberOfProcessors()) { endSupStep = true; break; } @@ -411,92 +413,92 @@ class GreedyBspScheduler : public Scheduler { if 
(procReady[nextProc].find(nextNode) != procReady[nextProc].end()) { procReady[nextProc].erase(nextNode); - max_proc_score_heap[nextProc].erase(node_proc_heap_handles[nextProc][nextNode]); - node_proc_heap_handles[nextProc].erase(nextNode); + maxProcScoreHeap_[nextProc].erase(nodeProcHeapHandles_[nextProc][nextNode]); + nodeProcHeapHandles_[nextProc].erase(nextNode); } else { allReady.erase(nextNode); - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - if (instance.isCompatible(nextNode, proc)) { - max_all_proc_score_heap[proc].erase(node_all_proc_heap_handles[proc][nextNode]); - node_all_proc_heap_handles[proc].erase(nextNode); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + if (instance.IsCompatible(nextNode, proc)) { + maxAllProcScoreHeap_[proc].erase(nodeAllProcHeapHandles_[proc][nextNode]); + nodeAllProcHeapHandles_[proc].erase(nextNode); } } } ready.erase(nextNode); - --nr_ready_nodes_per_type[G.vertex_type(nextNode)]; - schedule.setAssignedProcessor(nextNode, nextProc); - schedule.setAssignedSuperstep(nextNode, supstepIdx); + --nrReadyNodesPerType[g.VertexType(nextNode)]; + schedule.SetAssignedProcessor(nextNode, nextProc); + schedule.SetAssignedSuperstep(nextNode, supstepIdx); - if constexpr (use_memory_constraint) { - memory_constraint.add(nextNode, nextProc); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.Add(nextNode, nextProc); std::vector toErase; for (const auto &node : procReady[nextProc]) { - if (not memory_constraint.can_add(node, nextProc)) { + if (not memoryConstraint_.CanAdd(node, nextProc)) { toErase.push_back(node); } } for (const auto &node : toErase) { procReady[nextProc].erase(node); - max_proc_score_heap[nextProc].erase(node_proc_heap_handles[nextProc][node]); - node_proc_heap_handles[nextProc].erase(node); + maxProcScoreHeap_[nextProc].erase(nodeProcHeapHandles_[nextProc][node]); + nodeProcHeapHandles_[nextProc].erase(node); } } - finishTimes.emplace(time + 
G.vertex_work_weight(nextNode), nextNode); + finishTimes.emplace(time + g.VertexWorkWeight(nextNode), nextNode); procFree[nextProc] = false; --free; // update comm auxiliary structure procInHyperedge[nextNode][nextProc] = true; - for (const auto &pred : G.parents(nextNode)) { + for (const auto &pred : g.Parents(nextNode)) { if (procInHyperedge[pred][nextProc]) { continue; } procInHyperedge[pred][nextProc] = true; - for (const auto &child : G.children(pred)) { + for (const auto &child : g.Children(pred)) { if (child != nextNode && procReady[nextProc].find(child) != procReady[nextProc].end()) { - (*node_proc_heap_handles[nextProc][child]).score - += static_cast(instance.getComputationalDag().vertex_comm_weight(pred)) - / static_cast(instance.getComputationalDag().out_degree(pred)); - max_proc_score_heap[nextProc].update(node_proc_heap_handles[nextProc][child]); + (*nodeProcHeapHandles_[nextProc][child]).score_ + += static_cast(instance.GetComputationalDag().VertexCommWeight(pred)) + / static_cast(instance.GetComputationalDag().OutDegree(pred)); + maxProcScoreHeap_[nextProc].update(nodeProcHeapHandles_[nextProc][child]); } - if (child != nextNode && allReady.find(child) != allReady.end() && instance.isCompatible(child, nextProc)) { - (*node_all_proc_heap_handles[nextProc][child]).score - += static_cast(instance.getComputationalDag().vertex_comm_weight(pred)) - / static_cast(instance.getComputationalDag().out_degree(pred)); - max_all_proc_score_heap[nextProc].update(node_all_proc_heap_handles[nextProc][child]); + if (child != nextNode && allReady.find(child) != allReady.end() && instance.IsCompatible(child, nextProc)) { + (*nodeAllProcHeapHandles_[nextProc][child]).score_ + += static_cast(instance.GetComputationalDag().VertexCommWeight(pred)) + / static_cast(instance.GetComputationalDag().OutDegree(pred)); + maxAllProcScoreHeap_[nextProc].update(nodeAllProcHeapHandles_[nextProc][child]); } } } } - if constexpr (use_memory_constraint) { - if (not 
check_mem_feasibility(instance, allReady, procReady)) { - return RETURN_STATUS::ERROR; + if constexpr (useMemoryConstraint_) { + if (not CheckMemFeasibility(instance, allReady, procReady)) { + return ReturnStatus::ERROR; } } - if (free > static_cast(static_cast(params_p) * max_percent_idle_processors) - && ((!increase_parallelism_in_new_superstep) - || get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) - >= std::min(std::min(params_p, static_cast(1.2 * (params_p - free))), - params_p - free + (static_cast(0.5 * free))))) { + if (free > static_cast(static_cast(paramsP) * maxPercentIdleProcessors_) + && ((!increaseParallelismInNewSuperstep_) + || GetNrParallelizableNodes(instance, nrReadyNodesPerType, nrProcsPerType) + >= std::min(std::min(paramsP, static_cast(1.2 * (paramsP - free))), + paramsP - free + (static_cast(0.5 * free))))) { endSupStep = true; } } - assert(schedule.satisfiesPrecedenceConstraints()); + assert(schedule.SatisfiesPrecedenceConstraints()); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; }; /** @@ -506,7 +508,7 @@ class GreedyBspScheduler : public Scheduler { * * @return The name of the schedule. */ - std::string getScheduleName() const override { return "BspGreedy"; } + std::string GetScheduleName() const override { return "BspGreedy"; } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp index 57b043dc..edb4ce84 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp @@ -28,112 +28,112 @@ limitations under the License. 
namespace osp { -template -class GreedyChildren : public Scheduler { +template +class GreedyChildren : public Scheduler { private: - bool ensure_enough_sources; + bool ensureEnoughSources_; public: - GreedyChildren(bool ensure_enough_sources_ = true) : Scheduler(), ensure_enough_sources(ensure_enough_sources_) {}; + GreedyChildren(bool ensureEnoughSources = true) : Scheduler(), ensureEnoughSources_(ensureEnoughSources) {}; - RETURN_STATUS computeSchedule(BspSchedule &sched) override { - using VertexType = vertex_idx_t; - const auto &instance = sched.getInstance(); + ReturnStatus ComputeSchedule(BspSchedule &sched) override { + using VertexType = VertexIdxT; + const auto &instance = sched.GetInstance(); - for (const auto &v : instance.getComputationalDag().vertices()) { - sched.setAssignedProcessor(v, std::numeric_limits::max()); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + sched.SetAssignedProcessor(v, std::numeric_limits::max()); } - const auto &graph = instance.getComputationalDag(); + const auto &graph = instance.GetComputationalDag(); - unsigned superstep_counter = 0; + unsigned superstepCounter = 0; - std::vector predecessors_count(instance.numberOfVertices(), 0); + std::vector predecessorsCount(instance.NumberOfVertices(), 0); std::multiset, std::greater<>> next; - for (const VertexType &i : source_vertices_view(graph)) { - next.emplace(graph.out_degree(i), i); + for (const VertexType &i : SourceVerticesView(graph)) { + next.emplace(graph.OutDegree(i), i); } while (!next.empty()) { - std::unordered_set nodes_assigned_this_superstep; - std::vector> processor_weights(instance.numberOfProcessors(), 0); + std::unordered_set nodesAssignedThisSuperstep; + std::vector> processorWeights(instance.NumberOfProcessors(), 0); - bool few_sources = next.size() < instance.numberOfProcessors() ? 
true : false; - bool node_added = true; - while (!next.empty() && node_added) { - node_added = false; + bool fewSources = next.size() < instance.NumberOfProcessors() ? true : false; + bool nodeAdded = true; + while (!next.empty() && nodeAdded) { + nodeAdded = false; for (auto iter = next.begin(); iter != next.cend(); iter++) { const auto &node = iter->second; - bool processor_set = false; - bool failed_to_allocate = false; - unsigned processor_to_be_allocated = 0; - - for (const auto &par : graph.parents(node)) { - if (nodes_assigned_this_superstep.count(par)) { - if (!processor_set) { - const unsigned par_proc = sched.assignedProcessor(par); - if (!instance.isCompatible(node, par_proc)) { - failed_to_allocate = true; + bool processorSet = false; + bool failedToAllocate = false; + unsigned processorToBeAllocated = 0; + + for (const auto &par : graph.Parents(node)) { + if (nodesAssignedThisSuperstep.count(par)) { + if (!processorSet) { + const unsigned parProc = sched.AssignedProcessor(par); + if (!instance.IsCompatible(node, parProc)) { + failedToAllocate = true; break; } - processor_set = true; - processor_to_be_allocated = par_proc; - } else if (sched.assignedProcessor(par) != processor_to_be_allocated) { - failed_to_allocate = true; + processorSet = true; + processorToBeAllocated = parProc; + } else if (sched.AssignedProcessor(par) != processorToBeAllocated) { + failedToAllocate = true; break; } } } - if (failed_to_allocate) { + if (failedToAllocate) { continue; } - sched.setAssignedSuperstep(node, superstep_counter); - if (processor_set) { - sched.setAssignedProcessor(node, processor_to_be_allocated); + sched.SetAssignedSuperstep(node, superstepCounter); + if (processorSet) { + sched.SetAssignedProcessor(node, processorToBeAllocated); } else { - v_workw_t min_weight = std::numeric_limits>::max(); - unsigned best_proc = std::numeric_limits::max(); - for (unsigned p = 0; p < instance.numberOfProcessors(); ++p) { - if (instance.isCompatible(node, p)) { - if 
(processor_weights[p] < min_weight) { - min_weight = processor_weights[p]; - best_proc = p; + VWorkwT minWeight = std::numeric_limits>::max(); + unsigned bestProc = std::numeric_limits::max(); + for (unsigned p = 0; p < instance.NumberOfProcessors(); ++p) { + if (instance.IsCompatible(node, p)) { + if (processorWeights[p] < minWeight) { + minWeight = processorWeights[p]; + bestProc = p; } } } - sched.setAssignedProcessor(node, best_proc); + sched.SetAssignedProcessor(node, bestProc); } - nodes_assigned_this_superstep.emplace(node); - processor_weights[sched.assignedProcessor(node)] += graph.vertex_work_weight(node); - std::vector new_nodes; - for (const auto &chld : graph.children(node)) { - predecessors_count[chld]++; - if (predecessors_count[chld] == graph.in_degree(chld)) { - new_nodes.emplace_back(chld); + nodesAssignedThisSuperstep.emplace(node); + processorWeights[sched.AssignedProcessor(node)] += graph.VertexWorkWeight(node); + std::vector newNodes; + for (const auto &chld : graph.Children(node)) { + predecessorsCount[chld]++; + if (predecessorsCount[chld] == graph.InDegree(chld)) { + newNodes.emplace_back(chld); } } next.erase(iter); - for (const auto &vrt : new_nodes) { - next.emplace(graph.out_degree(vrt), vrt); + for (const auto &vrt : newNodes) { + next.emplace(graph.OutDegree(vrt), vrt); } - node_added = true; + nodeAdded = true; break; } - if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors()) { + if (ensureEnoughSources_ && fewSources && next.size() >= instance.NumberOfProcessors()) { break; } } - superstep_counter++; + superstepCounter++; } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } - std::string getScheduleName() const override { return ensure_enough_sources ? "GreedyChildrenS" : "GreedyChildren"; } + std::string GetScheduleName() const override { return ensureEnoughSources_ ? 
"GreedyChildrenS" : "GreedyChildren"; } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp index 9aff8fb3..51765e15 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp @@ -32,71 +32,71 @@ namespace osp { * @brief The GreedyMetaScheduler class represents a meta-scheduler that selects the best schedule produced from a list of * added schedulers. * - * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods. - * The computeSchedule() method iterates through a list of schedulers, computes a schedule using each one, + * This class inherits from the Scheduler class and implements the ComputeSchedule() and GetScheduleName() methods. + * The ComputeSchedule() method iterates through a list of schedulers, computes a schedule using each one, * and returns the schedule with the minimum cost. * - * @tparam Graph_t The graph type representing the computational DAG. + * @tparam GraphT The graph type representing the computational DAG. * @tparam CostModel The cost model functor to evaluate schedules. Defaults to LazyCommunicationCost. */ -template > -class GreedyMetaScheduler : public Scheduler { - Serial serial_scheduler_; - std::vector *> schedulers_; +template > +class GreedyMetaScheduler : public Scheduler { + Serial serialScheduler_; + std::vector *> schedulers_; - static constexpr bool verbose = false; + static constexpr bool verbose_ = false; public: /** * @brief Default constructor for GreedyMetaScheduler. */ - GreedyMetaScheduler() : Scheduler() {} + GreedyMetaScheduler() : Scheduler() {} /** * @brief Default destructor for MetaScheduler. 
*/ ~GreedyMetaScheduler() override = default; - void addSerialScheduler() { schedulers_.push_back(&serial_scheduler_); } + void AddSerialScheduler() { schedulers_.push_back(&serialScheduler_); } - void addScheduler(Scheduler &s) { schedulers_.push_back(&s); } + void AddScheduler(Scheduler &s) { schedulers_.push_back(&s); } - void resetScheduler() { schedulers_.clear(); } + void ResetScheduler() { schedulers_.clear(); } - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - if (schedule.getInstance().getArchitecture().numberOfProcessors() == 1) { - if constexpr (verbose) { + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + if (schedule.GetInstance().GetArchitecture().NumberOfProcessors() == 1) { + if constexpr (verbose_) { std::cout << "Using serial scheduler for P=1." << std::endl; } - serial_scheduler_.computeSchedule(schedule); - return RETURN_STATUS::OSP_SUCCESS; + serialScheduler_.ComputeSchedule(schedule); + return ReturnStatus::OSP_SUCCESS; } - v_workw_t best_schedule_cost = std::numeric_limits>::max(); - BspSchedule current_schedule(schedule.getInstance()); + VWorkwT bestScheduleCost = std::numeric_limits>::max(); + BspSchedule currentSchedule(schedule.GetInstance()); - for (Scheduler *scheduler : schedulers_) { - scheduler->computeSchedule(current_schedule); - const v_workw_t schedule_cost = CostModel()(current_schedule); + for (Scheduler *scheduler : schedulers_) { + scheduler->ComputeSchedule(currentSchedule); + const VWorkwT scheduleCost = CostModel()(currentSchedule); - if constexpr (verbose) { - std::cout << "Executed scheduler " << scheduler->getScheduleName() << ", costs: " << schedule_cost - << ", nr. supersteps: " << current_schedule.numberOfSupersteps() << std::endl; + if constexpr (verbose_) { + std::cout << "Executed scheduler " << scheduler->GetScheduleName() << ", costs: " << scheduleCost + << ", nr. 
supersteps: " << currentSchedule.NumberOfSupersteps() << std::endl; } - if (schedule_cost < best_schedule_cost) { - best_schedule_cost = schedule_cost; - schedule = current_schedule; - if constexpr (verbose) { + if (scheduleCost < bestScheduleCost) { + bestScheduleCost = scheduleCost; + schedule = currentSchedule; + if constexpr (verbose_) { std::cout << "New best schedule!" << std::endl; } } } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } - std::string getScheduleName() const override { return "GreedyMetaScheduler"; } + std::string GetScheduleName() const override { return "GreedyMetaScheduler"; } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp index 5f943110..20e3fdc3 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp @@ -26,16 +26,16 @@ namespace osp { * @brief The GreedyReccomputer class applies a greedy algorithm to remove some of the communciation steps in * a BspSchedule by recomputation steps if this decreases the cost. */ -template +template class GreedyRecomputer { - static_assert(is_computational_dag_v, "GreedyRecomputer can only be used with computational DAGs."); + static_assert(isComputationalDagV, "GreedyRecomputer can only be used with computational DAGs."); private: - using vertex_idx = vertex_idx_t; - using cost_type = v_workw_t; - using KeyTriple = std::tuple, unsigned int, unsigned int>; + using VertexIdx = VertexIdxT; + using CostType = VWorkwT; + using KeyTriple = std::tuple, unsigned int, unsigned int>; - static_assert(std::is_same_v, v_commw_t>, + static_assert(std::is_same_v, VCommwT>, "GreedyRecomputer requires work and comm. 
weights to have the same type."); public: @@ -44,179 +44,179 @@ class GreedyRecomputer { */ virtual ~GreedyRecomputer() = default; - RETURN_STATUS computeRecompSchedule(BspScheduleCS &initial_schedule, BspScheduleRecomp &out_schedule) const; + ReturnStatus ComputeRecompSchedule(BspScheduleCS &initialSchedule, BspScheduleRecomp &outSchedule) const; }; -template -RETURN_STATUS GreedyRecomputer::computeRecompSchedule(BspScheduleCS &initial_schedule, - BspScheduleRecomp &out_schedule) const { - const vertex_idx &N = initial_schedule.getInstance().numberOfVertices(); - const unsigned &P = initial_schedule.getInstance().numberOfProcessors(); - const unsigned &S = initial_schedule.numberOfSupersteps(); - const Graph_t &G = initial_schedule.getInstance().getComputationalDag(); +template +ReturnStatus GreedyRecomputer::ComputeRecompSchedule(BspScheduleCS &initialSchedule, + BspScheduleRecomp &outSchedule) const { + const VertexIdx &n = initialSchedule.GetInstance().NumberOfVertices(); + const unsigned &p = initialSchedule.GetInstance().NumberOfProcessors(); + const unsigned &s = initialSchedule.NumberOfSupersteps(); + const GraphT &g = initialSchedule.GetInstance().GetComputationalDag(); - out_schedule = BspScheduleRecomp(initial_schedule.getInstance()); - out_schedule.setNumberOfSupersteps(initial_schedule.numberOfSupersteps()); + outSchedule = BspScheduleRecomp(initialSchedule.GetInstance()); + outSchedule.SetNumberOfSupersteps(initialSchedule.NumberOfSupersteps()); // Initialize required data structures - std::vector> work_cost(P, std::vector(S, 0)), send_cost(P, std::vector(S, 0)), - rec_cost(P, std::vector(S, 0)); + std::vector> workCost(p, std::vector(s, 0)), sendCost(p, std::vector(s, 0)), + recCost(p, std::vector(s, 0)); - std::vector> first_computable(N, std::vector(P, 0U)), - first_present(N, std::vector(P, std::numeric_limits::max())); + std::vector> firstComputable(n, std::vector(p, 0U)), + firstPresent(n, std::vector(p, std::numeric_limits::max())); - 
std::vector>> needed_on_proc(N, std::vector>(P, {S})); + std::vector>> neededOnProc(n, std::vector>(p, {s})); - std::vector max_work(S, 0), max_comm(S, 0); + std::vector maxWork(s, 0), maxComm(s, 0); - std::vector> comm_steps(S); + std::vector> commSteps(s); - for (vertex_idx node = 0; node < N; ++node) { - const unsigned &proc = initial_schedule.assignedProcessor(node); - const unsigned &step = initial_schedule.assignedSuperstep(node); + for (VertexIdx node = 0; node < n; ++node) { + const unsigned &proc = initialSchedule.AssignedProcessor(node); + const unsigned &step = initialSchedule.AssignedSuperstep(node); - work_cost[proc][step] += G.vertex_work_weight(node); - first_present[node][proc] = std::min(first_present[node][proc], step); - for (vertex_idx pred : G.parents(node)) { - needed_on_proc[pred][proc].insert(step); + workCost[proc][step] += g.VertexWorkWeight(node); + firstPresent[node][proc] = std::min(firstPresent[node][proc], step); + for (VertexIdx pred : g.Parents(node)) { + neededOnProc[pred][proc].insert(step); } - out_schedule.assignments(node).emplace_back(proc, step); + outSchedule.Assignments(node).emplace_back(proc, step); } - for (const std::pair item : initial_schedule.getCommunicationSchedule()) { - const vertex_idx &node = std::get<0>(item.first); - const unsigned &from_proc = std::get<1>(item.first); - const unsigned &to_proc = std::get<2>(item.first); + for (const std::pair item : initialSchedule.GetCommunicationSchedule()) { + const VertexIdx &node = std::get<0>(item.first); + const unsigned &fromProc = std::get<1>(item.first); + const unsigned &toProc = std::get<2>(item.first); const unsigned &step = item.second; - send_cost[from_proc][step] += G.vertex_comm_weight(node) - * initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc); - rec_cost[to_proc][step] += G.vertex_comm_weight(node) - * initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc); - - 
comm_steps[step].emplace(item.first); - needed_on_proc[node][from_proc].insert(step); - first_present[node][to_proc] = std::min(first_present[node][to_proc], step + 1); + sendCost[fromProc][step] + += g.VertexCommWeight(node) * initialSchedule.GetInstance().GetArchitecture().CommunicationCosts(fromProc, toProc); + recCost[toProc][step] + += g.VertexCommWeight(node) * initialSchedule.GetInstance().GetArchitecture().CommunicationCosts(fromProc, toProc); + + commSteps[step].emplace(item.first); + neededOnProc[node][fromProc].insert(step); + firstPresent[node][toProc] = std::min(firstPresent[node][toProc], step + 1); } - for (unsigned step = 0; step < S; ++step) { - for (unsigned proc = 0; proc < P; ++proc) { - max_work[step] = std::max(max_work[step], work_cost[proc][step]); - max_comm[step] = std::max(max_comm[step], send_cost[proc][step]); - max_comm[step] = std::max(max_comm[step], rec_cost[proc][step]); + for (unsigned step = 0; step < s; ++step) { + for (unsigned proc = 0; proc < p; ++proc) { + maxWork[step] = std::max(maxWork[step], workCost[proc][step]); + maxComm[step] = std::max(maxComm[step], sendCost[proc][step]); + maxComm[step] = std::max(maxComm[step], recCost[proc][step]); } } - for (vertex_idx node = 0; node < N; ++node) { - for (const vertex_idx &pred : G.parents(node)) { - for (unsigned proc = 0; proc < P; ++proc) { - first_computable[node][proc] = std::max(first_computable[node][proc], first_present[pred][proc]); + for (VertexIdx node = 0; node < n; ++node) { + for (const VertexIdx &pred : g.Parents(node)) { + for (unsigned proc = 0; proc < p; ++proc) { + firstComputable[node][proc] = std::max(firstComputable[node][proc], firstPresent[pred][proc]); } } } // Find improvement steps - bool still_improved = true; - while (still_improved) { - still_improved = false; + bool stillImproved = true; + while (stillImproved) { + stillImproved = false; - for (unsigned step = 0; step < S; ++step) { - std::vector to_erase; - for (const KeyTriple &entry : 
comm_steps[step]) { - const vertex_idx &node = std::get<0>(entry); - const unsigned &from_proc = std::get<1>(entry); - const unsigned &to_proc = std::get<2>(entry); + for (unsigned step = 0; step < s; ++step) { + std::vector toErase; + for (const KeyTriple &entry : commSteps[step]) { + const VertexIdx &node = std::get<0>(entry); + const unsigned &fromProc = std::get<1>(entry); + const unsigned &toProc = std::get<2>(entry); // check how much comm cost we save by removing comm schedule entry - cost_type comm_induced = G.vertex_comm_weight(node) - * initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc); + CostType commInduced = g.VertexCommWeight(node) + * initialSchedule.GetInstance().GetArchitecture().CommunicationCosts(fromProc, toProc); - cost_type new_max_comm = 0; - for (unsigned proc = 0; proc < P; ++proc) { - if (proc == from_proc) { - new_max_comm = std::max(new_max_comm, send_cost[proc][step] - comm_induced); + CostType newMaxComm = 0; + for (unsigned proc = 0; proc < p; ++proc) { + if (proc == fromProc) { + newMaxComm = std::max(newMaxComm, sendCost[proc][step] - commInduced); } else { - new_max_comm = std::max(new_max_comm, send_cost[proc][step]); + newMaxComm = std::max(newMaxComm, sendCost[proc][step]); } - if (proc == to_proc) { - new_max_comm = std::max(new_max_comm, rec_cost[proc][step] - comm_induced); + if (proc == toProc) { + newMaxComm = std::max(newMaxComm, recCost[proc][step] - commInduced); } else { - new_max_comm = std::max(new_max_comm, rec_cost[proc][step]); + newMaxComm = std::max(newMaxComm, recCost[proc][step]); } } - if (new_max_comm == max_comm[step]) { + if (newMaxComm == maxComm[step]) { continue; } - if (!initial_schedule.getInstance().isCompatible(node, to_proc)) { + if (!initialSchedule.GetInstance().IsCompatible(node, toProc)) { continue; } - cost_type decrease = max_comm[step] - new_max_comm; - if (max_comm[step] > 0 && new_max_comm == 0) { - decrease += 
initial_schedule.getInstance().getArchitecture().synchronisationCosts(); + CostType decrease = maxComm[step] - newMaxComm; + if (maxComm[step] > 0 && newMaxComm == 0) { + decrease += initialSchedule.GetInstance().GetArchitecture().SynchronisationCosts(); } // check how much it would increase the work cost instead - unsigned best_step = S; - cost_type smallest_increase = std::numeric_limits::max(); - for (unsigned comp_step = first_computable[node][to_proc]; comp_step <= *needed_on_proc[node][to_proc].begin(); - ++comp_step) { - cost_type increase = work_cost[to_proc][comp_step] + G.vertex_work_weight(node) > max_work[comp_step] - ? work_cost[to_proc][comp_step] + G.vertex_work_weight(node) - max_work[comp_step] - : 0; - - if (increase < smallest_increase) { - best_step = comp_step; - smallest_increase = increase; + unsigned bestStep = s; + CostType smallestIncrease = std::numeric_limits::max(); + for (unsigned compStep = firstComputable[node][toProc]; compStep <= *neededOnProc[node][toProc].begin(); + ++compStep) { + CostType increase = workCost[toProc][compStep] + g.VertexWorkWeight(node) > maxWork[compStep] + ? 
workCost[toProc][compStep] + g.VertexWorkWeight(node) - maxWork[compStep] + : 0; + + if (increase < smallestIncrease) { + bestStep = compStep; + smallestIncrease = increase; } } // check if this modification is beneficial - if (best_step == S || smallest_increase > decrease) { + if (bestStep == s || smallestIncrease > decrease) { continue; } // execute the modification - to_erase.emplace_back(entry); - out_schedule.assignments(node).emplace_back(to_proc, best_step); + toErase.emplace_back(entry); + outSchedule.Assignments(node).emplace_back(toProc, bestStep); - send_cost[from_proc][step] -= comm_induced; - rec_cost[to_proc][step] -= comm_induced; - max_comm[step] = new_max_comm; + sendCost[fromProc][step] -= commInduced; + recCost[toProc][step] -= commInduced; + maxComm[step] = newMaxComm; - work_cost[to_proc][best_step] += G.vertex_work_weight(node); - max_work[best_step] += smallest_increase; + workCost[toProc][bestStep] += g.VertexWorkWeight(node); + maxWork[bestStep] += smallestIncrease; // update movability bounds - for (const vertex_idx &pred : G.parents(node)) { - needed_on_proc[pred][to_proc].insert(best_step); + for (const VertexIdx &pred : g.Parents(node)) { + neededOnProc[pred][toProc].insert(bestStep); } - needed_on_proc[node][from_proc].erase(needed_on_proc[node][from_proc].lower_bound(step)); + neededOnProc[node][fromProc].erase(neededOnProc[node][fromProc].lower_bound(step)); - first_present[node][to_proc] = best_step; - for (const vertex_idx &succ : G.children(node)) { - for (const vertex_idx &pred : G.parents(node)) { - first_computable[succ][to_proc] = std::max(first_computable[succ][to_proc], first_present[pred][to_proc]); + firstPresent[node][toProc] = bestStep; + for (const VertexIdx &succ : g.Children(node)) { + for (const VertexIdx &pred : g.Parents(node)) { + firstComputable[succ][toProc] = std::max(firstComputable[succ][toProc], firstPresent[pred][toProc]); } } - still_improved = true; + stillImproved = true; } - for (const KeyTriple &entry 
: to_erase) { - comm_steps[step].erase(entry); + for (const KeyTriple &entry : toErase) { + commSteps[step].erase(entry); } } } - for (unsigned step = 0; step < S; ++step) { - for (const KeyTriple &entry : comm_steps[step]) { - out_schedule.getCommunicationSchedule().emplace(entry, step); + for (unsigned step = 0; step < s; ++step) { + for (const KeyTriple &entry : commSteps[step]) { + outSchedule.GetCommunicationSchedule().emplace(entry, step); } } - out_schedule.mergeSupersteps(); + outSchedule.MergeSupersteps(); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp index 857e4e02..54292889 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp @@ -42,67 +42,67 @@ namespace osp { * * It computes schedules for BspInstance using variance-based priorities. 
*/ -template -class GreedyVarianceSspScheduler : public MaxBspScheduler { - static_assert(is_computational_dag_v, "GreedyVarianceSspScheduler can only be used with computational DAGs."); +template +class GreedyVarianceSspScheduler : public MaxBspScheduler { + static_assert(isComputationalDagV, "GreedyVarianceSspScheduler can only be used with computational DAGs."); private: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - constexpr static bool use_memory_constraint = is_memory_constraint_v - or is_memory_constraint_schedule_v; + constexpr static bool useMemoryConstraint_ = isMemoryConstraintV + or isMemoryConstraintScheduleV; - static_assert(not use_memory_constraint or std::is_same_v, - "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); + static_assert(not useMemoryConstraint_ or std::is_same_v, + "GraphT must be the same as MemoryConstraintT::GraphImplT."); - MemoryConstraint_t memory_constraint; - double max_percent_idle_processors; - bool increase_parallelism_in_new_superstep; + MemoryConstraintT memoryConstraint_; + double maxPercentIdleProcessors_; + bool increaseParallelismInNewSuperstep_; - std::vector compute_work_variance(const Graph_t &graph) const { - std::vector work_variance(graph.num_vertices(), 0.0); - const std::vector top_order = GetTopOrder(graph); + std::vector ComputeWorkVariance(const GraphT &graph) const { + std::vector workVariance(graph.NumVertices(), 0.0); + const std::vector topOrder = GetTopOrder(graph); - for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) { + for (auto rIter = topOrder.rbegin(); rIter != topOrder.crend(); rIter++) { double temp = 0; - double max_priority = 0; - for (const auto &child : graph.children(*r_iter)) { - max_priority = std::max(work_variance[child], max_priority); + double maxPriority = 0; + for (const auto &child : graph.Children(*rIter)) { + maxPriority = std::max(workVariance[child], maxPriority); } - for (const auto &child : 
graph.children(*r_iter)) { - temp += std::exp(2 * (work_variance[child] - max_priority)); + for (const auto &child : graph.Children(*rIter)) { + temp += std::exp(2 * (workVariance[child] - maxPriority)); } - temp = std::log(temp) / 2 + max_priority; + temp = std::log(temp) / 2 + maxPriority; - double node_weight - = std::log(static_cast(std::max(graph.vertex_work_weight(*r_iter), static_cast>(1)))); - double larger_val = node_weight > temp ? node_weight : temp; + double nodeWeight + = std::log(static_cast(std::max(graph.VertexWorkWeight(*rIter), static_cast>(1)))); + double largerVal = nodeWeight > temp ? nodeWeight : temp; - work_variance[*r_iter] = std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; + workVariance[*rIter] = std::log(std::exp(nodeWeight - largerVal) + std::exp(temp - largerVal)) + largerVal; } - return work_variance; + return workVariance; } - std::vector>> procTypesCompatibleWithNodeType_omit_procType( - const BspInstance &instance) const { - const std::vector> procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); + std::vector>> ProcTypesCompatibleWithNodeTypeOmitProcType( + const BspInstance &instance) const { + const std::vector> procTypesCompatibleWithNodeType = instance.GetProcTypesCompatibleWithNodeType(); - std::vector>> procTypesCompatibleWithNodeType_skip( - instance.getArchitecture().getNumberOfProcessorTypes(), - std::vector>(instance.getComputationalDag().num_vertex_types())); - for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) { - for (unsigned nodeType = 0; nodeType < instance.getComputationalDag().num_vertex_types(); nodeType++) { + std::vector>> procTypesCompatibleWithNodeTypeSkip( + instance.GetArchitecture().GetNumberOfProcessorTypes(), + std::vector>(instance.GetComputationalDag().NumVertexTypes())); + for (unsigned procType = 0; procType < instance.GetArchitecture().GetNumberOfProcessorTypes(); procType++) { 
+ for (unsigned nodeType = 0; nodeType < instance.GetComputationalDag().NumVertexTypes(); nodeType++) { for (unsigned otherProcType : procTypesCompatibleWithNodeType[nodeType]) { if (procType == otherProcType) { continue; } - procTypesCompatibleWithNodeType_skip[procType][nodeType].emplace_back(otherProcType); + procTypesCompatibleWithNodeTypeSkip[procType][nodeType].emplace_back(otherProcType); } } } - return procTypesCompatibleWithNodeType_skip; + return procTypesCompatibleWithNodeTypeSkip; } struct VarianceCompare { @@ -111,18 +111,18 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { } }; - bool CanChooseNode(const BspInstance &instance, + bool CanChooseNode(const BspInstance &instance, const std::vector, VarianceCompare>> &allReady, const std::vector, VarianceCompare>> &procReady, const std::vector &procFree) const { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (procFree[i] && !procReady[i].empty()) { return true; } } - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + if (procFree[i] && !allReady[instance.GetArchitecture().ProcessorType(i)].empty()) { return true; } } @@ -130,27 +130,27 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { return false; } - void Choose(const BspInstance &instance, - const std::vector &work_variance, + void Choose(const BspInstance &instance, + const std::vector &workVariance, std::vector, VarianceCompare>> &allReady, std::vector, VarianceCompare>> &procReady, const std::vector &procFree, VertexType &node, unsigned &p, const bool endSupStep, - const v_workw_t remaining_time, - const std::vector>> &procTypesCompatibleWithNodeType_skip_proctype) const { + const VWorkwT remainingTime, + const std::vector>> &procTypesCompatibleWithNodeTypeSkipProctype) const { 
double maxScore = -1; - bool found_allocation = false; + bool foundAllocation = false; - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (!procFree[i] || procReady[i].empty()) { continue; } auto it = procReady[i].begin(); while (it != procReady[i].end()) { - if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { + if (endSupStep && (remainingTime < instance.GetComputationalDag().VertexWorkWeight(it->first))) { it = procReady[i].erase(it); continue; } @@ -158,24 +158,24 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { const double &score = it->second; if (score > maxScore) { - const unsigned procType = instance.getArchitecture().processorType(i); + const unsigned procType = instance.GetArchitecture().ProcessorType(i); - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(it->first, i)) { + if constexpr (useMemoryConstraint_) { + if (memoryConstraint_.CanAdd(it->first, i)) { node = it->first; p = i; - found_allocation = true; + foundAllocation = true; - if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) { + if (procType < procTypesCompatibleWithNodeTypeSkipProctype.size()) { const auto &compatibleTypes - = procTypesCompatibleWithNodeType_skip_proctype[procType] - [instance.getComputationalDag().vertex_type(node)]; + = procTypesCompatibleWithNodeTypeSkipProctype[procType] + [instance.GetComputationalDag().VertexType(node)]; for (unsigned otherType : compatibleTypes) { - for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) { - if (j != i && instance.getArchitecture().processorType(j) == otherType + for (unsigned j = 0; j < instance.NumberOfProcessors(); ++j) { + if (j != i && instance.GetArchitecture().ProcessorType(j) == otherType && j < procReady.size()) { - procReady[j].erase(std::make_pair(node, work_variance[node])); + procReady[j].erase(std::make_pair(node, workVariance[node])); 
} } } @@ -186,17 +186,17 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { } else { node = it->first; p = i; - found_allocation = true; + foundAllocation = true; - if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) { + if (procType < procTypesCompatibleWithNodeTypeSkipProctype.size()) { const auto &compatibleTypes - = procTypesCompatibleWithNodeType_skip_proctype[procType] - [instance.getComputationalDag().vertex_type(node)]; + = procTypesCompatibleWithNodeTypeSkipProctype[procType] + [instance.GetComputationalDag().VertexType(node)]; for (unsigned otherType : compatibleTypes) { - for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) { - if (j != i && instance.getArchitecture().processorType(j) == otherType && j < procReady.size()) { - procReady[j].erase(std::make_pair(node, work_variance[node])); + for (unsigned j = 0; j < instance.NumberOfProcessors(); ++j) { + if (j != i && instance.GetArchitecture().ProcessorType(j) == otherType && j < procReady.size()) { + procReady[j].erase(std::make_pair(node, workVariance[node])); } } } @@ -210,12 +210,12 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { } } - if (found_allocation) { + if (foundAllocation) { return; } - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - const unsigned procType = instance.getArchitecture().processorType(i); + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + const unsigned procType = instance.GetArchitecture().ProcessorType(i); if (!procFree[i] || procType >= allReady.size() || allReady[procType].empty()) { continue; } @@ -224,7 +224,7 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { auto it = readyList.begin(); while (it != readyList.end()) { - if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { + if (endSupStep && (remainingTime < instance.GetComputationalDag().VertexWorkWeight(it->first))) { it = readyList.erase(it); continue; } @@ -232,18 +232,18 
@@ class GreedyVarianceSspScheduler : public MaxBspScheduler { const double &score = it->second; if (score > maxScore) { - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(it->first, i)) { + if constexpr (useMemoryConstraint_) { + if (memoryConstraint_.CanAdd(it->first, i)) { node = it->first; p = i; const auto &compatibleTypes - = procTypesCompatibleWithNodeType_skip_proctype[procType] - [instance.getComputationalDag().vertex_type(node)]; + = procTypesCompatibleWithNodeTypeSkipProctype[procType] + [instance.GetComputationalDag().VertexType(node)]; for (unsigned otherType : compatibleTypes) { if (otherType < allReady.size()) { - allReady[otherType].erase(std::make_pair(node, work_variance[node])); + allReady[otherType].erase(std::make_pair(node, workVariance[node])); } } @@ -254,12 +254,11 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { p = i; const auto &compatibleTypes - = procTypesCompatibleWithNodeType_skip_proctype[procType] - [instance.getComputationalDag().vertex_type(node)]; + = procTypesCompatibleWithNodeTypeSkipProctype[procType][instance.GetComputationalDag().VertexType(node)]; for (unsigned otherType : compatibleTypes) { if (otherType < allReady.size()) { - allReady[otherType].erase(std::make_pair(node, work_variance[node])); + allReady[otherType].erase(std::make_pair(node, workVariance[node])); } } @@ -271,31 +270,31 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { } }; - bool check_mem_feasibility(const BspInstance &instance, - const std::vector, VarianceCompare>> &allReady, - const std::vector, VarianceCompare>> &procReady) const { - if constexpr (use_memory_constraint) { - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + bool CheckMemFeasibility(const BspInstance &instance, + const std::vector, VarianceCompare>> &allReady, + const std::vector, VarianceCompare>> 
&procReady) const { + if constexpr (useMemoryConstraint_) { + if (instance.GetArchitecture().GetMemoryConstraintType() == MemoryConstraintType::PERSISTENT_AND_TRANSIENT) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (!procReady[i].empty()) { - const std::pair &node_pair = *procReady[i].begin(); - VertexType top_node = node_pair.first; + const std::pair &nodePair = *procReady[i].begin(); + VertexType topNode = nodePair.first; - if (memory_constraint.can_add(top_node, i)) { + if (memoryConstraint_.CanAdd(topNode, i)) { return true; } } } - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (allReady[instance.getArchitecture().processorType(i)].empty()) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + if (allReady[instance.GetArchitecture().ProcessorType(i)].empty()) { continue; } - const std::pair &node_pair = *allReady[instance.getArchitecture().processorType(i)].begin(); - VertexType top_node = node_pair.first; + const std::pair &nodePair = *allReady[instance.GetArchitecture().ProcessorType(i)].begin(); + VertexType topNode = nodePair.first; - if (memory_constraint.can_add(top_node, i)) { + if (memoryConstraint_.CanAdd(topNode, i)) { return true; } } @@ -307,183 +306,181 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { return true; } - unsigned get_nr_parallelizable_nodes(const BspInstance &instance, - const unsigned &stale, - const std::vector &nr_old_ready_nodes_per_type, - const std::vector &nr_ready_nodes_per_type, - const std::vector, VarianceCompare>> &procReady, - const std::vector &nr_procs_per_type) const { - unsigned nr_nodes = 0; - unsigned num_proc_types = instance.getArchitecture().getNumberOfProcessorTypes(); + unsigned GetNrParallelizableNodes(const BspInstance &instance, + const unsigned &stale, + const std::vector &nrOldReadyNodesPerType, + const std::vector &nrReadyNodesPerType, + const std::vector, VarianceCompare>> &procReady, + const std::vector &nrProcsPerType) const { 
+ unsigned nrNodes = 0; + unsigned numProcTypes = instance.GetArchitecture().GetNumberOfProcessorTypes(); - std::vector procs_per_type = nr_procs_per_type; + std::vector procsPerType = nrProcsPerType; if (stale > 1) { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { if (!procReady[proc].empty()) { - procs_per_type[instance.getArchitecture().processorType(proc)]--; - nr_nodes++; + procsPerType[instance.GetArchitecture().ProcessorType(proc)]--; + nrNodes++; } } } - std::vector ready_nodes_per_type = nr_ready_nodes_per_type; - for (unsigned node_type = 0; node_type < ready_nodes_per_type.size(); node_type++) { - ready_nodes_per_type[node_type] += nr_old_ready_nodes_per_type[node_type]; + std::vector readyNodesPerType = nrReadyNodesPerType; + for (unsigned nodeType = 0; nodeType < readyNodesPerType.size(); nodeType++) { + readyNodesPerType[nodeType] += nrOldReadyNodesPerType[nodeType]; } - for (unsigned proc_type = 0; proc_type < num_proc_types; ++proc_type) { - for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { - if (instance.isCompatibleType(node_type, proc_type)) { - unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]); - nr_nodes += matched; - ready_nodes_per_type[node_type] -= matched; - procs_per_type[proc_type] -= matched; + for (unsigned procType = 0; procType < numProcTypes; ++procType) { + for (unsigned nodeType = 0; nodeType < instance.GetComputationalDag().NumVertexTypes(); ++nodeType) { + if (instance.IsCompatibleType(nodeType, procType)) { + unsigned matched = std::min(readyNodesPerType[nodeType], procsPerType[procType]); + nrNodes += matched; + readyNodesPerType[nodeType] -= matched; + procsPerType[procType] -= matched; } } } - return nr_nodes; + return nrNodes; } public: /** * @brief Default constructor for GreedyVarianceSspScheduler. 
*/ - GreedyVarianceSspScheduler(float max_percent_idle_processors_ = 0.2f, bool increase_parallelism_in_new_superstep_ = true) - : max_percent_idle_processors(max_percent_idle_processors_), - increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {} + GreedyVarianceSspScheduler(float maxPercentIdleProcessors = 0.2f, bool increaseParallelismInNewSuperstep = true) + : maxPercentIdleProcessors_(maxPercentIdleProcessors), + increaseParallelismInNewSuperstep_(increaseParallelismInNewSuperstep) {} /** * @brief Default destructor for GreedyVarianceSspScheduler. */ virtual ~GreedyVarianceSspScheduler() = default; - RETURN_STATUS computeSspSchedule(BspSchedule &schedule, unsigned stale) { - const auto &instance = schedule.getInstance(); - const auto &G = instance.getComputationalDag(); - const VertexType &N = instance.numberOfVertices(); - const unsigned &P = instance.numberOfProcessors(); + ReturnStatus ComputeSspSchedule(BspSchedule &schedule, unsigned stale) { + const auto &instance = schedule.GetInstance(); + const auto &g = instance.GetComputationalDag(); + const VertexType &n = instance.NumberOfVertices(); + const unsigned &p = instance.NumberOfProcessors(); unsigned supstepIdx = 0; - if constexpr (is_memory_constraint_v) { - memory_constraint.initialize(instance); - } else if constexpr (is_memory_constraint_schedule_v) { - memory_constraint.initialize(schedule, supstepIdx); + if constexpr (isMemoryConstraintV) { + memoryConstraint_.Initialize(instance); + } else if constexpr (isMemoryConstraintScheduleV) { + memoryConstraint_.Initialize(schedule, supstepIdx); } - const std::vector work_variances = compute_work_variance(G); + const std::vector workVariances = ComputeWorkVariance(g); - std::set, VarianceCompare> old_ready; + std::set, VarianceCompare> oldReady; std::vector, VarianceCompare>> ready(stale); std::vector, VarianceCompare>>> procReady( - stale, std::vector, VarianceCompare>>(P)); + stale, std::vector, VarianceCompare>>(p)); 
std::vector, VarianceCompare>> allReady( - instance.getArchitecture().getNumberOfProcessorTypes()); + instance.GetArchitecture().GetNumberOfProcessorTypes()); - const auto procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); - const std::vector>> procTypesCompatibleWithNodeType_skip_proctype - = procTypesCompatibleWithNodeType_omit_procType(instance); + const auto procTypesCompatibleWithNodeType = instance.GetProcTypesCompatibleWithNodeType(); + const std::vector>> procTypesCompatibleWithNodeTypeSkipProctype + = ProcTypesCompatibleWithNodeTypeOmitProcType(instance); - std::vector nr_old_ready_nodes_per_type(G.num_vertex_types(), 0); - std::vector> nr_ready_stale_nodes_per_type(stale, std::vector(G.num_vertex_types(), 0)); - std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for (auto proc = 0u; proc < P; ++proc) { - ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; + std::vector nrOldReadyNodesPerType(g.NumVertexTypes(), 0); + std::vector> nrReadyStaleNodesPerType(stale, std::vector(g.NumVertexTypes(), 0)); + std::vector nrProcsPerType(instance.GetArchitecture().GetNumberOfProcessorTypes(), 0); + for (auto proc = 0u; proc < p; ++proc) { + ++nrProcsPerType[instance.GetArchitecture().ProcessorType(proc)]; } - std::vector nrPredecRemain(N); + std::vector nrPredecRemain(n); - for (VertexType node = 0; node < N; ++node) { - const auto num_parents = G.in_degree(node); + for (VertexType node = 0; node < n; ++node) { + const auto numParents = g.InDegree(node); - nrPredecRemain[node] = num_parents; + nrPredecRemain[node] = numParents; - if (num_parents == 0) { - ready[0].insert(std::make_pair(node, work_variances[node])); - nr_ready_stale_nodes_per_type[0][G.vertex_type(node)]++; + if (numParents == 0) { + ready[0].insert(std::make_pair(node, workVariances[node])); + nrReadyStaleNodesPerType[0][g.VertexType(node)]++; } } - std::vector procFree(P, true); - unsigned free = P; + std::vector 
procFree(p, true); + unsigned free = p; - std::set, VertexType>> finishTimes; + std::set, VertexType>> finishTimes; finishTimes.emplace(0, std::numeric_limits::max()); - std::vector number_of_allocated_allReady_tasks_in_superstep( - instance.getArchitecture().getNumberOfProcessorTypes(), 0); - std::vector limit_of_number_of_allocated_allReady_tasks_in_superstep( - instance.getArchitecture().getNumberOfProcessorTypes(), 0); + std::vector numberOfAllocatedAllReadyTasksInSuperstep(instance.GetArchitecture().GetNumberOfProcessorTypes(), 0); + std::vector limitOfNumberOfAllocatedAllReadyTasksInSuperstep( + instance.GetArchitecture().GetNumberOfProcessorTypes(), 0); bool endSupStep = true; - bool begin_outer_while = true; - bool able_to_schedule_in_step = false; - unsigned successive_empty_supersteps = 0u; + bool beginOuterWhile = true; + bool ableToScheduleInStep = false; + unsigned successiveEmptySupersteps = 0u; - auto nonempty_ready = [&]() { + auto nonemptyReady = [&]() { return std::any_of( - ready.cbegin(), ready.cend(), [](const std::set, VarianceCompare> &ready_set) { - return !ready_set.empty(); + ready.cbegin(), ready.cend(), [](const std::set, VarianceCompare> &readySet) { + return !readySet.empty(); }); }; - while (!old_ready.empty() || nonempty_ready() || !finishTimes.empty()) { + while (!oldReady.empty() || nonemptyReady() || !finishTimes.empty()) { if (finishTimes.empty() && endSupStep) { - able_to_schedule_in_step = false; - number_of_allocated_allReady_tasks_in_superstep - = std::vector(instance.getArchitecture().getNumberOfProcessorTypes(), 0); + ableToScheduleInStep = false; + numberOfAllocatedAllReadyTasksInSuperstep + = std::vector(instance.GetArchitecture().GetNumberOfProcessorTypes(), 0); - for (unsigned i = 0; i < P; ++i) { + for (unsigned i = 0; i < p; ++i) { procReady[supstepIdx % stale][i].clear(); } - if (!begin_outer_while) { + if (!beginOuterWhile) { supstepIdx++; } else { - begin_outer_while = false; + beginOuterWhile = false; } - for 
(unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); ++procType) { + for (unsigned procType = 0; procType < instance.GetArchitecture().GetNumberOfProcessorTypes(); ++procType) { allReady[procType].clear(); } - old_ready.insert(ready[supstepIdx % stale].begin(), ready[supstepIdx % stale].end()); + oldReady.insert(ready[supstepIdx % stale].begin(), ready[supstepIdx % stale].end()); ready[supstepIdx % stale].clear(); - for (unsigned node_type = 0; node_type < G.num_vertex_types(); ++node_type) { - nr_old_ready_nodes_per_type[node_type] += nr_ready_stale_nodes_per_type[supstepIdx % stale][node_type]; - nr_ready_stale_nodes_per_type[supstepIdx % stale][node_type] = 0; + for (unsigned nodeType = 0; nodeType < g.NumVertexTypes(); ++nodeType) { + nrOldReadyNodesPerType[nodeType] += nrReadyStaleNodesPerType[supstepIdx % stale][nodeType]; + nrReadyStaleNodesPerType[supstepIdx % stale][nodeType] = 0; } - for (const auto &nodeAndValuePair : old_ready) { + for (const auto &nodeAndValuePair : oldReady) { VertexType node = nodeAndValuePair.first; - for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) { + for (unsigned procType : procTypesCompatibleWithNodeType[g.VertexType(node)]) { allReady[procType].insert(allReady[procType].end(), nodeAndValuePair); } } - if constexpr (use_memory_constraint) { - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) { - for (unsigned proc = 0; proc < P; proc++) { - memory_constraint.reset(proc); + if constexpr (useMemoryConstraint_) { + if (instance.GetArchitecture().GetMemoryConstraintType() == MemoryConstraintType::LOCAL) { + for (unsigned proc = 0; proc < p; proc++) { + memoryConstraint_.Reset(proc); } } } - for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) { - unsigned equal_split = (static_cast(allReady[procType].size()) + stale - 1) / stale; - unsigned at_least_for_long_step = 3 * 
nr_procs_per_type[procType]; - limit_of_number_of_allocated_allReady_tasks_in_superstep[procType] - = std::max(at_least_for_long_step, equal_split); + for (unsigned procType = 0; procType < instance.GetArchitecture().GetNumberOfProcessorTypes(); procType++) { + unsigned equalSplit = (static_cast(allReady[procType].size()) + stale - 1) / stale; + unsigned atLeastForLongStep = 3 * nrProcsPerType[procType]; + limitOfNumberOfAllocatedAllReadyTasksInSuperstep[procType] = std::max(atLeastForLongStep, equalSplit); } endSupStep = false; finishTimes.emplace(0, std::numeric_limits::max()); } - const v_workw_t time = finishTimes.begin()->first; - const v_workw_t max_finish_time = finishTimes.rbegin()->first; + const VWorkwT time = finishTimes.begin()->first; + const VWorkwT maxFinishTime = finishTimes.rbegin()->first; // Find new ready jobs while (!finishTimes.empty() && finishTimes.begin()->first == time) { @@ -491,40 +488,40 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { finishTimes.erase(finishTimes.begin()); if (node != std::numeric_limits::max()) { - const unsigned proc_of_node = schedule.assignedProcessor(node); + const unsigned procOfNode = schedule.AssignedProcessor(node); - for (const auto &succ : G.children(node)) { + for (const auto &succ : g.Children(node)) { nrPredecRemain[succ]--; if (nrPredecRemain[succ] == 0) { - ready[supstepIdx % stale].emplace(succ, work_variances[succ]); - nr_ready_stale_nodes_per_type[supstepIdx % stale][G.vertex_type(succ)]++; + ready[supstepIdx % stale].emplace(succ, workVariances[succ]); + nrReadyStaleNodesPerType[supstepIdx % stale][g.VertexType(succ)]++; - unsigned earliest_add = supstepIdx; - for (const auto &pred : G.parents(succ)) { - if (schedule.assignedProcessor(pred) != proc_of_node) { - earliest_add = std::max(earliest_add, stale + schedule.assignedSuperstep(pred)); + unsigned earliestAdd = supstepIdx; + for (const auto &pred : g.Parents(succ)) { + if (schedule.AssignedProcessor(pred) != procOfNode) { + 
earliestAdd = std::max(earliestAdd, stale + schedule.AssignedSuperstep(pred)); } } - if (instance.isCompatible(succ, proc_of_node)) { - bool memory_ok = true; + if (instance.IsCompatible(succ, procOfNode)) { + bool memoryOk = true; - if constexpr (use_memory_constraint) { - if (earliest_add == supstepIdx) { - memory_ok = memory_constraint.can_add(succ, proc_of_node); + if constexpr (useMemoryConstraint_) { + if (earliestAdd == supstepIdx) { + memoryOk = memoryConstraint_.CanAdd(succ, procOfNode); } } - for (unsigned step_to_add = earliest_add; step_to_add < supstepIdx + stale; ++step_to_add) { - if ((step_to_add == supstepIdx) && !memory_ok) { + for (unsigned stepToAdd = earliestAdd; stepToAdd < supstepIdx + stale; ++stepToAdd) { + if ((stepToAdd == supstepIdx) && !memoryOk) { continue; } - procReady[step_to_add % stale][proc_of_node].emplace(succ, work_variances[succ]); + procReady[stepToAdd % stale][procOfNode].emplace(succ, workVariances[succ]); } } } } - procFree[proc_of_node] = true; + procFree[procOfNode] = true; ++free; } } @@ -536,104 +533,104 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { while (CanChooseNode(instance, allReady, procReady[supstepIdx % stale], procFree)) { VertexType nextNode = std::numeric_limits::max(); - unsigned nextProc = P; + unsigned nextProc = p; Choose(instance, - work_variances, + workVariances, allReady, procReady[supstepIdx % stale], procFree, nextNode, nextProc, endSupStep, - max_finish_time - time, - procTypesCompatibleWithNodeType_skip_proctype); + maxFinishTime - time, + procTypesCompatibleWithNodeTypeSkipProctype); - if (nextNode == std::numeric_limits::max() || nextProc == P) { + if (nextNode == std::numeric_limits::max() || nextProc == p) { endSupStep = true; break; } - if (procReady[supstepIdx % stale][nextProc].find(std::make_pair(nextNode, work_variances[nextNode])) + if (procReady[supstepIdx % stale][nextProc].find(std::make_pair(nextNode, workVariances[nextNode])) != procReady[supstepIdx % 
stale][nextProc].end()) { for (size_t i = 0; i < stale; i++) { - procReady[i][nextProc].erase(std::make_pair(nextNode, work_variances[nextNode])); + procReady[i][nextProc].erase(std::make_pair(nextNode, workVariances[nextNode])); } } else { - for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(nextNode)]) { - allReady[procType].erase(std::make_pair(nextNode, work_variances[nextNode])); + for (unsigned procType : procTypesCompatibleWithNodeType[g.VertexType(nextNode)]) { + allReady[procType].erase(std::make_pair(nextNode, workVariances[nextNode])); } - nr_old_ready_nodes_per_type[G.vertex_type(nextNode)]--; - const unsigned nextProcType = instance.getArchitecture().processorType(nextProc); - number_of_allocated_allReady_tasks_in_superstep[nextProcType]++; + nrOldReadyNodesPerType[g.VertexType(nextNode)]--; + const unsigned nextProcType = instance.GetArchitecture().ProcessorType(nextProc); + numberOfAllocatedAllReadyTasksInSuperstep[nextProcType]++; - if (number_of_allocated_allReady_tasks_in_superstep[nextProcType] - >= limit_of_number_of_allocated_allReady_tasks_in_superstep[nextProcType]) { + if (numberOfAllocatedAllReadyTasksInSuperstep[nextProcType] + >= limitOfNumberOfAllocatedAllReadyTasksInSuperstep[nextProcType]) { allReady[nextProcType].clear(); } } for (size_t i = 0; i < stale; i++) { - ready[i].erase(std::make_pair(nextNode, work_variances[nextNode])); + ready[i].erase(std::make_pair(nextNode, workVariances[nextNode])); } - old_ready.erase(std::make_pair(nextNode, work_variances[nextNode])); + oldReady.erase(std::make_pair(nextNode, workVariances[nextNode])); - schedule.setAssignedProcessor(nextNode, nextProc); - schedule.setAssignedSuperstep(nextNode, supstepIdx); - able_to_schedule_in_step = true; + schedule.SetAssignedProcessor(nextNode, nextProc); + schedule.SetAssignedSuperstep(nextNode, supstepIdx); + ableToScheduleInStep = true; - if constexpr (use_memory_constraint) { - memory_constraint.add(nextNode, nextProc); + if constexpr 
(useMemoryConstraint_) { + memoryConstraint_.Add(nextNode, nextProc); std::vector> toErase; - for (const auto &node_pair : procReady[supstepIdx % stale][nextProc]) { - if (!memory_constraint.can_add(node_pair.first, nextProc)) { - toErase.push_back(node_pair); + for (const auto &nodePair : procReady[supstepIdx % stale][nextProc]) { + if (!memoryConstraint_.CanAdd(nodePair.first, nextProc)) { + toErase.push_back(nodePair); } } - for (const auto &n : toErase) { - procReady[supstepIdx % stale][nextProc].erase(n); + for (const auto &vert : toErase) { + procReady[supstepIdx % stale][nextProc].erase(vert); } } - finishTimes.emplace(time + G.vertex_work_weight(nextNode), nextNode); + finishTimes.emplace(time + g.VertexWorkWeight(nextNode), nextNode); procFree[nextProc] = false; --free; } - if (able_to_schedule_in_step) { - successive_empty_supersteps = 0; - } else if (++successive_empty_supersteps > 100 + stale) { - return RETURN_STATUS::ERROR; + if (ableToScheduleInStep) { + successiveEmptySupersteps = 0; + } else if (++successiveEmptySupersteps > 100 + stale) { + return ReturnStatus::ERROR; } - if (free > (P * max_percent_idle_processors) - && ((!increase_parallelism_in_new_superstep) - || get_nr_parallelizable_nodes(instance, - stale, - nr_old_ready_nodes_per_type, - nr_ready_stale_nodes_per_type[(supstepIdx + 1) % stale], - procReady[(supstepIdx + 1) % stale], - nr_procs_per_type) - >= std::min(std::min(P, static_cast(1.2 * (P - free))), - P - free + static_cast(0.5 * free)))) { + if (free > (p * maxPercentIdleProcessors_) + && ((!increaseParallelismInNewSuperstep_) + || GetNrParallelizableNodes(instance, + stale, + nrOldReadyNodesPerType, + nrReadyStaleNodesPerType[(supstepIdx + 1) % stale], + procReady[(supstepIdx + 1) % stale], + nrProcsPerType) + >= std::min(std::min(p, static_cast(1.2 * (p - free))), + p - free + static_cast(0.5 * free)))) { endSupStep = true; } } - assert(schedule.satisfiesPrecedenceConstraints()); - // schedule.setAutoCommunicationSchedule(); + 
assert(schedule.SatisfiesPrecedenceConstraints()); + // schedule.SetAutoCommunicationSchedule(); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { return computeSspSchedule(schedule, 1U); } + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { return ComputeSspSchedule(schedule, 1U); } - RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) override { return computeSspSchedule(schedule, 2U); } + ReturnStatus ComputeSchedule(MaxBspSchedule &schedule) override { return ComputeSspSchedule(schedule, 2U); } - std::string getScheduleName() const override { - if constexpr (use_memory_constraint) { + std::string GetScheduleName() const override { + if constexpr (useMemoryConstraint_) { return "GreedyVarianceSspMemory"; } else { return "GreedyVarianceSsp"; diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp index 981b6dfa..975030f3 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp @@ -34,47 +34,47 @@ limitations under the License. namespace osp { -template -struct GrowLocalAutoCores_Params { - unsigned minSuperstepSize = 20; - weight_t syncCostMultiplierMinSuperstepWeight = 1; - weight_t syncCostMultiplierParallelCheck = 4; +template +struct GrowLocalAutoCoresParams { + unsigned minSuperstepSize_ = 20; + WeightT syncCostMultiplierMinSuperstepWeight_ = 1; + WeightT syncCostMultiplierParallelCheck_ = 4; }; /** * @brief The GreedyBspGrowLocalAutoCores class represents a scheduler that uses a greedy algorithm to compute * schedules for BspInstance. * - * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods. - * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. 
- * The getScheduleName() method returns the name of the schedule, which is "GreedyBspGrowLocalAutoCores" in this + * This class inherits from the Scheduler class and implements the ComputeSchedule() and GetScheduleName() methods. + * The ComputeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. + * The GetScheduleName() method returns the name of the schedule, which is "GreedyBspGrowLocalAutoCores" in this * case. */ -template -class GrowLocalAutoCores : public Scheduler { +template +class GrowLocalAutoCores : public Scheduler { private: - GrowLocalAutoCores_Params> params; + GrowLocalAutoCoresParams> params_; - constexpr static bool use_memory_constraint = is_memory_constraint_v - or is_memory_constraint_schedule_v; + constexpr static bool useMemoryConstraint_ = isMemoryConstraintV + or isMemoryConstraintScheduleV; - static_assert(not use_memory_constraint or std::is_same_v, - "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); + static_assert(not useMemoryConstraint_ or std::is_same_v, + "GraphT must be the same as MemoryConstraintT::GraphImplT."); - static_assert(not use_memory_constraint - or not(std::is_same_v> - or std::is_same_v>), - "MemoryConstraint_t must not be persistent_transient_memory_constraint or global_memory_constraint. Not " + static_assert(not useMemoryConstraint_ + or not(std::is_same_v> + or std::is_same_v>), + "MemoryConstraintT must not be PersistentTransientMemoryConstraint or global_memory_constraint. Not " "supported in GrowLocalAutoCores."); - MemoryConstraint_t local_memory_constraint; + MemoryConstraintT localMemoryConstraint_; public: /** * @brief Default constructor for GreedyBspGrowLocalAutoCores. */ - GrowLocalAutoCores(GrowLocalAutoCores_Params> params_ = GrowLocalAutoCores_Params>()) - : params(params_) {} + GrowLocalAutoCores(GrowLocalAutoCoresParams> params = GrowLocalAutoCoresParams>()) + : params_(params) {} /** * @brief Default destructor for GreedyBspGrowLocalAutoCores. 
@@ -89,264 +89,265 @@ class GrowLocalAutoCores : public Scheduler { * @param instance The BspInstance object representing the instance to compute the schedule for. * @return A pair containing the return status and the computed BspSchedule. */ - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - using vertex_idx = typename Graph_t::vertex_idx; - const auto &instance = schedule.getInstance(); + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + using VertexIdx = typename GraphT::VertexIdx; + const auto &instance = schedule.GetInstance(); - for (const auto &v : instance.getComputationalDag().vertices()) { - schedule.setAssignedProcessor(v, std::numeric_limits::max()); - schedule.setAssignedSuperstep(v, std::numeric_limits::max()); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + schedule.SetAssignedProcessor(v, std::numeric_limits::max()); + schedule.SetAssignedSuperstep(v, std::numeric_limits::max()); } unsigned supstep = 0; - if constexpr (is_memory_constraint_v) { - local_memory_constraint.initialize(instance); - } else if constexpr (is_memory_constraint_schedule_v) { - local_memory_constraint.initialize(schedule, supstep); + if constexpr (isMemoryConstraintV) { + localMemoryConstraint_.Initialize(instance); + } else if constexpr (isMemoryConstraintScheduleV) { + localMemoryConstraint_.Initialize(schedule, supstep); } - auto &node_to_proc = schedule.assignedProcessors(); - auto &node_to_supstep = schedule.assignedSupersteps(); + auto &nodeToProc = schedule.AssignedProcessors(); + auto &nodeToSupstep = schedule.AssignedSupersteps(); - const auto N = instance.numberOfVertices(); - const unsigned P = instance.numberOfProcessors(); - const auto &G = instance.getComputationalDag(); + const auto n = instance.NumberOfVertices(); + const unsigned p = instance.NumberOfProcessors(); + const auto &g = instance.GetComputationalDag(); - std::unordered_set ready; + std::unordered_set ready; - std::vector allReady; 
- std::vector> procReady(P); + std::vector allReady; + std::vector> procReady(p); - std::vector predec(N); + std::vector predec(n); - for (const auto &node : G.vertices()) { - predec[node] = G.in_degree(node); + for (const auto &node : g.Vertices()) { + predec[node] = g.InDegree(node); if (predec[node] == 0) { ready.insert(node); } } - std::vector> new_assignments(P); - std::vector> best_new_assignments(P); + std::vector> newAssignments(p); + std::vector> bestNewAssignments(p); - std::vector new_ready; - std::vector best_new_ready; + std::vector newReady; + std::vector bestNewReady; - const v_workw_t minWeightParallelCheck = params.syncCostMultiplierParallelCheck * instance.synchronisationCosts(); - const v_workw_t minSuperstepWeight = params.syncCostMultiplierMinSuperstepWeight * instance.synchronisationCosts(); + const VWorkwT minWeightParallelCheck = params_.syncCostMultiplierParallelCheck_ * instance.SynchronisationCosts(); + const VWorkwT minSuperstepWeight = params_.syncCostMultiplierMinSuperstepWeight_ * instance.SynchronisationCosts(); - double desiredParallelism = static_cast(P); + double desiredParallelism = static_cast(p); - vertex_idx total_assigned = 0; - while (total_assigned < N) { - unsigned limit = params.minSuperstepSize; - double best_score = 0; - double best_parallelism = 0; + VertexIdx totalAssigned = 0; + while (totalAssigned < n) { + unsigned limit = params_.minSuperstepSize_; + double bestScore = 0; + double bestParallelism = 0; bool continueSuperstepAttempts = true; while (continueSuperstepAttempts) { - for (unsigned p = 0; p < P; p++) { - new_assignments[p].clear(); - procReady[p].clear(); + for (unsigned pIdx = 0; pIdx < p; pIdx++) { + newAssignments[pIdx].clear(); + procReady[pIdx].clear(); } - new_ready.clear(); + newReady.clear(); allReady.assign(ready.begin(), ready.end()); - std::make_heap(allReady.begin(), allReady.end(), std::greater()); + std::make_heap(allReady.begin(), allReady.end(), std::greater()); - vertex_idx 
new_total_assigned = 0; - v_workw_t weight_limit = 0, total_weight_assigned = 0; + VertexIdx newTotalAssigned = 0; + VWorkwT weightLimit = 0; + VWorkwT totalWeightAssigned = 0; - bool early_memory_break = false; + bool earlyMemoryBreak = false; // Processor 0 - while (new_assignments[0].size() < limit) { - vertex_idx chosen_node = std::numeric_limits::max(); + while (newAssignments[0].size() < limit) { + VertexIdx chosenNode = std::numeric_limits::max(); - if constexpr (use_memory_constraint) { - if (!procReady[0].empty() && local_memory_constraint.can_add(procReady[0].front(), 0)) { - chosen_node = procReady[0].front(); - std::pop_heap(procReady[0].begin(), procReady[0].end(), std::greater()); + if constexpr (useMemoryConstraint_) { + if (!procReady[0].empty() && localMemoryConstraint_.CanAdd(procReady[0].front(), 0)) { + chosenNode = procReady[0].front(); + std::pop_heap(procReady[0].begin(), procReady[0].end(), std::greater()); procReady[0].pop_back(); - } else if (!allReady.empty() && local_memory_constraint.can_add(allReady.front(), 0)) { - chosen_node = allReady.front(); - std::pop_heap(allReady.begin(), allReady.end(), std::greater()); + } else if (!allReady.empty() && localMemoryConstraint_.CanAdd(allReady.front(), 0)) { + chosenNode = allReady.front(); + std::pop_heap(allReady.begin(), allReady.end(), std::greater()); allReady.pop_back(); } else { - early_memory_break = true; + earlyMemoryBreak = true; break; } } else { if (!procReady[0].empty()) { - chosen_node = procReady[0].front(); - std::pop_heap(procReady[0].begin(), procReady[0].end(), std::greater()); + chosenNode = procReady[0].front(); + std::pop_heap(procReady[0].begin(), procReady[0].end(), std::greater()); procReady[0].pop_back(); } else if (!allReady.empty()) { - chosen_node = allReady.front(); - std::pop_heap(allReady.begin(), allReady.end(), std::greater()); + chosenNode = allReady.front(); + std::pop_heap(allReady.begin(), allReady.end(), std::greater()); allReady.pop_back(); } else { 
break; } } - new_assignments[0].push_back(chosen_node); - node_to_proc[chosen_node] = 0; - new_total_assigned++; - weight_limit += G.vertex_work_weight(chosen_node); + newAssignments[0].push_back(chosenNode); + nodeToProc[chosenNode] = 0; + newTotalAssigned++; + weightLimit += g.VertexWorkWeight(chosenNode); - if constexpr (use_memory_constraint) { - local_memory_constraint.add(chosen_node, 0); + if constexpr (useMemoryConstraint_) { + localMemoryConstraint_.Add(chosenNode, 0); } - for (const auto &succ : G.children(chosen_node)) { - if (node_to_proc[succ] == std::numeric_limits::max()) { - node_to_proc[succ] = 0; - } else if (node_to_proc[succ] != 0) { - node_to_proc[succ] = P; + for (const auto &succ : g.Children(chosenNode)) { + if (nodeToProc[succ] == std::numeric_limits::max()) { + nodeToProc[succ] = 0; + } else if (nodeToProc[succ] != 0) { + nodeToProc[succ] = p; } predec[succ]--; if (predec[succ] == 0) { - new_ready.push_back(succ); + newReady.push_back(succ); - if (node_to_proc[succ] == 0) { + if (nodeToProc[succ] == 0) { procReady[0].push_back(succ); - std::push_heap(procReady[0].begin(), procReady[0].end(), std::greater()); + std::push_heap(procReady[0].begin(), procReady[0].end(), std::greater()); } } } } - total_weight_assigned += weight_limit; + totalWeightAssigned += weightLimit; // Processors 1 through P-1 - for (unsigned proc = 1; proc < P; ++proc) { - v_workw_t current_weight_assigned = 0; - while (current_weight_assigned < weight_limit) { - vertex_idx chosen_node = std::numeric_limits::max(); - - if constexpr (use_memory_constraint) { - if (!procReady[proc].empty() && local_memory_constraint.can_add(procReady[proc].front(), proc)) { - chosen_node = procReady[proc].front(); - std::pop_heap(procReady[proc].begin(), procReady[proc].end(), std::greater()); + for (unsigned proc = 1; proc < p; ++proc) { + VWorkwT currentWeightAssigned = 0; + while (currentWeightAssigned < weightLimit) { + VertexIdx chosenNode = std::numeric_limits::max(); + + if 
constexpr (useMemoryConstraint_) { + if (!procReady[proc].empty() && localMemoryConstraint_.CanAdd(procReady[proc].front(), proc)) { + chosenNode = procReady[proc].front(); + std::pop_heap(procReady[proc].begin(), procReady[proc].end(), std::greater()); procReady[proc].pop_back(); - } else if (!allReady.empty() && local_memory_constraint.can_add(allReady.front(), proc)) { - chosen_node = allReady.front(); - std::pop_heap(allReady.begin(), allReady.end(), std::greater()); + } else if (!allReady.empty() && localMemoryConstraint_.CanAdd(allReady.front(), proc)) { + chosenNode = allReady.front(); + std::pop_heap(allReady.begin(), allReady.end(), std::greater()); allReady.pop_back(); } else { - early_memory_break = true; + earlyMemoryBreak = true; break; } } else { if (!procReady[proc].empty()) { - chosen_node = procReady[proc].front(); - std::pop_heap(procReady[proc].begin(), procReady[proc].end(), std::greater()); + chosenNode = procReady[proc].front(); + std::pop_heap(procReady[proc].begin(), procReady[proc].end(), std::greater()); procReady[proc].pop_back(); } else if (!allReady.empty()) { - chosen_node = allReady.front(); - std::pop_heap(allReady.begin(), allReady.end(), std::greater()); + chosenNode = allReady.front(); + std::pop_heap(allReady.begin(), allReady.end(), std::greater()); allReady.pop_back(); } else { break; } } - new_assignments[proc].push_back(chosen_node); - node_to_proc[chosen_node] = proc; - new_total_assigned++; - current_weight_assigned += G.vertex_work_weight(chosen_node); + newAssignments[proc].push_back(chosenNode); + nodeToProc[chosenNode] = proc; + newTotalAssigned++; + currentWeightAssigned += g.VertexWorkWeight(chosenNode); - if constexpr (use_memory_constraint) { - local_memory_constraint.add(chosen_node, proc); + if constexpr (useMemoryConstraint_) { + localMemoryConstraint_.Add(chosenNode, proc); } - for (const auto &succ : G.children(chosen_node)) { - if (node_to_proc[succ] == std::numeric_limits::max()) { - node_to_proc[succ] = 
proc; - } else if (node_to_proc[succ] != proc) { - node_to_proc[succ] = P; + for (const auto &succ : g.Children(chosenNode)) { + if (nodeToProc[succ] == std::numeric_limits::max()) { + nodeToProc[succ] = proc; + } else if (nodeToProc[succ] != proc) { + nodeToProc[succ] = p; } predec[succ]--; if (predec[succ] == 0) { - new_ready.push_back(succ); + newReady.push_back(succ); - if (node_to_proc[succ] == proc) { + if (nodeToProc[succ] == proc) { procReady[proc].push_back(succ); - std::push_heap(procReady[proc].begin(), procReady[proc].end(), std::greater()); + std::push_heap(procReady[proc].begin(), procReady[proc].end(), std::greater()); } } } } - weight_limit = std::max(weight_limit, current_weight_assigned); - total_weight_assigned += current_weight_assigned; + weightLimit = std::max(weightLimit, currentWeightAssigned); + totalWeightAssigned += currentWeightAssigned; } - bool accept_step = false; + bool acceptStep = false; - double score = static_cast(total_weight_assigned) - / static_cast(weight_limit + instance.synchronisationCosts()); + double score = static_cast(totalWeightAssigned) + / static_cast(weightLimit + instance.SynchronisationCosts()); double parallelism = 0; - if (weight_limit > 0) { - parallelism = static_cast(total_weight_assigned) / static_cast(weight_limit); + if (weightLimit > 0) { + parallelism = static_cast(totalWeightAssigned) / static_cast(weightLimit); } - if (score > 0.97 * best_score) { - best_score = std::max(best_score, score); - best_parallelism = parallelism; - accept_step = true; + if (score > 0.97 * bestScore) { + bestScore = std::max(bestScore, score); + bestParallelism = parallelism; + acceptStep = true; } else { continueSuperstepAttempts = false; } - if (weight_limit >= minWeightParallelCheck) { + if (weightLimit >= minWeightParallelCheck) { if (parallelism < std::max(2.0, 0.8 * desiredParallelism)) { continueSuperstepAttempts = false; } } - if (weight_limit <= minSuperstepWeight) { + if (weightLimit <= minSuperstepWeight) { 
continueSuperstepAttempts = true; - if (total_assigned + new_total_assigned == N) { - accept_step = true; + if (totalAssigned + newTotalAssigned == n) { + acceptStep = true; continueSuperstepAttempts = false; } } - if (total_assigned + new_total_assigned == N) { + if (totalAssigned + newTotalAssigned == n) { continueSuperstepAttempts = false; } - if constexpr (use_memory_constraint) { - if (early_memory_break) { + if constexpr (useMemoryConstraint_) { + if (earlyMemoryBreak) { continueSuperstepAttempts = false; } } // undo proc assingments and predec decreases in any case - for (unsigned proc = 0; proc < P; ++proc) { - for (const auto &node : new_assignments[proc]) { - node_to_proc[node] = std::numeric_limits::max(); + for (unsigned proc = 0; proc < p; ++proc) { + for (const auto &node : newAssignments[proc]) { + nodeToProc[node] = std::numeric_limits::max(); - for (const auto &succ : G.children(node)) { + for (const auto &succ : g.Children(node)) { predec[succ]++; - node_to_proc[succ] = std::numeric_limits::max(); + nodeToProc[succ] = std::numeric_limits::max(); } } - if constexpr (use_memory_constraint) { - local_memory_constraint.reset(proc); + if constexpr (useMemoryConstraint_) { + localMemoryConstraint_.Reset(proc); } } - if (accept_step) { - best_new_assignments.swap(new_assignments); - best_new_ready.swap(new_ready); + if (acceptStep) { + bestNewAssignments.swap(newAssignments); + bestNewReady.swap(newReady); } limit++; @@ -354,32 +355,32 @@ class GrowLocalAutoCores : public Scheduler { } // apply best iteration - for (const auto &node : best_new_ready) { + for (const auto &node : bestNewReady) { ready.insert(node); } - for (unsigned proc = 0; proc < P; ++proc) { - for (const auto &node : best_new_assignments[proc]) { - node_to_proc[node] = proc; - node_to_supstep[node] = supstep; + for (unsigned proc = 0; proc < p; ++proc) { + for (const auto &node : bestNewAssignments[proc]) { + nodeToProc[node] = proc; + nodeToSupstep[node] = supstep; ready.erase(node); 
- ++total_assigned; + ++totalAssigned; - for (const auto &succ : G.children(node)) { + for (const auto &succ : g.Children(node)) { predec[succ]--; } } } - desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism) - + (0.1 * static_cast(P)); // weights should sum up to one + desiredParallelism = (0.3 * desiredParallelism) + (0.6 * bestParallelism) + + (0.1 * static_cast(p)); // weights should sum up to one ++supstep; } - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } /** @@ -389,7 +390,7 @@ class GrowLocalAutoCores : public Scheduler { * * @return The name of the schedule. */ - virtual std::string getScheduleName() const override { return "GrowLocalAutoCores"; } + virtual std::string GetScheduleName() const override { return "GrowLocalAutoCores"; } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index 5569195d..7f9ac6cb 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -39,51 +39,51 @@ limitations under the License. 
namespace osp { -static constexpr unsigned CacheLineSize = 64; +static constexpr std::size_t CACHE_LINE_SIZE = 64; -template -struct GrowLocalAutoCoresParallel_Params { - vert_t minSuperstepSize = 20; - weight_t syncCostMultiplierMinSuperstepWeight = 1; - weight_t syncCostMultiplierParallelCheck = 4; +template +struct GrowLocalAutoCoresParallelParams { + VertT minSuperstepSize_ = 20; + WeightT syncCostMultiplierMinSuperstepWeight_ = 1; + WeightT syncCostMultiplierParallelCheck_ = 4; - unsigned numThreads = 0; // 0 for auto - unsigned maxNumThreads = UINT_MAX; // used when auto num threads + unsigned numThreads_ = 0; // 0 for auto + unsigned maxNumThreads_ = UINT_MAX; // used when auto num threads }; /** * @brief The GrowLocalAutoCoresParallel class represents a scheduler that uses a greedy algorithm to compute * schedules for BspInstance. * - * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods. - * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. - * The getScheduleName() method returns the name of the schedule, which is "GrowLocalAutoCoresParallel" in this + * This class inherits from the Scheduler class and implements the ComputeSchedule() and GetScheduleName() methods. + * The ComputeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. + * The GetScheduleName() method returns the name of the schedule, which is "GrowLocalAutoCoresParallel" in this * case. 
*/ -template -class GrowLocalAutoCoresParallel : public Scheduler { - static_assert(is_directed_graph_v); - static_assert(has_vertex_weights_v); +template +class GrowLocalAutoCoresParallel : public Scheduler { + static_assert(isDirectedGraphV); + static_assert(hasVertexWeightsV); private: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - GrowLocalAutoCoresParallel_Params, v_workw_t> params; + GrowLocalAutoCoresParallelParams, VWorkwT> params_; public: /** * @brief Default constructor for GrowLocalAutoCoresParallel. */ - GrowLocalAutoCoresParallel(GrowLocalAutoCoresParallel_Params, v_workw_t> params_ - = GrowLocalAutoCoresParallel_Params, v_workw_t>()) - : params(params_) {} + GrowLocalAutoCoresParallel(GrowLocalAutoCoresParallelParams, VWorkwT> params + = GrowLocalAutoCoresParallelParams, VWorkwT>()) + : params_(params) {} /** * @brief Default destructor for GrowLocalAutoCoresParallel. */ virtual ~GrowLocalAutoCoresParallel() = default; - void computePartialSchedule(BspSchedule &schedule, + void ComputePartialSchedule(BspSchedule &schedule, const std::vector &topOrder, const std::vector &posInTopOrder, const VertexType startNode, @@ -92,26 +92,26 @@ class GrowLocalAutoCoresParallel : public Scheduler { #ifdef TIME_THREADS_GROW_LOCAL_PARALLEL double startTime = omp_get_wtime(); #endif - const BspInstance &instance = schedule.getInstance(); - const Graph_t &graph = instance.getComputationalDag(); + const BspInstance &instance = schedule.GetInstance(); + const GraphT &graph = instance.GetComputationalDag(); - const VertexType N = endNode - startNode; - const unsigned P = instance.numberOfProcessors(); + const VertexType n = endNode - startNode; + const unsigned p = instance.NumberOfProcessors(); std::set ready; std::vector futureReady; - std::vector best_futureReady; + std::vector bestFutureReady; - std::vector> procReady(P); - std::vector> best_procReady(P); + std::vector> procReady(p); + std::vector> bestProcReady(p); - std::vector predec(N, 0); 
+ std::vector predec(n, 0); - if constexpr (has_vertices_in_top_order_v) { - if constexpr (has_children_in_vertex_order_v) { + if constexpr (hasVerticesInTopOrderV) { + if constexpr (hasChildrenInVertexOrderV) { for (VertexType vert = startNode; vert < endNode; ++vert) { - for (const VertexType &chld : graph.children(vert)) { + for (const VertexType &chld : graph.Children(vert)) { if (chld >= endNode) { break; } @@ -120,7 +120,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } else { for (VertexType vert = startNode; vert < endNode; ++vert) { - for (const VertexType &chld : graph.children(vert)) { + for (const VertexType &chld : graph.Children(vert)) { if (chld < endNode) { ++predec[chld - startNode]; } @@ -130,7 +130,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { } else { for (VertexType index = startNode; index < endNode; ++index) { VertexType vert = topOrder[index]; - for (const VertexType &par : graph.parents(vert)) { + for (const VertexType &par : graph.Parents(vert)) { VertexType posPar = posInTopOrder[par]; if (posPar >= startNode) { ++predec[index - startNode]; @@ -142,7 +142,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { for (VertexType nodePos = startNode; nodePos < endNode; nodePos++) { VertexType index = nodePos - startNode; if (predec[index] == 0) { - if constexpr (has_vertices_in_top_order_v) { + if constexpr (hasVerticesInTopOrderV) { ready.insert(nodePos); } else { ready.insert(topOrder[nodePos]); @@ -150,21 +150,21 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - std::vector> new_assignments(P); - std::vector> best_new_assignments(P); + std::vector> newAssignments(p); + std::vector> bestNewAssignments(p); - const v_workw_t minWeightParallelCheck = params.syncCostMultiplierParallelCheck * instance.synchronisationCosts(); - const v_workw_t minSuperstepWeight = params.syncCostMultiplierMinSuperstepWeight * instance.synchronisationCosts(); + const VWorkwT minWeightParallelCheck = 
params_.syncCostMultiplierParallelCheck_ * instance.SynchronisationCosts(); + const VWorkwT minSuperstepWeight = params_.syncCostMultiplierMinSuperstepWeight_ * instance.SynchronisationCosts(); - double desiredParallelism = static_cast(P); + double desiredParallelism = static_cast(p); - VertexType total_assigned = 0; + VertexType totalAssigned = 0; supstep = 0; - while (total_assigned < N) { - VertexType limit = params.minSuperstepSize; - double best_score = 0; - double best_parallelism = 0; + while (totalAssigned < n) { + VertexType limit = params_.minSuperstepSize_; + double bestScore = 0; + double bestParallelism = 0; typename std::set::iterator readyIter; typename std::set::iterator bestReadyIter; @@ -172,42 +172,42 @@ class GrowLocalAutoCoresParallel : public Scheduler { bool continueSuperstepAttempts = true; while (continueSuperstepAttempts) { - for (unsigned p = 0; p < P; p++) { - new_assignments[p].clear(); + for (unsigned proc = 0; proc < p; proc++) { + newAssignments[proc].clear(); } futureReady.clear(); - for (unsigned p = 0; p < P; p++) { - procReady[p].clear(); + for (unsigned proc = 0; proc < p; proc++) { + procReady[proc].clear(); } readyIter = ready.begin(); - VertexType new_total_assigned = 0; - v_workw_t weight_limit = 0; - v_workw_t total_weight_assigned = 0; + VertexType newTotalAssigned = 0; + VWorkwT weightLimit = 0; + VWorkwT totalWeightAssigned = 0; // Processor 0 - while (new_assignments[0].size() < limit) { - VertexType chosen_node = std::numeric_limits::max(); + while (newAssignments[0].size() < limit) { + VertexType chosenNode = std::numeric_limits::max(); if (!procReady[0].empty()) { - chosen_node = *procReady[0].begin(); + chosenNode = *procReady[0].begin(); procReady[0].erase(procReady[0].begin()); } else if (readyIter != ready.end()) { - chosen_node = *readyIter; + chosenNode = *readyIter; readyIter++; } else { break; } - new_assignments[0].push_back(chosen_node); - schedule.setAssignedProcessor(chosen_node, 0); - 
new_total_assigned++; - weight_limit += graph.vertex_work_weight(chosen_node); + newAssignments[0].push_back(chosenNode); + schedule.SetAssignedProcessor(chosenNode, 0); + newTotalAssigned++; + weightLimit += graph.VertexWorkWeight(chosenNode); - for (const VertexType &succ : graph.children(chosen_node)) { - if constexpr (has_vertices_in_top_order_v) { - if constexpr (has_children_in_vertex_order_v) { + for (const VertexType &succ : graph.Children(chosenNode)) { + if constexpr (hasVerticesInTopOrderV) { + if constexpr (hasChildrenInVertexOrderV) { if (succ >= endNode) { break; } @@ -222,14 +222,14 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - if (schedule.assignedProcessor(succ) == UINT_MAX) { - schedule.setAssignedProcessor(succ, 0); - } else if (schedule.assignedProcessor(succ) != 0) { - schedule.setAssignedProcessor(succ, P); + if (schedule.AssignedProcessor(succ) == UINT_MAX) { + schedule.SetAssignedProcessor(succ, 0); + } else if (schedule.AssignedProcessor(succ) != 0) { + schedule.SetAssignedProcessor(succ, p); } VertexType succIndex; - if constexpr (has_vertices_in_top_order_v) { + if constexpr (hasVerticesInTopOrderV) { succIndex = succ - startNode; } else { succIndex = posInTopOrder[succ] - startNode; @@ -237,7 +237,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { --predec[succIndex]; if (predec[succIndex] == 0) { - if (schedule.assignedProcessor(succ) == 0) { + if (schedule.AssignedProcessor(succ) == 0) { procReady[0].insert(succ); } else { futureReady.push_back(succ); @@ -246,31 +246,31 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - total_weight_assigned += weight_limit; + totalWeightAssigned += weightLimit; // Processors 1 through P-1 - for (unsigned proc = 1; proc < P; ++proc) { - v_workw_t current_weight_assigned = 0; - while (current_weight_assigned < weight_limit) { - VertexType chosen_node = std::numeric_limits::max(); + for (unsigned proc = 1; proc < p; ++proc) { + VWorkwT currentWeightAssigned = 0; + 
while (currentWeightAssigned < weightLimit) { + VertexType chosenNode = std::numeric_limits::max(); if (!procReady[proc].empty()) { - chosen_node = *procReady[proc].begin(); + chosenNode = *procReady[proc].begin(); procReady[proc].erase(procReady[proc].begin()); } else if (readyIter != ready.end()) { - chosen_node = *readyIter; + chosenNode = *readyIter; readyIter++; } else { break; } - new_assignments[proc].push_back(chosen_node); - schedule.setAssignedProcessor(chosen_node, proc); - new_total_assigned++; - current_weight_assigned += graph.vertex_work_weight(chosen_node); + newAssignments[proc].push_back(chosenNode); + schedule.SetAssignedProcessor(chosenNode, proc); + newTotalAssigned++; + currentWeightAssigned += graph.VertexWorkWeight(chosenNode); - for (const VertexType &succ : graph.children(chosen_node)) { - if constexpr (has_vertices_in_top_order_v) { - if constexpr (has_children_in_vertex_order_v) { + for (const VertexType &succ : graph.Children(chosenNode)) { + if constexpr (hasVerticesInTopOrderV) { + if constexpr (hasChildrenInVertexOrderV) { if (succ >= endNode) { break; } @@ -285,14 +285,14 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - if (schedule.assignedProcessor(succ) == UINT_MAX) { - schedule.setAssignedProcessor(succ, proc); - } else if (schedule.assignedProcessor(succ) != proc) { - schedule.setAssignedProcessor(succ, P); + if (schedule.AssignedProcessor(succ) == UINT_MAX) { + schedule.SetAssignedProcessor(succ, proc); + } else if (schedule.AssignedProcessor(succ) != proc) { + schedule.SetAssignedProcessor(succ, p); } VertexType succIndex; - if constexpr (has_vertices_in_top_order_v) { + if constexpr (hasVerticesInTopOrderV) { succIndex = succ - startNode; } else { succIndex = posInTopOrder[succ] - startNode; @@ -300,7 +300,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { --predec[succIndex]; if (predec[succIndex] == 0) { - if (schedule.assignedProcessor(succ) == proc) { + if (schedule.AssignedProcessor(succ) == proc) 
{ procReady[proc].insert(succ); } else { futureReady.push_back(succ); @@ -309,58 +309,58 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - weight_limit = std::max(weight_limit, current_weight_assigned); - total_weight_assigned += current_weight_assigned; + weightLimit = std::max(weightLimit, currentWeightAssigned); + totalWeightAssigned += currentWeightAssigned; } - bool accept_step = false; + bool acceptStep = false; - double score = static_cast(total_weight_assigned) - / static_cast(weight_limit + instance.synchronisationCosts()); + double score = static_cast(totalWeightAssigned) + / static_cast(weightLimit + instance.SynchronisationCosts()); double parallelism = 0; - if (weight_limit > 0) { - parallelism = static_cast(total_weight_assigned) / static_cast(weight_limit); + if (weightLimit > 0) { + parallelism = static_cast(totalWeightAssigned) / static_cast(weightLimit); } - if (score > 0.97 * best_score) { // It is possible to make this less strict, i.e. score > 0.98 * best_score. - // The purpose of this would be to encourage larger supersteps. - best_score = std::max(best_score, score); - best_parallelism = parallelism; - accept_step = true; + if (score > 0.97 * bestScore) { // It is possible to make this less strict, i.e. score > 0.98 * best_score. + // The purpose of this would be to encourage larger supersteps. 
+ bestScore = std::max(bestScore, score); + bestParallelism = parallelism; + acceptStep = true; } else { continueSuperstepAttempts = false; } - if (weight_limit >= minWeightParallelCheck) { + if (weightLimit >= minWeightParallelCheck) { if (parallelism < std::max(2.0, 0.8 * desiredParallelism)) { continueSuperstepAttempts = false; } } - if (weight_limit <= minSuperstepWeight) { + if (weightLimit <= minSuperstepWeight) { continueSuperstepAttempts = true; - if (total_assigned + new_total_assigned == N) { - accept_step = true; + if (totalAssigned + newTotalAssigned == n) { + acceptStep = true; continueSuperstepAttempts = false; } } - if (total_assigned + new_total_assigned == N) { + if (totalAssigned + newTotalAssigned == n) { continueSuperstepAttempts = false; } // undo proc assingments and predec increases in any case - for (unsigned proc = 0; proc < P; ++proc) { - for (const VertexType &node : new_assignments[proc]) { - schedule.setAssignedProcessor(node, UINT_MAX); + for (unsigned proc = 0; proc < p; ++proc) { + for (const VertexType &node : newAssignments[proc]) { + schedule.SetAssignedProcessor(node, UINT_MAX); } } - for (unsigned proc = 0; proc < P; ++proc) { - for (const VertexType &node : new_assignments[proc]) { - for (const VertexType &succ : graph.children(node)) { - if constexpr (has_vertices_in_top_order_v) { - if constexpr (has_children_in_vertex_order_v) { + for (unsigned proc = 0; proc < p; ++proc) { + for (const VertexType &node : newAssignments[proc]) { + for (const VertexType &succ : graph.Children(node)) { + if constexpr (hasVerticesInTopOrderV) { + if constexpr (hasChildrenInVertexOrderV) { if (succ >= endNode) { break; } @@ -376,7 +376,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { } VertexType succIndex; - if constexpr (has_vertices_in_top_order_v) { + if constexpr (hasVerticesInTopOrderV) { succIndex = succ - startNode; } else { succIndex = posInTopOrder[succ] - startNode; @@ -387,11 +387,11 @@ class GrowLocalAutoCoresParallel : 
public Scheduler { } } - for (unsigned proc = 0; proc < P; ++proc) { - for (const VertexType &node : new_assignments[proc]) { - for (const VertexType &succ : graph.children(node)) { - if constexpr (has_vertices_in_top_order_v) { - if constexpr (has_children_in_vertex_order_v) { + for (unsigned proc = 0; proc < p; ++proc) { + for (const VertexType &node : newAssignments[proc]) { + for (const VertexType &succ : graph.Children(node)) { + if constexpr (hasVerticesInTopOrderV) { + if constexpr (hasChildrenInVertexOrderV) { if (succ >= endNode) { break; } @@ -406,15 +406,15 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - schedule.setAssignedProcessor(succ, UINT_MAX); + schedule.SetAssignedProcessor(succ, UINT_MAX); } } } - if (accept_step) { - best_new_assignments.swap(new_assignments); - best_futureReady.swap(futureReady); - best_procReady.swap(procReady); + if (acceptStep) { + bestNewAssignments.swap(newAssignments); + bestFutureReady.swap(futureReady); + bestProcReady.swap(procReady); bestReadyIter = readyIter; } @@ -424,20 +424,20 @@ class GrowLocalAutoCoresParallel : public Scheduler { // apply best iteration ready.erase(ready.begin(), bestReadyIter); - ready.insert(best_futureReady.begin(), best_futureReady.end()); - for (unsigned proc = 0; proc < P; proc++) { - ready.merge(best_procReady[proc]); + ready.insert(bestFutureReady.begin(), bestFutureReady.end()); + for (unsigned proc = 0; proc < p; proc++) { + ready.merge(bestProcReady[proc]); } - for (unsigned proc = 0; proc < P; ++proc) { - for (const VertexType &node : best_new_assignments[proc]) { - schedule.setAssignedProcessor(node, proc); - schedule.setAssignedSuperstepNoUpdateNumSuperstep(node, supstep); - ++total_assigned; + for (unsigned proc = 0; proc < p; ++proc) { + for (const VertexType &node : bestNewAssignments[proc]) { + schedule.SetAssignedProcessor(node, proc); + schedule.SetAssignedSuperstepNoUpdateNumSuperstep(node, supstep); + ++totalAssigned; - for (const VertexType &succ : 
graph.children(node)) { - if constexpr (has_vertices_in_top_order_v) { - if constexpr (has_children_in_vertex_order_v) { + for (const VertexType &succ : graph.Children(node)) { + if constexpr (hasVerticesInTopOrderV) { + if constexpr (hasChildrenInVertexOrderV) { if (succ >= endNode) { break; } @@ -453,7 +453,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { } VertexType succIndex; - if constexpr (has_vertices_in_top_order_v) { + if constexpr (hasVerticesInTopOrderV) { succIndex = succ - startNode; } else { succIndex = posInTopOrder[succ] - startNode; @@ -464,8 +464,8 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism) - + (0.1 * static_cast(P)); // weights should sum up to one + desiredParallelism = (0.3 * desiredParallelism) + (0.6 * bestParallelism) + + (0.1 * static_cast(p)); // weights should sum up to one ++supstep; } @@ -482,37 +482,37 @@ class GrowLocalAutoCoresParallel : public Scheduler { #endif } - void incrementScheduleSupersteps(BspSchedule &schedule, + void IncrementScheduleSupersteps(BspSchedule &schedule, const VertexType startNode, const VertexType endNode, const unsigned incr) const { for (VertexType node = startNode; node < endNode; node++) { - schedule.setAssignedSuperstepNoUpdateNumSuperstep(node, schedule.assignedSuperstep(node) + incr); + schedule.SetAssignedSuperstepNoUpdateNumSuperstep(node, schedule.AssignedSuperstep(node) + incr); } } - void incrementScheduleSupersteps_TopOrder(BspSchedule &schedule, - const std::vector &topOrder, - const VertexType startIndex, - const VertexType endIndex, - const unsigned incr) const { + void IncrementScheduleSuperstepsTopOrder(BspSchedule &schedule, + const std::vector &topOrder, + const VertexType startIndex, + const VertexType endIndex, + const unsigned incr) const { for (VertexType index = startIndex; index < endIndex; index++) { const VertexType node = topOrder[index]; - 
schedule.setAssignedSuperstepNoUpdateNumSuperstep(node, schedule.assignedSuperstep(node) + incr); + schedule.SetAssignedSuperstepNoUpdateNumSuperstep(node, schedule.AssignedSuperstep(node) + incr); } } - RETURN_STATUS computeScheduleParallel(BspSchedule &schedule, unsigned int numThreads) const { - const BspInstance &instance = schedule.getInstance(); - const Graph_t &graph = instance.getComputationalDag(); + ReturnStatus ComputeScheduleParallel(BspSchedule &schedule, unsigned int numThreads) const { + const BspInstance &instance = schedule.GetInstance(); + const GraphT &graph = instance.GetComputationalDag(); - const VertexType N = instance.numberOfVertices(); + const VertexType n = instance.NumberOfVertices(); - for (VertexType vert = 0; vert < N; ++vert) { - schedule.setAssignedProcessor(vert, UINT_MAX); + for (VertexType vert = 0; vert < n; ++vert) { + schedule.SetAssignedProcessor(vert, UINT_MAX); } - VertexType numNodesPerThread = N / numThreads; + VertexType numNodesPerThread = n / numThreads; std::vector startNodes; startNodes.reserve(numThreads + 1); VertexType startNode = 0; @@ -520,21 +520,21 @@ class GrowLocalAutoCoresParallel : public Scheduler { startNodes.push_back(startNode); startNode += numNodesPerThread; } - startNodes.push_back(N); + startNodes.push_back(n); - static constexpr unsigned UnsignedPadding = (CacheLineSize + sizeof(unsigned) - 1) / sizeof(unsigned); - std::vector superstepsThread(numThreads * UnsignedPadding, 0); + static constexpr unsigned unsignedPadding = (CACHE_LINE_SIZE + sizeof(unsigned) - 1) / sizeof(unsigned); + std::vector superstepsThread(numThreads * unsignedPadding, 0); std::vector supstepIncr(numThreads, 0); unsigned incr = 0; std::vector topOrder; - if constexpr (not has_vertices_in_top_order_v) { + if constexpr (not hasVerticesInTopOrderV) { topOrder = GetTopOrder(graph); } std::vector posInTopOrder; - if constexpr (not has_vertices_in_top_order_v) { - posInTopOrder = std::vector(graph.num_vertices()); + if constexpr 
(not hasVerticesInTopOrderV) { + posInTopOrder = std::vector(graph.NumVertices()); for (VertexType ind = 0; ind < static_cast(topOrder.size()); ++ind) { posInTopOrder[topOrder[ind]] = ind; } @@ -545,15 +545,15 @@ class GrowLocalAutoCoresParallel : public Scheduler { { #pragma omp for schedule(static, 1) for (unsigned thr = 0; thr < numThreads; thr++) { - computePartialSchedule( - schedule, topOrder, posInTopOrder, startNodes[thr], startNodes[thr + 1], superstepsThread[thr * UnsignedPadding]); + ComputePartialSchedule( + schedule, topOrder, posInTopOrder, startNodes[thr], startNodes[thr + 1], superstepsThread[thr * unsignedPadding]); } #pragma omp master { for (unsigned thr = 0; thr < numThreads; thr++) { supstepIncr[thr] = incr; - incr += superstepsThread[thr * UnsignedPadding]; + incr += superstepsThread[thr * unsignedPadding]; } // the value of incr is now the number of supersteps } @@ -562,18 +562,17 @@ class GrowLocalAutoCoresParallel : public Scheduler { #pragma omp for schedule(static, 1) for (unsigned thr = 0; thr < numThreads; thr++) { - if constexpr (has_vertices_in_top_order_v) { - incrementScheduleSupersteps(schedule, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]); + if constexpr (hasVerticesInTopOrderV) { + IncrementScheduleSupersteps(schedule, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]); } else { - incrementScheduleSupersteps_TopOrder( - schedule, topOrder, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]); + IncrementScheduleSuperstepsTopOrder(schedule, topOrder, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]); } } } - schedule.setNumberOfSupersteps(incr); + schedule.SetNumberOfSupersteps(incr); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } /** @@ -584,19 +583,19 @@ class GrowLocalAutoCoresParallel : public Scheduler { * @param instance The BspInstance object representing the instance to compute the schedule for. * @return A pair containing the return status and the computed BspSchedule. 
*/ - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - unsigned numThreads = params.numThreads; + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + unsigned numThreads = params_.numThreads_; if (numThreads == 0) { - // numThreads = static_cast(std::sqrt( static_cast((schedule.getInstance().numberOfVertices() / 1000000)))) + 1; + // numThreads = static_cast(std::sqrt( static_cast((schedule.GetInstance().NumberOfVertices() / 1000000)))) + 1; numThreads - = static_cast(std::log2(static_cast((schedule.getInstance().numberOfVertices() / 1000)))) + 1; + = static_cast(std::log2(static_cast((schedule.GetInstance().NumberOfVertices() / 1000)))) + 1; } - numThreads = std::min(numThreads, params.maxNumThreads); + numThreads = std::min(numThreads, params_.maxNumThreads_); if (numThreads == 0) { numThreads = 1; } - return computeScheduleParallel(schedule, numThreads); + return ComputeScheduleParallel(schedule, numThreads); } /** @@ -606,7 +605,7 @@ class GrowLocalAutoCoresParallel : public Scheduler { * * @return The name of the schedule. 
*/ - virtual std::string getScheduleName() const override { return "GrowLocalAutoCoresParallel"; } + virtual std::string GetScheduleName() const override { return "GrowLocalAutoCoresParallel"; } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp b/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp index f56ed2f2..f1ebae87 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp @@ -30,370 +30,367 @@ namespace osp { * */ template -struct is_memory_constraint : std::false_type {}; +struct IsMemoryConstraint : std::false_type {}; template -struct is_memory_constraint< +struct IsMemoryConstraint< T, - std::void_t().initialize(std::declval>())), - decltype(std::declval().can_add(std::declval>(), std::declval())), - decltype(std::declval().add(std::declval>(), std::declval())), - decltype(std::declval().reset(std::declval())), + std::void_t().Initialize(std::declval>())), + decltype(std::declval().CanAdd(std::declval>(), std::declval())), + decltype(std::declval().Add(std::declval>(), std::declval())), + decltype(std::declval().Reset(std::declval())), decltype(T())>> : std::true_type {}; template -inline constexpr bool is_memory_constraint_v = is_memory_constraint::value; +inline constexpr bool isMemoryConstraintV = IsMemoryConstraint::value; /** * @brief The default memory constraint type, no memory constraints apply. * */ -struct no_memory_constraint { - using Graph_impl_t = void; +struct NoMemoryConstraint { + using GraphImplT = void; }; /** * @brief A memory constraint module for local memory constraints. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. 
*/ -template -struct local_memory_constraint { - using Graph_impl_t = Graph_t; +template +struct LocalMemoryConstraint { + using GraphImplT = GraphT; - const BspInstance *instance; + const BspInstance *instance_; - std::vector> current_proc_memory; + std::vector> currentProcMemory_; - local_memory_constraint() : instance(nullptr) {} + LocalMemoryConstraint() : instance_(nullptr) {} - inline void initialize(const BspInstance &instance_) { - instance = &instance_; - current_proc_memory = std::vector>(instance->numberOfProcessors(), 0); + inline void Initialize(const BspInstance &instance) { + instance_ = &instance; + currentProcMemory_ = std::vector>(instance.NumberOfProcessors(), 0); - if (instance->getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL) { + if (instance.GetArchitecture().GetMemoryConstraintType() != MemoryConstraintType::LOCAL) { throw std::invalid_argument("Memory constraint type is not LOCAL"); } } - inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) - <= instance->getArchitecture().memoryBound(proc); + inline bool CanAdd(const VertexIdxT &v, const unsigned proc) const { + return currentProcMemory_[proc] + instance_->GetComputationalDag().VertexMemWeight(v) + <= instance_->GetArchitecture().MemoryBound(proc); } - inline void add(const vertex_idx_t &v, const unsigned proc) { - current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v); + inline void Add(const VertexIdxT &v, const unsigned proc) { + currentProcMemory_[proc] += instance_->GetComputationalDag().VertexMemWeight(v); } - inline bool can_add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_memw_t &) const { - return current_proc_memory[proc] + custom_mem_weight <= instance->getArchitecture().memoryBound(proc); + inline bool CanAdd(const unsigned proc, const VMemwT &customMemWeight, const VMemwT &) const { + return 
currentProcMemory_[proc] + customMemWeight <= instance_->GetArchitecture().MemoryBound(proc); } - inline void add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_memw_t &) { - current_proc_memory[proc] += custom_mem_weight; + inline void Add(const unsigned proc, const VMemwT &customMemWeight, const VMemwT &) { + currentProcMemory_[proc] += customMemWeight; } - inline void reset(const unsigned proc) { current_proc_memory[proc] = 0; } + inline void Reset(const unsigned proc) { currentProcMemory_[proc] = 0; } }; /** * @brief A memory constraint module for local memory constraints. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. */ /** * @brief A memory constraint module for persistent and transient memory constraints. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. */ -template -struct persistent_transient_memory_constraint { - static_assert(std::is_convertible_v, v_memw_t>, - "persistent_transient_memory_constraint requires that memory and communication weights are convertible."); +template +struct PersistentTransientMemoryConstraint { + static_assert(std::is_convertible_v, VMemwT>, + "PersistentTransientMemoryConstraint requires that memory and communication weights are convertible."); - using Graph_impl_t = Graph_t; + using GraphImplT = GraphT; - const BspInstance *instance; + const BspInstance *instance_; - std::vector> current_proc_persistent_memory; - std::vector> current_proc_transient_memory; + std::vector> currentProcPersistentMemory_; + std::vector> currentProcTransientMemory_; - persistent_transient_memory_constraint() : instance(nullptr) {} + PersistentTransientMemoryConstraint() : instance_(nullptr) {} - inline void initialize(const BspInstance &instance_) { - instance = &instance_; + inline void Initialize(const BspInstance &instance) { + instance_ = &instance; - current_proc_persistent_memory = std::vector>(instance->numberOfProcessors(), 0); - current_proc_transient_memory = 
std::vector>(instance->numberOfProcessors(), 0); + currentProcPersistentMemory_.assign(instance.NumberOfProcessors(), 0); + currentProcTransientMemory_.assign(instance.NumberOfProcessors(), 0); - if (instance->getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { + if (instance.GetArchitecture().GetMemoryConstraintType() != MemoryConstraintType::PERSISTENT_AND_TRANSIENT) { throw std::invalid_argument("Memory constraint type is not PERSISTENT_AND_TRANSIENT"); } } - inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - return (current_proc_persistent_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) - + std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v)) - <= instance->getArchitecture().memoryBound(proc)); + inline bool CanAdd(const VertexIdxT &v, const unsigned proc) const { + return (currentProcPersistentMemory_[proc] + instance_->GetComputationalDag().VertexMemWeight(v) + + std::max(currentProcTransientMemory_[proc], instance_->GetComputationalDag().VertexCommWeight(v)) + <= instance_->GetArchitecture().MemoryBound(proc)); } - inline void add(const vertex_idx_t &v, const unsigned proc) { - current_proc_persistent_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v); - current_proc_transient_memory[proc] - = std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v)); + inline void Add(const VertexIdxT &v, const unsigned proc) { + currentProcPersistentMemory_[proc] += instance_->GetComputationalDag().VertexMemWeight(v); + currentProcTransientMemory_[proc] + = std::max(currentProcTransientMemory_[proc], instance_->GetComputationalDag().VertexCommWeight(v)); } - inline bool can_add(const unsigned proc, - const v_memw_t &custom_mem_weight, - const v_commw_t &custom_comm_weight) const { - return (current_proc_persistent_memory[proc] + custom_mem_weight - + 
std::max(current_proc_transient_memory[proc], custom_comm_weight) - <= instance->getArchitecture().memoryBound(proc)); + inline bool CanAdd(const unsigned proc, const VMemwT &customMemWeight, const VCommwT &customCommWeight) const { + return (currentProcPersistentMemory_[proc] + customMemWeight + std::max(currentProcTransientMemory_[proc], customCommWeight) + <= instance_->GetArchitecture().MemoryBound(proc)); } - inline void add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_commw_t &custom_comm_weight) { - current_proc_persistent_memory[proc] += custom_mem_weight; - current_proc_transient_memory[proc] = std::max(current_proc_transient_memory[proc], custom_comm_weight); + inline void Add(const unsigned proc, const VMemwT &customMemWeight, const VCommwT &customCommWeight) { + currentProcPersistentMemory_[proc] += customMemWeight; + currentProcTransientMemory_[proc] = std::max(currentProcTransientMemory_[proc], customCommWeight); } - inline void reset(const unsigned) {} + inline void Reset(const unsigned) {} }; -template -struct global_memory_constraint { - using Graph_impl_t = Graph_t; +template +struct GlobalMemoryConstraint { + using GraphImplT = GraphT; - const BspInstance *instance; + const BspInstance *instance_; - std::vector> current_proc_memory; + std::vector> currentProcMemory_; - global_memory_constraint() : instance(nullptr) {} + GlobalMemoryConstraint() : instance_(nullptr) {} - inline void initialize(const BspInstance &instance_) { - instance = &instance_; - current_proc_memory = std::vector>(instance->numberOfProcessors(), 0); + inline void Initialize(const BspInstance &instance) { + instance_ = &instance; + currentProcMemory_ = std::vector>(instance.NumberOfProcessors(), 0); - if (instance->getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::GLOBAL) { + if (instance.GetArchitecture().GetMemoryConstraintType() != MemoryConstraintType::GLOBAL) { throw std::invalid_argument("Memory constraint type is not GLOBAL"); } } 
- inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) - <= instance->getArchitecture().memoryBound(proc); + inline bool CanAdd(const VertexIdxT &v, const unsigned proc) const { + return currentProcMemory_[proc] + instance_->GetComputationalDag().VertexMemWeight(v) + <= instance_->GetArchitecture().MemoryBound(proc); } - inline void add(const vertex_idx_t &v, const unsigned proc) { - current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v); + inline void Add(const VertexIdxT &v, const unsigned proc) { + currentProcMemory_[proc] += instance_->GetComputationalDag().VertexMemWeight(v); } - inline bool can_add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_commw_t &) const { - return current_proc_memory[proc] + custom_mem_weight <= instance->getArchitecture().memoryBound(proc); + inline bool CanAdd(const unsigned proc, const VMemwT &customMemWeight, const VCommwT &) const { + return currentProcMemory_[proc] + customMemWeight <= instance_->GetArchitecture().MemoryBound(proc); } - inline void add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_commw_t &) { - current_proc_memory[proc] += custom_mem_weight; + inline void Add(const unsigned proc, const VMemwT &customMemWeight, const VCommwT &) { + currentProcMemory_[proc] += customMemWeight; } - inline void reset(const unsigned) {} + inline void Reset(const unsigned) {} }; template -struct is_memory_constraint_schedule : std::false_type {}; +struct IsMemoryConstraintSchedule : std::false_type {}; template -struct is_memory_constraint_schedule< +struct IsMemoryConstraintSchedule< T, - std::void_t().initialize(std::declval>(), std::declval())), - decltype(std::declval().can_add(std::declval>(), std::declval())), - decltype(std::declval().add(std::declval>(), std::declval())), - decltype(std::declval().reset(std::declval())), + 
std::void_t().Initialize(std::declval>(), std::declval())), + decltype(std::declval().CanAdd(std::declval>(), std::declval())), + decltype(std::declval().Add(std::declval>(), std::declval())), + decltype(std::declval().Reset(std::declval())), decltype(T())>> : std::true_type {}; template -inline constexpr bool is_memory_constraint_schedule_v = is_memory_constraint_schedule::value; +inline constexpr bool isMemoryConstraintScheduleV = IsMemoryConstraintSchedule::value; -template -struct local_in_out_memory_constraint { - static_assert(std::is_convertible_v, v_memw_t>, - "local_in_out_memory_constraint requires that memory and communication weights are convertible."); +template +struct LocalInOutMemoryConstraint { + static_assert(std::is_convertible_v, VMemwT>, + "LocalInOutMemoryConstraint requires that memory and communication weights are convertible."); - using Graph_impl_t = Graph_t; + using GraphImplT = GraphT; - const BspInstance *instance; - const BspSchedule *schedule; + const BspInstance *instance_; + const BspSchedule *schedule_; - const unsigned *current_superstep = 0; + const unsigned *currentSuperstep_ = 0; - std::vector> current_proc_memory; + std::vector> currentProcMemory_; - local_in_out_memory_constraint() : instance(nullptr), schedule(nullptr) {} + LocalInOutMemoryConstraint() : instance_(nullptr), schedule_(nullptr) {} - inline void initialize(const BspSchedule &schedule_, const unsigned &supstepIdx) { - current_superstep = &supstepIdx; - schedule = &schedule_; - instance = &schedule->getInstance(); - current_proc_memory = std::vector>(instance->numberOfProcessors(), 0); + inline void Initialize(const BspSchedule &schedule, const unsigned &supstepIdx) { + currentSuperstep_ = &supstepIdx; + schedule_ = &schedule; + instance_ = &schedule_->GetInstance(); + currentProcMemory_.assign(instance_->NumberOfProcessors(), 0); - if (instance->getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT) { + if 
(instance_->GetArchitecture().GetMemoryConstraintType() != MemoryConstraintType::LOCAL_IN_OUT) { throw std::invalid_argument("Memory constraint type is not LOCAL_IN_OUT"); } } - inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - v_memw_t inc_memory - = instance->getComputationalDag().vertex_mem_weight(v) + instance->getComputationalDag().vertex_comm_weight(v); + inline bool CanAdd(const VertexIdxT &v, const unsigned proc) const { + VMemwT incMemory + = instance_->GetComputationalDag().VertexMemWeight(v) + instance_->GetComputationalDag().VertexCommWeight(v); - for (const auto &pred : instance->getComputationalDag().parents(v)) { - if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v) - && schedule->assignedSuperstep(pred) == *current_superstep) { - inc_memory -= instance->getComputationalDag().vertex_comm_weight(pred); + for (const auto &pred : instance_->GetComputationalDag().Parents(v)) { + if (schedule_->AssignedProcessor(pred) == schedule_->AssignedProcessor(v) + && schedule_->AssignedSuperstep(pred) == *currentSuperstep_) { + incMemory -= instance_->GetComputationalDag().VertexCommWeight(pred); } } - return current_proc_memory[proc] + inc_memory <= instance->getArchitecture().memoryBound(proc); + return currentProcMemory_[proc] + incMemory <= instance_->GetArchitecture().MemoryBound(proc); } - inline void add(const vertex_idx_t &v, const unsigned proc) { - current_proc_memory[proc] - += instance->getComputationalDag().vertex_mem_weight(v) + instance->getComputationalDag().vertex_comm_weight(v); + inline void Add(const VertexIdxT &v, const unsigned proc) { + currentProcMemory_[proc] + += instance_->GetComputationalDag().VertexMemWeight(v) + instance_->GetComputationalDag().VertexCommWeight(v); - for (const auto &pred : instance->getComputationalDag().parents(v)) { - if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v) - && schedule->assignedSuperstep(pred) == *current_superstep) { - 
current_proc_memory[proc] -= instance->getComputationalDag().vertex_comm_weight(pred); + for (const auto &pred : instance_->GetComputationalDag().Parents(v)) { + if (schedule_->AssignedProcessor(pred) == schedule_->AssignedProcessor(v) + && schedule_->AssignedSuperstep(pred) == *currentSuperstep_) { + currentProcMemory_[proc] -= instance_->GetComputationalDag().VertexCommWeight(pred); } } } - inline void reset(const unsigned proc) { current_proc_memory[proc] = 0; } + inline void Reset(const unsigned proc) { currentProcMemory_[proc] = 0; } }; -template -struct local_inc_edges_memory_constraint { - using Graph_impl_t = Graph_t; +template +struct LocalIncEdgesMemoryConstraint { + using GraphImplT = GraphT; - const BspInstance *instance; - const BspSchedule *schedule; + const BspInstance *instance_; + const BspSchedule *schedule_; - const unsigned *current_superstep = 0; + const unsigned *currentSuperstep_ = 0; - std::vector> current_proc_memory; - std::vector>> current_proc_predec; + std::vector> currentProcMemory_; + std::vector>> currentProcPredec_; - local_inc_edges_memory_constraint() : instance(nullptr), schedule(nullptr) {} + LocalIncEdgesMemoryConstraint() : instance_(nullptr), schedule_(nullptr) {} - inline void initialize(const BspSchedule &schedule_, const unsigned &supstepIdx) { - current_superstep = &supstepIdx; - schedule = &schedule_; - instance = &schedule->getInstance(); + inline void Initialize(const BspSchedule &schedule, const unsigned &supstepIdx) { + currentSuperstep_ = &supstepIdx; + schedule_ = &schedule; + instance_ = &schedule_->GetInstance(); - current_proc_memory = std::vector>(instance->numberOfProcessors(), 0); - current_proc_predec = std::vector>>(instance->numberOfProcessors()); + currentProcMemory_.assign(instance_->NumberOfProcessors(), 0); + currentProcPredec_.assign(instance_->NumberOfProcessors(), std::unordered_set>()); - if (instance->getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES) { + if 
(instance_->GetArchitecture().GetMemoryConstraintType() != MemoryConstraintType::LOCAL_INC_EDGES) { throw std::invalid_argument("Memory constraint type is not LOCAL_INC_EDGES"); } } - inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - v_commw_t inc_memory = instance->getComputationalDag().vertex_comm_weight(v); + inline bool CanAdd(const VertexIdxT &v, const unsigned proc) const { + VCommwT incMemory = instance_->GetComputationalDag().VertexCommWeight(v); - for (const auto &pred : instance->getComputationalDag().parents(v)) { - if (schedule->assignedSuperstep(pred) != *current_superstep - && current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) { - inc_memory += instance->getComputationalDag().vertex_comm_weight(pred); + for (const auto &pred : instance_->GetComputationalDag().Parents(v)) { + if (schedule_->AssignedSuperstep(pred) != *currentSuperstep_ + && currentProcPredec_[proc].find(pred) == currentProcPredec_[proc].end()) { + incMemory += instance_->GetComputationalDag().VertexCommWeight(pred); } } - return current_proc_memory[proc] + inc_memory <= instance->getArchitecture().memoryBound(proc); + return currentProcMemory_[proc] + incMemory <= instance_->GetArchitecture().MemoryBound(proc); } - inline void add(const vertex_idx_t &v, const unsigned proc) { - current_proc_memory[proc] += instance->getComputationalDag().vertex_comm_weight(v); + inline void Add(const VertexIdxT &v, const unsigned proc) { + currentProcMemory_[proc] += instance_->GetComputationalDag().VertexCommWeight(v); - for (const auto &pred : instance->getComputationalDag().parents(v)) { - if (schedule->assignedSuperstep(pred) != *current_superstep) { - const auto pair = current_proc_predec[proc].insert(pred); + for (const auto &pred : instance_->GetComputationalDag().Parents(v)) { + if (schedule_->AssignedSuperstep(pred) != *currentSuperstep_) { + const auto pair = currentProcPredec_[proc].insert(pred); if (pair.second) { - current_proc_memory[proc] += 
instance->getComputationalDag().vertex_comm_weight(pred); + currentProcMemory_[proc] += instance_->GetComputationalDag().VertexCommWeight(pred); } } } } - inline void reset(const unsigned proc) { - current_proc_memory[proc] = 0; - current_proc_predec[proc].clear(); + inline void Reset(const unsigned proc) { + currentProcMemory_[proc] = 0; + currentProcPredec_[proc].clear(); } }; -template -struct local_sources_inc_edges_memory_constraint { - static_assert(std::is_convertible_v, v_memw_t>, - "local_sources_inc_edges_memory_constraint requires that memory and communication weights are convertible."); +template +struct LocalSourcesIncEdgesMemoryConstraint { + static_assert(std::is_convertible_v, VMemwT>, + "LocalSourcesIncEdgesMemoryConstraint requires that memory and communication weights are convertible."); - using Graph_impl_t = Graph_t; + using GraphImplT = GraphT; - const BspInstance *instance; - const BspSchedule *schedule; + const BspInstance *instance_; + const BspSchedule *schedule_; - const unsigned *current_superstep = 0; + const unsigned *currentSuperstep_ = 0; - std::vector> current_proc_memory; - std::vector>> current_proc_predec; + std::vector> currentProcMemory_; + std::vector>> currentProcPredec_; - local_sources_inc_edges_memory_constraint() : instance(nullptr), schedule(nullptr) {} + LocalSourcesIncEdgesMemoryConstraint() : instance_(nullptr), schedule_(nullptr) {} - inline void initialize(const BspSchedule &schedule_, const unsigned &supstepIdx) { - current_superstep = &supstepIdx; - schedule = &schedule_; - instance = &schedule->getInstance(); + inline void Initialize(const BspSchedule &schedule, const unsigned &supstepIdx) { + currentSuperstep_ = &supstepIdx; + schedule_ = &schedule; + instance_ = &schedule_->GetInstance(); - current_proc_memory = std::vector>(instance->numberOfProcessors(), 0); - current_proc_predec = std::vector>>(instance->numberOfProcessors()); + currentProcMemory_.assign(instance_->NumberOfProcessors(), 0); + 
currentProcPredec_.assign(instance_->NumberOfProcessors(), std::unordered_set>()); - if (instance->getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES) { - throw std::invalid_argument("Memory constraint type is not LOCAL_INC_EDGES_2"); + if (instance_->GetArchitecture().GetMemoryConstraintType() != MemoryConstraintType::LOCAL_SOURCES_INC_EDGES) { + throw std::invalid_argument("Memory constraint type is not LOCAL_SOURCES_INC_EDGES"); } } - inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - v_memw_t inc_memory = 0; + inline bool CanAdd(const VertexIdxT &v, const unsigned proc) const { + VMemwT incMemory = 0; - if (is_source(v, instance->getComputationalDag())) { - inc_memory += instance->getComputationalDag().vertex_mem_weight(v); + if (IsSource(v, instance_->GetComputationalDag())) { + incMemory += instance_->GetComputationalDag().VertexMemWeight(v); } - for (const auto &pred : instance->getComputationalDag().parents(v)) { - if (schedule->assignedSuperstep(v) != *current_superstep - && current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) { - inc_memory += instance->getComputationalDag().vertex_comm_weight(pred); + for (const auto &pred : instance_->GetComputationalDag().Parents(v)) { + if (schedule_->AssignedSuperstep(pred) != *currentSuperstep_ + && currentProcPredec_[proc].find(pred) == currentProcPredec_[proc].end()) { + incMemory += instance_->GetComputationalDag().VertexCommWeight(pred); } } - return current_proc_memory[proc] + inc_memory <= instance->getArchitecture().memoryBound(proc); + return currentProcMemory_[proc] + incMemory <= instance_->GetArchitecture().MemoryBound(proc); } - inline void add(const vertex_idx_t &v, const unsigned proc) { - if (is_source(v, instance->getComputationalDag())) { - current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v); + inline void Add(const VertexIdxT &v, const unsigned proc) { + if (IsSource(v, 
instance_->GetComputationalDag())) { + currentProcMemory_[proc] += instance_->GetComputationalDag().VertexMemWeight(v); } - for (const auto &pred : instance->getComputationalDag().parents(v)) { - if (schedule->assignedSuperstep(pred) != *current_superstep) { - const auto pair = current_proc_predec[proc].insert(pred); + for (const auto &pred : instance_->GetComputationalDag().Parents(v)) { + if (schedule_->AssignedSuperstep(pred) != *currentSuperstep_) { + const auto pair = currentProcPredec_[proc].insert(pred); if (pair.second) { - current_proc_memory[proc] += instance->getComputationalDag().vertex_comm_weight(pred); + currentProcMemory_[proc] += instance_->GetComputationalDag().VertexCommWeight(pred); } } } } - inline void reset(const unsigned proc) { - current_proc_memory[proc] = 0; - current_proc_predec[proc].clear(); + inline void Reset(const unsigned proc) { + currentProcMemory_[proc] = 0; + currentProcPredec_[proc].clear(); } }; diff --git a/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp b/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp index 795290fd..6297a7ba 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp @@ -28,33 +28,33 @@ limitations under the License. 
namespace osp { -template -class RandomGreedy : public Scheduler { - static_assert(is_computational_dag_v, "RandomGreedy can only be used with computational DAGs."); +template +class RandomGreedy : public Scheduler { + static_assert(isComputationalDagV, "RandomGreedy can only be used with computational DAGs."); private: - bool ensure_enough_sources; + bool ensureEnoughSources_; public: - RandomGreedy(bool ensure_enough_sources_ = true) : Scheduler(), ensure_enough_sources(ensure_enough_sources_) {}; + RandomGreedy(bool ensureEnoughSources = true) : Scheduler(), ensureEnoughSources_(ensureEnoughSources) {}; - RETURN_STATUS computeSchedule(BspSchedule &sched) override { - using VertexType = vertex_idx_t; + ReturnStatus ComputeSchedule(BspSchedule &sched) override { + using VertexType = VertexIdxT; - const auto &instance = sched.getInstance(); + const auto &instance = sched.GetInstance(); - for (const auto &v : instance.getComputationalDag().vertices()) { - sched.setAssignedProcessor(v, std::numeric_limits::max()); - sched.setAssignedSuperstep(v, std::numeric_limits::max()); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + sched.SetAssignedProcessor(v, std::numeric_limits::max()); + sched.SetAssignedSuperstep(v, std::numeric_limits::max()); } - const auto &graph = instance.getComputationalDag(); + const auto &graph = instance.GetComputationalDag(); - unsigned superstep_counter = 0; + unsigned superstepCounter = 0; - std::vector predecessors_count(instance.numberOfVertices(), 0); + std::vector predecessorsCount(instance.NumberOfVertices(), 0); std::vector next; - for (const auto &i : source_vertices_view(graph)) { + for (const auto &i : SourceVerticesView(graph)) { next.push_back(i); } @@ -63,75 +63,75 @@ class RandomGreedy : public Scheduler { while (!next.empty()) { std::shuffle(next.begin(), next.end(), g); - std::unordered_set nodes_assigned_this_superstep; - std::vector> processor_weights(instance.numberOfProcessors(), 0); - - bool few_sources 
= next.size() < instance.numberOfProcessors() ? true : false; - unsigned fail_counter = 0; - while (!next.empty() && fail_counter < 20) { - std::uniform_int_distribution rand_node_idx(0, next.size() - 1); - VertexType node_ind = rand_node_idx(g); - const auto &node = next[node_ind]; - bool processor_set = false; - bool failed_to_allocate = false; - unsigned processor_to_be_allocated = 0; - - for (const auto &par : graph.parents(node)) { - if (processor_set && (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend()) - && (sched.assignedProcessor(par) != processor_to_be_allocated)) { - failed_to_allocate = true; + std::unordered_set nodesAssignedThisSuperstep; + std::vector> processorWeights(instance.NumberOfProcessors(), 0); + + bool fewSources = next.size() < instance.NumberOfProcessors() ? true : false; + unsigned failCounter = 0; + while (!next.empty() && failCounter < 20) { + std::uniform_int_distribution randNodeIdx(0, next.size() - 1); + VertexType nodeInd = randNodeIdx(g); + const auto &node = next[nodeInd]; + bool processorSet = false; + bool failedToAllocate = false; + unsigned processorToBeAllocated = 0; + + for (const auto &par : graph.Parents(node)) { + if (processorSet && (nodesAssignedThisSuperstep.find(par) != nodesAssignedThisSuperstep.cend()) + && (sched.AssignedProcessor(par) != processorToBeAllocated)) { + failedToAllocate = true; break; } - if ((!processor_set) && (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend())) { - processor_set = true; - processor_to_be_allocated = sched.assignedProcessor(par); + if ((!processorSet) && (nodesAssignedThisSuperstep.find(par) != nodesAssignedThisSuperstep.cend())) { + processorSet = true; + processorToBeAllocated = sched.AssignedProcessor(par); } } - if (failed_to_allocate) { - fail_counter++; + if (failedToAllocate) { + failCounter++; continue; } else { - fail_counter = 0; + failCounter = 0; } - sched.setAssignedSuperstep(node, superstep_counter); - if 
(processor_set) { - sched.setAssignedProcessor(node, processor_to_be_allocated); + sched.SetAssignedSuperstep(node, superstepCounter); + if (processorSet) { + sched.SetAssignedProcessor(node, processorToBeAllocated); } else { - auto min_iter = std::min_element(processor_weights.begin(), processor_weights.end()); + auto minIter = std::min_element(processorWeights.begin(), processorWeights.end()); - assert(std::distance(processor_weights.begin(), min_iter) >= 0); + assert(std::distance(processorWeights.begin(), minIter) >= 0); - sched.setAssignedProcessor(node, static_cast(std::distance(processor_weights.begin(), min_iter))); + sched.SetAssignedProcessor(node, static_cast(std::distance(processorWeights.begin(), minIter))); } - nodes_assigned_this_superstep.emplace(node); - processor_weights[sched.assignedProcessor(node)] += graph.vertex_work_weight(node); - std::vector new_nodes; - for (const auto &chld : graph.children(node)) { - predecessors_count[chld]++; - if (predecessors_count[chld] == graph.in_degree(chld)) { - new_nodes.emplace_back(chld); + nodesAssignedThisSuperstep.emplace(node); + processorWeights[sched.AssignedProcessor(node)] += graph.VertexWorkWeight(node); + std::vector newNodes; + for (const auto &chld : graph.Children(node)) { + predecessorsCount[chld]++; + if (predecessorsCount[chld] == graph.InDegree(chld)) { + newNodes.emplace_back(chld); } } auto it = next.begin(); - std::advance(it, node_ind); + std::advance(it, nodeInd); next.erase(it); - next.insert(next.end(), new_nodes.cbegin(), new_nodes.cend()); + next.insert(next.end(), newNodes.cbegin(), newNodes.cend()); - if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors()) { + if (ensureEnoughSources_ && fewSources && next.size() >= instance.NumberOfProcessors()) { break; } } - superstep_counter++; + superstepCounter++; } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } - std::string getScheduleName() const override { return 
ensure_enough_sources ? "RandomGreedyS" : "RandomGreedy"; } + std::string GetScheduleName() const override { return ensureEnoughSources_ ? "RandomGreedyS" : "RandomGreedy"; } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp b/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp index 91541ca6..c88136e3 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp @@ -41,73 +41,73 @@ namespace osp { * @brief The VarianceFillup class represents a scheduler that uses a greedy algorithm to compute * schedules for BspInstance. * - * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods. - * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. - * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. + * This class inherits from the Scheduler class and implements the ComputeSchedule() and GetScheduleName() methods. + * The ComputeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. + * The GetScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. 
*/ -template -class VarianceFillup : public Scheduler { - static_assert(is_computational_dag_v, "VarianceFillup can only be used with computational DAGs."); +template +class VarianceFillup : public Scheduler { + static_assert(isComputationalDagV, "VarianceFillup can only be used with computational DAGs."); private: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - constexpr static bool use_memory_constraint = is_memory_constraint_v - or is_memory_constraint_schedule_v; + constexpr static bool useMemoryConstraint_ = isMemoryConstraintV + or isMemoryConstraintScheduleV; - static_assert(not use_memory_constraint or std::is_same_v, - "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); + static_assert(not useMemoryConstraint_ or std::is_same_v, + "GraphT must be the same as MemoryConstraintT::GraphImplT."); - MemoryConstraint_t memory_constraint; + MemoryConstraintT memoryConstraint_; - double max_percent_idle_processors; - bool increase_parallelism_in_new_superstep; + double maxPercentIdleProcessors_; + bool increaseParallelismInNewSuperstep_; - std::vector compute_work_variance(const Graph_t &graph) const { - std::vector work_variance(graph.num_vertices(), 0.0); + std::vector ComputeWorkVariance(const GraphT &graph) const { + std::vector workVariance(graph.NumVertices(), 0.0); - const std::vector top_order = GetTopOrder(graph); + const std::vector topOrder = GetTopOrder(graph); - for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) { + for (auto rIter = topOrder.rbegin(); rIter != topOrder.crend(); rIter++) { double temp = 0; - double max_priority = 0; - for (const auto &child : graph.children(*r_iter)) { - max_priority = std::max(work_variance[child], max_priority); + double maxPriority = 0; + for (const auto &child : graph.Children(*rIter)) { + maxPriority = std::max(workVariance[child], maxPriority); } - for (const auto &child : graph.children(*r_iter)) { - temp += std::exp(2 * (work_variance[child] - 
max_priority)); + for (const auto &child : graph.Children(*rIter)) { + temp += std::exp(2 * (workVariance[child] - maxPriority)); } - temp = std::log(temp) / 2 + max_priority; + temp = std::log(temp) / 2 + maxPriority; - double node_weight - = std::log(static_cast(std::max(graph.vertex_work_weight(*r_iter), static_cast>(1)))); - double larger_val = node_weight > temp ? node_weight : temp; + double nodeWeight + = std::log(static_cast(std::max(graph.VertexWorkWeight(*rIter), static_cast>(1)))); + double largerVal = nodeWeight > temp ? nodeWeight : temp; - work_variance[*r_iter] = std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; + workVariance[*rIter] = std::log(std::exp(nodeWeight - largerVal) + std::exp(temp - largerVal)) + largerVal; } - return work_variance; + return workVariance; } - std::vector>> procTypesCompatibleWithNodeType_omit_procType( - const BspInstance &instance) const { - const std::vector> procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); + std::vector>> ProcTypesCompatibleWithNodeTypeOmitProcType( + const BspInstance &instance) const { + const std::vector> procTypesCompatibleWithNodeType = instance.GetProcTypesCompatibleWithNodeType(); - std::vector>> procTypesCompatibleWithNodeType_skip( - instance.getArchitecture().getNumberOfProcessorTypes(), - std::vector>(instance.getComputationalDag().num_vertex_types())); - for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) { - for (unsigned nodeType = 0; nodeType < instance.getComputationalDag().num_vertex_types(); nodeType++) { + std::vector>> procTypesCompatibleWithNodeTypeSkip( + instance.GetArchitecture().GetNumberOfProcessorTypes(), + std::vector>(instance.GetComputationalDag().NumVertexTypes())); + for (unsigned procType = 0; procType < instance.GetArchitecture().GetNumberOfProcessorTypes(); procType++) { + for (unsigned nodeType = 0; nodeType < 
instance.GetComputationalDag().NumVertexTypes(); nodeType++) { for (unsigned otherProcType : procTypesCompatibleWithNodeType[nodeType]) { if (procType == otherProcType) { continue; } - procTypesCompatibleWithNodeType_skip[procType][nodeType].emplace_back(otherProcType); + procTypesCompatibleWithNodeTypeSkip[procType][nodeType].emplace_back(otherProcType); } } } - return procTypesCompatibleWithNodeType_skip; + return procTypesCompatibleWithNodeTypeSkip; } struct VarianceCompare { @@ -116,31 +116,31 @@ class VarianceFillup : public Scheduler { } }; - bool check_mem_feasibility(const BspInstance &instance, - const std::vector, VarianceCompare>> &allReady, - const std::vector, VarianceCompare>> &procReady) const { - if constexpr (use_memory_constraint) { - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + bool CheckMemFeasibility(const BspInstance &instance, + const std::vector, VarianceCompare>> &allReady, + const std::vector, VarianceCompare>> &procReady) const { + if constexpr (useMemoryConstraint_) { + if (instance.GetArchitecture().GetMemoryConstraintType() == MemoryConstraintType::PERSISTENT_AND_TRANSIENT) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (!procReady[i].empty()) { - const std::pair &node_pair = *procReady[i].begin(); - VertexType top_node = node_pair.first; + const std::pair &nodePair = *procReady[i].begin(); + VertexType topNode = nodePair.first; - if (memory_constraint.can_add(top_node, i)) { + if (memoryConstraint_.CanAdd(topNode, i)) { return true; } } } - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (allReady[instance.getArchitecture().processorType(i)].empty()) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + if (allReady[instance.GetArchitecture().ProcessorType(i)].empty()) { continue; } - const std::pair &node_pair = 
*allReady[instance.getArchitecture().processorType(i)].begin(); - VertexType top_node = node_pair.first; + const std::pair &nodePair = *allReady[instance.GetArchitecture().ProcessorType(i)].begin(); + VertexType topNode = nodePair.first; - if (memory_constraint.can_add(top_node, i)) { + if (memoryConstraint_.CanAdd(topNode, i)) { return true; } } @@ -152,64 +152,64 @@ class VarianceFillup : public Scheduler { return true; }; - void Choose(const BspInstance &instance, - const std::vector &work_variance, + void Choose(const BspInstance &instance, + const std::vector &workVariance, std::vector, VarianceCompare>> &allReady, std::vector, VarianceCompare>> &procReady, const std::vector &procFree, VertexType &node, unsigned &p, const bool endSupStep, - const v_workw_t remaining_time, - const std::vector>> &procTypesCompatibleWithNodeType_skip_proctype) const { + const VWorkwT remainingTime, + const std::vector>> &procTypesCompatibleWithNodeTypeSkipProctype) const { double maxScore = -1; - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (procFree[i] && !procReady[i].empty()) { // select node - for (auto node_pair_it = procReady[i].begin(); node_pair_it != procReady[i].end();) { - if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(node_pair_it->first))) { - node_pair_it = procReady[i].erase(node_pair_it); + for (auto nodePairIt = procReady[i].begin(); nodePairIt != procReady[i].end();) { + if (endSupStep && (remainingTime < instance.GetComputationalDag().VertexWorkWeight(nodePairIt->first))) { + nodePairIt = procReady[i].erase(nodePairIt); continue; } - const double &score = node_pair_it->second; + const double &score = nodePairIt->second; if (score > maxScore) { maxScore = score; - node = node_pair_it->first; + node = nodePairIt->first; p = i; - procReady[i].erase(node_pair_it); + procReady[i].erase(nodePairIt); return; } - node_pair_it++; + nodePairIt++; } 
} } - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + if (procFree[i] && !allReady[instance.GetArchitecture().ProcessorType(i)].empty()) { // select node - for (auto it = allReady[instance.getArchitecture().processorType(i)].begin(); - it != allReady[instance.getArchitecture().processorType(i)].end();) { - if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { - it = allReady[instance.getArchitecture().processorType(i)].erase(it); + for (auto it = allReady[instance.GetArchitecture().ProcessorType(i)].begin(); + it != allReady[instance.GetArchitecture().ProcessorType(i)].end();) { + if (endSupStep && (remainingTime < instance.GetComputationalDag().VertexWorkWeight(it->first))) { + it = allReady[instance.GetArchitecture().ProcessorType(i)].erase(it); continue; } const double &score = it->second; if (score > maxScore) { - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(it->first, i)) { + if constexpr (useMemoryConstraint_) { + if (memoryConstraint_.CanAdd(it->first, i)) { node = it->first; p = i; - allReady[instance.getArchitecture().processorType(i)].erase(it); + allReady[instance.GetArchitecture().ProcessorType(i)].erase(it); for (unsigned procType : - procTypesCompatibleWithNodeType_skip_proctype[instance.getArchitecture().processorType( - i)][instance.getComputationalDag().vertex_type(node)]) { - allReady[procType].erase(std::make_pair(node, work_variance[node])); + procTypesCompatibleWithNodeTypeSkipProctype[instance.GetArchitecture().ProcessorType(i)] + [instance.GetComputationalDag().VertexType(node)]) { + allReady[procType].erase(std::make_pair(node, workVariance[node])); } return; } @@ -217,11 +217,11 @@ class VarianceFillup : public Scheduler { node = it->first; p = i; - 
allReady[instance.getArchitecture().processorType(i)].erase(it); + allReady[instance.GetArchitecture().ProcessorType(i)].erase(it); for (unsigned procType : - procTypesCompatibleWithNodeType_skip_proctype[instance.getArchitecture().processorType(i)] - [instance.getComputationalDag().vertex_type(node)]) { - allReady[procType].erase(std::make_pair(node, work_variance[node])); + procTypesCompatibleWithNodeTypeSkipProctype[instance.GetArchitecture().ProcessorType(i)] + [instance.GetComputationalDag().VertexType(node)]) { + allReady[procType].erase(std::make_pair(node, workVariance[node])); } return; } @@ -232,18 +232,18 @@ class VarianceFillup : public Scheduler { } } - bool CanChooseNode(const BspInstance &instance, + bool CanChooseNode(const BspInstance &instance, const std::vector, VarianceCompare>> &allReady, const std::vector, VarianceCompare>> &procReady, const std::vector &procFree) const { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { if (procFree[i] && !procReady[i].empty()) { return true; } } - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) { + for (unsigned i = 0; i < instance.NumberOfProcessors(); ++i) { + if (procFree[i] && !allReady[instance.GetArchitecture().ProcessorType(i)].empty()) { return true; } } @@ -251,34 +251,34 @@ class VarianceFillup : public Scheduler { return false; } - unsigned get_nr_parallelizable_nodes(const BspInstance &instance, - const std::vector &nr_ready_nodes_per_type, - const std::vector &nr_procs_per_type) const { - unsigned nr_nodes = 0; - - std::vector ready_nodes_per_type = nr_ready_nodes_per_type; - std::vector procs_per_type = nr_procs_per_type; - for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) { - for (unsigned node_type = 0; node_type < 
instance.getComputationalDag().num_vertex_types(); ++node_type) { - if (instance.isCompatibleType(node_type, proc_type)) { - unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]); - nr_nodes += matched; - ready_nodes_per_type[node_type] -= matched; - procs_per_type[proc_type] -= matched; + unsigned GetNrParallelizableNodes(const BspInstance &instance, + const std::vector &nrReadyNodesPerType, + const std::vector &nrProcsPerType) const { + unsigned nrNodes = 0; + + std::vector readyNodesPerType = nrReadyNodesPerType; + std::vector procsPerType = nrProcsPerType; + for (unsigned procType = 0; procType < instance.GetArchitecture().GetNumberOfProcessorTypes(); ++procType) { + for (unsigned nodeType = 0; nodeType < instance.GetComputationalDag().NumVertexTypes(); ++nodeType) { + if (instance.IsCompatibleType(nodeType, procType)) { + unsigned matched = std::min(readyNodesPerType[nodeType], procsPerType[procType]); + nrNodes += matched; + readyNodesPerType[nodeType] -= matched; + procsPerType[procType] -= matched; } } } - return nr_nodes; + return nrNodes; } public: /** * @brief Default constructor for VarianceFillup. */ - VarianceFillup(float max_percent_idle_processors_ = 0.2f, bool increase_parallelism_in_new_superstep_ = true) - : max_percent_idle_processors(max_percent_idle_processors_), - increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {} + VarianceFillup(float maxPercentIdleProcessors = 0.2f, bool increaseParallelismInNewSuperstep = true) + : maxPercentIdleProcessors_(maxPercentIdleProcessors), + increaseParallelismInNewSuperstep_(increaseParallelismInNewSuperstep) {} /** * @brief Default destructor for VarianceFillup. @@ -293,79 +293,79 @@ class VarianceFillup : public Scheduler { * @param instance The BspInstance object representing the instance to compute the schedule for. * @return A pair containing the return status and the computed BspSchedule. 
*/ - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); - for (const auto &v : instance.getComputationalDag().vertices()) { - schedule.setAssignedProcessor(v, std::numeric_limits::max()); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + schedule.SetAssignedProcessor(v, std::numeric_limits::max()); } unsigned supstepIdx = 0; - if constexpr (is_memory_constraint_v) { - memory_constraint.initialize(instance); - } else if constexpr (is_memory_constraint_schedule_v) { - memory_constraint.initialize(schedule, supstepIdx); + if constexpr (isMemoryConstraintV) { + memoryConstraint_.Initialize(instance); + } else if constexpr (isMemoryConstraintScheduleV) { + memoryConstraint_.Initialize(schedule, supstepIdx); } - const auto &N = instance.numberOfVertices(); - const unsigned ¶ms_p = instance.numberOfProcessors(); - const auto &G = instance.getComputationalDag(); + const auto &n = instance.NumberOfVertices(); + const unsigned ¶msP = instance.NumberOfProcessors(); + const auto &g = instance.GetComputationalDag(); - const std::vector work_variances = compute_work_variance(G); + const std::vector workVariances = ComputeWorkVariance(g); std::set, VarianceCompare> ready; - std::vector, VarianceCompare>> procReady(params_p); + std::vector, VarianceCompare>> procReady(paramsP); std::vector, VarianceCompare>> allReady( - instance.getArchitecture().getNumberOfProcessorTypes()); + instance.GetArchitecture().GetNumberOfProcessorTypes()); - const std::vector> procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); - const std::vector>> procTypesCompatibleWithNodeType_skip_proctype - = procTypesCompatibleWithNodeType_omit_procType(instance); + const std::vector> procTypesCompatibleWithNodeType = instance.GetProcTypesCompatibleWithNodeType(); + const 
std::vector>> procTypesCompatibleWithNodeTypeSkipProctype + = ProcTypesCompatibleWithNodeTypeOmitProcType(instance); - std::vector nr_ready_nodes_per_type(G.num_vertex_types(), 0); - std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < params_p; ++proc) { - ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; + std::vector nrReadyNodesPerType(g.NumVertexTypes(), 0); + std::vector nrProcsPerType(instance.GetArchitecture().GetNumberOfProcessorTypes(), 0); + for (unsigned proc = 0; proc < paramsP; ++proc) { + ++nrProcsPerType[instance.GetArchitecture().ProcessorType(proc)]; } - std::vector nrPredecRemain(N); - for (VertexType node = 0; node < N; node++) { - const auto num_parents = G.in_degree(node); - nrPredecRemain[node] = num_parents; - if (num_parents == 0) { - ready.insert(std::make_pair(node, work_variances[node])); - ++nr_ready_nodes_per_type[G.vertex_type(node)]; - for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) { - allReady[procType].insert(std::make_pair(node, work_variances[node])); + std::vector nrPredecRemain(n); + for (VertexType node = 0; node < n; node++) { + const auto numParents = g.InDegree(node); + nrPredecRemain[node] = numParents; + if (numParents == 0) { + ready.insert(std::make_pair(node, workVariances[node])); + ++nrReadyNodesPerType[g.VertexType(node)]; + for (unsigned procType : procTypesCompatibleWithNodeType[g.VertexType(node)]) { + allReady[procType].insert(std::make_pair(node, workVariances[node])); } } } - std::vector procFree(params_p, true); - unsigned free = params_p; + std::vector procFree(paramsP, true); + unsigned free = paramsP; - std::set, VertexType>> finishTimes; + std::set, VertexType>> finishTimes; finishTimes.emplace(0, std::numeric_limits::max()); bool endSupStep = false; while (!ready.empty() || !finishTimes.empty()) { if (finishTimes.empty() && endSupStep) { - for (unsigned i = 0; i < params_p; ++i) { + 
for (unsigned i = 0; i < paramsP; ++i) { procReady[i].clear(); - if constexpr (use_memory_constraint) { - memory_constraint.reset(i); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.Reset(i); } } - for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); ++procType) { + for (unsigned procType = 0; procType < instance.GetArchitecture().GetNumberOfProcessorTypes(); ++procType) { allReady[procType].clear(); } for (const auto &nodeAndValuePair : ready) { const auto node = nodeAndValuePair.first; - for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) { + for (unsigned procType : procTypesCompatibleWithNodeType[g.VertexType(node)]) { allReady[procType].insert(allReady[procType].end(), nodeAndValuePair); } } @@ -376,46 +376,46 @@ class VarianceFillup : public Scheduler { finishTimes.emplace(0, std::numeric_limits::max()); } - const v_workw_t time = finishTimes.begin()->first; - const v_workw_t max_finish_time = finishTimes.rbegin()->first; + const VWorkwT time = finishTimes.begin()->first; + const VWorkwT maxFinishTime = finishTimes.rbegin()->first; // Find new ready jobs while (!finishTimes.empty() && finishTimes.begin()->first == time) { const VertexType node = finishTimes.begin()->second; finishTimes.erase(finishTimes.begin()); if (node != std::numeric_limits::max()) { - for (const auto &succ : G.children(node)) { + for (const auto &succ : g.Children(node)) { nrPredecRemain[succ]--; if (nrPredecRemain[succ] == 0) { - ready.emplace(succ, work_variances[succ]); - ++nr_ready_nodes_per_type[G.vertex_type(succ)]; + ready.emplace(succ, workVariances[succ]); + ++nrReadyNodesPerType[g.VertexType(succ)]; bool canAdd = true; - for (const auto &pred : G.parents(succ)) { - if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) - && schedule.assignedSuperstep(pred) == supstepIdx) { + for (const auto &pred : g.Parents(succ)) { + if (schedule.AssignedProcessor(pred) != 
schedule.AssignedProcessor(node) + && schedule.AssignedSuperstep(pred) == supstepIdx) { canAdd = false; } } - if constexpr (use_memory_constraint) { + if constexpr (useMemoryConstraint_) { if (canAdd) { - if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) { + if (not memoryConstraint_.CanAdd(succ, schedule.AssignedProcessor(node))) { canAdd = false; } } } - if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) { + if (!instance.IsCompatible(succ, schedule.AssignedProcessor(node))) { canAdd = false; } if (canAdd) { - procReady[schedule.assignedProcessor(node)].emplace(succ, work_variances[succ]); + procReady[schedule.AssignedProcessor(node)].emplace(succ, workVariances[succ]); } } } - procFree[schedule.assignedProcessor(node)] = true; + procFree[schedule.AssignedProcessor(node)] = true; ++free; } } @@ -426,36 +426,36 @@ class VarianceFillup : public Scheduler { } while (CanChooseNode(instance, allReady, procReady, procFree)) { VertexType nextNode = std::numeric_limits::max(); - unsigned nextProc = params_p; + unsigned nextProc = paramsP; Choose(instance, - work_variances, + workVariances, allReady, procReady, procFree, nextNode, nextProc, endSupStep, - max_finish_time - time, - procTypesCompatibleWithNodeType_skip_proctype); + maxFinishTime - time, + procTypesCompatibleWithNodeTypeSkipProctype); - if (nextNode == std::numeric_limits::max() || nextProc == params_p) { + if (nextNode == std::numeric_limits::max() || nextProc == paramsP) { endSupStep = true; break; } - ready.erase(std::make_pair(nextNode, work_variances[nextNode])); - --nr_ready_nodes_per_type[G.vertex_type(nextNode)]; - schedule.setAssignedProcessor(nextNode, nextProc); - schedule.setAssignedSuperstep(nextNode, supstepIdx); + ready.erase(std::make_pair(nextNode, workVariances[nextNode])); + --nrReadyNodesPerType[g.VertexType(nextNode)]; + schedule.SetAssignedProcessor(nextNode, nextProc); + schedule.SetAssignedSuperstep(nextNode, supstepIdx); - if constexpr 
(use_memory_constraint) { - memory_constraint.add(nextNode, nextProc); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.Add(nextNode, nextProc); std::vector> toErase; - for (const auto &node_pair : procReady[nextProc]) { - if (not memory_constraint.can_add(node_pair.first, nextProc)) { - toErase.push_back(node_pair); + for (const auto &nodePair : procReady[nextProc]) { + if (not memoryConstraint_.CanAdd(nodePair.first, nextProc)) { + toErase.push_back(nodePair); } } @@ -464,29 +464,29 @@ class VarianceFillup : public Scheduler { } } - finishTimes.emplace(time + G.vertex_work_weight(nextNode), nextNode); + finishTimes.emplace(time + g.VertexWorkWeight(nextNode), nextNode); procFree[nextProc] = false; --free; } - if constexpr (use_memory_constraint) { - if (not check_mem_feasibility(instance, allReady, procReady)) { - return RETURN_STATUS::ERROR; + if constexpr (useMemoryConstraint_) { + if (not CheckMemFeasibility(instance, allReady, procReady)) { + return ReturnStatus::ERROR; } } - if (free > params_p * max_percent_idle_processors - && ((!increase_parallelism_in_new_superstep) - || get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) - >= std::min(std::min(params_p, static_cast(1.2 * (params_p - free))), - params_p - free + (static_cast(0.5 * free))))) { + if (free > paramsP * maxPercentIdleProcessors_ + && ((!increaseParallelismInNewSuperstep_) + || GetNrParallelizableNodes(instance, nrReadyNodesPerType, nrProcsPerType) + >= std::min(std::min(paramsP, static_cast(1.2 * (paramsP - free))), + paramsP - free + (static_cast(0.5 * free))))) { endSupStep = true; } } - assert(schedule.satisfiesPrecedenceConstraints()); + assert(schedule.SatisfiesPrecedenceConstraints()); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } /** @@ -496,8 +496,8 @@ class VarianceFillup : public Scheduler { * * @return The name of the schedule. 
*/ - virtual std::string getScheduleName() const override { - if constexpr (use_memory_constraint) { + virtual std::string GetScheduleName() const override { + if constexpr (useMemoryConstraint_) { return "VarianceGreedyFillupMemory"; } else { return "VarianceGreedyFillup"; diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp index 862c9bb2..e84709df 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp @@ -33,100 +33,100 @@ namespace osp { * a BSP schedule, with the assignment of vertices to processors and supersteps fixed. */ -template +template class CoptCommScheduleOptimizer { - static_assert(is_computational_dag_v, "CoptFullScheduler can only be used with computational DAGs."); + static_assert(isComputationalDagV, "CoptFullScheduler can only be used with computational DAGs."); - bool ignore_latency = false; + bool ignoreLatency_ = false; - unsigned int timeLimitSeconds = 600; + unsigned int timeLimitSeconds_ = 600; protected: - VarArray superstep_has_comm; - VarArray max_comm_superstep_var; - std::vector>> comm_processor_to_processor_superstep_node_var; + VarArray superstepHasComm_; + VarArray maxCommSuperstepVar_; + std::vector>> commProcessorToProcessorSuperstepNodeVar_; - void setupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model); + void SetupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model); - void setInitialSolution(BspScheduleCS &schedule, Model &model); + void SetInitialSolution(BspScheduleCS &schedule, Model &model); - bool canShrinkResultingSchedule(unsigned number_of_supersteps) const; + bool CanShrinkResultingSchedule(unsigned numberOfSupersteps) const; - void updateCommSchedule(BspScheduleCS &schedule) const; + void UpdateCommSchedule(BspScheduleCS &schedule) const; public: - using KeyTriple = 
std::tuple, unsigned int, unsigned int>; + using KeyTriple = std::tuple, unsigned int, unsigned int>; virtual ~CoptCommScheduleOptimizer() = default; - virtual RETURN_STATUS improveSchedule(BspScheduleCS &schedule); + virtual ReturnStatus ImproveSchedule(BspScheduleCS &schedule); - virtual std::string getScheduleName() const { return "ILPCommunication"; } + virtual std::string GetScheduleName() const { return "ILPCommunication"; } - virtual void setTimeLimitSeconds(unsigned int limit) { timeLimitSeconds = limit; } + virtual void SetTimeLimitSeconds(unsigned int limit) { timeLimitSeconds_ = limit; } - inline unsigned int getTimeLimitSeconds() const { return timeLimitSeconds; } + inline unsigned int GetTimeLimitSeconds() const { return timeLimitSeconds_; } - virtual void setIgnoreLatency(bool ignore_latency_) { ignore_latency = ignore_latency_; } + virtual void SetIgnoreLatency(bool ignoreLatency) { ignoreLatency_ = ignoreLatency; } }; -template -RETURN_STATUS CoptCommScheduleOptimizer::improveSchedule(BspScheduleCS &schedule) { +template +ReturnStatus CoptCommScheduleOptimizer::ImproveSchedule(BspScheduleCS &schedule) { Envr env; Model model = env.CreateModel("bsp_schedule_cs"); - setupVariablesConstraintsObjective(schedule, model); + SetupVariablesConstraintsObjective(schedule, model); - setInitialSolution(schedule, model); + SetInitialSolution(schedule, model); - model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds); + model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds_); model.SetIntParam(COPT_INTPARAM_THREADS, 128); model.Solve(); if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - updateCommSchedule(schedule); - if (canShrinkResultingSchedule(schedule.numberOfSupersteps())) { - schedule.shrinkByMergingSupersteps(); + UpdateCommSchedule(schedule); + if (CanShrinkResultingSchedule(schedule.NumberOfSupersteps())) { + schedule.ShrinkByMergingSupersteps(); } } if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - return 
RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } else { if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - return RETURN_STATUS::BEST_FOUND; + return ReturnStatus::BEST_FOUND; } else { - return RETURN_STATUS::TIMEOUT; + return ReturnStatus::TIMEOUT; } } } -template -bool CoptCommScheduleOptimizer::canShrinkResultingSchedule(unsigned number_of_supersteps) const { - for (unsigned step = 0; step < number_of_supersteps - 1; step++) { - if (superstep_has_comm[static_cast(step)].Get(COPT_DBLINFO_VALUE) <= 0.01) { +template +bool CoptCommScheduleOptimizer::CanShrinkResultingSchedule(unsigned numberOfSupersteps) const { + for (unsigned step = 0; step < numberOfSupersteps - 1; step++) { + if (superstepHasComm_[static_cast(step)].Get(COPT_DBLINFO_VALUE) <= 0.01) { return true; } } return false; } -template -void CoptCommScheduleOptimizer::updateCommSchedule(BspScheduleCS &schedule) const { - std::map &cs = schedule.getCommunicationSchedule(); +template +void CoptCommScheduleOptimizer::UpdateCommSchedule(BspScheduleCS &schedule) const { + std::map &cs = schedule.GetCommunicationSchedule(); cs.clear(); - for (const auto &node : schedule.getInstance().vertices()) { - for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) { - for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) { - if (p_from != p_to) { - for (unsigned int step = 0; step < schedule.numberOfSupersteps(); step++) { - if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)].Get( + for (const auto &node : schedule.GetInstance().Vertices()) { + for (unsigned int pFrom = 0; pFrom < schedule.GetInstance().NumberOfProcessors(); pFrom++) { + for (unsigned int pTo = 0; pTo < schedule.GetInstance().NumberOfProcessors(); pTo++) { + if (pFrom != pTo) { + for 
(unsigned int step = 0; step < schedule.NumberOfSupersteps(); step++) { + if (commProcessorToProcessorSuperstepNodeVar_[pFrom][pTo][step][static_cast(node)].Get( COPT_DBLINFO_VALUE) >= .99) { - cs[std::make_tuple(node, p_from, p_to)] = step; + cs[std::make_tuple(node, pFrom, pTo)] = step; } } } @@ -135,203 +135,201 @@ void CoptCommScheduleOptimizer::updateCommSchedule(BspScheduleCS -void CoptCommScheduleOptimizer::setInitialSolution(BspScheduleCS &schedule, Model &model) { - const Graph_t &DAG = schedule.getInstance().getComputationalDag(); - const BspArchitecture &arch = schedule.getInstance().getArchitecture(); - const unsigned &num_processors = schedule.getInstance().numberOfProcessors(); - const unsigned &num_supersteps = schedule.numberOfSupersteps(); - const auto &cs = schedule.getCommunicationSchedule(); - - std::vector> first_at(DAG.num_vertices(), - std::vector(num_processors, std::numeric_limits::max())); - for (const auto &node : DAG.vertices()) { - first_at[node][schedule.assignedProcessor(node)] = schedule.assignedSuperstep(node); +template +void CoptCommScheduleOptimizer::SetInitialSolution(BspScheduleCS &schedule, Model &model) { + const GraphT &dag = schedule.GetInstance().GetComputationalDag(); + const BspArchitecture &arch = schedule.GetInstance().GetArchitecture(); + const unsigned &numProcessors = schedule.GetInstance().NumberOfProcessors(); + const unsigned &numSupersteps = schedule.NumberOfSupersteps(); + const auto &cs = schedule.GetCommunicationSchedule(); + + std::vector> firstAt(dag.NumVertices(), + std::vector(numProcessors, std::numeric_limits::max())); + for (const auto &node : dag.Vertices()) { + firstAt[node][schedule.AssignedProcessor(node)] = schedule.AssignedSuperstep(node); } - for (const auto &node : DAG.vertices()) { - for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { + for (const auto &node : dag.Vertices()) { + for (unsigned p1 = 0; p1 < numProcessors; p1++) { + for 
(unsigned p2 = 0; p2 < numProcessors; p2++) { if (p1 == p2) { continue; } - for (unsigned step = 0; step < num_supersteps; step++) { + for (unsigned step = 0; step < numSupersteps; step++) { const auto &key = std::make_tuple(node, p1, p2); if (cs.find(key) != cs.end() && cs.at(key) == step) { - model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)], 1); - first_at[node][p2] = std::min(first_at[node][p2], step + 1); + model.SetMipStart(commProcessorToProcessorSuperstepNodeVar_[p1][p2][step][static_cast(node)], 1); + firstAt[node][p2] = std::min(firstAt[node][p2], step + 1); } else { - model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)], 0); + model.SetMipStart(commProcessorToProcessorSuperstepNodeVar_[p1][p2][step][static_cast(node)], 0); } } } } } - for (const auto &node : DAG.vertices()) { - for (unsigned proc = 0; proc < num_processors; proc++) { - for (unsigned step = 0; step < num_supersteps; step++) { - if (step >= first_at[node][proc]) { - model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast(node)], 1); + for (const auto &node : dag.Vertices()) { + for (unsigned proc = 0; proc < numProcessors; proc++) { + for (unsigned step = 0; step < numSupersteps; step++) { + if (step >= firstAt[node][proc]) { + model.SetMipStart(commProcessorToProcessorSuperstepNodeVar_[proc][proc][step][static_cast(node)], 1); } else { - model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast(node)], 0); + model.SetMipStart(commProcessorToProcessorSuperstepNodeVar_[proc][proc][step][static_cast(node)], 0); } } } } - if (!ignore_latency) { - std::vector comm_phase_used(num_supersteps, 0); + if (!ignoreLatency_) { + std::vector commPhaseUsed(numSupersteps, 0); for (auto const &[key, val] : cs) { - comm_phase_used[val] = 1; + commPhaseUsed[val] = 1; } - for (unsigned step = 0; step < num_supersteps; step++) { - 
model.SetMipStart(superstep_has_comm[static_cast(step)], comm_phase_used[step]); + for (unsigned step = 0; step < numSupersteps; step++) { + model.SetMipStart(superstepHasComm_[static_cast(step)], commPhaseUsed[step]); } } - std::vector>> send(num_supersteps, std::vector>(num_processors, 0)); - std::vector>> rec(num_supersteps, std::vector>(num_processors, 0)); + std::vector>> send(numSupersteps, std::vector>(numProcessors, 0)); + std::vector>> rec(numSupersteps, std::vector>(numProcessors, 0)); for (const auto &[key, val] : cs) { - send[val][std::get<1>(key)] - += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key)); - rec[val][std::get<2>(key)] += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key)); + send[val][std::get<1>(key)] += dag.VertexCommWeight(std::get<0>(key)) * arch.SendCosts(std::get<1>(key), std::get<2>(key)); + rec[val][std::get<2>(key)] += dag.VertexCommWeight(std::get<0>(key)) * arch.SendCosts(std::get<1>(key), std::get<2>(key)); } - for (unsigned step = 0; step < num_supersteps; step++) { - v_commw_t max_comm = 0; - for (unsigned proc = 0; proc < num_processors; proc++) { - max_comm = std::max(max_comm, send[step][proc]); - max_comm = std::max(max_comm, rec[step][proc]); + for (unsigned step = 0; step < numSupersteps; step++) { + VCommwT maxComm = 0; + for (unsigned proc = 0; proc < numProcessors; proc++) { + maxComm = std::max(maxComm, send[step][proc]); + maxComm = std::max(maxComm, rec[step][proc]); } - model.SetMipStart(max_comm_superstep_var[static_cast(step)], max_comm); + model.SetMipStart(maxCommSuperstepVar_[static_cast(step)], maxComm); } model.LoadMipStart(); model.SetIntParam(COPT_INTPARAM_MIPSTARTMODE, 2); } -template -void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model) { - const unsigned &max_number_supersteps = schedule.numberOfSupersteps(); - const unsigned &num_processors = 
schedule.getInstance().numberOfProcessors(); - const unsigned num_vertices = static_cast(schedule.getInstance().numberOfVertices()); +template +void CoptCommScheduleOptimizer::SetupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model) { + const unsigned &maxNumberSupersteps = schedule.NumberOfSupersteps(); + const unsigned &numProcessors = schedule.GetInstance().NumberOfProcessors(); + const unsigned numVertices = static_cast(schedule.GetInstance().NumberOfVertices()); // variables indicating if superstep is used at all - if (!ignore_latency) { - superstep_has_comm = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_has_comm"); + if (!ignoreLatency_) { + superstepHasComm_ = model.AddVars(static_cast(maxNumberSupersteps), COPT_BINARY, "superstepHasComm_"); } - max_comm_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_comm_superstep"); + maxCommSuperstepVar_ = model.AddVars(static_cast(maxNumberSupersteps), COPT_INTEGER, "max_comm_superstep"); // communicate node from p1 to p2 at superstep - comm_processor_to_processor_superstep_node_var = std::vector>>( - num_processors, std::vector>(num_processors, std::vector(max_number_supersteps))); + commProcessorToProcessorSuperstepNodeVar_ = std::vector>>( + numProcessors, std::vector>(numProcessors, std::vector(maxNumberSupersteps))); - for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - comm_processor_to_processor_superstep_node_var[p1][p2][step] - = model.AddVars(static_cast(num_vertices), COPT_BINARY, "comm_processor_to_processor_superstep_node"); + for (unsigned p1 = 0; p1 < numProcessors; p1++) { + for (unsigned p2 = 0; p2 < numProcessors; p2++) { + for (unsigned step = 0; step < maxNumberSupersteps; step++) { + commProcessorToProcessorSuperstepNodeVar_[p1][p2][step] + = model.AddVars(static_cast(numVertices), COPT_BINARY, 
"comm_processor_to_processor_superstep_node"); } } } - if (!ignore_latency) { - unsigned M = num_processors * num_processors * num_vertices; - for (unsigned int step = 0; step < schedule.numberOfSupersteps(); step++) { + if (!ignoreLatency_) { + unsigned m = numProcessors * numProcessors * numVertices; + for (unsigned int step = 0; step < schedule.NumberOfSupersteps(); step++) { Expr expr; - for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { + for (unsigned p1 = 0; p1 < numProcessors; p1++) { + for (unsigned p2 = 0; p2 < numProcessors; p2++) { if (p1 != p2) { - for (unsigned node = 0; node < num_vertices; node++) { - expr += comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)]; + for (unsigned node = 0; node < numVertices; node++) { + expr += commProcessorToProcessorSuperstepNodeVar_[p1][p2][step][static_cast(node)]; } } } } - model.AddConstr(expr <= M * superstep_has_comm[static_cast(step)]); + model.AddConstr(expr <= m * superstepHasComm_[static_cast(step)]); } } // precedence constraint: if task is computed then all of its predecessors must have been present // and vertex is present where it was computed - for (unsigned node = 0; node < num_vertices; node++) { - const unsigned &processor = schedule.assignedProcessor(node); - const unsigned &superstep = schedule.assignedSuperstep(node); + for (unsigned node = 0; node < numVertices; node++) { + const unsigned &processor = schedule.AssignedProcessor(node); + const unsigned &superstep = schedule.AssignedSuperstep(node); Expr expr; - unsigned num_com_edges = 0; - for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node)) { - if (schedule.assignedProcessor(node) != schedule.assignedProcessor(pred)) { - num_com_edges += 1; - expr += comm_processor_to_processor_superstep_node_var[processor][processor][superstep][static_cast(pred)]; + unsigned numComEdges = 0; + for (const auto &pred : 
schedule.GetInstance().GetComputationalDag().Parents(node)) { + if (schedule.AssignedProcessor(node) != schedule.AssignedProcessor(pred)) { + numComEdges += 1; + expr += commProcessorToProcessorSuperstepNodeVar_[processor][processor][superstep][static_cast(pred)]; model.AddConstr( - comm_processor_to_processor_superstep_node_var[schedule.assignedProcessor(pred)][schedule.assignedProcessor( - pred)][schedule.assignedSuperstep(pred)][static_cast(pred)] + commProcessorToProcessorSuperstepNodeVar_[schedule.AssignedProcessor(pred)][schedule.AssignedProcessor(pred)] + [schedule.AssignedSuperstep(pred)][static_cast(pred)] == 1); } } - if (num_com_edges > 0) { - model.AddConstr(expr >= num_com_edges); + if (numComEdges > 0) { + model.AddConstr(expr >= numComEdges); } } // combines two constraints: node can only be communicated if it is present; and node is present if it was computed // or communicated - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { - for (unsigned int node = 0; node < num_vertices; node++) { - if (processor == schedule.assignedProcessor(node) && step >= schedule.assignedSuperstep(node)) { + for (unsigned int step = 0; step < maxNumberSupersteps; step++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { + for (unsigned int node = 0; node < numVertices; node++) { + if (processor == schedule.AssignedProcessor(node) && step >= schedule.AssignedSuperstep(node)) { continue; } Expr expr1, expr2; if (step > 0) { - for (unsigned int p_from = 0; p_from < num_processors; p_from++) { - expr1 - += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast(node)]; + for (unsigned int pFrom = 0; pFrom < numProcessors; pFrom++) { + expr1 += commProcessorToProcessorSuperstepNodeVar_[pFrom][processor][step - 1][static_cast(node)]; } } - for (unsigned int p_to = 0; p_to < num_processors; p_to++) { - expr2 += 
comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast(node)]; + for (unsigned int pTo = 0; pTo < numProcessors; pTo++) { + expr2 += commProcessorToProcessorSuperstepNodeVar_[processor][pTo][step][static_cast(node)]; } - model.AddConstr(num_processors * expr1 >= expr2); + model.AddConstr(numProcessors * expr1 >= expr2); } } } - for (unsigned step = 0; step < max_number_supersteps; step++) { - for (unsigned processor = 0; processor < num_processors; processor++) { + for (unsigned step = 0; step < maxNumberSupersteps; step++) { + for (unsigned processor = 0; processor < numProcessors; processor++) { Expr expr1, expr2; - for (unsigned node = 0; node < num_vertices; node++) { - for (unsigned p_to = 0; p_to < num_processors; p_to++) { - if (processor != p_to) { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule.getInstance().sendCosts(processor, p_to) - * comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast(node)]; + for (unsigned node = 0; node < numVertices; node++) { + for (unsigned pTo = 0; pTo < numProcessors; pTo++) { + if (processor != pTo) { + expr1 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule.GetInstance().SendCosts(processor, pTo) + * commProcessorToProcessorSuperstepNodeVar_[processor][pTo][step][static_cast(node)]; } } - for (unsigned int p_from = 0; p_from < num_processors; p_from++) { - if (processor != p_from) { - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule.getInstance().sendCosts(p_from, processor) - * comm_processor_to_processor_superstep_node_var[p_from][processor][step][static_cast(node)]; + for (unsigned int pFrom = 0; pFrom < numProcessors; pFrom++) { + if (processor != pFrom) { + expr2 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule.GetInstance().SendCosts(pFrom, processor) + * 
commProcessorToProcessorSuperstepNodeVar_[pFrom][processor][step][static_cast(node)]; } } } - model.AddConstr(max_comm_superstep_var[static_cast(step)] >= expr1); - model.AddConstr(max_comm_superstep_var[static_cast(step)] >= expr2); + model.AddConstr(maxCommSuperstepVar_[static_cast(step)] >= expr1); + model.AddConstr(maxCommSuperstepVar_[static_cast(step)] >= expr2); } } @@ -340,17 +338,17 @@ void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(cons */ Expr expr; - if (!ignore_latency) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast(step)] - + schedule.getInstance().synchronisationCosts() * superstep_has_comm[static_cast(step)]; + if (!ignoreLatency_) { + for (unsigned int step = 0; step < maxNumberSupersteps; step++) { + expr += schedule.GetInstance().CommunicationCosts() * maxCommSuperstepVar_[static_cast(step)] + + schedule.GetInstance().SynchronisationCosts() * superstepHasComm_[static_cast(step)]; } } else { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast(step)]; + for (unsigned int step = 0; step < maxNumberSupersteps; step++) { + expr += schedule.GetInstance().CommunicationCosts() * maxCommSuperstepVar_[static_cast(step)]; } } - model.SetObjective(expr - schedule.getInstance().synchronisationCosts(), COPT_MINIMIZE); + model.SetObjective(expr - schedule.GetInstance().SynchronisationCosts(), COPT_MINIMIZE); } } // namespace osp diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp index fdd3f5c1..69250c9d 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp @@ -55,105 +55,103 @@ namespace osp { * supersteps, enable/disable writing intermediate 
solutions, and get information about the best gap, objective value, * and bound found by the solver. */ -template -class CoptFullScheduler : public Scheduler { - static_assert(is_computational_dag_v, "CoptFullScheduler can only be used with computational DAGs."); +template +class CoptFullScheduler : public Scheduler { + static_assert(isComputationalDagV, "CoptFullScheduler can only be used with computational DAGs."); private: - bool allow_recomputation; - bool use_memory_constraint; - bool use_initial_schedule_recomp = false; - bool use_initial_schedule = false; - bool write_solutions_found; - bool is_max_bsp = false; + bool allowRecomputation_; + bool useMemoryConstraint_; + bool useInitialScheduleRecomp_ = false; + bool useInitialSchedule_ = false; + bool writeSolutionsFound_; + bool isMaxBsp_ = false; - unsigned timeLimitSeconds = 0; + unsigned timeLimitSeconds_ = 0; - const BspScheduleCS *initial_schedule; - const BspScheduleRecomp *initial_schedule_recomp; + const BspScheduleCS *initialSchedule_; + const BspScheduleRecomp *initialScheduleRecomp_; - std::string write_solutions_path; - std::string solution_file_prefix; + std::string writeSolutionsPath_; + std::string solutionFilePrefix_; class WriteSolutionCallback : public CallbackBase { private: - unsigned counter; - unsigned max_number_solution; + unsigned counter_; + unsigned maxNumberSolution_; - double best_obj; + double bestObj_; public: WriteSolutionCallback() - : counter(0), - max_number_solution(500), - best_obj(COPT_INFINITY), - allow_recomputation_cb(false), - write_solutions_path_cb(""), - solution_file_prefix_cb(""), - instance_ptr(), - node_to_processor_superstep_var_ptr(), - comm_processor_to_processor_superstep_node_var_ptr() {} - - bool allow_recomputation_cb; - std::string write_solutions_path_cb; - std::string solution_file_prefix_cb; - const BspInstance *instance_ptr; - - std::vector> *node_to_processor_superstep_var_ptr; - std::vector>> *comm_processor_to_processor_superstep_node_var_ptr; + 
: counter_(0), + maxNumberSolution_(500), + bestObj_(COPT_INFINITY), + allowRecomputationCb_(false), + writeSolutionsPathCb_(""), + solutionFilePrefixCb_(""), + instancePtr_(), + nodeToProcessorSuperstepVarPtr_(), + commProcessorToProcessorSuperstepNodeVarPtr_() {} + + bool allowRecomputationCb_; + std::string writeSolutionsPathCb_; + std::string solutionFilePrefixCb_; + const BspInstance *instancePtr_; + + std::vector> *nodeToProcessorSuperstepVarPtr_; + std::vector>> *commProcessorToProcessorSuperstepNodeVarPtr_; void callback() override { - if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { + if (Where() == COPT_CBCONTEXT_MIPSOL && counter_ < maxNumberSolution_ && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { try { - if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { - best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); + if (GetDblInfo(COPT_CBINFO_BESTOBJ) < bestObj_ && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { + bestObj_ = GetDblInfo(COPT_CBINFO_BESTOBJ); - if (allow_recomputation_cb) { - auto sched = constructBspScheduleRecompFromCallback(); - DotFileWriter sched_writer; - sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb - + "_" + std::to_string(counter) + "_schedule.dot", - sched); + if (allowRecomputationCb_) { + auto sched = ConstructBspScheduleRecompFromCallback(); + DotFileWriter schedWriter; + schedWriter.WriteScheduleRecomp(writeSolutionsPathCb_ + "intmed_sol_" + solutionFilePrefixCb_ + "_" + + std::to_string(counter_) + "_schedule.dot", + sched); } else { - BspSchedule sched = constructBspScheduleFromCallback(); - DotFileWriter sched_writer; - sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" - + std::to_string(counter) + "_schedule.dot", - sched); + BspSchedule sched = ConstructBspScheduleFromCallback(); + DotFileWriter schedWriter; + 
schedWriter.WriteSchedule(writeSolutionsPathCb_ + "intmed_sol_" + solutionFilePrefixCb_ + "_" + + std::to_string(counter_) + "_schedule.dot", + sched); } - counter++; + counter_++; } } catch (const std::exception &e) {} } } - BspScheduleCS constructBspScheduleFromCallback() { - BspScheduleCS schedule(*instance_ptr); + BspScheduleCS ConstructBspScheduleFromCallback() { + BspScheduleCS schedule(*instancePtr_); - for (const auto &node : instance_ptr->vertices()) { - for (unsigned int processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); - step++) { - if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= .99) { - schedule.setAssignedProcessor(node, processor); - schedule.setAssignedSuperstep(node, step); + for (const auto &node : instancePtr_->Vertices()) { + for (unsigned int processor = 0; processor < instancePtr_->NumberOfProcessors(); processor++) { + for (unsigned step = 0; step < static_cast((*nodeToProcessorSuperstepVarPtr_)[0][0].Size()); step++) { + if (GetSolution((*nodeToProcessorSuperstepVarPtr_)[node][processor][static_cast(step)]) >= .99) { + schedule.SetAssignedProcessor(node, processor); + schedule.SetAssignedSuperstep(node, step); } } } } - for (const auto &node : instance_ptr->vertices()) { - for (unsigned int p_from = 0; p_from < instance_ptr->numberOfProcessors(); p_from++) { - for (unsigned int p_to = 0; p_to < instance_ptr->numberOfProcessors(); p_to++) { - if (p_from != p_to) { - for (int step = 0; step < (*node_to_processor_superstep_var_ptr)[0][0].Size(); step++) { - if (GetSolution( - (*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][static_cast( - step)][static_cast(node)]) + for (const auto &node : instancePtr_->Vertices()) { + for (unsigned int pFrom = 0; pFrom < instancePtr_->NumberOfProcessors(); pFrom++) { + for (unsigned int pTo = 0; pTo < 
instancePtr_->NumberOfProcessors(); pTo++) { + if (pFrom != pTo) { + for (int step = 0; step < (*nodeToProcessorSuperstepVarPtr_)[0][0].Size(); step++) { + if (GetSolution((*commProcessorToProcessorSuperstepNodeVarPtr_)[pFrom][pTo][static_cast( + step)][static_cast(node)]) >= .99) { - schedule.addCommunicationScheduleEntry(node, p_from, p_to, static_cast(step)); + schedule.AddCommunicationScheduleEntry(node, pFrom, pTo, static_cast(step)); } } } @@ -164,38 +162,36 @@ class CoptFullScheduler : public Scheduler { return schedule; } - BspScheduleRecomp constructBspScheduleRecompFromCallback() { - unsigned number_of_supersteps = 0; - BspScheduleRecomp schedule(*instance_ptr); + BspScheduleRecomp ConstructBspScheduleRecompFromCallback() { + unsigned numberOfSupersteps = 0; + BspScheduleRecomp schedule(*instancePtr_); - for (unsigned int node = 0; node < instance_ptr->numberOfVertices(); node++) { - for (unsigned int processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); - step++) { - if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= .99) { - schedule.assignments(node).emplace_back(processor, step); + for (unsigned int node = 0; node < instancePtr_->NumberOfVertices(); node++) { + for (unsigned int processor = 0; processor < instancePtr_->NumberOfProcessors(); processor++) { + for (unsigned step = 0; step < static_cast((*nodeToProcessorSuperstepVarPtr_)[0][0].Size()); step++) { + if (GetSolution((*nodeToProcessorSuperstepVarPtr_)[node][processor][static_cast(step)]) >= .99) { + schedule.Assignments(node).emplace_back(processor, step); - if (step >= number_of_supersteps) { - number_of_supersteps = step + 1; + if (step >= numberOfSupersteps) { + numberOfSupersteps = step + 1; } } } } } - schedule.setNumberOfSupersteps(number_of_supersteps); + schedule.SetNumberOfSupersteps(numberOfSupersteps); - for (unsigned 
int node = 0; node < instance_ptr->numberOfVertices(); node++) { - for (unsigned int p_from = 0; p_from < instance_ptr->numberOfProcessors(); p_from++) { - for (unsigned int p_to = 0; p_to < instance_ptr->numberOfProcessors(); p_to++) { - if (p_from != p_to) { - for (unsigned step = 0; - step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); + for (unsigned int node = 0; node < instancePtr_->NumberOfVertices(); node++) { + for (unsigned int pFrom = 0; pFrom < instancePtr_->NumberOfProcessors(); pFrom++) { + for (unsigned int pTo = 0; pTo < instancePtr_->NumberOfProcessors(); pTo++) { + if (pFrom != pTo) { + for (unsigned step = 0; step < static_cast((*nodeToProcessorSuperstepVarPtr_)[0][0].Size()); step++) { - if (GetSolution((*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][step] - [static_cast(node)]) + if (GetSolution( + (*commProcessorToProcessorSuperstepNodeVarPtr_)[pFrom][pTo][step][static_cast(node)]) >= .99) { - schedule.addCommunicationScheduleEntry(node, p_from, p_to, step); + schedule.AddCommunicationScheduleEntry(node, pFrom, pTo, step); } } } @@ -210,51 +206,51 @@ class CoptFullScheduler : public Scheduler { // WriteSolutionCallback solution_callback; protected: - unsigned int max_number_supersteps; + unsigned int maxNumberSupersteps_; - VarArray superstep_used_var; - std::vector> node_to_processor_superstep_var; - std::vector>> comm_processor_to_processor_superstep_node_var; + VarArray superstepUsedVar_; + std::vector> nodeToProcessorSuperstepVar_; + std::vector>> commProcessorToProcessorSuperstepNodeVar_; - VarArray max_comm_superstep_var; - VarArray max_work_superstep_var; + VarArray maxCommSuperstepVar_; + VarArray maxWorkSuperstepVar_; - void constructBspScheduleFromSolution(BspScheduleCS &schedule, bool cleanup_ = false) { - const auto &instance = schedule.getInstance(); + void ConstructBspScheduleFromSolution(BspScheduleCS &schedule, bool cleanup = false) { + const auto &instance = schedule.GetInstance(); - 
unsigned number_of_supersteps = 0; + unsigned numberOfSupersteps = 0; - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (superstep_used_var[static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { - number_of_supersteps++; + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + if (superstepUsedVar_[static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { + numberOfSupersteps++; } } - for (const auto &node : instance.vertices()) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (node_to_processor_superstep_var[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { - schedule.setAssignedProcessor(node, processor); - schedule.setAssignedSuperstep(node, step); + for (const auto &node : instance.Vertices()) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + if (nodeToProcessorSuperstepVar_[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { + schedule.SetAssignedProcessor(node, processor); + schedule.SetAssignedSuperstep(node, step); } } } } - if (is_max_bsp && number_of_supersteps > 0) { // can ignore last 2 comm phases in this case - --number_of_supersteps; + if (isMaxBsp_ && numberOfSupersteps > 0) { // can ignore last 2 comm phases in this case + --numberOfSupersteps; } - schedule.getCommunicationSchedule().clear(); - for (const auto &node : instance.vertices()) { - for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { - for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { - if (p_from != p_to) { - for (unsigned int step = 0; step < number_of_supersteps - 1; step++) { - if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)].Get( + schedule.GetCommunicationSchedule().clear(); + for (const auto &node : 
instance.Vertices()) { + for (unsigned int pFrom = 0; pFrom < instance.NumberOfProcessors(); pFrom++) { + for (unsigned int pTo = 0; pTo < instance.NumberOfProcessors(); pTo++) { + if (pFrom != pTo) { + for (unsigned int step = 0; step < numberOfSupersteps - 1; step++) { + if (commProcessorToProcessorSuperstepNodeVar_[pFrom][pTo][step][static_cast(node)].Get( COPT_DBLINFO_VALUE) >= .99) { - schedule.addCommunicationScheduleEntry(node, p_from, p_to, step); + schedule.AddCommunicationScheduleEntry(node, pFrom, pTo, step); } } } @@ -262,43 +258,43 @@ class CoptFullScheduler : public Scheduler { } } - if (cleanup_) { - node_to_processor_superstep_var.clear(); - comm_processor_to_processor_superstep_node_var.clear(); + if (cleanup) { + nodeToProcessorSuperstepVar_.clear(); + commProcessorToProcessorSuperstepNodeVar_.clear(); } } - void constructBspScheduleRecompFromSolution(BspScheduleRecomp &schedule, bool cleanup_) { - unsigned number_of_supersteps = 0; + void ConstructBspScheduleRecompFromSolution(BspScheduleRecomp &schedule, bool cleanup) { + unsigned numberOfSupersteps = 0; - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (superstep_used_var[static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { - number_of_supersteps++; + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + if (superstepUsedVar_[static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { + numberOfSupersteps++; } } - schedule.setNumberOfSupersteps(number_of_supersteps); + schedule.SetNumberOfSupersteps(numberOfSupersteps); - for (unsigned node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) { - for (unsigned step = 0; step < number_of_supersteps - 1; step++) { - if (node_to_processor_superstep_var[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { - schedule.assignments(node).emplace_back(processor, step); + for (unsigned node = 0; node < 
schedule.GetInstance().NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < schedule.GetInstance().NumberOfProcessors(); processor++) { + for (unsigned step = 0; step < numberOfSupersteps - 1; step++) { + if (nodeToProcessorSuperstepVar_[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { + schedule.Assignments(node).emplace_back(processor, step); } } } } - schedule.getCommunicationSchedule().clear(); - for (unsigned int node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) { - for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) { - if (p_from != p_to) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)].Get( + schedule.GetCommunicationSchedule().clear(); + for (unsigned int node = 0; node < schedule.GetInstance().NumberOfVertices(); node++) { + for (unsigned int pFrom = 0; pFrom < schedule.GetInstance().NumberOfProcessors(); pFrom++) { + for (unsigned int pTo = 0; pTo < schedule.GetInstance().NumberOfProcessors(); pTo++) { + if (pFrom != pTo) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + if (commProcessorToProcessorSuperstepNodeVar_[pFrom][pTo][step][static_cast(node)].Get( COPT_DBLINFO_VALUE) >= .99) { - schedule.addCommunicationScheduleEntry(node, p_from, p_to, step); + schedule.AddCommunicationScheduleEntry(node, pFrom, pTo, step); } } } @@ -306,96 +302,96 @@ class CoptFullScheduler : public Scheduler { } } - if (cleanup_) { - node_to_processor_superstep_var.clear(); - comm_processor_to_processor_superstep_node_var.clear(); + if (cleanup) { + nodeToProcessorSuperstepVar_.clear(); + commProcessorToProcessorSuperstepNodeVar_.clear(); } } - void loadInitialSchedule(Model &model, const BspInstance &instance) { - if (use_initial_schedule_recomp - 
&& (max_number_supersteps < initial_schedule_recomp->numberOfSupersteps() - || instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() - || instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) { + void LoadInitialSchedule(Model &model, const BspInstance &instance) { + if (useInitialScheduleRecomp_ + && (maxNumberSupersteps_ < initialScheduleRecomp_->NumberOfSupersteps() + || instance.NumberOfProcessors() != initialScheduleRecomp_->GetInstance().NumberOfProcessors() + || instance.NumberOfVertices() != initialScheduleRecomp_->GetInstance().NumberOfVertices())) { throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not " "agree with those of the initial schedule's instance!"); } - if (!use_initial_schedule_recomp & use_initial_schedule - && (max_number_supersteps < initial_schedule->numberOfSupersteps() - || instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() - || instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) { + if (!useInitialScheduleRecomp_ & useInitialSchedule_ + && (maxNumberSupersteps_ < initialSchedule_->NumberOfSupersteps() + || instance.NumberOfProcessors() != initialSchedule_->GetInstance().NumberOfProcessors() + || instance.NumberOfVertices() != initialSchedule_->GetInstance().NumberOfVertices())) { throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not " "agree with those of the initial schedule's instance!"); } - const auto &DAG = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getComputationalDag() - : initial_schedule->getInstance().getComputationalDag(); + const auto &dag = useInitialScheduleRecomp_ ? initialScheduleRecomp_->GetInstance().GetComputationalDag() + : initialSchedule_->GetInstance().GetComputationalDag(); - const auto &arch = use_initial_schedule_recomp ? 
initial_schedule_recomp->getInstance().getArchitecture() - : initial_schedule->getInstance().getArchitecture(); + const auto &arch = useInitialScheduleRecomp_ ? initialScheduleRecomp_->GetInstance().GetArchitecture() + : initialSchedule_->GetInstance().GetArchitecture(); - const unsigned &num_processors = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().numberOfProcessors() - : initial_schedule->getInstance().numberOfProcessors(); + const unsigned &numProcessors = useInitialScheduleRecomp_ ? initialScheduleRecomp_->GetInstance().NumberOfProcessors() + : initialSchedule_->GetInstance().NumberOfProcessors(); - const unsigned &num_supersteps = use_initial_schedule_recomp ? initial_schedule_recomp->numberOfSupersteps() - : initial_schedule->numberOfSupersteps(); + const unsigned &numSupersteps = useInitialScheduleRecomp_ ? initialScheduleRecomp_->NumberOfSupersteps() + : initialSchedule_->NumberOfSupersteps(); - const auto &cs = use_initial_schedule_recomp ? initial_schedule_recomp->getCommunicationSchedule() - : initial_schedule->getCommunicationSchedule(); + const auto &cs = useInitialScheduleRecomp_ ? 
initialScheduleRecomp_->GetCommunicationSchedule() + : initialSchedule_->GetCommunicationSchedule(); - assert(max_number_supersteps <= static_cast(std::numeric_limits::max())); - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (step < num_supersteps) { - model.SetMipStart(superstep_used_var[static_cast(step)], 1); + assert(maxNumberSupersteps_ <= static_cast(std::numeric_limits::max())); + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + if (step < numSupersteps) { + model.SetMipStart(superstepUsedVar_[static_cast(step)], 1); } else { - model.SetMipStart(superstep_used_var[static_cast(step)], 0); + model.SetMipStart(superstepUsedVar_[static_cast(step)], 0); } // model.SetMipStart(max_work_superstep_var[step], COPT_INFINITY); // model.SetMipStart(max_comm_superstep_var[step], COPT_INFINITY); } - std::vector>> computed(DAG.num_vertices()); - for (const auto &node : DAG.vertices()) { - if (use_initial_schedule_recomp) { - for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) { + std::vector>> computed(dag.NumVertices()); + for (const auto &node : dag.Vertices()) { + if (useInitialScheduleRecomp_) { + for (const std::pair &assignment : initialScheduleRecomp_->Assignments(node)) { computed[node].emplace(assignment); } } else { - computed[node].emplace(initial_schedule->assignedProcessor(node), initial_schedule->assignedSuperstep(node)); + computed[node].emplace(initialSchedule_->AssignedProcessor(node), initialSchedule_->AssignedSuperstep(node)); } } - std::vector> first_at(DAG.num_vertices(), - std::vector(num_processors, std::numeric_limits::max())); - for (const auto &node : DAG.vertices()) { - if (use_initial_schedule_recomp) { - for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) { - first_at[node][assignment.first] = std::min(first_at[node][assignment.first], assignment.second); + std::vector> firstAt(dag.NumVertices(), + std::vector(numProcessors, 
std::numeric_limits::max())); + for (const auto &node : dag.Vertices()) { + if (useInitialScheduleRecomp_) { + for (const std::pair &assignment : initialScheduleRecomp_->Assignments(node)) { + firstAt[node][assignment.first] = std::min(firstAt[node][assignment.first], assignment.second); } } else { - first_at[node][initial_schedule->assignedProcessor(node)] = std::min( - first_at[node][initial_schedule->assignedProcessor(node)], initial_schedule->assignedSuperstep(node)); + firstAt[node][initialSchedule_->AssignedProcessor(node)] = std::min( + firstAt[node][initialSchedule_->AssignedProcessor(node)], initialSchedule_->AssignedSuperstep(node)); } } - unsigned staleness = is_max_bsp ? 2 : 1; - for (const auto &node : DAG.vertices()) { - for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { + unsigned staleness = isMaxBsp_ ? 2 : 1; + for (const auto &node : dag.Vertices()) { + for (unsigned p1 = 0; p1 < numProcessors; p1++) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned p2 = 0; p2 < numProcessors; p2++) { if (p1 != p2) { const auto &key = std::make_tuple(node, p1, p2); if (cs.find(key) != cs.end()) { if (cs.at(key) == step) { model.SetMipStart( - comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)], 1); - first_at[node][p2] = std::min(first_at[node][p2], step + staleness); + commProcessorToProcessorSuperstepNodeVar_[p1][p2][step][static_cast(node)], 1); + firstAt[node][p2] = std::min(firstAt[node][p2], step + staleness); } else { model.SetMipStart( - comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)], 0); + commProcessorToProcessorSuperstepNodeVar_[p1][p2][step][static_cast(node)], 0); } } } @@ -404,196 +400,191 @@ class CoptFullScheduler : public Scheduler { } } - for (const auto &node : DAG.vertices()) { - for (unsigned proc = 0; proc < num_processors; proc++) { 
- for (unsigned step = 0; step < max_number_supersteps; step++) { - if (step >= first_at[node][proc]) { - model.SetMipStart( - comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast(node)], 1); + for (const auto &node : dag.Vertices()) { + for (unsigned proc = 0; proc < numProcessors; proc++) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + if (step >= firstAt[node][proc]) { + model.SetMipStart(commProcessorToProcessorSuperstepNodeVar_[proc][proc][step][static_cast(node)], 1); } else { - model.SetMipStart( - comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast(node)], 0); + model.SetMipStart(commProcessorToProcessorSuperstepNodeVar_[proc][proc][step][static_cast(node)], 0); } } } } - for (const auto &node : DAG.vertices()) { - for (unsigned proc = 0; proc < num_processors; proc++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { + for (const auto &node : dag.Vertices()) { + for (unsigned proc = 0; proc < numProcessors; proc++) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { if (computed[node].find(std::make_pair(proc, step)) != computed[node].end()) { - model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast(step)], 1); + model.SetMipStart(nodeToProcessorSuperstepVar_[node][proc][static_cast(step)], 1); } else { - model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast(step)], 0); + model.SetMipStart(nodeToProcessorSuperstepVar_[node][proc][static_cast(step)], 0); } } } } - std::vector>> work(max_number_supersteps, - std::vector>(num_processors, 0)); + std::vector>> work(maxNumberSupersteps_, std::vector>(numProcessors, 0)); - if (use_initial_schedule_recomp) { - for (const auto &node : initial_schedule_recomp->getInstance().vertices()) { - for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) { - work[assignment.second][assignment.first] += DAG.vertex_work_weight(node); + if (useInitialScheduleRecomp_) 
{ + for (const auto &node : initialScheduleRecomp_->GetInstance().Vertices()) { + for (const std::pair &assignment : initialScheduleRecomp_->Assignments(node)) { + work[assignment.second][assignment.first] += dag.VertexWorkWeight(node); } } } else { - for (const auto &node : initial_schedule->getInstance().vertices()) { - work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] - += DAG.vertex_work_weight(node); + for (const auto &node : initialSchedule_->GetInstance().Vertices()) { + work[initialSchedule_->AssignedSuperstep(node)][initialSchedule_->AssignedProcessor(node)] + += dag.VertexWorkWeight(node); } } - std::vector>> send(max_number_supersteps, - std::vector>(num_processors, 0)); + std::vector>> send(maxNumberSupersteps_, std::vector>(numProcessors, 0)); - std::vector>> rec(max_number_supersteps, std::vector>(num_processors, 0)); + std::vector>> rec(maxNumberSupersteps_, std::vector>(numProcessors, 0)); for (const auto &[key, val] : cs) { send[val][std::get<1>(key)] - += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key)); + += dag.VertexCommWeight(std::get<0>(key)) * arch.SendCosts(std::get<1>(key), std::get<2>(key)); rec[val][std::get<2>(key)] - += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key)); + += dag.VertexCommWeight(std::get<0>(key)) * arch.SendCosts(std::get<1>(key), std::get<2>(key)); } - for (unsigned step = 0; step < max_number_supersteps; step++) { - v_workw_t max_work = 0; - for (unsigned i = 0; i < num_processors; i++) { - if (max_work < work[step][i]) { - max_work = work[step][i]; + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + VWorkwT maxWork = 0; + for (unsigned i = 0; i < numProcessors; i++) { + if (maxWork < work[step][i]) { + maxWork = work[step][i]; } } - v_commw_t max_comm = 0; - for (unsigned i = 0; i < num_processors; i++) { - if (max_comm < send[step][i]) { - max_comm = send[step][i]; + 
VCommwT maxComm = 0; + for (unsigned i = 0; i < numProcessors; i++) { + if (maxComm < send[step][i]) { + maxComm = send[step][i]; } - if (max_comm < rec[step][i]) { - max_comm = rec[step][i]; + if (maxComm < rec[step][i]) { + maxComm = rec[step][i]; } } - model.SetMipStart(max_work_superstep_var[static_cast(step)], max_work); - model.SetMipStart(max_comm_superstep_var[static_cast(step)], max_comm); + model.SetMipStart(maxWorkSuperstepVar_[static_cast(step)], maxWork); + model.SetMipStart(maxCommSuperstepVar_[static_cast(step)], maxComm); } model.LoadMipStart(); model.SetIntParam(COPT_INTPARAM_MIPSTARTMODE, 2); } - void setupVariablesConstraintsObjective(const BspInstance &instance, Model &model) { + void SetupVariablesConstraintsObjective(const BspInstance &instance, Model &model) { /* Variables */ - assert(max_number_supersteps <= static_cast(std::numeric_limits::max())); - assert(instance.numberOfProcessors() <= static_cast(std::numeric_limits::max())); + assert(maxNumberSupersteps_ <= static_cast(std::numeric_limits::max())); + assert(instance.NumberOfProcessors() <= static_cast(std::numeric_limits::max())); // variables indicating if superstep is used at all - superstep_used_var = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_used"); + superstepUsedVar_ = model.AddVars(static_cast(maxNumberSupersteps_), COPT_BINARY, "superstep_used"); - VarArray superstep_has_comm, mergeable_superstep_penalty; - if (is_max_bsp) { + VarArray superstepHasComm, mergeableSuperstepPenalty; + if (isMaxBsp_) { // variables indicating if there is any communication in superstep - superstep_has_comm = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_has_comm"); + superstepHasComm = model.AddVars(static_cast(maxNumberSupersteps_), COPT_BINARY, "superstep_has_comm"); // variables that incentivize the schedule to be continuous - needs to be done differently for maxBsp - mergeable_superstep_penalty - = 
model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "mergeable_superstep_penalty"); + mergeableSuperstepPenalty + = model.AddVars(static_cast(maxNumberSupersteps_), COPT_BINARY, "mergeable_superstep_penalty"); } // variables for assigments of nodes to processor and superstep - node_to_processor_superstep_var = std::vector>( - instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); + nodeToProcessorSuperstepVar_ = std::vector>(instance.NumberOfVertices(), + std::vector(instance.NumberOfProcessors())); - for (const auto &node : instance.vertices()) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - node_to_processor_superstep_var[node][processor] - = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep"); + for (const auto &node : instance.Vertices()) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + nodeToProcessorSuperstepVar_[node][processor] + = model.AddVars(static_cast(maxNumberSupersteps_), COPT_BINARY, "node_to_processor_superstep"); } } /* Constraints */ - if (use_memory_constraint) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { + if (useMemoryConstraint_) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { Expr expr; - for (const auto &node : instance.vertices()) { - expr += node_to_processor_superstep_var[node][processor][static_cast(step)] - * instance.getComputationalDag().vertex_mem_weight(node); + for (const auto &node : instance.Vertices()) { + expr += nodeToProcessorSuperstepVar_[node][processor][static_cast(step)] + * instance.GetComputationalDag().VertexMemWeight(node); } - model.AddConstr(expr <= instance.getArchitecture().memoryBound(processor)); + model.AddConstr(expr 
<= instance.GetArchitecture().MemoryBound(processor)); } } } // use consecutive supersteps starting from 0 - model.AddConstr(superstep_used_var[0] == 1); + model.AddConstr(superstepUsedVar_[0] == 1); - for (unsigned int step = 0; step < max_number_supersteps - 1; step++) { - model.AddConstr(superstep_used_var[static_cast(step)] >= superstep_used_var[static_cast(step + 1)]); + for (unsigned int step = 0; step < maxNumberSupersteps_ - 1; step++) { + model.AddConstr(superstepUsedVar_[static_cast(step)] >= superstepUsedVar_[static_cast(step + 1)]); } // superstep is used at all - for (unsigned int step = 0; step < max_number_supersteps; step++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { Expr expr; - for (const auto &node : instance.vertices()) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - expr += node_to_processor_superstep_var[node][processor][static_cast(step)]; + for (const auto &node : instance.Vertices()) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + expr += nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]; } } - model.AddConstr(expr <= static_cast(instance.numberOfVertices() * instance.numberOfProcessors()) - * superstep_used_var[static_cast(step)]); + model.AddConstr(expr <= static_cast(instance.NumberOfVertices() * instance.NumberOfProcessors()) + * superstepUsedVar_[static_cast(step)]); } // nodes are assigend depending on whether recomputation is allowed or not - for (const auto &node : instance.vertices()) { + for (const auto &node : instance.Vertices()) { Expr expr; - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += node_to_processor_superstep_var[node][processor].GetVar(static_cast(step)); + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for 
(unsigned int step = 0; step < maxNumberSupersteps_; step++) { + expr += nodeToProcessorSuperstepVar_[node][processor].GetVar(static_cast(step)); } } - model.AddConstr(allow_recomputation ? expr >= .99 : expr == 1); + model.AddConstr(allowRecomputation_ ? expr >= .99 : expr == 1); } - if (allow_recomputation) { - std::cout << "setting up constraints with recomputation: " << allow_recomputation << std::endl; + if (allowRecomputation_) { + std::cout << "setting up constraints with recomputation: " << allowRecomputation_ << std::endl; } - comm_processor_to_processor_superstep_node_var = std::vector>>( - instance.numberOfProcessors(), - std::vector>(instance.numberOfProcessors(), std::vector(max_number_supersteps))); + commProcessorToProcessorSuperstepNodeVar_ = std::vector>>( + instance.NumberOfProcessors(), + std::vector>(instance.NumberOfProcessors(), std::vector(maxNumberSupersteps_))); - for (unsigned int p1 = 0; p1 < instance.numberOfProcessors(); p1++) { - for (unsigned int p2 = 0; p2 < instance.numberOfProcessors(); p2++) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - comm_processor_to_processor_superstep_node_var[p1][p2][step] = model.AddVars( - static_cast(instance.numberOfVertices()), COPT_BINARY, "comm_processor_to_processor_superstep_node"); + for (unsigned int p1 = 0; p1 < instance.NumberOfProcessors(); p1++) { + for (unsigned int p2 = 0; p2 < instance.NumberOfProcessors(); p2++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + commProcessorToProcessorSuperstepNodeVar_[p1][p2][step] = model.AddVars( + static_cast(instance.NumberOfVertices()), COPT_BINARY, "comm_processor_to_processor_superstep_node"); } } } // precedence constraint: if task is computed then all of its predecessors must have been present - for (const auto &node : instance.vertices()) { - if (instance.getComputationalDag().in_degree(node) > 0) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int 
processor = 0; processor < instance.numberOfProcessors(); processor++) { + for (const auto &node : instance.Vertices()) { + if (instance.GetComputationalDag().InDegree(node) > 0) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { Expr expr; - for (const auto &parent : instance.getComputationalDag().parents(node)) { - expr += comm_processor_to_processor_superstep_node_var[processor][processor][step] - [static_cast(parent)]; + for (const auto &parent : instance.GetComputationalDag().Parents(node)) { + expr += commProcessorToProcessorSuperstepNodeVar_[processor][processor][step][static_cast(parent)]; } - model.AddConstr(expr >= static_cast(instance.getComputationalDag().in_degree(node)) - * node_to_processor_superstep_var[node][processor][static_cast(step)]); + model.AddConstr(expr >= static_cast(instance.GetComputationalDag().InDegree(node)) + * nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]); } } } @@ -601,117 +592,116 @@ class CoptFullScheduler : public Scheduler { // combines two constraints: node can only be communicated if it is present; and node is present if it was // computed or communicated - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (const auto &node : instance.vertices()) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (const auto &node : instance.Vertices()) { Expr expr1, expr2; if (step > 0) { - for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { - if (!is_max_bsp || p_from == processor) { - expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1] - [static_cast(node)]; + for (unsigned int pFrom = 0; pFrom < 
instance.NumberOfProcessors(); pFrom++) { + if (!isMaxBsp_ || pFrom == processor) { + expr1 + += commProcessorToProcessorSuperstepNodeVar_[pFrom][processor][step - 1][static_cast(node)]; } else if (step > 1) { - expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 2] - [static_cast(node)]; + expr1 + += commProcessorToProcessorSuperstepNodeVar_[pFrom][processor][step - 2][static_cast(node)]; } } } - expr1 += node_to_processor_superstep_var[node][processor][static_cast(step)]; + expr1 += nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]; - for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { - expr2 += comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast(node)]; + for (unsigned int pTo = 0; pTo < instance.NumberOfProcessors(); pTo++) { + expr2 += commProcessorToProcessorSuperstepNodeVar_[processor][pTo][step][static_cast(node)]; } - model.AddConstr(instance.numberOfProcessors() * (expr1) >= expr2); + model.AddConstr(instance.NumberOfProcessors() * (expr1) >= expr2); } } } // synchronization cost calculation & forcing continuous schedule in maxBsp - if (is_max_bsp) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { + if (isMaxBsp_) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { Expr expr; - for (const auto &node : instance.vertices()) { - for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { - for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { - if (p_from != p_to) { - expr += comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)]; + for (const auto &node : instance.Vertices()) { + for (unsigned int pFrom = 0; pFrom < instance.NumberOfProcessors(); pFrom++) { + for (unsigned int pTo = 0; pTo < instance.NumberOfProcessors(); pTo++) { + if (pFrom != pTo) { + expr += commProcessorToProcessorSuperstepNodeVar_[pFrom][pTo][step][static_cast(node)]; } } 
} } - model.AddConstr(static_cast(instance.numberOfProcessors() * instance.numberOfProcessors() - * instance.numberOfVertices()) - * superstep_has_comm[static_cast(step)] + model.AddConstr(static_cast(instance.NumberOfProcessors() * instance.NumberOfProcessors() + * instance.NumberOfVertices()) + * superstepHasComm[static_cast(step)] >= expr); } // if step i and (i+1) has no comm, and (i+2) has work, then (i+1) and (i+2) are mergeable -> penalize - for (unsigned int step = 0; step < max_number_supersteps - 2; step++) { - model.AddConstr(superstep_used_var[static_cast(step + 2)] - superstep_has_comm[static_cast(step)] - - superstep_has_comm[static_cast(step + 1)] - <= mergeable_superstep_penalty[static_cast(step)]); + for (unsigned int step = 0; step < maxNumberSupersteps_ - 2; step++) { + model.AddConstr(superstepUsedVar_[static_cast(step + 2)] - superstepHasComm[static_cast(step)] + - superstepHasComm[static_cast(step + 1)] + <= mergeableSuperstepPenalty[static_cast(step)]); } } - max_comm_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_comm_superstep"); - // coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_comm_superstep"); + maxCommSuperstepVar_ = model.AddVars(static_cast(maxNumberSupersteps_), COPT_INTEGER, "max_comm_superstep"); + // coptModel.AddVars(maxNumberSupersteps_, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_comm_superstep"); - max_work_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_work_superstep"); - // coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_work_superstep"); + maxWorkSuperstepVar_ = model.AddVars(static_cast(maxNumberSupersteps_), COPT_INTEGER, "max_work_superstep"); + // coptModel.AddVars(maxNumberSupersteps_, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_work_superstep"); - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < 
instance.numberOfProcessors(); processor++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { Expr expr; - for (unsigned int node = 0; node < instance.numberOfVertices(); node++) { - expr += instance.getComputationalDag().vertex_work_weight(node) - * node_to_processor_superstep_var[node][processor][static_cast(step)]; + for (unsigned int node = 0; node < instance.NumberOfVertices(); node++) { + expr += instance.GetComputationalDag().VertexWorkWeight(node) + * nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]; } - model.AddConstr(max_work_superstep_var[static_cast(step)] >= expr); + model.AddConstr(maxWorkSuperstepVar_[static_cast(step)] >= expr); } } - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { Expr expr; - for (const auto &node : instance.vertices()) { - for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { - if (processor != p_to) { - expr += instance.getComputationalDag().vertex_comm_weight(node) * instance.sendCosts(processor, p_to) - * comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast(node)]; + for (const auto &node : instance.Vertices()) { + for (unsigned int pTo = 0; pTo < instance.NumberOfProcessors(); pTo++) { + if (processor != pTo) { + expr += instance.GetComputationalDag().VertexCommWeight(node) * instance.SendCosts(processor, pTo) + * commProcessorToProcessorSuperstepNodeVar_[processor][pTo][step][static_cast(node)]; } } } - model.AddConstr(max_comm_superstep_var[static_cast(step)] >= expr); + model.AddConstr(maxCommSuperstepVar_[static_cast(step)] >= expr); } } - for (unsigned int step = 0; step < 
max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { Expr expr; - for (const auto &node : instance.vertices()) { - for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { - if (processor != p_from) { - expr - += instance.getComputationalDag().vertex_comm_weight(node) * instance.sendCosts(p_from, processor) - * comm_processor_to_processor_superstep_node_var[p_from][processor][step][static_cast(node)]; + for (const auto &node : instance.Vertices()) { + for (unsigned int pFrom = 0; pFrom < instance.NumberOfProcessors(); pFrom++) { + if (processor != pFrom) { + expr += instance.GetComputationalDag().VertexCommWeight(node) * instance.SendCosts(pFrom, processor) + * commProcessorToProcessorSuperstepNodeVar_[pFrom][processor][step][static_cast(node)]; } } } - model.AddConstr(max_comm_superstep_var[static_cast(step)] >= expr); + model.AddConstr(maxCommSuperstepVar_[static_cast(step)] >= expr); } } // vertex type restrictions - for (const vertex_idx_t &node : instance.vertices()) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - if (!instance.isCompatible(node, processor)) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - model.AddConstr(node_to_processor_superstep_var[node][processor][static_cast(step)] == 0); + for (const VertexIdxT &node : instance.Vertices()) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + if (!instance.IsCompatible(node, processor)) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + model.AddConstr(nodeToProcessorSuperstepVar_[node][processor][static_cast(step)] == 0); } } } @@ -722,93 +712,93 @@ class CoptFullScheduler : public Scheduler { */ Expr expr; - 
if (is_max_bsp) { - VarArray max_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_superstep"); - for (unsigned int step = 0; step < max_number_supersteps; step++) { - model.AddConstr(max_superstep_var[static_cast(step)] >= max_work_superstep_var[static_cast(step)]); + if (isMaxBsp_) { + VarArray maxSuperstepVar = model.AddVars(static_cast(maxNumberSupersteps_), COPT_INTEGER, "max_superstep"); + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + model.AddConstr(maxSuperstepVar[static_cast(step)] >= maxWorkSuperstepVar_[static_cast(step)]); if (step > 0) { - model.AddConstr(max_superstep_var[static_cast(step)] - >= instance.communicationCosts() * max_comm_superstep_var[static_cast(step - 1)]); + model.AddConstr(maxSuperstepVar[static_cast(step)] + >= instance.CommunicationCosts() * maxCommSuperstepVar_[static_cast(step - 1)]); } - expr += max_superstep_var[static_cast(step)]; - expr += instance.synchronisationCosts() * superstep_has_comm[static_cast(step)]; - expr += instance.synchronisationCosts() * mergeable_superstep_penalty[static_cast(step)]; + expr += maxSuperstepVar[static_cast(step)]; + expr += instance.SynchronisationCosts() * superstepHasComm[static_cast(step)]; + expr += instance.SynchronisationCosts() * mergeableSuperstepPenalty[static_cast(step)]; } } else { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += max_work_superstep_var[static_cast(step)] - + instance.communicationCosts() * max_comm_superstep_var[static_cast(step)] - + instance.synchronisationCosts() * superstep_used_var[static_cast(step)]; + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + expr += maxWorkSuperstepVar_[static_cast(step)] + + instance.CommunicationCosts() * maxCommSuperstepVar_[static_cast(step)] + + instance.SynchronisationCosts() * superstepUsedVar_[static_cast(step)]; } - expr -= instance.synchronisationCosts(); + expr -= instance.SynchronisationCosts(); } 
model.SetObjective(expr, COPT_MINIMIZE); } - RETURN_STATUS run_scheduler(BspScheduleCS &schedule) { - auto &instance = schedule.getInstance(); + ReturnStatus RunSchedulerInternal(BspScheduleCS &schedule) { + auto &instance = schedule.GetInstance(); Envr env; Model model = env.CreateModel("bsp_schedule"); - setupVariablesConstraintsObjective(instance, model); + SetupVariablesConstraintsObjective(instance, model); - if (use_initial_schedule) { - loadInitialSchedule(model, instance); + if (useInitialSchedule_) { + LoadInitialSchedule(model, instance); } - computeScheduleBase(schedule, model); + ComputeScheduleBase(schedule, model); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - constructBspScheduleFromSolution(schedule, true); - return RETURN_STATUS::OSP_SUCCESS; + ConstructBspScheduleFromSolution(schedule, true); + return ReturnStatus::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } else { if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - constructBspScheduleFromSolution(schedule, true); - return RETURN_STATUS::BEST_FOUND; + ConstructBspScheduleFromSolution(schedule, true); + return ReturnStatus::BEST_FOUND; } else { - return RETURN_STATUS::TIMEOUT; + return ReturnStatus::TIMEOUT; } } } public: CoptFullScheduler(unsigned steps = 5) - : allow_recomputation(false), - use_memory_constraint(false), - use_initial_schedule(false), - write_solutions_found(false), - initial_schedule(0), - max_number_supersteps(steps) { + : allowRecomputation_(false), + useMemoryConstraint_(false), + useInitialSchedule_(false), + writeSolutionsFound_(false), + initialSchedule_(0), + maxNumberSupersteps_(steps) { // solution_callback.comm_processor_to_processor_superstep_node_var_ptr = // &comm_processor_to_processor_superstep_node_var; - // solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; + // 
solution_callback.node_to_processor_superstep_var_ptr = &nodeToProcessorSuperstepVar_; } - CoptFullScheduler(const BspScheduleCS &schedule) - : allow_recomputation(false), - use_memory_constraint(false), - use_initial_schedule(true), - write_solutions_found(false), - initial_schedule(&schedule), - max_number_supersteps(schedule.numberOfSupersteps()) { + CoptFullScheduler(const BspScheduleCS &schedule) + : allowRecomputation_(false), + useMemoryConstraint_(false), + useInitialSchedule_(true), + writeSolutionsFound_(false), + initialSchedule_(&schedule), + maxNumberSupersteps_(schedule.NumberOfSupersteps()) { // solution_callback.comm_processor_to_processor_superstep_node_var_ptr = // &comm_processor_to_processor_superstep_node_var; - // solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; + // solution_callback.node_to_processor_superstep_var_ptr = &nodeToProcessorSuperstepVar_; } - CoptFullScheduler(const BspScheduleRecomp &schedule) - : allow_recomputation(true), - use_memory_constraint(false), - use_initial_schedule_recomp(true), - write_solutions_found(false), - initial_schedule_recomp(&schedule), - max_number_supersteps(schedule.numberOfSupersteps()) {} + CoptFullScheduler(const BspScheduleRecomp &schedule) + : allowRecomputation_(true), + useMemoryConstraint_(false), + useInitialScheduleRecomp_(true), + writeSolutionsFound_(false), + initialScheduleRecomp_(&schedule), + maxNumberSupersteps_(schedule.NumberOfSupersteps()) {} virtual ~CoptFullScheduler() = default; @@ -822,82 +812,80 @@ class CoptFullScheduler : public Scheduler { * @throws std::invalid_argument if the instance parameters do not * agree with those of the initial schedule's instance */ - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - BspScheduleCS schedule_cs(schedule.getInstance()); - RETURN_STATUS status = computeScheduleCS(schedule_cs); - if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) { - schedule 
= std::move(schedule_cs); + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + BspScheduleCS scheduleCs(schedule.GetInstance()); + ReturnStatus status = RunSchedulerInternal(scheduleCs); + if (status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND) { + schedule = std::move(scheduleCs); return status; } else { return status; } } - virtual RETURN_STATUS computeScheduleWithTimeLimit(BspSchedule &schedule, unsigned timeLimit) { - timeLimitSeconds = timeLimit; - return computeSchedule(schedule); + virtual ReturnStatus ComputeScheduleWithTimeLimit(BspSchedule &schedule, unsigned timeLimit) { + timeLimitSeconds_ = timeLimit; + return ComputeSchedule(schedule); } - virtual RETURN_STATUS computeMaxBspSchedule(MaxBspSchedule &schedule) { - MaxBspScheduleCS schedule_cs(schedule.getInstance()); - RETURN_STATUS status = computeMaxBspScheduleCS(schedule_cs); - if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) { - schedule = std::move(schedule_cs); + virtual ReturnStatus ComputeMaxBspSchedule(MaxBspSchedule &schedule) { + MaxBspScheduleCS scheduleCs(schedule.GetInstance()); + ReturnStatus status = ComputeMaxBspScheduleCs(scheduleCs); + if (status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND) { + schedule = std::move(scheduleCs); return status; } else { return status; } } - virtual RETURN_STATUS computeMaxBspScheduleCS(MaxBspScheduleCS &schedule) { - allow_recomputation = false; - is_max_bsp = true; - return run_scheduler(schedule); + virtual ReturnStatus ComputeMaxBspScheduleCs(MaxBspScheduleCS &schedule) { + allowRecomputation_ = false; + isMaxBsp_ = true; + return RunSchedulerInternal(schedule); } - virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) override { - allow_recomputation = false; - is_max_bsp = false; - return run_scheduler(schedule); + virtual ReturnStatus ComputeScheduleCS(BspScheduleCS &schedule) override { + allowRecomputation_ = false; + isMaxBsp_ = false; 
+ return RunSchedulerInternal(schedule); } - virtual RETURN_STATUS computeScheduleRecomp(BspScheduleRecomp &schedule) { - allow_recomputation = true; - is_max_bsp = false; + virtual ReturnStatus ComputeScheduleRecomp(BspScheduleRecomp &schedule) { + allowRecomputation_ = true; + isMaxBsp_ = false; Envr env; Model model = env.CreateModel("bsp_schedule"); - setupVariablesConstraintsObjective(schedule.getInstance(), model); + SetupVariablesConstraintsObjective(schedule.GetInstance(), model); - if (use_initial_schedule || use_initial_schedule_recomp) { - loadInitialSchedule(model, schedule.getInstance()); + if (useInitialSchedule_ || useInitialScheduleRecomp_) { + LoadInitialSchedule(model, schedule.GetInstance()); } - computeScheduleBase(schedule, model); + ComputeScheduleBase(schedule, model); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - constructBspScheduleRecompFromSolution(schedule, true); - return RETURN_STATUS::OSP_SUCCESS; + ConstructBspScheduleRecompFromSolution(schedule, true); + return ReturnStatus::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } else { if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - constructBspScheduleRecompFromSolution(schedule, true); - return RETURN_STATUS::BEST_FOUND; + ConstructBspScheduleRecompFromSolution(schedule, true); + return ReturnStatus::BEST_FOUND; } else { - return RETURN_STATUS::TIMEOUT; + return ReturnStatus::TIMEOUT; } } }; - virtual void computeScheduleBase(const BspScheduleRecomp &schedule, Model &model) { - if (timeLimitSeconds > 0) { - model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds); - } + virtual void ComputeScheduleBase(const BspScheduleRecomp &schedule, Model &model) { + model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds_); model.SetIntParam(COPT_INTPARAM_THREADS, 128); model.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); @@ -910,16 +898,16 @@ class 
CoptFullScheduler : public Scheduler { model.SetIntParam(COPT_INTPARAM_TREECUTLEVEL, 2); // model.SetIntParam(COPT_INTPARAM_DIVINGHEURLEVEL, 2); - if (write_solutions_found) { - WriteSolutionCallback solution_callback; - solution_callback.instance_ptr = &schedule.getInstance(); - solution_callback.comm_processor_to_processor_superstep_node_var_ptr = &comm_processor_to_processor_superstep_node_var; - solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; - solution_callback.solution_file_prefix_cb = solution_file_prefix; - solution_callback.write_solutions_path_cb = write_solutions_path; - solution_callback.allow_recomputation_cb = allow_recomputation; - std::cout << "setting up callback with recomputation: " << allow_recomputation << std::endl; - model.SetCallback(&solution_callback, COPT_CBCONTEXT_MIPSOL); + if (writeSolutionsFound_) { + WriteSolutionCallback solutionCallback; + solutionCallback.instancePtr_ = &schedule.GetInstance(); + solutionCallback.commProcessorToProcessorSuperstepNodeVarPtr_ = &commProcessorToProcessorSuperstepNodeVar_; + solutionCallback.nodeToProcessorSuperstepVarPtr_ = &nodeToProcessorSuperstepVar_; + solutionCallback.solutionFilePrefixCb_ = solutionFilePrefix_; + solutionCallback.writeSolutionsPathCb_ = writeSolutionsPath_; + solutionCallback.allowRecomputationCb_ = allowRecomputation_; + std::cout << "setting up callback with recomputation: " << allowRecomputation_ << std::endl; + model.SetCallback(&solutionCallback, COPT_CBCONTEXT_MIPSOL); } model.Solve(); @@ -934,12 +922,12 @@ class CoptFullScheduler : public Scheduler { * * @param schedule The provided schedule. 
*/ - inline void setInitialSolutionFromBspSchedule(const BspScheduleCS &schedule) { - initial_schedule = &schedule; + inline void SetInitialSolutionFromBspSchedule(const BspScheduleCS &schedule) { + initialSchedule_ = &schedule; - max_number_supersteps = schedule.numberOfSupersteps(); + maxNumberSupersteps_ = schedule.NumberOfSupersteps(); - use_initial_schedule = true; + useInitialSchedule_ = true; } /** @@ -957,14 +945,14 @@ class CoptFullScheduler : public Scheduler { * supersteps is less than the number of supersteps in * the initial solution. */ - void setMaxNumberOfSupersteps(unsigned max) { - if (use_initial_schedule && max < initial_schedule->numberOfSupersteps()) { + void SetMaxNumberOfSupersteps(unsigned max) { + if (useInitialSchedule_ && max < initialSchedule_->NumberOfSupersteps()) { throw std::invalid_argument("Invalid Argument while setting " "max number of supersteps to a value " "which is less than the number of " "supersteps of the initial schedule!"); } - max_number_supersteps = max; + maxNumberSupersteps_ = max; } /** @@ -978,10 +966,10 @@ class CoptFullScheduler : public Scheduler { * @param path The path where the solutions will be written. * @param file_prefix The prefix that will be used for the solution files. */ - inline void enableWriteIntermediateSol(std::string path, std::string file_prefix) { - write_solutions_found = true; - write_solutions_path = path; - solution_file_prefix = file_prefix; + inline void EnableWriteIntermediateSol(std::string path, std::string filePrefix) { + writeSolutionsFound_ = true; + writeSolutionsPath_ = path; + solutionFilePrefix_ = filePrefix; } /** @@ -991,7 +979,7 @@ class CoptFullScheduler : public Scheduler { * calling this function, the `enableWriteIntermediateSol` function needs * to be called again in order to enable writing of intermediate solutions. 
*/ - inline void disableWriteIntermediateSol() { write_solutions_found = false; } + inline void DisableWriteIntermediateSol() { writeSolutionsFound_ = false; } /** * @brief Set the use of memory constraint. @@ -1002,28 +990,28 @@ class CoptFullScheduler : public Scheduler { * * @param use True if the memory constraint should be used, false otherwise. */ - inline void setUseMemoryConstraint(bool use) { use_memory_constraint = use; } + inline void SetUseMemoryConstraint(bool use) { useMemoryConstraint_ = use; } /** * @brief Get the maximum number of supersteps. * * @return The maximum number of supersteps. */ - inline unsigned getMaxNumberOfSupersteps() const { return max_number_supersteps; } + inline unsigned GetMaxNumberOfSupersteps() const { return maxNumberSupersteps_; } /** * @brief Sets the time limit for the ILP solving. * * @param time_limit_seconds_ The time limit in seconds. */ - inline void setTimeLimitSeconds(unsigned time_limit_seconds_) { timeLimitSeconds = time_limit_seconds_; } + inline void SetTimeLimitSeconds(unsigned timeLimitSeconds) { timeLimitSeconds_ = timeLimitSeconds; } /** * @brief Get the name of the schedule. * * @return The name of the schedule. */ - virtual std::string getScheduleName() const override { return "FullIlp"; } + virtual std::string GetScheduleName() const override { return "FullIlp"; } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp index e0369177..14e44b11 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp @@ -32,164 +32,162 @@ namespace osp { * a BSP schedule, from a starting superstep to and ending superstep. 
*/ -template +template class CoptPartialScheduler { - static_assert(is_computational_dag_v, "CoptPartialScheduler can only be used with computational DAGs."); + static_assert(isComputationalDagV, "CoptPartialScheduler can only be used with computational DAGs."); - using KeyTriple = std::tuple, unsigned int, unsigned int>; + using KeyTriple = std::tuple, unsigned int, unsigned int>; - unsigned int timeLimitSeconds = 600; + unsigned int timeLimitSeconds_ = 600; protected: - unsigned start_superstep = 1, end_superstep = 3; + unsigned startSuperstep_ = 1, endSuperstep_ = 3; - std::vector> node_global_ID; - std::unordered_map, vertex_idx_t> node_local_ID; + std::vector> nodeGlobalId_; + std::unordered_map, VertexIdxT> nodeLocalId_; - std::vector> source_global_ID; - std::unordered_map, vertex_idx_t> source_local_ID; + std::vector> sourceGlobalId_; + std::unordered_map, VertexIdxT> sourceLocalId_; - std::vector> node_needed_after_on_proc, source_needed_after_on_proc; - std::vector, unsigned, unsigned, unsigned>> fixed_comm_steps; - std::set> source_present_before; + std::vector> nodeNeededAfterOnProc_, sourceNeededAfterOnProc_; + std::vector, unsigned, unsigned, unsigned>> fixedCommSteps_; + std::set> sourcePresentBefore_; - unsigned max_number_supersteps; + unsigned maxNumberSupersteps_; - VarArray superstep_used_var; - VarArray keep_fixed_comm_step; + VarArray superstepUsedVar_; + VarArray keepFixedCommStep_; - std::vector> node_to_processor_superstep_var; - std::vector>> comm_processor_to_processor_superstep_node_var; - std::vector> comm_to_processor_superstep_source_var; + std::vector> nodeToProcessorSuperstepVar_; + std::vector>> commProcessorToProcessorSuperstepNodeVar_; + std::vector> commToProcessorSuperstepSourceVar_; - bool has_fixed_comm_in_preceding_step; + bool hasFixedCommInPrecedingStep_; - void setupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model); + void SetupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model 
&model); - void setInitialSolution(const BspScheduleCS &schedule, Model &model); + void SetInitialSolution(const BspScheduleCS &schedule, Model &model); - void updateSchedule(BspScheduleCS &schedule) const; + void UpdateSchedule(BspScheduleCS &schedule) const; - void setupVertexMaps(const BspScheduleCS &schedule); + void SetupVertexMaps(const BspScheduleCS &schedule); public: - virtual RETURN_STATUS improveSchedule(BspScheduleCS &schedule); + virtual ReturnStatus ImproveSchedule(BspScheduleCS &schedule); - virtual std::string getScheduleName() const { return "ILPPartial"; } + virtual std::string GetScheduleName() const { return "ILPPartial"; } - virtual void setTimeLimitSeconds(unsigned int limit) { timeLimitSeconds = limit; } + virtual void SetTimeLimitSeconds(unsigned int limit) { timeLimitSeconds_ = limit; } - inline unsigned int getTimeLimitSeconds() const { return timeLimitSeconds; } + inline unsigned int GetTimeLimitSeconds() const { return timeLimitSeconds_; } - virtual void setStartAndEndSuperstep(unsigned start_, unsigned end_) { - start_superstep = start_; - end_superstep = end_; + virtual void SetStartAndEndSuperstep(unsigned start, unsigned end) { + startSuperstep_ = start; + endSuperstep_ = end; } virtual ~CoptPartialScheduler() = default; }; -template -RETURN_STATUS CoptPartialScheduler::improveSchedule(BspScheduleCS &schedule) { +template +ReturnStatus CoptPartialScheduler::ImproveSchedule(BspScheduleCS &schedule) { Envr env; Model model = env.CreateModel("bsp_schedule_partial"); - setupVertexMaps(schedule); + SetupVertexMaps(schedule); - setupVariablesConstraintsObjective(schedule, model); + SetupVariablesConstraintsObjective(schedule, model); - setInitialSolution(schedule, model); + SetInitialSolution(schedule, model); - model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds); + model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds_); model.SetIntParam(COPT_INTPARAM_THREADS, 128); model.Solve(); if 
(model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - updateSchedule(schedule); + UpdateSchedule(schedule); } if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } else { if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - return RETURN_STATUS::BEST_FOUND; + return ReturnStatus::BEST_FOUND; } else { - return RETURN_STATUS::TIMEOUT; + return ReturnStatus::TIMEOUT; } } } -template -void CoptPartialScheduler::setInitialSolution(const BspScheduleCS &schedule, Model &model) { - const Graph_t &DAG = schedule.getInstance().getComputationalDag(); - const unsigned &num_processors = schedule.getInstance().numberOfProcessors(); - const auto &cs = schedule.getCommunicationSchedule(); +template +void CoptPartialScheduler::SetInitialSolution(const BspScheduleCS &schedule, Model &model) { + const GraphT &dag = schedule.GetInstance().GetComputationalDag(); + const unsigned &numProcessors = schedule.GetInstance().NumberOfProcessors(); + const auto &cs = schedule.GetCommunicationSchedule(); - for (const vertex_idx_t &node : DAG.vertices()) { - if (node_local_ID.find(node) == node_local_ID.end()) { + for (const VertexIdxT &node : dag.Vertices()) { + if (nodeLocalId_.find(node) == nodeLocalId_.end()) { continue; } - for (unsigned proc = 0; proc < num_processors; proc++) { - for (unsigned step = 0; step < max_number_supersteps; ++step) { - if (schedule.assignedProcessor(node) == proc && schedule.assignedSuperstep(node) == start_superstep + step) { - model.SetMipStart(node_to_processor_superstep_var[node_local_ID[node]][proc][static_cast(step)], 1); + for (unsigned proc = 0; proc < numProcessors; proc++) { + for (unsigned step = 0; step < maxNumberSupersteps_; ++step) { + if (schedule.AssignedProcessor(node) == proc && schedule.AssignedSuperstep(node) == 
startSuperstep_ + step) { + model.SetMipStart(nodeToProcessorSuperstepVar_[nodeLocalId_[node]][proc][static_cast(step)], 1); } else { - model.SetMipStart(node_to_processor_superstep_var[node_local_ID[node]][proc][static_cast(step)], 0); + model.SetMipStart(nodeToProcessorSuperstepVar_[nodeLocalId_[node]][proc][static_cast(step)], 0); } } } } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { - model.SetMipStart(keep_fixed_comm_step[static_cast(index)], 1); + for (unsigned index = 0; index < fixedCommSteps_.size(); ++index) { + model.SetMipStart(keepFixedCommStep_[static_cast(index)], 1); } - for (const auto &node : DAG.vertices()) { - if (node_local_ID.find(node) == node_local_ID.end()) { + for (const auto &node : dag.Vertices()) { + if (nodeLocalId_.find(node) == nodeLocalId_.end()) { continue; } - for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { + for (unsigned p1 = 0; p1 < numProcessors; p1++) { + for (unsigned p2 = 0; p2 < numProcessors; p2++) { if (p1 == p2) { continue; } - for (unsigned step = 0; step < max_number_supersteps && step <= end_superstep - start_superstep; step++) { + for (unsigned step = 0; step < maxNumberSupersteps_ && step <= endSuperstep_ - startSuperstep_; step++) { const auto &key = std::make_tuple(node, p1, p2); - if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step) { + if (cs.find(key) != cs.end() && cs.at(key) == startSuperstep_ + step) { model.SetMipStart( - comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node_local_ID[node])], 1); + commProcessorToProcessorSuperstepNodeVar_[p1][p2][step][static_cast(nodeLocalId_[node])], 1); } else { model.SetMipStart( - comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node_local_ID[node])], 0); + commProcessorToProcessorSuperstepNodeVar_[p1][p2][step][static_cast(nodeLocalId_[node])], 0); } } } } } - for (const auto &source : DAG.vertices()) { - if 
(source_local_ID.find(source) == source_local_ID.end()) { + for (const auto &source : dag.Vertices()) { + if (sourceLocalId_.find(source) == sourceLocalId_.end()) { continue; } - for (unsigned proc = 0; proc < num_processors; proc++) { - if (proc == schedule.assignedProcessor(source)) { + for (unsigned proc = 0; proc < numProcessors; proc++) { + if (proc == schedule.AssignedProcessor(source)) { continue; } - for (unsigned step = 0; step < max_number_supersteps + 1 && step <= end_superstep - start_superstep + 1; step++) { - const auto &key = std::make_tuple(source, schedule.assignedProcessor(source), proc); - if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step - 1) { - model.SetMipStart( - comm_to_processor_superstep_source_var[proc][step][static_cast(source_local_ID[source])], 1); + for (unsigned step = 0; step < maxNumberSupersteps_ + 1 && step <= endSuperstep_ - startSuperstep_ + 1; step++) { + const auto &key = std::make_tuple(source, schedule.AssignedProcessor(source), proc); + if (cs.find(key) != cs.end() && cs.at(key) == startSuperstep_ + step - 1) { + model.SetMipStart(commToProcessorSuperstepSourceVar_[proc][step][static_cast(sourceLocalId_[source])], 1); } else if (step > 0) { - model.SetMipStart( - comm_to_processor_superstep_source_var[proc][step][static_cast(source_local_ID[source])], 0); + model.SetMipStart(commToProcessorSuperstepSourceVar_[proc][step][static_cast(sourceLocalId_[source])], 0); } } } @@ -199,47 +197,46 @@ void CoptPartialScheduler::setInitialSolution(const BspScheduleCS -void CoptPartialScheduler::updateSchedule(BspScheduleCS &schedule) const { - unsigned number_of_supersteps = 0; +template +void CoptPartialScheduler::UpdateSchedule(BspScheduleCS &schedule) const { + unsigned numberOfSupersteps = 0; - while (number_of_supersteps < max_number_supersteps - && superstep_used_var[static_cast(number_of_supersteps)].Get(COPT_DBLINFO_VALUE) >= .99) { - number_of_supersteps++; + while (numberOfSupersteps < maxNumberSupersteps_ 
+ && superstepUsedVar_[static_cast(numberOfSupersteps)].Get(COPT_DBLINFO_VALUE) >= .99) { + numberOfSupersteps++; } - const int offset = static_cast(number_of_supersteps) - static_cast(end_superstep - start_superstep + 1); + const int offset = static_cast(numberOfSupersteps) - static_cast(endSuperstep_ - startSuperstep_ + 1); - for (vertex_idx_t node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - if (schedule.assignedSuperstep(node) > end_superstep) { - schedule.setAssignedSuperstep(node, static_cast(static_cast(schedule.assignedSuperstep(node)) + offset)); + for (VertexIdxT node = 0; node < schedule.GetInstance().NumberOfVertices(); node++) { + if (schedule.AssignedSuperstep(node) > endSuperstep_) { + schedule.SetAssignedSuperstep(node, static_cast(static_cast(schedule.AssignedSuperstep(node)) + offset)); } } - for (vertex_idx_t node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - if (node_local_ID.find(node) == node_local_ID.end()) { + for (VertexIdxT node = 0; node < schedule.GetInstance().NumberOfVertices(); node++) { + if (nodeLocalId_.find(node) == nodeLocalId_.end()) { continue; } - for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (node_to_processor_superstep_var[node_local_ID.at(node)][processor][static_cast(step)].Get( - COPT_DBLINFO_VALUE) + for (unsigned processor = 0; processor < schedule.GetInstance().NumberOfProcessors(); processor++) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + if (nodeToProcessorSuperstepVar_[nodeLocalId_.at(node)][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { - schedule.setAssignedSuperstep(node, start_superstep + step); - schedule.setAssignedProcessor(node, processor); + schedule.SetAssignedSuperstep(node, startSuperstep_ + step); + schedule.SetAssignedProcessor(node, processor); } } } } - std::map &commSchedule = 
schedule.getCommunicationSchedule(); + std::map &commSchedule = schedule.GetCommunicationSchedule(); std::vector toErase; - for (const auto &[key, val] : schedule.getCommunicationSchedule()) { - if (val > end_superstep) { + for (const auto &[key, val] : schedule.GetCommunicationSchedule()) { + if (val > endSuperstep_) { commSchedule[key] = static_cast(static_cast(val) + offset); - } else if (static_cast(val) >= static_cast(start_superstep) - 1) { + } else if (static_cast(val) >= static_cast(startSuperstep_) - 1) { toErase.push_back(key); } } @@ -247,25 +244,25 @@ void CoptPartialScheduler::updateSchedule(BspScheduleCS &sched commSchedule.erase(key); } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { - const auto &entry = fixed_comm_steps[index]; - if (keep_fixed_comm_step[static_cast(index)].Get(COPT_DBLINFO_VALUE) >= .99 - && std::get<3>(entry) < start_superstep + number_of_supersteps) { + for (unsigned index = 0; index < fixedCommSteps_.size(); ++index) { + const auto &entry = fixedCommSteps_[index]; + if (keepFixedCommStep_[static_cast(index)].Get(COPT_DBLINFO_VALUE) >= .99 + && std::get<3>(entry) < startSuperstep_ + numberOfSupersteps) { commSchedule[std::make_tuple(std::get<0>(entry), std::get<1>(entry), std::get<2>(entry))] = std::get<3>(entry); } else { - commSchedule[std::make_tuple(std::get<0>(entry), std::get<1>(entry), std::get<2>(entry))] = start_superstep - 1; + commSchedule[std::make_tuple(std::get<0>(entry), std::get<1>(entry), std::get<2>(entry))] = startSuperstep_ - 1; } } - for (vertex_idx_t node = 0; node < node_global_ID.size(); node++) { - for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) { - for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) { - if (p_from != p_to) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)].Get( + for 
(VertexIdxT node = 0; node < nodeGlobalId_.size(); node++) { + for (unsigned int pFrom = 0; pFrom < schedule.GetInstance().NumberOfProcessors(); pFrom++) { + for (unsigned int pTo = 0; pTo < schedule.GetInstance().NumberOfProcessors(); pTo++) { + if (pFrom != pTo) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + if (commProcessorToProcessorSuperstepNodeVar_[pFrom][pTo][step][static_cast(node)].Get( COPT_DBLINFO_VALUE) >= .99) { - commSchedule[std::make_tuple(node_global_ID[node], p_from, p_to)] = start_superstep + step; + commSchedule[std::make_tuple(nodeGlobalId_[node], pFrom, pTo)] = startSuperstep_ + step; break; } } @@ -274,15 +271,13 @@ void CoptPartialScheduler::updateSchedule(BspScheduleCS &sched } } - for (vertex_idx_t source = 0; source < source_global_ID.size(); source++) { - for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) { - if (source_present_before.find(std::make_pair(source, p_to)) == source_present_before.end()) { - for (unsigned int step = 0; step < max_number_supersteps + 1; step++) { - if (comm_to_processor_superstep_source_var[p_to][step][static_cast(source)].Get(COPT_DBLINFO_VALUE) - >= .99) { - commSchedule[std::make_tuple( - source_global_ID[source], schedule.assignedProcessor(source_global_ID[source]), p_to)] - = start_superstep - 1 + step; + for (VertexIdxT source = 0; source < sourceGlobalId_.size(); source++) { + for (unsigned int pTo = 0; pTo < schedule.GetInstance().NumberOfProcessors(); pTo++) { + if (sourcePresentBefore_.find(std::make_pair(source, pTo)) == sourcePresentBefore_.end()) { + for (unsigned int step = 0; step < maxNumberSupersteps_ + 1; step++) { + if (commToProcessorSuperstepSourceVar_[pTo][step][static_cast(source)].Get(COPT_DBLINFO_VALUE) >= .99) { + commSchedule[std::make_tuple(sourceGlobalId_[source], schedule.AssignedProcessor(sourceGlobalId_[source]), pTo)] + = startSuperstep_ - 1 + step; break; } } @@ -290,157 +285,155 @@ void 
CoptPartialScheduler::updateSchedule(BspScheduleCS &sched } } - schedule.cleanCommSchedule(); - schedule.shrinkByMergingSupersteps(); -}; + schedule.CleanCommSchedule(); + schedule.ShrinkByMergingSupersteps(); +} -template -void CoptPartialScheduler::setupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model) { - const vertex_idx_t num_vertices = static_cast>(node_global_ID.size()); - const vertex_idx_t num_sources = static_cast>(source_global_ID.size()); - const unsigned num_processors = schedule.getInstance().numberOfProcessors(); +template +void CoptPartialScheduler::SetupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model) { + const VertexIdxT numVertices = static_cast>(nodeGlobalId_.size()); + const VertexIdxT numSources = static_cast>(sourceGlobalId_.size()); + const unsigned numProcessors = schedule.GetInstance().NumberOfProcessors(); /* Variables */ // variables indicating if superstep is used at all - superstep_used_var = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_used"); - VarArray superstep_has_comm = model.AddVars(static_cast(max_number_supersteps + 1), COPT_BINARY, "superstep_has_comm"); - VarArray has_comm_at_end = model.AddVars(1, COPT_BINARY, "has_comm_at_end"); + superstepUsedVar_ = model.AddVars(static_cast(maxNumberSupersteps_), COPT_BINARY, "superstep_used"); + VarArray superstepHasComm = model.AddVars(static_cast(maxNumberSupersteps_ + 1), COPT_BINARY, "superstepHasComm"); + VarArray hasCommAtEnd = model.AddVars(1, COPT_BINARY, "hasCommAtEnd"); // variables for assigments of nodes to processor and superstep - node_to_processor_superstep_var = std::vector>(num_vertices, std::vector(num_processors)); + nodeToProcessorSuperstepVar_ = std::vector>(numVertices, std::vector(numProcessors)); - for (unsigned int node = 0; node < num_vertices; node++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { - node_to_processor_superstep_var[node][processor] - 
= model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep"); + for (unsigned int node = 0; node < numVertices; node++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { + nodeToProcessorSuperstepVar_[node][processor] + = model.AddVars(static_cast(maxNumberSupersteps_), COPT_BINARY, "node_to_processor_superstep"); } } // communicate node from p1 to p2 at superstep - comm_processor_to_processor_superstep_node_var = std::vector>>( - num_processors, std::vector>(num_processors, std::vector(max_number_supersteps))); + commProcessorToProcessorSuperstepNodeVar_ = std::vector>>( + numProcessors, std::vector>(numProcessors, std::vector(maxNumberSupersteps_))); - for (unsigned int p1 = 0; p1 < num_processors; p1++) { - for (unsigned int p2 = 0; p2 < num_processors; p2++) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - comm_processor_to_processor_superstep_node_var[p1][p2][step] - = model.AddVars(static_cast(num_vertices), COPT_BINARY, "comm_processor_to_processor_superstep_node"); + for (unsigned int p1 = 0; p1 < numProcessors; p1++) { + for (unsigned int p2 = 0; p2 < numProcessors; p2++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + commProcessorToProcessorSuperstepNodeVar_[p1][p2][step] + = model.AddVars(static_cast(numVertices), COPT_BINARY, "comm_processor_to_processor_superstep_node"); } } } - // communicate nodes in supersteps smaller than start_superstep - comm_to_processor_superstep_source_var - = std::vector>(num_processors, std::vector(max_number_supersteps + 1)); - std::vector> present_on_processor_superstep_source_var - = std::vector>(num_processors, std::vector(max_number_supersteps)); + // communicate nodes in supersteps smaller than startSuperstep_ + commToProcessorSuperstepSourceVar_ + = std::vector>(numProcessors, std::vector(maxNumberSupersteps_ + 1)); + std::vector> presentOnProcessorSuperstepSourceVar + = std::vector>(numProcessors, 
std::vector(maxNumberSupersteps_)); - for (unsigned int proc = 0; proc < num_processors; proc++) { - for (unsigned int step = 0; step < max_number_supersteps + 1; step++) { - comm_to_processor_superstep_source_var[proc][step] - = model.AddVars(static_cast(num_sources), COPT_BINARY, "comm_to_processor_superstep_source"); + for (unsigned int proc = 0; proc < numProcessors; proc++) { + for (unsigned int step = 0; step < maxNumberSupersteps_ + 1; step++) { + commToProcessorSuperstepSourceVar_[proc][step] + = model.AddVars(static_cast(numSources), COPT_BINARY, "comm_to_processor_superstep_source"); - if (step < max_number_supersteps) { - present_on_processor_superstep_source_var[proc][step] - = model.AddVars(static_cast(num_sources), COPT_BINARY, "present_on_processor_superstep_source"); + if (step < maxNumberSupersteps_) { + presentOnProcessorSuperstepSourceVar[proc][step] + = model.AddVars(static_cast(numSources), COPT_BINARY, "present_on_processor_superstep_source"); } } } - VarArray max_comm_superstep_var - = model.AddVars(static_cast(max_number_supersteps + 1), COPT_INTEGER, "max_comm_superstep"); + VarArray maxCommSuperstepVar = model.AddVars(static_cast(maxNumberSupersteps_ + 1), COPT_INTEGER, "max_comm_superstep"); - VarArray max_work_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_work_superstep"); + VarArray maxWorkSuperstepVar = model.AddVars(static_cast(maxNumberSupersteps_), COPT_INTEGER, "max_work_superstep"); - keep_fixed_comm_step = model.AddVars(static_cast(fixed_comm_steps.size()), COPT_BINARY, "keep_fixed_comm_step"); + keepFixedCommStep_ = model.AddVars(static_cast(fixedCommSteps_.size()), COPT_BINARY, "keepFixedCommStep_"); /* Constraints */ // use consecutive supersteps starting from 0 - model.AddConstr(superstep_used_var[0] == 1); + model.AddConstr(superstepUsedVar_[0] == 1); - for (unsigned int step = 0; step < max_number_supersteps - 1; step++) { - model.AddConstr(superstep_used_var[static_cast(step)] >= 
superstep_used_var[static_cast(step + 1)]); + for (unsigned int step = 0; step < maxNumberSupersteps_ - 1; step++) { + model.AddConstr(superstepUsedVar_[static_cast(step)] >= superstepUsedVar_[static_cast(step + 1)]); } // check whether superstep is used at all (work or comm), and whether superstep has any communication at all - unsigned large_constant_work = static_cast(num_vertices) * num_processors; - unsigned large_constant_comm = static_cast(num_vertices + num_sources) * num_processors * num_processors - + static_cast(fixed_comm_steps.size()); - for (unsigned int step = 0; step < max_number_supersteps; step++) { - Expr expr_work, expr_comm; - for (vertex_idx_t node = 0; node < num_vertices; node++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { - expr_work += node_to_processor_superstep_var[node][processor][static_cast(step)]; - - for (unsigned int p_other = 0; p_other < num_processors; p_other++) { - if (processor != p_other) { - expr_comm - += comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast(node)]; + unsigned largeConstantWork = static_cast(numVertices) * numProcessors; + unsigned largeConstantComm = static_cast(numVertices + numSources) * numProcessors * numProcessors + + static_cast(fixedCommSteps_.size()); + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + Expr exprWork, exprComm; + for (VertexIdxT node = 0; node < numVertices; node++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { + exprWork += nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]; + + for (unsigned int pOther = 0; pOther < numProcessors; pOther++) { + if (processor != pOther) { + exprComm += commProcessorToProcessorSuperstepNodeVar_[processor][pOther][step][static_cast(node)]; } } } } - for (vertex_idx_t source = 0; source < num_sources; source++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { - if 
(source_present_before.find(std::make_pair(source, processor)) == source_present_before.end()) { - expr_comm += comm_to_processor_superstep_source_var[processor][step + 1][static_cast(source)]; + for (VertexIdxT source = 0; source < numSources; source++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { + if (sourcePresentBefore_.find(std::make_pair(source, processor)) == sourcePresentBefore_.end()) { + exprComm += commToProcessorSuperstepSourceVar_[processor][step + 1][static_cast(source)]; } } } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { - if (std::get<3>(fixed_comm_steps[index]) == start_superstep + step) { - expr_comm += keep_fixed_comm_step[static_cast(index)]; + for (unsigned index = 0; index < fixedCommSteps_.size(); ++index) { + if (std::get<3>(fixedCommSteps_[index]) == startSuperstep_ + step) { + exprComm += keepFixedCommStep_[static_cast(index)]; } } - model.AddConstr(expr_comm <= large_constant_comm * superstep_has_comm[static_cast(step + 1)]); - model.AddConstr(expr_work <= large_constant_work * superstep_used_var[static_cast(step)]); - model.AddConstr(superstep_has_comm[static_cast(step + 1)] <= superstep_used_var[static_cast(step)]); + model.AddConstr(exprComm <= largeConstantComm * superstepHasComm[static_cast(step + 1)]); + model.AddConstr(exprWork <= largeConstantWork * superstepUsedVar_[static_cast(step)]); + model.AddConstr(superstepHasComm[static_cast(step + 1)] <= superstepUsedVar_[static_cast(step)]); } // check communication usage in edge case: comm phase before the segment - if (has_fixed_comm_in_preceding_step) { - model.AddConstr(superstep_has_comm[0] == 1); + if (hasFixedCommInPrecedingStep_) { + model.AddConstr(superstepHasComm[0] == 1); } else { - Expr expr_comm_0; - for (vertex_idx_t source = 0; source < num_sources; source++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { - if (source_present_before.find(std::make_pair(source, processor)) == 
source_present_before.end()) { - expr_comm_0 += comm_to_processor_superstep_source_var[processor][0][static_cast(source)]; + Expr exprComm0; + for (VertexIdxT source = 0; source < numSources; source++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { + if (sourcePresentBefore_.find(std::make_pair(source, processor)) == sourcePresentBefore_.end()) { + exprComm0 += commToProcessorSuperstepSourceVar_[processor][0][static_cast(source)]; } } } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { - expr_comm_0 += 1 - keep_fixed_comm_step[static_cast(index)]; + for (unsigned index = 0; index < fixedCommSteps_.size(); ++index) { + exprComm0 += 1 - keepFixedCommStep_[static_cast(index)]; } - model.AddConstr(expr_comm_0 - <= (static_cast(num_sources) * num_processors + static_cast(fixed_comm_steps.size())) - * superstep_has_comm[0]); + model.AddConstr(exprComm0 + <= (static_cast(numSources) * numProcessors + static_cast(fixedCommSteps_.size())) + * superstepHasComm[0]); } // check if there is any communication at the end of the subschedule - for (unsigned int step = 0; step < max_number_supersteps - 1; step++) { - model.AddConstr(superstep_used_var[static_cast(step)] - superstep_used_var[static_cast(step + 1)] - + superstep_has_comm[static_cast(step + 1)] - 1 - <= has_comm_at_end[0]); + for (unsigned int step = 0; step < maxNumberSupersteps_ - 1; step++) { + model.AddConstr(superstepUsedVar_[static_cast(step)] - superstepUsedVar_[static_cast(step + 1)] + + superstepHasComm[static_cast(step + 1)] - 1 + <= hasCommAtEnd[0]); } - model.AddConstr(superstep_used_var[static_cast(max_number_supersteps - 1)] - + superstep_has_comm[static_cast(max_number_supersteps)] - 1 - <= has_comm_at_end[0]); + model.AddConstr(superstepUsedVar_[static_cast(maxNumberSupersteps_ - 1)] + + superstepHasComm[static_cast(maxNumberSupersteps_)] - 1 + <= hasCommAtEnd[0]); // nodes are assigend - for (vertex_idx_t node = 0; node < num_vertices; node++) { + 
for (VertexIdxT node = 0; node < numVertices; node++) { Expr expr; - for (unsigned int processor = 0; processor < num_processors; processor++) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += node_to_processor_superstep_var[node][processor][static_cast(step)]; + for (unsigned int processor = 0; processor < numProcessors; processor++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + expr += nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]; } } @@ -448,26 +441,26 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp } // precedence constraint: if task is computed then all of its predecessors must have been present - for (vertex_idx_t node = 0; node < num_vertices; node++) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { + for (VertexIdxT node = 0; node < numVertices; node++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { Expr expr; - unsigned num_terms = 0; - for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node_global_ID[node])) { - if (node_local_ID.find(pred) != node_local_ID.end()) { - ++num_terms; - expr += comm_processor_to_processor_superstep_node_var[processor][processor][step] - [static_cast(node_local_ID[pred])]; - } else if (source_local_ID.find(pred) != source_local_ID.end() - && source_present_before.find(std::make_pair(source_local_ID[pred], processor)) - == source_present_before.end()) { - ++num_terms; - expr += present_on_processor_superstep_source_var[processor][step][static_cast(source_local_ID[pred])]; + unsigned numTerms = 0; + for (const auto &pred : schedule.GetInstance().GetComputationalDag().Parents(nodeGlobalId_[node])) { + if (nodeLocalId_.find(pred) != nodeLocalId_.end()) { + ++numTerms; + expr += 
commProcessorToProcessorSuperstepNodeVar_[processor][processor][step] + [static_cast(nodeLocalId_[pred])]; + } else if (sourceLocalId_.find(pred) != sourceLocalId_.end() + && sourcePresentBefore_.find(std::make_pair(sourceLocalId_[pred], processor)) + == sourcePresentBefore_.end()) { + ++numTerms; + expr += presentOnProcessorSuperstepSourceVar[processor][step][static_cast(sourceLocalId_[pred])]; } } - if (num_terms > 0) { - model.AddConstr(expr >= num_terms * node_to_processor_superstep_var[node][processor][static_cast(step)]); + if (numTerms > 0) { + model.AddConstr(expr >= numTerms * nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]); } } } @@ -475,43 +468,42 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp // combines two constraints: node can only be communicated if it is present; and node is present if it was computed // or communicated - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { - for (vertex_idx_t node = 0; node < num_vertices; node++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { + for (VertexIdxT node = 0; node < numVertices; node++) { Expr expr1, expr2; if (step > 0) { - for (unsigned int p_from = 0; p_from < num_processors; p_from++) { - expr1 - += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast(node)]; + for (unsigned int pFrom = 0; pFrom < numProcessors; pFrom++) { + expr1 += commProcessorToProcessorSuperstepNodeVar_[pFrom][processor][step - 1][static_cast(node)]; } } - expr1 += node_to_processor_superstep_var[node][processor][static_cast(step)]; + expr1 += nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]; - for (unsigned int p_to = 0; p_to < num_processors; p_to++) { - expr2 += 
comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast(node)]; + for (unsigned int pTo = 0; pTo < numProcessors; pTo++) { + expr2 += commProcessorToProcessorSuperstepNodeVar_[processor][pTo][step][static_cast(node)]; } - model.AddConstr(num_processors * (expr1) >= expr2); + model.AddConstr(numProcessors * (expr1) >= expr2); } } } // combines two constraints: node can only be communicated if it is present; and node is present if it was computed // or communicated - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { - for (vertex_idx_t source_node = 0; source_node < num_sources; source_node++) { - if (source_present_before.find(std::make_pair(source_node, processor)) != source_present_before.end()) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { + for (VertexIdxT sourceNode = 0; sourceNode < numSources; sourceNode++) { + if (sourcePresentBefore_.find(std::make_pair(sourceNode, processor)) != sourcePresentBefore_.end()) { continue; } - Expr expr1 = comm_to_processor_superstep_source_var[processor][step][static_cast(source_node)]; + Expr expr1 = commToProcessorSuperstepSourceVar_[processor][step][static_cast(sourceNode)]; if (step > 0) { - expr1 += present_on_processor_superstep_source_var[processor][step - 1][static_cast(source_node)]; + expr1 += presentOnProcessorSuperstepSourceVar[processor][step - 1][static_cast(sourceNode)]; } - Expr expr2 = present_on_processor_superstep_source_var[processor][step][static_cast(source_node)]; + Expr expr2 = presentOnProcessorSuperstepSourceVar[processor][step][static_cast(sourceNode)]; model.AddConstr(expr1 >= expr2); } @@ -519,123 +511,123 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp } // boundary conditions at the end - for (const std::pair, unsigned> node_and_proc : 
node_needed_after_on_proc) { + for (const std::pair, unsigned> nodeAndProc : nodeNeededAfterOnProc_) { Expr expr; - for (unsigned int p_from = 0; p_from < num_processors; p_from++) { - expr += comm_processor_to_processor_superstep_node_var[p_from][node_and_proc.second][max_number_supersteps - 1] - [static_cast(node_and_proc.first)]; + for (unsigned int pFrom = 0; pFrom < numProcessors; pFrom++) { + expr += commProcessorToProcessorSuperstepNodeVar_[pFrom][nodeAndProc.second][maxNumberSupersteps_ - 1] + [static_cast(nodeAndProc.first)]; } model.AddConstr(expr >= 1); } - for (const std::pair, unsigned> source_and_proc : source_needed_after_on_proc) { - Expr expr = present_on_processor_superstep_source_var[source_and_proc.second][max_number_supersteps - 1] - [static_cast(source_and_proc.first)]; - expr += comm_to_processor_superstep_source_var[source_and_proc.second][max_number_supersteps] - [static_cast(source_and_proc.first)]; + for (const std::pair, unsigned> sourceAndProc : sourceNeededAfterOnProc_) { + Expr expr = presentOnProcessorSuperstepSourceVar[sourceAndProc.second][maxNumberSupersteps_ - 1] + [static_cast(sourceAndProc.first)]; + expr + += commToProcessorSuperstepSourceVar_[sourceAndProc.second][maxNumberSupersteps_][static_cast(sourceAndProc.first)]; model.AddConstr(expr >= 1); } // cost calculation - work - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { Expr expr; - for (unsigned int node = 0; node < num_vertices; node++) { - expr += schedule.getInstance().getComputationalDag().vertex_work_weight(node_global_ID[node]) - * node_to_processor_superstep_var[node][processor][static_cast(step)]; + for (unsigned int node = 0; node < numVertices; node++) { + expr += 
schedule.GetInstance().GetComputationalDag().VertexWorkWeight(nodeGlobalId_[node]) + * nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]; } - model.AddConstr(max_work_superstep_var[static_cast(step)] >= expr); + model.AddConstr(maxWorkSuperstepVar[static_cast(step)] >= expr); } } // cost calculation - comm - for (unsigned int step = 0; step < max_number_supersteps; step++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { Expr expr1, expr2; - for (vertex_idx_t node = 0; node < num_vertices; node++) { - for (unsigned int p_other = 0; p_other < num_processors; p_other++) { - if (processor != p_other) { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node]) - * schedule.getInstance().sendCosts(processor, p_other) - * comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast(node)]; - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node]) - * schedule.getInstance().sendCosts(p_other, processor) - * comm_processor_to_processor_superstep_node_var[p_other][processor][step][static_cast(node)]; + for (VertexIdxT node = 0; node < numVertices; node++) { + for (unsigned int pOther = 0; pOther < numProcessors; pOther++) { + if (processor != pOther) { + expr1 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(nodeGlobalId_[node]) + * schedule.GetInstance().SendCosts(processor, pOther) + * commProcessorToProcessorSuperstepNodeVar_[processor][pOther][step][static_cast(node)]; + expr2 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(nodeGlobalId_[node]) + * schedule.GetInstance().SendCosts(pOther, processor) + * commProcessorToProcessorSuperstepNodeVar_[pOther][processor][step][static_cast(node)]; } } } - for (vertex_idx_t source = 0; source < num_sources; 
source++) { - const unsigned origin_proc = schedule.assignedProcessor(source_global_ID[source]); - if (origin_proc == processor) { - for (unsigned int p_other = 0; p_other < num_processors; p_other++) { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) - * schedule.getInstance().sendCosts(processor, p_other) - * comm_to_processor_superstep_source_var[p_other][step + 1][static_cast(source)]; + for (VertexIdxT source = 0; source < numSources; source++) { + const unsigned originProc = schedule.AssignedProcessor(sourceGlobalId_[source]); + if (originProc == processor) { + for (unsigned int pOther = 0; pOther < numProcessors; pOther++) { + expr1 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(sourceGlobalId_[source]) + * schedule.GetInstance().SendCosts(processor, pOther) + * commToProcessorSuperstepSourceVar_[pOther][step + 1][static_cast(source)]; } } - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) - * schedule.getInstance().sendCosts(origin_proc, processor) - * comm_to_processor_superstep_source_var[processor][step + 1][static_cast(source)]; + expr2 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(sourceGlobalId_[source]) + * schedule.GetInstance().SendCosts(originProc, processor) + * commToProcessorSuperstepSourceVar_[processor][step + 1][static_cast(source)]; } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { - const auto &entry = fixed_comm_steps[index]; - if (std::get<3>(entry) != start_superstep + step) { + for (unsigned index = 0; index < fixedCommSteps_.size(); ++index) { + const auto &entry = fixedCommSteps_[index]; + if (std::get<3>(entry) != startSuperstep_ + step) { continue; } if (std::get<1>(entry) == processor) { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) - * schedule.getInstance().sendCosts(processor, std::get<2>(entry)) - * 
keep_fixed_comm_step[static_cast(index)]; + expr1 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(std::get<0>(entry)) + * schedule.GetInstance().SendCosts(processor, std::get<2>(entry)) + * keepFixedCommStep_[static_cast(index)]; } if (std::get<2>(entry) == processor) { - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) - * schedule.getInstance().sendCosts(std::get<1>(entry), processor) - * keep_fixed_comm_step[static_cast(index)]; + expr2 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(std::get<0>(entry)) + * schedule.GetInstance().SendCosts(std::get<1>(entry), processor) + * keepFixedCommStep_[static_cast(index)]; } } - model.AddConstr(max_comm_superstep_var[static_cast(step + 1)] >= expr1); - model.AddConstr(max_comm_superstep_var[static_cast(step + 1)] >= expr2); + model.AddConstr(maxCommSuperstepVar[static_cast(step + 1)] >= expr1); + model.AddConstr(maxCommSuperstepVar[static_cast(step + 1)] >= expr2); } } // cost calculation - first comm phase handled separately - for (unsigned int processor = 0; processor < num_processors; processor++) { + for (unsigned int processor = 0; processor < numProcessors; processor++) { Expr expr1, expr2; - for (vertex_idx_t source = 0; source < num_sources; source++) { - const unsigned origin_proc = schedule.assignedProcessor(source_global_ID[source]); - if (origin_proc == processor) { - for (unsigned int p_other = 0; p_other < num_processors; p_other++) { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) - * schedule.getInstance().sendCosts(processor, p_other) - * comm_to_processor_superstep_source_var[p_other][0][static_cast(source)]; + for (VertexIdxT source = 0; source < numSources; source++) { + const unsigned originProc = schedule.AssignedProcessor(sourceGlobalId_[source]); + if (originProc == processor) { + for (unsigned int pOther = 0; pOther < numProcessors; pOther++) { + expr1 += 
schedule.GetInstance().GetComputationalDag().VertexCommWeight(sourceGlobalId_[source]) + * schedule.GetInstance().SendCosts(processor, pOther) + * commToProcessorSuperstepSourceVar_[pOther][0][static_cast(source)]; } } - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) - * schedule.getInstance().sendCosts(origin_proc, processor) - * comm_to_processor_superstep_source_var[processor][0][static_cast(source)]; + expr2 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(sourceGlobalId_[source]) + * schedule.GetInstance().SendCosts(originProc, processor) + * commToProcessorSuperstepSourceVar_[processor][0][static_cast(source)]; } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { - const auto &entry = fixed_comm_steps[index]; + for (unsigned index = 0; index < fixedCommSteps_.size(); ++index) { + const auto &entry = fixedCommSteps_[index]; if (std::get<1>(entry) == processor) { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) - * schedule.getInstance().sendCosts(processor, std::get<2>(entry)) - * (1 - keep_fixed_comm_step[static_cast(index)]); + expr1 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(std::get<0>(entry)) + * schedule.GetInstance().SendCosts(processor, std::get<2>(entry)) + * (1 - keepFixedCommStep_[static_cast(index)]); } if (std::get<2>(entry) == processor) { - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) - * schedule.getInstance().sendCosts(std::get<1>(entry), processor) - * (1 - keep_fixed_comm_step[static_cast(index)]); + expr2 += schedule.GetInstance().GetComputationalDag().VertexCommWeight(std::get<0>(entry)) + * schedule.GetInstance().SendCosts(std::get<1>(entry), processor) + * (1 - keepFixedCommStep_[static_cast(index)]); } } - model.AddConstr(max_comm_superstep_var[0] >= expr1); - model.AddConstr(max_comm_superstep_var[0] >= expr2); + 
model.AddConstr(maxCommSuperstepVar[0] >= expr1); + model.AddConstr(maxCommSuperstepVar[0] >= expr2); } /* @@ -643,48 +635,48 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp */ Expr expr; - for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += max_work_superstep_var[static_cast(step)] - + schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast(step + 1)] - + schedule.getInstance().synchronisationCosts() * superstep_used_var[static_cast(step)]; + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + expr += maxWorkSuperstepVar[static_cast(step)] + + schedule.GetInstance().CommunicationCosts() * maxCommSuperstepVar[static_cast(step + 1)] + + schedule.GetInstance().SynchronisationCosts() * superstepUsedVar_[static_cast(step)]; } - expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[0]; - expr += schedule.getInstance().synchronisationCosts() * superstep_has_comm[0]; - expr += schedule.getInstance().synchronisationCosts() * has_comm_at_end[0]; + expr += schedule.GetInstance().CommunicationCosts() * maxCommSuperstepVar[0]; + expr += schedule.GetInstance().SynchronisationCosts() * superstepHasComm[0]; + expr += schedule.GetInstance().SynchronisationCosts() * hasCommAtEnd[0]; - model.SetObjective(expr - schedule.getInstance().synchronisationCosts(), COPT_MINIMIZE); -}; + model.SetObjective(expr - schedule.GetInstance().SynchronisationCosts(), COPT_MINIMIZE); +} -template -void CoptPartialScheduler::setupVertexMaps(const BspScheduleCS &schedule) { - node_local_ID.clear(); - node_global_ID.clear(); - source_local_ID.clear(); - source_global_ID.clear(); +template +void CoptPartialScheduler::SetupVertexMaps(const BspScheduleCS &schedule) { + nodeLocalId_.clear(); + nodeGlobalId_.clear(); + sourceLocalId_.clear(); + sourceGlobalId_.clear(); - node_needed_after_on_proc.clear(); - source_needed_after_on_proc.clear(); - fixed_comm_steps.clear(); - 
source_present_before.clear(); + nodeNeededAfterOnProc_.clear(); + sourceNeededAfterOnProc_.clear(); + fixedCommSteps_.clear(); + sourcePresentBefore_.clear(); - std::vector> first_at = schedule.getFirstPresence(); + std::vector> firstAt = schedule.GetFirstPresence(); - max_number_supersteps = end_superstep - start_superstep + 3; + maxNumberSupersteps_ = endSuperstep_ - startSuperstep_ + 3; - for (unsigned node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - if (schedule.assignedSuperstep(node) >= start_superstep && schedule.assignedSuperstep(node) <= end_superstep) { - node_local_ID[node] = static_cast>(node_global_ID.size()); - node_global_ID.push_back(node); + for (unsigned node = 0; node < schedule.GetInstance().NumberOfVertices(); node++) { + if (schedule.AssignedSuperstep(node) >= startSuperstep_ && schedule.AssignedSuperstep(node) <= endSuperstep_) { + nodeLocalId_[node] = static_cast>(nodeGlobalId_.size()); + nodeGlobalId_.push_back(node); - for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node)) { - if (schedule.assignedSuperstep(pred) < start_superstep) { - if (source_local_ID.find(pred) == source_local_ID.end()) { - source_local_ID[pred] = static_cast>(source_global_ID.size()); - source_global_ID.push_back(pred); + for (const auto &pred : schedule.GetInstance().GetComputationalDag().Parents(node)) { + if (schedule.AssignedSuperstep(pred) < startSuperstep_) { + if (sourceLocalId_.find(pred) == sourceLocalId_.end()) { + sourceLocalId_[pred] = static_cast>(sourceGlobalId_.size()); + sourceGlobalId_.push_back(pred); } - } else if (schedule.assignedSuperstep(pred) > end_superstep) { + } else if (schedule.AssignedSuperstep(pred) > endSuperstep_) { throw std::invalid_argument("Initial Schedule might be invalid?!"); } } @@ -692,83 +684,83 @@ void CoptPartialScheduler::setupVertexMaps(const BspScheduleCS } // find where the sources are already present before the segment - for (const auto &source_and_ID : 
source_local_ID) { - vertex_idx_t source = source_and_ID.first; - for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { - if (first_at[source][proc] < start_superstep) { - source_present_before.emplace(std::make_pair(source_and_ID.second, proc)); + for (const auto &sourceAndId : sourceLocalId_) { + VertexIdxT source = sourceAndId.first; + for (unsigned proc = 0; proc < schedule.GetInstance().NumberOfProcessors(); ++proc) { + if (firstAt[source][proc] < startSuperstep_) { + sourcePresentBefore_.emplace(std::make_pair(sourceAndId.second, proc)); } } } // collect values that are needed by the end of the segment - for (const auto &source_and_ID : source_local_ID) { - vertex_idx_t source = source_and_ID.first; + for (const auto &sourceAndId : sourceLocalId_) { + VertexIdxT source = sourceAndId.first; - std::set procs_needing_this; - for (const auto &succ : schedule.getInstance().getComputationalDag().children(source)) { - if (schedule.assignedProcessor(succ) != schedule.assignedProcessor(source) - && schedule.assignedSuperstep(succ) > end_superstep) { - procs_needing_this.insert(schedule.assignedProcessor(succ)); + std::set procsNeedingThis; + for (const auto &succ : schedule.GetInstance().GetComputationalDag().Children(source)) { + if (schedule.AssignedProcessor(succ) != schedule.AssignedProcessor(source) + && schedule.AssignedSuperstep(succ) > endSuperstep_) { + procsNeedingThis.insert(schedule.AssignedProcessor(succ)); } } - for (unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1) { - for (unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2) { + for (unsigned proc1 = 0; proc1 < schedule.GetInstance().NumberOfProcessors(); ++proc1) { + for (unsigned proc2 = 0; proc2 < schedule.GetInstance().NumberOfProcessors(); ++proc2) { if (proc1 == proc2) { continue; } - auto itr = schedule.getCommunicationSchedule().find(std::make_tuple(source, proc1, proc2)); - if (itr != 
schedule.getCommunicationSchedule().end() && itr->second > end_superstep) { - procs_needing_this.insert(schedule.assignedProcessor(proc1)); + auto itr = schedule.GetCommunicationSchedule().find(std::make_tuple(source, proc1, proc2)); + if (itr != schedule.GetCommunicationSchedule().end() && itr->second > endSuperstep_) { + procsNeedingThis.insert(schedule.AssignedProcessor(proc1)); } } } - for (unsigned proc : procs_needing_this) { - if (first_at[source][proc] >= start_superstep && first_at[source][proc] <= end_superstep + 1) { - source_needed_after_on_proc.emplace_back(source_and_ID.second, proc); + for (unsigned proc : procsNeedingThis) { + if (firstAt[source][proc] >= startSuperstep_ && firstAt[source][proc] <= endSuperstep_ + 1) { + sourceNeededAfterOnProc_.emplace_back(sourceAndId.second, proc); } } } - for (const auto &node_and_ID : node_local_ID) { - vertex_idx_t node = node_and_ID.first; + for (const auto &nodeAndId : nodeLocalId_) { + VertexIdxT node = nodeAndId.first; - std::set procs_needing_this; - for (const auto &succ : schedule.getInstance().getComputationalDag().children(node)) { - if (schedule.assignedSuperstep(succ) > end_superstep) { - procs_needing_this.insert(schedule.assignedProcessor(succ)); + std::set procsNeedingThis; + for (const auto &succ : schedule.GetInstance().GetComputationalDag().Children(node)) { + if (schedule.AssignedSuperstep(succ) > endSuperstep_) { + procsNeedingThis.insert(schedule.AssignedProcessor(succ)); } } - for (unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1) { - for (unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2) { - auto itr = schedule.getCommunicationSchedule().find(std::make_tuple(node, proc1, proc2)); - if (itr != schedule.getCommunicationSchedule().end() && proc1 != proc2 && itr->second > end_superstep) { - procs_needing_this.insert(schedule.assignedProcessor(proc1)); + for (unsigned proc1 = 0; proc1 < schedule.GetInstance().NumberOfProcessors(); 
++proc1) { + for (unsigned proc2 = 0; proc2 < schedule.GetInstance().NumberOfProcessors(); ++proc2) { + auto itr = schedule.GetCommunicationSchedule().find(std::make_tuple(node, proc1, proc2)); + if (itr != schedule.GetCommunicationSchedule().end() && proc1 != proc2 && itr->second > endSuperstep_) { + procsNeedingThis.insert(schedule.AssignedProcessor(proc1)); } } } - for (unsigned proc : procs_needing_this) { - if (first_at[node][proc] <= end_superstep + 1) { - node_needed_after_on_proc.emplace_back(node_and_ID.second, proc); + for (unsigned proc : procsNeedingThis) { + if (firstAt[node][proc] <= endSuperstep_ + 1) { + nodeNeededAfterOnProc_.emplace_back(nodeAndId.second, proc); } } } // comm steps that just happen to be in this interval, but not connected to the nodes within - has_fixed_comm_in_preceding_step = false; - for (const auto &[key, val] : schedule.getCommunicationSchedule()) { - vertex_idx_t source = std::get<0>(key); - if (source_local_ID.find(source) == source_local_ID.end() && schedule.assignedSuperstep(source) < start_superstep - && val >= start_superstep - 1 && val <= end_superstep) { - fixed_comm_steps.emplace_back(std::get<0>(key), std::get<1>(key), std::get<2>(key), val); - if (val == start_superstep - 1) { - has_fixed_comm_in_preceding_step = true; + hasFixedCommInPrecedingStep_ = false; + for (const auto &[key, val] : schedule.GetCommunicationSchedule()) { + VertexIdxT source = std::get<0>(key); + if (sourceLocalId_.find(source) == sourceLocalId_.end() && schedule.AssignedSuperstep(source) < startSuperstep_ + && val >= startSuperstep_ - 1 && val <= endSuperstep_) { + fixedCommSteps_.emplace_back(std::get<0>(key), std::get<1>(key), std::get<2>(key), val); + if (val == startSuperstep_ - 1) { + hasFixedCommInPrecedingStep_ = true; } } } -}; +} } // namespace osp diff --git a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp index 150c6f73..fb9531ee 
100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp @@ -22,336 +22,334 @@ limitations under the License. #include #include "osp/auxiliary/io/DotFileWriter.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/graph_algorithms/directed_graph_edge_view.hpp" namespace osp { -template -class TotalCommunicationScheduler : public Scheduler { +template +class TotalCommunicationScheduler : public Scheduler { private: - Envr env; - Model model; + Envr env_; + Model model_; - bool use_memory_constraint; - bool ignore_workload_balance; + bool useMemoryConstraint_; + bool ignoreWorkloadBalance_; - bool use_initial_schedule; - const BspSchedule *initial_schedule; + bool useInitialSchedule_; + const BspSchedule *initialSchedule_; - bool write_solutions_found; - bool use_lk_heuristic_callback; + bool writeSolutionsFound_; + bool useLkHeuristicCallback_; class WriteSolutionCallback : public CallbackBase { private: - unsigned counter; - unsigned max_number_solution; + unsigned counter_; + unsigned maxNumberSolution_; - double best_obj; + double bestObj_; public: WriteSolutionCallback() - : counter(0), - max_number_solution(100), - best_obj(COPT_INFINITY), - write_solutions_path_cb(""), - solution_file_prefix_cb(""), - instance_ptr(0), - node_to_processor_superstep_var_ptr() {} + : counter_(0), + maxNumberSolution_(100), + bestObj_(COPT_INFINITY), + writeSolutionsPathCb_(""), + solutionFilePrefixCb_(""), + instancePtr_(0), + nodeToProcessorSuperstepVarPtr_() {} - std::string write_solutions_path_cb; - std::string solution_file_prefix_cb; - const BspInstance *instance_ptr; + std::string writeSolutionsPathCb_; + std::string solutionFilePrefixCb_; + const BspInstance *instancePtr_; - std::vector> *node_to_processor_superstep_var_ptr; + std::vector> *nodeToProcessorSuperstepVarPtr_; void callback() 
override { - if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { + if (Where() == COPT_CBCONTEXT_MIPSOL && counter_ < maxNumberSolution_ && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { try { - if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { - best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); - - auto sched = constructBspScheduleFromCallback(); - DotFileWriter sched_writer; - sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" - + std::to_string(counter) + "_schedule.dot", - sched); - counter++; + if (GetDblInfo(COPT_CBINFO_BESTOBJ) < bestObj_ && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { + bestObj_ = GetDblInfo(COPT_CBINFO_BESTOBJ); + + auto sched = ConstructBspScheduleFromCallback(); + DotFileWriter schedWriter; + schedWriter.WriteSchedule(writeSolutionsPathCb_ + "intmed_sol_" + solutionFilePrefixCb_ + "_" + + std::to_string(counter_) + "_schedule.dot", + sched); + counter_++; } } catch (const std::exception &e) {} } } - BspSchedule constructBspScheduleFromCallback() { - BspSchedule schedule(*instance_ptr); - - for (const auto &node : instance_ptr->vertices()) { - for (unsigned processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); - step++) { - assert(size < std::numeric_limits::max()); - if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= .99) { - schedule.setAssignedProcessor(node, processor); - schedule.setAssignedSuperstep(node, step); - } - } - } - } - - return schedule; - } - }; - - class LKHeuristicCallback : public CallbackBase { - private: - kl_total_comm lk_heuristic; - - double best_obj; - - public: - LKHeuristicCallback() - : lk_heuristic(), - best_obj(COPT_INFINITY), - num_step(0), - instance_ptr(0), - max_work_superstep_var_ptr(0), - 
superstep_used_var_ptr(0), - node_to_processor_superstep_var_ptr(0), - edge_vars_ptr(0) {} - - unsigned num_step; - const BspInstance *instance_ptr; - - VarArray *max_work_superstep_var_ptr; - VarArray *superstep_used_var_ptr; - std::vector> *node_to_processor_superstep_var_ptr; - std::vector> *edge_vars_ptr; - - void callback() override { - if (Where() == COPT_CBCONTEXT_MIPSOL && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { - try { - if (0.0 < GetDblInfo(COPT_CBINFO_BESTBND) && 1.0 < GetDblInfo(COPT_CBINFO_BESTOBJ) && - // GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && - 0.1 < (GetDblInfo(COPT_CBINFO_BESTOBJ) - GetDblInfo(COPT_CBINFO_BESTBND)) - / GetDblInfo(COPT_CBINFO_BESTOBJ)) { - // best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); - - auto sched = constructBspScheduleFromCallback(); + BspSchedule ConstructBspScheduleFromCallback() { + BspSchedule schedule(*instancePtr_); - if (sched.numberOfSupersteps() > 2) { - auto status = lk_heuristic.improveSchedule(sched); - - if (status == RETURN_STATUS::OSP_SUCCESS) { - feedImprovedSchedule(sched); - } - } - } - - } catch (const std::exception &e) {} - } - } - - BspSchedule constructBspScheduleFromCallback() { - BspSchedule schedule(*instance_ptr); - - for (const auto &node : instance_ptr->vertices()) { - for (unsigned processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); - step++) { - assert(step <= std::numeric_limits::max()); - if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= .99) { - schedule.setAssignedProcessor(node, processor); - schedule.setAssignedSuperstep(node, step); + for (const auto &node : instancePtr_->Vertices()) { + for (unsigned processor = 0; processor < instancePtr_->NumberOfProcessors(); processor++) { + for (unsigned step = 0; step < static_cast((*nodeToProcessorSuperstepVarPtr_)[0][0].Size()); step++) { + assert(step < 
std::numeric_limits::max()); + if (GetSolution((*nodeToProcessorSuperstepVarPtr_)[node][processor][static_cast(step)]) >= .99) { + schedule.SetAssignedProcessor(node, processor); + schedule.SetAssignedSuperstep(node, step); } } } } return schedule; - }; - - void feedImprovedSchedule(const BspSchedule &schedule) { - for (unsigned step = 0; step < num_step; step++) { - if (step < schedule.numberOfSupersteps()) { - assert(step <= std::numeric_limits::max()); - SetSolution((*superstep_used_var_ptr)[static_cast(step)], 1.0); - } else { - assert(step <= std::numeric_limits::max()); - SetSolution((*superstep_used_var_ptr)[static_cast(step)], 0.0); - } - } - - for (const auto &node : instance_ptr->vertices()) { - for (unsigned processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); - step++) { - if (schedule.assignedProcessor(node) == processor && schedule.assignedSuperstep(node) == step) { - assert(step <= std::numeric_limits::max()); - SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)], 1.0); - } else { - assert(step <= std::numeric_limits::max()); - SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)], 0.0); - } - } - } - } - - std::vector>> work( - num_step, std::vector>(instance_ptr->numberOfProcessors(), 0)); - - for (const auto &node : instance_ptr->vertices()) { - work[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] - += instance_ptr->getComputationalDag().vertex_work_weight(node); - } - - for (unsigned step = 0; step < num_step; step++) { - v_workw_t max_work = 0; - for (unsigned proc = 0; proc < instance_ptr->numberOfProcessors(); proc++) { - if (max_work < work[step][proc]) { - max_work = work[step][proc]; - } - } - - assert(step <= std::numeric_limits::max()); - SetSolution((*max_work_superstep_var_ptr)[static_cast(step)], max_work); - } - - if 
(instance_ptr->getArchitecture().isNumaArchitecture()) { - for (unsigned p1 = 0; p1 < instance_ptr->numberOfProcessors(); p1++) { - for (unsigned p2 = 0; p2 < instance_ptr->numberOfProcessors(); p2++) { - if (p1 != p2) { - int edge_id = 0; - for (const auto &ep : edge_view(instance_ptr->getComputationalDag())) { - if (schedule.assignedProcessor(ep.source) == p1 && schedule.assignedProcessor(ep.target) == p2) { - SetSolution((*edge_vars_ptr)[p1][p2][edge_id], 1.0); - } else { - SetSolution((*edge_vars_ptr)[p1][p2][edge_id], 0.0); - } - - edge_id++; - } - } - } - } - - } else { - int edge_id = 0; - for (const auto &ep : edge_view(instance_ptr->getComputationalDag())) { - if (schedule.assignedProcessor(ep.source) != schedule.assignedProcessor(ep.target)) { - SetSolution((*edge_vars_ptr)[0][0][edge_id], 1.0); - } else { - SetSolution((*edge_vars_ptr)[0][0][edge_id], 0.0); - } - - edge_id++; - } - } - - LoadSolution(); } }; - WriteSolutionCallback solution_callback; - LKHeuristicCallback heuristic_callback; + // class LKHeuristicCallback : public CallbackBase { + // private: + // kl_total_comm lkHeuristic_; + + // double bestObj_; + + // public: + // LKHeuristicCallback() + // : lk_heuristic(), + // bestObj_(COPT_INFINITY), + // numStep_(0), + // instancePtr_(0), + // maxWorkSuperstepVarPtr_(0), + // superstepUsedVarPtr_(0), + // nodeToProcessorSuperstepVarPtr_(0), + // edgeVarsPtr_(0) {} + + // unsigned numStep_; + // const BspInstance *instancePtr_; + + // VarArray *maxWorkSuperstepVarPtr_; + // VarArray *superstepUsedVarPtr_; + // std::vector> *nodeToProcessorSuperstepVarPtr_; + // std::vector> *edgeVarsPtr_; + + // void Callback() override { + // if (Where() == COPT_CBCONTEXT_MIPSOL && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { + // try { + // if (0.0 < GetDblInfo(COPT_CBINFO_BESTBND) && 1.0 < GetDblInfo(COPT_CBINFO_BESTOBJ) && + // // GetDblInfo(COPT_CBINFO_BESTOBJ) < bestObj_ && + // 0.1 < (GetDblInfo(COPT_CBINFO_BESTOBJ) - GetDblInfo(COPT_CBINFO_BESTBND)) + // / 
GetDblInfo(COPT_CBINFO_BESTOBJ)) { + // // bestObj_ = GetDblInfo(COPT_CBINFO_BESTOBJ); + + // auto sched = ConstructBspScheduleFromCallback(); + + // if (sched.NumberOfSupersteps() > 2) { + // auto status = lkHeuristic_.ImproveSchedule(sched); + + // if (status == ReturnStatus::OSP_SUCCESS) { + // FeedImprovedSchedule(sched); + // } + // } + // } + + // } catch (const std::exception &e) {} + // } + // } + + // BspSchedule ConstructBspScheduleFromCallback() { + // BspSchedule schedule(*instancePtr_); + + // for (const auto &node : instancePtr_->Vertices()) { + // for (unsigned processor = 0; processor < instancePtr_->NumberOfProcessors(); processor++) { + // for (unsigned step = 0; step < static_cast((*nodeToProcessorSuperstepVarPtr_)[0][0].Size()); + // step++) { + // assert(step <= std::numeric_limits::max()); + // if (GetSolution((*nodeToProcessorSuperstepVarPtr_)[node][processor][static_cast(step)]) >= .99) { + // schedule.SetAssignedProcessor(node, processor); + // schedule.SetAssignedSuperstep(node, step); + // } + // } + // } + // } + + // return schedule; + // }; + + // void FeedImprovedSchedule(const BspSchedule &schedule) { + // for (unsigned step = 0; step < numStep_; step++) { + // if (step < schedule.NumberOfSupersteps()) { + // assert(step <= std::numeric_limits::max()); + // SetSolution((*superstepUsedVarPtr_)[static_cast(step)], 1.0); + // } else { + // assert(step <= std::numeric_limits::max()); + // SetSolution((*superstepUsedVarPtr_)[static_cast(step)], 0.0); + // } + // } + + // for (const auto &node : instancePtr_->Vertices()) { + // for (unsigned processor = 0; processor < instancePtr_->NumberOfProcessors(); processor++) { + // for (unsigned step = 0; step < static_cast((*nodeToProcessorSuperstepVarPtr_)[0][0].Size()); + // step++) { + // if (schedule.AssignedProcessor(node) == processor && schedule.AssignedSuperstep(node) == step) { + // assert(step <= std::numeric_limits::max()); + //
SetSolution((*nodeToProcessorSuperstepVarPtr_)[node][processor][static_cast(step)], 1.0); + // } else { + // assert(step <= std::numeric_limits::max()); + // SetSolution((*nodeToProcessorSuperstepVarPtr_)[node][processor][static_cast(step)], 0.0); + // } + // } + // } + + // std::vector>> work(numStep_, + // std::vector>(instancePtr_->NumberOfProcessors(), 0)); + + // for (const auto &node : instancePtr_->Vertices()) { + // work[schedule.AssignedSuperstep(node)][schedule.AssignedProcessor(node)] + // += instancePtr_->GetComputationalDag().VertexWorkWeight(node); + // } + + // for (unsigned step = 0; step < numStep_; step++) { + // VWorkwT maxWork = 0; + // for (unsigned proc = 0; proc < instancePtr_->NumberOfProcessors(); proc++) { + // if (maxWork < work[step][proc]) { + // maxWork = work[step][proc]; + // } + // } + + // assert(step <= std::numeric_limits::max()); + // SetSolution((*maxWorkSuperstepVarPtr_)[static_cast(step)], maxWork); + // } + + // if (instancePtr_->GetArchitecture().IsNumaArchitecture()) { + // for (unsigned p1 = 0; p1 < instancePtr_->NumberOfProcessors(); p1++) { + // for (unsigned p2 = 0; p2 < instancePtr_->NumberOfProcessors(); p2++) { + // if (p1 != p2) { + // int edgeId = 0; + // for (const auto &ep : EdgeView(instancePtr_->GetComputationalDag())) { + // if (schedule.AssignedProcessor(ep.source) == p1 && schedule.AssignedProcessor(ep.target) == p2) { + // SetSolution((*edgeVarsPtr_)[p1][p2][edgeId], 1.0); + // } else { + // SetSolution((*edgeVarsPtr_)[p1][p2][edgeId], 0.0); + // } + + // edgeId++; + // } + // } + // } + // } + + // } else { + // int edgeId = 0; + // for (const auto &ep : EdgeView(instancePtr_->GetComputationalDag())) { + // if (schedule.AssignedProcessor(ep.source) != schedule.AssignedProcessor(ep.target)) { + // SetSolution((*edgeVarsPtr_)[0][0][edgeId], 1.0); + // } else { + // SetSolution((*edgeVarsPtr_)[0][0][edgeId], 0.0); + // } + + // edgeId++; + // } + // } + + // LoadSolution(); + // } + // }; + +
WriteSolutionCallback solutionCallback_; + // LKHeuristicCallback heuristicCallback_; protected: - unsigned int max_number_supersteps; + unsigned int maxNumberSupersteps_; - unsigned time_limit_seconds; + unsigned timeLimitSeconds_; - VarArray superstep_used_var; - std::vector> node_to_processor_superstep_var; - std::vector> edge_vars; - VarArray max_work_superstep_var; + VarArray superstepUsedVar_; + std::vector> nodeToProcessorSuperstepVar_; + std::vector> edgeVars_; + VarArray maxWorkSuperstepVar_; - void constructBspScheduleFromSolution(BspSchedule &schedule, bool cleanup_ = false) { - const auto &instance = schedule.getInstance(); + void ConstructBspScheduleFromSolution(BspSchedule &schedule, bool cleanup = false) { + const auto &instance = schedule.GetInstance(); - for (const auto &node : instance.vertices()) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (node_to_processor_superstep_var[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) { - schedule.setAssignedProcessor(node, processor); - schedule.setAssignedSuperstep(node, step); + for (const auto &node : instance.Vertices()) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + if (nodeToProcessorSuperstepVar_[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) { + schedule.SetAssignedProcessor(node, processor); + schedule.SetAssignedSuperstep(node, step); } } } } - if (cleanup_) { - node_to_processor_superstep_var.clear(); + if (cleanup) { + nodeToProcessorSuperstepVar_.clear(); } } - void loadInitialSchedule() { - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (step < initial_schedule->numberOfSupersteps()) { + void LoadInitialSchedule() { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + if (step < initialSchedule_->NumberOfSupersteps()) { 
assert(step <= std::numeric_limits::max()); - model.SetMipStart(superstep_used_var[static_cast(step)], 1); + model_.SetMipStart(superstepUsedVar_[static_cast(step)], 1); } else { assert(step <= std::numeric_limits::max()); - model.SetMipStart(superstep_used_var[static_cast(step)], 0); + model_.SetMipStart(superstepUsedVar_[static_cast(step)], 0); } } - for (const auto &node : initial_schedule->getInstance().vertices()) { - for (unsigned proc = 0; proc < initial_schedule->getInstance().numberOfProcessors(); proc++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (proc == initial_schedule->assignedProcessor(node) && step == initial_schedule->assignedSuperstep(node)) { + for (const auto &node : initialSchedule_->GetInstance().Vertices()) { + for (unsigned proc = 0; proc < initialSchedule_->GetInstance().NumberOfProcessors(); proc++) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + if (proc == initialSchedule_->AssignedProcessor(node) && step == initialSchedule_->AssignedSuperstep(node)) { assert(step <= std::numeric_limits::max()); - model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast(step)], 1); + model_.SetMipStart(nodeToProcessorSuperstepVar_[node][proc][static_cast(step)], 1); } else { assert(step <= std::numeric_limits::max()); - model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast(step)], 0); + model_.SetMipStart(nodeToProcessorSuperstepVar_[node][proc][static_cast(step)], 0); } } } } - std::vector>> work( - max_number_supersteps, std::vector>(initial_schedule->getInstance().numberOfProcessors(), 0)); + std::vector>> work( + maxNumberSupersteps_, std::vector>(initialSchedule_->GetInstance().NumberOfProcessors(), 0)); - for (const auto &node : initial_schedule->getInstance().vertices()) { - work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] - += initial_schedule->getInstance().getComputationalDag().vertex_work_weight(node); + for (const 
auto &node : initialSchedule_->GetInstance().Vertices()) { + work[initialSchedule_->AssignedSuperstep(node)][initialSchedule_->AssignedProcessor(node)] + += initialSchedule_->GetInstance().GetComputationalDag().VertexWorkWeight(node); } - for (unsigned step = 0; step < max_number_supersteps; step++) { - v_workw_t max_work = 0; - for (unsigned i = 0; i < initial_schedule->getInstance().numberOfProcessors(); i++) { - if (max_work < work[step][i]) { - max_work = work[step][i]; + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + VWorkwT maxWork = 0; + for (unsigned i = 0; i < initialSchedule_->GetInstance().NumberOfProcessors(); i++) { + if (maxWork < work[step][i]) { + maxWork = work[step][i]; } } assert(step <= std::numeric_limits::max()); - model.SetMipStart(max_work_superstep_var[static_cast(step)], max_work); + model_.SetMipStart(maxWorkSuperstepVar_[static_cast(step)], maxWork); } - model.LoadMipStart(); - model.SetIntParam(COPT_INTPARAM_MIPSTARTMODE, 2); + model_.LoadMipStart(); + model_.SetIntParam(COPT_INTPARAM_MIPSTARTMODE, 2); } - void setupVariablesConstraintsObjective(const BspInstance &instance) { + void SetupVariablesConstraintsObjective(const BspInstance &instance) { /* Variables */ // variables indicating if superstep is used at all - superstep_used_var = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_used"); + superstepUsedVar_ = model_.AddVars(static_cast(maxNumberSupersteps_), COPT_BINARY, "superstep_used"); - node_to_processor_superstep_var = std::vector>( - instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); - assert(max_number_supersteps <= std::numeric_limits::max()); + nodeToProcessorSuperstepVar_ = std::vector>(instance.NumberOfVertices(), + std::vector(instance.NumberOfProcessors())); + assert(maxNumberSupersteps_ <= std::numeric_limits::max()); // variables for assigments of nodes to processor and superstep - for (const auto &node : instance.vertices()) { - for (unsigned int 
processor = 0; processor < instance.numberOfProcessors(); processor++) { - node_to_processor_superstep_var[node][processor] - = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep"); + for (const auto &node : instance.Vertices()) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + nodeToProcessorSuperstepVar_[node][processor] + = model_.AddVars(static_cast(maxNumberSupersteps_), COPT_BINARY, "node_to_processor_superstep"); } } @@ -362,165 +360,171 @@ class TotalCommunicationScheduler : public Scheduler { /* Constraints */ - if (use_memory_constraint) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { + if (useMemoryConstraint_) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { Expr expr; - for (unsigned int node = 0; node < instance.numberOfVertices(); node++) { - expr += node_to_processor_superstep_var[node][processor][static_cast(step)] - * instance.getComputationalDag().vertex_mem_weight(node); + for (unsigned int node = 0; node < instance.NumberOfVertices(); node++) { + expr += nodeToProcessorSuperstepVar_[node][processor][static_cast(step)] + * instance.GetComputationalDag().VertexMemWeight(node); } - model.AddConstr(expr <= instance.getArchitecture().memoryBound(processor)); + model_.AddConstr(expr <= instance.GetArchitecture().MemoryBound(processor)); } } } // use consecutive supersteps starting from 0 - model.AddConstr(superstep_used_var[0] == 1); + model_.AddConstr(superstepUsedVar_[0] == 1); - for (unsigned int step = 0; step < max_number_supersteps - 1; step++) { - model.AddConstr(superstep_used_var[static_cast(step)] >= superstep_used_var[static_cast(step + 1)]); + for (unsigned int step = 0; step < maxNumberSupersteps_ - 1; step++) { + 
model_.AddConstr(superstepUsedVar_[static_cast(step)] >= superstepUsedVar_[static_cast(step + 1)]); } // superstep is used at all - for (unsigned int step = 0; step < max_number_supersteps; step++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { Expr expr; - for (const auto &node : instance.vertices()) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - expr += node_to_processor_superstep_var[node][processor][static_cast(step)]; + for (const auto &node : instance.Vertices()) { + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + expr += nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]; } } - model.AddConstr(expr <= static_cast(instance.numberOfVertices() * instance.numberOfProcessors()) - * superstep_used_var.GetVar(static_cast(step))); + model_.AddConstr(expr <= static_cast(instance.NumberOfVertices() * instance.NumberOfProcessors()) + * superstepUsedVar_.GetVar(static_cast(step))); } // nodes are assigend depending on whether recomputation is allowed or not - for (const auto &node : instance.vertices()) { + for (const auto &node : instance.Vertices()) { Expr expr; - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - assert(max_number_supersteps <= std::numeric_limits::max()); - for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += node_to_processor_superstep_var[node][processor].GetVar(static_cast(step)); + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + assert(maxNumberSupersteps_ <= std::numeric_limits::max()); + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { + expr += nodeToProcessorSuperstepVar_[node][processor].GetVar(static_cast(step)); } } - model.AddConstr(expr == 1); + model_.AddConstr(expr == 1); // model.AddConstr(instance.allowRecomputation() ? 
expr >= .99 : expr == 1); } - for (const auto &node : instance.vertices()) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - assert(max_number_supersteps <= std::numeric_limits::max()); - for (unsigned step = 0; step < max_number_supersteps; step++) { - for (const auto &source : instance.getComputationalDag().parents(node)) { + for (const auto &node : instance.Vertices()) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + assert(maxNumberSupersteps_ <= std::numeric_limits::max()); + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + for (const auto &source : instance.GetComputationalDag().Parents(node)) { Expr expr1; - for (unsigned p2 = 0; p2 < instance.numberOfProcessors(); p2++) { - for (unsigned step_prime = 0; step_prime < step; step_prime++) { - expr1 += node_to_processor_superstep_var[source][p2][static_cast(step_prime)]; + for (unsigned p2 = 0; p2 < instance.NumberOfProcessors(); p2++) { + for (unsigned stepPrime = 0; stepPrime < step; stepPrime++) { + expr1 += nodeToProcessorSuperstepVar_[source][p2][static_cast(stepPrime)]; } } - expr1 += node_to_processor_superstep_var[source][processor][static_cast(step)]; + expr1 += nodeToProcessorSuperstepVar_[source][processor][static_cast(step)]; - model.AddConstr(node_to_processor_superstep_var[node][processor][static_cast(step)] <= expr1); + model_.AddConstr(nodeToProcessorSuperstepVar_[node][processor][static_cast(step)] <= expr1); } } } } - Expr total_edges_cut; + Expr totalEdgesCut; - if (instance.getArchitecture().isNumaArchitecture()) { - edge_vars = std::vector>(instance.numberOfProcessors(), - std::vector(instance.numberOfProcessors())); + if (instance.GetArchitecture().IsNumaArchitecture()) { + edgeVars_ = std::vector>(instance.NumberOfProcessors(), + std::vector(instance.NumberOfProcessors())); - for (unsigned int p1 = 0; p1 < instance.numberOfProcessors(); p1++) { - for (unsigned int p2 = 0; p2 < 
instance.numberOfProcessors(); p2++) { + for (unsigned int p1 = 0; p1 < instance.NumberOfProcessors(); p1++) { + for (unsigned int p2 = 0; p2 < instance.NumberOfProcessors(); p2++) { if (p1 != p2) { - assert(instance.getComputationalDag().num_edges() <= std::numeric_limits::max()); - edge_vars[p1][p2] - = model.AddVars(static_cast(instance.getComputationalDag().num_edges()), COPT_BINARY, "edge"); + assert(instance.GetComputationalDag().NumEdges() <= std::numeric_limits::max()); + edgeVars_[p1][p2] + = model_.AddVars(static_cast(instance.GetComputationalDag().NumEdges()), COPT_BINARY, "edge"); - int edge_id = 0; - for (const auto &ep : edge_view(instance.getComputationalDag())) { + int edgeId = 0; + for (const auto &ep : EdgeView(instance.GetComputationalDag())) { Expr expr1, expr2; - assert(max_number_supersteps <= std::numeric_limits::max()); - for (unsigned step = 0; step < max_number_supersteps; step++) { - expr1 += node_to_processor_superstep_var[ep.source][p1][static_cast(step)]; - expr2 += node_to_processor_superstep_var[ep.target][p2][static_cast(step)]; + assert(maxNumberSupersteps_ <= std::numeric_limits::max()); + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + expr1 += nodeToProcessorSuperstepVar_[Source(ep, instance.GetComputationalDag())][p1] + [static_cast(step)]; + expr2 += nodeToProcessorSuperstepVar_[Target(ep, instance.GetComputationalDag())][p2] + [static_cast(step)]; } - model.AddConstr(edge_vars[p1][p2][edge_id] >= expr1 + expr2 - 1.001); + model_.AddConstr(edgeVars_[p1][p2][edgeId] >= expr1 + expr2 - 1.001); - total_edges_cut += edge_vars[p1][p2][edge_id] - * instance.getComputationalDag().vertex_comm_weight(ep.source) - * instance.sendCosts(p1, p2); + totalEdgesCut + += edgeVars_[p1][p2][edgeId] + * instance.GetComputationalDag().VertexCommWeight(Source(ep, instance.GetComputationalDag())) + * instance.SendCosts(p1, p2); - edge_id++; + edgeId++; } } } } } else { - edge_vars = std::vector>(1, std::vector(1)); - 
assert(instance.getComputationalDag().num_edges() <= std::numeric_limits::max()); - edge_vars[0][0] = model.AddVars(static_cast(instance.getComputationalDag().num_edges()), COPT_BINARY, "edge"); + edgeVars_ = std::vector>(1, std::vector(1)); + assert(instance.GetComputationalDag().NumEdges() <= std::numeric_limits::max()); + edgeVars_[0][0] = model_.AddVars(static_cast(instance.GetComputationalDag().NumEdges()), COPT_BINARY, "edge"); - int edge_id = 0; - for (const auto &ep : edge_view(instance.getComputationalDag())) { - for (unsigned p1 = 0; p1 < instance.numberOfProcessors(); p1++) { + int edgeId = 0; + for (const auto &ep : EdgeView(instance.GetComputationalDag())) { + for (unsigned p1 = 0; p1 < instance.NumberOfProcessors(); p1++) { Expr expr1, expr2; - for (unsigned step = 0; step < max_number_supersteps; step++) { - expr1 += node_to_processor_superstep_var[ep.source][p1][static_cast(step)]; + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + expr1 + += nodeToProcessorSuperstepVar_[Source(ep, instance.GetComputationalDag())][p1][static_cast(step)]; } - for (unsigned p2 = 0; p2 < instance.numberOfProcessors(); p2++) { + for (unsigned p2 = 0; p2 < instance.NumberOfProcessors(); p2++) { if (p1 != p2) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - expr2 += node_to_processor_superstep_var[ep.target][p2][static_cast(step)]; + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { + expr2 += nodeToProcessorSuperstepVar_[Target(ep, instance.GetComputationalDag())][p2] + [static_cast(step)]; } } } - model.AddConstr(edge_vars[0][0][edge_id] >= expr1 + expr2 - 1.001); + model_.AddConstr(edgeVars_[0][0][edgeId] >= expr1 + expr2 - 1.001); } - total_edges_cut += instance.getComputationalDag().vertex_comm_weight(ep.source) * edge_vars[0][0][edge_id]; + totalEdgesCut += instance.GetComputationalDag().VertexCommWeight(Source(ep, instance.GetComputationalDag())) + * edgeVars_[0][0][edgeId]; - edge_id++; + edgeId++; } } Expr expr; - 
if (ignore_workload_balance) { - for (unsigned step = 0; step < max_number_supersteps; step++) { + if (ignoreWorkloadBalance_) { + for (unsigned step = 0; step < maxNumberSupersteps_; step++) { assert(step <= std::numeric_limits::max()); - expr += instance.synchronisationCosts() * superstep_used_var[static_cast(step)]; + expr += instance.SynchronisationCosts() * superstepUsedVar_[static_cast(step)]; } } else { - assert(max_number_supersteps <= std::numeric_limits::max()); - max_work_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_CONTINUOUS, "max_work_superstep"); - // coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_work_superstep"); + assert(maxNumberSupersteps_ <= std::numeric_limits::max()); + maxWorkSuperstepVar_ = model_.AddVars(static_cast(maxNumberSupersteps_), COPT_CONTINUOUS, "max_work_superstep"); + // coptModel.AddVars(maxNumberSupersteps_, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_work_superstep"); - for (unsigned int step = 0; step < max_number_supersteps; step++) { + for (unsigned int step = 0; step < maxNumberSupersteps_; step++) { assert(step <= std::numeric_limits::max()); - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr_work; - for (const auto &node : instance.vertices()) { - expr_work += instance.getComputationalDag().vertex_work_weight(node) - * node_to_processor_superstep_var[node][processor][static_cast(step)]; + for (unsigned int processor = 0; processor < instance.NumberOfProcessors(); processor++) { + Expr exprWork; + for (const auto &node : instance.Vertices()) { + exprWork += instance.GetComputationalDag().VertexWorkWeight(node) + * nodeToProcessorSuperstepVar_[node][processor][static_cast(step)]; } - model.AddConstr(max_work_superstep_var[static_cast(step)] >= expr_work); + model_.AddConstr(maxWorkSuperstepVar_[static_cast(step)] >= exprWork); } } - for (unsigned step = 0; step < max_number_supersteps; step++) { + for (unsigned 
step = 0; step < maxNumberSupersteps_; step++) { assert(step <= std::numeric_limits::max()); - expr += max_work_superstep_var[static_cast(step)] - + instance.synchronisationCosts() * superstep_used_var[static_cast(step)]; + expr += maxWorkSuperstepVar_[static_cast(step)] + + instance.SynchronisationCosts() * superstepUsedVar_[static_cast(step)]; } } @@ -528,58 +532,56 @@ class TotalCommunicationScheduler : public Scheduler { Objective function */ - double comm_cost = static_cast(instance.communicationCosts()) / instance.numberOfProcessors(); - model.SetObjective(comm_cost * total_edges_cut + expr - instance.synchronisationCosts(), COPT_MINIMIZE); + double commCost = static_cast(instance.CommunicationCosts()) / instance.NumberOfProcessors(); + model_.SetObjective(commCost * totalEdgesCut + expr - instance.SynchronisationCosts(), COPT_MINIMIZE); } public: TotalCommunicationScheduler(unsigned steps = 5) - : Scheduler(), - env(), - model(env.CreateModel("TotalCommScheduler")), - use_memory_constraint(false), - ignore_workload_balance(false), - use_initial_schedule(false), - initial_schedule(0), - write_solutions_found(false), - use_lk_heuristic_callback(true), - solution_callback(), - heuristic_callback(), - max_number_supersteps(steps) { - heuristic_callback.max_work_superstep_var_ptr = &max_work_superstep_var; - heuristic_callback.superstep_used_var_ptr = &superstep_used_var; - heuristic_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; - heuristic_callback.edge_vars_ptr = &edge_vars; - - solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; + : Scheduler(), + env_(), + model_(env_.CreateModel("TotalCommScheduler")), + useMemoryConstraint_(false), + ignoreWorkloadBalance_(false), + useInitialSchedule_(false), + initialSchedule_(0), + writeSolutionsFound_(false), + useLkHeuristicCallback_(true), + solutionCallback_(), + maxNumberSupersteps_(steps) { + // heuristicCallback_.maxWorkSuperstepVarPtr_ = 
&maxWorkSuperstepVar_; + // heuristicCallback_.superstepUsedVarPtr_ = &superstepUsedVar_; + // heuristicCallback_.nodeToProcessorSuperstepVarPtr_ = &nodeToProcessorSuperstepVar_; + // heuristicCallback_.edgeVarsPtr_ = &edgeVars_; + + solutionCallback_.nodeToProcessorSuperstepVarPtr_ = &nodeToProcessorSuperstepVar_; } - TotalCommunicationScheduler(const BspSchedule &schedule) - : Scheduler(), - env(), - model(env.CreateModel("TotalCommScheduler")), - use_memory_constraint(false), - ignore_workload_balance(false), - use_initial_schedule(true), - initial_schedule(&schedule), - write_solutions_found(false), - use_lk_heuristic_callback(true), - solution_callback(), - heuristic_callback(), - max_number_supersteps(schedule.numberOfSupersteps()) { - heuristic_callback.max_work_superstep_var_ptr = &max_work_superstep_var; - heuristic_callback.superstep_used_var_ptr = &superstep_used_var; - heuristic_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; - heuristic_callback.edge_vars_ptr = &edge_vars; - - solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; + TotalCommunicationScheduler(const BspSchedule &schedule) + : Scheduler(), + env_(), + model_(env_.CreateModel("TotalCommScheduler")), + useMemoryConstraint_(false), + ignoreWorkloadBalance_(false), + useInitialSchedule_(true), + initialSchedule_(&schedule), + writeSolutionsFound_(false), + useLkHeuristicCallback_(true), + solutionCallback_(), + maxNumberSupersteps_(schedule.NumberOfSupersteps()) { + // heuristicCallback_.maxWorkSuperstepVarPtr_ = &maxWorkSuperstepVar_; + // heuristicCallback_.superstepUsedVarPtr_ = &superstepUsedVar_; + // heuristicCallback_.nodeToProcessorSuperstepVarPtr_ = &nodeToProcessorSuperstepVar_; + // heuristicCallback_.edgeVarsPtr_ = &edgeVars_; + + solutionCallback_.nodeToProcessorSuperstepVarPtr_ = &nodeToProcessorSuperstepVar_; } virtual ~TotalCommunicationScheduler() = default; - virtual RETURN_STATUS 
computeScheduleWithTimeLimit(BspSchedule &schedule, unsigned timeout) { - model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeout); - return computeSchedule(schedule); + virtual ReturnStatus ComputeScheduleWithTimeLimit(BspSchedule &schedule, unsigned timeout) { + model_.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeout); + return ComputeSchedule(schedule); } /** @@ -592,59 +594,60 @@ class TotalCommunicationScheduler : public Scheduler { * @throws std::invalid_argument if the instance parameters do not * agree with those of the initial schedule's instance */ - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - auto &instance = schedule.getInstance(); + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + auto &instance = schedule.GetInstance(); - assert(!ignore_workload_balance || !use_lk_heuristic_callback); + assert(!ignoreWorkloadBalance_ || !useLkHeuristicCallback_); - if (use_initial_schedule - && (max_number_supersteps < initial_schedule->numberOfSupersteps() - || instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() - || instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) { - throw std::invalid_argument("Invalid Argument while computeSchedule(instance): instance parameters do not " + if (useInitialSchedule_ + && (maxNumberSupersteps_ < initialSchedule_->NumberOfSupersteps() + || instance.NumberOfProcessors() != initialSchedule_->GetInstance().NumberOfProcessors() + || instance.NumberOfVertices() != initialSchedule_->GetInstance().NumberOfVertices())) { + throw std::invalid_argument("Invalid Argument while ComputeSchedule(instance): instance parameters do not " "agree with those of the initial schedule's instance!"); } - setupVariablesConstraintsObjective(instance); + SetupVariablesConstraintsObjective(instance); - if (use_initial_schedule) { - loadInitialSchedule(); + if (useInitialSchedule_) { + LoadInitialSchedule(); } - 
model.SetIntParam(COPT_INTPARAM_THREADS, 128); - model.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); - model.SetIntParam(COPT_INTPARAM_LPMETHOD, 1); - model.SetIntParam(COPT_INTPARAM_ROUNDINGHEURLEVEL, 1); - model.SetIntParam(COPT_INTPARAM_SUBMIPHEURLEVEL, 1); + model_.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds_); + model_.SetIntParam(COPT_INTPARAM_THREADS, 128); + model_.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); + model_.SetIntParam(COPT_INTPARAM_LPMETHOD, 1); + model_.SetIntParam(COPT_INTPARAM_ROUNDINGHEURLEVEL, 1); + model_.SetIntParam(COPT_INTPARAM_SUBMIPHEURLEVEL, 1); // model.SetIntParam(COPT_INTPARAM_PRESOLVE, 1); // model.SetIntParam(COPT_INTPARAM_CUTLEVEL, 0); - model.SetIntParam(COPT_INTPARAM_TREECUTLEVEL, 2); + model_.SetIntParam(COPT_INTPARAM_TREECUTLEVEL, 2); // model.SetIntParam(COPT_INTPARAM_DIVINGHEURLEVEL, 2); - if (write_solutions_found) { - solution_callback.instance_ptr = &instance; - model.SetCallback(&solution_callback, COPT_CBCONTEXT_MIPSOL); - } - if (use_lk_heuristic_callback) { - heuristic_callback.instance_ptr = &instance; - heuristic_callback.num_step = max_number_supersteps; - model.SetCallback(&heuristic_callback, COPT_CBCONTEXT_MIPSOL); + if (writeSolutionsFound_) { + solutionCallback_.instancePtr_ = &instance; + model_.SetCallback(&solutionCallback_, COPT_CBCONTEXT_MIPSOL); } + // if (useLkHeuristicCallback_) { + // heuristicCallback_.instancePtr_ = &instance; + // heuristicCallback_.numStep_ = maxNumberSupersteps_; + // model.SetCallback(&heuristicCallback_, COPT_CBCONTEXT_MIPSOL); + // } - model.Solve(); + model_.Solve(); - if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - return RETURN_STATUS::OSP_SUCCESS; //, constructBspScheduleFromSolution(instance, true)}; + if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { + return ReturnStatus::OSP_SUCCESS; //, constructBspScheduleFromSolution(instance, true)}; - } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == 
COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + } else if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { + return ReturnStatus::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - return RETURN_STATUS::BEST_FOUND; //, constructBspScheduleFromSolution(instance, true)}; + if (model_.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { + return ReturnStatus::BEST_FOUND; //, constructBspScheduleFromSolution(instance, true)}; } else { - return RETURN_STATUS::TIMEOUT; + return ReturnStatus::TIMEOUT; } } }; @@ -658,12 +661,12 @@ class TotalCommunicationScheduler : public Scheduler { * * @param schedule The provided schedule. */ - inline void setInitialSolutionFromBspSchedule(const BspSchedule &schedule) { - initial_schedule = &schedule; + inline void SetInitialSolutionFromBspSchedule(const BspSchedule &schedule) { + initialSchedule_ = &schedule; - max_number_supersteps = schedule.numberOfSupersteps(); + maxNumberSupersteps_ = schedule.NumberOfSupersteps(); - use_initial_schedule = true; + useInitialSchedule_ = true; } /** @@ -681,14 +684,14 @@ class TotalCommunicationScheduler : public Scheduler { * supersteps is less than the number of supersteps in * the initial solution. */ - void setMaxNumberOfSupersteps(unsigned max) { - if (use_initial_schedule && max < initial_schedule->numberOfSupersteps()) { + void SetMaxNumberOfSupersteps(unsigned max) { + if (useInitialSchedule_ && max < initialSchedule_->NumberOfSupersteps()) { throw std::invalid_argument("Invalid Argument while setting " "max number of supersteps to a value " "which is less than the number of " "supersteps of the initial schedule!"); } - max_number_supersteps = max; + maxNumberSupersteps_ = max; } /** @@ -702,10 +705,10 @@ class TotalCommunicationScheduler : public Scheduler { * @param path The path where the solutions will be written. * @param file_prefix The prefix that will be used for the solution files. 
*/ - inline void enableWriteIntermediateSol(std::string path, std::string file_prefix) { - write_solutions_found = true; - solution_callback.write_solutions_path_cb = path; - solution_callback.solution_file_prefix_cb = file_prefix; + inline void EnableWriteIntermediateSol(std::string path, std::string filePrefix) { + writeSolutionsFound_ = true; + solutionCallback_.writeSolutionsPathCb_ = path; + solutionCallback_.solutionFilePrefixCb_ = filePrefix; } /** @@ -717,7 +720,7 @@ class TotalCommunicationScheduler : public Scheduler { * * @param use True if the memory constraint should be used, false otherwise. */ - inline void setUseMemoryConstraint(bool use) { use_memory_constraint = use; } + inline void SetUseMemoryConstraint(bool use) { useMemoryConstraint_ = use; } /** * @brief Set the use of workload balance constraint. @@ -729,7 +732,7 @@ class TotalCommunicationScheduler : public Scheduler { * * @param use True if the workload balance constraint should be used, false otherwise. */ - inline void setIgnoreWorkloadBalance(bool use) { ignore_workload_balance = use; } + inline void SetIgnoreWorkloadBalance(bool use) { ignoreWorkloadBalance_ = use; } /** * @brief Set the use of LK heuristic callback. @@ -741,7 +744,7 @@ class TotalCommunicationScheduler : public Scheduler { * * @param use True if the LK heuristic callback should be used, false otherwise. */ - inline void setUseLkHeuristicCallback(bool use) { use_lk_heuristic_callback = use; } + inline void SetUseLkHeuristicCallback(bool use) { useLkHeuristicCallback_ = use; } /** * Disables writing intermediate solutions. @@ -750,49 +753,49 @@ class TotalCommunicationScheduler : public Scheduler { * calling this function, the `enableWriteIntermediateSol` function needs * to be called again in order to enable writing of intermediate solutions. 
*/ - inline void disableWriteIntermediateSol() { write_solutions_found = false; } + inline void DisableWriteIntermediateSol() { writeSolutionsFound_ = false; } /** * @brief Get the maximum number of supersteps. * * @return The maximum number of supersteps. */ - inline unsigned getMaxNumberOfSupersteps() const { return max_number_supersteps; } + inline unsigned GetMaxNumberOfSupersteps() const { return maxNumberSupersteps_; } /** * @brief Get the best gap found by the solver. * * @return The best gap found by the solver. */ - inline double bestGap() { return model.GetDblAttr(COPT_DBLATTR_BESTGAP); } + inline double BestGap() { return model_.GetDblAttr(COPT_DBLATTR_BESTGAP); } /** * @brief Get the best objective value found by the solver. * * @return The best objective value found by the solver. */ - inline double bestObjective() { return model.GetDblAttr(COPT_DBLATTR_BESTOBJ); } + inline double BestObjective() { return model_.GetDblAttr(COPT_DBLATTR_BESTOBJ); } /** * @brief Get the best bound found by the solver. * * @return The best bound found by the solver. */ - inline double bestBound() { return model.GetDblAttr(COPT_DBLATTR_BESTBND); } + inline double BestBound() { return model_.GetDblAttr(COPT_DBLATTR_BESTBND); } /** * @brief Sets the time limit for the ILP solving. * * @param time_limit_seconds_ The time limit in seconds. */ - inline void setTimeLimitSeconds(unsigned time_limit_seconds_) { time_limit_seconds = time_limit_seconds_; } + inline void SetTimeLimitSeconds(unsigned timeLimitSeconds) { timeLimitSeconds_ = timeLimitSeconds; } /** * @brief Get the name of the schedule. * * @return The name of the schedule. 
*/ - virtual std::string getScheduleName() const override { return "TotalCommIlp"; } + virtual std::string GetScheduleName() const override { return "TotalCommIlp"; } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/ImprovementScheduler.hpp b/include/osp/bsp/scheduler/ImprovementScheduler.hpp index 05bfcfb4..71937ac2 100644 --- a/include/osp/bsp/scheduler/ImprovementScheduler.hpp +++ b/include/osp/bsp/scheduler/ImprovementScheduler.hpp @@ -28,17 +28,17 @@ namespace osp { * The ImprovementScheduler class provides a common interface for improvement scheduling scheduler. * Subclasses of this class can implement specific improvement scheduler by overriding the virtual methods. */ -template +template class ImprovementScheduler { protected: - unsigned timeLimitSeconds; /**< The time limit in seconds for the improvement algorithm. */ + unsigned timeLimitSeconds_; /**< The time limit in seconds for the improvement algorithm. */ public: /** * @brief Constructor for ImprovementScheduler. * @param timelimit The time limit in seconds for the improvement algorithm. Default is 3600 seconds (1 hour). */ - ImprovementScheduler(unsigned timelimit = 3600) : timeLimitSeconds(timelimit) {} + ImprovementScheduler(unsigned timelimit = 3600) : timeLimitSeconds_(timelimit) {} /** * @brief Destructor for ImprovementScheduler. @@ -49,70 +49,70 @@ class ImprovementScheduler { * @brief Set the time limit in seconds for the improvement algorithm. * @param limit The time limit in seconds. */ - virtual void setTimeLimitSeconds(unsigned int limit) { timeLimitSeconds = limit; } + virtual void SetTimeLimitSeconds(unsigned int limit) { timeLimitSeconds_ = limit; } /** * @brief Set the time limit in hours for the improvement algorithm. * @param limit The time limit in hours. 
*/ - virtual void setTimeLimitHours(unsigned int limit) { timeLimitSeconds = limit * 3600; } + virtual void SetTimeLimitHours(unsigned int limit) { timeLimitSeconds_ = limit * 3600; } /** * @brief Get the time limit in seconds for the improvement algorithm. * @return The time limit in seconds. */ - inline unsigned int getTimeLimitSeconds() const { return timeLimitSeconds; } + inline unsigned int GetTimeLimitSeconds() const { return timeLimitSeconds_; } /** * @brief Get the time limit in hours for the improvement algorithm. * @return The time limit in hours. */ - inline unsigned int getTimeLimitHours() const { return timeLimitSeconds / 3600; } + inline unsigned int GetTimeLimitHours() const { return timeLimitSeconds_ / 3600; } /** * @brief Get the name of the improvement scheduling algorithm. * @return The name of the algorithm as a string. */ - virtual std::string getScheduleName() const = 0; + virtual std::string GetScheduleName() const = 0; /** * @brief Improve the given BspSchedule. * @param schedule The BspSchedule to be improved. * @return The status of the improvement operation. */ - virtual RETURN_STATUS improveSchedule(BspSchedule &schedule) = 0; + virtual ReturnStatus ImproveSchedule(BspSchedule &schedule) = 0; /** * @brief Improve the given BspSchedule within the time limit. * @param schedule The BspSchedule to be improved. * @return The status of the improvement operation. 
*/ - virtual RETURN_STATUS improveScheduleWithTimeLimit(BspSchedule &schedule) = 0; + virtual ReturnStatus ImproveScheduleWithTimeLimit(BspSchedule &schedule) = 0; }; -template -class ComboScheduler : public Scheduler { +template +class ComboScheduler : public Scheduler { private: - Scheduler &base_scheduler; - ImprovementScheduler &improvement_scheduler; + Scheduler &baseScheduler_; + ImprovementScheduler &improvementScheduler_; public: - ComboScheduler(Scheduler &base, ImprovementScheduler &improvement) - : Scheduler(), base_scheduler(base), improvement_scheduler(improvement) {} + ComboScheduler(Scheduler &base, ImprovementScheduler &improvement) + : Scheduler(), baseScheduler_(base), improvementScheduler_(improvement) {} virtual ~ComboScheduler() = default; - virtual std::string getScheduleName() const override { - return base_scheduler.getScheduleName() + "+" + improvement_scheduler.getScheduleName(); + virtual std::string GetScheduleName() const override { + return baseScheduler_.GetScheduleName() + "+" + improvementScheduler_.GetScheduleName(); } - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - RETURN_STATUS status = base_scheduler.computeSchedule(schedule); - if (status != RETURN_STATUS::OSP_SUCCESS and status != RETURN_STATUS::BEST_FOUND) { + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + ReturnStatus status = baseScheduler_.ComputeSchedule(schedule); + if (status != ReturnStatus::OSP_SUCCESS and status != ReturnStatus::BEST_FOUND) { return status; } - return improvement_scheduler.improveSchedule(schedule); + return improvementScheduler_.ImproveSchedule(schedule); } }; diff --git a/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp index 32818027..2240bea9 100644 --- a/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp +++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp 
@@ -23,95 +23,95 @@ limitations under the License. namespace osp { -template -std::vector>> heavy_edge_preprocess(const Graph_t &graph, - const double heavy_is_x_times_median, - const double min_percent_components_retained, - const double bound_component_weight_percent) { - static_assert(is_computational_dag_edge_desc_v, +template +std::vector>> HeavyEdgePreprocess(const GraphT &graph, + const double heavyIsXTimesMedian, + const double minPercentComponentsRetained, + const double boundComponentWeightPercent) { + static_assert(isComputationalDagEdgeDescV, "HeavyEdgePreProcess can only be used with computational DAGs with edge weights."); - using VertexType = vertex_idx_t; - using EdgeType = edge_desc_t; + using VertexType = VertexIdxT; + using EdgeType = EdgeDescT; // Initialising the union find structure - union_find_universe_t uf_structure; - for (const VertexType &vert : graph.vertices()) { - uf_structure.add_object(vert, graph.vertex_work_weight(vert)); + UnionFindUniverseT ufStructure; + for (const VertexType &vert : graph.Vertices()) { + ufStructure.AddObject(vert, graph.VertexWorkWeight(vert)); } // Making edge comunications list - std::vector> edge_communications; - edge_communications.reserve(graph.num_edges()); - for (const auto &edge : edges(graph)) { - if constexpr (has_edge_weights_v) { - edge_communications.emplace_back(graph.edge_comm_weight(edge)); + std::vector> edgeCommunications; + edgeCommunications.reserve(graph.NumEdges()); + for (const auto &edge : Edges(graph)) { + if constexpr (hasEdgeWeightsV) { + edgeCommunications.emplace_back(graph.EdgeCommWeight(edge)); } else { - edge_communications.emplace_back(graph.vertex_comm_weight(source(edge, graph))); + edgeCommunications.emplace_back(graph.VertexCommWeight(Source(edge, graph))); } } // Computing the median and setting it to at least one - e_commw_t median_edge_weight = 1; - if (not edge_communications.empty()) { - auto median_it = edge_communications.begin(); - std::advance(median_it, 
edge_communications.size() / 2); - std::nth_element(edge_communications.begin(), median_it, edge_communications.end()); - median_edge_weight = std::max(edge_communications[edge_communications.size() / 2], static_cast>(1)); + ECommwT medianEdgeWeight = 1; + if (not edgeCommunications.empty()) { + auto medianIt = edgeCommunications.begin(); + std::advance(medianIt, edgeCommunications.size() / 2); + std::nth_element(edgeCommunications.begin(), medianIt, edgeCommunications.end()); + medianEdgeWeight = std::max(edgeCommunications[edgeCommunications.size() / 2], static_cast>(1)); } // Making edge list - e_commw_t minimal_edge_weight = static_cast>(heavy_is_x_times_median * median_edge_weight); - std::vector edge_list; - edge_list.reserve(graph.num_edges()); - for (const auto &edge : edges(graph)) { - if constexpr (has_edge_weights_v) { - if (graph.edge_comm_weight(edge) > minimal_edge_weight) { - edge_list.emplace_back(edge); + ECommwT minimalEdgeWeight = static_cast>(heavyIsXTimesMedian * medianEdgeWeight); + std::vector edgeList; + edgeList.reserve(graph.NumEdges()); + for (const auto &edge : Edges(graph)) { + if constexpr (hasEdgeWeightsV) { + if (graph.EdgeCommWeight(edge) > minimalEdgeWeight) { + edgeList.emplace_back(edge); } } else { - if (graph.vertex_comm_weight(source(edge, graph)) > minimal_edge_weight) { - edge_list.emplace_back(edge); + if (graph.VertexCommWeight(Source(edge, graph)) > minimalEdgeWeight) { + edgeList.emplace_back(edge); } } } - if constexpr (has_edge_weights_v) { + if constexpr (hasEdgeWeightsV) { // Sorting edge list - std::sort(edge_list.begin(), edge_list.end(), [graph](const EdgeType &left, const EdgeType &right) { - return graph.edge_comm_weight(left) > graph.edge_comm_weight(right); + std::sort(edgeList.begin(), edgeList.end(), [graph](const EdgeType &left, const EdgeType &right) { + return graph.EdgeCommWeight(left) > graph.EdgeCommWeight(right); }); } else { - std::sort(edge_list.begin(), edge_list.end(), [graph](const EdgeType 
&left, const EdgeType &right) { - return graph.vertex_comm_weight(source(left, graph)) > graph.vertex_comm_weight(source(right, graph)); + std::sort(edgeList.begin(), edgeList.end(), [graph](const EdgeType &left, const EdgeType &right) { + return graph.VertexCommWeight(Source(left, graph)) > graph.VertexCommWeight(Source(right, graph)); }); } // Computing max component size - v_workw_t max_component_size = 0; - for (const VertexType &vert : graph.vertices()) { - max_component_size += graph.vertex_work_weight(vert); + VWorkwT maxComponentSize = 0; + for (const VertexType &vert : graph.Vertices()) { + maxComponentSize += graph.VertexWorkWeight(vert); } - max_component_size = static_cast>(max_component_size * bound_component_weight_percent); + maxComponentSize = static_cast>(maxComponentSize * boundComponentWeightPercent); // Joining heavy edges - for (const EdgeType &edge : edge_list) { - if (static_cast(uf_structure.get_number_of_connected_components()) - 1.0 - < min_percent_components_retained * static_cast(graph.num_vertices())) { + for (const EdgeType &edge : edgeList) { + if (static_cast(ufStructure.GetNumberOfConnectedComponents()) - 1.0 + < minPercentComponentsRetained * static_cast(graph.NumVertices())) { break; } - v_workw_t weight_comp_a = uf_structure.get_weight_of_component_by_name(source(edge, graph)); - v_workw_t weight_comp_b = uf_structure.get_weight_of_component_by_name(target(edge, graph)); - if (weight_comp_a + weight_comp_b > max_component_size) { + VWorkwT weightCompA = ufStructure.GetWeightOfComponentByName(Source(edge, graph)); + VWorkwT weightCompB = ufStructure.GetWeightOfComponentByName(Target(edge, graph)); + if (weightCompA + weightCompB > maxComponentSize) { continue; } - uf_structure.join_by_name(source(edge, graph), target(edge, graph)); + ufStructure.JoinByName(Source(edge, graph), Target(edge, graph)); } - return uf_structure.get_connected_components(); + return ufStructure.GetConnectedComponents(); } } // namespace osp diff --git 
a/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp index 00ffb584..a063c9c0 100644 --- a/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp +++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp @@ -23,10 +23,10 @@ limitations under the License. namespace osp { -template -class LightEdgeVariancePartitioner : public VariancePartitioner { +template +class LightEdgeVariancePartitioner : public VariancePartitioner { private: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; struct VarianceCompare { bool operator()(const std::pair &lhs, const std::pair &rhs) const { @@ -35,154 +35,151 @@ class LightEdgeVariancePartitioner : public VariancePartitioner(max_percent_idle_processors_, - variance_power_, - increase_parallelism_in_new_superstep_, - max_priority_difference_percent_, - slack_), - heavy_is_x_times_median(heavy_is_x_times_median_), - min_percent_components_retained(min_percent_components_retained_), - bound_component_weight_percent(bound_component_weight_percent_) {}; + LightEdgeVariancePartitioner(double maxPercentIdleProcessors = 0.2, + double variancePower = 2, + double heavyIsXTimesMedian = 5.0, + double minPercentComponentsRetained = 0.8, + double boundComponentWeightPercent = 0.7, + bool increaseParallelismInNewSuperstep = true, + float maxPriorityDifferencePercent = 0.34f, + float slack = 0.0f) + : VariancePartitioner( + maxPercentIdleProcessors, variancePower, increaseParallelismInNewSuperstep, maxPriorityDifferencePercent, slack), + heavyIsXTimesMedian_(heavyIsXTimesMedian), + minPercentComponentsRetained_(minPercentComponentsRetained), + boundComponentWeightPercent_(boundComponentWeightPercent) {}; virtual ~LightEdgeVariancePartitioner() = default; - std::string getScheduleName() const override { return "LightEdgeVariancePartitioner"; }; + std::string 
GetScheduleName() const override { return "LightEdgeVariancePartitioner"; }; - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { // DAGPartition output_partition(instance); - using base = VariancePartitioner; + using Base = VariancePartitioner; - const auto &instance = schedule.getInstance(); - const auto &n_vert = instance.numberOfVertices(); - const unsigned &n_processors = instance.numberOfProcessors(); - const auto &graph = instance.getComputationalDag(); + const auto &instance = schedule.GetInstance(); + const auto &nVert = instance.NumberOfVertices(); + const unsigned &nProcessors = instance.NumberOfProcessors(); + const auto &graph = instance.GetComputationalDag(); unsigned superstep = 0; - if constexpr (is_memory_constraint_v) { - base::memory_constraint.initialize(instance); - } else if constexpr (is_memory_constraint_schedule_v) { - base::memory_constraint.initialize(schedule, superstep); + if constexpr (isMemoryConstraintV) { + Base::memoryConstraint_.Initialize(instance); + } else if constexpr (isMemoryConstraintScheduleV) { + Base::memoryConstraint_.Initialize(schedule, superstep); } - std::vector has_vertex_been_assigned(n_vert, false); + std::vector hasVertexBeenAssigned(nVert, false); std::set, VarianceCompare> ready; - std::vector, VarianceCompare>> procReady(n_processors); + std::vector, VarianceCompare>> procReady(nProcessors); std::set, VarianceCompare> allReady; - std::vector, VarianceCompare>> procReadyPrior(n_processors); + std::vector, VarianceCompare>> procReadyPrior(nProcessors); - std::vector which_proc_ready_prior(n_vert, n_processors); + std::vector whichProcReadyPrior(nVert, nProcessors); - std::vector variance_priorities = base::compute_work_variance(graph, base::variance_power); - std::vector num_unallocated_parents(n_vert, 0); + std::vector variancePriorities = Base::ComputeWorkVariance(graph, Base::variancePower_); + std::vector 
numUnallocatedParents(nVert, 0); - v_workw_t total_work = 0; - for (const auto &v : graph.vertices()) { - schedule.setAssignedProcessor(v, n_processors); + VWorkwT totalWork = 0; + for (const auto &v : graph.Vertices()) { + schedule.SetAssignedProcessor(v, nProcessors); - total_work += graph.vertex_work_weight(v); + totalWork += graph.VertexWorkWeight(v); - if (is_source(v, graph)) { - ready.insert(std::make_pair(v, variance_priorities[v])); - allReady.insert(std::make_pair(v, variance_priorities[v])); + if (IsSource(v, graph)) { + ready.insert(std::make_pair(v, variancePriorities[v])); + allReady.insert(std::make_pair(v, variancePriorities[v])); } else { - num_unallocated_parents[v] = graph.in_degree(v); + numUnallocatedParents[v] = graph.InDegree(v); } } - std::vector> total_partition_work(n_processors, 0); - std::vector> superstep_partition_work(n_processors, 0); + std::vector> totalPartitionWork(nProcessors, 0); + std::vector> superstepPartitionWork(nProcessors, 0); - std::vector> preprocessed_partition = heavy_edge_preprocess( - graph, heavy_is_x_times_median, min_percent_components_retained, bound_component_weight_percent / n_processors); + std::vector> preprocessedPartition = HeavyEdgePreprocess( + graph, heavyIsXTimesMedian_, minPercentComponentsRetained_, boundComponentWeightPercent_ / nProcessors); - std::vector which_preprocess_partition(graph.num_vertices()); - for (size_t i = 0; i < preprocessed_partition.size(); i++) { - for (const VertexType &vert : preprocessed_partition[i]) { - which_preprocess_partition[vert] = i; + std::vector whichPreprocessPartition(graph.NumVertices()); + for (size_t i = 0; i < preprocessedPartition.size(); i++) { + for (const VertexType &vert : preprocessedPartition[i]) { + whichPreprocessPartition[vert] = i; } } - std::vector> memory_cost_of_preprocessed_partition(preprocessed_partition.size(), 0); - for (size_t i = 0; i < preprocessed_partition.size(); i++) { - for (const auto &vert : preprocessed_partition[i]) { - 
memory_cost_of_preprocessed_partition[i] += graph.vertex_mem_weight(vert); + std::vector> memoryCostOfPreprocessedPartition(preprocessedPartition.size(), 0); + for (size_t i = 0; i < preprocessedPartition.size(); i++) { + for (const auto &vert : preprocessedPartition[i]) { + memoryCostOfPreprocessedPartition[i] += graph.VertexMemWeight(vert); } } - std::vector> transient_cost_of_preprocessed_partition(preprocessed_partition.size(), 0); - for (size_t i = 0; i < preprocessed_partition.size(); i++) { - for (const auto &vert : preprocessed_partition[i]) { - transient_cost_of_preprocessed_partition[i] - = std::max(transient_cost_of_preprocessed_partition[i], graph.vertex_comm_weight(vert)); + std::vector> transientCostOfPreprocessedPartition(preprocessedPartition.size(), 0); + for (size_t i = 0; i < preprocessedPartition.size(); i++) { + for (const auto &vert : preprocessedPartition[i]) { + transientCostOfPreprocessedPartition[i] + = std::max(transientCostOfPreprocessedPartition[i], graph.VertexCommWeight(vert)); } } - std::set free_processors; + std::set freeProcessors; bool endsuperstep = false; - unsigned num_unable_to_partition_node_loop = 0; + unsigned numUnableToPartitionNodeLoop = 0; while (!ready.empty()) { // Increase memory capacity if needed - if (num_unable_to_partition_node_loop == 1) { + if (numUnableToPartitionNodeLoop == 1) { endsuperstep = true; // std::cout << "\nCall for new superstep - unable to schedule.\n"; } else { - if constexpr (base::use_memory_constraint) { - if (num_unable_to_partition_node_loop >= 2) { - return RETURN_STATUS::ERROR; + if constexpr (Base::useMemoryConstraint_) { + if (numUnableToPartitionNodeLoop >= 2) { + return ReturnStatus::ERROR; } } } // Checking if new superstep is needed - // std::cout << "freeprocessor " << free_processors.size() << " idle thresh " << max_percent_idle_processors - // * n_processors << " ready size " << ready.size() << " small increase " << 1.2 * (n_processors - - // free_processors.size()) << " large 
increase " << n_processors - free_processors.size() + (0.5 * - // free_processors.size()) << "\n"; - if (num_unable_to_partition_node_loop == 0 - && static_cast(free_processors.size()) > base::max_percent_idle_processors * n_processors - && ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors - || static_cast(ready.size()) >= 1.2 * (n_processors - static_cast(free_processors.size())) - || static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) - + (0.5 * static_cast(free_processors.size())))) { + // std::cout << "freeprocessor " << freeProcessors.size() << " idle thresh " << maxPercentIdleProcessors_ + // * nProcessors << " ready size " << ready.size() << " small increase " << 1.2 * (nProcessors - + // freeProcessors.size()) << " large increase " << nProcessors - freeProcessors.size() + (0.5 * + // freeProcessors.size()) << "\n"; + if (numUnableToPartitionNodeLoop == 0 + && static_cast(freeProcessors.size()) > Base::maxPercentIdleProcessors_ * nProcessors + && ((!Base::increaseParallelismInNewSuperstep_) || ready.size() >= nProcessors + || static_cast(ready.size()) >= 1.2 * (nProcessors - static_cast(freeProcessors.size())) + || static_cast(ready.size()) >= nProcessors - static_cast(freeProcessors.size()) + + (0.5 * static_cast(freeProcessors.size())))) { endsuperstep = true; // std::cout << "\nCall for new superstep - parallelism.\n"; } - std::vector processor_priorities = base::computeProcessorPrioritiesInterpolation( - superstep_partition_work, total_partition_work, total_work, instance); + std::vector processorPriorities + = Base::ComputeProcessorPrioritiesInterpolation(superstepPartitionWork, totalPartitionWork, totalWork, instance); - float min_priority = processor_priorities[0]; - float max_priority = processor_priorities[0]; - for (const auto &prio : processor_priorities) { - min_priority = std::min(min_priority, prio); - max_priority = std::max(max_priority, prio); + float minPriority = 
processorPriorities[0]; + float maxPriority = processorPriorities[0]; + for (const auto &prio : processorPriorities) { + minPriority = std::min(minPriority, prio); + maxPriority = std::max(maxPriority, prio); } - if (num_unable_to_partition_node_loop == 0 - && (max_priority - min_priority) > base::max_priority_difference_percent * static_cast(total_work) - / static_cast(n_processors)) { + if (numUnableToPartitionNodeLoop == 0 + && (maxPriority - minPriority) + > Base::maxPriorityDifferencePercent_ * static_cast(totalWork) / static_cast(nProcessors)) { endsuperstep = true; // std::cout << "\nCall for new superstep - difference.\n"; } @@ -190,20 +187,20 @@ class LightEdgeVariancePartitioner : public VariancePartitioner processors_in_order = base::computeProcessorPriority( - superstep_partition_work, total_partition_work, total_work, instance, base::slack); + std::vector processorsInOrder + = Base::ComputeProcessorPriority(superstepPartitionWork, totalPartitionWork, totalWork, instance, Base::slack_); - for (unsigned &proc : processors_in_order) { - if ((free_processors.find(proc)) != free_processors.cend()) { + for (unsigned &proc : processorsInOrder) { + if ((freeProcessors.find(proc)) != freeProcessors.cend()) { continue; } // Check for too many free processors - needed here because free processors may not have been detected // yet - if (num_unable_to_partition_node_loop == 0 - && static_cast(free_processors.size()) > base::max_percent_idle_processors * n_processors - && ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors - || static_cast(ready.size()) >= 1.2 * (n_processors - static_cast(free_processors.size())) - || static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) - + (0.5 * static_cast(free_processors.size())))) { + if (numUnableToPartitionNodeLoop == 0 + && static_cast(freeProcessors.size()) > this->maxPercentIdleProcessors_ * nProcessors + && ((!this->increaseParallelismInNewSuperstep_) || ready.size() 
>= nProcessors + || static_cast(ready.size()) >= 1.2 * (nProcessors - static_cast(freeProcessors.size())) + || static_cast(ready.size()) >= nProcessors - static_cast(freeProcessors.size()) + + (0.5 * static_cast(freeProcessors.size())))) { endsuperstep = true; // std::cout << "\nCall for new superstep - parallelism.\n"; break; } - assigned_a_node = false; + assignedANode = false; // Choosing next node - VertexType next_node; - for (auto vertex_prior_pair_iter = procReady[proc].begin(); vertex_prior_pair_iter != procReady[proc].end(); - vertex_prior_pair_iter++) { - if (assigned_a_node) { + VertexType nextNode; + for (auto vertexPriorPairIter = procReady[proc].begin(); vertexPriorPairIter != procReady[proc].end(); + vertexPriorPairIter++) { + if (assignedANode) { break; } - const VertexType &vert = vertex_prior_pair_iter->first; - if constexpr (base::use_memory_constraint) { - if (has_vertex_been_assigned[vert] - || base::memory_constraint.can_add( + const VertexType &vert = vertexPriorPairIter->first; + if constexpr (Base::useMemoryConstraint_) { + if (hasVertexBeenAssigned[vert] + || Base::memoryConstraint_.CanAdd( proc, - memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]], - transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) { - next_node = vert; - assigned_a_node = true; + memoryCostOfPreprocessedPartition[whichPreprocessPartition[vert]], + transientCostOfPreprocessedPartition[whichPreprocessPartition[vert]])) { + nextNode = vert; + assignedANode = true; } } else { - next_node = vert; - assigned_a_node = true; + nextNode = vert; + assignedANode = true; } } - for (auto vertex_prior_pair_iter = procReadyPrior[proc].begin(); - vertex_prior_pair_iter != procReadyPrior[proc].end(); - vertex_prior_pair_iter++) { - if (assigned_a_node) { + for (auto vertexPriorPairIter = procReadyPrior[proc].begin(); vertexPriorPairIter != procReadyPrior[proc].end(); + vertexPriorPairIter++) { + if (assignedANode) { break; } - const 
VertexType &vert = vertex_prior_pair_iter->first; - if constexpr (base::use_memory_constraint) { - if (has_vertex_been_assigned[vert] - || base::memory_constraint.can_add( + const VertexType &vert = vertexPriorPairIter->first; + if constexpr (Base::useMemoryConstraint_) { + if (hasVertexBeenAssigned[vert] + || Base::memoryConstraint_.CanAdd( proc, - memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]], - transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) { - next_node = vert; - assigned_a_node = true; + memoryCostOfPreprocessedPartition[whichPreprocessPartition[vert]], + transientCostOfPreprocessedPartition[whichPreprocessPartition[vert]])) { + nextNode = vert; + assignedANode = true; } } else { - next_node = vert; - assigned_a_node = true; + nextNode = vert; + assignedANode = true; } } - for (auto vertex_prior_pair_iter = allReady.begin(); vertex_prior_pair_iter != allReady.cend(); - vertex_prior_pair_iter++) { - if (assigned_a_node) { + for (auto vertexPriorPairIter = allReady.begin(); vertexPriorPairIter != allReady.cend(); vertexPriorPairIter++) { + if (assignedANode) { break; } - const VertexType &vert = vertex_prior_pair_iter->first; - if constexpr (base::use_memory_constraint) { - if (has_vertex_been_assigned[vert] - || base::memory_constraint.can_add( + const VertexType &vert = vertexPriorPairIter->first; + if constexpr (Base::useMemoryConstraint_) { + if (hasVertexBeenAssigned[vert] + || Base::memoryConstraint_.CanAdd( proc, - memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]], - transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) { - next_node = vert; - assigned_a_node = true; + memoryCostOfPreprocessedPartition[whichPreprocessPartition[vert]], + transientCostOfPreprocessedPartition[whichPreprocessPartition[vert]])) { + nextNode = vert; + assignedANode = true; } } else { - next_node = vert; - assigned_a_node = true; + nextNode = vert; + assignedANode = true; } } - 
if (!assigned_a_node) { - free_processors.insert(proc); + if (!assignedANode) { + freeProcessors.insert(proc); } else { // Assignments - if (has_vertex_been_assigned[next_node]) { - unsigned proc_alloc_prior = schedule.assignedProcessor(next_node); + if (hasVertexBeenAssigned[nextNode]) { + unsigned procAllocPrior = schedule.AssignedProcessor(nextNode); - // std::cout << "Allocated node " << next_node << " to processor " << proc_alloc_prior << " + // std::cout << "Allocated node " << nextNode << " to processor " << procAllocPrior << " // previously.\n"; - schedule.setAssignedSuperstep(next_node, superstep); + schedule.SetAssignedSuperstep(nextNode, superstep); - num_unable_to_partition_node_loop = 0; + numUnableToPartitionNodeLoop = 0; // Updating loads - superstep_partition_work[proc_alloc_prior] += graph.vertex_work_weight(next_node); + superstepPartitionWork[procAllocPrior] += graph.VertexWorkWeight(nextNode); // Deletion from Queues - std::pair pair = std::make_pair(next_node, variance_priorities[next_node]); + std::pair pair = std::make_pair(nextNode, variancePriorities[nextNode]); ready.erase(pair); procReady[proc].erase(pair); procReadyPrior[proc].erase(pair); allReady.erase(pair); - if (which_proc_ready_prior[next_node] != n_processors) { - procReadyPrior[which_proc_ready_prior[next_node]].erase(pair); + if (whichProcReadyPrior[nextNode] != nProcessors) { + procReadyPrior[whichProcReadyPrior[nextNode]].erase(pair); } // Checking children - for (const auto &chld : graph.children(next_node)) { - num_unallocated_parents[chld] -= 1; - if (num_unallocated_parents[chld] == 0) { + for (const auto &chld : graph.Children(nextNode)) { + numUnallocatedParents[chld] -= 1; + if (numUnallocatedParents[chld] == 0) { // std::cout << "Inserting child " << chld << " into ready.\n"; - ready.insert(std::make_pair(chld, variance_priorities[chld])); - bool is_proc_ready = true; - for (const auto &parent : graph.parents(chld)) { - if ((schedule.assignedProcessor(parent) != 
proc_alloc_prior) - && (schedule.assignedSuperstep(parent) == superstep)) { - is_proc_ready = false; + ready.insert(std::make_pair(chld, variancePriorities[chld])); + bool isProcReady = true; + for (const auto &parent : graph.Parents(chld)) { + if ((schedule.AssignedProcessor(parent) != procAllocPrior) + && (schedule.AssignedSuperstep(parent) == superstep)) { + isProcReady = false; break; } } - if (is_proc_ready) { - procReady[proc_alloc_prior].insert(std::make_pair(chld, variance_priorities[chld])); + if (isProcReady) { + procReady[procAllocPrior].insert(std::make_pair(chld, variancePriorities[chld])); // std::cout << "Inserting child " << chld << " into procReady for processor " << - // proc_alloc_prior << ".\n"; + // procAllocPrior << ".\n"; } } } } else { - schedule.setAssignedProcessor(next_node, proc); - has_vertex_been_assigned[next_node] = true; - // std::cout << "Allocated node " << next_node << " to processor " << proc << ".\n"; + schedule.SetAssignedProcessor(nextNode, proc); + hasVertexBeenAssigned[nextNode] = true; + // std::cout << "Allocated node " << nextNode << " to processor " << proc << ".\n"; - schedule.setAssignedSuperstep(next_node, superstep); - num_unable_to_partition_node_loop = 0; + schedule.SetAssignedSuperstep(nextNode, superstep); + numUnableToPartitionNodeLoop = 0; // Updating loads - total_partition_work[proc] += graph.vertex_work_weight(next_node); - superstep_partition_work[proc] += graph.vertex_work_weight(next_node); + totalPartitionWork[proc] += graph.VertexWorkWeight(nextNode); + superstepPartitionWork[proc] += graph.VertexWorkWeight(nextNode); - if constexpr (base::use_memory_constraint) { - base::memory_constraint.add(next_node, proc); + if constexpr (Base::useMemoryConstraint_) { + Base::memoryConstraint_.Add(nextNode, proc); } - // total_partition_memory[proc] += graph.vertex_mem_weight(next_node); + // total_partition_memory[proc] += graph.VertexMemWeight(nextNode); // transient_partition_memory[proc] = - // 
std::max(transient_partition_memory[proc], graph.vertex_comm_weight(next_node)); + // std::max(transient_partition_memory[proc], graph.VertexCommWeight(nextNode)); // Deletion from Queues - std::pair pair = std::make_pair(next_node, variance_priorities[next_node]); + std::pair pair = std::make_pair(nextNode, variancePriorities[nextNode]); ready.erase(pair); procReady[proc].erase(pair); procReadyPrior[proc].erase(pair); allReady.erase(pair); - if (which_proc_ready_prior[next_node] != n_processors) { - procReadyPrior[which_proc_ready_prior[next_node]].erase(pair); + if (whichProcReadyPrior[nextNode] != nProcessors) { + procReadyPrior[whichProcReadyPrior[nextNode]].erase(pair); } // Checking children - for (const auto &chld : graph.children(next_node)) { - num_unallocated_parents[chld] -= 1; - if (num_unallocated_parents[chld] == 0) { + for (const auto &chld : graph.Children(nextNode)) { + numUnallocatedParents[chld] -= 1; + if (numUnallocatedParents[chld] == 0) { // std::cout << "Inserting child " << chld << " into ready.\n"; - ready.insert(std::make_pair(chld, variance_priorities[chld])); - bool is_proc_ready = true; - for (const auto &parent : graph.parents(chld)) { - if ((schedule.assignedProcessor(parent) != proc) - && (schedule.assignedSuperstep(parent) == superstep)) { - is_proc_ready = false; + ready.insert(std::make_pair(chld, variancePriorities[chld])); + bool isProcReady = true; + for (const auto &parent : graph.Parents(chld)) { + if ((schedule.AssignedProcessor(parent) != proc) + && (schedule.AssignedSuperstep(parent) == superstep)) { + isProcReady = false; break; } } - if (is_proc_ready) { - procReady[proc].insert(std::make_pair(chld, variance_priorities[chld])); + if (isProcReady) { + procReady[proc].insert(std::make_pair(chld, variancePriorities[chld])); // std::cout << "Inserting child " << chld << " into procReady for processor " << // proc << ".\n"; } @@ -405,38 +400,38 @@ class LightEdgeVariancePartitioner : public VariancePartitioner(((-2.0) * 
pow(alpha, 3.0)) + (3.0 * pow(alpha, 2.0))); } }; -struct superstep_only_interpolation { +struct SuperstepOnlyInterpolation { float operator()(float, const float) { return 0.0f; }; }; -struct global_only_interpolation { +struct GlobalOnlyInterpolation { float operator()(float, const float) { return 1.0f; }; }; -template -class LoadBalancerBase : public Scheduler { - static_assert(std::is_invocable_r::value, +template +class LoadBalancerBase : public Scheduler { + static_assert(std::is_invocable_r::value, "Interpolation_t must be invocable with two float arguments and return a float."); protected: @@ -56,45 +56,45 @@ class LoadBalancerBase : public Scheduler { /// @param instance bsp instance /// @param slack how much to ignore global balance /// @return vector with the interpolated priorities - std::vector computeProcessorPrioritiesInterpolation(const std::vector> &superstep_partition_work, - const std::vector> &total_partition_work, - const v_workw_t &total_work, - const BspInstance &instance, + std::vector ComputeProcessorPrioritiesInterpolation(const std::vector> &superstepPartitionWork, + const std::vector> &totalPartitionWork, + const VWorkwT &totalWork, + const BspInstance &instance, const float slack = 0.0) { - v_workw_t work_till_now = 0; - for (const auto &part_work : total_partition_work) { - work_till_now += part_work; + VWorkwT workTillNow = 0; + for (const auto &partWork : totalPartitionWork) { + workTillNow += partWork; } - float percentage_complete = static_cast(work_till_now) / static_cast(total_work); + float percentageComplete = static_cast(workTillNow) / static_cast(totalWork); - float value = Interpolation_t()(percentage_complete, slack); + float value = InterpolationT()(percentageComplete, slack); - std::vector proc_prio(instance.numberOfProcessors()); - for (size_t i = 0; i < proc_prio.size(); i++) { - assert(static_cast(total_partition_work[i]) < std::numeric_limits::max() - && static_cast(superstep_partition_work[i]) < 
std::numeric_limits::max()); - proc_prio[i] = ((1 - value) * static_cast(superstep_partition_work[i])) - + (value * static_cast(total_partition_work[i])); + std::vector procPrio(instance.NumberOfProcessors()); + for (size_t i = 0; i < procPrio.size(); i++) { + assert(static_cast(totalPartitionWork[i]) < std::numeric_limits::max() + && static_cast(superstepPartitionWork[i]) < std::numeric_limits::max()); + procPrio[i] = ((1 - value) * static_cast(superstepPartitionWork[i])) + + (value * static_cast(totalPartitionWork[i])); } - return proc_prio; + return procPrio; } /// @brief Computes processor priorities - /// @param superstep_partition_work vector with current work distribution in current superstep - /// @param total_partition_work vector with current work distribution overall - /// @param total_work total work weight of all nodes of the graph + /// @param superstepPartitionWork vector with current work distribution in current superstep + /// @param totalPartitionWork vector with current work distribution overall + /// @param totalWork total work weight of all nodes of the graph /// @param instance bsp instance /// @param slack how much to ignore global balance /// @return vector with the processors in order of priority - std::vector computeProcessorPriority(const std::vector> &superstep_partition_work, - const std::vector> &total_partition_work, - const v_workw_t &total_work, - const BspInstance &instance, + std::vector ComputeProcessorPriority(const std::vector> &superstepPartitionWork, + const std::vector> &totalPartitionWork, + const VWorkwT &totalWork, + const BspInstance &instance, const float slack = 0.0) { - return sorting_arrangement( - computeProcessorPrioritiesInterpolation(superstep_partition_work, total_partition_work, total_work, instance, slack)); + return SortingArrangement( + ComputeProcessorPrioritiesInterpolation(superstepPartitionWork, totalPartitionWork, totalWork, instance, slack)); } public: diff --git 
a/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp index 6b5f904c..fd233cc8 100644 --- a/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp +++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp @@ -24,11 +24,11 @@ limitations under the License. namespace osp { -template -class VariancePartitioner : public LoadBalancerBase { - static_assert(is_computational_dag_v, "VariancePartitioner can only be used with computational DAGs."); +template +class VariancePartitioner : public LoadBalancerBase { + static_assert(isComputationalDagV, "VariancePartitioner can only be used with computational DAGs."); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; struct VarianceCompare { bool operator()(const std::pair &lhs, const std::pair &rhs) const { @@ -37,160 +37,160 @@ class VariancePartitioner : public LoadBalancerBase { }; protected: - constexpr static bool use_memory_constraint = is_memory_constraint_v - or is_memory_constraint_schedule_v; + constexpr static bool useMemoryConstraint_ = isMemoryConstraintV + or isMemoryConstraintScheduleV; - static_assert(not use_memory_constraint or std::is_same_v, - "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); + static_assert(not useMemoryConstraint_ or std::is_same_v, + "GraphT must be the same as MemoryConstraintT::GraphImplT."); - MemoryConstraint_t memory_constraint; + MemoryConstraintT memoryConstraint_; /// @brief threshold percentage of idle processors as to when a new superstep should be introduced - double max_percent_idle_processors; + double maxPercentIdleProcessors_; /// @brief the power in the power mean average of the variance scheduler - double variance_power; + double variancePower_; /// @brief whether or not parallelism should be increased in the next superstep - bool increase_parallelism_in_new_superstep; + bool increaseParallelismInNewSuperstep_; /// 
@brief percentage of the average workload by which the processor priorities may diverge - float max_priority_difference_percent; + float maxPriorityDifferencePercent_; /// @brief how much to ignore the global work balance, value between 0 and 1 - float slack; + float slack_; /// @brief Computes a power mean average of the bottom node distance /// @param graph graph /// @param power the power in the power mean average /// @return vector of the logarithm of power mean averaged bottom node distance - std::vector compute_work_variance(const Graph_t &graph, double power = 2) const { - std::vector work_variance(graph.num_vertices(), 0.0); + std::vector ComputeWorkVariance(const GraphT &graph, double power = 2) const { + std::vector workVariance(graph.NumVertices(), 0.0); - const auto top_order = GetTopOrder(graph); + const auto topOrder = GetTopOrder(graph); - for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) { + for (auto rIter = topOrder.rbegin(); rIter != topOrder.crend(); rIter++) { double temp = 0; - double max_priority = 0; - for (const auto &child : graph.children(*r_iter)) { - max_priority = std::max(work_variance[child], max_priority); + double maxPriority = 0; + for (const auto &child : graph.Children(*rIter)) { + maxPriority = std::max(workVariance[child], maxPriority); } - for (const auto &child : graph.children(*r_iter)) { - temp += std::exp(power * (work_variance[child] - max_priority)); + for (const auto &child : graph.Children(*rIter)) { + temp += std::exp(power * (workVariance[child] - maxPriority)); } - temp = std::log(temp) / power + max_priority; + temp = std::log(temp) / power + maxPriority; - double node_weight = std::log(graph.vertex_work_weight(*r_iter)); - double larger_val = node_weight > temp ? node_weight : temp; + double nodeWeight = std::log(graph.VertexWorkWeight(*rIter)); + double largerVal = nodeWeight > temp ? 
nodeWeight : temp; - work_variance[*r_iter] = std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; + workVariance[*rIter] = std::log(std::exp(nodeWeight - largerVal) + std::exp(temp - largerVal)) + largerVal; } - return work_variance; + return workVariance; } public: - VariancePartitioner(double max_percent_idle_processors_ = 0.2, - double variance_power_ = 2.0, - bool increase_parallelism_in_new_superstep_ = true, - float max_priority_difference_percent_ = 0.34f, - float slack_ = 0.0f) - : max_percent_idle_processors(max_percent_idle_processors_), - variance_power(variance_power_), - increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_), - max_priority_difference_percent(max_priority_difference_percent_), - slack(slack_) {}; + VariancePartitioner(double maxPercentIdleProcessors = 0.2, + double variancePower = 2.0, + bool increaseParallelismInNewSuperstep = true, + float maxPriorityDifferencePercent = 0.34f, + float slack = 0.0f) + : maxPercentIdleProcessors_(maxPercentIdleProcessors), + variancePower_(variancePower), + increaseParallelismInNewSuperstep_(increaseParallelismInNewSuperstep), + maxPriorityDifferencePercent_(maxPriorityDifferencePercent), + slack_(slack) {}; virtual ~VariancePartitioner() = default; - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); - const auto &n_vert = instance.numberOfVertices(); - const unsigned &n_processors = instance.numberOfProcessors(); - const auto &graph = instance.getComputationalDag(); + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); + const auto &nVert = instance.NumberOfVertices(); + const unsigned &nProcessors = instance.NumberOfProcessors(); + const auto &graph = instance.GetComputationalDag(); unsigned superstep = 0; - if constexpr (is_memory_constraint_v) { - memory_constraint.initialize(instance); - } else if 
constexpr (is_memory_constraint_schedule_v) { - memory_constraint.initialize(schedule, superstep); + if constexpr (isMemoryConstraintV) { + memoryConstraint_.Initialize(instance); + } else if constexpr (isMemoryConstraintScheduleV) { + memoryConstraint_.Initialize(schedule, superstep); } - v_workw_t total_work = 0; + VWorkwT totalWork = 0; - std::vector> total_partition_work(n_processors, 0); - std::vector> superstep_partition_work(n_processors, 0); + std::vector> totalPartitionWork(nProcessors, 0); + std::vector> superstepPartitionWork(nProcessors, 0); - std::vector variance_priorities = compute_work_variance(graph, variance_power); - std::vector num_unallocated_parents(n_vert, 0); + std::vector variancePriorities = ComputeWorkVariance(graph, variancePower_); + std::vector numUnallocatedParents(nVert, 0); std::set, VarianceCompare> ready; - std::vector, VarianceCompare>> procReady(n_processors); + std::vector, VarianceCompare>> procReady(nProcessors); std::set, VarianceCompare> allReady; - std::vector, VarianceCompare>> procReadyPrior(n_processors); + std::vector, VarianceCompare>> procReadyPrior(nProcessors); - std::vector which_proc_ready_prior(n_vert, n_processors); + std::vector whichProcReadyPrior(nVert, nProcessors); - for (const auto &v : graph.vertices()) { - schedule.setAssignedProcessor(v, n_processors); + for (const auto &v : graph.Vertices()) { + schedule.SetAssignedProcessor(v, nProcessors); - total_work += graph.vertex_work_weight(v); + totalWork += graph.VertexWorkWeight(v); - if (is_source(v, graph)) { - ready.insert(std::make_pair(v, variance_priorities[v])); - allReady.insert(std::make_pair(v, variance_priorities[v])); + if (IsSource(v, graph)) { + ready.insert(std::make_pair(v, variancePriorities[v])); + allReady.insert(std::make_pair(v, variancePriorities[v])); } else { - num_unallocated_parents[v] = graph.in_degree(v); + numUnallocatedParents[v] = graph.InDegree(v); } } - std::set free_processors; + std::set freeProcessors; bool endsuperstep = 
false; - unsigned num_unable_to_partition_node_loop = 0; - // RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; + unsigned numUnableToPartitionNodeLoop = 0; + // ReturnStatus status = ReturnStatus::OSP_SUCCESS; while (!ready.empty()) { // Increase memory capacity if needed - if (num_unable_to_partition_node_loop == 1) { + if (numUnableToPartitionNodeLoop == 1) { endsuperstep = true; // std::cout << "\nCall for new superstep - unable to schedule.\n"; } else { - if constexpr (use_memory_constraint) { - if (num_unable_to_partition_node_loop >= 2) { - return RETURN_STATUS::ERROR; + if constexpr (useMemoryConstraint_) { + if (numUnableToPartitionNodeLoop >= 2) { + return ReturnStatus::ERROR; } } } // Checking if new superstep is needed - // std::cout << "freeprocessor " << free_processors.size() << " idle thresh " << max_percent_idle_processors - // * n_processors << " ready size " << ready.size() << " small increase " << 1.2 * (n_processors - - // free_processors.size()) << " large increase " << n_processors - free_processors.size() + (0.5 * - // free_processors.size()) << "\n"; - if (num_unable_to_partition_node_loop == 0 - && static_cast(free_processors.size()) > max_percent_idle_processors * n_processors - && ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors - || static_cast(ready.size()) >= 1.2 * (n_processors - static_cast(free_processors.size())) - || static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) - + (0.5 * static_cast(free_processors.size())))) { + // std::cout << "freeprocessor " << freeProcessors.size() << " idle thresh " << maxPercentIdleProcessors_ + // * nProcessors << " ready size " << ready.size() << " small increase " << 1.2 * (nProcessors - + // freeProcessors.size()) << " large increase " << nProcessors - freeProcessors.size() + (0.5 * + // freeProcessors.size()) << "\n"; + if (numUnableToPartitionNodeLoop == 0 + && static_cast(freeProcessors.size()) > maxPercentIdleProcessors_ * nProcessors + 
&& ((!increaseParallelismInNewSuperstep_) || ready.size() >= nProcessors + || static_cast(ready.size()) >= 1.2 * (nProcessors - static_cast(freeProcessors.size())) + || static_cast(ready.size()) >= nProcessors - static_cast(freeProcessors.size()) + + (0.5 * static_cast(freeProcessors.size())))) { endsuperstep = true; // std::cout << "\nCall for new superstep - parallelism.\n"; } - std::vector processor_priorities - = LoadBalancerBase::computeProcessorPrioritiesInterpolation( - superstep_partition_work, total_partition_work, total_work, instance); - float min_priority = processor_priorities[0]; - float max_priority = processor_priorities[0]; - for (const auto &prio : processor_priorities) { - min_priority = std::min(min_priority, prio); - max_priority = std::max(max_priority, prio); + std::vector processorPriorities + = LoadBalancerBase::ComputeProcessorPrioritiesInterpolation( + superstepPartitionWork, totalPartitionWork, totalWork, instance); + float minPriority = processorPriorities[0]; + float maxPriority = processorPriorities[0]; + for (const auto &prio : processorPriorities) { + minPriority = std::min(minPriority, prio); + maxPriority = std::max(maxPriority, prio); } - if (num_unable_to_partition_node_loop == 0 - && (max_priority - min_priority) - > max_priority_difference_percent * static_cast(total_work) / static_cast(n_processors)) { + if (numUnableToPartitionNodeLoop == 0 + && (maxPriority - minPriority) + > maxPriorityDifferencePercent_ * static_cast(totalWork) / static_cast(nProcessors)) { endsuperstep = true; // std::cout << "\nCall for new superstep - difference.\n"; } @@ -198,20 +198,20 @@ class VariancePartitioner : public LoadBalancerBase { // Introducing new superstep if (endsuperstep) { allReady = ready; - for (unsigned proc = 0; proc < n_processors; proc++) { + for (unsigned proc = 0; proc < nProcessors; proc++) { for (const auto &item : procReady[proc]) { procReadyPrior[proc].insert(item); - which_proc_ready_prior[item.first] = proc; + 
whichProcReadyPrior[item.first] = proc; } procReady[proc].clear(); - superstep_partition_work[proc] = 0; + superstepPartitionWork[proc] = 0; } - free_processors.clear(); + freeProcessors.clear(); - if constexpr (use_memory_constraint) { - for (unsigned proc = 0; proc < n_processors; proc++) { - memory_constraint.reset(proc); + if constexpr (useMemoryConstraint_) { + for (unsigned proc = 0; proc < nProcessors; proc++) { + memoryConstraint_.Reset(proc); } } @@ -219,128 +219,126 @@ class VariancePartitioner : public LoadBalancerBase { endsuperstep = false; } - bool assigned_a_node = false; + bool assignedANode = false; // Choosing next processor - std::vector processors_in_order = LoadBalancerBase::computeProcessorPriority( - superstep_partition_work, total_partition_work, total_work, instance, slack); - for (unsigned &proc : processors_in_order) { - if ((free_processors.find(proc)) != free_processors.cend()) { + std::vector processorsInOrder = LoadBalancerBase::ComputeProcessorPriority( + superstepPartitionWork, totalPartitionWork, totalWork, instance, slack_); + for (unsigned &proc : processorsInOrder) { + if ((freeProcessors.find(proc)) != freeProcessors.cend()) { continue; } // Check for too many free processors - needed here because free processors may not have been detected // yet - if (num_unable_to_partition_node_loop == 0 - && static_cast(free_processors.size()) > max_percent_idle_processors * n_processors - && ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors - || static_cast(ready.size()) >= 1.2 * (n_processors - static_cast(free_processors.size())) - || static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) - + (0.5 * static_cast(free_processors.size())))) { + if (numUnableToPartitionNodeLoop == 0 + && static_cast(freeProcessors.size()) > maxPercentIdleProcessors_ * nProcessors + && ((!increaseParallelismInNewSuperstep_) || ready.size() >= nProcessors + || static_cast(ready.size()) >= 1.2 * (nProcessors - 
static_cast(freeProcessors.size())) + || static_cast(ready.size()) >= nProcessors - static_cast(freeProcessors.size()) + + (0.5 * static_cast(freeProcessors.size())))) { endsuperstep = true; // std::cout << "\nCall for new superstep - parallelism.\n"; break; } - assigned_a_node = false; + assignedANode = false; // Choosing next node - VertexType next_node; - for (auto vertex_prior_pair_iter = procReady[proc].begin(); vertex_prior_pair_iter != procReady[proc].cend(); - vertex_prior_pair_iter++) { - if (assigned_a_node) { + VertexType nextNode; + for (auto vertexPriorPairIter = procReady[proc].begin(); vertexPriorPairIter != procReady[proc].cend(); + vertexPriorPairIter++) { + if (assignedANode) { break; } - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(vertex_prior_pair_iter->first, proc)) { - next_node = vertex_prior_pair_iter->first; - assigned_a_node = true; + if constexpr (isMemoryConstraintV || isMemoryConstraintScheduleV) { + if (memoryConstraint_.CanAdd(vertexPriorPairIter->first, proc)) { + nextNode = vertexPriorPairIter->first; + assignedANode = true; } } else { - next_node = vertex_prior_pair_iter->first; - assigned_a_node = true; + nextNode = vertexPriorPairIter->first; + assignedANode = true; } } - for (auto vertex_prior_pair_iter = procReadyPrior[proc].begin(); - vertex_prior_pair_iter != procReadyPrior[proc].cend(); - vertex_prior_pair_iter++) { - if (assigned_a_node) { + for (auto vertexPriorPairIter = procReadyPrior[proc].begin(); vertexPriorPairIter != procReadyPrior[proc].cend(); + vertexPriorPairIter++) { + if (assignedANode) { break; } - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(vertex_prior_pair_iter->first, proc)) { - next_node = vertex_prior_pair_iter->first; - assigned_a_node = true; + if constexpr (isMemoryConstraintV || isMemoryConstraintScheduleV) { + if (memoryConstraint_.CanAdd(vertexPriorPairIter->first, proc)) { + nextNode = vertexPriorPairIter->first; + assignedANode = true; } } 
else { - next_node = vertex_prior_pair_iter->first; - assigned_a_node = true; + nextNode = vertexPriorPairIter->first; + assignedANode = true; } } - for (auto vertex_prior_pair_iter = allReady.begin(); vertex_prior_pair_iter != allReady.cend(); - vertex_prior_pair_iter++) { - if (assigned_a_node) { + for (auto vertexPriorPairIter = allReady.begin(); vertexPriorPairIter != allReady.cend(); vertexPriorPairIter++) { + if (assignedANode) { break; } - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(vertex_prior_pair_iter->first, proc)) { - next_node = vertex_prior_pair_iter->first; - assigned_a_node = true; + if constexpr (isMemoryConstraintV || isMemoryConstraintScheduleV) { + if (memoryConstraint_.CanAdd(vertexPriorPairIter->first, proc)) { + nextNode = vertexPriorPairIter->first; + assignedANode = true; } } else { - next_node = vertex_prior_pair_iter->first; - assigned_a_node = true; + nextNode = vertexPriorPairIter->first; + assignedANode = true; } } - if (!assigned_a_node) { - free_processors.insert(proc); + if (!assignedANode) { + freeProcessors.insert(proc); } else { // Assignments - // std::cout << "Allocated node " << next_node << " to processor " << proc << ".\n"; - schedule.setAssignedProcessor(next_node, proc); - schedule.setAssignedSuperstep(next_node, superstep); - num_unable_to_partition_node_loop = 0; + // std::cout << "Allocated node " << nextNode << " to processor " << proc << ".\n"; + schedule.SetAssignedProcessor(nextNode, proc); + schedule.SetAssignedSuperstep(nextNode, superstep); + numUnableToPartitionNodeLoop = 0; // Updating loads - total_partition_work[proc] += graph.vertex_work_weight(next_node); - superstep_partition_work[proc] += graph.vertex_work_weight(next_node); + totalPartitionWork[proc] += graph.VertexWorkWeight(nextNode); + superstepPartitionWork[proc] += graph.VertexWorkWeight(nextNode); - if constexpr (use_memory_constraint) { - memory_constraint.add(next_node, proc); + if constexpr (isMemoryConstraintV || 
isMemoryConstraintScheduleV) { + memoryConstraint_.Add(nextNode, proc); } // Deletion from Queues - std::pair pair = std::make_pair(next_node, variance_priorities[next_node]); + std::pair pair = std::make_pair(nextNode, variancePriorities[nextNode]); ready.erase(pair); procReady[proc].erase(pair); procReadyPrior[proc].erase(pair); allReady.erase(pair); - if (which_proc_ready_prior[next_node] != n_processors) { - procReadyPrior[which_proc_ready_prior[next_node]].erase(pair); + if (whichProcReadyPrior[nextNode] != nProcessors) { + procReadyPrior[whichProcReadyPrior[nextNode]].erase(pair); } // Checking children - for (const auto &chld : graph.children(next_node)) { - num_unallocated_parents[chld] -= 1; - if (num_unallocated_parents[chld] == 0) { + for (const auto &chld : graph.Children(nextNode)) { + numUnallocatedParents[chld] -= 1; + if (numUnallocatedParents[chld] == 0) { // std::cout << "Inserting child " << chld << " into ready.\n"; - ready.insert(std::make_pair(chld, variance_priorities[chld])); - bool is_proc_ready = true; - for (const auto &parent : graph.parents(chld)) { - if ((schedule.assignedProcessor(parent) != proc) - && (schedule.assignedSuperstep(parent) == superstep)) { - is_proc_ready = false; + ready.insert(std::make_pair(chld, variancePriorities[chld])); + bool isProcReady = true; + for (const auto &parent : graph.Parents(chld)) { + if ((schedule.AssignedProcessor(parent) != proc) + && (schedule.AssignedSuperstep(parent) == superstep)) { + isProcReady = false; break; } } - if (is_proc_ready) { - procReady[proc].insert(std::make_pair(chld, variance_priorities[chld])); + if (isProcReady) { + procReady[proc].insert(std::make_pair(chld, variancePriorities[chld])); // std::cout << "Inserting child " << chld << " into procReady for processor " << proc // << ".\n"; } @@ -350,15 +348,15 @@ class VariancePartitioner : public LoadBalancerBase { break; } } - if (!assigned_a_node) { - num_unable_to_partition_node_loop += 1; + if (!assignedANode) { + 
numUnableToPartitionNodeLoop += 1; } } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } - std::string getScheduleName() const override { return "VariancePartitioner"; }; + std::string GetScheduleName() const override { return "VariancePartitioner"; }; }; } // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp index 5d2374de..9d157943 100644 --- a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp @@ -26,119 +26,119 @@ limitations under the License. namespace osp { -template -class HillClimbingScheduler : public ImprovementScheduler { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - static_assert(is_computational_dag_v, "Graph_t must satisfy the computational_dag concept"); +template +class HillClimbingScheduler : public ImprovementScheduler { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + static_assert(isComputationalDagV, "GraphT must satisfy the computational_dag concept"); - using vertex_idx = vertex_idx_t; - using cost_type = v_workw_t; + using VertexIdx = VertexIdxT; + using CostType = VWorkwT; - static_assert(std::is_same_v, v_commw_t>, + static_assert(std::is_same_v, VCommwT>, "HillClimbing requires work and comm. 
weights to have the same type."); public: enum Direction { EARLIER = 0, AT, LATER }; - static const int NumDirections = 3; + static const int numDirections_ = 3; // aux structure for efficiently storing the changes incurred by a potential HC step - struct stepAuxData { - cost_type newCost; - std::map, int> sentChange, recChange; - bool canShrink = false; + struct StepAuxData { + CostType newCost_; + std::map, int> sentChange_, recChange_; + bool canShrink_ = false; }; private: - BspSchedule *schedule; - cost_type cost = 0; + BspSchedule *schedule_; + CostType cost_ = 0; // Main parameters for runnign algorithm - bool shrink = true; - bool steepestAscent = false; + bool shrink_ = true; + bool steepestAscent_ = false; // aux data structures - std::vector>> supsteplists; - std::vector>> canMove; - std::vector>> moveOptions; - std::vector>::iterator>>> movePointer; - std::vector>> succSteps; - std::vector> workCost, sent, received, commCost; - std::vector>> workCostList, commCostList; - std::vector>::iterator>> workCostPointer, commCostPointer; - std::vector::iterator> supStepListPointer; - std::pair>::iterator> nextMove; - bool HCwithLatency = true; + std::vector>> supsteplists_; + std::vector>> canMove_; + std::vector>> moveOptions_; + std::vector>::iterator>>> movePointer_; + std::vector>> succSteps_; + std::vector> workCost_, sent_, received_, commCost_; + std::vector>> workCostList_, commCostList_; + std::vector>::iterator>> workCostPointer_, commCostPointer_; + std::vector::iterator> supStepListPointer_; + std::pair>::iterator> nextMove_; + bool hcWithLatency_ = true; // for improved candidate selection - std::deque> promisingMoves; - bool findPromisingMoves = true; + std::deque> promisingMoves_; + bool findPromisingMoves_ = true; // Initialize data structures (based on current schedule) void Init(); - void updatePromisingMoves(); + void UpdatePromisingMoves(); // Functions to compute and update the std::list of possible moves - void 
updateNodeMovesEarlier(vertex_idx node); - void updateNodeMovesAt(vertex_idx node); - void updateNodeMovesLater(vertex_idx node); - void updateNodeMoves(vertex_idx node); - void updateMoveOptions(vertex_idx node, int where); + void UpdateNodeMovesEarlier(VertexIdx node); + void UpdateNodeMovesAt(VertexIdx node); + void UpdateNodeMovesLater(VertexIdx node); + void UpdateNodeMoves(VertexIdx node); + void UpdateMoveOptions(VertexIdx node, int where); - void addMoveOption(vertex_idx node, unsigned p, Direction dir); + void AddMoveOption(VertexIdx node, unsigned proc, Direction dir); - void eraseMoveOption(vertex_idx node, unsigned p, Direction dir); - void eraseMoveOptionsEarlier(vertex_idx node); - void eraseMoveOptionsAt(vertex_idx node); - void eraseMoveOptionsLater(vertex_idx node); - void eraseMoveOptions(vertex_idx node); + void EraseMoveOption(VertexIdx node, unsigned proc, Direction dir); + void EraseMoveOptionsEarlier(VertexIdx node); + void EraseMoveOptionsAt(VertexIdx node); + void EraseMoveOptionsLater(VertexIdx node); + void EraseMoveOptions(VertexIdx node); // Create superstep lists (for convenience) for a BSP schedule void CreateSupstepLists(); // For memory constraints - bool use_memory_constraint = false; - std::vector>> memory_used; - bool violatesMemConstraint(vertex_idx node, unsigned processor, int where); + bool useMemoryConstraint_ = false; + std::vector>> memoryUsed_; + bool ViolatesMemConstraint(VertexIdx node, unsigned processor, int where); // Compute the cost change incurred by a potential move - int moveCostChange(vertex_idx node, unsigned p, int where, stepAuxData &changing); + int MoveCostChange(VertexIdx node, unsigned proc, int where, StepAuxData &changing); // Execute a chosen move, updating the schedule and the data structures - void executeMove(vertex_idx node, unsigned newProc, int where, const stepAuxData &changing); + void ExecuteMove(VertexIdx node, unsigned newProc, int where, const StepAuxData &changing); // Single hill 
climbing step bool Improve(); public: - HillClimbingScheduler() : ImprovementScheduler() {} + HillClimbingScheduler() : ImprovementScheduler() {} virtual ~HillClimbingScheduler() = default; - virtual RETURN_STATUS improveSchedule(BspSchedule &input_schedule) override; + virtual ReturnStatus ImproveSchedule(BspSchedule &inputSchedule) override; // call with time/step limits - virtual RETURN_STATUS improveScheduleWithTimeLimit(BspSchedule &input_schedule) override; - virtual RETURN_STATUS improveScheduleWithStepLimit(BspSchedule &input_schedule, const unsigned stepLimit = 10); + virtual ReturnStatus ImproveScheduleWithTimeLimit(BspSchedule &inputSchedule) override; + virtual ReturnStatus ImproveScheduleWithStepLimit(BspSchedule &inputSchedule, const unsigned stepLimit = 10); // setting parameters - void setSteepestAscend(bool steepestAscent_) { steepestAscent = steepestAscent_; } + void SetSteepestAscend(bool steepestAscent) { steepestAscent_ = steepestAscent; } - void setShrink(bool shrink_) { shrink = shrink_; } + void SetShrink(bool shrink) { shrink_ = shrink; } - virtual std::string getScheduleName() const override { return "HillClimbing"; } + virtual std::string GetScheduleName() const override { return "HillClimbing"; } }; -template -RETURN_STATUS HillClimbingScheduler::improveSchedule(BspSchedule &input_schedule) { - ImprovementScheduler::setTimeLimitSeconds(600U); - return improveScheduleWithTimeLimit(input_schedule); +template +ReturnStatus HillClimbingScheduler::ImproveSchedule(BspSchedule &inputSchedule) { + ImprovementScheduler::SetTimeLimitSeconds(600U); + return ImproveScheduleWithTimeLimit(inputSchedule); } // Main method for hill climbing (with time limit) -template -RETURN_STATUS HillClimbingScheduler::improveScheduleWithTimeLimit(BspSchedule &input_schedule) { - schedule = &input_schedule; +template +ReturnStatus HillClimbingScheduler::ImproveScheduleWithTimeLimit(BspSchedule &inputSchedule) { + schedule_ = &inputSchedule; CreateSupstepLists(); 
Init(); @@ -150,21 +150,21 @@ RETURN_STATUS HillClimbingScheduler::improveScheduleWithTimeLimit(BspSc counter = 0; std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); const auto elapsed = std::chrono::duration_cast(now - startTime).count(); - if (elapsed >= ImprovementScheduler::timeLimitSeconds) { + if (elapsed >= ImprovementScheduler::timeLimitSeconds_) { std::cout << "Hill Climbing was shut down due to time limit." << std::endl; break; } } } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } -template +template // Hill climbing with step limit (designed as an ingredient for multilevel algorithms, no safety checks) -RETURN_STATUS HillClimbingScheduler::improveScheduleWithStepLimit(BspSchedule &input_schedule, - const unsigned stepLimit) { - schedule = &input_schedule; +ReturnStatus HillClimbingScheduler::ImproveScheduleWithStepLimit(BspSchedule &inputSchedule, + const unsigned stepLimit) { + schedule_ = &inputSchedule; CreateSupstepLists(); Init(); @@ -174,231 +174,231 @@ RETURN_STATUS HillClimbingScheduler::improveScheduleWithStepLimit(BspSc } } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } -template -void HillClimbingScheduler::Init() { - if (shrink) { - schedule->shrinkByMergingSupersteps(); +template +void HillClimbingScheduler::Init() { + if (shrink_) { + schedule_->ShrinkByMergingSupersteps(); CreateSupstepLists(); } - const vertex_idx N = schedule->getInstance().getComputationalDag().num_vertices(); - const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); - const unsigned M = schedule->numberOfSupersteps(); - const Graph_t &G = schedule->getInstance().getComputationalDag(); + const VertexIdx n = schedule_->GetInstance().GetComputationalDag().NumVertices(); + const unsigned p = schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); + const unsigned m = schedule_->NumberOfSupersteps(); + const GraphT &g = 
schedule_->GetInstance().GetComputationalDag(); // Movement options - canMove.clear(); - canMove.resize(NumDirections, std::vector>(N, std::vector(P, false))); - moveOptions.clear(); - moveOptions.resize(NumDirections); - movePointer.clear(); - movePointer.resize(NumDirections, - std::vector>::iterator>>( - N, std::vector>::iterator>(P))); + canMove_.clear(); + canMove_.resize(numDirections_, std::vector>(n, std::vector(p, false))); + moveOptions_.clear(); + moveOptions_.resize(numDirections_); + movePointer_.clear(); + movePointer_.resize(numDirections_, + std::vector>::iterator>>( + n, std::vector>::iterator>(p))); // Value use lists - succSteps.clear(); - succSteps.resize(N, std::vector>(P)); - for (vertex_idx node = 0; node < N; ++node) { - for (const vertex_idx &succ : G.children(node)) { - if (succSteps[node][schedule->assignedProcessor(succ)].find(schedule->assignedSuperstep(succ)) - == succSteps[node][schedule->assignedProcessor(succ)].end()) { - succSteps[node][schedule->assignedProcessor(succ)].insert({schedule->assignedSuperstep(succ), 1U}); + succSteps_.clear(); + succSteps_.resize(n, std::vector>(p)); + for (VertexIdx node = 0; node < n; ++node) { + for (const VertexIdx &succ : g.Children(node)) { + if (succSteps_[node][schedule_->AssignedProcessor(succ)].find(schedule_->AssignedSuperstep(succ)) + == succSteps_[node][schedule_->AssignedProcessor(succ)].end()) { + succSteps_[node][schedule_->AssignedProcessor(succ)].insert({schedule_->AssignedSuperstep(succ), 1U}); } else { - succSteps[node][schedule->assignedProcessor(succ)].at(schedule->assignedSuperstep(succ)) += 1; + succSteps_[node][schedule_->AssignedProcessor(succ)].at(schedule_->AssignedSuperstep(succ)) += 1; } } } // Cost data - workCost.clear(); - workCost.resize(M, std::vector(P, 0)); - sent.clear(); - sent.resize(M - 1, std::vector(P, 0)); - received.clear(); - received.resize(M - 1, std::vector(P, 0)); - commCost.clear(); - commCost.resize(M - 1, std::vector(P)); - - workCostList.clear(); - 
workCostList.resize(M); - commCostList.clear(); - commCostList.resize(M - 1); - workCostPointer.clear(); - workCostPointer.resize(M, std::vector>::iterator>(P)); - commCostPointer.clear(); - commCostPointer.resize(M - 1, std::vector>::iterator>(P)); + workCost_.clear(); + workCost_.resize(m, std::vector(p, 0)); + sent_.clear(); + sent_.resize(m - 1, std::vector(p, 0)); + received_.clear(); + received_.resize(m - 1, std::vector(p, 0)); + commCost_.clear(); + commCost_.resize(m - 1, std::vector(p)); + + workCostList_.clear(); + workCostList_.resize(m); + commCostList_.clear(); + commCostList_.resize(m - 1); + workCostPointer_.clear(); + workCostPointer_.resize(m, std::vector>::iterator>(p)); + commCostPointer_.clear(); + commCostPointer_.resize(m - 1, std::vector>::iterator>(p)); // Supstep std::list pointers - supStepListPointer.clear(); - supStepListPointer.resize(N); - for (unsigned step = 0; step < M; ++step) { - for (unsigned proc = 0; proc < P; ++proc) { - for (auto it = supsteplists[step][proc].begin(); it != supsteplists[step][proc].end(); ++it) { - supStepListPointer[*it] = it; + supStepListPointer_.clear(); + supStepListPointer_.resize(n); + for (unsigned step = 0; step < m; ++step) { + for (unsigned proc = 0; proc < p; ++proc) { + for (auto it = supsteplists_[step][proc].begin(); it != supsteplists_[step][proc].end(); ++it) { + supStepListPointer_[*it] = it; } } } // Compute movement options - for (vertex_idx node = 0; node < N; ++node) { - updateNodeMoves(node); + for (VertexIdx node = 0; node < n; ++node) { + UpdateNodeMoves(node); } - nextMove.first = 0; - nextMove.second = moveOptions[0].begin(); + nextMove_.first = 0; + nextMove_.second = moveOptions_[0].begin(); // Compute cost data - std::vector work_cost(M, 0); - for (unsigned step = 0; step < M; ++step) { - for (unsigned proc = 0; proc < P; ++proc) { - for (const vertex_idx node : supsteplists[step][proc]) { - workCost[step][proc] += 
schedule->getInstance().getComputationalDag().vertex_work_weight(node); + std::vector workCost(m, 0); + for (unsigned step = 0; step < m; ++step) { + for (unsigned proc = 0; proc < p; ++proc) { + for (const VertexIdx node : supsteplists_[step][proc]) { + workCost_[step][proc] += schedule_->GetInstance().GetComputationalDag().VertexWorkWeight(node); } - std::pair entry(workCost[step][proc], proc); - workCostPointer[step][proc] = workCostList[step].insert(entry).first; + std::pair entry(workCost_[step][proc], proc); + workCostPointer_[step][proc] = workCostList_[step].insert(entry).first; } - work_cost[step] = (--workCostList[step].end())->first; + workCost[step] = (--workCostList_[step].end())->first; } - cost = work_cost[0]; - std::vector> present(N, std::vector(P, false)); - for (unsigned step = 0; step < M - schedule->getStaleness(); ++step) { - for (unsigned proc = 0; proc < P; ++proc) { - for (const vertex_idx node : supsteplists[step + schedule->getStaleness()][proc]) { - for (const vertex_idx &pred : G.parents(node)) { - if (schedule->assignedProcessor(node) != schedule->assignedProcessor(pred) - && !present[pred][schedule->assignedProcessor(node)]) { - present[pred][schedule->assignedProcessor(node)] = true; - sent[step][schedule->assignedProcessor(pred)] - += schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), - schedule->assignedProcessor(node)); - received[step][schedule->assignedProcessor(node)] - += schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), - schedule->assignedProcessor(node)); + cost_ = workCost[0]; + std::vector> present(n, std::vector(p, false)); + for (unsigned step = 0; step < m - schedule_->GetStaleness(); ++step) { + for (unsigned proc = 0; proc < p; ++proc) { + for (const VertexIdx node : supsteplists_[step + 
schedule_->GetStaleness()][proc]) { + for (const VertexIdx &pred : g.Parents(node)) { + if (schedule_->AssignedProcessor(node) != schedule_->AssignedProcessor(pred) + && !present[pred][schedule_->AssignedProcessor(node)]) { + present[pred][schedule_->AssignedProcessor(node)] = true; + sent_[step][schedule_->AssignedProcessor(pred)] + += schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts(schedule_->AssignedProcessor(pred), + schedule_->AssignedProcessor(node)); + received_[step][schedule_->AssignedProcessor(node)] + += schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts(schedule_->AssignedProcessor(pred), + schedule_->AssignedProcessor(node)); } } } } } - for (unsigned step = 0; step < M - 1; ++step) { - for (unsigned proc = 0; proc < P; ++proc) { - commCost[step][proc] = std::max(sent[step][proc], received[step][proc]); - std::pair entry(commCost[step][proc], proc); - commCostPointer[step][proc] = commCostList[step].insert(entry).first; + for (unsigned step = 0; step < m - 1; ++step) { + for (unsigned proc = 0; proc < p; ++proc) { + commCost_[step][proc] = std::max(sent_[step][proc], received_[step][proc]); + std::pair entry(commCost_[step][proc], proc); + commCostPointer_[step][proc] = commCostList_[step].insert(entry).first; } - cost_type comm_cost = schedule->getInstance().getArchitecture().communicationCosts() * commCostList[step].rbegin()->first; - cost_type sync_cost = (comm_cost > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0; + CostType commCost = schedule_->GetInstance().GetArchitecture().CommunicationCosts() * commCostList_[step].rbegin()->first; + CostType syncCost = (commCost > 0) ? 
schedule_->GetInstance().GetArchitecture().SynchronisationCosts() : 0; - if (schedule->getStaleness() == 1) { - cost += comm_cost + work_cost[step + 1] + sync_cost; + if (schedule_->GetStaleness() == 1) { + cost_ += commCost + workCost[step + 1] + syncCost; } else { - cost += std::max(comm_cost, work_cost[step + 1]) + sync_cost; + cost_ += std::max(commCost, workCost[step + 1]) + syncCost; } } - updatePromisingMoves(); + UpdatePromisingMoves(); // memory_constraints - if (use_memory_constraint) { - memory_used.clear(); - memory_used.resize(P, std::vector>(M, 0)); - for (vertex_idx node = 0; node < N; ++node) { - memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)] - += schedule->getInstance().getComputationalDag().vertex_mem_weight(node); + if (useMemoryConstraint_) { + memoryUsed_.clear(); + memoryUsed_.resize(p, std::vector>(m, 0)); + for (VertexIdx node = 0; node < n; ++node) { + memoryUsed_[schedule_->AssignedProcessor(node)][schedule_->AssignedSuperstep(node)] + += schedule_->GetInstance().GetComputationalDag().VertexMemWeight(node); } } } -template -void HillClimbingScheduler::updatePromisingMoves() { - if (!findPromisingMoves) { +template +void HillClimbingScheduler::UpdatePromisingMoves() { + if (!findPromisingMoves_) { return; } - const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); - const Graph_t &G = schedule->getInstance().getComputationalDag(); + const unsigned p = schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); + const GraphT &g = schedule_->GetInstance().GetComputationalDag(); - promisingMoves.clear(); - for (vertex_idx node = 0; node < schedule->getInstance().getComputationalDag().num_vertices(); ++node) { - std::vector nrPredOnProc(P, 0); - for (const vertex_idx &pred : G.parents(node)) { - ++nrPredOnProc[schedule->assignedProcessor(pred)]; + promisingMoves_.clear(); + for (VertexIdx node = 0; node < schedule_->GetInstance().GetComputationalDag().NumVertices(); ++node) { + 
std::vector nrPredOnProc(p, 0); + for (const VertexIdx &pred : g.Parents(node)) { + ++nrPredOnProc[schedule_->AssignedProcessor(pred)]; } unsigned otherProcUsed = 0; - for (unsigned proc = 0; proc < P; ++proc) { - if (schedule->assignedProcessor(node) != proc && nrPredOnProc[proc] > 0) { + for (unsigned proc = 0; proc < p; ++proc) { + if (schedule_->AssignedProcessor(node) != proc && nrPredOnProc[proc] > 0) { ++otherProcUsed; } } if (otherProcUsed == 1) { - for (unsigned proc = 0; proc < P; ++proc) { - if (schedule->assignedProcessor(node) != proc && nrPredOnProc[proc] > 0 - && schedule->getInstance().isCompatible(node, proc)) { - promisingMoves.push_back(std::make_tuple(node, proc, EARLIER)); - promisingMoves.push_back(std::make_tuple(node, proc, AT)); - promisingMoves.push_back(std::make_tuple(node, proc, LATER)); + for (unsigned proc = 0; proc < p; ++proc) { + if (schedule_->AssignedProcessor(node) != proc && nrPredOnProc[proc] > 0 + && schedule_->GetInstance().IsCompatible(node, proc)) { + promisingMoves_.push_back(std::make_tuple(node, proc, EARLIER)); + promisingMoves_.push_back(std::make_tuple(node, proc, AT)); + promisingMoves_.push_back(std::make_tuple(node, proc, LATER)); } } } - std::vector nrSuccOnProc(P, 0); - for (const vertex_idx &succ : G.children(node)) { - ++nrSuccOnProc[schedule->assignedProcessor(succ)]; + std::vector nrSuccOnProc(p, 0); + for (const VertexIdx &succ : g.Children(node)) { + ++nrSuccOnProc[schedule_->AssignedProcessor(succ)]; } otherProcUsed = 0; - for (unsigned proc = 0; proc < P; ++proc) { - if (schedule->assignedProcessor(node) != proc && nrSuccOnProc[proc] > 0) { + for (unsigned proc = 0; proc < p; ++proc) { + if (schedule_->AssignedProcessor(node) != proc && nrSuccOnProc[proc] > 0) { ++otherProcUsed; } } if (otherProcUsed == 1) { - for (unsigned proc = 0; proc < P; ++proc) { - if (schedule->assignedProcessor(node) != proc && nrSuccOnProc[proc] > 0 - && schedule->getInstance().isCompatible(node, proc)) { - 
promisingMoves.push_back(std::make_tuple(node, proc, EARLIER)); - promisingMoves.push_back(std::make_tuple(node, proc, AT)); - promisingMoves.push_back(std::make_tuple(node, proc, LATER)); + for (unsigned proc = 0; proc < p; ++proc) { + if (schedule_->AssignedProcessor(node) != proc && nrSuccOnProc[proc] > 0 + && schedule_->GetInstance().IsCompatible(node, proc)) { + promisingMoves_.push_back(std::make_tuple(node, proc, EARLIER)); + promisingMoves_.push_back(std::make_tuple(node, proc, AT)); + promisingMoves_.push_back(std::make_tuple(node, proc, LATER)); } } } } - for (unsigned step = 0; step < schedule->numberOfSupersteps(); ++step) { + for (unsigned step = 0; step < schedule_->NumberOfSupersteps(); ++step) { std::list minProcs, maxProcs; - cost_type minWork = std::numeric_limits::max(), maxWork = std::numeric_limits::min(); - for (unsigned proc = 0; proc < P; ++proc) { - if (workCost[step][proc] > maxWork) { - maxWork = workCost[step][proc]; + CostType minWork = std::numeric_limits::max(), maxWork = std::numeric_limits::min(); + for (unsigned proc = 0; proc < p; ++proc) { + if (workCost_[step][proc] > maxWork) { + maxWork = workCost_[step][proc]; } - if (workCost[step][proc] < minWork) { - minWork = workCost[step][proc]; + if (workCost_[step][proc] < minWork) { + minWork = workCost_[step][proc]; } } - for (unsigned proc = 0; proc < P; ++proc) { - if (workCost[step][proc] == minWork) { + for (unsigned proc = 0; proc < p; ++proc) { + if (workCost_[step][proc] == minWork) { minProcs.push_back(proc); } - if (workCost[step][proc] == maxWork) { + if (workCost_[step][proc] == maxWork) { maxProcs.push_back(proc); } } for (unsigned to : minProcs) { for (unsigned from : maxProcs) { - for (vertex_idx node : supsteplists[step][from]) { - if (schedule->getInstance().isCompatible(node, to)) { - promisingMoves.push_back(std::make_tuple(node, to, AT)); + for (VertexIdx node : supsteplists_[step][from]) { + if (schedule_->GetInstance().IsCompatible(node, to)) { + 
promisingMoves_.push_back(std::make_tuple(node, to, AT)); } } } @@ -407,26 +407,26 @@ void HillClimbingScheduler::updatePromisingMoves() { } // Functions to compute and update the std::list of possible moves -template -void HillClimbingScheduler::updateNodeMovesEarlier(const vertex_idx node) { - if (schedule->assignedSuperstep(node) == 0) { +template +void HillClimbingScheduler::UpdateNodeMovesEarlier(const VertexIdx node) { + if (schedule_->AssignedSuperstep(node) == 0) { return; } std::set predProc; - for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) { - if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node)) { + for (const VertexIdx &pred : schedule_->GetInstance().GetComputationalDag().Parents(node)) { + if (schedule_->AssignedSuperstep(pred) == schedule_->AssignedSuperstep(node)) { return; } - if (static_cast(schedule->assignedSuperstep(pred)) - >= static_cast(schedule->assignedSuperstep(node)) - static_cast(schedule->getStaleness())) { - predProc.insert(schedule->assignedProcessor(pred)); + if (static_cast(schedule_->AssignedSuperstep(pred)) + >= static_cast(schedule_->AssignedSuperstep(node)) - static_cast(schedule_->GetStaleness())) { + predProc.insert(schedule_->AssignedProcessor(pred)); } } - if (schedule->getStaleness() == 2) { - for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) { - if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node)) { - predProc.insert(schedule->assignedProcessor(succ)); + if (schedule_->GetStaleness() == 2) { + for (const VertexIdx &succ : schedule_->GetInstance().GetComputationalDag().Children(node)) { + if (schedule_->AssignedSuperstep(succ) == schedule_->AssignedSuperstep(node)) { + predProc.insert(schedule_->AssignedProcessor(succ)); } } } @@ -436,55 +436,55 @@ void HillClimbingScheduler::updateNodeMovesEarlier(const vertex_idx nod } if (predProc.size() == 1) { - addMoveOption(node, *predProc.begin(), 
EARLIER); + AddMoveOption(node, *predProc.begin(), EARLIER); } else { - for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { - addMoveOption(node, proc, EARLIER); + for (unsigned proc = 0; proc < schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); ++proc) { + AddMoveOption(node, proc, EARLIER); } } } -template -void HillClimbingScheduler::updateNodeMovesAt(const vertex_idx node) { - for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) { - if (static_cast(schedule->assignedSuperstep(pred)) - >= static_cast(schedule->assignedSuperstep(node)) - static_cast(schedule->getStaleness()) + 1) { +template +void HillClimbingScheduler::UpdateNodeMovesAt(const VertexIdx node) { + for (const VertexIdx &pred : schedule_->GetInstance().GetComputationalDag().Parents(node)) { + if (static_cast(schedule_->AssignedSuperstep(pred)) + >= static_cast(schedule_->AssignedSuperstep(node)) - static_cast(schedule_->GetStaleness()) + 1) { return; } } - for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) { - if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness() - 1) { + for (const VertexIdx &succ : schedule_->GetInstance().GetComputationalDag().Children(node)) { + if (schedule_->AssignedSuperstep(succ) <= schedule_->AssignedSuperstep(node) + schedule_->GetStaleness() - 1) { return; } } - for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { - if (proc != schedule->assignedProcessor(node)) { - addMoveOption(node, proc, AT); + for (unsigned proc = 0; proc < schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); ++proc) { + if (proc != schedule_->AssignedProcessor(node)) { + AddMoveOption(node, proc, AT); } } } -template -void HillClimbingScheduler::updateNodeMovesLater(const vertex_idx node) { - if (schedule->assignedSuperstep(node) == 
schedule->numberOfSupersteps() - 1) { +template +void HillClimbingScheduler::UpdateNodeMovesLater(const VertexIdx node) { + if (schedule_->AssignedSuperstep(node) == schedule_->NumberOfSupersteps() - 1) { return; } std::set succProc; - for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) { - if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node)) { + for (const VertexIdx &succ : schedule_->GetInstance().GetComputationalDag().Children(node)) { + if (schedule_->AssignedSuperstep(succ) == schedule_->AssignedSuperstep(node)) { return; } - if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness()) { - succProc.insert(schedule->assignedProcessor(succ)); + if (schedule_->AssignedSuperstep(succ) <= schedule_->AssignedSuperstep(node) + schedule_->GetStaleness()) { + succProc.insert(schedule_->AssignedProcessor(succ)); } } - if (schedule->getStaleness() == 2) { - for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) { - if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node)) { - succProc.insert(schedule->assignedProcessor(pred)); + if (schedule_->GetStaleness() == 2) { + for (const VertexIdx &pred : schedule_->GetInstance().GetComputationalDag().Parents(node)) { + if (schedule_->AssignedSuperstep(pred) == schedule_->AssignedSuperstep(node)) { + succProc.insert(schedule_->AssignedProcessor(pred)); } } } @@ -494,154 +494,154 @@ void HillClimbingScheduler::updateNodeMovesLater(const vertex_idx node) } if (succProc.size() == 1) { - addMoveOption(node, *succProc.begin(), LATER); + AddMoveOption(node, *succProc.begin(), LATER); } else { - for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { - addMoveOption(node, proc, LATER); + for (unsigned proc = 0; proc < schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); ++proc) { + AddMoveOption(node, proc, LATER); } } } 
-template -void HillClimbingScheduler::updateNodeMoves(const vertex_idx node) { - eraseMoveOptions(node); - updateNodeMovesEarlier(node); - updateNodeMovesAt(node); - updateNodeMovesLater(node); +template +void HillClimbingScheduler::UpdateNodeMoves(const VertexIdx node) { + EraseMoveOptions(node); + UpdateNodeMovesEarlier(node); + UpdateNodeMovesAt(node); + UpdateNodeMovesLater(node); } -template -void HillClimbingScheduler::updateMoveOptions(vertex_idx node, int where) { - const Graph_t &G = schedule->getInstance().getComputationalDag(); +template +void HillClimbingScheduler::UpdateMoveOptions(VertexIdx node, int where) { + const GraphT &g = schedule_->GetInstance().GetComputationalDag(); - updateNodeMoves(node); + UpdateNodeMoves(node); if (where == 0) { - for (const vertex_idx &pred : G.parents(node)) { - eraseMoveOptionsLater(pred); - updateNodeMovesLater(pred); + for (const VertexIdx &pred : g.Parents(node)) { + EraseMoveOptionsLater(pred); + UpdateNodeMovesLater(pred); } - for (const vertex_idx &succ : G.children(node)) { - eraseMoveOptionsEarlier(succ); - updateNodeMovesEarlier(succ); + for (const VertexIdx &succ : g.Children(node)) { + EraseMoveOptionsEarlier(succ); + UpdateNodeMovesEarlier(succ); } } if (where == -1) { - for (const vertex_idx &pred : G.parents(node)) { - eraseMoveOptionsLater(pred); - updateNodeMovesLater(pred); - eraseMoveOptionsAt(pred); - updateNodeMovesAt(pred); - if (schedule->getStaleness() == 2) { - eraseMoveOptionsEarlier(pred); - updateNodeMovesEarlier(pred); + for (const VertexIdx &pred : g.Parents(node)) { + EraseMoveOptionsLater(pred); + UpdateNodeMovesLater(pred); + EraseMoveOptionsAt(pred); + UpdateNodeMovesAt(pred); + if (schedule_->GetStaleness() == 2) { + EraseMoveOptionsEarlier(pred); + UpdateNodeMovesEarlier(pred); } } - for (const vertex_idx &succ : G.children(node)) { - eraseMoveOptionsEarlier(succ); - updateNodeMovesEarlier(succ); - if (schedule->getStaleness() == 2) { - eraseMoveOptionsAt(succ); - 
updateNodeMovesAt(succ); + for (const VertexIdx &succ : g.Children(node)) { + EraseMoveOptionsEarlier(succ); + UpdateNodeMovesEarlier(succ); + if (schedule_->GetStaleness() == 2) { + EraseMoveOptionsAt(succ); + UpdateNodeMovesAt(succ); } } } if (where == 1) { - for (const vertex_idx &pred : G.parents(node)) { - eraseMoveOptionsLater(pred); - updateNodeMovesLater(pred); - if (schedule->getStaleness() == 2) { - eraseMoveOptionsAt(pred); - updateNodeMovesAt(pred); + for (const VertexIdx &pred : g.Parents(node)) { + EraseMoveOptionsLater(pred); + UpdateNodeMovesLater(pred); + if (schedule_->GetStaleness() == 2) { + EraseMoveOptionsAt(pred); + UpdateNodeMovesAt(pred); } } - for (const vertex_idx &succ : G.children(node)) { - eraseMoveOptionsEarlier(succ); - updateNodeMovesEarlier(succ); - eraseMoveOptionsAt(succ); - updateNodeMovesAt(succ); - if (schedule->getStaleness() == 2) { - eraseMoveOptionsLater(succ); - updateNodeMovesLater(succ); + for (const VertexIdx &succ : g.Children(node)) { + EraseMoveOptionsEarlier(succ); + UpdateNodeMovesEarlier(succ); + EraseMoveOptionsAt(succ); + UpdateNodeMovesAt(succ); + if (schedule_->GetStaleness() == 2) { + EraseMoveOptionsLater(succ); + UpdateNodeMovesLater(succ); } } } } -template -void HillClimbingScheduler::addMoveOption(const vertex_idx node, const unsigned p, const Direction dir) { - if (!canMove[dir][node][p] && schedule->getInstance().isCompatible(node, p)) { - canMove[dir][node][p] = true; - moveOptions[dir].emplace_back(node, p); - movePointer[dir][node][p] = --moveOptions[dir].end(); +template +void HillClimbingScheduler::AddMoveOption(const VertexIdx node, const unsigned proc, const Direction dir) { + if (!canMove_[dir][node][proc] && schedule_->GetInstance().IsCompatible(node, proc)) { + canMove_[dir][node][proc] = true; + moveOptions_[dir].emplace_back(node, proc); + movePointer_[dir][node][proc] = --moveOptions_[dir].end(); } } -template -void HillClimbingScheduler::eraseMoveOption(vertex_idx node, unsigned p, 
Direction dir) { - canMove[dir][node][p] = false; - if (nextMove.first == dir && nextMove.second->first == node && nextMove.second->second == p) { - ++nextMove.second; +template +void HillClimbingScheduler::EraseMoveOption(VertexIdx node, unsigned proc, Direction dir) { + canMove_[dir][node][proc] = false; + if (nextMove_.first == dir && nextMove_.second->first == node && nextMove_.second->second == proc) { + ++nextMove_.second; } - moveOptions[dir].erase(movePointer[dir][node][p]); + moveOptions_[dir].erase(movePointer_[dir][node][proc]); } -template -void HillClimbingScheduler::eraseMoveOptionsEarlier(vertex_idx node) { - for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { - if (canMove[EARLIER][node][proc]) { - eraseMoveOption(node, proc, EARLIER); +template +void HillClimbingScheduler::EraseMoveOptionsEarlier(VertexIdx node) { + for (unsigned proc = 0; proc < schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); ++proc) { + if (canMove_[EARLIER][node][proc]) { + EraseMoveOption(node, proc, EARLIER); } } } -template -void HillClimbingScheduler::eraseMoveOptionsAt(vertex_idx node) { - for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { - if (canMove[AT][node][proc]) { - eraseMoveOption(node, proc, AT); +template +void HillClimbingScheduler::EraseMoveOptionsAt(VertexIdx node) { + for (unsigned proc = 0; proc < schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); ++proc) { + if (canMove_[AT][node][proc]) { + EraseMoveOption(node, proc, AT); } } } -template -void HillClimbingScheduler::eraseMoveOptionsLater(vertex_idx node) { - for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { - if (canMove[LATER][node][proc]) { - eraseMoveOption(node, proc, LATER); +template +void HillClimbingScheduler::EraseMoveOptionsLater(VertexIdx node) { + for (unsigned proc = 0; proc < 
schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); ++proc) { + if (canMove_[LATER][node][proc]) { + EraseMoveOption(node, proc, LATER); } } } -template -void HillClimbingScheduler::eraseMoveOptions(vertex_idx node) { - eraseMoveOptionsEarlier(node); - eraseMoveOptionsAt(node); - eraseMoveOptionsLater(node); +template +void HillClimbingScheduler::EraseMoveOptions(VertexIdx node) { + EraseMoveOptionsEarlier(node); + EraseMoveOptionsAt(node); + EraseMoveOptionsLater(node); } // Compute the cost change incurred by a potential move -template -int HillClimbingScheduler::moveCostChange(const vertex_idx node, unsigned p, const int where, stepAuxData &changing) { - const unsigned step = schedule->assignedSuperstep(node); - const unsigned new_step = static_cast(static_cast(step) + where); - unsigned oldProc = schedule->assignedProcessor(node); +template +int HillClimbingScheduler::MoveCostChange(const VertexIdx node, unsigned proc, const int where, StepAuxData &changing) { + const unsigned step = schedule_->AssignedSuperstep(node); + const unsigned newStep = static_cast(static_cast(step) + where); + unsigned oldProc = schedule_->AssignedProcessor(node); int change = 0; - const Graph_t &G = schedule->getInstance().getComputationalDag(); + const GraphT &g = schedule_->GetInstance().GetComputationalDag(); std::set affectedSteps; // Work cost change - std::map newWorkCost; - const auto itBest = --workCostList[step].end(); - cost_type maxAfterRemoval = itBest->first; + std::map newWorkCost; + const auto itBest = --workCostList_[step].end(); + CostType maxAfterRemoval = itBest->first; if (itBest->second == oldProc) { auto itNext = itBest; --itNext; maxAfterRemoval - = std::max(itBest->first - schedule->getInstance().getComputationalDag().vertex_work_weight(node), itNext->first); + = std::max(itBest->first - schedule_->GetInstance().GetComputationalDag().VertexWorkWeight(node), itNext->first); if (itBest->first != maxAfterRemoval) { - if (step == 0 || 
schedule->getStaleness() == 1) { // incorporate immediately into cost change + if (step == 0 || schedule_->GetStaleness() == 1) { // incorporate immediately into cost change change -= static_cast(itBest->first) - static_cast(maxAfterRemoval); } else { newWorkCost[step] = maxAfterRemoval; @@ -650,360 +650,361 @@ int HillClimbingScheduler::moveCostChange(const vertex_idx node, unsign } } - const cost_type maxBeforeAddition = (where == 0) ? maxAfterRemoval : workCostList[new_step].rbegin()->first; - if (workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node) > maxBeforeAddition) { - if (new_step == 0 || schedule->getStaleness() == 1) { // incorporate immediately into cost change - change - += static_cast(workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node)) - - static_cast(maxBeforeAddition); + const CostType maxBeforeAddition = (where == 0) ? maxAfterRemoval : workCostList_[newStep].rbegin()->first; + if (workCost_[newStep][proc] + schedule_->GetInstance().GetComputationalDag().VertexWorkWeight(node) > maxBeforeAddition) { + if (newStep == 0 || schedule_->GetStaleness() == 1) { // incorporate immediately into cost change + change += static_cast(workCost_[newStep][proc] + + schedule_->GetInstance().GetComputationalDag().VertexWorkWeight(node)) + - static_cast(maxBeforeAddition); } else { - newWorkCost[new_step] = workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node); - affectedSteps.insert(new_step - 1); + newWorkCost[newStep] = workCost_[newStep][proc] + schedule_->GetInstance().GetComputationalDag().VertexWorkWeight(node); + affectedSteps.insert(newStep - 1); } } // Comm cost change std::list> sentInc, recInc; // -outputs - if (p != oldProc) { - for (unsigned j = 0; j < schedule->getInstance().getArchitecture().numberOfProcessors(); ++j) { - if (succSteps[node][j].empty()) { + if (proc != oldProc) { + for (unsigned j = 0; j < 
schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); ++j) { + if (succSteps_[node][j].empty()) { continue; } - unsigned affectedStep = succSteps[node][j].begin()->first - schedule->getStaleness(); - if (j == p) { + unsigned affectedStep = succSteps_[node][j].begin()->first - schedule_->GetStaleness(); + if (j == proc) { sentInc.emplace_back(affectedStep, oldProc, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); + -static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(oldProc, j))); recInc.emplace_back(affectedStep, - p, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); + proc, + -static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(oldProc, j))); } else if (j == oldProc) { recInc.emplace_back(affectedStep, oldProc, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(p, j))); + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(proc, j))); sentInc.emplace_back(affectedStep, - p, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(p, j))); + proc, + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(proc, j))); } else { sentInc.emplace_back(affectedStep, oldProc, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); + 
-static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(oldProc, j))); recInc.emplace_back(affectedStep, j, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); + -static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(oldProc, j))); sentInc.emplace_back(affectedStep, - p, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(p, j))); + proc, + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(proc, j))); recInc.emplace_back(affectedStep, j, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(p, j))); + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(proc, j))); } } } // -inputs - if (p == oldProc) { - for (const vertex_idx &pred : G.parents(node)) { - if (schedule->assignedProcessor(pred) == p) { + if (proc == oldProc) { + for (const VertexIdx &pred : g.Parents(node)) { + if (schedule_->AssignedProcessor(pred) == proc) { continue; } - const auto firstUse = *succSteps[pred][p].begin(); + const auto firstUse = *succSteps_[pred][proc].begin(); const bool skip = firstUse.first < step || (firstUse.first == step && where >= 0 && firstUse.second > 1); if (!skip) { - sentInc.emplace_back(step - schedule->getStaleness(), - schedule->assignedProcessor(pred), - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts( - schedule->assignedProcessor(pred), p))); - 
recInc.emplace_back(step - schedule->getStaleness(), - p, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts( - schedule->assignedProcessor(pred), p))); - sentInc.emplace_back( - new_step - schedule->getStaleness(), - schedule->assignedProcessor(pred), - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); - recInc.emplace_back( - new_step - schedule->getStaleness(), - p, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); + sentInc.emplace_back(step - schedule_->GetStaleness(), + schedule_->AssignedProcessor(pred), + -static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), proc))); + recInc.emplace_back(step - schedule_->GetStaleness(), + proc, + -static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), proc))); + sentInc.emplace_back(newStep - schedule_->GetStaleness(), + schedule_->AssignedProcessor(pred), + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), proc))); + recInc.emplace_back(newStep - schedule_->GetStaleness(), + proc, + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), proc))); } } } else { - for (const vertex_idx &pred : G.parents(node)) { + for (const VertexIdx &pred : g.Parents(node)) { // Comm. 
cost of sending pred to oldProc - auto firstUse = succSteps[pred][oldProc].begin(); - bool skip = (schedule->assignedProcessor(pred) == oldProc) || firstUse->first < step + auto firstUse = succSteps_[pred][oldProc].begin(); + bool skip = (schedule_->AssignedProcessor(pred) == oldProc) || firstUse->first < step || (firstUse->first == step && firstUse->second > 1); if (!skip) { - sentInc.emplace_back(step - schedule->getStaleness(), - schedule->assignedProcessor(pred), - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts( - schedule->assignedProcessor(pred), oldProc))); - recInc.emplace_back(step - schedule->getStaleness(), + sentInc.emplace_back(step - schedule_->GetStaleness(), + schedule_->AssignedProcessor(pred), + -static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), oldProc))); + recInc.emplace_back(step - schedule_->GetStaleness(), oldProc, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts( - schedule->assignedProcessor(pred), oldProc))); + -static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), oldProc))); ++firstUse; - if (firstUse != succSteps[pred][oldProc].end()) { + if (firstUse != succSteps_[pred][oldProc].end()) { const unsigned nextStep = firstUse->first; - sentInc.emplace_back(nextStep - schedule->getStaleness(), - schedule->assignedProcessor(pred), - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts( - schedule->assignedProcessor(pred), oldProc))); - recInc.emplace_back(nextStep - schedule->getStaleness(), + sentInc.emplace_back(nextStep - 
schedule_->GetStaleness(), + schedule_->AssignedProcessor(pred), + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), oldProc))); + recInc.emplace_back(nextStep - schedule_->GetStaleness(), oldProc, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts( - schedule->assignedProcessor(pred), oldProc))); + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), oldProc))); } } - // Comm. cost of sending pred to p - firstUse = succSteps[pred][p].begin(); - skip = (schedule->assignedProcessor(pred) == p) - || ((firstUse != succSteps[pred][p].end()) && (firstUse->first <= new_step)); + // Comm. cost of sending pred to proc + firstUse = succSteps_[pred][proc].begin(); + skip = (schedule_->AssignedProcessor(pred) == proc) + || ((firstUse != succSteps_[pred][proc].end()) && (firstUse->first <= newStep)); if (!skip) { - sentInc.emplace_back( - new_step - schedule->getStaleness(), - schedule->assignedProcessor(pred), - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); - recInc.emplace_back( - new_step - schedule->getStaleness(), - p, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); - if (firstUse != succSteps[pred][p].end()) { - sentInc.emplace_back(firstUse->first - schedule->getStaleness(), - schedule->assignedProcessor(pred), - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts( - 
schedule->assignedProcessor(pred), p))); - recInc.emplace_back(firstUse->first - schedule->getStaleness(), - p, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) - * schedule->getInstance().getArchitecture().sendCosts( - schedule->assignedProcessor(pred), p))); + sentInc.emplace_back(newStep - schedule_->GetStaleness(), + schedule_->AssignedProcessor(pred), + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), proc))); + recInc.emplace_back(newStep - schedule_->GetStaleness(), + proc, + static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), proc))); + if (firstUse != succSteps_[pred][proc].end()) { + sentInc.emplace_back(firstUse->first - schedule_->GetStaleness(), + schedule_->AssignedProcessor(pred), + -static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), proc))); + recInc.emplace_back(firstUse->first - schedule_->GetStaleness(), + proc, + -static_cast(schedule_->GetInstance().GetComputationalDag().VertexCommWeight(pred) + * schedule_->GetInstance().GetArchitecture().SendCosts( + schedule_->AssignedProcessor(pred), proc))); } } } } // -process changes - changing.sentChange.clear(); - changing.recChange.clear(); + changing.sentChange_.clear(); + changing.recChange_.clear(); for (auto entry : sentInc) { - const unsigned e_step = std::get<0>(entry); - const unsigned e_proc = std::get<1>(entry); - const int e_increase = std::get<2>(entry); - affectedSteps.insert(e_step); - auto itr = changing.sentChange.find(std::make_pair(e_step, e_proc)); - if (itr == changing.sentChange.end()) { - changing.sentChange.insert({std::make_pair(e_step, e_proc), e_increase}); + const 
unsigned eStep = std::get<0>(entry); + const unsigned eProc = std::get<1>(entry); + const int eIncrease = std::get<2>(entry); + affectedSteps.insert(eStep); + auto itr = changing.sentChange_.find(std::make_pair(eStep, eProc)); + if (itr == changing.sentChange_.end()) { + changing.sentChange_.insert({std::make_pair(eStep, eProc), eIncrease}); } else { - itr->second += e_increase; + itr->second += eIncrease; } } for (auto entry : recInc) { - const unsigned e_step = std::get<0>(entry); - const unsigned e_proc = std::get<1>(entry); - const int e_increase = std::get<2>(entry); - affectedSteps.insert(e_step); - auto itr = changing.recChange.find(std::make_pair(e_step, e_proc)); - if (itr == changing.recChange.end()) { - changing.recChange.insert({std::make_pair(e_step, e_proc), e_increase}); + const unsigned eStep = std::get<0>(entry); + const unsigned eProc = std::get<1>(entry); + const int eIncrease = std::get<2>(entry); + affectedSteps.insert(eStep); + auto itr = changing.recChange_.find(std::make_pair(eStep, eProc)); + if (itr == changing.recChange_.end()) { + changing.recChange_.insert({std::make_pair(eStep, eProc), eIncrease}); } else { - itr->second += e_increase; + itr->second += eIncrease; } } - auto itrSent = changing.sentChange.begin(), itrRec = changing.recChange.begin(); - bool last_affected_empty = false; + auto itrSent = changing.sentChange_.begin(), itrRec = changing.recChange_.begin(); + bool lastAffectedEmpty = false; for (const unsigned sstep : affectedSteps) { - cost_type oldMax = schedule->getInstance().getArchitecture().communicationCosts() * commCostList[sstep].rbegin()->first; - cost_type oldSync = (HCwithLatency && oldMax > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0; + CostType oldMax = schedule_->GetInstance().GetArchitecture().CommunicationCosts() * commCostList_[sstep].rbegin()->first; + CostType oldSync = (hcWithLatency_ && oldMax > 0) ? 
schedule_->GetInstance().GetArchitecture().SynchronisationCosts() : 0; - cost_type newMax = 0; - for (unsigned j = 0; j < schedule->getInstance().getArchitecture().numberOfProcessors(); ++j) { - int diff = (itrSent != changing.sentChange.end() && itrSent->first.first == sstep && itrSent->first.second == j) + CostType newMax = 0; + for (unsigned j = 0; j < schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); ++j) { + int diff = (itrSent != changing.sentChange_.end() && itrSent->first.first == sstep && itrSent->first.second == j) ? (itrSent++)->second : 0; - if (static_cast(sent[sstep][j]) + diff > static_cast(newMax)) { - newMax = static_cast(static_cast(sent[sstep][j]) + diff); + if (static_cast(sent_[sstep][j]) + diff > static_cast(newMax)) { + newMax = static_cast(static_cast(sent_[sstep][j]) + diff); } - diff = (itrRec != changing.recChange.end() && itrRec->first.first == sstep && itrRec->first.second == j) + diff = (itrRec != changing.recChange_.end() && itrRec->first.first == sstep && itrRec->first.second == j) ? (itrRec++)->second : 0; - if (static_cast(received[sstep][j]) + diff > static_cast(newMax)) { - newMax = static_cast(static_cast(received[sstep][j]) + diff); + if (static_cast(received_[sstep][j]) + diff > static_cast(newMax)) { + newMax = static_cast(static_cast(received_[sstep][j]) + diff); } } - newMax *= schedule->getInstance().getArchitecture().communicationCosts(); - cost_type newSync = (HCwithLatency && newMax > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0; + newMax *= schedule_->GetInstance().GetArchitecture().CommunicationCosts(); + CostType newSync = (hcWithLatency_ && newMax > 0) ? 
schedule_->GetInstance().GetArchitecture().SynchronisationCosts() : 0; if (newMax == 0) { - if (schedule->getStaleness() == 1) { - changing.canShrink = true; + if (schedule_->GetStaleness() == 1) { + changing.canShrink_ = true; } else { if ((sstep > 0 && affectedSteps.find(sstep - 1) == affectedSteps.end() - && commCostList[sstep - 1].rbegin()->first == 0) - || (sstep < commCostList.size() - 1 && affectedSteps.find(sstep + 1) == affectedSteps.end() - && commCostList[sstep + 1].rbegin()->first == 0) - || (sstep > 0 && affectedSteps.find(sstep - 1) != affectedSteps.end() && last_affected_empty)) { - changing.canShrink = true; + && commCostList_[sstep - 1].rbegin()->first == 0) + || (sstep < commCostList_.size() - 1 && affectedSteps.find(sstep + 1) == affectedSteps.end() + && commCostList_[sstep + 1].rbegin()->first == 0) + || (sstep > 0 && affectedSteps.find(sstep - 1) != affectedSteps.end() && lastAffectedEmpty)) { + changing.canShrink_ = true; } } - last_affected_empty = true; + lastAffectedEmpty = true; } else { - last_affected_empty = false; + lastAffectedEmpty = false; } - if (schedule->getStaleness() == 2) { + if (schedule_->GetStaleness() == 2) { auto itrWork = newWorkCost.find(sstep + 1); - oldMax = std::max(oldMax, workCostList[sstep + 1].rbegin()->first); - newMax = std::max(newMax, itrWork != newWorkCost.end() ? itrWork->second : workCostList[sstep + 1].rbegin()->first); + oldMax = std::max(oldMax, workCostList_[sstep + 1].rbegin()->first); + newMax = std::max(newMax, itrWork != newWorkCost.end() ? 
itrWork->second : workCostList_[sstep + 1].rbegin()->first); } change += static_cast(newMax + newSync) - static_cast(oldMax + oldSync); } - changing.newCost = static_cast(static_cast(cost) + change); + changing.newCost_ = static_cast(static_cast(cost_) + change); return change; } // Execute a chosen move, updating the schedule and the data structures -template -void HillClimbingScheduler::executeMove(const vertex_idx node, - const unsigned newProc, - const int where, - const stepAuxData &changing) { - unsigned oldStep = schedule->assignedSuperstep(node); +template +void HillClimbingScheduler::ExecuteMove(const VertexIdx node, + const unsigned newProc, + const int where, + const StepAuxData &changing) { + unsigned oldStep = schedule_->AssignedSuperstep(node); unsigned newStep = static_cast(static_cast(oldStep) + where); - const unsigned oldProc = schedule->assignedProcessor(node); - cost = changing.newCost; + const unsigned oldProc = schedule_->AssignedProcessor(node); + cost_ = changing.newCost_; // Work cost change - workCostList[oldStep].erase(workCostPointer[oldStep][oldProc]); - workCost[oldStep][oldProc] -= schedule->getInstance().getComputationalDag().vertex_work_weight(node); - workCostPointer[oldStep][oldProc] = workCostList[oldStep].insert(std::make_pair(workCost[oldStep][oldProc], oldProc)).first; + workCostList_[oldStep].erase(workCostPointer_[oldStep][oldProc]); + workCost_[oldStep][oldProc] -= schedule_->GetInstance().GetComputationalDag().VertexWorkWeight(node); + workCostPointer_[oldStep][oldProc] = workCostList_[oldStep].insert(std::make_pair(workCost_[oldStep][oldProc], oldProc)).first; - workCostList[newStep].erase(workCostPointer[newStep][newProc]); - workCost[newStep][newProc] += schedule->getInstance().getComputationalDag().vertex_work_weight(node); - workCostPointer[newStep][newProc] = workCostList[newStep].insert(std::make_pair(workCost[newStep][newProc], newProc)).first; + workCostList_[newStep].erase(workCostPointer_[newStep][newProc]); + 
workCost_[newStep][newProc] += schedule_->GetInstance().GetComputationalDag().VertexWorkWeight(node); + workCostPointer_[newStep][newProc] = workCostList_[newStep].insert(std::make_pair(workCost_[newStep][newProc], newProc)).first; // Comm cost change - for (const auto &update : changing.sentChange) { - sent[update.first.first][update.first.second] - = static_cast(static_cast(sent[update.first.first][update.first.second]) + update.second); + for (const auto &update : changing.sentChange_) { + sent_[update.first.first][update.first.second] + = static_cast(static_cast(sent_[update.first.first][update.first.second]) + update.second); } - for (const auto &update : changing.recChange) { - received[update.first.first][update.first.second] - = static_cast(static_cast(received[update.first.first][update.first.second]) + update.second); + for (const auto &update : changing.recChange_) { + received_[update.first.first][update.first.second] + = static_cast(static_cast(received_[update.first.first][update.first.second]) + update.second); } std::set> toUpdate; - for (const auto &update : changing.sentChange) { - if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second]) - != commCost[update.first.first][update.first.second]) { + for (const auto &update : changing.sentChange_) { + if (std::max(sent_[update.first.first][update.first.second], received_[update.first.first][update.first.second]) + != commCost_[update.first.first][update.first.second]) { toUpdate.insert(std::make_pair(update.first.first, update.first.second)); } } - for (const auto &update : changing.recChange) { - if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second]) - != commCost[update.first.first][update.first.second]) { + for (const auto &update : changing.recChange_) { + if (std::max(sent_[update.first.first][update.first.second], received_[update.first.first][update.first.second]) + != 
commCost_[update.first.first][update.first.second]) { toUpdate.insert(std::make_pair(update.first.first, update.first.second)); } } for (const auto &update : toUpdate) { - commCostList[update.first].erase(commCostPointer[update.first][update.second]); - commCost[update.first][update.second] = std::max(sent[update.first][update.second], received[update.first][update.second]); - commCostPointer[update.first][update.second] - = commCostList[update.first].insert(std::make_pair(commCost[update.first][update.second], update.second)).first; + commCostList_[update.first].erase(commCostPointer_[update.first][update.second]); + commCost_[update.first][update.second] + = std::max(sent_[update.first][update.second], received_[update.first][update.second]); + commCostPointer_[update.first][update.second] + = commCostList_[update.first].insert(std::make_pair(commCost_[update.first][update.second], update.second)).first; } // update successor lists - for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) { - auto itr = succSteps[pred][oldProc].find(oldStep); + for (const VertexIdx &pred : schedule_->GetInstance().GetComputationalDag().Parents(node)) { + auto itr = succSteps_[pred][oldProc].find(oldStep); if ((--(itr->second)) == 0) { - succSteps[pred][oldProc].erase(itr); + succSteps_[pred][oldProc].erase(itr); } - itr = succSteps[pred][newProc].find(newStep); - if (itr == succSteps[pred][newProc].end()) { - succSteps[pred][newProc].insert({newStep, 1U}); + itr = succSteps_[pred][newProc].find(newStep); + if (itr == succSteps_[pred][newProc].end()) { + succSteps_[pred][newProc].insert({newStep, 1U}); } else { itr->second += 1; } } // memory constraints, if any - if (use_memory_constraint) { - memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)] - -= schedule->getInstance().getComputationalDag().vertex_mem_weight(node); - memory_used[newProc][newStep] += 
schedule->getInstance().getComputationalDag().vertex_mem_weight(node); + if (useMemoryConstraint_) { + memoryUsed_[schedule_->AssignedProcessor(node)][schedule_->AssignedSuperstep(node)] + -= schedule_->GetInstance().GetComputationalDag().VertexMemWeight(node); + memoryUsed_[newProc][newStep] += schedule_->GetInstance().GetComputationalDag().VertexMemWeight(node); } // update data - schedule->setAssignedProcessor(node, newProc); - schedule->setAssignedSuperstep(node, newStep); - supsteplists[oldStep][oldProc].erase(supStepListPointer[node]); - supsteplists[newStep][newProc].push_back(node); - supStepListPointer[node] = (--supsteplists[newStep][newProc].end()); + schedule_->SetAssignedProcessor(node, newProc); + schedule_->SetAssignedSuperstep(node, newStep); + supsteplists_[oldStep][oldProc].erase(supStepListPointer_[node]); + supsteplists_[newStep][newProc].push_back(node); + supStepListPointer_[node] = (--supsteplists_[newStep][newProc].end()); - updateMoveOptions(node, where); + UpdateMoveOptions(node, where); } // Single hill climbing step -template -bool HillClimbingScheduler::Improve() { - cost_type bestCost = cost; - stepAuxData bestMoveData; - std::pair bestMove; +template +bool HillClimbingScheduler::Improve() { + CostType bestCost = cost_; + StepAuxData bestMoveData; + std::pair bestMove; int bestDir = 0; - int startingDir = nextMove.first; + int startingDir = nextMove_.first; // pre-selected "promising" moves - while (!promisingMoves.empty() && !steepestAscent) { - std::tuple next = promisingMoves.front(); - promisingMoves.pop_front(); + while (!promisingMoves_.empty() && !steepestAscent_) { + std::tuple next = promisingMoves_.front(); + promisingMoves_.pop_front(); - const vertex_idx node = std::get<0>(next); + const VertexIdx node = std::get<0>(next); const unsigned proc = std::get<1>(next); const int where = std::get<2>(next); - if (!canMove[static_cast(where)][node][proc]) { + if (!canMove_[static_cast(where)][node][proc]) { continue; } - if 
(use_memory_constraint && violatesMemConstraint(node, proc, where - 1)) { + if (useMemoryConstraint_ && ViolatesMemConstraint(node, proc, where - 1)) { continue; } - stepAuxData moveData; - int costDiff = moveCostChange(node, proc, where - 1, moveData); + StepAuxData moveData; + int costDiff = MoveCostChange(node, proc, where - 1, moveData); if (costDiff < 0) { - executeMove(node, proc, where - 1, moveData); - if (shrink && moveData.canShrink) { + ExecuteMove(node, proc, where - 1, moveData); + if (shrink_ && moveData.canShrink_) { Init(); } @@ -1015,53 +1016,53 @@ bool HillClimbingScheduler::Improve() { int dir = startingDir; while (true) { bool reachedBeginning = false; - while (nextMove.second == moveOptions[static_cast(nextMove.first)].end()) { - dir = (nextMove.first + 1) % NumDirections; + while (nextMove_.second == moveOptions_[static_cast(nextMove_.first)].end()) { + dir = (nextMove_.first + 1) % numDirections_; if (dir == startingDir) { reachedBeginning = true; break; } - nextMove.first = dir; - nextMove.second = moveOptions[static_cast(nextMove.first)].begin(); + nextMove_.first = dir; + nextMove_.second = moveOptions_[static_cast(nextMove_.first)].begin(); } if (reachedBeginning) { break; } - std::pair next = *nextMove.second; - ++nextMove.second; + std::pair next = *nextMove_.second; + ++nextMove_.second; - const vertex_idx node = next.first; + const VertexIdx node = next.first; const unsigned proc = next.second; - if (use_memory_constraint && violatesMemConstraint(node, proc, dir - 1)) { + if (useMemoryConstraint_ && ViolatesMemConstraint(node, proc, dir - 1)) { continue; } - stepAuxData moveData; - int costDiff = moveCostChange(node, proc, dir - 1, moveData); + StepAuxData moveData; + int costDiff = MoveCostChange(node, proc, dir - 1, moveData); - if (!steepestAscent && costDiff < 0) { - executeMove(node, proc, dir - 1, moveData); - if (shrink && moveData.canShrink) { + if (!steepestAscent_ && costDiff < 0) { + ExecuteMove(node, proc, dir - 1, 
moveData); + if (shrink_ && moveData.canShrink_) { Init(); } return true; - } else if (static_cast(static_cast(cost) + costDiff) < bestCost) { - bestCost = static_cast(static_cast(cost) + costDiff); + } else if (static_cast(static_cast(cost_) + costDiff) < bestCost) { + bestCost = static_cast(static_cast(cost_) + costDiff); bestMove = next; bestMoveData = moveData; bestDir = dir - 1; } } - if (bestCost == cost) { + if (bestCost == cost_) { return false; } - executeMove(bestMove.first, bestMove.second, bestDir, bestMoveData); - if (shrink && bestMoveData.canShrink) { + ExecuteMove(bestMove.first, bestMove.second, bestDir, bestMoveData); + if (shrink_ && bestMoveData.canShrink_) { Init(); } @@ -1069,30 +1070,30 @@ bool HillClimbingScheduler::Improve() { } // Check if move violates mem constraints -template -bool HillClimbingScheduler::violatesMemConstraint(vertex_idx node, unsigned processor, int where) { - if (memory_used[processor][static_cast(static_cast(schedule->assignedSuperstep(node)) + where)] - + schedule->getInstance().getComputationalDag().vertex_mem_weight(node) - > schedule->getInstance().memoryBound(processor)) { // TODO ANDRAS double check change +template +bool HillClimbingScheduler::ViolatesMemConstraint(VertexIdx node, unsigned processor, int where) { + if (memoryUsed_[processor][static_cast(static_cast(schedule_->AssignedSuperstep(node)) + where)] + + schedule_->GetInstance().GetComputationalDag().VertexMemWeight(node) + > schedule_->GetInstance().MemoryBound(processor)) { // TODO ANDRAS double check change return true; } return false; } -template -void HillClimbingScheduler::CreateSupstepLists() { - const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); - const Graph_t &G = schedule->getInstance().getComputationalDag(); +template +void HillClimbingScheduler::CreateSupstepLists() { + const unsigned p = schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); + const GraphT &g = 
schedule_->GetInstance().GetComputationalDag(); - schedule->updateNumberOfSupersteps(); - const unsigned M = schedule->numberOfSupersteps(); + schedule_->UpdateNumberOfSupersteps(); + const unsigned m = schedule_->NumberOfSupersteps(); - supsteplists.clear(); - supsteplists.resize(M, std::vector>(P)); + supsteplists_.clear(); + supsteplists_.resize(m, std::vector>(p)); - for (vertex_idx node : top_sort_view(G)) { - supsteplists[schedule->assignedSuperstep(node)][schedule->assignedProcessor(node)].push_back(node); + for (VertexIdx node : TopSortView(g)) { + supsteplists_[schedule_->AssignedSuperstep(node)][schedule_->AssignedProcessor(node)].push_back(node); } } diff --git a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp index 07131ff3..79f0cf02 100644 --- a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp @@ -25,33 +25,33 @@ limitations under the License. 
namespace osp { -template +template class HillClimbingForCommSteps { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - static_assert(is_computational_dag_v, "Graph_t must satisfy the computational_dag concept"); + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + static_assert(isComputationalDagV, "GraphT must satisfy the computational_dag concept"); - using vertex_idx = vertex_idx_t; - using cost_type = v_commw_t; + using VertexIdx = VertexIdxT; + using CostType = VCommwT; - BspScheduleCS *schedule; - cost_type cost = 0; + BspScheduleCS *schedule_; + CostType cost_ = 0; // Main parameters for runnign algorithm - bool steepestAscent = false; + bool steepestAscent_ = false; // aux data for comm schedule hill climbing - std::vector> commSchedule; - std::vector>> supsteplists; - std::vector>> commCostList; - std::vector>::iterator>> commCostPointer; - std::vector> sent, received, commCost; - std::vector>> commBounds; - std::vector>>> commSchedSendLists; - std::vector>::iterator>> commSchedSendListPointer; - std::vector>>> commSchedRecLists; - std::vector>::iterator>> commSchedRecListPointer; - std::vector minimum_cost_per_superstep; - unsigned nextSupstep; + std::vector> commSchedule_; + std::vector>> supsteplists_; + std::vector>> commCostList_; + std::vector>::iterator>> commCostPointer_; + std::vector> sent_, received_, commCost_; + std::vector>> commBounds_; + std::vector>>> commSchedSendLists_; + std::vector>::iterator>> commSchedSendListPointer_; + std::vector>>> commSchedRecLists_; + std::vector>::iterator>> commSchedRecListPointer_; + std::vector minimumCostPerSuperstep_; + unsigned nextSupstep_; // Create superstep lists (for convenience) for a BSP schedule void CreateSupstepLists(); @@ -60,10 +60,10 @@ class HillClimbingForCommSteps { void Init(); // compute cost change incurred by a potential move - int moveCostChange(vertex_idx node, unsigned p, unsigned step); + int 
MoveCostChange(VertexIdx node, unsigned p, unsigned step); // execute a move, updating the comm. schedule and the data structures - void executeMove(vertex_idx node, unsigned p, unsigned step, int changeCost); + void ExecuteMove(VertexIdx node, unsigned p, unsigned step, int changeCost); // Single comm. schedule hill climbing step bool Improve(); @@ -76,30 +76,30 @@ class HillClimbingForCommSteps { virtual ~HillClimbingForCommSteps() = default; - virtual RETURN_STATUS improveSchedule(BspScheduleCS &input_schedule); + virtual ReturnStatus ImproveSchedule(BspScheduleCS &inputSchedule); // call with time limit - virtual RETURN_STATUS improveScheduleWithTimeLimit(BspScheduleCS &input_schedule, const unsigned timeLimit); + virtual ReturnStatus ImproveScheduleWithTimeLimit(BspScheduleCS &inputSchedule, const unsigned timeLimit); // setting parameters - void setSteepestAscend(bool steepestAscent_) { steepestAscent = steepestAscent_; } + void SetSteepestAscend(bool steepestAscent) { steepestAscent_ = steepestAscent; } - virtual std::string getScheduleName() const { return "HillClimbingForCommSchedule"; } + virtual std::string GetScheduleName() const { return "HillClimbingForCommSchedule"; } }; -template -RETURN_STATUS HillClimbingForCommSteps::improveSchedule(BspScheduleCS &input_schedule) { - return improveScheduleWithTimeLimit(input_schedule, 180); +template +ReturnStatus HillClimbingForCommSteps::ImproveSchedule(BspScheduleCS &inputSchedule) { + return ImproveScheduleWithTimeLimit(inputSchedule, 180); } // Main method for hill climbing (with time limit) -template -RETURN_STATUS HillClimbingForCommSteps::improveScheduleWithTimeLimit(BspScheduleCS &input_schedule, - const unsigned timeLimit) { - schedule = &input_schedule; +template +ReturnStatus HillClimbingForCommSteps::ImproveScheduleWithTimeLimit(BspScheduleCS &inputSchedule, + const unsigned timeLimit) { + schedule_ = &inputSchedule; - if (schedule->numberOfSupersteps() <= 2) { - return RETURN_STATUS::OSP_SUCCESS; + 
if (schedule_->NumberOfSupersteps() <= 2) { + return ReturnStatus::OSP_SUCCESS; } Init(); @@ -121,54 +121,54 @@ RETURN_STATUS HillClimbingForCommSteps::improveScheduleWithTimeLimit(Bs ConvertCommSchedule(); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } // Initialization for comm. schedule hill climbing -template -void HillClimbingForCommSteps::Init() { - const unsigned N = static_cast(schedule->getInstance().getComputationalDag().num_vertices()); - const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); - const unsigned M = schedule->numberOfSupersteps(); - const Graph_t &G = schedule->getInstance().getComputationalDag(); +template +void HillClimbingForCommSteps::Init() { + const unsigned n = static_cast(schedule_->GetInstance().GetComputationalDag().NumVertices()); + const unsigned p = schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); + const unsigned m = schedule_->NumberOfSupersteps(); + const GraphT &g = schedule_->GetInstance().GetComputationalDag(); CreateSupstepLists(); - cost = schedule->computeCosts(); - - nextSupstep = 0; - commSchedule.clear(); - commSchedule.resize(N, std::vector(P, UINT_MAX)); - sent.clear(); - sent.resize(M - 1, std::vector(P, 0)); - received.clear(); - received.resize(M - 1, std::vector(P, 0)); - commCost.clear(); - commCost.resize(M - 1, std::vector(P)); - commCostList.clear(); - commCostList.resize(M - 1); - commCostPointer.clear(); - commCostPointer.resize(M - 1, std::vector>::iterator>(P)); - commBounds.clear(); - commBounds.resize(N, std::vector>(P)); - commSchedSendLists.clear(); - commSchedSendLists.resize(M - 1, std::vector>>(P)); - commSchedRecLists.clear(); - commSchedRecLists.resize(M - 1, std::vector>>(P)); - commSchedSendListPointer.clear(); - commSchedSendListPointer.resize(N, std::vector>::iterator>(P)); - commSchedRecListPointer.clear(); - commSchedRecListPointer.resize(N, std::vector>::iterator>(P)); + cost_ = schedule_->ComputeCosts(); + + 
nextSupstep_ = 0; + commSchedule_.clear(); + commSchedule_.resize(n, std::vector(p, UINT_MAX)); + sent_.clear(); + sent_.resize(m - 1, std::vector(p, 0)); + received_.clear(); + received_.resize(m - 1, std::vector(p, 0)); + commCost_.clear(); + commCost_.resize(m - 1, std::vector(p)); + commCostList_.clear(); + commCostList_.resize(m - 1); + commCostPointer_.clear(); + commCostPointer_.resize(m - 1, std::vector>::iterator>(p)); + commBounds_.clear(); + commBounds_.resize(n, std::vector>(p)); + commSchedSendLists_.clear(); + commSchedSendLists_.resize(m - 1, std::vector>>(p)); + commSchedRecLists_.clear(); + commSchedRecLists_.resize(m - 1, std::vector>>(p)); + commSchedSendListPointer_.clear(); + commSchedSendListPointer_.resize(n, std::vector>::iterator>(p)); + commSchedRecListPointer_.clear(); + commSchedRecListPointer_.resize(n, std::vector>::iterator>(p)); // initialize to lazy comm schedule first - to make sure it's correct even if e.g. com scehdule has indirect sending - for (unsigned step = 1; step < M; ++step) { - for (unsigned proc = 0; proc < P; ++proc) { - for (const vertex_idx node : supsteplists[step][proc]) { - for (const vertex_idx &pred : G.parents(node)) { - if (schedule->assignedProcessor(pred) != schedule->assignedProcessor(node) - && commSchedule[pred][schedule->assignedProcessor(node)] == UINT_MAX) { - commSchedule[pred][schedule->assignedProcessor(node)] = step - schedule->getStaleness(); - commBounds[pred][schedule->assignedProcessor(node)] - = std::make_pair(schedule->assignedSuperstep(pred), step - schedule->getStaleness()); + for (unsigned step = 1; step < m; ++step) { + for (unsigned proc = 0; proc < p; ++proc) { + for (const VertexIdx node : supsteplists_[step][proc]) { + for (const VertexIdx &pred : g.Parents(node)) { + if (schedule_->AssignedProcessor(pred) != schedule_->AssignedProcessor(node) + && commSchedule_[pred][schedule_->AssignedProcessor(node)] == UINT_MAX) { + commSchedule_[pred][schedule_->AssignedProcessor(node)] = step - 
schedule_->GetStaleness(); + commBounds_[pred][schedule_->AssignedProcessor(node)] + = std::make_pair(schedule_->AssignedSuperstep(pred), step - schedule_->GetStaleness()); } } } @@ -176,218 +176,219 @@ void HillClimbingForCommSteps::Init() { } // overwrite with original comm schedule, wherever possible - const std::map, unsigned int> originalCommSchedule - = schedule->getCommunicationSchedule(); - for (vertex_idx node = 0; node < N; ++node) { - for (unsigned proc = 0; proc < P; ++proc) { - if (commSchedule[node][proc] == UINT_MAX) { + const std::map, unsigned int> originalCommSchedule + = schedule_->GetCommunicationSchedule(); + for (VertexIdx node = 0; node < n; ++node) { + for (unsigned proc = 0; proc < p; ++proc) { + if (commSchedule_[node][proc] == UINT_MAX) { continue; } - const auto comm_schedule_key = std::make_tuple(node, schedule->assignedProcessor(node), proc); - auto mapIterator = originalCommSchedule.find(comm_schedule_key); + const auto commScheduleKey = std::make_tuple(node, schedule_->AssignedProcessor(node), proc); + auto mapIterator = originalCommSchedule.find(commScheduleKey); if (mapIterator != originalCommSchedule.end()) { unsigned originalStep = mapIterator->second; - if (originalStep >= commBounds[node][proc].first && originalStep <= commBounds[node][proc].second) { - commSchedule[node][proc] = originalStep; + if (originalStep >= commBounds_[node][proc].first && originalStep <= commBounds_[node][proc].second) { + commSchedule_[node][proc] = originalStep; } } - unsigned step = commSchedule[node][proc]; - commSchedSendLists[step][schedule->assignedProcessor(node)].emplace_front(node, proc); - commSchedSendListPointer[node][proc] = commSchedSendLists[step][schedule->assignedProcessor(node)].begin(); - commSchedRecLists[step][proc].emplace_front(node, proc); - commSchedRecListPointer[node][proc] = commSchedRecLists[step][proc].begin(); - - sent[step][schedule->assignedProcessor(node)] - += 
schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc); - received[step][proc] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc); + unsigned step = commSchedule_[node][proc]; + commSchedSendLists_[step][schedule_->AssignedProcessor(node)].emplace_front(node, proc); + commSchedSendListPointer_[node][proc] = commSchedSendLists_[step][schedule_->AssignedProcessor(node)].begin(); + commSchedRecLists_[step][proc].emplace_front(node, proc); + commSchedRecListPointer_[node][proc] = commSchedRecLists_[step][proc].begin(); + + sent_[step][schedule_->AssignedProcessor(node)] + += schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(schedule_->AssignedProcessor(node), proc); + received_[step][proc] + += schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(schedule_->AssignedProcessor(node), proc); } } - for (unsigned step = 0; step < M - 1; ++step) { - for (unsigned proc = 0; proc < P; ++proc) { - commCost[step][proc] = std::max(sent[step][proc], received[step][proc]); - commCostPointer[step][proc] = commCostList[step].emplace(commCost[step][proc], proc).first; + for (unsigned step = 0; step < m - 1; ++step) { + for (unsigned proc = 0; proc < p; ++proc) { + commCost_[step][proc] = std::max(sent_[step][proc], received_[step][proc]); + commCostPointer_[step][proc] = commCostList_[step].emplace(commCost_[step][proc], proc).first; } } // set minimum cost - differs for BSP and MaxBSP - minimum_cost_per_superstep.clear(); - if (schedule->getStaleness() == 1) { - minimum_cost_per_superstep.resize(M - 1, 0); + minimumCostPerSuperstep_.clear(); + if (schedule_->GetStaleness() == 1) { + 
minimumCostPerSuperstep_.resize(m - 1, 0); } else { - minimum_cost_per_superstep = cost_helpers::compute_max_work_per_step(*schedule); - minimum_cost_per_superstep.erase(minimum_cost_per_superstep.begin()); + minimumCostPerSuperstep_ = cost_helpers::ComputeMaxWorkPerStep(*schedule_); + minimumCostPerSuperstep_.erase(minimumCostPerSuperstep_.begin()); } } // compute cost change incurred by a potential move -template -int HillClimbingForCommSteps::moveCostChange(const vertex_idx node, const unsigned p, const unsigned step) { - const unsigned oldStep = commSchedule[node][p]; - const unsigned sourceProc = schedule->assignedProcessor(node); +template +int HillClimbingForCommSteps::MoveCostChange(const VertexIdx node, const unsigned proc, const unsigned step) { + const unsigned oldStep = commSchedule_[node][proc]; + const unsigned sourceProc = schedule_->AssignedProcessor(node); int change = 0; // Change at old place - auto itr = commCostList[oldStep].rbegin(); - cost_type oldMax = std::max(itr->first * schedule->getInstance().getArchitecture().communicationCosts(), - minimum_cost_per_superstep[oldStep]) - + schedule->getInstance().getArchitecture().synchronisationCosts(); - cost_type maxSource = std::max(sent[oldStep][sourceProc] - - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p), - received[oldStep][sourceProc]); - cost_type maxTarget = std::max(sent[oldStep][p], - received[oldStep][p] - - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p)); - cost_type maxOther = 0; - for (; itr != commCostList[oldStep].rend(); ++itr) { - if (itr->second != sourceProc && itr->second != p) { + auto itr = commCostList_[oldStep].rbegin(); + CostType oldMax = std::max(itr->first * schedule_->GetInstance().GetArchitecture().CommunicationCosts(), + minimumCostPerSuperstep_[oldStep]) + + 
schedule_->GetInstance().GetArchitecture().SynchronisationCosts(); + CostType maxSource = std::max(sent_[oldStep][sourceProc] + - schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc), + received_[oldStep][sourceProc]); + CostType maxTarget = std::max(sent_[oldStep][proc], + received_[oldStep][proc] + - schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc)); + CostType maxOther = 0; + for (; itr != commCostList_[oldStep].rend(); ++itr) { + if (itr->second != sourceProc && itr->second != proc) { maxOther = itr->first; break; } } - cost_type newMax - = std::max(std::max(maxSource, maxTarget), maxOther) * schedule->getInstance().getArchitecture().communicationCosts(); - cost_type newSync = (newMax > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0; - newMax = std::max(newMax, minimum_cost_per_superstep[oldStep]) + newSync; + CostType newMax + = std::max(std::max(maxSource, maxTarget), maxOther) * schedule_->GetInstance().GetArchitecture().CommunicationCosts(); + CostType newSync = (newMax > 0) ? schedule_->GetInstance().GetArchitecture().SynchronisationCosts() : 0; + newMax = std::max(newMax, minimumCostPerSuperstep_[oldStep]) + newSync; change += static_cast(newMax) - static_cast(oldMax); // Change at new place - oldMax = commCostList[step].rbegin()->first * schedule->getInstance().getArchitecture().communicationCosts(); - cost_type oldSync = (oldMax > 0) ? 
schedule->getInstance().getArchitecture().synchronisationCosts() : 0; - oldMax = std::max(oldMax, minimum_cost_per_superstep[step]); - maxSource = schedule->getInstance().getArchitecture().communicationCosts() - * (sent[step][sourceProc] - + schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p)); - maxTarget = schedule->getInstance().getArchitecture().communicationCosts() - * (received[step][p] - + schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p)); + oldMax = commCostList_[step].rbegin()->first * schedule_->GetInstance().GetArchitecture().CommunicationCosts(); + CostType oldSync = (oldMax > 0) ? schedule_->GetInstance().GetArchitecture().SynchronisationCosts() : 0; + oldMax = std::max(oldMax, minimumCostPerSuperstep_[step]); + maxSource = schedule_->GetInstance().GetArchitecture().CommunicationCosts() + * (sent_[step][sourceProc] + + schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc)); + maxTarget = schedule_->GetInstance().GetArchitecture().CommunicationCosts() + * (received_[step][proc] + + schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc)); newMax = std::max(std::max(oldMax, maxSource), maxTarget); - change += static_cast(newMax + schedule->getInstance().getArchitecture().synchronisationCosts()) + change += static_cast(newMax + schedule_->GetInstance().GetArchitecture().SynchronisationCosts()) - static_cast(oldMax + oldSync); return change; } // execute a move, updating the comm. 
schedule and the data structures -template -void HillClimbingForCommSteps::executeMove(vertex_idx node, unsigned p, const unsigned step, const int changeCost) { - const unsigned oldStep = commSchedule[node][p]; - const unsigned sourceProc = schedule->assignedProcessor(node); - cost = static_cast(static_cast(cost) + changeCost); +template +void HillClimbingForCommSteps::ExecuteMove(VertexIdx node, unsigned proc, const unsigned step, const int changeCost) { + const unsigned oldStep = commSchedule_[node][proc]; + const unsigned sourceProc = schedule_->AssignedProcessor(node); + cost_ = static_cast(static_cast(cost_) + changeCost); // Old step update - if (sent[oldStep][sourceProc] > received[oldStep][sourceProc]) { - commCostList[oldStep].erase(commCostPointer[oldStep][sourceProc]); - sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); - commCost[oldStep][sourceProc] = std::max(sent[oldStep][sourceProc], received[oldStep][sourceProc]); - commCostPointer[oldStep][sourceProc] = commCostList[oldStep].emplace(commCost[oldStep][sourceProc], sourceProc).first; + if (sent_[oldStep][sourceProc] > received_[oldStep][sourceProc]) { + commCostList_[oldStep].erase(commCostPointer_[oldStep][sourceProc]); + sent_[oldStep][sourceProc] -= schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc); + commCost_[oldStep][sourceProc] = std::max(sent_[oldStep][sourceProc], received_[oldStep][sourceProc]); + commCostPointer_[oldStep][sourceProc] = commCostList_[oldStep].emplace(commCost_[oldStep][sourceProc], sourceProc).first; } else { - sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + sent_[oldStep][sourceProc] -= 
schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc); } - if (received[oldStep][p] > sent[oldStep][p]) { - commCostList[oldStep].erase(commCostPointer[oldStep][p]); - received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); - commCost[oldStep][p] = std::max(sent[oldStep][p], received[oldStep][p]); - commCostPointer[oldStep][p] = commCostList[oldStep].emplace(commCost[oldStep][p], p).first; + if (received_[oldStep][proc] > sent_[oldStep][proc]) { + commCostList_[oldStep].erase(commCostPointer_[oldStep][proc]); + received_[oldStep][proc] -= schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc); + commCost_[oldStep][proc] = std::max(sent_[oldStep][proc], received_[oldStep][proc]); + commCostPointer_[oldStep][proc] = commCostList_[oldStep].emplace(commCost_[oldStep][proc], proc).first; } else { - received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + received_[oldStep][proc] -= schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc); } // New step update - sent[step][sourceProc] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); - if (sent[step][sourceProc] > received[step][sourceProc]) { - commCostList[step].erase(commCostPointer[step][sourceProc]); - commCost[step][sourceProc] = sent[step][sourceProc]; - commCostPointer[step][sourceProc] = commCostList[step].emplace(commCost[step][sourceProc], sourceProc).first; + sent_[step][sourceProc] += 
schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc); + if (sent_[step][sourceProc] > received_[step][sourceProc]) { + commCostList_[step].erase(commCostPointer_[step][sourceProc]); + commCost_[step][sourceProc] = sent_[step][sourceProc]; + commCostPointer_[step][sourceProc] = commCostList_[step].emplace(commCost_[step][sourceProc], sourceProc).first; } - received[step][p] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) - * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); - if (received[step][p] > sent[step][p]) { - commCostList[step].erase(commCostPointer[step][p]); - commCost[step][p] = received[step][p]; - commCostPointer[step][p] = commCostList[step].emplace(commCost[step][p], p).first; + received_[step][proc] += schedule_->GetInstance().GetComputationalDag().VertexCommWeight(node) + * schedule_->GetInstance().GetArchitecture().SendCosts(sourceProc, proc); + if (received_[step][proc] > sent_[step][proc]) { + commCostList_[step].erase(commCostPointer_[step][proc]); + commCost_[step][proc] = received_[step][proc]; + commCostPointer_[step][proc] = commCostList_[step].emplace(commCost_[step][proc], proc).first; } // CommSched update - commSchedule[node][p] = step; + commSchedule_[node][proc] = step; // Comm lists - commSchedSendLists[oldStep][sourceProc].erase(commSchedSendListPointer[node][p]); - commSchedSendLists[step][sourceProc].emplace_front(node, p); - commSchedSendListPointer[node][p] = commSchedSendLists[step][sourceProc].begin(); + commSchedSendLists_[oldStep][sourceProc].erase(commSchedSendListPointer_[node][proc]); + commSchedSendLists_[step][sourceProc].emplace_front(node, proc); + commSchedSendListPointer_[node][proc] = commSchedSendLists_[step][sourceProc].begin(); - commSchedRecLists[oldStep][p].erase(commSchedRecListPointer[node][p]); - commSchedRecLists[step][p].emplace_front(node, p); - 
commSchedRecListPointer[node][p] = commSchedRecLists[step][p].begin(); + commSchedRecLists_[oldStep][proc].erase(commSchedRecListPointer_[node][proc]); + commSchedRecLists_[step][proc].emplace_front(node, proc); + commSchedRecListPointer_[node][proc] = commSchedRecLists_[step][proc].begin(); } // Single comm. schedule hill climbing step -template -bool HillClimbingForCommSteps::Improve() { - const unsigned M = static_cast(schedule->numberOfSupersteps()); +template +bool HillClimbingForCommSteps::Improve() { + const unsigned m = static_cast(schedule_->NumberOfSupersteps()); int bestDiff = 0; - vertex_idx bestNode = 0; + VertexIdx bestNode = 0; unsigned bestProc = 0, bestStep = 0; - unsigned startingSupstep = nextSupstep; + unsigned startingSupstep = nextSupstep_; // iterate over supersteps while (true) { - auto itr = commCostList[nextSupstep].rbegin(); + auto itr = commCostList_[nextSupstep_].rbegin(); - if (itr == commCostList[nextSupstep].crend()) { + if (itr == commCostList_[nextSupstep_].crend()) { break; } // find maximal comm cost that dominates the h-relation - const cost_type commMax = itr->first; + const CostType commMax = itr->first; if (commMax == 0) { - nextSupstep = (nextSupstep + 1) % (M - 1); - if (nextSupstep == startingSupstep) { + nextSupstep_ = (nextSupstep_ + 1) % (m - 1); + if (nextSupstep_ == startingSupstep) { break; } else { continue; } } - // go over all processors that incur this maximal comm cost in superstep nextSupstep - for (; itr != commCostList[nextSupstep].rend() && itr->first == commMax; ++itr) { + // go over all processors that incur this maximal comm cost in superstep nextSupstep_ + for (; itr != commCostList_[nextSupstep_].rend() && itr->first == commMax; ++itr) { const unsigned maxProc = itr->second; - if (sent[nextSupstep][maxProc] == commMax) { - for (const std::pair &entry : commSchedSendLists[nextSupstep][maxProc]) { - const vertex_idx node = entry.first; - const unsigned p = entry.second; + if (sent_[nextSupstep_][maxProc] 
== commMax) { + for (const std::pair &entry : commSchedSendLists_[nextSupstep_][maxProc]) { + const VertexIdx node = entry.first; + const unsigned proc = entry.second; // iterate over alternative supsteps to place this communication step - for (unsigned step = commBounds[node][p].first; step < commBounds[node][p].second; ++step) { - if (step == commSchedule[node][p]) { + for (unsigned step = commBounds_[node][proc].first; step < commBounds_[node][proc].second; ++step) { + if (step == commSchedule_[node][proc]) { continue; } - const int costDiff = moveCostChange(node, p, step); + const int costDiff = MoveCostChange(node, proc, step); - if (!steepestAscent && costDiff < 0) { - executeMove(node, p, step, costDiff); + if (!steepestAscent_ && costDiff < 0) { + ExecuteMove(node, proc, step, costDiff); return true; } else if (costDiff < bestDiff) { bestNode = node; - bestProc = p; + bestProc = proc; bestStep = step; bestDiff = costDiff; } @@ -395,25 +396,25 @@ bool HillClimbingForCommSteps::Improve() { } } - if (received[nextSupstep][maxProc] == commMax) { - for (const std::pair &entry : commSchedRecLists[nextSupstep][maxProc]) { - const vertex_idx node = entry.first; - const unsigned p = entry.second; + if (received_[nextSupstep_][maxProc] == commMax) { + for (const std::pair &entry : commSchedRecLists_[nextSupstep_][maxProc]) { + const VertexIdx node = entry.first; + const unsigned proc = entry.second; // iterate over alternative supsteps to place this communication step - for (unsigned step = commBounds[node][p].first; step < commBounds[node][p].second; ++step) { - if (step == commSchedule[node][p]) { + for (unsigned step = commBounds_[node][proc].first; step < commBounds_[node][proc].second; ++step) { + if (step == commSchedule_[node][proc]) { continue; } - const int costDiff = moveCostChange(node, p, step); + const int costDiff = MoveCostChange(node, proc, step); - if (!steepestAscent && costDiff < 0) { - executeMove(node, p, step, costDiff); + if (!steepestAscent_ 
&& costDiff < 0) { + ExecuteMove(node, proc, step, costDiff); return true; } if (costDiff < bestDiff) { bestNode = node; - bestProc = p; + bestProc = proc; bestStep = step; bestDiff = costDiff; } @@ -422,8 +423,8 @@ bool HillClimbingForCommSteps::Improve() { } } - nextSupstep = (nextSupstep + 1) % (M - 1); - if (nextSupstep == startingSupstep) { + nextSupstep_ = (nextSupstep_ + 1) % (m - 1); + if (nextSupstep_ == startingSupstep) { break; } } @@ -432,45 +433,45 @@ bool HillClimbingForCommSteps::Improve() { return false; } - executeMove(bestNode, bestProc, bestStep, bestDiff); + ExecuteMove(bestNode, bestProc, bestStep, bestDiff); return true; } -template -void HillClimbingForCommSteps::CreateSupstepLists() { - const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); - const Graph_t &G = schedule->getInstance().getComputationalDag(); +template +void HillClimbingForCommSteps::CreateSupstepLists() { + const unsigned p = schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); + const GraphT &g = schedule_->GetInstance().GetComputationalDag(); - schedule->updateNumberOfSupersteps(); - const unsigned M = schedule->numberOfSupersteps(); + schedule_->UpdateNumberOfSupersteps(); + const unsigned m = schedule_->NumberOfSupersteps(); - supsteplists.clear(); - supsteplists.resize(M, std::vector>(P)); + supsteplists_.clear(); + supsteplists_.resize(m, std::vector>(p)); - const std::vector topOrder = GetTopOrder(G); - for (vertex_idx node : topOrder) { - supsteplists[schedule->assignedSuperstep(node)][schedule->assignedProcessor(node)].push_back(node); + const std::vector topOrder = GetTopOrder(g); + for (VertexIdx node : topOrder) { + supsteplists_[schedule_->AssignedSuperstep(node)][schedule_->AssignedProcessor(node)].push_back(node); } } -template -void HillClimbingForCommSteps::ConvertCommSchedule() { - const vertex_idx N = static_cast(schedule->getInstance().getComputationalDag().num_vertices()); - const unsigned P = 
schedule->getInstance().getArchitecture().numberOfProcessors(); +template +void HillClimbingForCommSteps::ConvertCommSchedule() { + const VertexIdx n = static_cast(schedule_->GetInstance().GetComputationalDag().NumVertices()); + const unsigned p = schedule_->GetInstance().GetArchitecture().NumberOfProcessors(); - std::map, unsigned> newCommSchedule; + std::map, unsigned> newCommSchedule; - for (vertex_idx node = 0; node < N; ++node) { - for (unsigned proc = 0; proc < P; ++proc) { - if (commSchedule[node][proc] != UINT_MAX) { - const auto comm_schedule_key = std::make_tuple(node, schedule->assignedProcessor(node), proc); - newCommSchedule[comm_schedule_key] = commSchedule[node][proc]; + for (VertexIdx node = 0; node < n; ++node) { + for (unsigned proc = 0; proc < p; ++proc) { + if (commSchedule_[node][proc] != UINT_MAX) { + const auto commScheduleKey = std::make_tuple(node, schedule_->AssignedProcessor(node), proc); + newCommSchedule[commScheduleKey] = commSchedule_[node][proc]; } } } - schedule->setCommunicationSchedule(newCommSchedule); + schedule_->SetCommunicationSchedule(newCommSchedule); } } // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/comm_cost_policies.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/comm_cost_policies.hpp new file mode 100644 index 00000000..34380164 --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/comm_cost_policies.hpp @@ -0,0 +1,533 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include +#include +#include + +namespace osp { + +struct EagerCommCostPolicy { + using ValueType = unsigned; + + template + static inline void AttributeCommunication(DS &ds, + const CommWeightT &cost, + const unsigned uStep, + const unsigned uProc, + const unsigned vProc, + const unsigned vStep, + const ValueType &val) { + ds.StepProcReceive(uStep, vProc) += cost; + ds.StepProcSend(uStep, uProc) += cost; + } + + template + static inline void UnattributeCommunication(DS &ds, + const CommWeightT &cost, + const unsigned uStep, + const unsigned uProc, + const unsigned vProc, + const unsigned vStep, + const ValueType &val) { + ds.StepProcReceive(uStep, vProc) -= cost; + ds.StepProcSend(uStep, uProc) -= cost; + } + + static inline bool AddChild(ValueType &val, unsigned step) { + val++; + return val == 1; + } + + static inline bool RemoveChild(ValueType &val, unsigned step) { + val--; + return val == 0; + } + + static inline void Reset(ValueType &val) { val = 0; } + + static inline bool HasEntry(const ValueType &val) { return val > 0; } + + static inline bool IsSingleEntry(const ValueType &val) { return val == 1; } + + template + static inline void CalculateDeltaRemove(const ValueType &val, + unsigned childStep, + unsigned parentStep, + unsigned parentProc, + unsigned childProc, + CommWeightT cost, + DeltaTracker &dt) { + if (val == 1) { + dt.Add(true, parentStep, childProc, -cost); + dt.Add(false, parentStep, parentProc, -cost); + } + } + + template + static inline void CalculateDeltaAdd(const ValueType &val, + unsigned childStep, + unsigned parentStep, + unsigned parentProc, + unsigned childProc, + CommWeightT cost, + DeltaTracker &dt) { + if (val == 0) { + dt.Add(true, parentStep, childProc, cost); + dt.Add(false, parentStep, parentProc, cost); + } + } + + template + 
static inline void CalculateDeltaOutgoing( + const ValueType &val, unsigned nodeStep, unsigned nodeProc, unsigned childProc, CommWeightT cost, DeltaTracker &dt) { + if (val > 0) { + CommWeightT totalCost = cost * val; + dt.Add(true, nodeStep, childProc, totalCost); + dt.Add(false, nodeStep, nodeProc, totalCost); + } + } +}; + +struct LazyCommCostPolicy { + using ValueType = std::vector; + + template + static inline void AttributeCommunication(DS &ds, + const CommWeightT &cost, + const unsigned uStep, + const unsigned uProc, + const unsigned vProc, + const unsigned vStep, + const ValueType &val) { + // val contains v_step (already added). + // Check if v_step is the new minimum. + unsigned minStep = std::numeric_limits::max(); + for (unsigned s : val) { + minStep = std::min(minStep, s); + } + + if (minStep == vStep) { + // Check if it was strictly smaller than previous min. + unsigned prevMin = std::numeric_limits::max(); + for (size_t i = 0; i < val.size() - 1; ++i) { + prevMin = std::min(prevMin, val[i]); + } + + if (vStep < prevMin) { + if (prevMin != std::numeric_limits::max() && prevMin > 0) { + ds.StepProcReceive(prevMin - 1, vProc) -= cost; + ds.StepProcSend(prevMin - 1, uProc) -= cost; + } + if (vStep > 0) { + ds.StepProcReceive(vStep - 1, vProc) += cost; + ds.StepProcSend(vStep - 1, uProc) += cost; + } + } + } + } + + template + static inline void UnattributeCommunication(DS &ds, + const CommWeightT &cost, + const unsigned uStep, + const unsigned uProc, + const unsigned vProc, + const unsigned vStep, + const ValueType &val) { + // val is state AFTER removal. + + if (val.empty()) { + // Removed the last child. + if (vStep > 0) { + ds.StepProcReceive(vStep - 1, vProc) -= cost; + ds.StepProcSend(vStep - 1, uProc) -= cost; + } + } else { + // Check if v_step was the unique minimum. + unsigned newMin = val[0]; + for (unsigned s : val) { + newMin = std::min(newMin, s); + } + + if (vStep < newMin) { + // v_step was the unique minimum. 
+ if (vStep > 0) { + ds.StepProcReceive(vStep - 1, vProc) -= cost; + ds.StepProcSend(vStep - 1, uProc) -= cost; + } + if (newMin > 0) { + ds.StepProcReceive(newMin - 1, vProc) += cost; + ds.StepProcSend(newMin - 1, uProc) += cost; + } + } + } + } + + static inline bool AddChild(ValueType &val, unsigned step) { + val.push_back(step); + if (val.size() == 1) { + return true; + } + unsigned minS = val[0]; + for (unsigned s : val) { + minS = std::min(minS, s); + } + return step == minS; + } + + static inline bool RemoveChild(ValueType &val, unsigned step) { + auto it = std::find(val.begin(), val.end(), step); + if (it != val.end()) { + val.erase(it); + if (val.empty()) { + return true; + } + unsigned newMin = val[0]; + for (unsigned s : val) { + newMin = std::min(newMin, s); + } + bool res = step < newMin; + return res; + } + return false; + } + + static inline void Reset(ValueType &val) { val.clear(); } + + static inline bool HasEntry(const ValueType &val) { return !val.empty(); } + + static inline bool IsSingleEntry(const ValueType &val) { return val.size() == 1; } + + template + static inline void CalculateDeltaRemove(const ValueType &val, + unsigned childStep, + unsigned parentStep, + unsigned parentProc, + unsigned childProc, + CommWeightT cost, + DeltaTracker &dt) { + if (val.empty()) { + return; + } + unsigned minS = val[0]; + for (unsigned s : val) { + minS = std::min(minS, s); + } + + if (childStep == minS) { + int count = 0; + for (unsigned s : val) { + if (s == minS) { + count++; + } + } + + if (count == 1) { + if (minS > 0) { + dt.Add(true, minS - 1, childProc, -cost); + dt.Add(false, minS - 1, parentProc, -cost); + } + if (val.size() > 1) { + unsigned nextMin = std::numeric_limits::max(); + for (unsigned s : val) { + if (s != minS) { + nextMin = std::min(nextMin, s); + } + } + if (nextMin != std::numeric_limits::max() && nextMin > 0) { + dt.Add(true, nextMin - 1, childProc, cost); + dt.Add(false, nextMin - 1, parentProc, cost); + } + } + } + } + } + + 
template + static inline void CalculateDeltaAdd(const ValueType &val, + unsigned childStep, + unsigned parentStep, + unsigned parentProc, + unsigned childProc, + CommWeightT cost, + DeltaTracker &dt) { + if (val.empty()) { + if (childStep > 0) { + dt.Add(true, childStep - 1, childProc, cost); + dt.Add(false, childStep - 1, parentProc, cost); + } + } else { + unsigned minS = val[0]; + for (unsigned s : val) { + minS = std::min(minS, s); + } + + if (childStep < minS) { + if (minS > 0) { + dt.Add(true, minS - 1, childProc, -cost); + dt.Add(false, minS - 1, parentProc, -cost); + } + if (childStep > 0) { + dt.Add(true, childStep - 1, childProc, cost); + dt.Add(false, childStep - 1, parentProc, cost); + } + } + } + } + + template + static inline void CalculateDeltaOutgoing( + const ValueType &val, unsigned nodeStep, unsigned nodeProc, unsigned childProc, CommWeightT cost, DeltaTracker &dt) { + for (unsigned s : val) { + if (s > 0) { + dt.Add(true, s - 1, childProc, cost); + dt.Add(false, s - 1, nodeProc, cost); + } + } + } +}; + +struct BufferedCommCostPolicy { + using ValueType = std::vector; + + template + static inline void AttributeCommunication(DS &ds, + const CommWeightT &cost, + const unsigned uStep, + const unsigned uProc, + const unsigned vProc, + const unsigned vStep, + const ValueType &val) { + // Buffered: Send at u_step, Receive at v_step - 1. + + unsigned minStep = std::numeric_limits::max(); + for (unsigned s : val) { + minStep = std::min(minStep, s); + } + + if (minStep == vStep) { + unsigned prevMin = std::numeric_limits::max(); + for (size_t i = 0; i < val.size() - 1; ++i) { + prevMin = std::min(prevMin, val[i]); + } + + if (vStep < prevMin) { + if (prevMin != std::numeric_limits::max() && prevMin > 0) { + ds.StepProcReceive(prevMin - 1, vProc) -= cost; + } + if (vStep > 0) { + ds.StepProcReceive(vStep - 1, vProc) += cost; + } + } + } + + // Send side logic (u_step) + // If this is the FIRST child on this proc, add send cost. 
+ if (val.size() == 1) { + ds.StepProcSend(uStep, uProc) += cost; + } + } + + template + static inline void UnattributeCommunication(DS &ds, + const CommWeightT &cost, + const unsigned uStep, + const unsigned uProc, + const unsigned vProc, + const unsigned vStep, + const ValueType &val) { + // val is state AFTER removal. + + if (val.empty()) { + // Removed last child. + ds.StepProcSend(uStep, uProc) -= cost; // Send side + if (vStep > 0) { + ds.StepProcReceive(vStep - 1, vProc) -= cost; // Recv side + } + } else { + // Check if v_step was unique minimum for Recv side. + unsigned newMin = val[0]; + for (unsigned s : val) { + newMin = std::min(newMin, s); + } + + if (vStep < newMin) { + if (vStep > 0) { + ds.StepProcReceive(vStep - 1, vProc) -= cost; + } + if (newMin > 0) { + ds.StepProcReceive(newMin - 1, vProc) += cost; + } + } + // Send side remains (val not empty). + } + } + + static inline bool AddChild(ValueType &val, unsigned step) { + val.push_back(step); + if (val.size() == 1) { + return true; // Need update for send side + } + unsigned minS = val[0]; + for (unsigned s : val) { + minS = std::min(minS, s); + } + return step == minS; // Need update for recv side + } + + static inline bool RemoveChild(ValueType &val, unsigned step) { + auto it = std::find(val.begin(), val.end(), step); + if (it != val.end()) { + val.erase(it); + if (val.empty()) { + return true; // Need update for send side + } + unsigned newMin = val[0]; + for (unsigned s : val) { + newMin = std::min(newMin, s); + } + return step < newMin; // Need update for recv side + } + return false; + } + + static inline void Reset(ValueType &val) { val.clear(); } + + static inline bool HasEntry(const ValueType &val) { return !val.empty(); } + + static inline bool IsSingleEntry(const ValueType &val) { return val.size() == 1; } + + template + static inline void CalculateDeltaRemove(const ValueType &val, + unsigned childStep, + unsigned parentStep, + unsigned parentProc, + unsigned childProc, + CommWeightT 
cost, + DeltaTracker &dt) { + // Lazy: Send and Recv are both at min(child_steps) - 1. + + if (val.empty()) { + return; + } + + unsigned minS = val[0]; + for (unsigned s : val) { + minS = std::min(minS, s); + } + + if (childStep == minS) { + int count = 0; + for (unsigned s : val) { + if (s == minS) { + count++; + } + } + + if (count == 1) { + // Unique min being removed. + if (minS > 0) { + dt.Add(true, minS - 1, childProc, -cost); // Remove Recv + dt.Add(false, minS - 1, parentProc, -cost); // Remove Send + } + + if (val.size() > 1) { + unsigned nextMin = std::numeric_limits::max(); + for (unsigned s : val) { + if (s != minS) { + nextMin = std::min(nextMin, s); + } + } + + if (nextMin != std::numeric_limits::max() && nextMin > 0) { + dt.Add(true, nextMin - 1, childProc, cost); // Add Recv at new min + dt.Add(false, nextMin - 1, parentProc, cost); // Add Send at new min + } + } + } + } + } + + template + static inline void CalculateDeltaAdd(const ValueType &val, + unsigned childStep, + unsigned parentStep, + unsigned parentProc, + unsigned childProc, + CommWeightT cost, + DeltaTracker &dt) { + // Lazy: Send and Recv are both at min(child_steps) - 1. + + if (val.empty()) { + // First child. + if (childStep > 0) { + dt.Add(true, childStep - 1, childProc, cost); + dt.Add(false, childStep - 1, parentProc, cost); + } + } else { + unsigned minS = val[0]; + for (unsigned s : val) { + minS = std::min(minS, s); + } + + if (childStep < minS) { + // New global minimum. 
+ if (minS > 0) { + dt.Add(true, minS - 1, childProc, -cost); // Remove old Recv + dt.Add(false, minS - 1, parentProc, -cost); // Remove old Send + } + if (childStep > 0) { + dt.Add(true, childStep - 1, childProc, cost); // Add new Recv + dt.Add(false, childStep - 1, parentProc, cost); // Add new Send + } + } + } + } + + template + static inline void CalculateDeltaOutgoing( + const ValueType &val, unsigned nodeStep, unsigned nodeProc, unsigned childProc, CommWeightT cost, DeltaTracker &dt) { + // Buffered Outgoing (Node -> Children) + // Node is parent (sender). Pays at node_step. + // Children are receivers. Pay at child_step - 1. + + // Send side: node_step. + // If val is not empty, we pay send cost ONCE. + if (!val.empty()) { + dt.Add(false, nodeStep, nodeProc, cost); + } + + // Recv side: iterate steps in val (child steps). + // But we only pay at min(val) - 1. + if (!val.empty()) { + unsigned minS = val[0]; + for (unsigned s : val) { + minS = std::min(minS, s); + } + + if (minS > 0) { + dt.Add(true, minS - 1, childProc, cost); + } + } + } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/generic_lambda_container.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/generic_lambda_container.hpp new file mode 100644 index 00000000..98f4a87b --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/generic_lambda_container.hpp @@ -0,0 +1,121 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include +#include +#include + +namespace osp { + +template +struct DefaultHasEntry { + static inline bool HasEntry(const T &val) { return val != 0; } +}; + +template +struct DefaultHasEntry> { + static inline bool HasEntry(const std::vector &val) { return !val.empty(); } +}; + +/** + * @brief Generic container for tracking child processor assignments in a BSP schedule using vectors. + * + * This structure tracks information about children assigned to each processor. + * It uses a 2D vector for dense data. + */ +template > +struct GenericLambdaVectorContainer { + /** + * @brief Range adapter for iterating over non-zero/non-empty processor entries. + */ + class LambdaVectorRange { + private: + const std::vector &vec_; + + public: + class LambdaVectorIterator { + using iterator_category = std::input_iterator_tag; + using value_type = std::pair; + using difference_type = std::ptrdiff_t; + using pointer = value_type *; + using reference = value_type &; + + private: + const std::vector &vec_; + unsigned index_; + + public: + LambdaVectorIterator(const std::vector &vec) : vec_(vec), index_(0) { + while (index_ < vec_.size() && !HasEntry::HasEntry(vec_[index_])) { + ++index_; + } + } + + LambdaVectorIterator(const std::vector &vec, unsigned index) : vec_(vec), index_(index) {} + + LambdaVectorIterator &operator++() { + ++index_; + while (index_ < vec_.size() && !HasEntry::HasEntry(vec_[index_])) { + ++index_; + } + return *this; + } + + value_type operator*() const { return std::make_pair(index_, vec_[index_]); } + + bool operator==(const LambdaVectorIterator &other) const { return index_ == other.index_; } + + bool operator!=(const LambdaVectorIterator &other) const { return !(*this == other); } + }; + + LambdaVectorRange(const std::vector &vec) : vec_(vec) {} + + 
LambdaVectorIterator begin() { return LambdaVectorIterator(vec_); } + + LambdaVectorIterator end() { return LambdaVectorIterator(vec_, static_cast(vec_.size())); } + }; + + /// 2D vector: for each node, stores processor assignment info + std::vector> nodeLambdaVec_; + + /// Number of processors in the system + unsigned numProcs_ = 0; + + inline void Initialize(const VertexIdxT numVertices, const unsigned numProcs) { + nodeLambdaVec_.assign(numVertices, std::vector(numProcs)); + numProcs_ = numProcs; + } + + inline void ResetNode(const VertexIdxT node) { nodeLambdaVec_[node].assign(numProcs_, ValueType()); } + + inline void Clear() { nodeLambdaVec_.clear(); } + + inline bool HasProcEntry(const VertexIdxT node, const unsigned proc) const { + return HasEntry::HasEntry(nodeLambdaVec_[node][proc]); + } + + inline ValueType &GetProcEntry(const VertexIdxT node, const unsigned proc) { return nodeLambdaVec_[node][proc]; } + + inline ValueType GetProcEntry(const VertexIdxT node, const unsigned proc) const { return nodeLambdaVec_[node][proc]; } + + inline auto IterateProcEntries(const VertexIdxT node) { return LambdaVectorRange(nodeLambdaVec_[node]); } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_bsp_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_bsp_comm_cost.hpp new file mode 100644 index 00000000..e7ec1013 --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_bsp_comm_cost.hpp @@ -0,0 +1,686 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include + +#include "../kl_active_schedule.hpp" +#include "../kl_improver.hpp" +#include "max_comm_datastructure.hpp" + +namespace osp { + +// A lightweight helper to track deltas without hash maps or repeated allocations. +// Uses a dense vector for O(1) lookups and a sparse list for fast iteration/clearing. +template +struct FastDeltaTracker { + std::vector denseVals_; // Size: num_procs + std::vector dirtyProcs_; // List of modified indices + std::vector procDirtyIndex_; // Map proc -> index in dirtyProcs_ (num_procs if not dirty) + unsigned numProcs_ = 0; + + void Initialize(unsigned nProcs) { + if (nProcs > numProcs_) { + numProcs_ = nProcs; + denseVals_.resize(numProcs_, 0); + dirtyProcs_.reserve(numProcs_); + procDirtyIndex_.resize(numProcs_, numProcs_); + } + } + + inline void Add(unsigned proc, CommWeightT val) { + if (val == 0) { + return; + } + + // If currently 0, it is becoming dirty + if (denseVals_[proc] == 0) { + procDirtyIndex_[proc] = static_cast(dirtyProcs_.size()); + dirtyProcs_.push_back(proc); + } + + denseVals_[proc] += val; + + // If it returns to 0, remove it from dirty list (Swap and Pop for O(1)) + if (denseVals_[proc] == 0) { + unsigned idx = procDirtyIndex_[proc]; + unsigned lastProc = dirtyProcs_.back(); + + // Move last element to the hole + dirtyProcs_[idx] = lastProc; + procDirtyIndex_[lastProc] = idx; + + // Remove last + dirtyProcs_.pop_back(); + procDirtyIndex_[proc] = numProcs_; + } + } + + inline CommWeightT Get(unsigned proc) const 
{ + if (proc < denseVals_.size()) { + return denseVals_[proc]; + } + return 0; + } + + inline void Clear() { + for (unsigned p : dirtyProcs_) { + denseVals_[p] = 0; + procDirtyIndex_[p] = numProcs_; + } + dirtyProcs_.clear(); + } +}; + +template +struct KlBspCommCostFunction { + using VertexType = VertexIdxT; + using KlMove = KlMoveStruct; + using KlGainUpdateInfo = KlUpdateInfo; + using CommWeightT = VCommwT; + + constexpr static unsigned windowRange_ = 2 * windowSize + 1; + constexpr static bool isMaxCommCostFunction_ = true; + + KlActiveSchedule *activeSchedule_; + CompatibleProcessorRange *procRange_; + const GraphT *graph_; + const BspInstance *instance_; + + MaxCommDatastructure> commDs_; + + inline CostT GetCommMultiplier() { return 1; } + + inline CostT GetMaxCommWeight() { return commDs_.maxCommWeight_; } + + inline CostT GetMaxCommWeightMultiplied() { return commDs_.maxCommWeight_; } + + inline const std::string Name() const { return "bsp_comm"; } + + inline bool IsCompatible(VertexType node, unsigned proc) { return activeSchedule_->GetInstance().IsCompatible(node, proc); } + + inline unsigned StartIdx(const unsigned nodeStep, const unsigned startStep) { + return (nodeStep < windowSize + startStep) ? windowSize - (nodeStep - startStep) : 0; + } + + inline unsigned EndIdx(const unsigned nodeStep, const unsigned endStep) { + return (nodeStep + windowSize <= endStep) ? 
windowRange_ : windowRange_ - (nodeStep + windowSize - endStep); + } + + void Initialize(KlActiveSchedule &sched, CompatibleProcessorRange &pRange) { + activeSchedule_ = &sched; + procRange_ = &pRange; + instance_ = &sched.GetInstance(); + graph_ = &instance_->GetComputationalDag(); + + const unsigned numSteps = activeSchedule_->NumSteps(); + commDs_.Initialize(*activeSchedule_); + } + + using PreMoveCommDataT = PreMoveCommData; + + inline PreMoveCommDataT GetPreMoveCommData(const KlMove &move) { return commDs_.GetPreMoveCommData(move); } + + void ComputeSendReceiveDatastructures() { commDs_.ComputeCommDatastructures(0, activeSchedule_->NumSteps() - 1); } + + template + CostT ComputeScheduleCost() { + if constexpr (computeDatastructures) { + ComputeSendReceiveDatastructures(); + } + + CostT totalCost = 0; + for (unsigned step = 0; step < activeSchedule_->NumSteps(); step++) { + totalCost += activeSchedule_->GetStepMaxWork(step); + totalCost += commDs_.StepMaxComm(step) * instance_->CommunicationCosts(); + } + + if (activeSchedule_->NumSteps() > 1) { + totalCost += static_cast(activeSchedule_->NumSteps() - 1) * instance_->SynchronisationCosts(); + } + + return totalCost; + } + + CostT ComputeScheduleCostTest() { return ComputeScheduleCost(); } + + void UpdateDatastructureAfterMove(const KlMove &move, const unsigned startStep, const unsigned endStep) { + commDs_.UpdateDatastructureAfterMove(move, startStep, endStep); + } + + // Structure to hold thread-local scratchpads to avoid re-allocation. 
+ struct ScratchData { + std::vector> sendDeltas_; // Size: num_steps + std::vector> recvDeltas_; // Size: num_steps + + std::vector activeSteps_; // List of steps touched in current operation + std::vector stepIsActive_; // Fast lookup for active steps + + std::vector> childCostBuffer_; + + void Init(unsigned nSteps, unsigned nProcs) { + if (sendDeltas_.size() < nSteps) { + sendDeltas_.resize(nSteps); + recvDeltas_.resize(nSteps); + stepIsActive_.resize(nSteps, false); + activeSteps_.reserve(nSteps); + } + + for (auto &tracker : sendDeltas_) { + tracker.Initialize(nProcs); + } + for (auto &tracker : recvDeltas_) { + tracker.Initialize(nProcs); + } + + childCostBuffer_.reserve(nProcs); + } + + void ClearAll() { + for (unsigned step : activeSteps_) { + sendDeltas_[step].Clear(); + recvDeltas_[step].Clear(); + stepIsActive_[step] = false; + } + activeSteps_.clear(); + childCostBuffer_.clear(); + } + + void MarkActive(unsigned step) { + if (!stepIsActive_[step]) { + stepIsActive_[step] = true; + activeSteps_.push_back(step); + } + } + }; + + template + void ComputeCommAffinity(VertexType node, + AffinityTableT &affinityTableNode, + const CostT &penalty, + const CostT &reward, + const unsigned startStep, + const unsigned endStep) { + // Use static thread_local scratchpad to avoid allocation in hot loop + static thread_local ScratchData scratch; + scratch.Init(activeSchedule_->NumSteps(), instance_->NumberOfProcessors()); + scratch.ClearAll(); + + const unsigned nodeStep = activeSchedule_->AssignedSuperstep(node); + const unsigned nodeProc = activeSchedule_->AssignedProcessor(node); + const unsigned windowBound = EndIdx(nodeStep, endStep); + const unsigned nodeStartIdx = StartIdx(nodeStep, startStep); + + for (const auto &target : instance_->GetComputationalDag().Children(node)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + const unsigned targetProc = activeSchedule_->AssignedProcessor(target); + + if (targetStep < nodeStep + (targetProc 
!= nodeProc)) { + const unsigned diff = nodeStep - targetStep; + const unsigned bound = windowSize > diff ? windowSize - diff : 0; + unsigned idx = nodeStartIdx; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] -= reward; + } + } + if (windowSize >= diff && IsCompatible(node, targetProc)) { + affinityTableNode[targetProc][idx] -= reward; + } + } else { + const unsigned diff = targetStep - nodeStep; + unsigned idx = windowSize + diff; + if (idx < windowBound && IsCompatible(node, targetProc)) { + affinityTableNode[targetProc][idx] -= penalty; + } + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] += penalty; + } + } + } + } + + for (const auto &source : instance_->GetComputationalDag().Parents(node)) { + const unsigned sourceStep = activeSchedule_->AssignedSuperstep(source); + const unsigned sourceProc = activeSchedule_->AssignedProcessor(source); + + if (sourceStep < nodeStep + (sourceProc == nodeProc)) { + const unsigned diff = nodeStep - sourceStep; + const unsigned bound = windowSize >= diff ? 
windowSize - diff + 1 : 0; + unsigned idx = nodeStartIdx; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] += penalty; + } + } + if (idx - 1 < bound && IsCompatible(node, sourceProc)) { + affinityTableNode[sourceProc][idx - 1] -= penalty; + } + } else { + const unsigned diff = sourceStep - nodeStep; + unsigned idx = std::min(windowSize + diff, windowBound); + if (idx < windowBound && IsCompatible(node, sourceProc)) { + affinityTableNode[sourceProc][idx] -= reward; + } + idx++; + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] -= reward; + } + } + } + } + + const CommWeightT commWNode = graph_->VertexCommWeight(node); + const auto ¤tVecSchedule = activeSchedule_->GetVectorSchedule(); + + auto AddDelta = [&](bool isRecv, unsigned step, unsigned proc, CommWeightT val) { + if (val == 0) { + return; + } + if (step < activeSchedule_->NumSteps()) { + scratch.MarkActive(step); + if (isRecv) { + scratch.recvDeltas_[step].Add(proc, val); + } else { + scratch.sendDeltas_[step].Add(proc, val); + } + } + }; + + // 1. 
Remove Node from Current State (Phase 1 - Invariant for all candidates) + + // Outgoing (Children) + // Child stops receiving from nodeProc at nodeStep + auto nodeLambdaEntries = commDs_.nodeLambdaMap_.IterateProcEntries(node); + CommWeightT totalSendCostRemoved = 0; + + for (const auto [proc, count] : nodeLambdaEntries) { + if (proc != nodeProc) { + const CommWeightT cost = commWNode * instance_->SendCosts(nodeProc, proc); + if (cost > 0) { + AddDelta(true, nodeStep, proc, -cost); + totalSendCostRemoved += cost; + } + } + } + if (totalSendCostRemoved > 0) { + AddDelta(false, nodeStep, nodeProc, -totalSendCostRemoved); + } + + // Incoming (Parents) + for (const auto &u : graph_->Parents(node)) { + const unsigned uProc = activeSchedule_->AssignedProcessor(u); + const unsigned uStep = currentVecSchedule.AssignedSuperstep(u); + const CommWeightT commWU = graph_->VertexCommWeight(u); + + if (uProc != nodeProc) { + if (commDs_.nodeLambdaMap_.GetProcEntry(u, nodeProc) == 1) { + const CommWeightT cost = commWU * instance_->SendCosts(uProc, nodeProc); + if (cost > 0) { + AddDelta(true, uStep, nodeProc, -cost); + AddDelta(false, uStep, uProc, -cost); + } + } + } + } + + // 2. Add Node to Target (Iterate candidates) + + for (const unsigned pTo : procRange_->CompatibleProcessorsVertex(node)) { + // --- Part A: Incoming Edges (Parents -> pTo) --- + // These updates are specific to pTo but independent of sTo. + // We apply them, run the sTo loop, then revert them. 
+ + for (const auto &u : graph_->Parents(node)) { + const unsigned uProc = activeSchedule_->AssignedProcessor(u); + const unsigned uStep = currentVecSchedule.AssignedSuperstep(u); + const CommWeightT commWU = graph_->VertexCommWeight(u); + + if (uProc != pTo) { + bool alreadySendingToPTo = false; + unsigned countOnPTo = commDs_.nodeLambdaMap_.GetProcEntry(u, pTo); + + if (pTo == nodeProc) { + if (countOnPTo > 0) { + countOnPTo--; + } + } + + if (countOnPTo > 0) { + alreadySendingToPTo = true; + } + + if (!alreadySendingToPTo) { + const CommWeightT cost = commWU * instance_->SendCosts(uProc, pTo); + if (cost > 0) { + AddDelta(true, uStep, pTo, cost); + AddDelta(false, uStep, uProc, cost); + } + } + } + } + + // --- Part B: Outgoing Edges (Node -> Children) --- + // These depend on which processors children are on. + scratch.childCostBuffer_.clear(); + CommWeightT totalSendCostAdded = 0; + + for (const auto [v_proc, count] : commDs_.nodeLambdaMap_.IterateProcEntries(node)) { + if (v_proc != pTo) { + const CommWeightT cost = commWNode * instance_->SendCosts(pTo, v_proc); + if (cost > 0) { + scratch.childCostBuffer_.push_back({v_proc, cost}); + totalSendCostAdded += cost; + } + } + } + + // Iterate Window (sTo) + for (unsigned sToIdx = nodeStartIdx; sToIdx < windowBound; ++sToIdx) { + unsigned sTo = nodeStep + sToIdx - windowSize; + + // Apply Outgoing Deltas for this specific step sTo + for (const auto &[v_proc, cost] : scratch.childCostBuffer_) { + AddDelta(true, sTo, v_proc, cost); + } + + if (totalSendCostAdded > 0) { + AddDelta(false, sTo, pTo, totalSendCostAdded); + } + + CostT totalChange = 0; + + // Only check steps that are active (modified in Phase 1, Part A, or Part B) + for (unsigned step : scratch.activeSteps_) { + // Check if dirtyProcs_ is empty implies no change for this step + // FastDeltaTracker ensures dirtyProcs_ is empty if all deltas summed to 0 + if (!scratch.sendDeltas_[step].dirtyProcs_.empty() || !scratch.recvDeltas_[step].dirtyProcs_.empty()) 
{ + totalChange += CalculateStepCostChange(step, scratch.sendDeltas_[step], scratch.recvDeltas_[step]); + } + } + + affinityTableNode[pTo][sToIdx] += totalChange * instance_->CommunicationCosts(); + + // Revert Outgoing Deltas for sTo (Inverse of Apply) + for (const auto &[v_proc, cost] : scratch.childCostBuffer_) { + AddDelta(true, sTo, v_proc, -cost); + } + if (totalSendCostAdded > 0) { + AddDelta(false, sTo, pTo, -totalSendCostAdded); + } + } + + // Revert Incoming Deltas (Inverse of Part A) + for (const auto &u : graph_->Parents(node)) { + const unsigned uProc = activeSchedule_->AssignedProcessor(u); + const unsigned uStep = currentVecSchedule.AssignedSuperstep(u); + const CommWeightT commWU = graph_->VertexCommWeight(u); + + if (uProc != pTo) { + bool alreadySendingToPTo = false; + unsigned countOnPTo = commDs_.nodeLambdaMap_.GetProcEntry(u, pTo); + if (pTo == nodeProc) { + if (countOnPTo > 0) { + countOnPTo--; + } + } + if (countOnPTo > 0) { + alreadySendingToPTo = true; + } + + if (!alreadySendingToPTo) { + const CommWeightT cost = commWU * instance_->SendCosts(uProc, pTo); + if (cost > 0) { + AddDelta(true, uStep, pTo, -cost); + AddDelta(false, uStep, uProc, -cost); + } + } + } + } + } + } + + CommWeightT CalculateStepCostChange(unsigned step, + const FastDeltaTracker &deltaSend, + const FastDeltaTracker &deltaRecv) { + CommWeightT oldMax = commDs_.StepMaxComm(step); + CommWeightT secondMax = commDs_.StepSecondMaxComm(step); + unsigned oldMaxCount = commDs_.StepMaxCommCount(step); + + CommWeightT newGlobalMax = 0; + unsigned reducedMaxInstances = 0; + + // 1. 
Check modified sends (Iterate sparse dirty list) + for (unsigned proc : deltaSend.dirtyProcs_) { + CommWeightT delta = deltaSend.Get(proc); + // delta cannot be 0 here due to FastDeltaTracker invariant + + CommWeightT currentVal = commDs_.StepProcSend(step, proc); + CommWeightT newVal = currentVal + delta; + + if (newVal > newGlobalMax) { + newGlobalMax = newVal; + } + if (delta < 0 && currentVal == oldMax) { + reducedMaxInstances++; + } + } + + // 2. Check modified receives (Iterate sparse dirty list) + for (unsigned proc : deltaRecv.dirtyProcs_) { + CommWeightT delta = deltaRecv.Get(proc); + + CommWeightT currentVal = commDs_.StepProcReceive(step, proc); + CommWeightT newVal = currentVal + delta; + + if (newVal > newGlobalMax) { + newGlobalMax = newVal; + } + if (delta < 0 && currentVal == oldMax) { + reducedMaxInstances++; + } + } + + // 3. Determine result + if (newGlobalMax > oldMax) { + return newGlobalMax - oldMax; + } + if (reducedMaxInstances < oldMaxCount) { + return 0; + } + return std::max(newGlobalMax, secondMax) - oldMax; + } + + template + void UpdateNodeCommAffinity(const KlMove &move, + ThreadDataT &threadData, + const CostT &penalty, + const CostT &reward, + std::map &, + std::vector &newNodes) { + const unsigned startStep = threadData.startStep_; + const unsigned endStep = threadData.endStep_; + + for (const auto &target : instance_->GetComputationalDag().Children(move.node_)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + if (targetStep < startStep || targetStep > endStep) { + continue; + } + + if (threadData.lockManager_.IsLocked(target)) { + continue; + } + + if (not threadData.affinityTable_.IsSelected(target)) { + newNodes.push_back(target); + continue; + } + + const unsigned targetProc = activeSchedule_->AssignedProcessor(target); + const unsigned targetStartIdx = StartIdx(targetStep, startStep); + auto &affinityTable = threadData.affinityTable_.At(target); + + if (move.fromStep_ < targetStep + (move.fromProc_ 
== targetProc)) { + const unsigned diff = targetStep - move.fromStep_; + const unsigned bound = windowSize >= diff ? windowSize - diff + 1 : 0; + unsigned idx = targetStartIdx; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTable[p][idx] -= penalty; + } + } + + if (idx - 1 < bound && IsCompatible(target, move.fromProc_)) { + affinityTable[move.fromProc_][idx - 1] += penalty; + } + + } else { + const unsigned diff = move.fromStep_ - targetStep; + const unsigned windowBound = EndIdx(targetStep, endStep); + unsigned idx = std::min(windowSize + diff, windowBound); + + if (idx < windowBound && IsCompatible(target, move.fromProc_)) { + affinityTable[move.fromProc_][idx] += reward; + } + + idx++; + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTable[p][idx] += reward; + } + } + } + + if (move.toStep_ < targetStep + (move.toProc_ == targetProc)) { + unsigned idx = targetStartIdx; + const unsigned diff = targetStep - move.toStep_; + const unsigned bound = windowSize >= diff ? 
windowSize - diff + 1 : 0; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTable[p][idx] += penalty; + } + } + + if (idx - 1 < bound && IsCompatible(target, move.toProc_)) { + affinityTable[move.toProc_][idx - 1] -= penalty; + } + + } else { + const unsigned diff = move.toStep_ - targetStep; + const unsigned windowBound = EndIdx(targetStep, endStep); + unsigned idx = std::min(windowSize + diff, windowBound); + + if (idx < windowBound && IsCompatible(target, move.toProc_)) { + affinityTable[move.toProc_][idx] -= reward; + } + + idx++; + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTable[p][idx] -= reward; + } + } + } + } + + for (const auto &source : instance_->GetComputationalDag().Parents(move.node_)) { + const unsigned sourceStep = activeSchedule_->AssignedSuperstep(source); + if (sourceStep < startStep || sourceStep > endStep) { + continue; + } + + if (threadData.lockManager_.IsLocked(source)) { + continue; + } + + if (not threadData.affinityTable_.IsSelected(source)) { + newNodes.push_back(source); + continue; + } + + const unsigned sourceProc = activeSchedule_->AssignedProcessor(source); + const unsigned sourceStartIdx = StartIdx(sourceStep, startStep); + const unsigned windowBound = EndIdx(sourceStep, endStep); + auto &affinityTableSource = threadData.affinityTable_.At(source); + + if (move.fromStep_ < sourceStep + (move.fromProc_ != sourceProc)) { + const unsigned diff = sourceStep - move.fromStep_; + const unsigned bound = windowSize > diff ? 
windowSize - diff : 0; + unsigned idx = sourceStartIdx; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] += reward; + } + } + + if (windowSize >= diff && IsCompatible(source, move.fromProc_)) { + affinityTableSource[move.fromProc_][idx] += reward; + } + + } else { + const unsigned diff = move.fromStep_ - sourceStep; + unsigned idx = windowSize + diff; + + if (idx < windowBound && IsCompatible(source, move.fromProc_)) { + affinityTableSource[move.fromProc_][idx] += penalty; + } + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] -= penalty; + } + } + } + + if (move.toStep_ < sourceStep + (move.toProc_ != sourceProc)) { + const unsigned diff = sourceStep - move.toStep_; + const unsigned bound = windowSize > diff ? windowSize - diff : 0; + unsigned idx = sourceStartIdx; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] -= reward; + } + } + + if (windowSize >= diff && IsCompatible(source, move.toProc_)) { + affinityTableSource[move.toProc_][idx] -= reward; + } + + } else { + const unsigned diff = move.toStep_ - sourceStep; + unsigned idx = windowSize + diff; + + if (idx < windowBound && IsCompatible(source, move.toProc_)) { + affinityTableSource[move.toProc_][idx] -= penalty; + } + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] += penalty; + } + } + } + } + } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_hyper_total_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_hyper_total_comm_cost.hpp new file mode 100644 index 00000000..f7794ca8 --- /dev/null +++ 
b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_hyper_total_comm_cost.hpp @@ -0,0 +1,648 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include "../kl_active_schedule.hpp" +#include "../kl_improver.hpp" +#include "lambda_container.hpp" + +namespace osp { + +template +struct KlHyperTotalCommCostFunction { + using VertexType = VertexIdxT; + using KlMove = KlMoveStruct; + using KlGainUpdateInfo = KlUpdateInfo; + + constexpr static unsigned windowRange_ = 2 * windowSize + 1; + constexpr static bool isMaxCommCostFunction_ = false; + + KlActiveSchedule *activeSchedule_; + + CompatibleProcessorRange *procRange_; + + const GraphT *graph_; + const BspInstance *instance_; + + CostT commMultiplier_ = 1; + CostT maxCommWeight_ = 0; + + LambdaVectorContainer nodeLambdaMap_; + + inline CostT GetCommMultiplier() { return commMultiplier_; } + + inline CostT GetMaxCommWeight() { return maxCommWeight_; } + + inline CostT GetMaxCommWeightMultiplied() { return maxCommWeight_ * commMultiplier_; } + + const std::string Name() const { return "toal_comm_cost"; } + + inline bool IsCompatible(VertexType node, unsigned proc) { return activeSchedule_->GetInstance().IsCompatible(node, proc); } + + void Initialize(KlActiveSchedule &sched, CompatibleProcessorRange &pRange) { + activeSchedule_ = &sched; + procRange_ = &pRange; + instance_ = 
&sched.GetInstance(); + graph_ = &instance_->GetComputationalDag(); + commMultiplier_ = 1.0 / instance_->NumberOfProcessors(); + nodeLambdaMap_.Initialize(graph_->NumVertices(), instance_->NumberOfProcessors()); + } + + struct EmptyStruct {}; + + using PreMoveCommDataT = EmptyStruct; + + inline EmptyStruct GetPreMoveCommData(const KlMove &) { return EmptyStruct(); } + + CostT ComputeScheduleCost() { + CostT workCosts = 0; + for (unsigned step = 0; step < activeSchedule_->NumSteps(); step++) { + workCosts += activeSchedule_->GetStepMaxWork(step); + } + + CostT commCosts = 0; + for (const auto vertex : graph_->Vertices()) { + const unsigned vertexProc = activeSchedule_->AssignedProcessor(vertex); + const CostT vCommCost = graph_->VertexCommWeight(vertex); + maxCommWeight_ = std::max(maxCommWeight_, vCommCost); + + nodeLambdaMap_.ResetNode(vertex); + + for (const auto &target : instance_->GetComputationalDag().Children(vertex)) { + const unsigned targetProc = activeSchedule_->AssignedProcessor(target); + + if (nodeLambdaMap_.IncreaseProcCount(vertex, targetProc)) { + commCosts += vCommCost + * instance_->CommunicationCosts(vertexProc, targetProc); // is 0 if targetProc == vertexProc + } + } + } + + return workCosts + commCosts * commMultiplier_ + + static_cast>(activeSchedule_->NumSteps() - 1) * instance_->SynchronisationCosts(); + } + + CostT ComputeScheduleCostTest() { + CostT workCosts = 0; + for (unsigned step = 0; step < activeSchedule_->NumSteps(); step++) { + workCosts += activeSchedule_->GetStepMaxWork(step); + } + + CostT commCosts = 0; + for (const auto vertex : graph_->Vertices()) { + const unsigned vertexProc = activeSchedule_->AssignedProcessor(vertex); + const CostT vCommCost = graph_->VertexCommWeight(vertex); + for (const auto lambdaprocMultPair : nodeLambdaMap_.IterateProcEntries(vertex)) { + const auto &lambdaProc = lambdaprocMultPair.first; + commCosts += vCommCost * instance_->CommunicationCosts(vertexProc, lambdaProc); + } + } + + return workCosts 
+ commCosts * commMultiplier_ + + static_cast>(activeSchedule_->NumSteps() - 1) * instance_->SynchronisationCosts(); + } + + inline void UpdateDatastructureAfterMove(const KlMove &move, const unsigned startStep, const unsigned endStep) { + if (move.toProc_ != move.fromProc_) { + for (const auto &source : instance_->GetComputationalDag().Parents(move.node_)) { + const unsigned sourceStep = activeSchedule_->AssignedSuperstep(source); + if (sourceStep < startStep || sourceStep > endStep) { + continue; + } + UpdateSourceAfterMove(move, source); + } + } + } + + inline void UpdateSourceAfterMove(const KlMove &move, VertexType source) { + nodeLambdaMap_.DecreaseProcCount(source, move.fromProc_); + nodeLambdaMap_.IncreaseProcCount(source, move.toProc_); + } + + template + void UpdateNodeCommAffinity(const KlMove &move, + ThreadDataT &threadData, + const CostT &penalty, + const CostT &reward, + std::map &maxGainRecompute, + std::vector &newNodes) { + const unsigned startStep = threadData.startStep_; + const unsigned endStep = threadData.endStep_; + + for (const auto &target : instance_->GetComputationalDag().Children(move.node_)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + if (targetStep < startStep || targetStep > endStep) { + continue; + } + + if (threadData.lockManager_.IsLocked(target)) { + continue; + } + + if (not threadData.affinityTable_.IsSelected(target)) { + newNodes.push_back(target); + continue; + } + + if (maxGainRecompute.find(target) != maxGainRecompute.end()) { + maxGainRecompute[target].fullUpdate_ = true; + } else { + maxGainRecompute[target] = KlGainUpdateInfo(target, true); + } + + const unsigned targetProc = activeSchedule_->AssignedProcessor(target); + const unsigned targetStartIdx = StartIdx(targetStep, startStep); + auto &affinityTable = threadData.affinityTable_.At(target); + + if (move.fromStep_ < targetStep + (move.fromProc_ == targetProc)) { + const unsigned diff = targetStep - move.fromStep_; + const unsigned 
bound = windowSize >= diff ? windowSize - diff + 1 : 0; + unsigned idx = targetStartIdx; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTable[p][idx] -= penalty; + } + } + + if (idx - 1 < bound && IsCompatible(target, move.fromProc_)) { + affinityTable[move.fromProc_][idx - 1] += penalty; + } + + } else { + const unsigned diff = move.fromStep_ - targetStep; + const unsigned windowBound = EndIdx(targetStep, endStep); + unsigned idx = std::min(windowSize + diff, windowBound); + + if (idx < windowBound && IsCompatible(target, move.fromProc_)) { + affinityTable[move.fromProc_][idx] += reward; + } + + idx++; + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTable[p][idx] += reward; + } + } + } + + if (move.toStep_ < targetStep + (move.toProc_ == targetProc)) { + unsigned idx = targetStartIdx; + const unsigned diff = targetStep - move.toStep_; + const unsigned bound = windowSize >= diff ? 
windowSize - diff + 1 : 0; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTable[p][idx] += penalty; + } + } + + if (idx - 1 < bound && IsCompatible(target, move.toProc_)) { + affinityTable[move.toProc_][idx - 1] -= penalty; + } + + } else { + const unsigned diff = move.toStep_ - targetStep; + const unsigned windowBound = EndIdx(targetStep, endStep); + unsigned idx = std::min(windowSize + diff, windowBound); + + if (idx < windowBound && IsCompatible(target, move.toProc_)) { + affinityTable[move.toProc_][idx] -= reward; + } + + idx++; + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTable[p][idx] -= reward; + } + } + } + + if (move.toProc_ != move.fromProc_) { + const CostT commGain = graph_->VertexCommWeight(move.node_) * commMultiplier_; + + const unsigned windowBound = EndIdx(targetStep, endStep); + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + if (p == targetProc) { + continue; + } + if (nodeLambdaMap_.GetProcEntry(move.node_, targetProc) == 1) { + for (unsigned idx = targetStartIdx; idx < windowBound; idx++) { + const CostT x = instance_->CommunicationCosts(move.fromProc_, targetProc) * commGain; + const CostT y = instance_->CommunicationCosts(move.toProc_, targetProc) * commGain; + affinityTable[p][idx] += x - y; + } + } + + if (nodeLambdaMap_.HasNoProcEntry(move.node_, p)) { + for (unsigned idx = targetStartIdx; idx < windowBound; idx++) { + const CostT x = instance_->CommunicationCosts(move.fromProc_, p) * commGain; + const CostT y = instance_->CommunicationCosts(move.toProc_, p) * commGain; + affinityTable[p][idx] -= x - y; + } + } + } + } + } + + for (const auto &source : instance_->GetComputationalDag().Parents(move.node_)) { + if (move.toProc_ != move.fromProc_) { + const unsigned sourceProc = activeSchedule_->AssignedProcessor(source); + if (nodeLambdaMap_.HasNoProcEntry(source, 
move.fromProc_)) { + const CostT commGain = graph_->VertexCommWeight(source) * commMultiplier_; + + for (const auto &target : instance_->GetComputationalDag().Children(source)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + if ((targetStep < startStep || targetStep > endStep) || (target == move.node_) + || (not threadData.affinityTable_.IsSelected(target)) || threadData.lockManager_.IsLocked(target)) { + continue; + } + + if (sourceProc != move.fromProc_ && IsCompatible(target, move.fromProc_)) { + if (maxGainRecompute.find(target) != maxGainRecompute.end()) { // todo more specialized update + maxGainRecompute[target].fullUpdate_ = true; + } else { + maxGainRecompute[target] = KlGainUpdateInfo(target, true); + } + + auto &affinityTableTargetFromProc = threadData.affinityTable_.At(target)[move.fromProc_]; + const unsigned targetWindowBound = EndIdx(targetStep, endStep); + const CostT commAff = instance_->CommunicationCosts(sourceProc, move.fromProc_) * commGain; + for (unsigned idx = StartIdx(targetStep, startStep); idx < targetWindowBound; idx++) { + affinityTableTargetFromProc[idx] += commAff; + } + } + } + } else if (nodeLambdaMap_.GetProcEntry(source, move.fromProc_) == 1) { + const CostT commGain = graph_->VertexCommWeight(source) * commMultiplier_; + + for (const auto &target : instance_->GetComputationalDag().Children(source)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + if ((targetStep < startStep || targetStep > endStep) || (target == move.node_) + || threadData.lockManager_.IsLocked(target) || (not threadData.affinityTable_.IsSelected(target))) { + continue; + } + + const unsigned targetProc = activeSchedule_->AssignedProcessor(target); + if (targetProc == move.fromProc_) { + if (maxGainRecompute.find(target) != maxGainRecompute.end()) { // todo more specialized update + maxGainRecompute[target].fullUpdate_ = true; + } else { + maxGainRecompute[target] = KlGainUpdateInfo(target, true); + } + + 
const unsigned targetStartIdx = StartIdx(targetStep, startStep); + const unsigned targetWindowBound = EndIdx(targetStep, endStep); + auto &affinityTableTarget = threadData.affinityTable_.At(target); + const CostT commAff = instance_->CommunicationCosts(sourceProc, targetProc) * commGain; + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + if (p == targetProc) { + continue; + } + + for (unsigned idx = targetStartIdx; idx < targetWindowBound; idx++) { + affinityTableTarget[p][idx] -= commAff; + } + } + break; // since nodeLambdaMap_[source][move.fromProc_] == 1 + } + } + } + + if (nodeLambdaMap_.GetProcEntry(source, move.toProc_) == 1) { + const CostT commGain = graph_->VertexCommWeight(source) * commMultiplier_; + + for (const auto &target : instance_->GetComputationalDag().Children(source)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + if ((targetStep < startStep || targetStep > endStep) || (target == move.node_) + || (not threadData.affinityTable_.IsSelected(target)) || threadData.lockManager_.IsLocked(target)) { + continue; + } + + if (sourceProc != move.toProc_ && IsCompatible(target, move.toProc_)) { + if (maxGainRecompute.find(target) != maxGainRecompute.end()) { + maxGainRecompute[target].fullUpdate_ = true; + } else { + maxGainRecompute[target] = KlGainUpdateInfo(target, true); + } + + const unsigned targetWindowBound = EndIdx(targetStep, endStep); + auto &affinityTableTargetToProc = threadData.affinityTable_.At(target)[move.toProc_]; + const CostT commAff = instance_->CommunicationCosts(sourceProc, move.toProc_) * commGain; + for (unsigned idx = StartIdx(targetStep, startStep); idx < targetWindowBound; idx++) { + affinityTableTargetToProc[idx] -= commAff; + } + } + } + } else if (nodeLambdaMap_.GetProcEntry(source, move.toProc_) == 2) { + for (const auto &target : instance_->GetComputationalDag().Children(source)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + if 
((targetStep < startStep || targetStep > endStep) || (target == move.node_) + || (not threadData.affinityTable_.IsSelected(target)) || threadData.lockManager_.IsLocked(target)) { + continue; + } + + const unsigned targetProc = activeSchedule_->AssignedProcessor(target); + if (targetProc == move.toProc_) { + if (sourceProc != targetProc) { + if (maxGainRecompute.find(target) != maxGainRecompute.end()) { + maxGainRecompute[target].fullUpdate_ = true; + } else { + maxGainRecompute[target] = KlGainUpdateInfo(target, true); + } + + const unsigned targetStartIdx = StartIdx(targetStep, startStep); + const unsigned targetWindowBound = EndIdx(targetStep, endStep); + auto &affinityTableTarget = threadData.affinityTable_.At(target); + const CostT commAff = instance_->CommunicationCosts(sourceProc, targetProc) + * graph_->VertexCommWeight(source) * commMultiplier_; + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + if (p == targetProc) { + continue; + } + + for (unsigned idx = targetStartIdx; idx < targetWindowBound; idx++) { + affinityTableTarget[p][idx] += commAff; + } + } + } + break; + } + } + } + } + + const unsigned sourceStep = activeSchedule_->AssignedSuperstep(source); + if (sourceStep < startStep || sourceStep > endStep) { + continue; + } + + if (threadData.lockManager_.IsLocked(source)) { + continue; + } + + if (not threadData.affinityTable_.IsSelected(source)) { + newNodes.push_back(source); + continue; + } + + if (maxGainRecompute.find(source) != maxGainRecompute.end()) { + maxGainRecompute[source].fullUpdate_ = true; + } else { + maxGainRecompute[source] = KlGainUpdateInfo(source, true); + } + + const unsigned sourceProc = activeSchedule_->AssignedProcessor(source); + const unsigned sourceStartIdx = StartIdx(sourceStep, startStep); + const unsigned windowBound = EndIdx(sourceStep, endStep); + auto &affinityTableSource = threadData.affinityTable_.At(source); + + if (move.fromStep_ < sourceStep + (move.fromProc_ != sourceProc)) { + const 
unsigned diff = sourceStep - move.fromStep_; + const unsigned bound = windowSize > diff ? windowSize - diff : 0; + unsigned idx = sourceStartIdx; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] += reward; + } + } + + if (windowSize >= diff && IsCompatible(source, move.fromProc_)) { + affinityTableSource[move.fromProc_][idx] += reward; + } + + } else { + const unsigned diff = move.fromStep_ - sourceStep; + unsigned idx = windowSize + diff; + + if (idx < windowBound && IsCompatible(source, move.fromProc_)) { + affinityTableSource[move.fromProc_][idx] += penalty; + } + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] -= penalty; + } + } + } + + if (move.toStep_ < sourceStep + (move.toProc_ != sourceProc)) { + const unsigned diff = sourceStep - move.toStep_; + const unsigned bound = windowSize > diff ? windowSize - diff : 0; + unsigned idx = sourceStartIdx; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] -= reward; + } + } + + if (windowSize >= diff && IsCompatible(source, move.toProc_)) { + affinityTableSource[move.toProc_][idx] -= reward; + } + + } else { + const unsigned diff = move.toStep_ - sourceStep; + unsigned idx = windowSize + diff; + + if (idx < windowBound && IsCompatible(source, move.toProc_)) { + affinityTableSource[move.toProc_][idx] -= penalty; + } + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] += penalty; + } + } + } + + if (move.toProc_ != move.fromProc_) { + if (nodeLambdaMap_.HasNoProcEntry(source, move.fromProc_)) { + const CostT commGain = graph_->VertexCommWeight(source) * commMultiplier_; + + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + if (p == 
sourceProc) { + continue; + } + + const CostT commCost = ChangeCommCost(instance_->CommunicationCosts(p, move.fromProc_), + instance_->CommunicationCosts(sourceProc, move.fromProc_), + commGain); + for (unsigned idx = sourceStartIdx; idx < windowBound; idx++) { + affinityTableSource[p][idx] -= commCost; + } + } + } + + if (nodeLambdaMap_.GetProcEntry(source, move.toProc_) == 1) { + const CostT commGain = graph_->VertexCommWeight(source) * commMultiplier_; + + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + if (p == sourceProc) { + continue; + } + + const CostT commCost = ChangeCommCost(instance_->CommunicationCosts(p, move.toProc_), + instance_->CommunicationCosts(sourceProc, move.toProc_), + commGain); + for (unsigned idx = sourceStartIdx; idx < windowBound; idx++) { + affinityTableSource[p][idx] += commCost; + } + } + } + } + } + } + + inline unsigned StartIdx(const unsigned nodeStep, const unsigned startStep) { + return nodeStep < windowSize + startStep ? windowSize - (nodeStep - startStep) : 0; + } + + inline unsigned EndIdx(const unsigned nodeStep, const unsigned endStep) { + return nodeStep + windowSize <= endStep ? windowRange_ : windowRange_ - (nodeStep + windowSize - endStep); + } + + inline CostT ChangeCommCost(const VCommwT &pTargetCommCost, + const VCommwT &nodeTargetCommCost, + const CostT &commGain) { + return pTargetCommCost > nodeTargetCommCost ? 
(pTargetCommCost - nodeTargetCommCost) * commGain + : (nodeTargetCommCost - pTargetCommCost) * commGain * -1.0; + } + + template + void ComputeCommAffinity(VertexType node, + AffinityTableT &affinityTableNode, + const CostT &penalty, + const CostT &reward, + const unsigned startStep, + const unsigned endStep) { + const unsigned nodeStep = activeSchedule_->AssignedSuperstep(node); + const unsigned nodeProc = activeSchedule_->AssignedProcessor(node); + const unsigned windowBound = EndIdx(nodeStep, endStep); + const unsigned nodeStartIdx = StartIdx(nodeStep, startStep); + + for (const auto &target : instance_->GetComputationalDag().Children(node)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + const unsigned targetProc = activeSchedule_->AssignedProcessor(target); + + if (targetStep < nodeStep + (targetProc != nodeProc)) { + const unsigned diff = nodeStep - targetStep; + const unsigned bound = windowSize > diff ? windowSize - diff : 0; + unsigned idx = nodeStartIdx; + + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] -= reward; + } + } + + if (windowSize >= diff && IsCompatible(node, targetProc)) { + affinityTableNode[targetProc][idx] -= reward; + } + + } else { + const unsigned diff = targetStep - nodeStep; + unsigned idx = windowSize + diff; + + if (idx < windowBound && IsCompatible(node, targetProc)) { + affinityTableNode[targetProc][idx] -= penalty; + } + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] += penalty; + } + } + } + } // traget + + const CostT commGain = graph_->VertexCommWeight(node) * commMultiplier_; + + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + if (p == nodeProc) { + continue; + } + + for (const auto lambdaPair : nodeLambdaMap_.IterateProcEntries(node)) { + const auto &lambdaProc = lambdaPair.first; + const CostT 
commCost = ChangeCommCost( + instance_->CommunicationCosts(p, lambdaProc), instance_->CommunicationCosts(nodeProc, lambdaProc), commGain); + for (unsigned idx = nodeStartIdx; idx < windowBound; idx++) { + affinityTableNode[p][idx] += commCost; + } + } + } + + for (const auto &source : instance_->GetComputationalDag().Parents(node)) { + const unsigned sourceStep = activeSchedule_->AssignedSuperstep(source); + const unsigned sourceProc = activeSchedule_->AssignedProcessor(source); + + if (sourceStep < nodeStep + (sourceProc == nodeProc)) { + const unsigned diff = nodeStep - sourceStep; + const unsigned bound = windowSize >= diff ? windowSize - diff + 1 : 0; + unsigned idx = nodeStartIdx; + + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] += penalty; + } + } + + if (idx - 1 < bound && IsCompatible(node, sourceProc)) { + affinityTableNode[sourceProc][idx - 1] -= penalty; + } + + } else { + const unsigned diff = sourceStep - nodeStep; + unsigned idx = std::min(windowSize + diff, windowBound); + + if (idx < windowBound && IsCompatible(node, sourceProc)) { + affinityTableNode[sourceProc][idx] -= reward; + } + + idx++; + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] -= reward; + } + } + } + + const CostT sourceCommGain = graph_->VertexCommWeight(source) * commMultiplier_; + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + if (p == nodeProc) { + continue; + } + + if (sourceProc != nodeProc && nodeLambdaMap_.GetProcEntry(source, nodeProc) == 1) { + for (unsigned idx = nodeStartIdx; idx < windowBound; idx++) { + affinityTableNode[p][idx] -= instance_->CommunicationCosts(sourceProc, nodeProc) * sourceCommGain; + } + } + + if (sourceProc != p && nodeLambdaMap_.HasNoProcEntry(source, p)) { + for (unsigned idx = nodeStartIdx; idx < windowBound; idx++) { + affinityTableNode[p][idx] 
+= instance_->CommunicationCosts(sourceProc, p) * sourceCommGain; + } + } + } + } // source + } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_total_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_total_comm_cost.hpp new file mode 100644 index 00000000..93776a43 --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_total_comm_cost.hpp @@ -0,0 +1,449 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner +*/ + +#pragma once + +#include "../kl_active_schedule.hpp" +#include "../kl_improver.hpp" + +namespace osp { + +template +struct KlTotalCommCostFunction { + using VertexType = VertexIdxT; + using KlMove = KlMoveStruct; + using KlGainUpdateInfo = KlUpdateInfo; + + constexpr static bool isMaxCommCostFunction_ = false; + + constexpr static unsigned windowRange_ = 2 * windowSize + 1; + constexpr static bool useNodeCommunicationCosts_ = useNodeCommunicationCostsArg || not hasEdgeWeightsV; + + KlActiveSchedule *activeSchedule_; + + CompatibleProcessorRange *procRange_; + + const GraphT *graph_; + const BspInstance *instance_; + + CostT commMultiplier_ = 1; + CostT maxCommWeight_ = 0; + + inline CostT GetCommMultiplier() { return commMultiplier_; } + + inline CostT GetMaxCommWeight() { return maxCommWeight_; } + + inline CostT GetMaxCommWeightMultiplied() { return maxCommWeight_ * commMultiplier_; } + + const std::string Name() const { return "toal_comm_cost"; } + + inline bool IsCompatible(VertexType node, unsigned proc) { return activeSchedule_->GetInstance().IsCompatible(node, proc); } + + void Initialize(KlActiveSchedule &sched, CompatibleProcessorRange &pRange) { + activeSchedule_ = &sched; + procRange_ = &pRange; + instance_ = &sched.GetInstance(); + graph_ = &instance_->GetComputationalDag(); + commMultiplier_ = 1.0 / instance_->NumberOfProcessors(); + } + + struct EmptyStruct {}; + + using PreMoveCommDataT = EmptyStruct; + + inline EmptyStruct GetPreMoveCommData(const KlMove &) { return EmptyStruct(); } + + CostT ComputeScheduleCostTest() { return ComputeScheduleCost(); } + + void UpdateDatastructureAfterMove(const KlMove &, const unsigned, const unsigned) {} + + CostT ComputeScheduleCost() { + CostT workCosts = 0; + for (unsigned step = 0; step < activeSchedule_->NumSteps(); step++) { + workCosts += activeSchedule_->GetStepMaxWork(step); + } + + CostT commCosts = 0; + for (const auto &edge : Edges(*graph_)) { + const auto &sourceV = Source(edge, 
*graph_); + const auto &targetV = Target(edge, *graph_); + + const unsigned &sourceProc = activeSchedule_->AssignedProcessor(sourceV); + const unsigned &targetProc = activeSchedule_->AssignedProcessor(targetV); + + if (sourceProc != targetProc) { + if constexpr (useNodeCommunicationCosts_) { + const CostT sourceCommCost = graph_->VertexCommWeight(sourceV); + maxCommWeight_ = std::max(maxCommWeight_, sourceCommCost); + commCosts += sourceCommCost * instance_->CommunicationCosts(sourceProc, targetProc); + } else { + const CostT sourceCommCost = graph_->EdgeCommWeight(edge); + maxCommWeight_ = std::max(maxCommWeight_, sourceCommCost); + commCosts += sourceCommCost * instance_->CommunicationCosts(sourceProc, targetProc); + } + } + } + + return workCosts + commCosts * commMultiplier_ + + static_cast>(activeSchedule_->NumSteps() - 1) * instance_->SynchronisationCosts(); + } + + template + void UpdateNodeCommAffinity(const KlMove &move, + ThreadDataT &threadData, + const CostT &penalty, + const CostT &reward, + std::map &maxGainRecompute, + std::vector &newNodes) { + const unsigned &startStep = threadData.startStep_; + const unsigned &endStep = threadData.endStep_; + + for (const auto &target : instance_->GetComputationalDag().Children(move.node_)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + if (targetStep < startStep || targetStep > endStep) { + continue; + } + + if (threadData.lockManager_.IsLocked(target)) { + continue; + } + + if (not threadData.affinityTable_.IsSelected(target)) { + newNodes.push_back(target); + continue; + } + + if (maxGainRecompute.find(target) != maxGainRecompute.end()) { + maxGainRecompute[target].fullUpdate_ = true; + } else { + maxGainRecompute[target] = KlGainUpdateInfo(target, true); + } + + const unsigned targetProc = activeSchedule_->AssignedProcessor(target); + const unsigned targetStartIdx = StartIdx(targetStep, startStep); + auto &affinityTableTarget = threadData.affinityTable_.At(target); + + if 
(move.fromStep_ < targetStep + (move.fromProc_ == targetProc)) { + const unsigned diff = targetStep - move.fromStep_; + const unsigned bound = windowSize >= diff ? windowSize - diff + 1 : 0; + unsigned idx = targetStartIdx; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTableTarget[p][idx] -= penalty; + } + } + + if (idx - 1 < bound && IsCompatible(target, move.fromProc_)) { + affinityTableTarget[move.fromProc_][idx - 1] += penalty; + } + + } else { + const unsigned diff = move.fromStep_ - targetStep; + const unsigned windowBound = EndIdx(targetStep, endStep); + unsigned idx = std::min(windowSize + diff, windowBound); + + if (idx < windowBound && IsCompatible(target, move.fromProc_)) { + affinityTableTarget[move.fromProc_][idx] += reward; + } + + idx++; + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTableTarget[p][idx] += reward; + } + } + } + + if (move.toStep_ < targetStep + (move.toProc_ == targetProc)) { + unsigned idx = targetStartIdx; + const unsigned diff = targetStep - move.toStep_; + const unsigned bound = windowSize >= diff ? 
windowSize - diff + 1 : 0; + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTableTarget[p][idx] += penalty; + } + } + + if (idx - 1 < bound && IsCompatible(target, move.toProc_)) { + affinityTableTarget[move.toProc_][idx - 1] -= penalty; + } + + } else { + const unsigned diff = move.toStep_ - targetStep; + const unsigned windowBound = EndIdx(targetStep, endStep); + unsigned idx = std::min(windowSize + diff, windowBound); + + if (idx < windowBound && IsCompatible(target, move.toProc_)) { + affinityTableTarget[move.toProc_][idx] -= reward; + } + + idx++; + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + affinityTableTarget[p][idx] -= reward; + } + } + } + + if (move.toProc_ != move.fromProc_) { + const auto fromProcTargetCommCost = instance_->CommunicationCosts(move.fromProc_, targetProc); + const auto toProcTargetCommCost = instance_->CommunicationCosts(move.toProc_, targetProc); + + const CostT commGain = graph_->VertexCommWeight(move.node_) * commMultiplier_; + + unsigned idx = targetStartIdx; + const unsigned windowBound = EndIdx(targetStep, endStep); + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(target)) { + const auto x + = ChangeCommCost(instance_->CommunicationCosts(p, move.toProc_), toProcTargetCommCost, commGain); + const auto y + = ChangeCommCost(instance_->CommunicationCosts(p, move.fromProc_), fromProcTargetCommCost, commGain); + affinityTableTarget[p][idx] += x - y; + } + } + } + } + + for (const auto &source : instance_->GetComputationalDag().Parents(move.node_)) { + const unsigned sourceStep = activeSchedule_->AssignedSuperstep(source); + if (sourceStep < startStep || sourceStep > endStep) { + continue; + } + + if (threadData.lockManager_.IsLocked(source)) { + continue; + } + + if (not threadData.affinityTable_.IsSelected(source)) { + newNodes.push_back(source); + 
continue; + } + + if (maxGainRecompute.find(source) != maxGainRecompute.end()) { + maxGainRecompute[source].fullUpdate_ = true; + } else { + maxGainRecompute[source] = KlGainUpdateInfo(source, true); + } + + const unsigned sourceProc = activeSchedule_->AssignedProcessor(source); + const unsigned windowBound = EndIdx(sourceStep, endStep); + auto &affinityTableSource = threadData.affinityTable_.At(source); + + if (move.fromStep_ < sourceStep + (move.fromProc_ != sourceProc)) { + const unsigned diff = sourceStep - move.fromStep_; + const unsigned bound = windowSize > diff ? windowSize - diff : 0; + unsigned idx = StartIdx(sourceStep, startStep); + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] += reward; + } + } + + if (windowSize >= diff && IsCompatible(source, move.fromProc_)) { + affinityTableSource[move.fromProc_][idx] += reward; + } + + } else { + const unsigned diff = move.fromStep_ - sourceStep; + unsigned idx = windowSize + diff; + + if (idx < windowBound && IsCompatible(source, move.fromProc_)) { + affinityTableSource[move.fromProc_][idx] += penalty; + } + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] -= penalty; + } + } + } + + if (move.toStep_ < sourceStep + (move.toProc_ != sourceProc)) { + const unsigned diff = sourceStep - move.toStep_; + const unsigned bound = windowSize > diff ? 
windowSize - diff : 0; + unsigned idx = StartIdx(sourceStep, startStep); + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] -= reward; + } + } + + if (windowSize >= diff && IsCompatible(source, move.toProc_)) { + affinityTableSource[move.toProc_][idx] -= reward; + } + + } else { + const unsigned diff = move.toStep_ - sourceStep; + unsigned idx = windowSize + diff; + + if (idx < windowBound && IsCompatible(source, move.toProc_)) { + affinityTableSource[move.toProc_][idx] -= penalty; + } + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + affinityTableSource[p][idx] += penalty; + } + } + } + + if (move.toProc_ != move.fromProc_) { + const auto fromProcSourceCommCost = instance_->CommunicationCosts(sourceProc, move.fromProc_); + const auto toProcSourceCommCost = instance_->CommunicationCosts(sourceProc, move.toProc_); + + const CostT commGain = graph_->VertexCommWeight(source) * commMultiplier_; + + unsigned idx = StartIdx(sourceStep, startStep); + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(source)) { + const CostT x + = ChangeCommCost(instance_->CommunicationCosts(p, move.toProc_), toProcSourceCommCost, commGain); + const CostT y + = ChangeCommCost(instance_->CommunicationCosts(p, move.fromProc_), fromProcSourceCommCost, commGain); + affinityTableSource[p][idx] += x - y; + } + } + } + } + } + + inline unsigned StartIdx(const unsigned nodeStep, const unsigned startStep) { + return (nodeStep < windowSize + startStep) ? windowSize - (nodeStep - startStep) : 0; + } + + inline unsigned EndIdx(const unsigned nodeStep, const unsigned endStep) { + return (nodeStep + windowSize <= endStep) ? 
windowRange_ : windowRange_ - (nodeStep + windowSize - endStep); + } + + inline CostT ChangeCommCost(const VCommwT &pTargetCommCost, + const VCommwT &nodeTargetCommCost, + const CostT &commGain) { + return pTargetCommCost > nodeTargetCommCost ? (pTargetCommCost - nodeTargetCommCost) * commGain + : (nodeTargetCommCost - pTargetCommCost) * commGain * -1.0; + } + + template + void ComputeCommAffinity(VertexType node, + AffinityTableT &affinityTableNode, + const CostT &penalty, + const CostT &reward, + const unsigned startStep, + const unsigned endStep) { + const unsigned nodeStep = activeSchedule_->AssignedSuperstep(node); + const unsigned nodeProc = activeSchedule_->AssignedProcessor(node); + const unsigned windowBound = EndIdx(nodeStep, endStep); + const unsigned nodeStartIdx = StartIdx(nodeStep, startStep); + + for (const auto &target : instance_->GetComputationalDag().Children(node)) { + const unsigned targetStep = activeSchedule_->AssignedSuperstep(target); + const unsigned targetProc = activeSchedule_->AssignedProcessor(target); + + if (targetStep < nodeStep + (targetProc != nodeProc)) { + const unsigned diff = nodeStep - targetStep; + const unsigned bound = windowSize > diff ? 
windowSize - diff : 0; + unsigned idx = nodeStartIdx; + + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] -= reward; + } + } + + if (windowSize >= diff && IsCompatible(node, targetProc)) { + affinityTableNode[targetProc][idx] -= reward; + } + + } else { + const unsigned diff = targetStep - nodeStep; + unsigned idx = windowSize + diff; + + if (idx < windowBound && IsCompatible(node, targetProc)) { + affinityTableNode[targetProc][idx] -= penalty; + } + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] += penalty; + } + } + } + + const CostT commGain = graph_->VertexCommWeight(node) * commMultiplier_; + const auto nodeTargetCommCost = instance_->CommunicationCosts(nodeProc, targetProc); + + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + const CostT commCost = ChangeCommCost(instance_->CommunicationCosts(p, targetProc), nodeTargetCommCost, commGain); + for (unsigned idx = nodeStartIdx; idx < windowBound; idx++) { + affinityTableNode[p][idx] += commCost; + } + } + + } // traget + + for (const auto &source : instance_->GetComputationalDag().Parents(node)) { + const unsigned sourceStep = activeSchedule_->AssignedSuperstep(source); + const unsigned sourceProc = activeSchedule_->AssignedProcessor(source); + + if (sourceStep < nodeStep + (sourceProc == nodeProc)) { + const unsigned diff = nodeStep - sourceStep; + const unsigned bound = windowSize >= diff ? 
windowSize - diff + 1 : 0; + unsigned idx = nodeStartIdx; + + for (; idx < bound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] += penalty; + } + } + + if (idx - 1 < bound && IsCompatible(node, sourceProc)) { + affinityTableNode[sourceProc][idx - 1] -= penalty; + } + + } else { + const unsigned diff = sourceStep - nodeStep; + unsigned idx = std::min(windowSize + diff, windowBound); + + if (idx < windowBound && IsCompatible(node, sourceProc)) { + affinityTableNode[sourceProc][idx] -= reward; + } + + idx++; + + for (; idx < windowBound; idx++) { + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + affinityTableNode[p][idx] -= reward; + } + } + } + + const CostT commGain = graph_->VertexCommWeight(source) * commMultiplier_; + const auto sourceNodeCommCost = instance_->CommunicationCosts(sourceProc, nodeProc); + + for (const unsigned p : procRange_->CompatibleProcessorsVertex(node)) { + const CostT commCost = ChangeCommCost(instance_->CommunicationCosts(p, sourceProc), sourceNodeCommCost, commGain); + for (unsigned idx = nodeStartIdx; idx < windowBound; idx++) { + affinityTableNode[p][idx] += commCost; + } + } + } // source + } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/lambda_container.hpp similarity index 68% rename from include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp rename to include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/lambda_container.hpp index a1494bff..5e1823e8 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/lambda_container.hpp @@ -33,28 +33,28 @@ namespace osp { * For each node, the map stores the count of children 
assigned to each processor, which is * important for computing communication costs in BSP scheduling. */ -template -struct lambda_map_container { +template +struct LambdaMapContainer { /// Vector of maps: for each node, maps processor ID to assignment count - std::vector> node_lambda_map; + std::vector> nodeLambdaMap_; /** * @brief Initialize the container for a given number of vertices. - * @param num_vertices Number of nodes in the schedule + * @param NumVertices Number of nodes in the schedule * @param (unused) Number of processors (not needed for map-based implementation) */ - inline void initialize(const vertex_idx_t num_vertices, const unsigned) { node_lambda_map.resize(num_vertices); } + inline void Initialize(const VertexIdxT numVertices, const unsigned) { nodeLambdaMap_.resize(numVertices); } /** * @brief Reset all processor assignments for a specific node. * @param node Node index to reset */ - inline void reset_node(const vertex_idx_t node) { node_lambda_map[node].clear(); } + inline void ResetNode(const VertexIdxT node) { nodeLambdaMap_[node].clear(); } /** * @brief Clear all data from the container. */ - inline void clear() { node_lambda_map.clear(); } + inline void Clear() { nodeLambdaMap_.clear(); } /** * @brief Check if a processor has an entry for a given node. 
@@ -62,8 +62,8 @@ struct lambda_map_container { * @param proc Processor ID * @return true if the processor has at least one assignment to the node */ - inline bool has_proc_entry(const vertex_idx_t node, const unsigned proc) const { - return (node_lambda_map[node].find(proc) != node_lambda_map[node].end()); + inline bool HasProcEntry(const VertexIdxT node, const unsigned proc) const { + return (nodeLambdaMap_[node].find(proc) != nodeLambdaMap_[node].end()); } /** @@ -72,8 +72,8 @@ struct lambda_map_container { * @param proc Processor ID * @return true if the processor has no assignments to the node */ - inline bool has_no_proc_entry(const vertex_idx_t node, const unsigned proc) const { - return (node_lambda_map[node].find(proc) == node_lambda_map[node].end()); + inline bool HasNoProcEntry(const VertexIdxT node, const unsigned proc) const { + return (nodeLambdaMap_[node].find(proc) == nodeLambdaMap_[node].end()); } /** @@ -82,7 +82,7 @@ struct lambda_map_container { * @param proc Processor ID * @return Reference to the count (creates entry if it doesn't exist) */ - inline unsigned &get_proc_entry(const vertex_idx_t node, const unsigned proc) { return node_lambda_map[node][proc]; } + inline unsigned &GetProcEntry(const VertexIdxT node, const unsigned proc) { return nodeLambdaMap_[node][proc]; } /** * @brief Get the processor count for a given node (const version). 
@@ -91,9 +91,9 @@ struct lambda_map_container { * @return The count value for the processor at the node * @pre has_proc_entry(node, proc) must be true */ - inline unsigned get_proc_entry(const vertex_idx_t node, const unsigned proc) const { - assert(has_proc_entry(node, proc)); - return node_lambda_map[node].at(proc); + inline unsigned GetProcEntry(const VertexIdxT node, const unsigned proc) const { + assert(HasProcEntry(node, proc)); + return nodeLambdaMap_[node].at(proc); } /** @@ -101,7 +101,7 @@ struct lambda_map_container { * @param node Node index * @return The count of different processors the node is sending to */ - inline unsigned get_proc_count(const vertex_idx_t node) const { return static_cast(node_lambda_map[node].size()); } + inline unsigned GetProcCount(const VertexIdxT node) const { return static_cast(nodeLambdaMap_[node].size()); } /** * @brief Increase the processor count for a given node. @@ -109,12 +109,12 @@ struct lambda_map_container { * @param proc Processor ID * @return true if this is the first assignment of this processor to the node */ - inline bool increase_proc_count(const vertex_idx_t node, const unsigned proc) { - if (has_proc_entry(node, proc)) { - node_lambda_map[node][proc]++; + inline bool IncreaseProcCount(const VertexIdxT node, const unsigned proc) { + if (HasProcEntry(node, proc)) { + nodeLambdaMap_[node][proc]++; return false; } else { - node_lambda_map[node][proc] = 1; + nodeLambdaMap_[node][proc] = 1; return true; } } @@ -126,13 +126,13 @@ struct lambda_map_container { * @return true if this was the last assignment of this processor to the node * @pre has_proc_entry(node, proc) must be true */ - inline bool decrease_proc_count(const vertex_idx_t node, const unsigned proc) { - assert(has_proc_entry(node, proc)); - if (node_lambda_map[node][proc] == 1) { - node_lambda_map[node].erase(proc); + inline bool DecreaseProcCount(const VertexIdxT node, const unsigned proc) { + assert(HasProcEntry(node, proc)); + if 
(nodeLambdaMap_[node][proc] == 1) { + nodeLambdaMap_[node].erase(proc); return true; } else { - node_lambda_map[node][proc]--; + nodeLambdaMap_[node][proc]--; return false; } } @@ -142,7 +142,7 @@ struct lambda_map_container { * @param node Node index * @return Reference to the unordered_map of processor assignments for the node */ - inline const auto &iterate_proc_entries(const vertex_idx_t node) { return node_lambda_map[node]; } + inline const auto &IterateProcEntries(const VertexIdxT node) { return nodeLambdaMap_[node]; } }; /** @@ -155,15 +155,15 @@ struct lambda_map_container { * For each node, the vector stores the count of children assigned to each processor, which is * important for computing communication costs in BSP scheduling. */ -template -struct lambda_vector_container { +template +struct LambdaVectorContainer { /** * @brief Range adapter for iterating over non-zero processor entries. * * Provides a range-based for loop interface that automatically skips processors * with zero assignments. */ - class lambda_vector_range { + class LambdaVectorRange { private: const std::vector &vec_; @@ -174,7 +174,7 @@ struct lambda_vector_container { * Implements an input iterator that yields pairs of (processor_id, count) * for all processors with non-zero assignment counts. */ - class lambda_vector_iterator { + class LambdaVectorIterator { using iterator_category = std::input_iterator_tag; using value_type = std::pair; using difference_type = std::ptrdiff_t; @@ -190,7 +190,7 @@ struct lambda_vector_container { * @brief Construct iterator at the beginning, skipping initial zeros. 
* @param vec Reference to the vector to iterate over */ - lambda_vector_iterator(const std::vector &vec) : vec_(vec), index_(0) { + LambdaVectorIterator(const std::vector &vec) : vec_(vec), index_(0) { // Advance to the first valid entry while (index_ < vec_.size() && vec_[index_] == 0) { ++index_; @@ -202,13 +202,13 @@ struct lambda_vector_container { * @param vec Reference to the vector to iterate over * @param index Starting index */ - lambda_vector_iterator(const std::vector &vec, unsigned index) : vec_(vec), index_(index) {} + LambdaVectorIterator(const std::vector &vec, unsigned index) : vec_(vec), index_(index) {} /** * @brief Advance to the next non-zero entry. * @return Reference to this iterator */ - lambda_vector_iterator &operator++() { + LambdaVectorIterator &operator++() { ++index_; while (index_ < vec_.size() && vec_[index_] == 0) { ++index_; @@ -227,55 +227,55 @@ struct lambda_vector_container { * @param other Iterator to compare with * @return true if both iterators point to the same position */ - bool operator==(const lambda_vector_iterator &other) const { return index_ == other.index_; } + bool operator==(const LambdaVectorIterator &other) const { return index_ == other.index_; } /** * @brief Check inequality with another iterator. * @param other Iterator to compare with * @return true if iterators point to different positions */ - bool operator!=(const lambda_vector_iterator &other) const { return !(*this == other); } + bool operator!=(const LambdaVectorIterator &other) const { return !(*this == other); } }; /** * @brief Construct a range from a vector. 
* @param vec Reference to the vector to create range over */ - lambda_vector_range(const std::vector &vec) : vec_(vec) {} + LambdaVectorRange(const std::vector &vec) : vec_(vec) {} /// Get iterator to the first non-zero entry - lambda_vector_iterator begin() { return lambda_vector_iterator(vec_); } + LambdaVectorIterator begin() { return LambdaVectorIterator(vec_); } /// Get iterator to the end - lambda_vector_iterator end() { return lambda_vector_iterator(vec_, static_cast(vec_.size())); } + LambdaVectorIterator end() { return LambdaVectorIterator(vec_, static_cast(vec_.size())); } }; /// 2D vector: for each node, stores processor assignment counts - std::vector> node_lambda_vec; + std::vector> nodeLambdaVec_; /// Number of processors in the system - unsigned num_procs_ = 0; + unsigned numProcs_ = 0; /** * @brief Initialize the container for a given number of vertices and processors. - * @param num_vertices Number of nodes in the schedule + * @param NumVertices Number of nodes in the schedule * @param num_procs Number of processors in the system */ - inline void initialize(const vertex_idx_t num_vertices, const unsigned num_procs) { - node_lambda_vec.assign(num_vertices, std::vector(num_procs, 0)); - num_procs_ = num_procs; + inline void Initialize(const VertexIdxT numVertices, const unsigned numProcs) { + nodeLambdaVec_.assign(numVertices, std::vector(numProcs, 0)); + numProcs_ = numProcs; } /** * @brief Reset all processor assignments for a specific node. * @param node Node index to reset */ - inline void reset_node(const vertex_idx_t node) { node_lambda_vec[node].assign(num_procs_, 0); } + inline void ResetNode(const VertexIdxT node) { nodeLambdaVec_[node].assign(numProcs_, 0); } /** * @brief Clear all data from the container. */ - inline void clear() { node_lambda_vec.clear(); } + inline void Clear() { nodeLambdaVec_.clear(); } /** * @brief Check if a processor has an entry for a given node. 
@@ -283,7 +283,7 @@ struct lambda_vector_container { * @param proc Processor ID * @return true if the processor has at least one assignment to the node */ - inline bool has_proc_entry(const vertex_idx_t node, const unsigned proc) const { return node_lambda_vec[node][proc] > 0; } + inline bool HasProcEntry(const VertexIdxT node, const unsigned proc) const { return nodeLambdaVec_[node][proc] > 0; } /** * @brief Check if a processor has no entry for a given node. @@ -291,7 +291,7 @@ struct lambda_vector_container { * @param proc Processor ID * @return true if the processor has no assignments to the node */ - inline bool has_no_proc_entry(const vertex_idx_t node, const unsigned proc) const { return node_lambda_vec[node][proc] == 0; } + inline bool HasNoProcEntry(const VertexIdxT node, const unsigned proc) const { return nodeLambdaVec_[node][proc] == 0; } /** * @brief Get a reference to the processor count for a given node. @@ -299,7 +299,7 @@ struct lambda_vector_container { * @param proc Processor ID * @return Reference to the count (allows modification) */ - inline unsigned &get_proc_entry(const vertex_idx_t node, const unsigned proc) { return node_lambda_vec[node][proc]; } + inline unsigned &GetProcEntry(const VertexIdxT node, const unsigned proc) { return nodeLambdaVec_[node][proc]; } /** * @brief Get the processor count for a given node (const version). 
@@ -308,9 +308,9 @@ struct lambda_vector_container { * @return The count value for the processor at the node * @pre has_proc_entry(node, proc) must be true */ - inline unsigned get_proc_entry(const vertex_idx_t node, const unsigned proc) const { - assert(has_proc_entry(node, proc)); - return node_lambda_vec[node][proc]; + inline unsigned GetProcEntry(const VertexIdxT node, const unsigned proc) const { + assert(HasProcEntry(node, proc)); + return nodeLambdaVec_[node][proc]; } /** @@ -320,10 +320,10 @@ struct lambda_vector_container { * @return The count value for the processor at the node * @pre has_proc_entry(node, proc) must be true */ - inline unsigned get_proc_count(const vertex_idx_t node) const { + inline unsigned GetProcCount(const VertexIdxT node) const { unsigned count = 0; - for (unsigned proc = 0; proc < num_procs_; ++proc) { - if (node_lambda_vec[node][proc] > 0) { + for (unsigned proc = 0; proc < numProcs_; ++proc) { + if (nodeLambdaVec_[node][proc] > 0) { ++count; } } @@ -336,9 +336,9 @@ struct lambda_vector_container { * @param proc Processor ID * @return true if this is the first assignment of this processor to the node */ - inline bool increase_proc_count(const vertex_idx_t node, const unsigned proc) { - node_lambda_vec[node][proc]++; - return node_lambda_vec[node][proc] == 1; + inline bool IncreaseProcCount(const VertexIdxT node, const unsigned proc) { + nodeLambdaVec_[node][proc]++; + return nodeLambdaVec_[node][proc] == 1; } /** @@ -348,10 +348,10 @@ struct lambda_vector_container { * @return true if this was the last assignment of this processor to the node * @pre has_proc_entry(node, proc) must be true */ - inline bool decrease_proc_count(const vertex_idx_t node, const unsigned proc) { - assert(has_proc_entry(node, proc)); - node_lambda_vec[node][proc]--; - return node_lambda_vec[node][proc] == 0; + inline bool DecreaseProcCount(const VertexIdxT node, const unsigned proc) { + assert(HasProcEntry(node, proc)); + nodeLambdaVec_[node][proc]--; + 
return nodeLambdaVec_[node][proc] == 0; } /** @@ -359,7 +359,7 @@ struct lambda_vector_container { * @param node Node index * @return Range object that can be used in range-based for loops */ - inline auto iterate_proc_entries(const vertex_idx_t node) { return lambda_vector_range(node_lambda_vec[node]); } + inline auto IterateProcEntries(const VertexIdxT node) { return LambdaVectorRange(nodeLambdaVec_[node]); } }; } // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/max_comm_datastructure.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/max_comm_datastructure.hpp new file mode 100644 index 00000000..9de9b0ba --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/max_comm_datastructure.hpp @@ -0,0 +1,381 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner +*/ + +#pragma once + +#include +#include +#include +#include + +#include "comm_cost_policies.hpp" +#include "generic_lambda_container.hpp" +#include "lambda_container.hpp" +#include "osp/bsp/model/BspInstance.hpp" + +namespace osp { + +template +struct PreMoveCommData { + struct StepInfo { + CommWeightT maxComm_; + CommWeightT secondMaxComm_; + unsigned maxCommCount_; + }; + + std::unordered_map stepData_; + + PreMoveCommData() = default; + + void AddStep(unsigned step, CommWeightT max, CommWeightT second, unsigned count) { stepData_[step] = {max, second, count}; } + + bool GetStep(unsigned step, StepInfo &info) const { + auto it = stepData_.find(step); + if (it != stepData_.end()) { + info = it->second; + return true; + } + return false; + } +}; + +template +struct MaxCommDatastructure { + using CommWeightT = VCommwT; + using VertexType = VertexIdxT; + using KlMove = KlMoveStruct; + + const BspInstance *instance_; + const KlActiveScheduleT *activeSchedule_; + + std::vector> stepProcSend_; + std::vector> stepProcReceive_; + + // Caches for fast cost calculation (Global Max/Second Max per step) + std::vector stepMaxCommCache_; + std::vector stepSecondMaxCommCache_; + std::vector stepMaxCommCountCache_; + + CommWeightT maxCommWeight_ = 0; + + // Select the appropriate container type based on the policy's ValueType + using ContainerType = typename std::conditional::value, + LambdaVectorContainer, + GenericLambdaVectorContainer>::type; + + ContainerType nodeLambdaMap_; + + // Optimization: Scratchpad for update_datastructure_after_move to avoid allocations + std::vector affectedStepsList_; + std::vector stepIsAffected_; + + inline CommWeightT StepProcSend(unsigned step, unsigned proc) const { return stepProcSend_[step][proc]; } + + inline CommWeightT &StepProcSend(unsigned step, unsigned proc) { return stepProcSend_[step][proc]; } + + inline CommWeightT StepProcReceive(unsigned step, unsigned proc) const { return stepProcReceive_[step][proc]; } + + inline 
CommWeightT &StepProcReceive(unsigned step, unsigned proc) { return stepProcReceive_[step][proc]; } + + inline CommWeightT StepMaxComm(unsigned step) const { return stepMaxCommCache_[step]; } + + inline CommWeightT StepSecondMaxComm(unsigned step) const { return stepSecondMaxCommCache_[step]; } + + inline unsigned StepMaxCommCount(unsigned step) const { return stepMaxCommCountCache_[step]; } + + inline void Initialize(KlActiveScheduleT &klSched) { + activeSchedule_ = &klSched; + instance_ = &activeSchedule_->GetInstance(); + const unsigned numSteps = activeSchedule_->NumSteps(); + const unsigned numProcs = instance_->NumberOfProcessors(); + maxCommWeight_ = 0; + + stepProcSend_.assign(numSteps, std::vector(numProcs, 0)); + stepProcReceive_.assign(numSteps, std::vector(numProcs, 0)); + + stepMaxCommCache_.assign(numSteps, 0); + stepSecondMaxCommCache_.assign(numSteps, 0); + stepMaxCommCountCache_.assign(numSteps, 0); + + nodeLambdaMap_.Initialize(instance_->GetComputationalDag().NumVertices(), numProcs); + + // Initialize scratchpad + stepIsAffected_.assign(numSteps, false); + affectedStepsList_.reserve(numSteps); + } + + inline void Clear() { + stepProcSend_.clear(); + stepProcReceive_.clear(); + stepMaxCommCache_.clear(); + stepSecondMaxCommCache_.clear(); + stepMaxCommCountCache_.clear(); + nodeLambdaMap_.clear(); + affectedStepsList_.clear(); + stepIsAffected_.clear(); + } + + inline void ArrangeSuperstepCommData(const unsigned step) { + CommWeightT maxSend = 0; + CommWeightT secondMaxSend = 0; + unsigned maxSendCount = 0; + + const auto &sends = stepProcSend_[step]; + for (const auto val : sends) { + if (val > maxSend) { + secondMaxSend = maxSend; + maxSend = val; + maxSendCount = 1; + } else if (val == maxSend) { + maxSendCount++; + } else if (val > secondMaxSend) { + secondMaxSend = val; + } + } + + CommWeightT maxReceive = 0; + CommWeightT secondMaxReceive = 0; + unsigned maxReceiveCount = 0; + + const auto &receives = stepProcReceive_[step]; + for (const 
auto val : receives) { + if (val > maxReceive) { + secondMaxReceive = maxReceive; + maxReceive = val; + maxReceiveCount = 1; + } else if (val == maxReceive) { + maxReceiveCount++; + } else if (val > secondMaxReceive) { + secondMaxReceive = val; + } + } + + const CommWeightT globalMax = std::max(maxSend, maxReceive); + stepMaxCommCache_[step] = globalMax; + + unsigned globalCount = 0; + if (maxSend == globalMax) { + globalCount += maxSendCount; + } + if (maxReceive == globalMax) { + globalCount += maxReceiveCount; + } + stepMaxCommCountCache_[step] = globalCount; + + CommWeightT candSend = (maxSend == globalMax) ? secondMaxSend : maxSend; + CommWeightT candRecv = (maxReceive == globalMax) ? secondMaxReceive : maxReceive; + + stepSecondMaxCommCache_[step] = std::max(candSend, candRecv); + } + + void RecomputeMaxSendReceive(unsigned step) { ArrangeSuperstepCommData(step); } + + inline PreMoveCommData GetPreMoveCommData(const KlMove &move) { + PreMoveCommData data; + std::unordered_set affectedSteps; + + affectedSteps.insert(move.fromStep_); + affectedSteps.insert(move.toStep_); + + const auto &graph = instance_->GetComputationalDag(); + + for (const auto &parent : graph.Parents(move.node_)) { + affectedSteps.insert(activeSchedule_->AssignedSuperstep(parent)); + } + + for (unsigned step : affectedSteps) { + data.AddStep(step, StepMaxComm(step), StepSecondMaxComm(step), StepMaxCommCount(step)); + } + + return data; + } + + void UpdateDatastructureAfterMove(const KlMove &move, unsigned, unsigned) { + const auto &graph = instance_->GetComputationalDag(); + + // Prepare Scratchpad (Avoids Allocations) --- + for (unsigned step : affectedStepsList_) { + if (step < stepIsAffected_.size()) { + stepIsAffected_[step] = false; + } + } + affectedStepsList_.clear(); + + auto MarkStep = [&](unsigned step) { + if (step < stepIsAffected_.size() && !stepIsAffected_[step]) { + stepIsAffected_[step] = true; + affectedStepsList_.push_back(step); + } + }; + + const VertexType node = 
move.node_; + const unsigned fromStep = move.fromStep_; + const unsigned toStep = move.toStep_; + const unsigned fromProc = move.fromProc_; + const unsigned toProc = move.toProc_; + const CommWeightT commWNode = graph.VertexCommWeight(node); + + // Handle Node Movement (Outgoing Edges: Node -> Children) + + if (fromStep != toStep) { + // Case 1: Node changes Step + for (const auto [proc, val] : nodeLambdaMap_.IterateProcEntries(node)) { + // A. Remove Old (Sender: fromProc, Receiver: proc) + if (proc != fromProc) { + const CommWeightT cost = commWNode * instance_->SendCosts(fromProc, proc); + if (cost > 0) { + CommPolicy::UnattributeCommunication(*this, cost, fromStep, fromProc, proc, 0, val); + } + } + + // B. Add New (Sender: toProc, Receiver: proc) + if (proc != toProc) { + const CommWeightT cost = commWNode * instance_->SendCosts(toProc, proc); + if (cost > 0) { + CommPolicy::AttributeCommunication(*this, cost, toStep, toProc, proc, 0, val); + } + } + } + MarkStep(fromStep); + MarkStep(toStep); + + } else if (fromProc != toProc) { + // Case 2: Node stays in same Step, but changes Processor + + for (const auto [proc, val] : nodeLambdaMap_.IterateProcEntries(node)) { + // Remove Old (Sender: fromProc, Receiver: proc) + if (proc != fromProc) { + const CommWeightT cost = commWNode * instance_->SendCosts(fromProc, proc); + if (cost > 0) { + CommPolicy::UnattributeCommunication(*this, cost, fromStep, fromProc, proc, 0, val); + } + } + + // Add New (Sender: toProc, Receiver: proc) + if (proc != toProc) { + const CommWeightT cost = commWNode * instance_->SendCosts(toProc, proc); + if (cost > 0) { + CommPolicy::AttributeCommunication(*this, cost, fromStep, toProc, proc, 0, val); + } + } + } + MarkStep(fromStep); + } + + // Update Parents' Outgoing Communication (Parents → Node) + + for (const auto &parent : graph.Parents(node)) { + const unsigned parentStep = activeSchedule_->AssignedSuperstep(parent); + // Fast boundary check + if (parentStep >= stepProcSend_.size()) { 
+ continue; + } + + const unsigned parentProc = activeSchedule_->AssignedProcessor(parent); + const CommWeightT commWParent = graph.VertexCommWeight(parent); + + auto &val = nodeLambdaMap_.GetProcEntry(parent, fromProc); + const bool removedFromProc = CommPolicy::RemoveChild(val, fromStep); + + // 1. Handle Removal from fromProc + if (removedFromProc) { + if (fromProc != parentProc) { + const CommWeightT cost = commWParent * instance_->SendCosts(parentProc, fromProc); + if (cost > 0) { + CommPolicy::UnattributeCommunication(*this, cost, parentStep, parentProc, fromProc, fromStep, val); + } + } + } + + auto &valTo = nodeLambdaMap_.GetProcEntry(parent, toProc); + const bool addedToProc = CommPolicy::AddChild(valTo, toStep); + + // 2. Handle Addition to toProc + if (addedToProc) { + if (toProc != parentProc) { + const CommWeightT cost = commWParent * instance_->SendCosts(parentProc, toProc); + if (cost > 0) { + CommPolicy::AttributeCommunication(*this, cost, parentStep, parentProc, toProc, toStep, valTo); + } + } + } + + MarkStep(parentStep); + } + + // Re-arrange Affected Steps + for (unsigned step : affectedStepsList_) { + ArrangeSuperstepCommData(step); + } + } + + void SwapSteps(const unsigned step1, const unsigned step2) { + std::swap(stepProcSend_[step1], stepProcSend_[step2]); + std::swap(stepProcReceive_[step1], stepProcReceive_[step2]); + std::swap(stepMaxCommCache_[step1], stepMaxCommCache_[step2]); + std::swap(stepSecondMaxCommCache_[step1], stepSecondMaxCommCache_[step2]); + std::swap(stepMaxCommCountCache_[step1], stepMaxCommCountCache_[step2]); + } + + void ResetSuperstep(unsigned step) { + std::fill(stepProcSend_[step].begin(), stepProcSend_[step].end(), 0); + std::fill(stepProcReceive_[step].begin(), stepProcReceive_[step].end(), 0); + ArrangeSuperstepCommData(step); + } + + void ComputeCommDatastructures(unsigned startStep, unsigned endStep) { + for (unsigned step = startStep; step <= endStep; step++) { + std::fill(stepProcSend_[step].begin(), 
stepProcSend_[step].end(), 0); + std::fill(stepProcReceive_[step].begin(), stepProcReceive_[step].end(), 0); + } + + const auto &vecSched = activeSchedule_->GetVectorSchedule(); + const auto &graph = instance_->GetComputationalDag(); + + for (const auto &u : graph.Vertices()) { + nodeLambdaMap_.ResetNode(u); + const unsigned uProc = vecSched.AssignedProcessor(u); + const unsigned uStep = vecSched.AssignedSuperstep(u); + const CommWeightT commW = graph.VertexCommWeight(u); + maxCommWeight_ = std::max(maxCommWeight_, commW); + + for (const auto &v : graph.Children(u)) { + const unsigned vProc = vecSched.AssignedProcessor(v); + const unsigned vStep = vecSched.AssignedSuperstep(v); + + const CommWeightT commWSendCost = (uProc != vProc) ? commW * instance_->SendCosts(uProc, vProc) : 0; + + auto &val = nodeLambdaMap_.GetProcEntry(u, vProc); + if (CommPolicy::AddChild(val, vStep)) { + if (uProc != vProc && commWSendCost > 0) { + CommPolicy::AttributeCommunication(*this, commWSendCost, uStep, uProc, vProc, vStep, val); + } + } + } + } + + for (unsigned step = startStep; step <= endStep; step++) { + if (step >= stepProcSend_.size()) { + continue; + } + ArrangeSuperstepCommData(step); + } + } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_active_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_active_schedule.hpp new file mode 100644 index 00000000..0506d386 --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_active_schedule.hpp @@ -0,0 +1,675 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include "osp/bsp/model/BspSchedule.hpp" +#include "osp/bsp/model/IBspSchedule.hpp" +#include "osp/bsp/model/util/SetSchedule.hpp" +#include "osp/bsp/model/util/VectorSchedule.hpp" +#include "osp/bsp/scheduler/ImprovementScheduler.hpp" +#include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" +#include "osp/graph_algorithms/directed_graph_util.hpp" + +namespace osp { + +template +struct KlMoveStruct { + VertexIdxT node_; + CostT gain_; + + unsigned fromProc_; + unsigned fromStep_; + + unsigned toProc_; + unsigned toStep_; + + KlMoveStruct() : node_(0), gain_(0), fromProc_(0), fromStep_(0), toProc_(0), toStep_(0) {} + + KlMoveStruct(VertexIdxT node, CostT gain, unsigned fromProc, unsigned fromStep, unsigned toProc, unsigned toStep) + : node_(node), gain_(gain), fromProc_(fromProc), fromStep_(fromStep), toProc_(toProc), toStep_(toStep) {} + + bool operator<(KlMoveStruct const &rhs) const { + return (gain_ < rhs.gain_) or (gain_ == rhs.gain_ and node_ > rhs.node_); + } + + bool operator>(KlMoveStruct const &rhs) const { + return (gain_ > rhs.gain_) or (gain_ >= rhs.gain_ and node_ < rhs.node_); + } + + KlMoveStruct ReverseMove() const { + return KlMoveStruct(node_, -gain_, toProc_, toStep_, fromProc_, fromStep_); + } +}; + +template +struct PreMoveWorkData { + WorkWeightT fromStepMaxWork_; + WorkWeightT fromStepSecondMaxWork_; + unsigned fromStepMaxWorkProcessorCount_; + + WorkWeightT toStepMaxWork_; + WorkWeightT toStepSecondMaxWork_; + unsigned 
toStepMaxWorkProcessorCount_; + + PreMoveWorkData() {} + + PreMoveWorkData(WorkWeightT fromStepMaxWork, + WorkWeightT fromStepSecondMaxWork, + unsigned fromStepMaxWorkProcessorCount, + WorkWeightT toStepMaxWork, + WorkWeightT toStepSecondMaxWork, + unsigned toStepMaxWorkProcessorCount) + : fromStepMaxWork_(fromStepMaxWork), + fromStepSecondMaxWork_(fromStepSecondMaxWork), + fromStepMaxWorkProcessorCount_(fromStepMaxWorkProcessorCount), + toStepMaxWork_(toStepMaxWork), + toStepSecondMaxWork_(toStepSecondMaxWork), + toStepMaxWorkProcessorCount_(toStepMaxWorkProcessorCount) {} +}; + +template +struct KlActiveScheduleWorkDatastructures { + using WorkWeightT = VWorkwT; + + const BspInstance *instance_; + const SetSchedule *setSchedule_; + + struct WeightProc { + WorkWeightT work_; + unsigned proc_; + + WeightProc() : work_(0), proc_(0) {} + + WeightProc(WorkWeightT work, unsigned proc) : work_(work), proc_(proc) {} + + bool operator<(WeightProc const &rhs) const { return (work_ > rhs.work_) or (work_ == rhs.work_ and proc_ < rhs.proc_); } + }; + + std::vector> stepProcessorWork_; + std::vector> stepProcessorPosition_; + std::vector stepMaxWorkProcessorCount_; + WorkWeightT maxWorkWeight_; + WorkWeightT totalWorkWeight_; + + inline WorkWeightT StepMaxWork(unsigned step) const { return stepProcessorWork_[step][0].work_; } + + inline WorkWeightT StepSecondMaxWork(unsigned step) const { + return stepProcessorWork_[step][stepMaxWorkProcessorCount_[step]].work_; + } + + inline WorkWeightT StepProcWork(unsigned step, unsigned proc) const { + return stepProcessorWork_[step][stepProcessorPosition_[step][proc]].work_; + } + + inline WorkWeightT &StepProcWork(unsigned step, unsigned proc) { + return stepProcessorWork_[step][stepProcessorPosition_[step][proc]].work_; + } + + template + inline PreMoveWorkData GetPreMoveWorkData(KlMoveStruct move) { + return PreMoveWorkData(StepMaxWork(move.fromStep_), + StepSecondMaxWork(move.fromStep_), + stepMaxWorkProcessorCount_[move.fromStep_], 
+ StepMaxWork(move.toStep_), + StepSecondMaxWork(move.toStep_), + stepMaxWorkProcessorCount_[move.toStep_]); + } + + inline void Initialize(const SetSchedule &sched, const BspInstance &inst, unsigned numSteps) { + instance_ = &inst; + setSchedule_ = &sched; + maxWorkWeight_ = 0; + totalWorkWeight_ = 0; + stepProcessorWork_.assign(numSteps, std::vector(instance_->NumberOfProcessors())); + stepProcessorPosition_.assign(numSteps, std::vector(instance_->NumberOfProcessors(), 0)); + stepMaxWorkProcessorCount_.assign(numSteps, 0); + } + + inline void Clear() { + stepProcessorWork_.clear(); + stepProcessorPosition_.clear(); + stepMaxWorkProcessorCount_.clear(); + } + + inline void ArrangeSuperstepData(const unsigned step) { + std::sort(stepProcessorWork_[step].begin(), stepProcessorWork_[step].end()); + unsigned pos = 0; + const WorkWeightT maxWorkTo = stepProcessorWork_[step][0].work_; + + for (const auto &wp : stepProcessorWork_[step]) { + stepProcessorPosition_[step][wp.proc_] = pos++; + + if (wp.work_ == maxWorkTo && pos < instance_->NumberOfProcessors()) { + stepMaxWorkProcessorCount_[step] = pos; + } + } + } + + template + void ApplyMove(KlMoveStruct move, WorkWeightT workWeight) { + if (workWeight == 0) { + return; + } + + if (move.toStep_ != move.fromStep_) { + StepProcWork(move.toStep_, move.toProc_) += workWeight; + StepProcWork(move.fromStep_, move.fromProc_) -= workWeight; + + ArrangeSuperstepData(move.toStep_); + ArrangeSuperstepData(move.fromStep_); + + } else { + StepProcWork(move.toStep_, move.toProc_) += workWeight; + StepProcWork(move.fromStep_, move.fromProc_) -= workWeight; + ArrangeSuperstepData(move.toStep_); + } + } + + void SwapSteps(const unsigned step1, const unsigned step2) { + std::swap(stepProcessorWork_[step1], stepProcessorWork_[step2]); + std::swap(stepProcessorPosition_[step1], stepProcessorPosition_[step2]); + std::swap(stepMaxWorkProcessorCount_[step1], stepMaxWorkProcessorCount_[step2]); + } + + void OverrideNextSuperstep(unsigned step) 
{ + const unsigned nextStep = step + 1; + for (unsigned i = 0; i < instance_->NumberOfProcessors(); i++) { + stepProcessorWork_[nextStep][i] = stepProcessorWork_[step][i]; + stepProcessorPosition_[nextStep][i] = stepProcessorPosition_[step][i]; + } + stepMaxWorkProcessorCount_[nextStep] = stepMaxWorkProcessorCount_[step]; + } + + void ResetSuperstep(unsigned step) { + for (unsigned i = 0; i < instance_->NumberOfProcessors(); i++) { + stepProcessorWork_[step][i] = {0, i}; + stepProcessorPosition_[step][i] = i; + } + stepMaxWorkProcessorCount_[step] = instance_->NumberOfProcessors() - 1; + } + + void ComputeWorkDatastructures(unsigned startStep, unsigned endStep) { + for (unsigned step = startStep; step <= endStep; step++) { + stepMaxWorkProcessorCount_[step] = 0; + WorkWeightT maxWork = 0; + + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + stepProcessorWork_[step][proc].work_ = 0; + stepProcessorWork_[step][proc].proc_ = proc; + + for (const auto &node : setSchedule_->stepProcessorVertices_[step][proc]) { + const WorkWeightT vertexWorkWeight = instance_->GetComputationalDag().VertexWorkWeight(node); + totalWorkWeight_ += vertexWorkWeight; + maxWorkWeight_ = std::max(vertexWorkWeight, maxWorkWeight_); + stepProcessorWork_[step][proc].work_ += vertexWorkWeight; + } + + if (stepProcessorWork_[step][proc].work_ > maxWork) { + maxWork = stepProcessorWork_[step][proc].work_; + stepMaxWorkProcessorCount_[step] = 1; + } else if (stepProcessorWork_[step][proc].work_ == maxWork + && stepMaxWorkProcessorCount_[step] < (instance_->NumberOfProcessors() - 1)) { + stepMaxWorkProcessorCount_[step]++; + } + } + + std::sort(stepProcessorWork_[step].begin(), stepProcessorWork_[step].end()); + unsigned pos = 0; + for (const auto &wp : stepProcessorWork_[step]) { + stepProcessorPosition_[step][wp.proc_] = pos++; + } + } + } +}; + +template +struct ThreadLocalActiveScheduleData { + using VertexType = VertexIdxT; + using EdgeType = EdgeDescT; + + using KlMove 
= KlMoveStruct; + + std::unordered_set currentViolations_; + std::vector appliedMoves_; + + CostT cost_ = 0; + CostT initialCost_ = 0; + bool feasible_ = true; + + CostT bestCost_ = 0; + unsigned bestScheduleIdx_ = 0; + + std::unordered_map newViolations_; + std::unordered_set resolvedViolations_; + + inline void InitializeCost(CostT cost) { + initialCost_ = cost; + cost_ = cost; + bestCost_ = cost; + feasible_ = true; + } + + inline void UpdateCost(CostT changeInCost) { + cost_ += changeInCost; + + if (cost_ <= bestCost_ && feasible_) { + bestCost_ = cost_; + bestScheduleIdx_ = static_cast(appliedMoves_.size()); + } + } +}; + +template +class KlActiveSchedule { + private: + using VertexType = VertexIdxT; + using EdgeType = EdgeDescT; + using KlMove = KlMoveStruct; + using ThreadDataT = ThreadLocalActiveScheduleData; + + const BspInstance *instance_; + + VectorSchedule vectorSchedule_; + SetSchedule setSchedule_; + + CostT cost_ = 0; + bool feasible_ = true; + + public: + virtual ~KlActiveSchedule() = default; + + inline const BspInstance &GetInstance() const { return *instance_; } + + inline const VectorSchedule &GetVectorSchedule() const { return vectorSchedule_; } + + inline VectorSchedule &GetVectorSchedule() { return vectorSchedule_; } + + inline const SetSchedule &GetSetSchedule() const { return setSchedule_; } + + inline CostT GetCost() { return cost_; } + + inline bool IsFeasible() { return feasible_; } + + inline unsigned NumSteps() const { return vectorSchedule_.NumberOfSupersteps(); } + + inline unsigned AssignedProcessor(VertexType node) const { return vectorSchedule_.AssignedProcessor(node); } + + inline unsigned AssignedSuperstep(VertexType node) const { return vectorSchedule_.AssignedSuperstep(node); } + + inline VWorkwT GetStepMaxWork(unsigned step) const { return workDatastructures_.StepMaxWork(step); } + + inline VWorkwT GetStepSecondMaxWork(unsigned step) const { return workDatastructures_.StepSecondMaxWork(step); } + + inline std::vector 
&GetStepMaxWorkProcessorCount() { return workDatastructures_.stepMaxWorkProcessorCount_; } + + inline VWorkwT GetStepProcessorWork(unsigned step, unsigned proc) const { + return workDatastructures_.StepProcWork(step, proc); + } + + inline PreMoveWorkData> GetPreMoveWorkData(KlMove move) { + return workDatastructures_.GetPreMoveWorkData(move); + } + + inline VWorkwT GetMaxWorkWeight() { return workDatastructures_.maxWorkWeight_; } + + inline VWorkwT GetTotalWorkWeight() { return workDatastructures_.totalWorkWeight_; } + + inline void SetCost(CostT cost) { cost_ = cost; } + + constexpr static bool useMemoryConstraint_ = isLocalSearchMemoryConstraintV; + + MemoryConstraintT memoryConstraint_; + + KlActiveScheduleWorkDatastructures workDatastructures_; + + inline VWorkwT GetStepTotalWork(unsigned step) const { + VWorkwT totalWork = 0; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + totalWork += workDatastructures_.StepProcWork(step, proc); + } + return totalWork; + } + + void ApplyMove(KlMove move, ThreadDataT &threadData) { + vectorSchedule_.SetAssignedProcessor(move.node_, move.toProc_); + vectorSchedule_.SetAssignedSuperstep(move.node_, move.toStep_); + + setSchedule_.stepProcessorVertices_[move.fromStep_][move.fromProc_].erase(move.node_); + setSchedule_.stepProcessorVertices_[move.toStep_][move.toProc_].insert(move.node_); + + UpdateViolations(move.node_, threadData); + threadData.appliedMoves_.push_back(move); + + workDatastructures_.ApplyMove(move, instance_->GetComputationalDag().VertexWorkWeight(move.node_)); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.ApplyMove(move.node_, move.fromProc_, move.fromStep_, move.toProc_, move.toStep_); + } + } + + template + void RevertToBestSchedule(unsigned startMove, + unsigned insertStep, + CommDatastructuresT &commDatastructures, + ThreadDataT &threadData, + unsigned startStep, + unsigned &endStep) { + const unsigned bound = std::max(startMove, threadData.bestScheduleIdx_); + 
RevertMoves(bound, commDatastructures, threadData, startStep, endStep); + + if (startMove > threadData.bestScheduleIdx_) { + SwapEmptyStepBwd(++endStep, insertStep); + } + + RevertMoves(threadData.bestScheduleIdx_, commDatastructures, threadData, startStep, endStep); + +#ifdef KL_DEBUG + if (not threadData.feasible_) { + std::cout << "Reverted to best schedule with cost: " << threadData.bestCost_ << " and " + << vectorSchedule_.NumberOfSupersteps() << " supersteps" << std::endl; + } +#endif + + threadData.appliedMoves_.clear(); + threadData.bestScheduleIdx_ = 0; + threadData.currentViolations_.clear(); + threadData.feasible_ = true; + threadData.cost_ = threadData.bestCost_; + } + + template + void RevertScheduleToBound(const size_t bound, + const CostT newCost, + const bool isFeasible, + CommDatastructuresT &commDatastructures, + ThreadDataT &threadData, + unsigned startStep, + unsigned endStep) { + RevertMoves(bound, commDatastructures, threadData, startStep, endStep); + + threadData.currentViolations_.clear(); + threadData.feasible_ = isFeasible; + threadData.cost_ = newCost; + } + + void ComputeViolations(ThreadDataT &threadData); + void ComputeWorkMemoryDatastructures(unsigned startStep, unsigned endStep); + void WriteSchedule(BspSchedule &schedule); + inline void Initialize(const IBspSchedule &schedule); + inline void Clear(); + void RemoveEmptyStep(unsigned step); + void InsertEmptyStep(unsigned step); + void SwapEmptyStepFwd(const unsigned step, const unsigned toStep); + void SwapEmptyStepBwd(const unsigned toStep, const unsigned emptyStep); + void SwapSteps(const unsigned step1, const unsigned step2); + + private: + template + void RevertMoves(const size_t bound, + CommDatastructuresT &commDatastructures, + ThreadDataT &threadData, + unsigned startStep, + unsigned endStep) { + while (threadData.appliedMoves_.size() > bound) { + const auto move = threadData.appliedMoves_.back().ReverseMove(); + threadData.appliedMoves_.pop_back(); + + 
vectorSchedule_.SetAssignedProcessor(move.node_, move.toProc_); + vectorSchedule_.SetAssignedSuperstep(move.node_, move.toStep_); + + setSchedule_.stepProcessorVertices_[move.fromStep_][move.fromProc_].erase(move.node_); + setSchedule_.stepProcessorVertices_[move.toStep_][move.toProc_].insert(move.node_); + workDatastructures_.ApplyMove(move, instance_->GetComputationalDag().VertexWorkWeight(move.node_)); + commDatastructures.UpdateDatastructureAfterMove(move, startStep, endStep); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.ApplyMove(move.node_, move.fromProc_, move.fromStep_, move.toProc_, move.toStep_); + } + } + } + + void UpdateViolations(VertexType node, ThreadDataT &threadData) { + threadData.newViolations_.clear(); + threadData.resolvedViolations_.clear(); + + const unsigned nodeStep = vectorSchedule_.AssignedSuperstep(node); + const unsigned nodeProc = vectorSchedule_.AssignedProcessor(node); + + for (const auto &edge : OutEdges(node, instance_->GetComputationalDag())) { + const auto &child = Target(edge, instance_->GetComputationalDag()); + + if (threadData.currentViolations_.find(edge) == threadData.currentViolations_.end()) { + if ((nodeStep > vectorSchedule_.AssignedSuperstep(child)) + || (nodeStep == vectorSchedule_.AssignedSuperstep(child) + && nodeProc != vectorSchedule_.AssignedProcessor(child))) { + threadData.currentViolations_.insert(edge); + threadData.newViolations_[child] = edge; + } + } else { + if ((nodeStep < vectorSchedule_.AssignedSuperstep(child)) + || (nodeStep == vectorSchedule_.AssignedSuperstep(child) + && nodeProc == vectorSchedule_.AssignedProcessor(child))) { + threadData.currentViolations_.erase(edge); + threadData.resolvedViolations_.insert(edge); + } + } + } + + for (const auto &edge : InEdges(node, instance_->GetComputationalDag())) { + const auto &parent = Source(edge, instance_->GetComputationalDag()); + + if (threadData.currentViolations_.find(edge) == threadData.currentViolations_.end()) { + if ((nodeStep < 
vectorSchedule_.AssignedSuperstep(parent)) + || (nodeStep == vectorSchedule_.AssignedSuperstep(parent) + && nodeProc != vectorSchedule_.AssignedProcessor(parent))) { + threadData.currentViolations_.insert(edge); + threadData.newViolations_[parent] = edge; + } + } else { + if ((nodeStep > vectorSchedule_.AssignedSuperstep(parent)) + || (nodeStep == vectorSchedule_.AssignedSuperstep(parent) + && nodeProc == vectorSchedule_.AssignedProcessor(parent))) { + threadData.currentViolations_.erase(edge); + threadData.resolvedViolations_.insert(edge); + } + } + } + +#ifdef KL_DEBUG + + if (threadData.newViolations_.size() > 0) { + std::cout << "New violations: " << std::endl; + for (const auto &edge : threadData.newViolations_) { + std::cout << "Edge: " << Source(edge.second, instance_->GetComputationalDag()) << " -> " + << Target(edge.second, instance_->GetComputationalDag()) << std::endl; + } + } + + if (threadData.resolvedViolations_.size() > 0) { + std::cout << "Resolved violations: " << std::endl; + for (const auto &edge : threadData.resolvedViolations_) { + std::cout << "Edge: " << Source(edge, instance_->GetComputationalDag()) << " -> " + << Target(edge, instance_->GetComputationalDag()) << std::endl; + } + } + +#endif + + if (threadData.currentViolations_.size() > 0) { + threadData.feasible_ = false; + } else { + threadData.feasible_ = true; + } + } +}; + +template +void KlActiveSchedule::Clear() { + workDatastructures_.Clear(); + vectorSchedule_.Clear(); + setSchedule_.Clear(); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.Clear(); + } +} + +template +void KlActiveSchedule::ComputeViolations(ThreadDataT &threadData) { + threadData.currentViolations_.clear(); + threadData.feasible_ = true; + + for (const auto &edge : Edges(instance_->GetComputationalDag())) { + const auto &sourceV = Source(edge, instance_->GetComputationalDag()); + const auto &targetV = Target(edge, instance_->GetComputationalDag()); + + const unsigned sourceProc = 
AssignedProcessor(sourceV); + const unsigned targetProc = AssignedProcessor(targetV); + const unsigned sourceStep = AssignedSuperstep(sourceV); + const unsigned targetStep = AssignedSuperstep(targetV); + + if (sourceStep > targetStep || (sourceStep == targetStep && sourceProc != targetProc)) { + threadData.currentViolations_.insert(edge); + threadData.feasible_ = false; + } + } +} + +template +void KlActiveSchedule::Initialize(const IBspSchedule &schedule) { + instance_ = &schedule.GetInstance(); + vectorSchedule_ = VectorSchedule(schedule); + setSchedule_ = SetSchedule(schedule); + workDatastructures_.Initialize(setSchedule_, *instance_, NumSteps()); + + cost_ = 0; + feasible_ = true; + + if constexpr (useMemoryConstraint_) { + memoryConstraint_.Initialize(setSchedule_, vectorSchedule_); + } + + ComputeWorkMemoryDatastructures(0, NumSteps() - 1); +} + +template +void KlActiveSchedule::ComputeWorkMemoryDatastructures(unsigned startStep, unsigned endStep) { + if constexpr (useMemoryConstraint_) { + memoryConstraint_.ComputeMemoryDatastructure(startStep, endStep); + } + workDatastructures_.ComputeWorkDatastructures(startStep, endStep); +} + +template +void KlActiveSchedule::WriteSchedule(BspSchedule &schedule) { + for (const auto v : instance_->Vertices()) { + schedule.SetAssignedProcessor(v, vectorSchedule_.AssignedProcessor(v)); + schedule.SetAssignedSuperstep(v, vectorSchedule_.AssignedSuperstep(v)); + } + schedule.UpdateNumberOfSupersteps(); +} + +template +void KlActiveSchedule::RemoveEmptyStep(unsigned step) { + for (unsigned i = step; i < NumSteps() - 1; i++) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + for (const auto node : setSchedule_.stepProcessorVertices_[i + 1][proc]) { + vectorSchedule_.SetAssignedSuperstep(node, i); + } + } + std::swap(setSchedule_.stepProcessorVertices_[i], setSchedule_.stepProcessorVertices_[i + 1]); + workDatastructures_.SwapSteps(i, i + 1); + if constexpr (useMemoryConstraint_) { + 
memoryConstraint_.SwapSteps(i, i + 1); + } + } + vectorSchedule_.numberOfSupersteps_--; +} + +template +void KlActiveSchedule::SwapEmptyStepFwd(const unsigned step, const unsigned toStep) { + for (unsigned i = step; i < toStep; i++) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + for (const auto node : setSchedule_.stepProcessorVertices_[i + 1][proc]) { + vectorSchedule_.SetAssignedSuperstep(node, i); + } + } + std::swap(setSchedule_.stepProcessorVertices_[i], setSchedule_.stepProcessorVertices_[i + 1]); + workDatastructures_.SwapSteps(i, i + 1); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.SwapSteps(i, i + 1); + } + } +} + +template +void KlActiveSchedule::InsertEmptyStep(unsigned step) { + unsigned i = vectorSchedule_.IncrementNumberOfSupersteps(); + + for (; i > step; i--) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + for (const auto node : setSchedule_.stepProcessorVertices_[i - 1][proc]) { + vectorSchedule_.SetAssignedSuperstep(node, i); + } + } + std::swap(setSchedule_.stepProcessorVertices_[i], setSchedule_.stepProcessorVertices_[i - 1]); + workDatastructures_.SwapSteps(i - 1, i); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.SwapSteps(i - 1, i); + } + } +} + +template +void KlActiveSchedule::SwapEmptyStepBwd(const unsigned toStep, const unsigned emptyStep) { + unsigned i = toStep; + + for (; i > emptyStep; i--) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + for (const auto node : setSchedule_.stepProcessorVertices_[i - 1][proc]) { + vectorSchedule_.SetAssignedSuperstep(node, i); + } + } + std::swap(setSchedule_.stepProcessorVertices_[i], setSchedule_.stepProcessorVertices_[i - 1]); + workDatastructures_.SwapSteps(i - 1, i); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.SwapSteps(i - 1, i); + } + } +} + +template +void KlActiveSchedule::SwapSteps(const unsigned step1, const unsigned step2) { + if (step1 == step2) { + 
return; + } + + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + for (const auto node : setSchedule_.stepProcessorVertices_[step1][proc]) { + vectorSchedule_.SetAssignedSuperstep(node, step2); + } + for (const auto node : setSchedule_.stepProcessorVertices_[step2][proc]) { + vectorSchedule_.SetAssignedSuperstep(node, step1); + } + } + std::swap(setSchedule_.stepProcessorVertices_[step1], setSchedule_.stepProcessorVertices_[step2]); + workDatastructures_.SwapSteps(step1, step2); + if constexpr (useMemoryConstraint_) { + memoryConstraint_.SwapSteps(step1, step2); + } +} + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp deleted file mode 100644 index f9a921e4..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp +++ /dev/null @@ -1,2413 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner -*/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "kl_current_schedule.hpp" -#include "osp/auxiliary/misc.hpp" -#include "osp/bsp/scheduler/ImprovementScheduler.hpp" -#include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" -#include "osp/graph_algorithms/directed_graph_util.hpp" - -// #define KL_PRINT_SCHEDULE - -#ifdef KL_PRINT_SCHEDULE -# include "file_interactions/DotFileWriter.hpp" -#endif - -namespace osp { - -struct kl_base_parameter { - double max_div_best_sol_base_percent = 1.05; - double max_div_best_sol_rate_percent = 0.002; - - unsigned max_num_unlocks = 1; - unsigned max_num_failed_branches = 5; - - unsigned max_inner_iterations = 150; - unsigned max_outer_iterations = 100; - - unsigned max_no_improvement_iterations = 75; - - std::size_t selection_threshold; - bool select_all_nodes = false; - - double initial_penalty = 0.0; - - double gain_threshold = -10.0; - double change_in_cost_threshold = 0.0; - - bool quick_pass = false; - - unsigned max_step_selection_epochs = 4; - unsigned reset_epoch_counter_threshold = 10; - - unsigned violations_threshold = 0; -}; - -template -class kl_base : public ImprovementScheduler, public Ikl_cost_function { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph concept"); - static_assert(has_hashable_edge_desc_v, "Graph_t must satisfy the has_hashable_edge_desc concept"); - static_assert(is_computational_dag_v, "Graph_t must satisfy the computational_dag concept"); - - private: - using memw_t = v_memw_t; - using commw_t = v_commw_t; - using workw_t = v_workw_t; - - protected: - using VertexType = vertex_idx_t; - - kl_base_parameter parameters; - - std::mt19937 gen; - - VertexType num_nodes; - unsigned num_procs; - - double penalty = 0.0; - double reward = 0.0; - - virtual void update_reward_penalty() = 0; - virtual void set_initial_reward_penalty() = 0; - - boost::heap::fibonacci_heap> 
max_gain_heap; - using heap_handle = typename boost::heap::fibonacci_heap>::handle_type; - - std::unordered_map node_heap_handles; - - std::vector>> node_gains; - std::vector>> node_change_in_costs; - - kl_current_schedule ¤t_schedule; - - BspSchedule *best_schedule; - double best_schedule_costs; - - std::unordered_set locked_nodes; - std::unordered_set super_locked_nodes; - std::vector unlock; - - bool unlock_node(VertexType node) { - if (super_locked_nodes.find(node) == super_locked_nodes.end()) { - if (locked_nodes.find(node) == locked_nodes.end()) { - return true; - } else if (locked_nodes.find(node) != locked_nodes.end() && unlock[node] > 0) { - unlock[node]--; - - locked_nodes.erase(node); - - return true; - } - } - return false; - } - - bool check_node_unlocked(VertexType node) { - if (super_locked_nodes.find(node) == super_locked_nodes.end() && locked_nodes.find(node) == locked_nodes.end()) { - return true; - } - return false; - }; - - void reset_locked_nodes() { - for (const auto &i : locked_nodes) { - unlock[i] = parameters.max_num_unlocks; - } - - locked_nodes.clear(); - } - - bool check_violation_locked() { - if (current_schedule.current_violations.empty()) { - return false; - } - - for (auto &edge : current_schedule.current_violations) { - const auto &source_v = source(edge, current_schedule.instance->getComputationalDag()); - const auto &target_v = target(edge, current_schedule.instance->getComputationalDag()); - - if (locked_nodes.find(source_v) == locked_nodes.end() || locked_nodes.find(target_v) == locked_nodes.end()) { - return false; - } - - bool abort = false; - if (locked_nodes.find(source_v) != locked_nodes.end()) { - if (unlock_node(source_v)) { - nodes_to_update.insert(source_v); - node_selection.insert(source_v); - } else { - abort = true; - } - } - - if (locked_nodes.find(target_v) != locked_nodes.end()) { - if (unlock_node(target_v)) { - nodes_to_update.insert(target_v); - node_selection.insert(target_v); - abort = false; - } - } - - if 
(abort) { - return true; - } - } - - return false; - } - - void reset_gain_heap() { - max_gain_heap.clear(); - node_heap_handles.clear(); - } - - virtual void initialize_datastructures() { -#ifdef KL_DEBUG - std::cout << "KLBase initialize datastructures" << std::endl; -#endif - - node_gains = std::vector>>( - num_nodes, std::vector>(num_procs, std::vector(3, 0))); - - node_change_in_costs = std::vector>>( - num_nodes, std::vector>(num_procs, std::vector(3, 0))); - - unlock = std::vector(num_nodes, parameters.max_num_unlocks); - - current_schedule.initialize_current_schedule(*best_schedule); - best_schedule_costs = current_schedule.current_cost; - } - - std::unordered_set nodes_to_update; - - void compute_nodes_to_update(kl_move move) { - nodes_to_update.clear(); - - for (const auto &target : current_schedule.instance->getComputationalDag().children(move.node)) { - if (node_selection.find(target) != node_selection.end() && locked_nodes.find(target) == locked_nodes.end() - && super_locked_nodes.find(target) == super_locked_nodes.end()) { - nodes_to_update.insert(target); - } - } - - for (const auto &source : current_schedule.instance->getComputationalDag().parents(move.node)) { - if (node_selection.find(source) != node_selection.end() && locked_nodes.find(source) == locked_nodes.end() - && super_locked_nodes.find(source) == super_locked_nodes.end()) { - nodes_to_update.insert(source); - } - } - - const unsigned start_step = std::min(move.from_step, move.to_step) == 0 ? 
0 : std::min(move.from_step, move.to_step) - 1; - const unsigned end_step = std::min(current_schedule.num_steps(), std::max(move.from_step, move.to_step) + 2); - -#ifdef KL_DEBUG - std::cout << "updating from step " << start_step << " to step " << end_step << std::endl; -#endif - - for (unsigned step = start_step; step < end_step; step++) { - for (unsigned proc = 0; proc < num_procs; proc++) { - for (const auto &node : current_schedule.set_schedule.step_processor_vertices[step][proc]) { - if (node_selection.find(node) != node_selection.end() && locked_nodes.find(node) == locked_nodes.end() - && super_locked_nodes.find(node) == super_locked_nodes.end()) { - nodes_to_update.insert(node); - } - } - } - } - } - - void initialize_gain_heap(const std::unordered_set &nodes) { - reset_gain_heap(); - - for (const auto &node : nodes) { - compute_node_gain(node); - compute_max_gain_insert_or_update_heap(node); - } - } - - void initialize_gain_heap_unlocked_nodes(const std::unordered_set &nodes) { - reset_gain_heap(); - - for (const auto &node : nodes) { - if (locked_nodes.find(node) == locked_nodes.end() && super_locked_nodes.find(node) == super_locked_nodes.end()) { - compute_node_gain(node); - compute_max_gain_insert_or_update_heap(node); - } - } - } - - void compute_node_gain(VertexType node) { - const unsigned ¤t_proc = current_schedule.vector_schedule.assignedProcessor(node); - const unsigned ¤t_step = current_schedule.vector_schedule.assignedSuperstep(node); - - for (unsigned new_proc = 0; new_proc < num_procs; new_proc++) { - if (current_schedule.instance->isCompatible(node, new_proc)) { - node_gains[node][new_proc][0] = 0.0; - node_gains[node][new_proc][1] = 0.0; - node_gains[node][new_proc][2] = 0.0; - - node_change_in_costs[node][new_proc][0] = 0; - node_change_in_costs[node][new_proc][1] = 0; - node_change_in_costs[node][new_proc][2] = 0; - - compute_comm_gain(node, current_step, current_proc, new_proc); - compute_work_gain(node, current_step, current_proc, 
new_proc); - - if constexpr (current_schedule.use_memory_constraint) { - if (not current_schedule.memory_constraint.can_move( - node, new_proc, current_schedule.vector_schedule.assignedSuperstep(node))) { - node_gains[node][new_proc][1] = std::numeric_limits::lowest(); - } - - if (current_schedule.vector_schedule.assignedSuperstep(node) > 0) { - if (not current_schedule.memory_constraint.can_move( - node, new_proc, current_schedule.vector_schedule.assignedSuperstep(node) - 1)) { - node_gains[node][new_proc][0] = std::numeric_limits::lowest(); - } - } - if (current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - if (not current_schedule.memory_constraint.can_move( - node, new_proc, current_schedule.vector_schedule.assignedSuperstep(node) + 1)) { - node_gains[node][new_proc][2] = std::numeric_limits::lowest(); - } - } - } - - } else { - node_gains[node][new_proc][0] = std::numeric_limits::lowest(); - node_gains[node][new_proc][1] = std::numeric_limits::lowest(); - node_gains[node][new_proc][2] = std::numeric_limits::lowest(); - } - } - } - - double compute_max_gain_insert_or_update_heap(VertexType node) { - double node_max_gain = std::numeric_limits::lowest(); - double node_change_in_cost = 0; - unsigned node_best_step = 0; - unsigned node_best_proc = 0; - - double proc_change_in_cost = 0; - double proc_max = 0; - unsigned best_step = 0; - - for (unsigned proc = 0; proc < num_procs; proc++) { - int rand_count = 0; - - if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 - && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - if (node_gains[node][proc][0] > node_gains[node][proc][1]) { - if (node_gains[node][proc][0] > node_gains[node][proc][2]) { - proc_max = node_gains[node][proc][0]; - proc_change_in_cost = node_change_in_costs[node][proc][0]; - best_step = 0; - - } else { - proc_max = node_gains[node][proc][2]; - proc_change_in_cost = 
node_change_in_costs[node][proc][2]; - best_step = 2; - } - - } else { - if (node_gains[node][proc][1] > node_gains[node][proc][2]) { - proc_max = node_gains[node][proc][1]; - proc_change_in_cost = node_change_in_costs[node][proc][1]; - best_step = 1; - } else { - proc_max = node_gains[node][proc][2]; - proc_change_in_cost = node_change_in_costs[node][proc][2]; - best_step = 2; - } - } - - } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 - && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - if (node_gains[node][proc][2] > node_gains[node][proc][1]) { - proc_max = node_gains[node][proc][2]; - proc_change_in_cost = node_change_in_costs[node][proc][2]; - best_step = 2; - } else { - proc_max = node_gains[node][proc][1]; - proc_change_in_cost = node_change_in_costs[node][proc][1]; - best_step = 1; - } - - } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 - && current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) { - if (node_gains[node][proc][1] > node_gains[node][proc][0]) { - proc_max = node_gains[node][proc][1]; - proc_change_in_cost = node_change_in_costs[node][proc][1]; - best_step = 1; - } else { - proc_max = node_gains[node][proc][0]; - proc_change_in_cost = node_change_in_costs[node][proc][0]; - best_step = 0; - } - } else { - proc_max = node_gains[node][proc][1]; - proc_change_in_cost = node_change_in_costs[node][proc][1]; - best_step = 1; - } - - if (node_max_gain < proc_max) { - node_max_gain = proc_max; - node_change_in_cost = proc_change_in_cost; - node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1; - node_best_proc = proc; - rand_count = 0; - - } else if (node_max_gain <= proc_max) { // only == - - if (rand() % (2 + rand_count) == 0) { - node_max_gain = proc_max; - node_change_in_cost = proc_change_in_cost; - node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + 
best_step - 1; - node_best_proc = proc; - rand_count++; - } - } - } - - if (node_heap_handles.find(node) != node_heap_handles.end()) { - (*node_heap_handles[node]).to_proc = node_best_proc; - (*node_heap_handles[node]).to_step = node_best_step; - (*node_heap_handles[node]).change_in_cost = node_change_in_cost; - - if ((*node_heap_handles[node]).gain >= node_max_gain) { - (*node_heap_handles[node]).gain = node_max_gain; - max_gain_heap.update(node_heap_handles[node]); - } - - } else { - // if (node_max_gain < parameters.gain_threshold && node_change_in_cost > - // parameters.change_in_cost_threshold) - // return node_max_gain; - - kl_move move(node, - node_max_gain, - node_change_in_cost, - current_schedule.vector_schedule.assignedProcessor(node), - current_schedule.vector_schedule.assignedSuperstep(node), - node_best_proc, - node_best_step); - node_heap_handles[node] = max_gain_heap.push(move); - } - - return node_max_gain; - } - - void compute_work_gain(VertexType node, unsigned current_step, unsigned current_proc, unsigned new_proc) { - if (current_proc == new_proc) { - node_gains[node][current_proc][1] = std::numeric_limits::lowest(); - - } else { - if (current_schedule.step_max_work[current_step] == current_schedule.step_processor_work[current_step][current_proc] - && current_schedule.step_processor_work[current_step][current_proc] - > current_schedule.step_second_max_work[current_step]) { - // new max - const double new_max_work - = std::max(current_schedule.step_processor_work[current_step][current_proc] - - current_schedule.instance->getComputationalDag().vertex_work_weight(node), - current_schedule.step_second_max_work[current_step]); - - if (current_schedule.step_processor_work[current_step][new_proc] - + current_schedule.instance->getComputationalDag().vertex_work_weight(node) - > new_max_work) { - const double gain - = static_cast(current_schedule.step_max_work[current_step]) - - (static_cast(current_schedule.step_processor_work[current_step][new_proc]) 
- + static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node))); - - node_gains[node][new_proc][1] += gain; - node_change_in_costs[node][new_proc][1] -= gain; - - } else { - const double gain - = static_cast(current_schedule.step_max_work[current_step]) - static_cast(new_max_work); - - node_gains[node][new_proc][1] += gain; - node_change_in_costs[node][new_proc][1] -= gain; - } - - } else { - if (current_schedule.step_max_work[current_step] - < current_schedule.instance->getComputationalDag().vertex_work_weight(node) - + current_schedule.step_processor_work[current_step][new_proc]) { - const double gain - = (static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) - + static_cast(current_schedule.step_processor_work[current_step][new_proc]) - - static_cast(current_schedule.step_max_work[current_step])); - - node_gains[node][new_proc][1] -= gain; - node_change_in_costs[node][new_proc][1] += gain; - } - } - } - - if (current_step > 0) { - if (current_schedule.step_max_work[current_step - 1] - < current_schedule.step_processor_work[current_step - 1][new_proc] - + current_schedule.instance->getComputationalDag().vertex_work_weight(node)) { - const double gain = static_cast(current_schedule.step_processor_work[current_step - 1][new_proc]) - + static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) - - static_cast(current_schedule.step_max_work[current_step - 1]); - - node_gains[node][new_proc][0] -= gain; - - node_change_in_costs[node][new_proc][0] += gain; - } - - if (current_schedule.step_max_work[current_step] == current_schedule.step_processor_work[current_step][current_proc] - && current_schedule.step_processor_work[current_step][current_proc] - > current_schedule.step_second_max_work[current_step]) { - if (current_schedule.step_max_work[current_step] - - current_schedule.instance->getComputationalDag().vertex_work_weight(node) - > 
current_schedule.step_second_max_work[current_step]) { - const double gain = current_schedule.instance->getComputationalDag().vertex_work_weight(node); - node_gains[node][new_proc][0] += gain; - node_change_in_costs[node][new_proc][0] -= gain; - - } else { - const double gain - = current_schedule.step_max_work[current_step] - current_schedule.step_second_max_work[current_step]; - - node_gains[node][new_proc][0] += gain; - node_change_in_costs[node][new_proc][0] -= gain; - } - } - - } else { - node_gains[node][new_proc][0] = std::numeric_limits::lowest(); - } - - if (current_step < current_schedule.num_steps() - 1) { - if (current_schedule.step_max_work[current_step + 1] - < current_schedule.step_processor_work[current_step + 1][new_proc] - + current_schedule.instance->getComputationalDag().vertex_work_weight(node)) { - const double gain = static_cast(current_schedule.step_processor_work[current_step + 1][new_proc]) - + static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) - - static_cast(current_schedule.step_max_work[current_step + 1]); - - node_gains[node][new_proc][2] -= gain; - node_change_in_costs[node][new_proc][2] += gain; - } - - if (current_schedule.step_max_work[current_step] == current_schedule.step_processor_work[current_step][current_proc] - && current_schedule.step_processor_work[current_step][current_proc] - > current_schedule.step_second_max_work[current_step]) { - if ((current_schedule.step_max_work[current_step] - - current_schedule.instance->getComputationalDag().vertex_work_weight(node)) - > current_schedule.step_second_max_work[current_step]) { - const double gain = current_schedule.instance->getComputationalDag().vertex_work_weight(node); - - node_gains[node][new_proc][2] += gain; - node_change_in_costs[node][new_proc][2] -= gain; - - } else { - const double gain - = current_schedule.step_max_work[current_step] - current_schedule.step_second_max_work[current_step]; - - node_gains[node][new_proc][2] += gain; - 
node_change_in_costs[node][new_proc][2] -= gain; - } - } - } else { - node_gains[node][new_proc][2] = std::numeric_limits::lowest(); - } - } - - virtual void compute_comm_gain(vertex_idx_t node, unsigned current_step, unsigned current_proc, unsigned new_proc) = 0; - - void update_node_gains(const std::unordered_set &nodes) { - for (const auto &node : nodes) { - compute_node_gain(node); - compute_max_gain_insert_or_update_heap(node); - } - }; - - kl_move find_best_move() { - const unsigned local_max = 50; - std::vector max_nodes(local_max); - unsigned count = 0; - for (auto iter = max_gain_heap.ordered_begin(); iter != max_gain_heap.ordered_end(); ++iter) { - if (iter->gain >= max_gain_heap.top().gain && count < local_max) { - max_nodes[count] = (iter->node); - count++; - - } else { - break; - } - } - - std::uniform_int_distribution dis(0, count - 1); - unsigned i = dis(gen); - - kl_move best_move = kl_move((*node_heap_handles[max_nodes[i]])); - - max_gain_heap.erase(node_heap_handles[max_nodes[i]]); - node_heap_handles.erase(max_nodes[i]); - - return best_move; - } - - kl_move compute_best_move(VertexType node) { - double node_max_gain = std::numeric_limits::lowest(); - double node_change_in_cost = 0; - unsigned node_best_step = 0; - unsigned node_best_proc = 0; - - double proc_change_in_cost = 0; - double proc_max = 0; - unsigned best_step = 0; - for (unsigned proc = 0; proc < num_procs; proc++) { - unsigned rand_count = 0; - - if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 - && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - if (node_gains[node][proc][0] > node_gains[node][proc][1]) { - if (node_gains[node][proc][0] > node_gains[node][proc][2]) { - proc_max = node_gains[node][proc][0]; - proc_change_in_cost = node_change_in_costs[node][proc][0]; - best_step = 0; - - } else { - proc_max = node_gains[node][proc][2]; - proc_change_in_cost = node_change_in_costs[node][proc][2]; - best_step = 2; - } - 
- } else { - if (node_gains[node][proc][1] > node_gains[node][proc][2]) { - proc_max = node_gains[node][proc][1]; - proc_change_in_cost = node_change_in_costs[node][proc][1]; - best_step = 1; - } else { - proc_max = node_gains[node][proc][2]; - proc_change_in_cost = node_change_in_costs[node][proc][2]; - best_step = 2; - } - } - - } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 - && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - if (node_gains[node][proc][2] > node_gains[node][proc][1]) { - proc_max = node_gains[node][proc][2]; - proc_change_in_cost = node_change_in_costs[node][proc][2]; - best_step = 2; - } else { - proc_max = node_gains[node][proc][1]; - proc_change_in_cost = node_change_in_costs[node][proc][1]; - best_step = 1; - } - - } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 - && current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) { - if (node_gains[node][proc][1] > node_gains[node][proc][0]) { - proc_max = node_gains[node][proc][1]; - proc_change_in_cost = node_change_in_costs[node][proc][1]; - best_step = 1; - } else { - proc_max = node_gains[node][proc][0]; - proc_change_in_cost = node_change_in_costs[node][proc][0]; - best_step = 0; - } - } else { - proc_max = node_gains[node][proc][1]; - proc_change_in_cost = node_change_in_costs[node][proc][1]; - best_step = 1; - } - - if (node_max_gain < proc_max) { - node_max_gain = proc_max; - node_change_in_cost = proc_change_in_cost; - node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1; - node_best_proc = proc; - rand_count = 0; - - } else if (node_max_gain <= proc_max) { - if (rand() % (2 + rand_count) == 0) { - node_max_gain = proc_max; - node_change_in_cost = proc_change_in_cost; - node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1; - node_best_proc = proc; - rand_count++; - } - } - } - - 
return kl_move(node, - node_max_gain, - node_change_in_cost, - current_schedule.vector_schedule.assignedProcessor(node), - current_schedule.vector_schedule.assignedSuperstep(node), - node_best_proc, - node_best_step); - } - - kl_move best_move_change_superstep(VertexType node) { - double node_max_gain = std::numeric_limits::lowest(); - double node_change_in_cost = 0; - unsigned node_best_step = 0; - unsigned node_best_proc = 0; - - double proc_change_in_cost = 0; - double proc_max = 0; - unsigned best_step = 0; - for (unsigned proc = 0; proc < num_procs; proc++) { - if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 - && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - if (node_gains[node][proc][0] > node_gains[node][proc][2]) { - proc_max = node_gains[node][proc][0]; - proc_change_in_cost = node_change_in_costs[node][proc][0]; - best_step = 0; - - } else { - proc_max = node_gains[node][proc][2]; - proc_change_in_cost = node_change_in_costs[node][proc][2]; - best_step = 2; - } - - } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 - && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - proc_max = node_gains[node][proc][2]; - proc_change_in_cost = node_change_in_costs[node][proc][2]; - best_step = 2; - - } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 - && current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) { - proc_max = node_gains[node][proc][0]; - proc_change_in_cost = node_change_in_costs[node][proc][0]; - best_step = 0; - - } else { - throw std::invalid_argument("error lk base best_move_change_superstep"); - } - - if (node_max_gain < proc_max) { - node_max_gain = proc_max; - node_change_in_cost = proc_change_in_cost; - node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1; - node_best_proc = proc; - } - } - - return kl_move(node, - 
node_max_gain, - node_change_in_cost, - current_schedule.vector_schedule.assignedProcessor(node), - current_schedule.vector_schedule.assignedSuperstep(node), - node_best_proc, - node_best_step); - } - - void save_best_schedule(const IBspSchedule &schedule) { - for (const auto &node : current_schedule.instance->vertices()) { - best_schedule->setAssignedProcessor(node, schedule.assignedProcessor(node)); - best_schedule->setAssignedSuperstep(node, schedule.assignedSuperstep(node)); - } - best_schedule->updateNumberOfSupersteps(); - } - - void reverse_move_best_schedule(kl_move move) { - best_schedule->setAssignedProcessor(move.node, move.from_proc); - best_schedule->setAssignedSuperstep(move.node, move.from_step); - } - - std::unordered_set node_selection; - - void select_nodes() { - if (parameters.select_all_nodes) { - for (const auto &node : current_schedule.instance->vertices()) { - if (super_locked_nodes.find(node) == super_locked_nodes.end()) { - node_selection.insert(node); - } - } - - } else { - select_nodes_threshold(parameters.selection_threshold - super_locked_nodes.size()); - } - } - - virtual void select_nodes_comm() { - for (const auto &node : current_schedule.instance->vertices()) { - if (super_locked_nodes.find(node) != super_locked_nodes.end()) { - continue; - } - - for (const auto &source : current_schedule.instance->getComputationalDag().parents(node)) { - if (current_schedule.vector_schedule.assignedProcessor(node) - != current_schedule.vector_schedule.assignedProcessor(source)) { - node_selection.insert(node); - break; - } - } - - for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - if (current_schedule.vector_schedule.assignedProcessor(node) - != current_schedule.vector_schedule.assignedProcessor(target)) { - node_selection.insert(node); - break; - } - } - } - } - - void select_nodes_threshold(std::size_t threshold) { - std::uniform_int_distribution> dis(0, num_nodes - 1); - - while (node_selection.size() 
< threshold) { - auto node = dis(gen); - - if (super_locked_nodes.find(node) == super_locked_nodes.end()) { - node_selection.insert(node); - } - } - } - - void select_nodes_permutation_threshold(std::size_t threshold) { - std::vector permutation(num_nodes); - std::iota(std::begin(permutation), std::end(permutation), 0); - - std::shuffle(permutation.begin(), permutation.end(), gen); - - for (std::size_t i = 0; i < threshold; i++) { - if (super_locked_nodes.find(permutation[i]) == super_locked_nodes.end()) { - node_selection.insert(permutation[i]); - } - } - } - - void select_nodes_violations() { - if (current_schedule.current_violations.empty()) { - select_nodes(); - return; - } - - for (const auto &edge : current_schedule.current_violations) { - const auto &source_v = source(edge, current_schedule.instance->getComputationalDag()); - const auto &target_v = target(edge, current_schedule.instance->getComputationalDag()); - - node_selection.insert(source_v); - node_selection.insert(target_v); - - for (const auto &child : current_schedule.instance->getComputationalDag().children(source_v)) { - if (child != target_v) { - node_selection.insert(child); - } - } - - for (const auto &parent : current_schedule.instance->getComputationalDag().parents(source_v)) { - if (parent != target_v) { - node_selection.insert(parent); - } - } - - for (const auto &child : current_schedule.instance->getComputationalDag().children(target_v)) { - if (child != source_v) { - node_selection.insert(child); - } - } - - for (const auto &parent : current_schedule.instance->getComputationalDag().parents(target_v)) { - if (parent != source_v) { - node_selection.insert(parent); - } - } - } - } - - void select_nodes_conseque_max_work(bool do_not_select_super_locked_nodes = false) { - if (step_selection_epoch_counter > parameters.max_step_selection_epochs) { -#ifdef KL_DEBUG - std::cout << "step selection epoch counter exceeded. 
conseque max work" << std::endl; -#endif - - select_nodes(); - return; - } - - unsigned max_work_step = 0; - unsigned max_step = 0; - unsigned second_max_work_step = 0; - unsigned second_max_step = 0; - - for (unsigned proc = 0; proc < num_procs; proc++) { - if (current_schedule.step_processor_work[step_selection_counter][proc] > max_work_step) { - second_max_work_step = max_work_step; - second_max_step = max_step; - max_work_step = current_schedule.step_processor_work[step_selection_counter][proc]; - max_step = proc; - - } else if (current_schedule.step_processor_work[step_selection_counter][proc] > second_max_work_step) { - second_max_work_step = current_schedule.step_processor_work[step_selection_counter][proc]; - second_max_step = proc; - } - } - - if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].size() - < parameters.selection_threshold * .66) { - node_selection.insert(current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].begin(), - current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].end()); - - } else { - std::sample(current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].begin(), - current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].end(), - std::inserter(node_selection, node_selection.end()), - static_cast(std::round(parameters.selection_threshold * .66)), - gen); - } - - if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].size() - < parameters.selection_threshold * .33) { - node_selection.insert( - current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].begin(), - current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].end()); - - } else { - std::sample(current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].begin(), 
- current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].end(), - std::inserter(node_selection, node_selection.end()), - static_cast(std::round(parameters.selection_threshold * .33)), - gen); - } - - if (do_not_select_super_locked_nodes) { - for (const auto &node : super_locked_nodes) { - node_selection.erase(node); - } - } - -#ifdef KL_DEBUG - std::cout << "step selection conseque max work, node selection size " << node_selection.size() - << " ... selected nodes assigend to superstep " << step_selection_counter << " and procs " << max_step - << " and " << second_max_step << std::endl; -#endif - - step_selection_counter++; - if (step_selection_counter >= current_schedule.num_steps()) { - step_selection_counter = 0; - step_selection_epoch_counter++; - } - } - - void select_nodes_check_remove_superstep() { - if (step_selection_epoch_counter > parameters.max_step_selection_epochs) { -#ifdef KL_DEBUG - std::cout << "step selection epoch counter exceeded, remove supersteps" << std::endl; -#endif - - select_nodes(); - return; - } - - for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps(); step_to_remove++) { -#ifdef KL_DEBUG - std::cout << "checking step to remove " << step_to_remove << " / " << current_schedule.num_steps() << std::endl; -#endif - - if (check_remove_superstep(step_to_remove)) { -#ifdef KL_DEBUG - std::cout << "trying to remove superstep " << step_to_remove << std::endl; -#endif - - if (scatter_nodes_remove_superstep(step_to_remove)) { - for (unsigned proc = 0; proc < num_procs; proc++) { - if (step_to_remove < current_schedule.num_steps()) { - node_selection.insert( - current_schedule.set_schedule.step_processor_vertices[step_to_remove][proc].begin(), - current_schedule.set_schedule.step_processor_vertices[step_to_remove][proc].end()); - } - - if (step_to_remove > 0) { - node_selection.insert( - current_schedule.set_schedule.step_processor_vertices[step_to_remove - 
1][proc].begin(), - current_schedule.set_schedule.step_processor_vertices[step_to_remove - 1][proc].end()); - } - } - - step_selection_counter = step_to_remove + 1; - - if (step_selection_counter >= current_schedule.num_steps()) { - step_selection_counter = 0; - step_selection_epoch_counter++; - } - - parameters.violations_threshold = 0; - super_locked_nodes.clear(); -#ifdef KL_DEBUG - std::cout << "---- reset super locked nodes" << std::endl; -#endif - - return; - } - } - } - -#ifdef KL_DEBUG - std::cout << "no superstep to remove" << std::endl; -#endif - - step_selection_epoch_counter++; - select_nodes(); - return; - } - - unsigned step_selection_counter = 0; - unsigned step_selection_epoch_counter = 0; - - bool auto_alternate = false; - bool alternate_reset_remove_superstep = false; - bool reset_superstep = false; - - virtual bool check_remove_superstep(unsigned step) { - if (current_schedule.num_steps() <= 2) { - return false; - } - - v_workw_t total_work = 0; - - for (unsigned proc = 0; proc < num_procs; proc++) { - total_work += current_schedule.step_processor_work[step][proc]; - } - - if (total_work < 2.0 * current_schedule.instance->synchronisationCosts()) { - return true; - } - return false; - } - - bool scatter_nodes_remove_superstep(unsigned step) { - assert(step < current_schedule.num_steps()); - - std::vector> moves; - - bool abort = false; - - for (unsigned proc = 0; proc < num_procs; proc++) { - for (const auto &node : current_schedule.set_schedule.step_processor_vertices[step][proc]) { - compute_node_gain(node); - moves.push_back(best_move_change_superstep(node)); - - if (moves.back().gain <= std::numeric_limits::lowest()) { - abort = true; - break; - } - - if constexpr (current_schedule.use_memory_constraint) { - current_schedule.memory_constraint.apply_move(node, proc, step, moves.back().to_proc, moves.back().to_step); - } - } - - if (abort) { - break; - } - } - - if (abort) { - current_schedule.recompute_neighboring_supersteps(step); - -#ifdef 
KL_DEBUG - BspSchedule tmp_schedule(current_schedule.set_schedule); - if (not tmp_schedule.satisfiesMemoryConstraints()) { - std::cout << "Mem const violated" << std::endl; - } -#endif - - return false; - } - - for (unsigned proc = 0; proc < num_procs; proc++) { - current_schedule.set_schedule.step_processor_vertices[step][proc].clear(); - } - - for (const auto &move : moves) { -#ifdef KL_DEBUG - std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step << std::endl; -#endif - - current_schedule.vector_schedule.setAssignedSuperstep(move.node, move.to_step); - current_schedule.vector_schedule.setAssignedProcessor(move.node, move.to_proc); - current_schedule.set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node); - } - - current_schedule.remove_superstep(step); - -#ifdef KL_DEBUG - BspSchedule tmp_schedule(current_schedule.set_schedule); - if (not tmp_schedule.satisfiesMemoryConstraints()) { - std::cout << "Mem const violated" << std::endl; - } -#endif - - return true; - } - - void select_nodes_check_reset_superstep() { - if (step_selection_epoch_counter > parameters.max_step_selection_epochs) { -#ifdef KL_DEBUG - std::cout << "step selection epoch counter exceeded, reset supersteps" << std::endl; -#endif - - select_nodes(); - return; - } - - for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps(); step_to_remove++) { -#ifdef KL_DEBUG - std::cout << "checking step to reset " << step_to_remove << " / " << current_schedule.num_steps() << std::endl; -#endif - - if (check_reset_superstep(step_to_remove)) { -#ifdef KL_DEBUG - std::cout << "trying to reset superstep " << step_to_remove << std::endl; -#endif - - if (scatter_nodes_reset_superstep(step_to_remove)) { - for (unsigned proc = 0; proc < num_procs; proc++) { - if (step_to_remove < current_schedule.num_steps() - 1) { - node_selection.insert( - 
current_schedule.set_schedule.step_processor_vertices[step_to_remove + 1][proc].begin(), - current_schedule.set_schedule.step_processor_vertices[step_to_remove + 1][proc].end()); - } - - if (step_to_remove > 0) { - node_selection.insert( - current_schedule.set_schedule.step_processor_vertices[step_to_remove - 1][proc].begin(), - current_schedule.set_schedule.step_processor_vertices[step_to_remove - 1][proc].end()); - } - } - - step_selection_counter = step_to_remove + 1; - - if (step_selection_counter >= current_schedule.num_steps()) { - step_selection_counter = 0; - step_selection_epoch_counter++; - } - - parameters.violations_threshold = 0; - super_locked_nodes.clear(); -#ifdef KL_DEBUG - std::cout << "---- reset super locked nodes" << std::endl; -#endif - - return; - } - } - } - -#ifdef KL_DEBUG - std::cout << "no superstep to reset" << std::endl; -#endif - - step_selection_epoch_counter++; - select_nodes(); - return; - } - - virtual bool check_reset_superstep(unsigned step) { - if (current_schedule.num_steps() <= 2) { - return false; - } - - v_workw_t total_work = 0; - v_workw_t max_total_work = 0; - v_workw_t min_total_work = std::numeric_limits>::max(); - - for (unsigned proc = 0; proc < num_procs; proc++) { - total_work += current_schedule.step_processor_work[step][proc]; - max_total_work = std::max(max_total_work, current_schedule.step_processor_work[step][proc]); - min_total_work = std::min(min_total_work, current_schedule.step_processor_work[step][proc]); - } - -#ifdef KL_DEBUG - - std::cout << " avg " - << static_cast(total_work) / static_cast(current_schedule.instance->numberOfProcessors()) - << " max " << max_total_work << " min " << min_total_work << std::endl; -#endif - - if (static_cast(total_work) / static_cast(current_schedule.instance->numberOfProcessors()) - - static_cast(min_total_work) - > 0.1 * static_cast(min_total_work)) { - return true; - } - - return false; - } - - bool scatter_nodes_reset_superstep(unsigned step) { - assert(step < 
current_schedule.num_steps()); - - std::vector> moves; - - bool abort = false; - - for (unsigned proc = 0; proc < num_procs; proc++) { - for (const auto &node : current_schedule.set_schedule.step_processor_vertices[step][proc]) { - compute_node_gain(node); - moves.push_back(best_move_change_superstep(node)); - - if (moves.back().gain == std::numeric_limits::lowest()) { - abort = true; - break; - } - - if constexpr (current_schedule.use_memory_constraint) { - current_schedule.memory_constraint.apply_forward_move( - node, proc, step, moves.back().to_proc, moves.back().to_step); - } - } - - if (abort) { - break; - } - } - - if (abort) { - current_schedule.recompute_neighboring_supersteps(step); - return false; - } - - for (unsigned proc = 0; proc < num_procs; proc++) { - current_schedule.set_schedule.step_processor_vertices[step][proc].clear(); - } - - for (const auto &move : moves) { -#ifdef KL_DEBUG - std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step << std::endl; -#endif - - current_schedule.vector_schedule.setAssignedSuperstep(move.node, move.to_step); - current_schedule.vector_schedule.setAssignedProcessor(move.node, move.to_proc); - current_schedule.set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node); - } - - current_schedule.reset_superstep(step); - - return true; - } - - void select_unlock_neighbors(VertexType node) { - for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - if (check_node_unlocked(target)) { - node_selection.insert(target); - nodes_to_update.insert(target); - } - } - - for (const auto &source : current_schedule.instance->getComputationalDag().parents(node)) { - if (check_node_unlocked(source)) { - node_selection.insert(source); - nodes_to_update.insert(source); - } - } - } - - void set_parameters() { - if (num_nodes < 250) { - parameters.max_outer_iterations = 300; - - parameters.select_all_nodes = true; - 
parameters.selection_threshold = num_nodes; - - } else if (num_nodes < 1000) { - parameters.max_outer_iterations = static_cast(num_nodes / 2); - - parameters.select_all_nodes = true; - parameters.selection_threshold = num_nodes; - - } else if (num_nodes < 5000) { - parameters.max_outer_iterations = 4 * static_cast(std::sqrt(num_nodes)); - - parameters.selection_threshold = num_nodes / 3; - - } else if (num_nodes < 10000) { - parameters.max_outer_iterations = 3 * static_cast(std::sqrt(num_nodes)); - - parameters.selection_threshold = num_nodes / 3; - - } else if (num_nodes < 50000) { - parameters.max_outer_iterations = static_cast(std::sqrt(num_nodes)); - - parameters.selection_threshold = num_nodes / 5; - - } else if (num_nodes < 100000) { - parameters.max_outer_iterations = 2 * static_cast(std::log(num_nodes)); - - parameters.selection_threshold = num_nodes / 10; - - } else { - parameters.max_outer_iterations = static_cast(std::min(10000.0, std::log(num_nodes))); - - parameters.selection_threshold = num_nodes / 10; - } - - if (parameters.quick_pass) { - parameters.max_outer_iterations = 50; - parameters.max_no_improvement_iterations = 25; - } - - if (auto_alternate && current_schedule.instance->getArchitecture().synchronisationCosts() > 10000.0) { -#ifdef KL_DEBUG - std::cout << "KLBase set parameters, large synchchost: only remove supersets" << std::endl; -#endif - reset_superstep = false; - alternate_reset_remove_superstep = false; - } - -#ifdef KL_DEBUG - if (parameters.select_all_nodes) { - std::cout << "KLBase set parameters, select all nodes" << std::endl; - } else { - std::cout << "KLBase set parameters, selection threshold: " << parameters.selection_threshold << std::endl; - } -#endif - } - - virtual void cleanup_datastructures() { - node_change_in_costs.clear(); - node_gains.clear(); - - unlock.clear(); - - max_gain_heap.clear(); - node_heap_handles.clear(); - - current_schedule.cleanup_superstep_datastructures(); - } - - void reset_run_datastructures() { 
- node_selection.clear(); - nodes_to_update.clear(); - locked_nodes.clear(); - super_locked_nodes.clear(); - } - - bool run_local_search_without_violations() { - penalty = std::numeric_limits::max() * .24; - - double initial_costs = current_schedule.current_cost; - - auto start_time = std::chrono::high_resolution_clock::now(); - - select_nodes_threshold(parameters.selection_threshold); - - initialize_gain_heap(node_selection); - -#ifdef KL_DEBUG - std::cout << "Initial costs " << initial_costs << std::endl; -#endif - - for (unsigned outer_counter = 0; outer_counter < parameters.max_outer_iterations; outer_counter++) { -#ifdef KL_DEBUG - std::cout << "outer iteration " << outer_counter << std::endl; -#endif - unsigned failed_branches = 0; - // double best_iter_costs = current_schedule.current_cost; - - unsigned inner_counter = 0; - - while (failed_branches < 3 && inner_counter < parameters.max_inner_iterations && max_gain_heap.size() > 0) { - inner_counter++; - - const double iter_costs = current_schedule.current_cost; - - kl_move best_move = find_best_move(); // O(log n) - - if (best_move.gain < -std::numeric_limits::max() * .25) { - continue; - } - - current_schedule.apply_move(best_move); // O(p + log n) - - locked_nodes.insert(best_move.node); - -#ifdef KL_DEBUG - double tmp_costs = current_schedule.current_cost; - if (tmp_costs != compute_current_costs()) { - std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain - << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl; - - std::cout << "! costs not equal " << std::endl; - } -#endif - - if (best_move.change_in_cost > 0 && current_schedule.current_feasible) { - if (best_schedule_costs > iter_costs) { -#ifdef KL_DEBUG - std::cout << "costs increased .. 
save best schedule with costs " << iter_costs << std::endl; -#endif - best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) - reverse_move_best_schedule(best_move); - } - } - - compute_nodes_to_update(best_move); - - select_unlock_neighbors(best_move.node); - - update_node_gains(nodes_to_update); - -#ifdef KL_DEBUG - std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost " - << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step - << ", from proc " << best_move.from_proc << " to " << best_move.to_proc - << " violations: " << current_schedule.current_violations.size() << " cost " - << current_schedule.current_cost << std::endl; -#endif - - // if (not current_schedule.current_feasible) { - - if (current_schedule.current_cost > (1.04 + outer_counter * 0.002) * best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "current cost " << current_schedule.current_cost - << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule" - << std::endl; -#endif - - current_schedule.set_current_schedule(*best_schedule); - - // set_initial_reward_penalty(); - initialize_gain_heap_unlocked_nodes(node_selection); - - failed_branches++; - } - //} - - } // while - -#ifdef KL_DEBUG - std::cout << "end inner loop current cost: " << current_schedule.current_cost << " with " - << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs - << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" - << parameters.max_outer_iterations << std::endl; -#endif - - if (current_schedule.current_feasible) { - if (current_schedule.current_cost <= best_schedule_costs) { - save_best_schedule(current_schedule.vector_schedule); - best_schedule_costs = current_schedule.current_cost; - } else { - current_schedule.set_current_schedule(*best_schedule); - } - } 
else { - current_schedule.set_current_schedule(*best_schedule); - } - - reset_locked_nodes(); - node_selection.clear(); - select_nodes_threshold(parameters.selection_threshold); - - initialize_gain_heap(node_selection); - -#ifdef KL_DEBUG - std::cout << "end of while, current cost " << current_schedule.current_cost << std::endl; -#endif - - if (compute_with_time_limit) { - auto finish_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(finish_time - start_time).count(); - - if (duration > ImprovementScheduler::timeLimitSeconds) { - break; - } - } - - } // for - - cleanup_datastructures(); - - if (initial_costs > current_schedule.current_cost) { - return true; - } else { - return false; - } - } - - bool run_local_search_simple() { - set_initial_reward_penalty(); - - const double initial_costs = current_schedule.current_cost; - - unsigned improvement_counter = 0; - - auto start_time = std::chrono::high_resolution_clock::now(); - - select_nodes(); - - initialize_gain_heap(node_selection); - -#ifdef KL_DEBUG - std::cout << "Initial costs " << initial_costs << std::endl; -#endif - - for (unsigned outer_counter = 0; outer_counter < parameters.max_outer_iterations; outer_counter++) { -#ifdef KL_DEBUG - std::cout << "outer iteration " << outer_counter << std::endl; - if (max_gain_heap.size() == 0) { - std::cout << "max gain heap empty" << std::endl; - } -#endif - unsigned failed_branches = 0; - double best_iter_costs = current_schedule.current_cost; - - VertexType node_causing_first_violation = 0; - - unsigned inner_counter = 0; - - while (failed_branches < parameters.max_num_failed_branches && inner_counter < parameters.max_inner_iterations - && max_gain_heap.size() > 0) { - inner_counter++; - - const bool iter_feasible = current_schedule.current_feasible; - const double iter_costs = current_schedule.current_cost; - - kl_move best_move = find_best_move(); // O(log n) - - if (best_move.gain < -std::numeric_limits::max() * .25) { 
-#ifdef KL_DEBUG - std::cout << "abort iteration on very negative max gain" << std::endl; -#endif - break; - } - -#ifdef KL_DEBUG - std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost " - << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step - << ", from proc " << best_move.from_proc << " to " << best_move.to_proc - << " violations: " << current_schedule.current_violations.size() << " cost " - << current_schedule.current_cost << std::endl; -#endif - - current_schedule.apply_move(best_move); // O(p + log n) - - update_reward_penalty(); - locked_nodes.insert(best_move.node); - -#ifdef KL_DEBUG - double tmp_costs = current_schedule.current_cost; - if (tmp_costs != compute_current_costs()) { - std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain - << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl; - - std::cout << "! costs not equal " << std::endl; - } -#endif - - if (iter_feasible != current_schedule.current_feasible) { - if (iter_feasible) { -#ifdef KL_DEBUG - std::cout << "===> current schedule changed from feasible to infeasible" << std::endl; -#endif - - node_causing_first_violation = best_move.node; - - if (iter_costs < best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "save best schedule with costs " << iter_costs << std::endl; -#endif - best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) - reverse_move_best_schedule(best_move); - } - - } else { -#ifdef KL_DEBUG - std::cout << "===> current schedule changed from infeasible to feasible" << std::endl; -#endif - } - } else if (best_move.change_in_cost > 0 && current_schedule.current_feasible) { - if (iter_costs < best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "costs increased .. 
save best schedule with costs " << iter_costs << std::endl; -#endif - best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) - reverse_move_best_schedule(best_move); - } - } - - compute_nodes_to_update(best_move); - - select_unlock_neighbors(best_move.node); - - if (check_violation_locked()) { - if (iter_feasible != current_schedule.current_feasible && iter_feasible) { - node_causing_first_violation = best_move.node; - } - super_locked_nodes.insert(node_causing_first_violation); -#ifdef KL_DEBUG - std::cout << "abort iteration on locked violation, super locking node " << node_causing_first_violation - << std::endl; -#endif - break; - } - - update_node_gains(nodes_to_update); - - if (current_schedule.current_cost - > (parameters.max_div_best_sol_base_percent + outer_counter * parameters.max_div_best_sol_rate_percent) - * best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "current cost " << current_schedule.current_cost - << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule" - << std::endl; -#endif - - current_schedule.set_current_schedule(*best_schedule); - - set_initial_reward_penalty(); - initialize_gain_heap_unlocked_nodes(node_selection); - - failed_branches++; - } - - } // while - -#ifdef KL_DEBUG - std::cout << "end inner loop current cost: " << current_schedule.current_cost << " with " - << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs - << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" - << parameters.max_outer_iterations << std::endl; -#endif - - if (current_schedule.current_feasible) { - if (current_schedule.current_cost <= best_schedule_costs) { - save_best_schedule(current_schedule.vector_schedule); - best_schedule_costs = current_schedule.current_cost; - } else { - current_schedule.set_current_schedule(*best_schedule); - } - } else { - 
current_schedule.set_current_schedule(*best_schedule); - } - - if (outer_counter == 20) { - parameters.initial_penalty = 0.0; -#ifdef KL_DEBUG - std::cout << "---- reset initial penalty" << std::endl; -#endif - } - if (outer_counter > 0 && outer_counter % 30 == 0) { - super_locked_nodes.clear(); -#ifdef KL_DEBUG - std::cout << "---- reset super locked nodes" << std::endl; -#endif - } - - reset_locked_nodes(); - - node_selection.clear(); - select_nodes(); - - set_initial_reward_penalty(); - - initialize_gain_heap(node_selection); - -#ifdef KL_DEBUG - std::cout << "end of while, current cost " << current_schedule.current_cost << std::endl; -#endif - - if (compute_with_time_limit) { - auto finish_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(finish_time - start_time).count(); - if (duration > ImprovementScheduler::timeLimitSeconds) { - break; - } - } - - if (best_iter_costs <= current_schedule.current_cost) { - if (improvement_counter++ >= parameters.max_no_improvement_iterations) { -#ifdef KL_DEBUG - std::cout << "no improvement for " << parameters.max_no_improvement_iterations - << " iterations, end local search" << std::endl; -#endif - break; - } - } else { - improvement_counter = 0; - } - - } // for - - cleanup_datastructures(); - -#ifdef KL_DEBUG - std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs" << std::endl; - assert(best_schedule->satisfiesPrecedenceConstraints()); -#endif - - if (initial_costs > current_schedule.current_cost) { - return true; - } else { - return false; - } - } - - bool run_local_search_remove_supersteps() { - const double initial_costs = current_schedule.current_cost; - -#ifdef KL_DEBUG - std::cout << "Initial costs " << initial_costs << std::endl; -#endif - - unsigned no_improvement_iter_counter = 0; - - auto start_time = std::chrono::high_resolution_clock::now(); - - select_nodes_check_remove_superstep(); - - update_reward_penalty(); 
- - initialize_gain_heap(node_selection); - - for (unsigned outer_counter = 0; outer_counter < parameters.max_outer_iterations; outer_counter++) { -#ifdef KL_DEBUG - std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost << std::endl; - if (max_gain_heap.size() == 0) { - std::cout << "max gain heap empty" << std::endl; - } -#endif - - unsigned conseq_no_gain_moves_counter = 0; - - unsigned failed_branches = 0; - double best_iter_costs = current_schedule.current_cost; - - VertexType node_causing_first_violation = 0; - - unsigned inner_counter = 0; - - while (failed_branches < parameters.max_num_failed_branches && inner_counter < parameters.max_inner_iterations - && max_gain_heap.size() > 0) { - inner_counter++; - - const bool iter_feasible = current_schedule.current_feasible; - const double iter_costs = current_schedule.current_cost; - - kl_move best_move = find_best_move(); // O(log n) - - if (best_move.gain < -std::numeric_limits::max() * .25) { -#ifdef KL_DEBUG - std::cout << "abort iteration on very negative max gain" << std::endl; -#endif - break; - } - -#ifdef KL_DEBUG - std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost " - << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step - << ", from proc " << best_move.from_proc << " to " << best_move.to_proc - << " violations: " << current_schedule.current_violations.size() << " old cost " - << current_schedule.current_cost << " new cost " - << current_schedule.current_cost + best_move.change_in_cost << std::endl; -#endif - - current_schedule.apply_move(best_move); // O(p + log n) - - update_reward_penalty(); - locked_nodes.insert(best_move.node); - -#ifdef KL_DEBUG - double tmp_costs = current_schedule.current_cost; - if (tmp_costs != compute_current_costs()) { - std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain - << " best move 
costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl; - - std::cout << "! costs not equal " << std::endl; - } -#endif - - if (iter_feasible != current_schedule.current_feasible) { - if (iter_feasible) { -#ifdef KL_DEBUG - std::cout << "===> current schedule changed from feasible to infeasible" << std::endl; -#endif - - node_causing_first_violation = best_move.node; - - if (iter_costs < best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "save best schedule with costs " << iter_costs << std::endl; -#endif - best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) - reverse_move_best_schedule(best_move); -#ifdef KL_DEBUG - std::cout << "KLBase save best schedule with (source node comm) cost " - << best_schedule->computeCostsTotalCommunication() << " and number of supersteps " - << best_schedule->numberOfSupersteps() << std::endl; -#endif - } - - } else { -#ifdef KL_DEBUG - std::cout << "===> current schedule changed from infeasible to feasible" << std::endl; -#endif - } - } else if (best_move.change_in_cost > 0 && current_schedule.current_feasible) { - if (iter_costs < best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "costs increased .. 
save best schedule with costs " << iter_costs << std::endl; -#endif - best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) - reverse_move_best_schedule(best_move); -#ifdef KL_DEBUG - std::cout << "KLBase save best schedule with (source node comm) cost " - << best_schedule->computeCostsTotalCommunication() << " and number of supersteps " - << best_schedule->numberOfSupersteps() << std::endl; -#endif - } - } - - compute_nodes_to_update(best_move); - - select_unlock_neighbors(best_move.node); - - if (check_violation_locked()) { - if (iter_feasible != current_schedule.current_feasible && iter_feasible) { - node_causing_first_violation = best_move.node; - } - super_locked_nodes.insert(node_causing_first_violation); -#ifdef KL_DEBUG - std::cout << "abort iteration on locked violation, super locking node " << node_causing_first_violation - << std::endl; -#endif - break; - } - - update_node_gains(nodes_to_update); - - if (current_schedule.current_cost - > (parameters.max_div_best_sol_base_percent + outer_counter * parameters.max_div_best_sol_rate_percent) - * best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "current cost " << current_schedule.current_cost - << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule" - << std::endl; -#endif - - current_schedule.set_current_schedule(*best_schedule); - - set_initial_reward_penalty(); - initialize_gain_heap_unlocked_nodes(node_selection); - -#ifdef KL_DEBUG - std::cout << "new current cost " << current_schedule.current_cost << std::endl; -#endif - - failed_branches++; - } - - } // while - -#ifdef KL_DEBUG - std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost << " with " - << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs - << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" - << 
parameters.max_outer_iterations << std::endl; -#endif - - if (current_schedule.current_feasible) { - if (current_schedule.current_cost <= best_schedule_costs) { - save_best_schedule(current_schedule.vector_schedule); - best_schedule_costs = current_schedule.current_cost; -#ifdef KL_DEBUG - std::cout << "KLBase save best schedule with (source node comm) cost " - << best_schedule->computeCostsTotalCommunication() << " and number of supersteps " - << best_schedule->numberOfSupersteps() << std::endl; -#endif - } else { - current_schedule.set_current_schedule(*best_schedule); - } - } else { - current_schedule.set_current_schedule(*best_schedule); - } - - if (outer_counter > 0 && outer_counter % 30 == 0) { - super_locked_nodes.clear(); -#ifdef KL_DEBUG - std::cout << "---- reset super locked nodes" << std::endl; -#endif - } - - reset_locked_nodes(); - - node_selection.clear(); - select_nodes_check_remove_superstep(); - - update_reward_penalty(); - - initialize_gain_heap(node_selection); - -#ifdef KL_DEBUG - std::cout << "end of while, current cost " << current_schedule.current_cost << std::endl; -#endif - - if (compute_with_time_limit) { - auto finish_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(finish_time - start_time).count(); - if (duration > ImprovementScheduler::timeLimitSeconds) { - break; - } - } - - if (best_iter_costs <= current_schedule.current_cost) { - no_improvement_iter_counter++; - - if (no_improvement_iter_counter > parameters.reset_epoch_counter_threshold) { - step_selection_epoch_counter = 0; - parameters.reset_epoch_counter_threshold += current_schedule.num_steps(); -#ifdef KL_DEBUG - std::cout << "no improvement for " << no_improvement_iter_counter - << " iterations, reset epoc counter. 
Increase reset threshold to " - << parameters.reset_epoch_counter_threshold << std::endl; -#endif - } - - if (no_improvement_iter_counter > 10) { - parameters.initial_penalty = 0.0; - parameters.violations_threshold = 3; -#ifdef KL_DEBUG - std::cout << "---- reset initial penalty " << parameters.initial_penalty << " violations threshold " - << parameters.violations_threshold << std::endl; -#endif - } - - if (no_improvement_iter_counter == 35) { - parameters.max_div_best_sol_base_percent *= 1.02; -#ifdef KL_DEBUG - std::cout << "no improvement for " << no_improvement_iter_counter - << " iterations, increase max_div_best_sol_base_percent to " - << parameters.max_div_best_sol_base_percent << std::endl; -#endif - } - - if (no_improvement_iter_counter >= parameters.max_no_improvement_iterations) { -#ifdef KL_DEBUG - std::cout << "no improvement for " << parameters.max_no_improvement_iterations - << " iterations, end local search" << std::endl; -#endif - break; - } - } else { - no_improvement_iter_counter = 0; - } - - } // for - - cleanup_datastructures(); - -#ifdef KL_DEBUG - std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs" << std::endl; - assert(best_schedule->satisfiesPrecedenceConstraints()); -#endif - - if (initial_costs > current_schedule.current_cost) { - return true; - } else { - return false; - } - } - - bool run_local_search_unlock_delay() { - const double initial_costs = current_schedule.current_cost; - -#ifdef KL_DEBUG_1 - std::cout << "Initial costs " << initial_costs << " with " << best_schedule->numberOfSupersteps() << " supersteps." 
- << std::endl; -#endif - -#ifdef KL_PRINT_SCHEDULE - print_best_schedule(0); -#endif - - unsigned no_improvement_iter_counter = 0; - - auto start_time = std::chrono::high_resolution_clock::now(); - - select_nodes_check_remove_superstep(); - - update_reward_penalty(); - - initialize_gain_heap(node_selection); - - for (unsigned outer_counter = 0; outer_counter < parameters.max_outer_iterations; outer_counter++) { -#ifdef KL_DEBUG - std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost << std::endl; - if (max_gain_heap.size() == 0) { - std::cout << "max gain heap empty" << std::endl; - } -#endif - - // unsigned conseq_no_gain_moves_counter = 0; - - unsigned failed_branches = 0; - double best_iter_costs = current_schedule.current_cost; - - VertexType node_causing_first_violation = 0; - - unsigned inner_counter = 0; - - while (failed_branches < parameters.max_num_failed_branches && inner_counter < parameters.max_inner_iterations - && max_gain_heap.size() > 0) { - inner_counter++; - - const bool iter_feasible = current_schedule.current_feasible; - const double iter_costs = current_schedule.current_cost; -#ifdef KL_DEBUG - print_heap(); -#endif - kl_move best_move = find_best_move(); // O(log n) - - if (best_move.gain < -std::numeric_limits::max() * .25) { -#ifdef KL_DEBUG - std::cout << "abort iteration on very negative max gain" << std::endl; -#endif - break; - } - -#ifdef KL_DEBUG - std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost " - << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step - << ", from proc " << best_move.from_proc << " to " << best_move.to_proc - << " violations: " << current_schedule.current_violations.size() << " old cost " - << current_schedule.current_cost << " new cost " - << current_schedule.current_cost + best_move.change_in_cost << std::endl; - - if constexpr (current_schedule.use_memory_constraint) { - 
std::cout << "memory to step/proc " - << current_schedule.memory_constraint.step_processor_memory[best_move.to_step][best_move.to_proc] - << std::endl; - } - - printSetScheduleWorkMemNodesGrid(std::cout, current_schedule.set_schedule, true); -#endif - - current_schedule.apply_move(best_move); // O(p + log n) - - // if (best_move.gain <= 0.000000001) { - // conseq_no_gain_moves_counter++; - - // if (conseq_no_gain_moves_counter > 15) { - - // conseq_no_gain_moves_counter = 0; - // parameters.initial_penalty = 0.0; - // parameters.violations_threshold = 3; - // #ifdef KL_DEBUG - // std::cout << "more than 15 moves with gain <= 0, set " << - // parameters.initial_penalty - // << " violations threshold " << parameters.violations_threshold << - // std::endl; - // #endif - // } - - // } else { - // conseq_no_gain_moves_counter = 0; - // } - -#ifdef KL_DEBUG - BspSchedule tmp_schedule(current_schedule.set_schedule); - if (not tmp_schedule.satisfiesMemoryConstraints()) { - std::cout << "Mem const violated" << std::endl; - } -#endif - - update_reward_penalty(); - locked_nodes.insert(best_move.node); - -#ifdef KL_DEBUG - double tmp_costs = current_schedule.current_cost; - if (tmp_costs != compute_current_costs()) { - std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain - << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl; - - std::cout << "! 
costs not equal " << std::endl; - } -#endif - - if (iter_feasible != current_schedule.current_feasible) { - if (iter_feasible) { -#ifdef KL_DEBUG - std::cout << "===> current schedule changed from feasible to infeasible" << std::endl; -#endif - - node_causing_first_violation = best_move.node; - - if (iter_costs < best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "save best schedule with costs " << iter_costs << std::endl; -#endif - best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) - reverse_move_best_schedule(best_move); -#ifdef KL_DEBUG - std::cout << "KLBase save best schedule with (source node comm) cost " - << best_schedule->computeTotalCosts() << " and number of supersteps " - << best_schedule->numberOfSupersteps() << std::endl; -#endif - } - - } else { -#ifdef KL_DEBUG - std::cout << "===> current schedule changed from infeasible to feasible" << std::endl; -#endif - } - } else if (best_move.change_in_cost > 0 && current_schedule.current_feasible) { - if (iter_costs < best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "costs increased .. 
save best schedule with costs " << iter_costs << std::endl; -#endif - best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) - reverse_move_best_schedule(best_move); -#ifdef KL_DEBUG - std::cout << "KLBase save best schedule with (source node comm) cost " - << best_schedule->computeTotalCosts() << " and number of supersteps " - << best_schedule->numberOfSupersteps() << std::endl; -#endif - } - } - -#ifdef KL_DEBUG - std::cout << "Node selection: ["; - for (auto it = node_selection.begin(); it != node_selection.end(); ++it) { - std::cout << *it << " "; - } - std::cout << "]" << std::endl; - - std::cout << "Locked nodes: ["; - for (auto it = locked_nodes.begin(); it != locked_nodes.end(); ++it) { - std::cout << *it << " "; - } - std::cout << "]" << std::endl; - - std::cout << "Super locked nodes: ["; - for (auto it = super_locked_nodes.begin(); it != super_locked_nodes.end(); ++it) { - std::cout << *it << " "; - } - std::cout << "]" << std::endl; - -#endif - - compute_nodes_to_update(best_move); - - select_unlock_neighbors(best_move.node); - - if (check_violation_locked()) { - if (iter_feasible != current_schedule.current_feasible && iter_feasible) { - node_causing_first_violation = best_move.node; - } - super_locked_nodes.insert(node_causing_first_violation); -#ifdef KL_DEBUG - std::cout << "abort iteration on locked violation, super locking node " << node_causing_first_violation - << std::endl; -#endif - break; - } - -#ifdef KL_DEBUG - std::cout << "Nodes to update: ["; - for (auto it = nodes_to_update.begin(); it != nodes_to_update.end(); ++it) { - std::cout << *it << " "; - } - std::cout << "]" << std::endl; -#endif - - update_node_gains(nodes_to_update); - - if (not(current_schedule.current_violations.size() > 4) && not iter_feasible && not max_gain_heap.empty()) { - const auto &iter = max_gain_heap.ordered_begin(); - if (iter->gain < parameters.gain_threshold) { - node_selection.clear(); - locked_nodes.clear(); - 
super_locked_nodes.clear(); - select_nodes_violations(); - - update_reward_penalty(); - - initialize_gain_heap(node_selection); - -#ifdef KL_DEBUG - std::cout << "max gain below gain threshold" << std::endl; -#endif - } - } - - if (current_schedule.current_cost - > (parameters.max_div_best_sol_base_percent + outer_counter * parameters.max_div_best_sol_rate_percent) - * best_schedule_costs) { -#ifdef KL_DEBUG - std::cout << "current cost " << current_schedule.current_cost - << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule" - << std::endl; -#endif - - current_schedule.set_current_schedule(*best_schedule); - - set_initial_reward_penalty(); - initialize_gain_heap_unlocked_nodes(node_selection); - -#ifdef KL_DEBUG - std::cout << "new current cost " << current_schedule.current_cost << std::endl; -#endif - - failed_branches++; - } - - } // while - -#ifdef KL_DEBUG - std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost << " with " - << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs - << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" - << parameters.max_outer_iterations << std::endl; -#endif - - if (current_schedule.current_feasible) { - if (current_schedule.current_cost <= best_schedule_costs) { - save_best_schedule(current_schedule.vector_schedule); - best_schedule_costs = current_schedule.current_cost; -#ifdef KL_DEBUG - std::cout << "KLBase save best schedule with (source node comm) cost " << best_schedule->computeTotalCosts() - << " and number of supersteps " << best_schedule->numberOfSupersteps() << std::endl; -#endif - } else { - current_schedule.set_current_schedule(*best_schedule); - } - } else { - current_schedule.set_current_schedule(*best_schedule); - } - - if (compute_with_time_limit) { - auto finish_time = std::chrono::high_resolution_clock::now(); - auto 
duration = std::chrono::duration_cast(finish_time - start_time).count(); - if (duration > ImprovementScheduler::timeLimitSeconds) { - break; - } - } - - if (outer_counter > 0 && outer_counter % 30 == 0) { - super_locked_nodes.clear(); -#ifdef KL_DEBUG - std::cout << "---- reset super locked nodes" << std::endl; -#endif - } - -#ifdef KL_PRINT_SCHEDULE - if (best_iter_costs > current_schedule.current_cost) { - print_best_schedule(outer_counter + 1); - } -#endif - - reset_locked_nodes(); - - node_selection.clear(); - - // if (reset_superstep) { - // select_nodes_check_reset_superstep(); - // } else { - select_nodes_check_remove_superstep(); - // } - - update_reward_penalty(); - - initialize_gain_heap(node_selection); - -#ifdef KL_DEBUG - std::cout << "end of while, current cost " << current_schedule.current_cost << std::endl; -#endif - - if (best_iter_costs <= current_schedule.current_cost) { - no_improvement_iter_counter++; - - if (no_improvement_iter_counter > parameters.reset_epoch_counter_threshold) { - step_selection_epoch_counter = 0; - parameters.reset_epoch_counter_threshold += current_schedule.num_steps(); -#ifdef KL_DEBUG - std::cout << "no improvement for " << no_improvement_iter_counter - << " iterations, reset epoc counter. 
Increase reset threshold to " - << parameters.reset_epoch_counter_threshold << std::endl; -#endif - } - - // if (no_improvement_iter_counter > 10 && no_improvement_iter_counter % 15 == 0) { - - // step_selection_epoch_counter = 0; - - // if (alternate_reset_remove_superstep) { - // reset_superstep = !reset_superstep; - // } - - // #ifdef KL_DEBUG - // std::cout << "no improvement for " << no_improvement_iter_counter << " reset - // superstep " - // << reset_superstep << std::endl; - // #endif - // } - - if (no_improvement_iter_counter > 50 && no_improvement_iter_counter % 3 == 0) { - parameters.initial_penalty = 0.0; - parameters.violations_threshold = 5; - - } else if (no_improvement_iter_counter > 30 && no_improvement_iter_counter % 5 == 0) { - parameters.initial_penalty = 0.0; - parameters.violations_threshold = 4; - - } else if (no_improvement_iter_counter > 9 && no_improvement_iter_counter % 10 == 0) { - parameters.initial_penalty = 0.0; - parameters.violations_threshold = 3; -#ifdef KL_DEBUG - std::cout << "---- reset initial penalty " << parameters.initial_penalty << " violations threshold " - << parameters.violations_threshold << std::endl; -#endif - } - - if (no_improvement_iter_counter == 35) { - parameters.max_div_best_sol_base_percent *= 1.02; -#ifdef KL_DEBUG - std::cout << "no improvement for " << no_improvement_iter_counter - << " iterations, increase max_div_best_sol_base_percent to " - << parameters.max_div_best_sol_base_percent << std::endl; -#endif - } - - if (no_improvement_iter_counter >= parameters.max_no_improvement_iterations) { -#ifdef KL_DEBUG - std::cout << "no improvement for " << parameters.max_no_improvement_iterations - << " iterations, end local search" << std::endl; -#endif - break; - } - } else { - no_improvement_iter_counter = 0; - } - -#ifdef KL_DEBUG - std::cout << "end of while, current cost " << current_schedule.current_cost << std::endl; -#endif - - } // for - - cleanup_datastructures(); - -#ifdef KL_DEBUG_1 - std::cout << 
"kl done, current cost " << best_schedule_costs << " with " << best_schedule->numberOfSupersteps() - << " supersteps vs " << initial_costs << " initial costs" << std::endl; - assert(best_schedule->satisfiesPrecedenceConstraints()); -#endif - - if (initial_costs > current_schedule.current_cost) { - return true; - } else { - return false; - } - } - - // virtual void checkMergeSupersteps(); - // virtual void checkInsertSuperstep(); - - // virtual void insertSuperstep(unsigned step); - - void print_heap() { - std::cout << "heap current size: " << max_gain_heap.size() << std::endl; - std::cout << "heap top node " << max_gain_heap.top().node << " gain " << max_gain_heap.top().gain << std::endl; - - unsigned count = 0; - for (auto it = max_gain_heap.ordered_begin(); it != max_gain_heap.ordered_end(); ++it) { - std::cout << "node " << it->node << " gain " << it->gain << " to proc " << it->to_proc << " to step " << it->to_step - << std::endl; - - if (count++ > 15 || it->gain <= 0.0) { - break; - } - } - } - - bool compute_with_time_limit = false; - -#ifdef KL_PRINT_SCHEDULE - std::string file_name_write_schedule = "kl_schedule_iter_"; - void print_best_schedule(unsigned iteration); -#endif - - public: - kl_base(kl_current_schedule ¤t_schedule_) - : ImprovementScheduler(), current_schedule(current_schedule_) { - std::random_device rd; - gen = std::mt19937(rd()); - } - - virtual ~kl_base() = default; - - virtual RETURN_STATUS improveSchedule(BspSchedule &schedule) override { - reset_run_datastructures(); - - best_schedule = &schedule; - current_schedule.instance = &best_schedule->getInstance(); - - num_nodes = current_schedule.instance->numberOfVertices(); - num_procs = current_schedule.instance->numberOfProcessors(); - - set_parameters(); - initialize_datastructures(); - - bool improvement_found = run_local_search_unlock_delay(); - - if (improvement_found) { - return RETURN_STATUS::OSP_SUCCESS; - } else { - return RETURN_STATUS::BEST_FOUND; - } - } - - virtual RETURN_STATUS 
improveScheduleWithTimeLimit(BspSchedule &schedule) override { - compute_with_time_limit = true; - return improveSchedule(schedule); - } - - virtual void set_compute_with_time_limit(bool compute_with_time_limit_) { - compute_with_time_limit = compute_with_time_limit_; - } - - virtual std::string getScheduleName() const = 0; - - virtual void set_quick_pass(bool quick_pass_) { parameters.quick_pass = quick_pass_; } - - virtual void set_alternate_reset_remove_superstep(bool alternate_reset_remove_superstep_) { - auto_alternate = false; - alternate_reset_remove_superstep = alternate_reset_remove_superstep_; - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp deleted file mode 100644 index f8ded91e..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp +++ /dev/null @@ -1,474 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner -*/ - -// #define KL_DEBUG - -#include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/model/IBspSchedule.hpp" -#include "osp/bsp/model/util/SetSchedule.hpp" -#include "osp/bsp/model/util/VectorSchedule.hpp" -#include "osp/bsp/scheduler/ImprovementScheduler.hpp" -#include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" -#include "osp/graph_algorithms/directed_graph_util.hpp" - -namespace osp { - -template -struct kl_move { - vertex_idx_t node; - - double gain; - double change_in_cost; - - unsigned from_proc; - unsigned from_step; - - unsigned to_proc; - unsigned to_step; - - kl_move() : node(0), gain(0), change_in_cost(0), from_proc(0), from_step(0), to_proc(0), to_step(0) {} - - kl_move(vertex_idx_t _node, - double _gain, - double _change_cost, - unsigned _from_proc, - unsigned _from_step, - unsigned _to_proc, - unsigned _to_step) - : node(_node), - gain(_gain), - change_in_cost(_change_cost), - from_proc(_from_proc), - from_step(_from_step), - to_proc(_to_proc), - to_step(_to_step) {} - - bool operator<(kl_move const &rhs) const { - return (gain < rhs.gain) or (gain <= rhs.gain and change_in_cost < rhs.change_in_cost) - or (gain <= rhs.gain and change_in_cost <= rhs.change_in_cost and node > rhs.node); - } - - kl_move reverse_move() const { return kl_move(node, -gain, -change_in_cost, to_proc, to_step, from_proc, from_step); } -}; - -class Ikl_cost_function { - public: - virtual double compute_current_costs() = 0; - - virtual ~Ikl_cost_function() = default; -}; - -template -class kl_current_schedule { - private: - using VertexType = vertex_idx_t; - using EdgeType = edge_desc_t; - - public: - kl_current_schedule(Ikl_cost_function *cost_f_) : cost_f(cost_f_) { -#ifdef KL_DEBUG - if constexpr (use_memory_constraint) { - std::cout << "KLCurrentSchedule constructor with memory constraint" << std::endl; - } else { - std::cout << "KLCurrentSchedule constructor without memory constraint" << std::endl; - } -#endif - } - - virtual 
~kl_current_schedule() = default; - - Ikl_cost_function *cost_f; - - const BspInstance *instance; - - VectorSchedule vector_schedule; - SetSchedule set_schedule; - - constexpr static bool use_memory_constraint = is_local_search_memory_constraint_v; - - MemoryConstraint_t memory_constraint; - - std::vector>> step_processor_work; - - std::vector> step_max_work; - std::vector> step_second_max_work; - - double current_cost = 0; - - bool current_feasible = true; - std::unordered_set current_violations; // edges - - std::unordered_map new_violations; - std::unordered_set resolved_violations; - - void remove_superstep(unsigned step) { - if (step > 0) { - vector_schedule.mergeSupersteps(step - 1, step); - set_schedule.mergeSupersteps(step - 1, step); - - compute_work_memory_datastructures(step - 1, step); - - } else { - vector_schedule.mergeSupersteps(0, 1); - set_schedule.mergeSupersteps(0, 1); - - compute_work_memory_datastructures(0, 0); - } - - for (unsigned i = step + 1; i < num_steps(); i++) { - step_max_work[i] = step_max_work[i + 1]; - step_second_max_work[i] = step_second_max_work[i + 1]; - - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - step_processor_work[i][proc] = step_processor_work[i + 1][proc]; - - if constexpr (use_memory_constraint) { - memory_constraint.override_superstep(i, proc, i + 1, proc); - } - } - } - - step_second_max_work[num_steps()] = 0; - step_max_work[num_steps()] = 0; - - if constexpr (use_memory_constraint) { - memory_constraint.reset_superstep(num_steps()); - } - - recompute_current_violations(); - cost_f->compute_current_costs(); - } - - void reset_superstep(unsigned step) { - if (step > 0) { - compute_work_memory_datastructures(step - 1, step - 1); - if (step < num_steps() - 1) { - compute_work_memory_datastructures(step + 1, step + 1); - } - } else { - compute_work_memory_datastructures(1, 1); - } - - step_second_max_work[step] = 0; - step_max_work[step] = 0; - - if constexpr (use_memory_constraint) { - 
memory_constraint.reset_superstep(step); - } - - recompute_current_violations(); - cost_f->compute_current_costs(); - } - - void recompute_neighboring_supersteps(unsigned step) { - if (step > 0) { - compute_work_memory_datastructures(step - 1, step); - if (step < num_steps() - 1) { - compute_work_memory_datastructures(step + 1, step + 1); - } - } else { - compute_work_memory_datastructures(0, 0); - if (num_steps() > 1) { - compute_work_memory_datastructures(1, 1); - } - } - } - - inline unsigned num_steps() const { return vector_schedule.numberOfSupersteps(); } - - virtual void set_current_schedule(const IBspSchedule &schedule) { - if (num_steps() == schedule.numberOfSupersteps()) { -#ifdef KL_DEBUG - std::cout << "KLCurrentSchedule set current schedule, same nr supersteps" << std::endl; -#endif - - for (unsigned step = 0; step < num_steps(); step++) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - set_schedule.step_processor_vertices[step][proc].clear(); - } - } - - for (const auto &node : instance->getComputationalDag().vertices()) { - vector_schedule.setAssignedProcessor(node, schedule.assignedProcessor(node)); - vector_schedule.setAssignedSuperstep(node, schedule.assignedSuperstep(node)); - - set_schedule.step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].insert( - node); - } - - } else { -#ifdef KL_DEBUG - std::cout << "KLCurrentSchedule set current schedule, different nr supersteps" << std::endl; -#endif - - vector_schedule = VectorSchedule(schedule); - set_schedule = SetSchedule(schedule); - - initialize_superstep_datastructures(); - } - - compute_work_memory_datastructures(0, num_steps() - 1); - recompute_current_violations(); - - cost_f->compute_current_costs(); - -#ifdef KL_DEBUG - std::cout << "KLCurrentSchedule set current schedule done, costs: " << current_cost - << " number of supersteps: " << num_steps() << std::endl; -#endif - } - - virtual void 
initialize_superstep_datastructures() { -#ifdef KL_DEBUG - std::cout << "KLCurrentSchedule initialize datastructures" << std::endl; -#endif - - const unsigned num_procs = instance->numberOfProcessors(); - - if constexpr (use_memory_constraint) { - memory_constraint.initialize(set_schedule, vector_schedule); - } - - step_processor_work - = std::vector>>(num_steps(), std::vector>(num_procs, 0)); - step_max_work = std::vector>(num_steps(), 0); - step_second_max_work = std::vector>(num_steps(), 0); - } - - virtual void cleanup_superstep_datastructures() { - step_processor_work.clear(); - step_max_work.clear(); - step_second_max_work.clear(); - - if constexpr (use_memory_constraint) { - memory_constraint.clear(); - } - } - - virtual void compute_work_memory_datastructures(unsigned start_step, unsigned end_step) { - if constexpr (use_memory_constraint) { - memory_constraint.compute_memory_datastructure(start_step, end_step); - } - - for (unsigned step = start_step; step <= end_step; step++) { - step_max_work[step] = 0; - step_second_max_work[step] = 0; - - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - step_processor_work[step][proc] = 0; - - for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { - step_processor_work[step][proc] += instance->getComputationalDag().vertex_work_weight(node); - } - - if (step_processor_work[step][proc] > step_max_work[step]) { - step_second_max_work[step] = step_max_work[step]; - step_max_work[step] = step_processor_work[step][proc]; - - } else if (step_processor_work[step][proc] > step_second_max_work[step]) { - step_second_max_work[step] = step_processor_work[step][proc]; - } - } - } - } - - virtual void recompute_current_violations() { - current_violations.clear(); - -#ifdef KL_DEBUG - std::cout << "Recompute current violations:" << std::endl; -#endif - - for (const auto &edge : edges(instance->getComputationalDag())) { - const auto &source_v = source(edge, 
instance->getComputationalDag()); - const auto &target_v = target(edge, instance->getComputationalDag()); - - if (vector_schedule.assignedSuperstep(source_v) >= vector_schedule.assignedSuperstep(target_v)) { - if (vector_schedule.assignedProcessor(source_v) != vector_schedule.assignedProcessor(target_v) - || vector_schedule.assignedSuperstep(source_v) > vector_schedule.assignedSuperstep(target_v)) { - current_violations.insert(edge); - -#ifdef KL_DEBUG - std::cout << "Edge: " << source_v << " -> " << target_v << std::endl; -#endif - } - } - } - - if (current_violations.size() > 0) { - current_feasible = false; - } else { -#ifdef KL_DEBUG - std::cout << "Current schedule is feasible" << std::endl; -#endif - - current_feasible = true; - } - }; - - virtual void apply_move(kl_move move) { - vector_schedule.setAssignedProcessor(move.node, move.to_proc); - vector_schedule.setAssignedSuperstep(move.node, move.to_step); - - set_schedule.step_processor_vertices[move.from_step][move.from_proc].erase(move.node); - set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node); - - current_cost += move.change_in_cost; - - step_processor_work[move.to_step][move.to_proc] += instance->getComputationalDag().vertex_work_weight(move.node); - step_processor_work[move.from_step][move.from_proc] -= instance->getComputationalDag().vertex_work_weight(move.node); - - update_max_work_datastructures(move); - update_violations(move.node); - - if constexpr (use_memory_constraint) { - memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step); - } - } - - virtual void initialize_current_schedule(const IBspSchedule &schedule) { -#ifdef KL_DEBUG - std::cout << "KLCurrentSchedule initialize current schedule" << std::endl; -#endif - - vector_schedule = VectorSchedule(schedule); - set_schedule = SetSchedule(schedule); - - initialize_superstep_datastructures(); - - compute_work_memory_datastructures(0, num_steps() - 1); - 
recompute_current_violations(); - - cost_f->compute_current_costs(); - } - - private: - void update_violations(VertexType node) { - new_violations.clear(); - resolved_violations.clear(); - - for (const auto &edge : out_edges(node, instance->getComputationalDag())) { - const auto &child = target(edge, instance->getComputationalDag()); - - if (current_violations.find(edge) == current_violations.end()) { - if (vector_schedule.assignedSuperstep(node) >= vector_schedule.assignedSuperstep(child)) { - if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(child) - || vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(child)) { - current_violations.insert(edge); - new_violations[child] = edge; - } - } - } else { - if (vector_schedule.assignedSuperstep(node) <= vector_schedule.assignedSuperstep(child)) { - if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(child) - || vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(child)) { - current_violations.erase(edge); - resolved_violations.insert(edge); - } - } - } - } - - for (const auto &edge : in_edges(node, instance->getComputationalDag())) { - const auto &parent = source(edge, instance->getComputationalDag()); - - if (current_violations.find(edge) == current_violations.end()) { - if (vector_schedule.assignedSuperstep(node) <= vector_schedule.assignedSuperstep(parent)) { - if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(parent) - || vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(parent)) { - current_violations.insert(edge); - new_violations[parent] = edge; - } - } - } else { - if (vector_schedule.assignedSuperstep(node) >= vector_schedule.assignedSuperstep(parent)) { - if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(parent) - || vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(parent)) { - 
current_violations.erase(edge); - resolved_violations.insert(edge); - } - } - } - } - -#ifdef KL_DEBUG - - if (new_violations.size() > 0) { - std::cout << "New violations: " << std::endl; - for (const auto &edge : new_violations) { - std::cout << "Edge: " << source(edge.second, instance->getComputationalDag()) << " -> " - << target(edge.second, instance->getComputationalDag()) << std::endl; - } - } - - if (resolved_violations.size() > 0) { - std::cout << "Resolved violations: " << std::endl; - for (const auto &edge : resolved_violations) { - std::cout << "Edge: " << source(edge, instance->getComputationalDag()) << " -> " - << target(edge, instance->getComputationalDag()) << std::endl; - } - } - -#endif - - if (current_violations.size() > 0) { - current_feasible = false; - } else { - current_feasible = true; - } - } - - void update_max_work_datastructures(kl_move move) { - if (move.from_step == move.to_step) { - recompute_superstep_max_work(move.from_step); - - } else { - recompute_superstep_max_work(move.from_step); - recompute_superstep_max_work(move.to_step); - } - } - - void recompute_superstep_max_work(unsigned step) { - step_max_work[step] = 0; - step_second_max_work[step] = 0; - - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - if (step_processor_work[step][proc] > step_max_work[step]) { - step_second_max_work[step] = step_max_work[step]; - step_max_work[step] = step_processor_work[step][proc]; - - } else if (step_processor_work[step][proc] > step_second_max_work[step]) { - step_second_max_work[step] = step_processor_work[step][proc]; - } - } - } -}; - -template -class kl_current_schedule_max_comm : public kl_current_schedule { - public: - std::vector>> step_processor_send; - std::vector> step_max_send; - std::vector> step_max_receive; - - std::vector>> step_processor_receive; - std::vector> step_second_max_send; - std::vector> step_second_max_receive; -}; - -} // namespace osp diff --git 
a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp deleted file mode 100644 index 81841b0e..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp +++ /dev/null @@ -1,81 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include -#include -#include -#include -#include - -#include "kl_total.hpp" - -namespace osp { - -template -class kl_hyper_total_comm : public kl_total { - protected: - virtual void compute_comm_gain(vertex_idx_t node, - unsigned current_step, - unsigned current_proc, - unsigned new_proc) override { - throw std::runtime_error("Not implemented yet"); - } - - virtual double compute_current_costs() override { - double work_costs = 0; - for (unsigned step = 0; step < current_schedule.num_steps(); step++) { - work_costs += current_schedule.step_max_work[step]; - } - - double comm_costs = 0; - - for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) { - if (is_sink(node, current_schedule.instance->getComputationalDag())) { - continue; - } - - std::unordered_set intersects; - - for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - const unsigned &target_proc = current_schedule.vector_schedule.assignedProcessor(target); 
- - if (current_schedule.vector_schedule.assignedProcessor(node) != target_proc) { - intersects.insert(target_proc); - } - } - - comm_costs += intersects.size() * current_schedule.instance->getComputationalDag().vertex_comm_weight(node); - } - - current_schedule.current_cost = work_costs + comm_costs * current_schedule.comm_multiplier - + (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts(); - - return current_schedule.current_cost; - } - - public: - kl_hyper_total_comm(bool use_node_communication_costs_ = false) : kl_total(use_node_communication_costs_) {} - - virtual ~kl_hyper_total_comm() = default; - - virtual std::string getScheduleName() const override { return "KLHyperTotalComm"; } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp deleted file mode 100644 index 6d4a15fe..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp +++ /dev/null @@ -1,83 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner -*/ - -#pragma once - -#include -#include -#include -#include -#include - -#include "kl_total.hpp" - -namespace osp { - -template -class kl_hyper_total_cut : public kl_total { - protected: - virtual void compute_comm_gain(vertex_idx_t node, - unsigned current_step, - unsigned current_proc, - unsigned new_proc) override { - throw std::runtime_error("Not implemented yet"); - } - - virtual double compute_current_costs() override { - double work_costs = 0; - for (unsigned step = 0; step < current_schedule.num_steps(); step++) { - work_costs += current_schedule.step_max_work[step]; - } - - double comm_costs = 0; - - for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) { - if (is_sink(node, current_schedule.instance->getComputationalDag())) { - continue; - } - - std::unordered_set intersects; - - for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - const unsigned &target_proc = current_schedule.vector_schedule.assignedProcessor(target); - const unsigned &target_step = current_schedule.vector_schedule.assignedSuperstep(target); - - if (current_schedule.vector_schedule.assignedProcessor(node) != target_proc - || current_schedule.vector_schedule.assignedSuperstep(node) != target_step) { - intersects.insert(current_schedule.instance->numberOfProcessors() * target_step + target_proc); - } - } - - comm_costs += intersects.size() * current_schedule.instance->getComputationalDag().vertex_comm_weight(node); - } - - current_schedule.current_cost = work_costs + comm_costs * current_schedule.comm_multiplier - + (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts(); - - return current_schedule.current_cost; - } - - public: - kl_hyper_total_cut(bool use_node_communication_costs_ = false) : kl_total(use_node_communication_costs_) {} - - virtual ~kl_hyper_total_cut() = default; - - virtual std::string getScheduleName() const override { return "KLHyperTotalCut"; } -}; - -} 
// namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver.hpp new file mode 100644 index 00000000..72493c31 --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver.hpp @@ -0,0 +1,1912 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "kl_active_schedule.hpp" +#include "kl_util.hpp" +#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp" +#include "osp/auxiliary/misc.hpp" +#include "osp/bsp/model/util/CompatibleProcessorRange.hpp" +#include "osp/bsp/scheduler/ImprovementScheduler.hpp" +#include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" +#include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" +#include "osp/graph_algorithms/directed_graph_util.hpp" + +namespace osp { + +struct KlParameter { + double timeQuality_ = 0.8; + double superstepRemoveStrength_ = 0.5; + unsigned numParallelLoops_ = 4; + + unsigned maxInnerIterationsReset_ = 500; + unsigned maxNoImprovementIterations_ = 50; + + constexpr static unsigned abortScatterNodesViolationThreshold_ = 500; + constexpr static unsigned initialViolationThreshold_ = 250; + + unsigned maxNoVioaltionsRemovedBacktrackReset_; + 
unsigned removeStepEpocs_; + unsigned nodeMaxStepSelectionEpochs_; + unsigned maxNoVioaltionsRemovedBacktrackForRemoveStepReset_; + unsigned maxOuterIterations_; + unsigned tryRemoveStepAfterNumOuterIterations_; + unsigned minInnerIterReset_; + + unsigned threadMinRange_ = 8; + unsigned threadRangeGap_ = 0; +}; + +template +struct KlUpdateInfo { + VertexType node_ = 0; + + bool fullUpdate_ = false; + bool updateFromStep_ = false; + bool updateToStep_ = false; + bool updateEntireToStep_ = false; + bool updateEntireFromStep_ = false; + + KlUpdateInfo() = default; + + KlUpdateInfo(VertexType n) : node_(n), fullUpdate_(false), updateEntireToStep_(false), updateEntireFromStep_(false) {} + + KlUpdateInfo(VertexType n, bool full) + : node_(n), fullUpdate_(full), updateEntireToStep_(false), updateEntireFromStep_(false) {} +}; + +template +class KlImprover : public ImprovementScheduler { + static_assert(isDirectedGraphEdgeDescV, "GraphT must satisfy the directed_graph concept"); + static_assert(hasHashableEdgeDescV, "GraphT must satisfy the HasHashableEdgeDesc concept"); + static_assert(isComputationalDagV, "GraphT must satisfy the computational_dag concept"); + + protected: + constexpr static unsigned windowRange_ = 2 * windowSize + 1; + constexpr static bool enableQuickMoves_ = true; + constexpr static bool enablePreresolvingViolations_ = true; + constexpr static double epsilon_ = 1e-9; + + using VertexMemWeightT = osp::VMemwT; + using VertexCommWeightT = osp::VCommwT; + using VertexWorkWeightT = osp::VWorkwT; + using VertexType = VertexIdxT; + using EdgeType = EdgeDescT; + + using KlMove = KlMoveStruct; + using HeapDatastructure = MaxPairingHeap; + using ActiveScheduleT = KlActiveSchedule; + using NodeSelectionContainerT = AdaptiveAffinityTable; + using KlGainUpdateInfo = KlUpdateInfo; + + struct ThreadSearchContext { + unsigned threadId_ = 0; + unsigned startStep_ = 0; + unsigned endStep_ = 0; + unsigned originalEndStep_ = 0; + + VectorVertexLockManager lockManager_; + 
HeapDatastructure maxGainHeap_; + NodeSelectionContainerT affinityTable_; + std::vector> localAffinityTable_; + RewardPenaltyStrategy rewardPenaltyStrat_; + VertexSelectionStrategy selectionStrategy_; + ThreadLocalActiveScheduleData activeScheduleData_; + + double averageGain_ = 0.0; + unsigned maxInnerIterations_ = 0; + unsigned noImprovementIterationsReducePenalty_ = 0; + unsigned minInnerIter_ = 0; + unsigned noImprovementIterationsIncreaseInnerIter_ = 0; + unsigned stepSelectionEpochCounter_ = 0; + unsigned stepSelectionCounter_ = 0; + unsigned stepToRemove_ = 0; + unsigned localSearchStartStep_ = 0; + unsigned unlockEdgeBacktrackCounter_ = 0; + unsigned unlockEdgeBacktrackCounterReset_ = 0; + unsigned maxNoVioaltionsRemovedBacktrack_ = 0; + + inline unsigned NumSteps() const { return endStep_ - startStep_ + 1; } + + inline unsigned StartIdx(const unsigned nodeStep) const { + return nodeStep < startStep_ + windowSize ? windowSize - (nodeStep - startStep_) : 0; + } + + inline unsigned EndIdx(unsigned nodeStep) const { + return nodeStep + windowSize <= endStep_ ? windowRange_ : windowRange_ - (nodeStep + windowSize - endStep_); + } + }; + + bool computeWithTimeLimit_ = false; + + BspSchedule *inputSchedule_; + const GraphT *graph_; + const BspInstance *instance_; + + CompatibleProcessorRange procRange_; + + KlParameter parameters_; + std::mt19937 gen_; + + ActiveScheduleT activeSchedule_; + CommCostFunctionT commCostF_; + std::vector threadDataVec_; + std::vector threadFinishedVec_; + + inline unsigned RelStepIdx(const unsigned nodeStep, const unsigned moveStep) const { + return (moveStep >= nodeStep) ? 
((moveStep - nodeStep) + windowSize) : (windowSize - (nodeStep - moveStep)); + } + + inline bool IsCompatible(VertexType node, unsigned proc) const { + return activeSchedule_.GetInstance().IsCompatible(node, proc); + } + + void SetStartStep(const unsigned step, ThreadSearchContext &threadData) { + threadData.startStep_ = step; + threadData.stepToRemove_ = step; + threadData.stepSelectionCounter_ = step; + + threadData.averageGain_ = 0.0; + threadData.maxInnerIterations_ = parameters_.maxInnerIterationsReset_; + threadData.noImprovementIterationsReducePenalty_ = parameters_.maxNoImprovementIterations_ / 5; + threadData.minInnerIter_ = parameters_.minInnerIterReset_; + threadData.stepSelectionEpochCounter_ = 0; + threadData.noImprovementIterationsIncreaseInnerIter_ = 10; + threadData.unlockEdgeBacktrackCounterReset_ = 0; + threadData.unlockEdgeBacktrackCounter_ = threadData.unlockEdgeBacktrackCounterReset_; + threadData.maxNoVioaltionsRemovedBacktrack_ = parameters_.maxNoVioaltionsRemovedBacktrackReset_; + } + + KlMove GetBestMove(NodeSelectionContainerT &affinityTable, + VectorVertexLockManager &lockManager, + HeapDatastructure &maxGainHeap) { + // To introduce non-determinism and help escape local optima, if there are multiple moves with the same + // top gain, we randomly select one. We check up to `local_max` ties. 
+ const unsigned localMax = 50; + std::vector topGainNodes = maxGainHeap.GetTopKeys(localMax); + + if (topGainNodes.empty()) { + // This case is guarded by the caller, but for safety: + topGainNodes.push_back(maxGainHeap.Top()); + } + + std::uniform_int_distribution dis(0, topGainNodes.size() - 1); + const VertexType node = topGainNodes[dis(gen_)]; + + KlMove bestMove = maxGainHeap.GetValue(node); + maxGainHeap.Erase(node); + lockManager.Lock(node); + affinityTable.Remove(node); + + return bestMove; + } + + inline void ProcessOtherStepsBestMove(const unsigned idx, + const unsigned nodeStep, + const VertexType &node, + const CostT affinityCurrentProcStep, + CostT &maxGain, + unsigned &maxProc, + unsigned &maxStep, + const std::vector> &affinityTableNode) const { + for (const unsigned p : procRange_.CompatibleProcessorsVertex(node)) { + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.CanMove(node, p, nodeStep + idx - windowSize)) { + continue; + } + } + + const CostT gain = affinityCurrentProcStep - affinityTableNode[p][idx]; + if (gain > maxGain) { + maxGain = gain; + maxProc = p; + maxStep = idx; + } + } + } + + template + KlMove ComputeBestMove(VertexType node, + const std::vector> &affinityTableNode, + ThreadSearchContext &threadData) { + const unsigned nodeStep = activeSchedule_.AssignedSuperstep(node); + const unsigned nodeProc = activeSchedule_.AssignedProcessor(node); + + CostT maxGain = std::numeric_limits::lowest(); + + unsigned maxProc = std::numeric_limits::max(); + unsigned maxStep = std::numeric_limits::max(); + + const CostT affinityCurrentProcStep = affinityTableNode[nodeProc][windowSize]; + + unsigned idx = threadData.StartIdx(nodeStep); + for (; idx < windowSize; idx++) { + ProcessOtherStepsBestMove(idx, nodeStep, node, affinityCurrentProcStep, maxGain, maxProc, maxStep, affinityTableNode); + } + + if constexpr (moveToSameSuperStep) { + for (const unsigned proc : 
procRange_.CompatibleProcessorsVertex(node)) { + if (proc == nodeProc) { + continue; + } + + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.CanMove(node, proc, nodeStep + idx - windowSize)) { + continue; + } + } + + const CostT gain = affinityCurrentProcStep - affinityTableNode[proc][windowSize]; + if (gain > maxGain) { + maxGain = gain; + maxProc = proc; + maxStep = idx; + } + } + } + + idx++; + + const unsigned bound = threadData.EndIdx(nodeStep); + for (; idx < bound; idx++) { + ProcessOtherStepsBestMove(idx, nodeStep, node, affinityCurrentProcStep, maxGain, maxProc, maxStep, affinityTableNode); + } + + return KlMove(node, maxGain, nodeProc, nodeStep, maxProc, nodeStep + maxStep - windowSize); + } + + KlGainUpdateInfo UpdateNodeWorkAffinityAfterMove(VertexType node, + KlMove move, + const PreMoveWorkData &prevWorkData, + std::vector> &affinityTableNode) { + const unsigned nodeStep = activeSchedule_.AssignedSuperstep(node); + const VertexWorkWeightT vertexWeight = graph_->VertexWorkWeight(node); + + KlGainUpdateInfo updateInfo(node); + + if (move.fromStep_ == move.toStep_) { + const unsigned lowerBound = move.fromStep_ > windowSize ? 
move.fromStep_ - windowSize : 0; + if (lowerBound <= nodeStep && nodeStep <= move.fromStep_ + windowSize) { + updateInfo.updateFromStep_ = true; + updateInfo.updateToStep_ = true; + + const VertexWorkWeightT prevMaxWork = prevWorkData.fromStepMaxWork_; + const VertexWorkWeightT prevSecondMaxWork = prevWorkData.fromStepSecondMaxWork_; + + if (nodeStep == move.fromStep_) { + const unsigned nodeProc = activeSchedule_.AssignedProcessor(node); + const VertexWorkWeightT newMaxWeight = activeSchedule_.GetStepMaxWork(move.fromStep_); + const VertexWorkWeightT newSecondMaxWeight = activeSchedule_.GetStepSecondMaxWork(move.fromStep_); + const VertexWorkWeightT newStepProcWork = activeSchedule_.GetStepProcessorWork(nodeStep, nodeProc); + const VertexWorkWeightT prevStepProcWork + = (nodeProc == move.fromProc_) ? newStepProcWork + graph_->VertexWorkWeight(move.node_) + : (nodeProc == move.toProc_) ? newStepProcWork - graph_->VertexWorkWeight(move.node_) + : newStepProcWork; + const bool prevIsSoleMaxProcessor = (prevWorkData.fromStepMaxWorkProcessorCount_ == 1) + && (prevMaxWork == prevStepProcWork); + const CostT prevNodeProcAffinity + = prevIsSoleMaxProcessor ? std::min(vertexWeight, prevMaxWork - prevSecondMaxWork) : 0.0; + const bool newIsSoleMaxProcessor = (activeSchedule_.GetStepMaxWorkProcessorCount()[nodeStep] == 1) + && (newMaxWeight == newStepProcWork); + const CostT newNodeProcAffinity + = newIsSoleMaxProcessor ? 
std::min(vertexWeight, newMaxWeight - newSecondMaxWeight) : 0.0; + + const CostT diff = newNodeProcAffinity - prevNodeProcAffinity; + if (std::abs(diff) > epsilon_) { + updateInfo.fullUpdate_ = true; + affinityTableNode[nodeProc][windowSize] += diff; // Use the pre-calculated diff + } + + if ((prevMaxWork != newMaxWeight) || updateInfo.fullUpdate_) { + updateInfo.updateEntireFromStep_ = true; + + for (const unsigned proc : procRange_.CompatibleProcessorsVertex(node)) { + if ((proc == nodeProc) || (proc == move.fromProc_) || (proc == move.toProc_)) { + continue; + } + + const VertexWorkWeightT newWeight = vertexWeight + activeSchedule_.GetStepProcessorWork(nodeStep, proc); + const CostT prevOtherAffinity = ComputeSameStepAffinity(prevMaxWork, newWeight, prevNodeProcAffinity); + const CostT otherAffinity = ComputeSameStepAffinity(newMaxWeight, newWeight, newNodeProcAffinity); + + affinityTableNode[proc][windowSize] += (otherAffinity - prevOtherAffinity); + } + } + + if (nodeProc != move.fromProc_ && IsCompatible(node, move.fromProc_)) { + const VertexWorkWeightT prevNewWeight = vertexWeight + + activeSchedule_.GetStepProcessorWork(nodeStep, move.fromProc_) + + graph_->VertexWorkWeight(move.node_); + const CostT prevOtherAffinity = ComputeSameStepAffinity(prevMaxWork, prevNewWeight, prevNodeProcAffinity); + const VertexWorkWeightT newWeight + = vertexWeight + activeSchedule_.GetStepProcessorWork(nodeStep, move.fromProc_); + const CostT otherAffinity = ComputeSameStepAffinity(newMaxWeight, newWeight, newNodeProcAffinity); + affinityTableNode[move.fromProc_][windowSize] += (otherAffinity - prevOtherAffinity); + } + + if (nodeProc != move.toProc_ && IsCompatible(node, move.toProc_)) { + const VertexWorkWeightT prevNewWeight = vertexWeight + + activeSchedule_.GetStepProcessorWork(nodeStep, move.toProc_) + - graph_->VertexWorkWeight(move.node_); + const CostT prevOtherAffinity = ComputeSameStepAffinity(prevMaxWork, prevNewWeight, prevNodeProcAffinity); + const 
VertexWorkWeightT newWeight + = vertexWeight + activeSchedule_.GetStepProcessorWork(nodeStep, move.toProc_); + const CostT otherAffinity = ComputeSameStepAffinity(newMaxWeight, newWeight, newNodeProcAffinity); + affinityTableNode[move.toProc_][windowSize] += (otherAffinity - prevOtherAffinity); + } + + } else { + const VertexWorkWeightT newMaxWeight = activeSchedule_.GetStepMaxWork(move.fromStep_); + const unsigned idx = RelStepIdx(nodeStep, move.fromStep_); + if (prevMaxWork != newMaxWeight) { + updateInfo.updateEntireFromStep_ = true; + // update moving to all procs with special for move.fromProc_ + for (const unsigned proc : procRange_.CompatibleProcessorsVertex(node)) { + const VertexWorkWeightT newWeight + = vertexWeight + activeSchedule_.GetStepProcessorWork(move.fromStep_, proc); + if (proc == move.fromProc_) { + const VertexWorkWeightT prevNewWeight + = vertexWeight + activeSchedule_.GetStepProcessorWork(move.fromStep_, proc) + + graph_->VertexWorkWeight(move.node_); + const CostT prevAffinity = prevMaxWork < prevNewWeight ? static_cast(prevNewWeight) + - static_cast(prevMaxWork) + : 0.0; + const CostT newAffinity = newMaxWeight < newWeight + ? static_cast(newWeight) - static_cast(newMaxWeight) + : 0.0; + affinityTableNode[proc][idx] += newAffinity - prevAffinity; + } else if (proc == move.toProc_) { + const VertexWorkWeightT prevNewWeight = vertexWeight + + activeSchedule_.GetStepProcessorWork(move.toStep_, proc) + - graph_->VertexWorkWeight(move.node_); + const CostT prevAffinity = prevMaxWork < prevNewWeight ? static_cast(prevNewWeight) + - static_cast(prevMaxWork) + : 0.0; + const CostT newAffinity = newMaxWeight < newWeight + ? static_cast(newWeight) - static_cast(newMaxWeight) + : 0.0; + affinityTableNode[proc][idx] += newAffinity - prevAffinity; + } else { + const CostT prevAffinity = prevMaxWork < newWeight + ? static_cast(newWeight) - static_cast(prevMaxWork) + : 0.0; + const CostT newAffinity = newMaxWeight < newWeight + ? 
static_cast(newWeight) - static_cast(newMaxWeight) + : 0.0; + affinityTableNode[proc][idx] += newAffinity - prevAffinity; + } + } + } else { + // update only move.fromProc_ and move.toProc_ + if (IsCompatible(node, move.fromProc_)) { + const VertexWorkWeightT fromNewWeight + = vertexWeight + activeSchedule_.GetStepProcessorWork(move.fromStep_, move.fromProc_); + const VertexWorkWeightT fromPrevNewWeight = fromNewWeight + graph_->VertexWorkWeight(move.node_); + const CostT fromPrevAffinity = prevMaxWork < fromPrevNewWeight ? static_cast(fromPrevNewWeight) + - static_cast(prevMaxWork) + : 0.0; + + const CostT fromNewAffinity = newMaxWeight < fromNewWeight ? static_cast(fromNewWeight) + - static_cast(newMaxWeight) + : 0.0; + affinityTableNode[move.fromProc_][idx] += fromNewAffinity - fromPrevAffinity; + } + + if (IsCompatible(node, move.toProc_)) { + const VertexWorkWeightT toNewWeight + = vertexWeight + activeSchedule_.GetStepProcessorWork(move.toStep_, move.toProc_); + const VertexWorkWeightT toPrevNewWeight = toNewWeight - graph_->VertexWorkWeight(move.node_); + const CostT toPrevAffinity = prevMaxWork < toPrevNewWeight ? static_cast(toPrevNewWeight) + - static_cast(prevMaxWork) + : 0.0; + + const CostT toNewAffinity = newMaxWeight < toNewWeight + ? 
static_cast(toNewWeight) - static_cast(newMaxWeight) + : 0.0; + affinityTableNode[move.toProc_][idx] += toNewAffinity - toPrevAffinity; + } + } + } + } + + } else { + const unsigned nodeProc = activeSchedule_.AssignedProcessor(node); + ProcessWorkUpdateStep(node, + nodeStep, + nodeProc, + vertexWeight, + move.fromStep_, + move.fromProc_, + graph_->VertexWorkWeight(move.node_), + prevWorkData.fromStepMaxWork_, + prevWorkData.fromStepSecondMaxWork_, + prevWorkData.fromStepMaxWorkProcessorCount_, + updateInfo.updateFromStep_, + updateInfo.updateEntireFromStep_, + updateInfo.fullUpdate_, + affinityTableNode); + ProcessWorkUpdateStep(node, + nodeStep, + nodeProc, + vertexWeight, + move.toStep_, + move.toProc_, + -graph_->VertexWorkWeight(move.node_), + prevWorkData.toStepMaxWork_, + prevWorkData.toStepSecondMaxWork_, + prevWorkData.toStepMaxWorkProcessorCount_, + updateInfo.updateToStep_, + updateInfo.updateEntireToStep_, + updateInfo.fullUpdate_, + affinityTableNode); + } + + return updateInfo; + } + + void ProcessWorkUpdateStep(VertexType node, + unsigned nodeStep, + unsigned nodeProc, + VertexWorkWeightT vertexWeight, + unsigned moveStep, + unsigned moveProc, + VertexWorkWeightT moveCorrectionNodeWeight, + const VertexWorkWeightT prevMoveStepMaxWork, + const VertexWorkWeightT prevMoveStepSecondMaxWork, + unsigned prevMoveStepMaxWorkProcessorCount, + bool &updateStep, + bool &updateEntireStep, + bool &fullUpdate, + std::vector> &affinityTableNode); + void UpdateNodeWorkAffinity(NodeSelectionContainerT &nodes, + KlMove move, + const PreMoveWorkData &prevWorkData, + std::map &recomputeMaxGain); + void UpdateBestMove( + VertexType node, unsigned step, unsigned proc, NodeSelectionContainerT &affinityTable, ThreadSearchContext &threadData); + void UpdateBestMove(VertexType node, unsigned step, NodeSelectionContainerT &affinityTable, ThreadSearchContext &threadData); + void UpdateMaxGain(KlMove move, std::map &recomputeMaxGain, ThreadSearchContext &threadData); + void 
ComputeWorkAffinity(VertexType node, std::vector> &affinityTableNode, ThreadSearchContext &threadData); + + inline void RecomputeNodeMaxGain(VertexType node, NodeSelectionContainerT &affinityTable, ThreadSearchContext &threadData) { + const auto bestMove = ComputeBestMove(node, affinityTable[node], threadData); + threadData.maxGainHeap_.Update(node, bestMove); + } + + inline CostT ComputeSameStepAffinity(const VertexWorkWeightT &maxWorkForStep, + const VertexWorkWeightT &newWeight, + const CostT &nodeProcAffinity) { + const CostT maxWorkAfterRemoval = static_cast(maxWorkForStep) - nodeProcAffinity; + if (newWeight > maxWorkAfterRemoval) { + return newWeight - maxWorkAfterRemoval; + } + return 0.0; + } + + inline CostT ApplyMove(KlMove move, ThreadSearchContext &threadData) { + activeSchedule_.ApplyMove(move, threadData.activeScheduleData_); + commCostF_.UpdateDatastructureAfterMove(move, threadData.startStep_, threadData.endStep_); + CostT changeInCost = -move.gain_; + changeInCost += static_cast(threadData.activeScheduleData_.resolvedViolations_.size()) + * threadData.rewardPenaltyStrat_.reward_; + changeInCost + -= static_cast(threadData.activeScheduleData_.newViolations_.size()) * threadData.rewardPenaltyStrat_.penalty_; + +#ifdef KL_DEBUG + std::cout << "penalty: " << threadData.rewardPenaltyStrat_.penalty_ + << " num violations: " << threadData.activeScheduleData_.currentViolations_.size() + << " num new violations: " << threadData.activeScheduleData_.newViolations_.size() + << ", num resolved violations: " << threadData.activeScheduleData_.resolvedViolations_.size() + << ", reward: " << threadData.rewardPenaltyStrat_.reward_ << std::endl; + std::cout << "apply move, previous cost: " << threadData.activeScheduleData_.cost_ + << ", new cost: " << threadData.activeScheduleData_.cost_ + changeInCost << ", " + << (threadData.activeScheduleData_.feasible_ ? 
"feasible," : "infeasible,") << std::endl; +#endif + + threadData.activeScheduleData_.UpdateCost(changeInCost); + + return changeInCost; + } + + void RunQuickMoves(unsigned &innerIter, + ThreadSearchContext &threadData, + const CostT changeInCost, + const VertexType bestMoveNode) { +#ifdef KL_DEBUG + std::cout << "Starting quick moves sequence." << std::endl; +#endif + innerIter++; + + const size_t numAppliedMoves = threadData.activeScheduleData_.appliedMoves_.size() - 1; + const CostT savedCost = threadData.activeScheduleData_.cost_ - changeInCost; + + std::unordered_set localLock; + localLock.insert(bestMoveNode); + std::vector quickMovesStack; + quickMovesStack.reserve(10 + threadData.activeScheduleData_.newViolations_.size() * 2); + + for (const auto &keyValuePair : threadData.activeScheduleData_.newViolations_) { + const auto &key = keyValuePair.first; + quickMovesStack.push_back(key); + } + + while (quickMovesStack.size() > 0) { + auto nextNodeToMove = quickMovesStack.back(); + quickMovesStack.pop_back(); + + threadData.rewardPenaltyStrat_.InitRewardPenalty( + static_cast(threadData.activeScheduleData_.currentViolations_.size()) + 1.0); + ComputeNodeAffinities(nextNodeToMove, threadData.localAffinityTable_, threadData); + KlMove bestQuickMove = ComputeBestMove(nextNodeToMove, threadData.localAffinityTable_, threadData); + + localLock.insert(nextNodeToMove); + if (bestQuickMove.gain_ <= std::numeric_limits::lowest()) { + continue; + } + +#ifdef KL_DEBUG + std::cout << " >>> move node " << bestQuickMove.node_ << " with gain " << bestQuickMove.gain_ + << ", from proc|step: " << bestQuickMove.fromProc_ << "|" << bestQuickMove.fromStep_ + << " to: " << bestQuickMove.toProc_ << "|" << bestQuickMove.toStep_ << std::endl; +#endif + + ApplyMove(bestQuickMove, threadData); + innerIter++; + + if (threadData.activeScheduleData_.newViolations_.size() > 0) { + bool abort = false; + + for (const auto &keyValuePair : threadData.activeScheduleData_.newViolations_) { + const 
auto &key = keyValuePair.first; + if (localLock.find(key) != localLock.end()) { + abort = true; + break; + } + quickMovesStack.push_back(key); + } + + if (abort) { + break; + } + + } else if (threadData.activeScheduleData_.feasible_) { + break; + } + } + + if (!threadData.activeScheduleData_.feasible_) { + activeSchedule_.RevertScheduleToBound(numAppliedMoves, + savedCost, + true, + commCostF_, + threadData.activeScheduleData_, + threadData.startStep_, + threadData.endStep_); +#ifdef KL_DEBUG + std::cout << "Ending quick moves sequence with infeasible solution." << std::endl; +#endif + } +#ifdef KL_DEBUG + else { + std::cout << "Ending quick moves sequence with feasible solution." << std::endl; + } +#endif + + threadData.affinityTable_.Trim(); + threadData.maxGainHeap_.Clear(); + threadData.rewardPenaltyStrat_.InitRewardPenalty(1.0); + InsertGainHeap(threadData); // Re-initialize the heap with the current state + } + + void ResolveViolations(ThreadSearchContext &threadData) { + auto ¤tViolations = threadData.activeScheduleData_.currentViolations_; + unsigned numViolations = static_cast(currentViolations.size()); + if (numViolations > 0) { +#ifdef KL_DEBUG_1 + std::cout << "thread " << threadData.threadId_ << ", Starting preresolving violations with " << numViolations + << " initial violations" << std::endl; +#endif + threadData.rewardPenaltyStrat_.InitRewardPenalty(static_cast(numViolations) + 1.0); + std::unordered_set localLock; + unsigned numIter = 0; + const unsigned minIter = numViolations / 4; + while (not currentViolations.empty()) { + std::uniform_int_distribution dis(0, currentViolations.size() - 1); + auto it = currentViolations.begin(); + std::advance(it, dis(gen_)); + const auto &nextEdge = *it; + const VertexType sourceV = Source(nextEdge, *graph_); + const VertexType targetV = Target(nextEdge, *graph_); + const bool sourceLocked = localLock.find(sourceV) != localLock.end(); + const bool targetLocked = localLock.find(targetV) != localLock.end(); + + if 
(sourceLocked && targetLocked) { +#ifdef KL_DEBUG_1 + std::cout << "source, target locked" << std::endl; +#endif + break; + } + + KlMove bestMove; + if (sourceLocked || targetLocked) { + const VertexType node = sourceLocked ? targetV : sourceV; + ComputeNodeAffinities(node, threadData.localAffinityTable_, threadData); + bestMove = ComputeBestMove(node, threadData.localAffinityTable_, threadData); + } else { + ComputeNodeAffinities(sourceV, threadData.localAffinityTable_, threadData); + KlMove bestSourceVMove = ComputeBestMove(sourceV, threadData.localAffinityTable_, threadData); + ComputeNodeAffinities(targetV, threadData.localAffinityTable_, threadData); + KlMove bestTargetVMove = ComputeBestMove(targetV, threadData.localAffinityTable_, threadData); + bestMove = bestTargetVMove.gain_ > bestSourceVMove.gain_ ? std::move(bestTargetVMove) + : std::move(bestSourceVMove); + } + + localLock.insert(bestMove.node_); + if (bestMove.gain_ <= std::numeric_limits::lowest()) { + continue; + } + + ApplyMove(bestMove, threadData); + threadData.affinityTable_.Insert(bestMove.node_); +#ifdef KL_DEBUG_1 + std::cout << "move node " << bestMove.node_ << " with gain " << bestMove.gain_ + << ", from proc|step: " << bestMove.fromProc_ << "|" << bestMove.fromStep_ + << " to: " << bestMove.toProc_ << "|" << bestMove.toStep_ << std::endl; +#endif + const unsigned newNumViolations = static_cast(currentViolations.size()); + if (newNumViolations == 0) { + break; + } + + if (threadData.activeScheduleData_.newViolations_.size() > 0) { + for (const auto &vertexEdgePair : threadData.activeScheduleData_.newViolations_) { + const auto &vertex = vertexEdgePair.first; + threadData.affinityTable_.Insert(vertex); + } + } + + const double gain = static_cast(numViolations) - static_cast(newNumViolations); + numViolations = newNumViolations; + UpdateAvgGain(gain, numIter++, threadData.averageGain_); +#ifdef KL_DEBUG_1 + std::cout << "thread " << threadData.threadId_ << ", preresolving violations with " << 
numViolations + << " violations, " << numIter << " #iterations, " << threadData.averageGain_ << " average gain" + << std::endl; +#endif + if (numIter > minIter && threadData.averageGain_ < 0.0) { + break; + } + } + threadData.averageGain_ = 0.0; + } + } + + void RunLocalSearch(ThreadSearchContext &threadData) { +#ifdef KL_DEBUG_1 + std::cout << "thread " << threadData.threadId_ + << ", start local search, initial schedule cost: " << threadData.activeScheduleData_.cost_ << " with " + << threadData.NumSteps() << " supersteps." << std::endl; +#endif + std::vector newNodes; + std::vector unlockNodes; + std::map recomputeMaxGain; + + const auto startTime = std::chrono::high_resolution_clock::now(); + + unsigned noImprovementIterCounter = 0; + unsigned outerIter = 0; + + for (; outerIter < parameters_.maxOuterIterations_; outerIter++) { + CostT initialInnerIterCost = threadData.activeScheduleData_.cost_; + + ResetInnerSearchStructures(threadData); + SelectActiveNodes(threadData); + threadData.rewardPenaltyStrat_.InitRewardPenalty( + static_cast(threadData.activeScheduleData_.currentViolations_.size()) + 1.0); + InsertGainHeap(threadData); + + unsigned innerIter = 0; + unsigned violationRemovedCount = 0; + unsigned resetCounter = 0; + bool iterInitalFeasible = threadData.activeScheduleData_.feasible_; + +#ifdef KL_DEBUG + std::cout << "------ start inner loop ------" << std::endl; + std::cout << "initial node selection: {"; + for (size_t i = 0; i < threadData.affinityTable_.size(); ++i) { + std::cout << threadData.affinityTable_.GetSelectedNodes()[i] << ", "; + } + std::cout << "}" << std::endl; +#endif +#ifdef KL_DEBUG_1 + if (not iterInitalFeasible) { + std::cout << "initial solution not feasible, num violations: " + << threadData.activeScheduleData_.currentViolations_.size() + << ". 
Penalty: " << threadData.rewardPenaltyStrat_.penalty_ + << ", reward: " << threadData.rewardPenaltyStrat_.reward_ << std::endl; + } +#endif +#ifdef KL_DEBUG_COST_CHECK + activeSchedule_.GetVectorSchedule().numberOfSupersteps = threadDataVec_[0].NumSteps(); + if (std::abs(commCostF_.ComputeScheduleCostTest() - threadData.activeScheduleData_.cost_) > 0.00001) { + std::cout << "computed cost: " << commCostF_.ComputeScheduleCostTest() + << ", current cost: " << threadData.activeScheduleData_.cost_ << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; + } + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.SatisfiedMemoryConstraint()) { + std::cout << "memory constraint not satisfied" << std::endl; + } + } +#endif + + while (innerIter < threadData.maxInnerIterations_ && threadData.maxGainHeap_.size() > 0) { + KlMove bestMove + = GetBestMove(threadData.affinityTable_, + threadData.lockManager_, + threadData.maxGainHeap_); // locks bestMove.node and removes it from node_selection + if (bestMove.gain_ <= std::numeric_limits::lowest()) { + break; + } + UpdateAvgGain(bestMove.gain_, innerIter, threadData.averageGain_); +#ifdef KL_DEBUG + std::cout << " >>> move node " << bestMove.node_ << " with gain " << bestMove.gain_ + << ", from proc|step: " << bestMove.fromProc_ << "|" << bestMove.fromStep_ << " to: " << bestMove.toProc_ + << "|" << bestMove.toStep_ << ",avg gain: " << threadData.averageGain_ << std::endl; +#endif + if (innerIter > threadData.minInnerIter_ && threadData.averageGain_ < 0.0) { +#ifdef KL_DEBUG + std::cout << "Negative average gain: " << threadData.averageGain_ << ", end local search" << std::endl; +#endif + break; + } + +#ifdef KL_DEBUG + if (not activeSchedule_.GetInstance().IsCompatible(bestMove.node_, bestMove.toProc_)) { + std::cout << "move to incompatibe node" << std::endl; + } +#endif + + const auto prevWorkData = 
activeSchedule_.GetPreMoveWorkData(bestMove); + const typename CommCostFunctionT::PreMoveCommDataT prevCommData = commCostF_.GetPreMoveCommData(bestMove); + const CostT changeInCost = ApplyMove(bestMove, threadData); +#ifdef KL_DEBUG_COST_CHECK + activeSchedule_.GetVectorSchedule().numberOfSupersteps = threadDataVec_[0].NumSteps(); + if (std::abs(commCostF_.ComputeScheduleCostTest() - threadData.activeScheduleData_.cost_) > 0.00001) { + std::cout << "computed cost: " << commCostF_.ComputeScheduleCostTest() + << ", current cost: " << threadData.activeScheduleData_.cost_ << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; + } + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.SatisfiedMemoryConstraint()) { + std::cout << "memory constraint not satisfied" << std::endl; + } + } +#endif + if constexpr (enableQuickMoves_) { + if (iterInitalFeasible && threadData.activeScheduleData_.newViolations_.size() > 0) { + RunQuickMoves(innerIter, threadData, changeInCost, bestMove.node_); +#ifdef KL_DEBUG_COST_CHECK + activeSchedule_.GetVectorSchedule().numberOfSupersteps = threadDataVec_[0].NumSteps(); + if (std::abs(commCostF_.ComputeScheduleCostTest() - threadData.activeScheduleData_.cost_) > 0.00001) { + std::cout << "computed cost: " << commCostF_.ComputeScheduleCostTest() + << ", current cost: " << threadData.activeScheduleData_.cost_ << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" + << std::endl; + } + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.SatisfiedMemoryConstraint()) { + std::cout << "memory constraint not satisfied" << std::endl; + } + } +#endif + continue; + } + } + + if (threadData.activeScheduleData_.currentViolations_.size() > 0) { + if (threadData.activeScheduleData_.resolvedViolations_.size() > 0) { + violationRemovedCount = 
0; + } else { + violationRemovedCount++; + + if (violationRemovedCount > 3) { + if (resetCounter < threadData.maxNoVioaltionsRemovedBacktrack_ + && ((not iterInitalFeasible) + || (threadData.activeScheduleData_.cost_ < threadData.activeScheduleData_.bestCost_))) { + threadData.affinityTable_.ResetNodeSelection(); + threadData.maxGainHeap_.Clear(); + threadData.lockManager_.Clear(); + threadData.selectionStrategy_.SelectNodesViolations( + threadData.affinityTable_, + threadData.activeScheduleData_.currentViolations_, + threadData.startStep_, + threadData.endStep_); +#ifdef KL_DEBUG + std::cout << "Infeasible, and no violations resolved for 5 iterations, reset node selection" + << std::endl; +#endif + threadData.rewardPenaltyStrat_.InitRewardPenalty( + static_cast(threadData.activeScheduleData_.currentViolations_.size())); + InsertGainHeap(threadData); + + resetCounter++; + innerIter++; + continue; + } else { +#ifdef KL_DEBUG + std::cout << "Infeasible, and no violations resolved for 5 iterations, end local search" + << std::endl; +#endif + break; + } + } + } + } + + if (IsLocalSearchBlocked(threadData)) { + if (not BlockedEdgeStrategy(bestMove.node_, unlockNodes, threadData)) { + break; + } + } + + threadData.affinityTable_.Trim(); + UpdateAffinities(bestMove, threadData, recomputeMaxGain, newNodes, prevWorkData, prevCommData); + + for (const auto v : unlockNodes) { + threadData.lockManager_.Unlock(v); + } + newNodes.insert(newNodes.end(), unlockNodes.begin(), unlockNodes.end()); + unlockNodes.clear(); + +#ifdef KL_DEBUG + std::cout << "recmopute max gain: {"; + for (const auto mapPair : recomputeMaxGain) { + const auto &key = mapPair.first; + std::cout << key << ", "; + } + std::cout << "}" << std::endl; + std::cout << "new nodes: {"; + for (const auto v : newNodes) { + std::cout << v << ", "; + } + std::cout << "}" << std::endl; +#endif +#ifdef KL_DEBUG_COST_CHECK + activeSchedule_.GetVectorSchedule().numberOfSupersteps = threadDataVec_[0].NumSteps(); + if 
(std::abs(commCostF_.ComputeScheduleCostTest() - threadData.activeScheduleData_.cost_) > 0.00001) { + std::cout << "computed cost: " << commCostF_.ComputeScheduleCostTest() + << ", current cost: " << threadData.activeScheduleData_.cost_ << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; + } + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.SatisfiedMemoryConstraint()) { + std::cout << "memory constraint not satisfied" << std::endl; + } + } +#endif + UpdateMaxGain(bestMove, recomputeMaxGain, threadData); + InsertNewNodesGainHeap(newNodes, threadData.affinityTable_, threadData); + + recomputeMaxGain.clear(); + newNodes.clear(); + + innerIter++; + } + +#ifdef KL_DEBUG + std::cout << "--- end inner loop after " << innerIter + << " inner iterations, gain heap size: " << threadData.maxGainHeap_.size() << ", outer iteraion " + << outerIter << "/" << parameters_.maxOuterIterations_ + << ", current cost: " << threadData.activeScheduleData_.cost_ << ", " + << (threadData.activeScheduleData_.feasible_ ? 
"feasible" : "infeasible") << std::endl; +#endif +#ifdef KL_DEBUG_1 + const unsigned numStepsTmp = threadData.endStep_; +#endif + activeSchedule_.RevertToBestSchedule(threadData.localSearchStartStep_, + threadData.stepToRemove_, + commCostF_, + threadData.activeScheduleData_, + threadData.startStep_, + threadData.endStep_); +#ifdef KL_DEBUG_1 + if (threadData.localSearchStartStep_ > 0) { + if (numStepsTmp == threadData.endStep_) { + std::cout << "thread " << threadData.threadId_ << ", removing step " << threadData.stepToRemove_ + << " succeded " << std::endl; + } else { + std::cout << "thread " << threadData.threadId_ << ", removing step " << threadData.stepToRemove_ << " failed " + << std::endl; + } + } +#endif + +#ifdef KL_DEBUG_COST_CHECK + activeSchedule_.GetVectorSchedule().numberOfSupersteps = threadDataVec_[0].NumSteps(); + if (std::abs(commCostF_.ComputeScheduleCostTest() - threadData.activeScheduleData_.cost_) > 0.00001) { + std::cout << "computed cost: " << commCostF_.ComputeScheduleCostTest() + << ", current cost: " << threadData.activeScheduleData_.cost_ << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; + } + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.SatisfiedMemoryConstraint()) { + std::cout << "memory constraint not satisfied" << std::endl; + } + } +#endif + + if (computeWithTimeLimit_) { + auto finishTime = std::chrono::high_resolution_clock::now(); + auto duration = std::chrono::duration_cast(finishTime - startTime).count(); + if (duration > ImprovementScheduler::timeLimitSeconds_) { + break; + } + } + + if (OtherThreadsFinished(threadData.threadId_)) { +#ifdef KL_DEBUG_1 + std::cout << "thread " << threadData.threadId_ << ", other threads finished, end local search" << std::endl; +#endif + break; + } + + if (initialInnerIterCost <= threadData.activeScheduleData_.cost_) { + noImprovementIterCounter++; + + if 
(noImprovementIterCounter >= parameters_.maxNoImprovementIterations_) { +#ifdef KL_DEBUG_1 + std::cout << "thread " << threadData.threadId_ << ", no improvement for " + << parameters_.maxNoImprovementIterations_ << " iterations, end local search" << std::endl; +#endif + break; + } + } else { + noImprovementIterCounter = 0; + } + + AdjustLocalSearchParameters(outerIter, noImprovementIterCounter, threadData); + } + +#ifdef KL_DEBUG_1 + std::cout << "thread " << threadData.threadId_ << ", local search end after " << outerIter + << " outer iterations, current cost: " << threadData.activeScheduleData_.cost_ << " with " + << threadData.NumSteps() << " supersteps, vs serial cost " << activeSchedule_.GetTotalWorkWeight() << "." + << std::endl; +#endif + threadFinishedVec_[threadData.threadId_] = true; + } + + bool OtherThreadsFinished(const unsigned threadId) { + const size_t numThreads = threadFinishedVec_.size(); + if (numThreads == 1) { + return false; + } + + for (size_t i = 0; i < numThreads; i++) { + if (i != threadId && !threadFinishedVec_[i]) { + return false; + } + } + return true; + } + + inline void UpdateAffinities(const KlMove &bestMove, + ThreadSearchContext &threadData, + std::map &recomputeMaxGain, + std::vector &newNodes, + const PreMoveWorkData &prevWorkData, + const typename CommCostFunctionT::PreMoveCommDataT &prevCommData) { + if constexpr (CommCostFunctionT::isMaxCommCostFunction_) { + commCostF_.UpdateNodeCommAffinity( + bestMove, + threadData, + threadData.rewardPenaltyStrat_.penalty_, + threadData.rewardPenaltyStrat_.reward_, + recomputeMaxGain, + newNodes); // this only updated reward/penalty, collects newNodes, and fills recomputeMaxGain + + // Determine the steps where max/second_max/max_count for work/comm changed + std::unordered_set changedSteps; + + // Check work changes for fromStep + if (bestMove.fromStep_ == bestMove.toStep_) { + // Same step - check if max/second_max changed + const auto currentMax = 
activeSchedule_.GetStepMaxWork(bestMove.fromStep_); + const auto currentSecondMax = activeSchedule_.GetStepSecondMaxWork(bestMove.fromStep_); + const auto currentCount = activeSchedule_.GetStepMaxWorkProcessorCount()[bestMove.fromStep_]; + if (currentMax != prevWorkData.fromStepMaxWork_ || currentSecondMax != prevWorkData.fromStepSecondMaxWork_ + || currentCount != prevWorkData.fromStepMaxWorkProcessorCount_) { + changedSteps.insert(bestMove.fromStep_); + } + } else { + // Different steps - check both + const auto currentFromMax = activeSchedule_.GetStepMaxWork(bestMove.fromStep_); + const auto currentFromSecondMax = activeSchedule_.GetStepSecondMaxWork(bestMove.fromStep_); + const auto currentFromCount = activeSchedule_.GetStepMaxWorkProcessorCount()[bestMove.fromStep_]; + if (currentFromMax != prevWorkData.fromStepMaxWork_ || currentFromSecondMax != prevWorkData.fromStepSecondMaxWork_ + || currentFromCount != prevWorkData.fromStepMaxWorkProcessorCount_) { + changedSteps.insert(bestMove.fromStep_); + } + + const auto currentToMax = activeSchedule_.GetStepMaxWork(bestMove.toStep_); + const auto currentToSecondMax = activeSchedule_.GetStepSecondMaxWork(bestMove.toStep_); + const auto currentToCount = activeSchedule_.GetStepMaxWorkProcessorCount()[bestMove.toStep_]; + if (currentToMax != prevWorkData.toStepMaxWork_ || currentToSecondMax != prevWorkData.toStepSecondMaxWork_ + || currentToCount != prevWorkData.toStepMaxWorkProcessorCount_) { + changedSteps.insert(bestMove.toStep_); + } + } + + for (const auto &[step, stepInfo] : prevCommData.stepData_) { + // typename CommCostFunctionT::PreMoveCommDataT::StepInfo currentInfo; + // Query current values + const auto currentMax = commCostF_.commDs_.StepMaxComm(step); + const auto currentSecondMax = commCostF_.commDs_.StepSecondMaxComm(step); + const auto currentCount = commCostF_.commDs_.StepMaxCommCount(step); + + if (currentMax != stepInfo.maxComm_ || currentSecondMax != stepInfo.secondMaxComm_ + || currentCount != 
stepInfo.maxCommCount_) { + changedSteps.insert(step); + } + } + + // Recompute affinities for all active nodes + const size_t activeCount = threadData.affinityTable_.size(); + for (size_t i = 0; i < activeCount; ++i) { + const VertexType node = threadData.affinityTable_.GetSelectedNodes()[i]; + + // Determine if this node needs affinity recomputation + // A node needs recomputation if it's in or adjacent to changed steps + const unsigned nodeStep = activeSchedule_.AssignedSuperstep(node); + + // Calculate window bounds for this node once + const int nodeLowerBound = static_cast(nodeStep) - static_cast(windowSize); + const unsigned nodeUpperBound = nodeStep + windowSize; + + bool needsUpdate = false; + // Check if any changed step falls within the node's window + for (unsigned step : changedSteps) { + if (static_cast(step) >= nodeLowerBound && step <= nodeUpperBound) { + needsUpdate = true; + break; + } + } + + if (needsUpdate) { + auto &affinityTableNode = threadData.affinityTable_.GetAffinityTable(node); + + // Reset affinity table entries to zero + const unsigned numProcs = activeSchedule_.GetInstance().NumberOfProcessors(); + for (unsigned p = 0; p < numProcs; ++p) { + for (unsigned idx = 0; idx < affinityTableNode[p].size(); ++idx) { + affinityTableNode[p][idx] = 0; + } + } + + ComputeNodeAffinities(node, affinityTableNode, threadData); + recomputeMaxGain[node] = KlGainUpdateInfo(node, true); + } + } + } else { + UpdateNodeWorkAffinity(threadData.affinityTable_, bestMove, prevWorkData, recomputeMaxGain); + commCostF_.UpdateNodeCommAffinity(bestMove, + threadData, + threadData.rewardPenaltyStrat_.penalty_, + threadData.rewardPenaltyStrat_.reward_, + recomputeMaxGain, + newNodes); + } + } + + inline bool BlockedEdgeStrategy(VertexType node, std::vector &unlockNodes, ThreadSearchContext &threadData) { + if (threadData.unlockEdgeBacktrackCounter_ > 1) { + for (const auto vertexEdgePair : threadData.activeScheduleData_.newViolations_) { + const auto &e = 
vertexEdgePair.second; + const auto sourceV = Source(e, *graph_); + const auto targetV = Target(e, *graph_); + + if (node == sourceV && threadData.lockManager_.IsLocked(targetV)) { + unlockNodes.push_back(targetV); + } else if (node == targetV && threadData.lockManager_.IsLocked(sourceV)) { + unlockNodes.push_back(sourceV); + } + } +#ifdef KL_DEBUG + std::cout << "Nodes of violated edge locked, backtrack counter: " << threadData.unlockEdgeBacktrackCounter_ + << std::endl; +#endif + threadData.unlockEdgeBacktrackCounter_--; + return true; + } else { +#ifdef KL_DEBUG + std::cout << "Nodes of violated edge locked, end local search" << std::endl; +#endif + return false; // or reset local search and initalize with violating nodes + } + } + + inline void AdjustLocalSearchParameters(unsigned outerIter, unsigned noImpCounter, ThreadSearchContext &threadData) { + if (noImpCounter >= threadData.noImprovementIterationsReducePenalty_ + && threadData.rewardPenaltyStrat_.initialPenalty_ > 1.0) { + threadData.rewardPenaltyStrat_.initialPenalty_ + = static_cast(std::floor(std::sqrt(threadData.rewardPenaltyStrat_.initialPenalty_))); + threadData.unlockEdgeBacktrackCounterReset_ += 1; + threadData.noImprovementIterationsReducePenalty_ += 15; +#ifdef KL_DEBUG_1 + std::cout << "thread " << threadData.threadId_ << ", no improvement for " + << threadData.noImprovementIterationsReducePenalty_ << " iterations, reducing initial penalty to " + << threadData.rewardPenaltyStrat_.initialPenalty_ << std::endl; +#endif + } + + if (parameters_.tryRemoveStepAfterNumOuterIterations_ > 0 + && ((outerIter + 1) % parameters_.tryRemoveStepAfterNumOuterIterations_) == 0) { + threadData.stepSelectionEpochCounter_ = 0; + ; +#ifdef KL_DEBUG + std::cout << "reset remove epoc counter after " << outerIter << " iterations." 
<< std::endl; +#endif + } + + if (noImpCounter >= threadData.noImprovementIterationsIncreaseInnerIter_) { + threadData.minInnerIter_ = static_cast(std::ceil(threadData.minInnerIter_ * 2.2)); + threadData.noImprovementIterationsIncreaseInnerIter_ += 20; +#ifdef KL_DEBUG_1 + std::cout << "thread " << threadData.threadId_ << ", no improvement for " + << threadData.noImprovementIterationsIncreaseInnerIter_ << " iterations, increasing min inner iter to " + << threadData.minInnerIter_ << std::endl; +#endif + } + } + + bool IsLocalSearchBlocked(ThreadSearchContext &threadData); + void SetParameters(VertexIdxT numNodes); + void ResetInnerSearchStructures(ThreadSearchContext &threadData) const; + void InitializeDatastructures(BspSchedule &schedule); + void PrintHeap(HeapDatastructure &maxGainHeap) const; + void CleanupDatastructures(); + void UpdateAvgGain(const CostT gain, const unsigned numIter, double &averageGain); + void InsertGainHeap(ThreadSearchContext &threadData); + void InsertNewNodesGainHeap(std::vector &newNodes, NodeSelectionContainerT &nodes, ThreadSearchContext &threadData); + + inline void ComputeNodeAffinities(VertexType node, + std::vector> &affinityTableNode, + ThreadSearchContext &threadData) { + ComputeWorkAffinity(node, affinityTableNode, threadData); + commCostF_.ComputeCommAffinity(node, + affinityTableNode, + threadData.rewardPenaltyStrat_.penalty_, + threadData.rewardPenaltyStrat_.reward_, + threadData.startStep_, + threadData.endStep_); + } + + void SelectActiveNodes(ThreadSearchContext &threadData) { + if (SelectNodesCheckRemoveSuperstep(threadData.stepToRemove_, threadData)) { + activeSchedule_.SwapEmptyStepFwd(threadData.stepToRemove_, threadData.endStep_); + threadData.endStep_--; + threadData.localSearchStartStep_ = static_cast(threadData.activeScheduleData_.appliedMoves_.size()); + threadData.activeScheduleData_.UpdateCost(static_cast(-1.0 * instance_->SynchronisationCosts())); + + if constexpr (enablePreresolvingViolations_) { + 
ResolveViolations(threadData); + } + + if (threadData.activeScheduleData_.currentViolations_.size() > parameters_.initialViolationThreshold_) { + activeSchedule_.RevertToBestSchedule(threadData.localSearchStartStep_, + threadData.stepToRemove_, + commCostF_, + threadData.activeScheduleData_, + threadData.startStep_, + threadData.endStep_); + } else { + threadData.unlockEdgeBacktrackCounter_ + = static_cast(threadData.activeScheduleData_.currentViolations_.size()); + threadData.maxInnerIterations_ + = std::max(threadData.unlockEdgeBacktrackCounter_ * 5u, parameters_.maxInnerIterationsReset_); + threadData.maxNoVioaltionsRemovedBacktrack_ = parameters_.maxNoVioaltionsRemovedBacktrackForRemoveStepReset_; +#ifdef KL_DEBUG_1 + std::cout << "thread " << threadData.threadId_ << ", Trying to remove step " << threadData.stepToRemove_ + << std::endl; +#endif + return; + } + } + // threadData.stepToRemove_ = threadData.startStep_; + threadData.localSearchStartStep_ = 0; + threadData.selectionStrategy_.SelectActiveNodes(threadData.affinityTable_, threadData.startStep_, threadData.endStep_); + } + + bool CheckRemoveSuperstep(unsigned step); + bool SelectNodesCheckRemoveSuperstep(unsigned &step, ThreadSearchContext &threadData); + + bool ScatterNodesSuperstep(unsigned step, ThreadSearchContext &threadData) { + assert(step <= threadData.endStep_ && threadData.startStep_ <= step); + bool abort = false; + + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); proc++) { + const std::vector stepProcNodeVec( + activeSchedule_.GetSetSchedule().stepProcessorVertices_[step][proc].begin(), + activeSchedule_.GetSetSchedule().stepProcessorVertices_[step][proc].end()); + for (const auto &node : stepProcNodeVec) { + threadData.rewardPenaltyStrat_.InitRewardPenalty( + static_cast(threadData.activeScheduleData_.currentViolations_.size()) + 1.0); + ComputeNodeAffinities(node, threadData.localAffinityTable_, threadData); + KlMove bestMove = ComputeBestMove(node, 
threadData.localAffinityTable_, threadData); + + if (bestMove.gain_ <= std::numeric_limits::lowest()) { + abort = true; + break; + } + + ApplyMove(bestMove, threadData); + if (threadData.activeScheduleData_.currentViolations_.size() > parameters_.abortScatterNodesViolationThreshold_) { + abort = true; + break; + } + + threadData.affinityTable_.Insert(node); + // threadData.selectionStrategy_.AddNeighboursToSelection(node, threadData.affinityTable_, + // threadData.startStep_, threadData.endStep_); + if (threadData.activeScheduleData_.newViolations_.size() > 0) { + for (const auto &vertexEdgePair : threadData.activeScheduleData_.newViolations_) { + const auto &vertex = vertexEdgePair.first; + threadData.affinityTable_.Insert(vertex); + } + } + +#ifdef KL_DEBUG + std::cout << "move node " << bestMove.node_ << " with gain " << bestMove.gain_ + << ", from proc|step: " << bestMove.fromProc_ << "|" << bestMove.fromStep_ + << " to: " << bestMove.toProc_ << "|" << bestMove.toStep_ << std::endl; +#endif + +#ifdef KL_DEBUG_COST_CHECK + activeSchedule_.GetVectorSchedule().numberOfSupersteps = threadDataVec_[0].NumSteps(); + if (std::abs(commCostF_.ComputeScheduleCostTest() - threadData.activeScheduleData_.cost_) > 0.00001) { + std::cout << "computed cost: " << commCostF_.ComputeScheduleCostTest() + << ", current cost: " << threadData.activeScheduleData_.cost_ << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; + } + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.SatisfiedMemoryConstraint()) { + std::cout << "memory constraint not satisfied" << std::endl; + } + } +#endif + } + + if (abort) { + break; + } + } + + if (abort) { + activeSchedule_.RevertToBestSchedule( + 0, 0, commCostF_, threadData.activeScheduleData_, threadData.startStep_, threadData.endStep_); + threadData.affinityTable_.ResetNodeSelection(); + return false; + } + return true; + } + + void 
SynchronizeActiveSchedule(const unsigned numThreads) { + if (numThreads == 1) { // single thread case + activeSchedule_.SetCost(threadDataVec_[0].activeScheduleData_.cost_); + activeSchedule_.GetVectorSchedule().numberOfSupersteps_ = threadDataVec_[0].NumSteps(); + return; + } + + unsigned writeCursor = threadDataVec_[0].endStep_ + 1; + for (unsigned i = 1; i < numThreads; ++i) { + auto &thread = threadDataVec_[i]; + if (thread.startStep_ <= thread.endStep_) { + for (unsigned j = thread.startStep_; j <= thread.endStep_; ++j) { + if (j != writeCursor) { + activeSchedule_.SwapSteps(j, writeCursor); + } + writeCursor++; + } + } + } + activeSchedule_.GetVectorSchedule().numberOfSupersteps_ = writeCursor; + const CostT newCost = commCostF_.ComputeScheduleCost(); + activeSchedule_.SetCost(newCost); + } + + public: + KlImprover() : ImprovementScheduler() { + std::random_device rd; + gen_ = std::mt19937(rd()); + } + + explicit KlImprover(unsigned seed) : ImprovementScheduler() { gen_ = std::mt19937(seed); } + + virtual ~KlImprover() = default; + + virtual ReturnStatus ImproveSchedule(BspSchedule &schedule) override { + if (schedule.GetInstance().NumberOfProcessors() < 2) { + return ReturnStatus::BEST_FOUND; + } + + const unsigned numThreads = 1; + + threadDataVec_.resize(numThreads); + threadFinishedVec_.assign(numThreads, true); + + SetParameters(schedule.GetInstance().NumberOfVertices()); + InitializeDatastructures(schedule); + const CostT initialCost = activeSchedule_.GetCost(); + const unsigned numSteps = schedule.NumberOfSupersteps(); + + SetStartStep(0, threadDataVec_[0]); + threadDataVec_[0].endStep_ = (numSteps > 0) ? 
numSteps - 1 : 0; + + auto &threadData = this->threadDataVec_[0]; + threadData.activeScheduleData_.InitializeCost(activeSchedule_.GetCost()); + threadData.selectionStrategy_.Setup(threadData.startStep_, threadData.endStep_); + RunLocalSearch(threadData); + + SynchronizeActiveSchedule(numThreads); + + if (initialCost > activeSchedule_.GetCost()) { + activeSchedule_.WriteSchedule(schedule); + CleanupDatastructures(); + return ReturnStatus::OSP_SUCCESS; + } else { + CleanupDatastructures(); + return ReturnStatus::BEST_FOUND; + } + } + + virtual ReturnStatus ImproveScheduleWithTimeLimit(BspSchedule &schedule) override { + computeWithTimeLimit_ = true; + return ImproveSchedule(schedule); + } + + virtual void SetTimeQualityParameter(const double timeQuality) { this->parameters_.timeQuality_ = timeQuality; } + + virtual void SetSuperstepRemoveStrengthParameter(const double superstepRemoveStrength) { + this->parameters_.superstepRemoveStrength_ = superstepRemoveStrength; + } + + virtual std::string GetScheduleName() const { return "kl_improver_" + commCostF_.Name(); } +}; + +template +void KlImprover::SetParameters(VertexIdxT numNodes) { + const unsigned logNumNodes = (numNodes > 1) ? static_cast(std::log(numNodes)) : 1; + + // Total number of outer iterations. Proportional to sqrt N. + parameters_.maxOuterIterations_ + = static_cast(std::sqrt(numNodes) * (parameters_.timeQuality_ * 10.0) / parameters_.numParallelLoops_); + + // Number of times to reset the search for violations before giving up. + parameters_.maxNoVioaltionsRemovedBacktrackReset_ = parameters_.timeQuality_ < 0.75 ? 1 + : parameters_.timeQuality_ < 1.0 ? 2 + : 3; + + // Parameters for the superstep removal heuristic. + parameters_.maxNoVioaltionsRemovedBacktrackForRemoveStepReset_ + = 3 + static_cast(parameters_.superstepRemoveStrength_ * 7); + parameters_.nodeMaxStepSelectionEpochs_ = parameters_.superstepRemoveStrength_ < 0.75 ? 1 + : parameters_.superstepRemoveStrength_ < 1.0 ? 
2 + : 3; + parameters_.removeStepEpocs_ = static_cast(parameters_.superstepRemoveStrength_ * 4.0); + + parameters_.minInnerIterReset_ = static_cast(logNumNodes + logNumNodes * (1.0 + parameters_.timeQuality_)); + + if (parameters_.removeStepEpocs_ > 0) { + parameters_.tryRemoveStepAfterNumOuterIterations_ = parameters_.maxOuterIterations_ / parameters_.removeStepEpocs_; + } else { + // Effectively disable superstep removal if remove_step_epocs is 0. + parameters_.tryRemoveStepAfterNumOuterIterations_ = parameters_.maxOuterIterations_ + 1; + } + + unsigned i = 0; + for (auto &thread : threadDataVec_) { + thread.threadId_ = i++; + // The number of nodes to consider in each inner iteration. Proportional to log(N). + thread.selectionStrategy_.selectionThreshold_ + = static_cast(std::ceil(parameters_.timeQuality_ * 10 * logNumNodes + logNumNodes)); + } + +#ifdef KL_DEBUG_1 + std::cout << "kl set parameter, number of nodes: " << numNodes << std::endl; + std::cout << "max outer iterations: " << parameters_.maxOuterIterations_ << std::endl; + std::cout << "max inner iterations: " << parameters_.maxInnerIterationsReset_ << std::endl; + std::cout << "no improvement iterations reduce penalty: " << threadDataVec_[0].noImprovementIterationsReducePenalty_ + << std::endl; + std::cout << "selction threshold: " << threadDataVec_[0].selectionStrategy_.selectionThreshold_ << std::endl; + std::cout << "remove step epocs: " << parameters_.removeStepEpocs_ << std::endl; + std::cout << "try remove step after num outer iterations: " << parameters_.tryRemoveStepAfterNumOuterIterations_ << std::endl; + std::cout << "number of parallel loops: " << parameters_.numParallelLoops_ << std::endl; +#endif +} + +template +void KlImprover::UpdateNodeWorkAffinity( + NodeSelectionContainerT &nodes, + KlMove move, + const PreMoveWorkData &prevWorkData, + std::map &recomputeMaxGain) { + const size_t activeCount = nodes.size(); + + for (size_t i = 0; i < activeCount; ++i) { + const VertexType node = 
nodes.GetSelectedNodes()[i]; + + KlGainUpdateInfo updateInfo = UpdateNodeWorkAffinityAfterMove(node, move, prevWorkData, nodes.At(node)); + if (updateInfo.updateFromStep_ || updateInfo.updateToStep_) { + recomputeMaxGain[node] = updateInfo; + } + } +} + +template +void KlImprover::UpdateMaxGain( + KlMove move, std::map &recomputeMaxGain, ThreadSearchContext &threadData) { + for (auto &pair : recomputeMaxGain) { + if (pair.second.fullUpdate_) { + RecomputeNodeMaxGain(pair.first, threadData.affinityTable_, threadData); + } else { + if (pair.second.updateEntireFromStep_) { + UpdateBestMove(pair.first, move.fromStep_, threadData.affinityTable_, threadData); + } else if (pair.second.updateFromStep_ && IsCompatible(pair.first, move.fromProc_)) { + UpdateBestMove(pair.first, move.fromStep_, move.fromProc_, threadData.affinityTable_, threadData); + } + + if (move.fromStep_ != move.toStep_ || not pair.second.updateEntireFromStep_) { + if (pair.second.updateEntireToStep_) { + UpdateBestMove(pair.first, move.toStep_, threadData.affinityTable_, threadData); + } else if (pair.second.updateToStep_ && IsCompatible(pair.first, move.toProc_)) { + UpdateBestMove(pair.first, move.toStep_, move.toProc_, threadData.affinityTable_, threadData); + } + } + } + } +} + +template +void KlImprover::ComputeWorkAffinity( + VertexType node, std::vector> &affinityTableNode, ThreadSearchContext &threadData) { + const unsigned nodeStep = activeSchedule_.AssignedSuperstep(node); + const VertexWorkWeightT vertexWeight = graph_->VertexWorkWeight(node); + + unsigned step = (nodeStep > windowSize) ? 
(nodeStep - windowSize) : 0; + for (unsigned idx = threadData.StartIdx(nodeStep); idx < threadData.EndIdx(nodeStep); ++idx, ++step) { + if (idx == windowSize) { + continue; + } + + const CostT maxWorkForStep = static_cast(activeSchedule_.GetStepMaxWork(step)); + + for (const unsigned proc : procRange_.CompatibleProcessorsVertex(node)) { + const VertexWorkWeightT newWeight = vertexWeight + activeSchedule_.GetStepProcessorWork(step, proc); + const CostT workDiff = static_cast(newWeight) - maxWorkForStep; + affinityTableNode[proc][idx] = std::max(0.0, workDiff); + } + } + + const unsigned nodeProc = activeSchedule_.AssignedProcessor(node); + const VertexWorkWeightT maxWorkForStep = activeSchedule_.GetStepMaxWork(nodeStep); + const bool isSoleMaxProcessor = (activeSchedule_.GetStepMaxWorkProcessorCount()[nodeStep] == 1) + && (maxWorkForStep == activeSchedule_.GetStepProcessorWork(nodeStep, nodeProc)); + + const CostT nodeProcAffinity + = isSoleMaxProcessor ? std::min(vertexWeight, maxWorkForStep - activeSchedule_.GetStepSecondMaxWork(nodeStep)) : 0.0; + affinityTableNode[nodeProc][windowSize] = nodeProcAffinity; + + for (const unsigned proc : procRange_.CompatibleProcessorsVertex(node)) { + if (proc == nodeProc) { + continue; + } + + const VertexWorkWeightT newWeight = vertexWeight + activeSchedule_.GetStepProcessorWork(nodeStep, proc); + affinityTableNode[proc][windowSize] = ComputeSameStepAffinity(maxWorkForStep, newWeight, nodeProcAffinity); + } +} + +template +void KlImprover::ProcessWorkUpdateStep( + VertexType node, + unsigned nodeStep, + unsigned nodeProc, + VertexWorkWeightT vertexWeight, + unsigned moveStep, + unsigned moveProc, + VertexWorkWeightT moveCorrectionNodeWeight, + const VertexWorkWeightT prevMoveStepMaxWork, + const VertexWorkWeightT prevMoveStepSecondMaxWork, + unsigned prevMoveStepMaxWorkProcessorCount, + bool &updateStep, + bool &updateEntireStep, + bool &fullUpdate, + std::vector> &affinityTableNode) { + const unsigned lowerBound = moveStep > 
windowSize ? moveStep - windowSize : 0; + if (lowerBound <= nodeStep && nodeStep <= moveStep + windowSize) { + updateStep = true; + if (nodeStep == moveStep) { + const VertexWorkWeightT newMaxWeight = activeSchedule_.GetStepMaxWork(moveStep); + const VertexWorkWeightT newSecondMaxWeight = activeSchedule_.GetStepSecondMaxWork(moveStep); + const VertexWorkWeightT newStepProcWork = activeSchedule_.GetStepProcessorWork(nodeStep, nodeProc); + + const VertexWorkWeightT prevStepProcWork = (nodeProc == moveProc) ? newStepProcWork + moveCorrectionNodeWeight + : newStepProcWork; + const bool prevIsSoleMaxProcessor = (prevMoveStepMaxWorkProcessorCount == 1) + && (prevMoveStepMaxWork == prevStepProcWork); + const CostT prevNodeProcAffinity + = prevIsSoleMaxProcessor ? std::min(vertexWeight, prevMoveStepMaxWork - prevMoveStepSecondMaxWork) : 0.0; + + const bool newIsSoleMaxProcessor = (activeSchedule_.GetStepMaxWorkProcessorCount()[nodeStep] == 1) + && (newMaxWeight == newStepProcWork); + const CostT newNodeProcAffinity = newIsSoleMaxProcessor ? 
std::min(vertexWeight, newMaxWeight - newSecondMaxWeight) + : 0.0; + + const CostT diff = newNodeProcAffinity - prevNodeProcAffinity; + const bool updateNodeProcAffinity = std::abs(diff) > epsilon_; + if (updateNodeProcAffinity) { + fullUpdate = true; + affinityTableNode[nodeProc][windowSize] += diff; + } + + if ((prevMoveStepMaxWork != newMaxWeight) || updateNodeProcAffinity) { + updateEntireStep = true; + + for (const unsigned proc : procRange_.CompatibleProcessorsVertex(node)) { + if ((proc == nodeProc) || (proc == moveProc)) { + continue; + } + + const VertexWorkWeightT newWeight = vertexWeight + activeSchedule_.GetStepProcessorWork(nodeStep, proc); + const CostT prevOtherAffinity = ComputeSameStepAffinity(prevMoveStepMaxWork, newWeight, prevNodeProcAffinity); + const CostT otherAffinity = ComputeSameStepAffinity(newMaxWeight, newWeight, newNodeProcAffinity); + + affinityTableNode[proc][windowSize] += (otherAffinity - prevOtherAffinity); + } + } + + if (nodeProc != moveProc && IsCompatible(node, moveProc)) { + const VertexWorkWeightT prevNewWeight + = vertexWeight + activeSchedule_.GetStepProcessorWork(nodeStep, moveProc) + moveCorrectionNodeWeight; + const CostT prevOtherAffinity = ComputeSameStepAffinity(prevMoveStepMaxWork, prevNewWeight, prevNodeProcAffinity); + const VertexWorkWeightT newWeight = vertexWeight + activeSchedule_.GetStepProcessorWork(nodeStep, moveProc); + const CostT otherAffinity = ComputeSameStepAffinity(newMaxWeight, newWeight, newNodeProcAffinity); + + affinityTableNode[moveProc][windowSize] += (otherAffinity - prevOtherAffinity); + } + + } else { + const VertexWorkWeightT newMaxWeight = activeSchedule_.GetStepMaxWork(moveStep); + const unsigned idx = RelStepIdx(nodeStep, moveStep); + if (prevMoveStepMaxWork != newMaxWeight) { + updateEntireStep = true; + + // update moving to all procs with special for moveProc + for (const unsigned proc : procRange_.CompatibleProcessorsVertex(node)) { + const VertexWorkWeightT newWeight = vertexWeight 
+ activeSchedule_.GetStepProcessorWork(moveStep, proc); + if (proc != moveProc) { + const CostT prevAffinity = prevMoveStepMaxWork < newWeight + ? static_cast(newWeight) - static_cast(prevMoveStepMaxWork) + : 0.0; + const CostT newAffinity + = newMaxWeight < newWeight ? static_cast(newWeight) - static_cast(newMaxWeight) : 0.0; + affinityTableNode[proc][idx] += newAffinity - prevAffinity; + + } else { + const VertexWorkWeightT prevNewWeight + = vertexWeight + activeSchedule_.GetStepProcessorWork(moveStep, proc) + moveCorrectionNodeWeight; + const CostT prevAffinity = prevMoveStepMaxWork < prevNewWeight + ? static_cast(prevNewWeight) - static_cast(prevMoveStepMaxWork) + : 0.0; + + const CostT newAffinity + = newMaxWeight < newWeight ? static_cast(newWeight) - static_cast(newMaxWeight) : 0.0; + affinityTableNode[proc][idx] += newAffinity - prevAffinity; + } + } + } else { + // update only moveProc + if (IsCompatible(node, moveProc)) { + const VertexWorkWeightT newWeight = vertexWeight + activeSchedule_.GetStepProcessorWork(moveStep, moveProc); + const VertexWorkWeightT prevNewWeight = newWeight + moveCorrectionNodeWeight; + const CostT prevAffinity = prevMoveStepMaxWork < prevNewWeight + ? static_cast(prevNewWeight) - static_cast(prevMoveStepMaxWork) + : 0.0; + + const CostT newAffinity + = newMaxWeight < newWeight ? 
static_cast(newWeight) - static_cast(newMaxWeight) : 0.0; + affinityTableNode[moveProc][idx] += newAffinity - prevAffinity; + } + } + } + } +} + +template +bool KlImprover::SelectNodesCheckRemoveSuperstep( + unsigned &stepToRemove, ThreadSearchContext &threadData) { + if (threadData.stepSelectionEpochCounter_ >= parameters_.nodeMaxStepSelectionEpochs_ || threadData.NumSteps() < 3) { + return false; + } + + for (stepToRemove = threadData.stepSelectionCounter_; stepToRemove <= threadData.endStep_; stepToRemove++) { + assert(stepToRemove >= threadData.startStep_ && stepToRemove <= threadData.endStep_); +#ifdef KL_DEBUG + std::cout << "Checking to remove step " << stepToRemove << "/" << threadData.endStep_ << std::endl; +#endif + if (CheckRemoveSuperstep(stepToRemove)) { +#ifdef KL_DEBUG + std::cout << "Checking to scatter step " << stepToRemove << "/" << threadData.endStep_ << std::endl; +#endif + assert(stepToRemove >= threadData.startStep_ && stepToRemove <= threadData.endStep_); + if (ScatterNodesSuperstep(stepToRemove, threadData)) { + threadData.stepSelectionCounter_ = stepToRemove + 1; + + if (threadData.stepSelectionCounter_ > threadData.endStep_) { + threadData.stepSelectionCounter_ = threadData.startStep_; + threadData.stepSelectionEpochCounter_++; + } + return true; + } + } + } + + threadData.stepSelectionEpochCounter_++; + threadData.stepSelectionCounter_ = threadData.startStep_; + return false; +} + +template +bool KlImprover::CheckRemoveSuperstep(unsigned step) { + if (activeSchedule_.NumSteps() < 2) { + return false; + } + + if (activeSchedule_.GetStepMaxWork(step) < instance_->SynchronisationCosts()) { + return true; + } + + return false; +} + +template +void KlImprover::ResetInnerSearchStructures( + ThreadSearchContext &threadData) const { + threadData.unlockEdgeBacktrackCounter_ = threadData.unlockEdgeBacktrackCounterReset_; + threadData.maxInnerIterations_ = parameters_.maxInnerIterationsReset_; + threadData.maxNoVioaltionsRemovedBacktrack_ = 
parameters_.maxNoVioaltionsRemovedBacktrackReset_; + threadData.averageGain_ = 0.0; + threadData.affinityTable_.ResetNodeSelection(); + threadData.maxGainHeap_.Clear(); + threadData.lockManager_.Clear(); +} + +template +bool KlImprover::IsLocalSearchBlocked( + ThreadSearchContext &threadData) { + for (const auto &pair : threadData.activeScheduleData_.newViolations_) { + if (threadData.lockManager_.IsLocked(pair.first)) { + return true; + } + } + return false; +} + +template +void KlImprover::InitializeDatastructures( + BspSchedule &schedule) { + inputSchedule_ = &schedule; + instance_ = &schedule.GetInstance(); + graph_ = &instance_->GetComputationalDag(); + + activeSchedule_.Initialize(schedule); + + procRange_.Initialize(*instance_); + commCostF_.Initialize(activeSchedule_, procRange_); + const CostT initialCost = commCostF_.ComputeScheduleCost(); + activeSchedule_.SetCost(initialCost); + + for (auto &tData : threadDataVec_) { + tData.affinityTable_.Initialize(activeSchedule_, tData.selectionStrategy_.selectionThreshold_); + tData.lockManager_.Initialize(graph_->NumVertices()); + tData.rewardPenaltyStrat_.Initialize( + activeSchedule_, commCostF_.GetMaxCommWeightMultiplied(), activeSchedule_.GetMaxWorkWeight()); + tData.selectionStrategy_.Initialize(activeSchedule_, gen_, tData.startStep_, tData.endStep_); + + tData.localAffinityTable_.resize(instance_->NumberOfProcessors()); + for (unsigned i = 0; i < instance_->NumberOfProcessors(); ++i) { + tData.localAffinityTable_[i].resize(windowRange_); + } + } +} + +template +void KlImprover::UpdateAvgGain(const CostT gain, + const unsigned numIter, + double &averageGain) { + averageGain = static_cast((averageGain * numIter + gain)) / (numIter + 1.0); +} + +template +void KlImprover::InsertGainHeap(ThreadSearchContext &threadData) { + const size_t activeCount = threadData.affinityTable_.size(); + + for (size_t i = 0; i < activeCount; ++i) { + const VertexType node = threadData.affinityTable_.GetSelectedNodes()[i]; + 
ComputeNodeAffinities(node, threadData.affinityTable_.At(node), threadData); + const auto bestMove = ComputeBestMove(node, threadData.affinityTable_[node], threadData); + threadData.maxGainHeap_.Push(node, bestMove); + } +} + +template +void KlImprover::InsertNewNodesGainHeap( + std::vector &newNodes, NodeSelectionContainerT &nodes, ThreadSearchContext &threadData) { + for (const auto &node : newNodes) { + nodes.Insert(node); + ComputeNodeAffinities(node, threadData.affinityTable_.At(node), threadData); + const auto bestMove = ComputeBestMove(node, threadData.affinityTable_[node], threadData); + threadData.maxGainHeap_.Push(node, bestMove); + } +} + +template +void KlImprover::CleanupDatastructures() { + threadDataVec_.clear(); + activeSchedule_.Clear(); +} + +template +void KlImprover::PrintHeap(HeapDatastructure &maxGainHeap) const { + if (maxGainHeap.IsEmpty()) { + std::cout << "heap is empty" << std::endl; + return; + } + HeapDatastructure tempHeap = maxGainHeap; // requires copy constructor + + std::cout << "heap current size: " << tempHeap.size() << std::endl; + const auto &topVal = tempHeap.GetValue(tempHeap.Top()); + std::cout << "heap top node " << topVal.node_ << " gain " << topVal.gain_ << std::endl; + + unsigned count = 0; + while (!tempHeap.IsEmpty() && count++ < 15) { + const auto &val = tempHeap.GetValue(tempHeap.Top()); + std::cout << "node " << val.node_ << " gain " << val.gain_ << " to proc " << val.toProc_ << " to step " << val.toStep_ + << std::endl; + tempHeap.Pop(); + } +} + +template +void KlImprover::UpdateBestMove( + VertexType node, unsigned step, unsigned proc, NodeSelectionContainerT &affinityTable, ThreadSearchContext &threadData) { + const unsigned nodeProc = activeSchedule_.AssignedProcessor(node); + const unsigned nodeStep = activeSchedule_.AssignedSuperstep(node); + + if ((nodeProc == proc) && (nodeStep == step)) { + return; + } + + KlMove nodeMove = threadData.maxGainHeap_.GetValue(node); + CostT maxGain = nodeMove.gain_; + + 
unsigned maxProc = nodeMove.toProc_; + unsigned maxStep = nodeMove.toStep_; + + if ((maxStep == step) && (maxProc == proc)) { + RecomputeNodeMaxGain(node, affinityTable, threadData); + } else { + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.CanMove(node, proc, step)) { + return; + } + } + const unsigned idx = RelStepIdx(nodeStep, step); + const CostT gain = affinityTable[node][nodeProc][windowSize] - affinityTable[node][proc][idx]; + if (gain > maxGain) { + maxGain = gain; + maxProc = proc; + maxStep = step; + } + + const CostT diff = maxGain - nodeMove.gain_; + if ((std::abs(diff) > epsilon_) || (maxProc != nodeMove.toProc_) || (maxStep != nodeMove.toStep_)) { + nodeMove.gain_ = maxGain; + nodeMove.toProc_ = maxProc; + nodeMove.toStep_ = maxStep; + threadData.maxGainHeap_.Update(node, nodeMove); + } + } +} + +template +void KlImprover::UpdateBestMove( + VertexType node, unsigned step, NodeSelectionContainerT &affinityTable, ThreadSearchContext &threadData) { + const unsigned nodeProc = activeSchedule_.AssignedProcessor(node); + const unsigned nodeStep = activeSchedule_.AssignedSuperstep(node); + + KlMove nodeMove = threadData.maxGainHeap_.GetValue(node); + CostT maxGain = nodeMove.gain_; + + unsigned maxProc = nodeMove.toProc_; + unsigned maxStep = nodeMove.toStep_; + + if (maxStep == step) { + RecomputeNodeMaxGain(node, affinityTable, threadData); + } else { + if (nodeStep != step) { + const unsigned idx = RelStepIdx(nodeStep, step); + for (const unsigned p : procRange_.CompatibleProcessorsVertex(node)) { + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.CanMove(node, p, step)) { + continue; + } + } + const CostT gain = affinityTable[node][nodeProc][windowSize] - affinityTable[node][p][idx]; + if (gain > maxGain) { + maxGain = gain; + maxProc = p; + maxStep = step; + } + } + } else { + for (const unsigned proc : procRange_.CompatibleProcessorsVertex(node)) { + 
if (proc == nodeProc) { + continue; + } + if constexpr (ActiveScheduleT::useMemoryConstraint_) { + if (not activeSchedule_.memoryConstraint_.CanMove(node, proc, step)) { + continue; + } + } + const CostT gain = affinityTable[node][nodeProc][windowSize] - affinityTable[node][proc][windowSize]; + if (gain > maxGain) { + maxGain = gain; + maxProc = proc; + maxStep = step; + } + } + } + + const CostT diff = maxGain - nodeMove.gain_; + if ((std::abs(diff) > epsilon_) || (maxProc != nodeMove.toProc_) || (maxStep != nodeMove.toStep_)) { + nodeMove.gain_ = maxGain; + nodeMove.toProc_ = maxProc; + nodeMove.toStep_ = maxStep; + threadData.maxGainHeap_.Update(node, nodeMove); + } + } +} + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_mt.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_mt.hpp new file mode 100644 index 00000000..a1b37266 --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_mt.hpp @@ -0,0 +1,159 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner +*/ + +#pragma once + +#include + +#include "kl_improver.hpp" + +namespace osp { + +template +class KlImproverMt : public KlImprover { + protected: + unsigned maxNumThreads_ = std::numeric_limits::max(); + + void SetThreadBoundaries(const unsigned numThreads, const unsigned numSteps, bool lastThreadLargeRange) { + if (numThreads == 1) { + this->SetStartStep(0, this->threadDataVec_[0]); + this->threadDataVec_[0].endStep_ = (numSteps > 0) ? numSteps - 1 : 0; + this->threadDataVec_[0].originalEndStep_ = this->threadDataVec_[0].endStep_; + return; + } else { + const unsigned totalGapSize = (numThreads - 1) * this->parameters_.threadRangeGap_; + const unsigned bonus = this->parameters_.threadMinRange_; + const unsigned stepsToDistribute = numSteps - totalGapSize - bonus; + const unsigned baseRange = stepsToDistribute / numThreads; + const unsigned remainder = stepsToDistribute % numThreads; + const unsigned largeRangeThreadIdx = lastThreadLargeRange ? numThreads - 1 : 0; + + unsigned currentStartStep = 0; + for (unsigned i = 0; i < numThreads; ++i) { + this->threadFinishedVec_[i] = false; + this->SetStartStep(currentStartStep, this->threadDataVec_[i]); + unsigned currentRange = baseRange + (i < remainder ? 
1 : 0); + if (i == largeRangeThreadIdx) { + currentRange += bonus; + } + + const unsigned endStep = currentStartStep + currentRange - 1; + this->threadDataVec_[i].endStep_ = endStep; + this->threadDataVec_[i].originalEndStep_ = this->threadDataVec_[i].endStep_; + currentStartStep = endStep + 1 + this->parameters_.threadRangeGap_; +#ifdef KL_DEBUG_1 + std::cout << "thread " << i << ": start_step=" << this->threadDataVec_[i].startStep_ + << ", end_step=" << this->threadDataVec_[i].endStep_ << std::endl; +#endif + } + } + } + + void SetNumThreads(unsigned &numThreads, const unsigned numSteps) { + unsigned maxAllowedThreads = 0; + if (numSteps >= this->parameters_.threadMinRange_ + this->parameters_.threadRangeGap_) { + const unsigned divisor = this->parameters_.threadMinRange_ + this->parameters_.threadRangeGap_; + if (divisor > 0) { + // This calculation is based on the constraint that one thread's range is + // 'min_range' larger than the others, and all ranges are at least 'min_range'. + maxAllowedThreads = (numSteps + this->parameters_.threadRangeGap_ - this->parameters_.threadMinRange_) / divisor; + } else { + maxAllowedThreads = numSteps; + } + } else if (numSteps >= this->parameters_.threadMinRange_) { + maxAllowedThreads = 1; + } + + if (numThreads > maxAllowedThreads) { + numThreads = maxAllowedThreads; + } + + if (numThreads == 0) { + numThreads = 1; + } +#ifdef KL_DEBUG_1 + std::cout << "num threads: " << numThreads << " number of supersteps: " << numSteps + << ", max allowed threads: " << maxAllowedThreads << std::endl; +#endif + } + + public: + KlImproverMt() : KlImprover() {} + + explicit KlImproverMt(unsigned seed) : KlImprover(seed) {} + + virtual ~KlImproverMt() = default; + + void SetMaxNumThreads(const unsigned numThreads) { maxNumThreads_ = numThreads; } + + virtual ReturnStatus ImproveSchedule(BspSchedule &schedule) override { + if (schedule.GetInstance().NumberOfProcessors() < 2) { + return ReturnStatus::BEST_FOUND; + } + + unsigned numThreads = 
std::min(maxNumThreads_, static_cast(omp_get_max_threads())); + SetNumThreads(numThreads, schedule.NumberOfSupersteps()); + + this->threadDataVec_.resize(numThreads); + this->threadFinishedVec_.assign(numThreads, true); + + if (numThreads == 1) { + this->parameters_.numParallelLoops_ + = 1; // no parallelization with one thread. Affects parameters.max_out_iteration calculation in set_parameters() + } + + this->SetParameters(schedule.GetInstance().NumberOfVertices()); + this->InitializeDatastructures(schedule); + const CostT initialCost = this->activeSchedule_.GetCost(); + + for (size_t i = 0; i < this->parameters_.numParallelLoops_; ++i) { + SetThreadBoundaries(numThreads, schedule.NumberOfSupersteps(), i % 2 == 0); + +#pragma omp parallel num_threads(numThreads) + { + const size_t threadId = static_cast(omp_get_thread_num()); + auto &threadData = this->threadDataVec_[threadId]; + threadData.activeScheduleData_.InitializeCost(this->activeSchedule_.GetCost()); + threadData.selectionStrategy_.Setup(threadData.startStep_, threadData.endStep_); + this->RunLocalSearch(threadData); + } + + this->SynchronizeActiveSchedule(numThreads); + if (numThreads > 1) { + this->activeSchedule_.SetCost(this->commCostF_.ComputeScheduleCost()); + SetNumThreads(numThreads, schedule.NumberOfSupersteps()); + this->threadFinishedVec_.resize(numThreads); + } + } + + if (initialCost > this->activeSchedule_.GetCost()) { + this->activeSchedule_.WriteSchedule(schedule); + this->CleanupDatastructures(); + return ReturnStatus::OSP_SUCCESS; + } else { + this->CleanupDatastructures(); + return ReturnStatus::BEST_FOUND; + } + } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_test.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_test.hpp new file mode 100644 index 00000000..039c902e --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_test.hpp @@ -0,0 +1,165 @@ +/* +Copyright 2024 Huawei 
Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include "kl_improver.hpp" + +namespace osp { + +template +class KlImproverTest : public KlImprover { + using VertexType = VertexIdxT; + using KlMove = KlMoveStruct; + using HeapDatastructure = MaxPairingHeap; + using ActiveSchedule = KlActiveSchedule; + using KlGainUpdateInfo = KlUpdateInfo; + using NodeSelectionContainer = AdaptiveAffinityTable; + + public: + KlImproverTest() : KlImprover() { + this->threadDataVec_.resize(1); + this->threadFinishedVec_.assign(1, true); + } + + virtual ~KlImproverTest() = default; + + ActiveSchedule &GetActiveSchedule() { return this->activeSchedule_; } + + auto &GetAffinityTable() { return this->threadDataVec_[0].affinityTable_; } + + auto &GetCommCostF() { return this->commCostF_; } + + void SetupSchedule(BspSchedule &schedule) { + this->threadDataVec_.resize(1); + this->SetParameters(schedule.GetInstance().GetComputationalDag().NumVertices()); + this->threadDataVec_[0].endStep_ = schedule.NumberOfSupersteps() > 0 ? 
schedule.NumberOfSupersteps() - 1 : 0; + this->InitializeDatastructures(schedule); + this->threadDataVec_[0].activeScheduleData_.InitializeCost(this->activeSchedule_.GetCost()); + } + + void ApplyMoveTest(KlMove move) { this->ApplyMove(move, this->threadDataVec_[0]); } + + auto &GetMaxGainHeap() { return this->threadDataVec_[0].maxGainHeap_; } + + auto GetCurrentCost() { return this->threadDataVec_[0].activeScheduleData_.cost_; } + + bool IsFeasible() { return this->threadDataVec_[0].activeScheduleData_.feasible_; } + + void ComputeViolationsTest() { this->activeSchedule_.ComputeViolations(this->threadDataVec_[0].activeScheduleData_); } + + NodeSelectionContainer &InsertGainHeapTest(const std::vector &n) { + this->threadDataVec_[0].rewardPenaltyStrat_.penalty_ = 0.0; + this->threadDataVec_[0].rewardPenaltyStrat_.reward_ = 0.0; + + this->threadDataVec_[0].affinityTable_.Initialize(this->activeSchedule_, n.size()); + for (const auto &node : n) { + this->threadDataVec_[0].affinityTable_.Insert(node); + } + + this->InsertGainHeap(this->threadDataVec_[0]); + + return this->threadDataVec_[0].affinityTable_; + } + + NodeSelectionContainer &InsertGainHeapTestPenalty(const std::vector &n) { + this->threadDataVec_[0].affinityTable_.Initialize(this->activeSchedule_, n.size()); + for (const auto &node : n) { + this->threadDataVec_[0].affinityTable_.Insert(node); + } + this->threadDataVec_[0].rewardPenaltyStrat_.penalty_ = 5.5; + this->threadDataVec_[0].rewardPenaltyStrat_.reward_ = 0.0; + + this->InsertGainHeap(this->threadDataVec_[0]); + + return this->threadDataVec_[0].affinityTable_; + } + + NodeSelectionContainer &InsertGainHeapTestPenaltyReward(const std::vector &n) { + this->threadDataVec_[0].affinityTable_.Initialize(this->activeSchedule_, n.size()); + for (const auto &node : n) { + this->threadDataVec_[0].affinityTable_.Insert(node); + } + + this->threadDataVec_[0].rewardPenaltyStrat_.InitRewardPenalty(); + this->threadDataVec_[0].rewardPenaltyStrat_.reward_ = 15.0; + 
+ this->InsertGainHeap(this->threadDataVec_[0]); + + return this->threadDataVec_[0].affinityTable_; + } + + void UpdateAffinityTableTest(KlMove bestMove, NodeSelectionContainer &nodeSelection) { + std::map recomputeMaxGain; + std::vector newNodes; + + const auto prevWorkData = this->activeSchedule_.GetPreMoveWorkData(bestMove); + const auto prevCommData = this->commCostF_.GetPreMoveCommData(bestMove); + this->ApplyMove(bestMove, this->threadDataVec_[0]); + + this->threadDataVec_[0].affinityTable_.Trim(); + this->UpdateAffinities(bestMove, this->threadDataVec_[0], recomputeMaxGain, newNodes, prevWorkData, prevCommData); + } + + auto RunInnerIterationTest() { + std::map recomputeMaxGain; + std::vector newNodes; + + this->PrintHeap(this->threadDataVec_[0].maxGainHeap_); + + KlMove bestMove = this->GetBestMove( + this->threadDataVec_[0].affinityTable_, + this->threadDataVec_[0].lockManager_, + this->threadDataVec_[0].maxGainHeap_); // locks best_move.node and removes it from node_selection + +#ifdef KL_DEBUG + std::cout << "Best move: " << bestMove.node << " gain: " << bestMove.gain << ", from: " << bestMove.from_step << "|" + << bestMove.from_proc << " to: " << bestMove.to_step << "|" << bestMove.toProc << std::endl; +#endif + + const auto prevWorkData = this->activeSchedule_.GetPreMoveWorkData(bestMove); + const auto prevCommData = this->commCostF_.GetPreMoveCommData(bestMove); + this->ApplyMove(bestMove, this->threadDataVec_[0]); + + this->threadDataVec_[0].affinityTable_.Trim(); + this->UpdateAffinities(bestMove, this->threadDataVec_[0], recomputeMaxGain, newNodes, prevWorkData, prevCommData); + +#ifdef KL_DEBUG + std::cout << "New nodes: { "; + for (const auto v : newNodes) { + std::cout << v << " "; + } + std::cout << "}" << std::endl; +#endif + + this->UpdateMaxGain(bestMove, recomputeMaxGain, this->threadDataVec_[0]); + this->InsertNewNodesGainHeap(newNodes, this->threadDataVec_[0].affinityTable_, this->threadDataVec_[0]); + + return recomputeMaxGain; + } + + 
bool IsNodeLocked(VertexType node) const { return this->threadDataVec_[0].lockManager_.IsLocked(node); } + + void GetActiveScheduleTest(BspSchedule &schedule) { this->activeSchedule_.WriteSchedule(schedule); } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include.hpp new file mode 100644 index 00000000..fc10fc08 --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include.hpp @@ -0,0 +1,81 @@ + +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner +*/ + +#pragma once + +// #define KL_DEBUG +// #define KL_DEBUG_1 +// #define KL_DEBUG_COST_CHECK + +#include "comm_cost_modules/kl_bsp_comm_cost.hpp" +#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp" +#include "comm_cost_modules/kl_total_comm_cost.hpp" +#include "kl_improver.hpp" +#include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" + +namespace osp { + +using DoubleCostT = double; + +template +using KlTotalCommImprover + = KlImprover, + MemoryConstraintT, + windowSize, + DoubleCostT>; + +template , + unsigned windowSize = 1, + bool useNodeCommunicationCostsArg = true> +using KlTotalCommImproverLocalMemConstr + = KlImprover, + MemoryConstraintT, + windowSize, + DoubleCostT>; + +template +using KlTotalLambdaCommImprover = KlImprover, + MemoryConstraintT, + windowSize, + DoubleCostT>; + +template , unsigned windowSize = 1> +using KlTotalLambdaCommImproverLocalMemConstr + = KlImprover, + MemoryConstraintT, + windowSize, + DoubleCostT>; + +template +using KlBspCommImprover + = KlImprover, MemoryConstraintT, windowSize, DoubleCostT>; + +template , unsigned windowSize = 1> +using KlBspCommImproverLocalMemConstr + = KlImprover, MemoryConstraintT, windowSize, DoubleCostT>; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp new file mode 100644 index 00000000..c24659e1 --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp @@ -0,0 +1,49 @@ + +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include "comm_cost_modules/kl_bsp_comm_cost.hpp" +#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp" +#include "comm_cost_modules/kl_total_comm_cost.hpp" +#include "kl_improver_mt.hpp" +#include "kl_include.hpp" + +namespace osp { + +template +using KlTotalCommImproverMt + = KlImproverMt, + MemoryConstraintT, + windowSize, + double>; + +template +using KlTotalLambdaCommImproverMt + = KlImproverMt, MemoryConstraintT, windowSize, double>; + +template +using KlBspCommImproverMt + = KlImproverMt, MemoryConstraintT, windowSize, double>; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp deleted file mode 100644 index 0e3f5d65..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp +++ /dev/null @@ -1,173 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include -#include -#include -#include -#include - -#include "kl_base.hpp" - -namespace osp { - -template -class kl_current_schedule_total : public kl_current_schedule { - public: - kl_current_schedule_total(Ikl_cost_function *cost_f_) : kl_current_schedule(cost_f_) {} - - double comm_multiplier = 1.0; - constexpr static bool use_node_communication_costs = use_node_communication_costs_arg || not has_edge_weights_v; -}; - -template -class kl_total : public kl_base { - protected: - kl_current_schedule_total current_schedule; - - v_commw_t node_comm_selection_threshold = 0; - double max_edge_weight = 0.0; - - virtual void initialize_datastructures() override { -#ifdef KL_DEBUG - std::cout << "KLTotal initialize datastructures" << std::endl; -#endif - - kl_base::initialize_datastructures(); - - v_commw_t max_edge_weight_ = 0; - v_workw_t max_node_weight_ = 0; - - for (const auto vertex : current_schedule.instance->getComputationalDag().vertices()) { - if (is_sink(vertex, current_schedule.instance->getComputationalDag())) { - continue; - } - - max_edge_weight_ - = std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().vertex_comm_weight(vertex)); - - max_node_weight_ - = std::max(max_node_weight_, current_schedule.instance->getComputationalDag().vertex_work_weight(vertex)); - } - - if constexpr (not current_schedule.use_node_communication_costs) { - max_edge_weight_ = 0; - - for (const auto &edge : edges(current_schedule.instance->getComputationalDag())) { - max_edge_weight_ - = std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().edge_comm_weight(edge)); - } - } - - max_edge_weight = max_edge_weight_ + max_node_weight_; - - kl_base::parameters.initial_penalty - = max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts(); - - kl_base::parameters.gain_threshold - = max_edge_weight * 
current_schedule.comm_multiplier * current_schedule.instance->communicationCosts(); - } - - virtual void update_reward_penalty() override { - if (current_schedule.current_violations.size() <= kl_base::parameters.violations_threshold) { - kl_base::penalty = kl_base::parameters.initial_penalty; - kl_base::reward = 0.0; - - } else { - kl_base::parameters.violations_threshold = 0; - - kl_base::penalty = std::log((current_schedule.current_violations.size())) - * max_edge_weight * current_schedule.comm_multiplier - * current_schedule.instance->communicationCosts(); - - kl_base::reward = std::sqrt((current_schedule.current_violations.size() + 4)) - * max_edge_weight * current_schedule.comm_multiplier - * current_schedule.instance->communicationCosts(); - } - } - - virtual void set_initial_reward_penalty() override { - kl_base::penalty = kl_base::parameters.initial_penalty; - kl_base::reward - = max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts(); - } - - virtual void select_nodes_comm() override { - if constexpr (current_schedule.use_node_communication_costs) { - for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) { - for (const auto &source : current_schedule.instance->getComputationalDag().parents(node)) { - if (current_schedule.vector_schedule.assignedProcessor(node) - != current_schedule.vector_schedule.assignedProcessor(source)) { - if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node) - > node_comm_selection_threshold) { - kl_base::node_selection.insert(node); - break; - } - } - } - - for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - if (current_schedule.vector_schedule.assignedProcessor(node) - != current_schedule.vector_schedule.assignedProcessor(target)) { - if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node) - > node_comm_selection_threshold) { - kl_base::node_selection.insert(node); - 
break; - } - } - } - } - - } else { - for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) { - for (const auto &in_edge : in_edges(node, current_schedule.instance->getComputationalDag())) { - const auto &source_v = source(in_edge, current_schedule.instance->getComputationalDag()); - if (current_schedule.vector_schedule.assignedProcessor(node) - != current_schedule.vector_schedule.assignedProcessor(source_v)) { - if (current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge) - > node_comm_selection_threshold) { - kl_base::node_selection.insert(node); - break; - } - } - } - - for (const auto &out_edge : out_edges(node, current_schedule.instance->getComputationalDag())) { - const auto &target_v = target(out_edge, current_schedule.instance->getComputationalDag()); - if (current_schedule.vector_schedule.assignedProcessor(node) - != current_schedule.vector_schedule.assignedProcessor(target_v)) { - if (current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge) - > node_comm_selection_threshold) { - kl_base::node_selection.insert(node); - break; - } - } - } - } - } - } - - public: - kl_total() : kl_base(current_schedule), current_schedule(this) {} - - virtual ~kl_total() = default; -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp deleted file mode 100644 index b5f16bf0..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp +++ /dev/null @@ -1,1016 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include -#include -#include -#include -#include - -#include "kl_total.hpp" - -namespace osp { - -template -class kl_total_comm : public kl_total { - protected: - virtual void compute_comm_gain(vertex_idx_t node, - unsigned current_step, - unsigned current_proc, - unsigned new_proc) override { - if constexpr (kl_total::current_schedule - .use_node_communication_costs) { - if (current_proc == new_proc) { - for (const auto &target : - kl_total::current_schedule.instance - ->getComputationalDag() - .children(node)) { - if ((current_step + 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(target)) - || (current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(target))) { - kl_total::node_gains[node][current_proc][2] - -= kl_total::penalty; - - } else if ((current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(target)) - || (current_step - 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(target))) { - kl_total::node_gains[node][current_proc][0] - += static_cast( - kl_total::current_schedule - 
.instance->getComputationalDag() - .vertex_comm_weight(node)) - + kl_total::reward; - } - } - - for (const auto &source : - kl_total::current_schedule.instance - ->getComputationalDag() - .parents(node)) { - if ((current_step - 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(source)) - || (current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(source))) { - kl_total::node_gains[node][current_proc][0] - -= kl_total::penalty; - - } else if ((current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(source)) - || (current_step + 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(source))) { - kl_total::node_gains[node][current_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - } - } - } else { - // current_proc != new_proc - - for (const auto &target : - kl_total::current_schedule.instance - ->getComputationalDag() - .children(node)) { - const unsigned &target_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(target); - if (target_proc == current_proc) { - const double loss - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - * kl_total::current_schedule - .instance->communicationCosts(new_proc, target_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][1] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - - 
kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][1] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step) { - kl_total::node_gains[node][new_proc][1] - -= kl_total::penalty; - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - } - - } else if (target_proc == new_proc) { - const double gain - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, target_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - + kl_total::reward; - - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - + kl_total::reward; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - < current_step) { - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - 
+ kl_total::reward; - } - - } else { - assert(target_proc != current_proc && target_proc != new_proc); - - const double gain - = static_cast( - kl_total::current_schedule - .instance->communicationCosts(new_proc, target_proc) - - kl_total::current_schedule - .instance->communicationCosts(current_proc, target_proc)) - * kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step) { - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - + kl_total::reward; - } - } - } - - for (const auto &source : - kl_total::current_schedule.instance - ->getComputationalDag() - .parents(node)) { - const unsigned &source_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(source); - if (source_proc == current_proc) { - const double loss - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, new_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][1] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - - 
kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][1] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - kl_total::node_gains[node][new_proc][1] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - } - - } else if (source_proc == new_proc) { - assert(source_proc != current_proc); - const double gain - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, new_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - 
.instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - } - - } else { - assert(source_proc != current_proc && source_proc != new_proc); - const double gain - = static_cast( - kl_total::current_schedule - .instance->communicationCosts(new_proc, source_proc) - - kl_total::current_schedule - .instance->communicationCosts(current_proc, source_proc)) - * kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step) { - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - } - } - } - } - } else { - if (current_proc == new_proc) { - for (const auto &out_edge : - out_edges(node, - kl_total::current_schedule.instance - ->getComputationalDag())) { - const auto &target_v - = target(out_edge, - kl_total::current_schedule - .instance->getComputationalDag()); - // for (const auto &target : - // kl_total::current_schedule.instance->getComputationalDag().children(node)) { - - if ((current_step + 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v)) - || (current_step - == kl_total::current_schedule 
- .vector_schedule.assignedSuperstep(target_v) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v))) { - kl_total::node_gains[node][current_proc][2] - -= kl_total::penalty; - - } else if ((current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v)) - || (current_step - 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v))) { - kl_total::node_gains[node][current_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + kl_total::reward; - } - } - - for (const auto &in_edge : - in_edges(node, - kl_total::current_schedule.instance - ->getComputationalDag())) { - const auto &source_v - = source(in_edge, - kl_total::current_schedule - .instance->getComputationalDag()); - // for (const auto &source : - // kl_total::current_schedule.instance->getComputationalDag().parents(node)) { - - if ((current_step - 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v)) - || (current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v))) { - kl_total::node_gains[node][current_proc][0] - -= kl_total::penalty; - - } else if ((current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v)) - || (current_step + 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - && current_proc - == kl_total::current_schedule - 
.vector_schedule.assignedProcessor(source_v))) { - kl_total::node_gains[node][current_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - } - } - } else { - // current_proc != new_proc - - for (const auto &out_edge : - out_edges(node, - kl_total::current_schedule.instance - ->getComputationalDag())) { - const auto &target_v - = target(out_edge, - kl_total::current_schedule - .instance->getComputationalDag()); - const unsigned &target_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v); - - if (target_proc == current_proc) { - const double loss - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - * kl_total::current_schedule - .instance->communicationCosts(new_proc, target_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][1] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][1] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step) { - kl_total::node_gains[node][new_proc][1] - -= kl_total::penalty; - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - } - - } else if (target_proc == new_proc) { - const double gain - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, target_proc) - * kl_total::current_schedule - 
.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + kl_total::reward; - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + kl_total::reward; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + kl_total::reward; - } - - } else { - assert(target_proc != current_proc && target_proc != new_proc); - - const double gain - = static_cast( - kl_total::current_schedule - .instance->communicationCosts(new_proc, target_proc) - - kl_total::current_schedule - .instance->communicationCosts(current_proc, target_proc)) - * kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - 
.vector_schedule.assignedSuperstep(target_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step) { - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + kl_total::reward; - } - } - } - - for (const auto &in_edge : - in_edges(node, - kl_total::current_schedule.instance - ->getComputationalDag())) { - const auto &source_v - = source(in_edge, - kl_total::current_schedule - .instance->getComputationalDag()); - - const unsigned &source_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v); - if (source_proc == current_proc) { - const double loss - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, new_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][1] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][1] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - kl_total::node_gains[node][new_proc][1] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - } - } else if (source_proc == new_proc) { - assert(source_proc != current_proc); - const double gain - = static_cast( - kl_total::current_schedule 
- .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, new_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - } - - } else { - assert(source_proc != current_proc && source_proc != new_proc); - const double gain - = static_cast( - kl_total::current_schedule - .instance->communicationCosts(new_proc, source_proc) - - kl_total::current_schedule - .instance->communicationCosts(current_proc, source_proc)) - * kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - 
kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step) { - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - } - } - } - } - } - } - - virtual double compute_current_costs() override { - double work_costs = 0; - for (unsigned step = 0; - step < kl_total::current_schedule.num_steps(); - step++) { - work_costs - += kl_total::current_schedule.step_max_work[step]; - } - - double comm_costs = 0; - for (const auto &edge : edges(kl_total::current_schedule - .instance->getComputationalDag())) { - const auto &source_v = source(edge, - kl_total::current_schedule - .instance->getComputationalDag()); - const unsigned &source_proc = kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v); - const unsigned &target_proc - = kl_total::current_schedule.vector_schedule - .assignedProcessor( - target(edge, - kl_total::current_schedule - .instance->getComputationalDag())); - - if (source_proc != target_proc) { - if constexpr (kl_total::current_schedule - .use_node_communication_costs) { - comm_costs - += kl_total::current_schedule.instance - ->getComputationalDag() - .vertex_comm_weight(source_v) - * kl_total::current_schedule.instance - ->communicationCosts(source_proc, target_proc); - } else { - comm_costs - += kl_total::current_schedule.instance - ->getComputationalDag() - .edge_comm_weight(edge) - * kl_total::current_schedule.instance - ->communicationCosts(source_proc, target_proc); - } - } - } - - kl_total::current_schedule.current_cost - = work_costs - + comm_costs - * 
kl_total::current_schedule.comm_multiplier - + (static_cast( - kl_total::current_schedule.num_steps()) - - 1) - * kl_total::current_schedule.instance - ->synchronisationCosts(); - - return kl_total::current_schedule.current_cost; - } - - public: - kl_total_comm() : kl_total() {} - - virtual ~kl_total_comm() = default; - - virtual std::string getScheduleName() const override { return "KLTotalComm"; } -}; - -template -class kl_total_comm_test : public kl_total_comm { - public: - kl_total_comm_test() : kl_total_comm() {} - - virtual ~kl_total_comm_test() = default; - - virtual std::string getScheduleName() const override { return "KLBaseTest"; } - - kl_current_schedule_total &get_current_schedule() { - return kl_total::current_schedule; - } - - auto &get_node_gains() { return kl_total::node_gains; } - - auto &get_node_change_in_costs() { - return kl_total::node_change_in_costs; - } - - auto &get_max_gain_heap() { return kl_total::max_gain_heap; } - - void initialize_gain_heap_test(const std::unordered_set> &nodes, - double reward_ = 0.0, - double penalty_ = 0.0) { - kl_total::reward = reward_; - kl_total::penalty = penalty_; - - kl_total::initialize_gain_heap(nodes); - } - - void test_setup_schedule(BspSchedule &schedule) { - kl_total::current_schedule.instance - = &schedule.getInstance(); - - kl_total::best_schedule = &schedule; - - kl_total::num_nodes - = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs - = kl_total::current_schedule.instance - ->numberOfProcessors(); - - kl_total::set_parameters(); - kl_total::initialize_datastructures(); - } - - RETURN_STATUS improve_schedule_test_1(BspSchedule &schedule) { - kl_total::current_schedule.instance - = &schedule.getInstance(); - - kl_total::best_schedule = &schedule; - kl_total::num_nodes - = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs - = kl_total::current_schedule.instance - ->numberOfProcessors(); - - kl_total::set_parameters(); - 
kl_total::initialize_datastructures(); - - bool improvement_found = kl_total::run_local_search_simple(); - - if (improvement_found) { - return RETURN_STATUS::OSP_SUCCESS; - } else { - return RETURN_STATUS::BEST_FOUND; - } - } - - RETURN_STATUS improve_schedule_test_2(BspSchedule &schedule) { - kl_total::current_schedule.instance = &schedule.getInstance(); - - kl_total::best_schedule = &schedule; - kl_total::num_nodes - = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs - = kl_total::current_schedule.instance->numberOfProcessors(); - - kl_total::set_parameters(); - kl_total::initialize_datastructures(); - - bool improvement_found = kl_total::run_local_search_unlock_delay(); - - if (improvement_found) { - return RETURN_STATUS::OSP_SUCCESS; - } else { - return RETURN_STATUS::BEST_FOUND; - } - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp deleted file mode 100644 index d10c6109..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp +++ /dev/null @@ -1,1245 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner -*/ - -#pragma once - -#include -#include -#include -#include -#include - -#include "kl_total.hpp" - -namespace osp { - -template -class kl_total_cut : public kl_total { - protected: - double max_edge_weight = 0.0; - - virtual void compute_comm_gain(vertex_idx_t node, - unsigned current_step, - unsigned current_proc, - unsigned new_proc) override { - if constexpr (kl_total::current_schedule - .use_node_communication_costs) { - if (current_proc == new_proc) { - for (const auto &target : - kl_total::current_schedule.instance - ->getComputationalDag() - .children(node)) { - const unsigned &target_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(target); - const double loss - = static_cast( - kl_total::current_schedule.instance - ->getComputationalDag() - .vertex_comm_weight(node)) - * kl_total::current_schedule.instance - ->communicationCosts(new_proc, target_proc) - * kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule - .assignedSuperstep(target) - == current_step) { - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= loss; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][0] - -= loss; - } - - if ((current_step + 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(target)) - || (current_step - == 
kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(target))) { - kl_total::node_gains[node][current_proc][2] - -= kl_total::penalty; - - } else if ((current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(target)) - || (current_step - 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(target))) { - kl_total::node_gains[node][current_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - + kl_total::reward; - } - } - - for (const auto &source : - kl_total::current_schedule.instance - ->getComputationalDag() - .parents(node)) { - const unsigned &source_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(source); - const double loss - = static_cast( - kl_total::current_schedule.instance - ->getComputationalDag() - .vertex_comm_weight(source)) - * kl_total::current_schedule.instance - ->communicationCosts(new_proc, source_proc) - * kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule - .assignedSuperstep(source) - == current_step) { - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= loss; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step - 1) { - 
kl_total::node_gains[node][new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][0] - -= loss; - } - - if ((current_step - 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(source)) - || (current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(source))) { - kl_total::node_gains[node][current_proc][0] - -= kl_total::penalty; - - } else if ((current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(source)) - || (current_step + 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(source))) { - kl_total::node_gains[node][current_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - } - } - } else { - // current_proc != new_proc - - for (const auto &target : - kl_total::current_schedule.instance - ->getComputationalDag() - .children(node)) { - const unsigned &target_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(target); - if (target_proc == current_proc) { - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step) { - const double loss - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - * kl_total::current_schedule - .instance->communicationCosts(new_proc, target_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][1] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= 
loss; - - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][1] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - } - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step) { - kl_total::node_gains[node][new_proc][1] - -= kl_total::penalty; - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - } - - } else if (target_proc == new_proc) { - const double gain - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, target_proc) - * kl_total::current_schedule - .comm_multiplier; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - } - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - + kl_total::reward; - - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - 
.instance->getComputationalDag() - .vertex_comm_weight(node)) - + kl_total::reward; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - < current_step) { - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - + kl_total::reward; - } - - } else { - assert(target_proc != current_proc && target_proc != new_proc); - - const double gain - = static_cast( - kl_total::current_schedule - .instance->communicationCosts(new_proc, target_proc) - - kl_total::current_schedule - .instance->communicationCosts(current_proc, target_proc)) - * kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target) - == current_step) { - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(node)) - + kl_total::reward; - } - } - } - - for (const auto &source : - kl_total::current_schedule.instance - ->getComputationalDag() - .parents(node)) { - const unsigned &source_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(source); - if (source_proc == current_proc) { - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step) { - const 
double loss - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, new_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][1] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][1] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - } - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - kl_total::node_gains[node][new_proc][1] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - } - - } else if (source_proc == new_proc) { - assert(source_proc != current_proc); - const double gain - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, new_proc) - * kl_total::current_schedule - .comm_multiplier; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - 
+= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - } - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - } - - } else { - assert(source_proc != current_proc && source_proc != new_proc); - const double gain - = static_cast( - kl_total::current_schedule - .instance->communicationCosts(new_proc, source_proc) - - kl_total::current_schedule - .instance->communicationCosts(current_proc, source_proc)) - * kl_total::current_schedule - .instance->getComputationalDag() - .vertex_comm_weight(source) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source) - == current_step) { - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - 
.instance->getComputationalDag() - .vertex_comm_weight(source)) - + kl_total::reward; - } - } - } - } - } else { - if (current_proc == new_proc) { - for (const auto &out_edge : - out_edges(node, - kl_total::current_schedule.instance - ->getComputationalDag())) { - const auto &target_v - = target(out_edge, - kl_total::current_schedule - .instance->getComputationalDag()); - const unsigned &target_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v); - - const double loss - = static_cast( - kl_total::current_schedule.instance - ->getComputationalDag() - .edge_comm_weight(out_edge)) - * kl_total::current_schedule.instance - ->communicationCosts(new_proc, target_proc) - * kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule - .assignedSuperstep(target_v) - == current_step) { - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= loss; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][0] - -= loss; - } - - if ((current_step + 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v)) - || (current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v))) { - 
kl_total::node_gains[node][current_proc][2] - -= kl_total::penalty; - - } else if ((current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v)) - || (current_step - 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v))) { - kl_total::node_gains[node][current_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + kl_total::reward; - } - } - - for (const auto &in_edge : - in_edges(node, - kl_total::current_schedule.instance - ->getComputationalDag())) { - const auto &source_v - = source(in_edge, - kl_total::current_schedule - .instance->getComputationalDag()); - const unsigned &source_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v); - - const double loss - = static_cast( - kl_total::current_schedule.instance - ->getComputationalDag() - .edge_comm_weight(in_edge)) - * kl_total::current_schedule.instance - ->communicationCosts(new_proc, source_proc) - * kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule - .assignedSuperstep(source_v) - == current_step) { - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= loss; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - 
+= loss; - kl_total::node_change_in_costs[node] - [new_proc][0] - -= loss; - } - - if ((current_step - 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v)) - || (current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v))) { - kl_total::node_gains[node][current_proc][0] - -= kl_total::penalty; - - } else if ((current_step - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - && current_proc - != kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v)) - || (current_step + 1 - == kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - && current_proc - == kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v))) { - kl_total::node_gains[node][current_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - } - } - } else { - // current_proc != new_proc - - for (const auto &out_edge : - out_edges(node, - kl_total::current_schedule.instance - ->getComputationalDag())) { - const auto &target_v - = target(out_edge, - kl_total::current_schedule - .instance->getComputationalDag()); - const unsigned &target_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v); - - if (target_proc == current_proc) { - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step) { - const double loss - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - * kl_total::current_schedule - .instance->communicationCosts(new_proc, target_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - -= loss; - 
kl_total::node_gains[node][new_proc][1] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][1] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - } - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step) { - kl_total::node_gains[node][new_proc][1] - -= kl_total::penalty; - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - } - - } else if (target_proc == new_proc) { - const double gain - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, target_proc) - * kl_total::current_schedule - .comm_multiplier; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - } - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + 
kl_total::reward; - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + kl_total::reward; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + kl_total::reward; - } - - } else { - assert(target_proc != current_proc && target_proc != new_proc); - - const double gain - = static_cast( - kl_total::current_schedule - .instance->communicationCosts(new_proc, target_proc) - - kl_total::current_schedule - .instance->communicationCosts(current_proc, target_proc)) - * kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - -= kl_total::penalty; - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v) - == current_step) { - kl_total::node_gains[node][new_proc][0] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(out_edge)) - + kl_total::reward; - } - } - } - - for (const auto &in_edge : - in_edges(node, - kl_total::current_schedule.instance - ->getComputationalDag())) { - const auto &source_v - = source(in_edge, - kl_total::current_schedule - .instance->getComputationalDag()); - - const 
unsigned &source_proc - = kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v); - if (source_proc == current_proc) { - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step) { - const double loss - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, new_proc) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - -= loss; - kl_total::node_gains[node][new_proc][1] - -= loss; - kl_total::node_gains[node][new_proc][2] - -= loss; - - kl_total::node_change_in_costs[node] - [new_proc][0] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][1] - += loss; - kl_total::node_change_in_costs[node] - [new_proc][2] - += loss; - } - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - kl_total::node_gains[node][new_proc][1] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step - 1) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - } - } else if (source_proc == new_proc) { - assert(source_proc != current_proc); - const double gain - = static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - * kl_total::current_schedule - .instance->communicationCosts(current_proc, new_proc) - * kl_total::current_schedule - .comm_multiplier; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step - 1) { - 
kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - } - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step) { - kl_total::node_gains[node][new_proc][1] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step + 1) { - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - } - - } else { - assert(source_proc != current_proc && source_proc != new_proc); - const double gain - = static_cast( - kl_total::current_schedule - .instance->communicationCosts(new_proc, source_proc) - - kl_total::current_schedule - .instance->communicationCosts(current_proc, source_proc)) - * kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge) - * kl_total::current_schedule - .comm_multiplier; - - kl_total::node_gains[node][new_proc][0] - += gain; - kl_total::node_gains[node][new_proc][1] - += gain; - kl_total::node_gains[node][new_proc][2] - += gain; - - kl_total::node_change_in_costs[node] - [new_proc][0] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][1] - -= gain; - kl_total::node_change_in_costs[node] - [new_proc][2] - -= gain; - - if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == 
current_step - 1) { - kl_total::node_gains[node][new_proc][0] - -= kl_total::penalty; - - } else if (kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v) - == current_step) { - kl_total::node_gains[node][new_proc][2] - += static_cast( - kl_total::current_schedule - .instance->getComputationalDag() - .edge_comm_weight(in_edge)) - + kl_total::reward; - } - } - } - } - } - } - - virtual double compute_current_costs() override { - double work_costs = 0; - for (unsigned step = 0; - step < kl_total::current_schedule.num_steps(); - step++) { - work_costs - += kl_total::current_schedule.step_max_work[step]; - } - - double comm_costs = 0; - for (const auto &edge : edges(kl_total::current_schedule - .instance->getComputationalDag())) { - const vertex_idx_t &source_v - = source(edge, - kl_total::current_schedule.instance - ->getComputationalDag()); - const vertex_idx_t &target_v - = target(edge, - kl_total::current_schedule.instance - ->getComputationalDag()); - const unsigned &source_proc = kl_total::current_schedule - .vector_schedule.assignedProcessor(source_v); - const unsigned &target_proc = kl_total::current_schedule - .vector_schedule.assignedProcessor(target_v); - const unsigned &source_step = kl_total::current_schedule - .vector_schedule.assignedSuperstep(source_v); - const unsigned &target_step = kl_total::current_schedule - .vector_schedule.assignedSuperstep(target_v); - - if (source_proc != target_proc || source_step != target_step) { - if constexpr (kl_total::current_schedule - .use_node_communication_costs) { - comm_costs - += kl_total::current_schedule.instance - ->getComputationalDag() - .vertex_comm_weight(source_v) - * kl_total::current_schedule.instance - ->communicationCosts(source_proc, target_proc); - } else { - comm_costs - += kl_total::current_schedule.instance - ->getComputationalDag() - .edge_comm_weight(edge) - * kl_total::current_schedule.instance - ->communicationCosts(source_proc, target_proc); - } - } - } - - 
kl_total::current_schedule.current_cost - = work_costs - + comm_costs - * kl_total::current_schedule.comm_multiplier - + (kl_total::current_schedule.num_steps() - 1) - * static_cast(kl_total::current_schedule - .instance->synchronisationCosts()); - - return kl_total::current_schedule.current_cost; - } - - public: - kl_total_cut() : kl_total() {} - - virtual ~kl_total_cut() = default; - - virtual std::string getScheduleName() const override { return "KLTotalCut"; } -}; - -template -class kl_total_cut_test : public kl_total_cut { - public: - kl_total_cut_test() : kl_total_cut() {} - - virtual ~kl_total_cut_test() = default; - - virtual std::string getScheduleName() const override { return "KLTotalCutTest"; } - - kl_current_schedule_total &get_current_schedule() { - return kl_total::current_schedule; - } - - auto &get_node_gains() { return kl_total::node_gains; } - - auto &get_node_change_in_costs() { return kl_total::node_change_in_costs; } - - auto &get_max_gain_heap() { return kl_total::max_gain_heap; } - - void initialize_gain_heap_test(const std::unordered_set> &nodes, - double reward_ = 0.0, - double penalty_ = 0.0) { - kl_total::reward = reward_; - kl_total::penalty = penalty_; - - kl_total::initialize_gain_heap(nodes); - } - - void test_setup_schedule(BspSchedule &schedule) { - kl_total::current_schedule.instance = &schedule.getInstance(); - - kl_total::best_schedule = &schedule; - - kl_total::num_nodes - = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs - = kl_total::current_schedule.instance->numberOfProcessors(); - - kl_total::set_parameters(); - kl_total::initialize_datastructures(); - } - - RETURN_STATUS improve_schedule_test_1(BspSchedule &schedule) { - kl_total::current_schedule.instance = &schedule.getInstance(); - - kl_total::best_schedule = &schedule; - kl_total::num_nodes - = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs - = kl_total::current_schedule.instance->numberOfProcessors(); - - 
kl_total::set_parameters(); - kl_total::initialize_datastructures(); - - bool improvement_found = kl_total::run_local_search_simple(); - - if (improvement_found) { - return RETURN_STATUS::OSP_SUCCESS; - } else { - return RETURN_STATUS::BEST_FOUND; - } - } - - RETURN_STATUS improve_schedule_test_2(BspSchedule &schedule) { - kl_total::current_schedule.instance = &schedule.getInstance(); - - kl_total::best_schedule = &schedule; - kl_total::num_nodes - = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs - = kl_total::current_schedule.instance->numberOfProcessors(); - - kl_total::set_parameters(); - kl_total::initialize_datastructures(); - - bool improvement_found = kl_total::run_local_search_unlock_delay(); - - if (improvement_found) { - return RETURN_STATUS::OSP_SUCCESS; - } else { - return RETURN_STATUS::BEST_FOUND; - } - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_util.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_util.hpp new file mode 100644 index 00000000..7d8b7afc --- /dev/null +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_util.hpp @@ -0,0 +1,433 @@ +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner +*/ + +#pragma once + +#include + +#include "kl_active_schedule.hpp" + +namespace osp { + +template +struct RewardPenaltyStrategy { + KlActiveScheduleT *activeSchedule_; + CostT maxWeight_; + + unsigned violationsThreshold_ = 0; + CostT initialPenalty_ = 10.0; + CostT penalty_ = 0; + CostT reward_ = 0; + + void Initialize(KlActiveScheduleT &sched, const CostT maxComm, const CostT maxWork) { + maxWeight_ = std::max(maxWork, maxComm * sched.GetInstance().CommunicationCosts()); + activeSchedule_ = &sched; + initialPenalty_ = static_cast(std::sqrt(maxWeight_)); + } + + void InitRewardPenalty(double multiplier = 1.0) { + multiplier = std::min(multiplier, 10.0); + penalty_ = static_cast(initialPenalty_ * multiplier); + reward_ = static_cast(maxWeight_ * multiplier); + } +}; + +template +struct SetVertexLockManager { + std::unordered_set lockedNodes_; + + void Initialize(size_t) {} + + void Lock(VertexType node) { lockedNodes_.insert(node); } + + void Unlock(VertexType node) { lockedNodes_.erase(node); } + + bool IsLocked(VertexType node) { return lockedNodes_.find(node) != lockedNodes_.end(); } + + void Clear() { lockedNodes_.clear(); } +}; + +template +struct VectorVertexLockManager { + std::vector lockedNodes_; + + void Initialize(size_t numNodes) { lockedNodes_.resize(numNodes); } + + void Lock(VertexType node) { lockedNodes_[node] = true; } + + void Unlock(VertexType node) { lockedNodes_[node] = false; } + + bool IsLocked(VertexType node) { return lockedNodes_[node]; } + + void Clear() { lockedNodes_.assign(lockedNodes_.size(), false); } +}; + +template +struct AdaptiveAffinityTable { + constexpr static unsigned windowRange_ = 2 * windowSize + 1; + using VertexType = VertexIdxT; + + private: + const KlActiveScheduleT *activeSchedule_; + const GraphT *graph_; + + std::vector nodeIsSelected_; + std::vector selectedNodesIdx_; + + std::vector>> affinityTable_; + std::vector selectedNodes_; + + std::vector gaps_; + size_t lastIdx_; + + public: + void 
Initialize(const KlActiveScheduleT &sche, const std::size_t initialTableSize) { + activeSchedule_ = &sche; + graph_ = &(sche.GetInstance().GetComputationalDag()); + + lastIdx_ = 0; + + nodeIsSelected_.resize(graph_->NumVertices()); + selectedNodesIdx_.resize(graph_->NumVertices()); + selectedNodes_.resize(initialTableSize); + + nodeIsSelected_.assign(nodeIsSelected_.size(), false); + + affinityTable_.resize(initialTableSize); + const unsigned numProcs = sche.GetInstance().NumberOfProcessors(); + for (auto &table : affinityTable_) { + table.resize(numProcs); + for (auto &row : table) { + row.resize(windowRange_); + } + } + } + + inline std::vector &GetSelectedNodes() { return selectedNodes_; } + + inline const std::vector &GetSelectedNodes() const { return selectedNodes_; } + + inline size_t size() const { return lastIdx_ - gaps_.size(); } + + inline bool IsSelected(VertexType node) const { return nodeIsSelected_[node]; } + + inline const std::vector &GetSelectedNodesIndices() const { return selectedNodesIdx_; } + + inline size_t GetSelectedNodesIdx(VertexType node) const { return selectedNodesIdx_[node]; } + + inline std::vector> &operator[](VertexType node) { + assert(nodeIsSelected_[node]); + return affinityTable_[selectedNodesIdx_[node]]; + } + + inline std::vector> &At(VertexType node) { + assert(nodeIsSelected_[node]); + return affinityTable_[selectedNodesIdx_[node]]; + } + + inline const std::vector> &At(VertexType node) const { + assert(nodeIsSelected_[node]); + return affinityTable_[selectedNodesIdx_[node]]; + } + + inline std::vector> &GetAffinityTable(VertexType node) { + assert(nodeIsSelected_[node]); + return affinityTable_[selectedNodesIdx_[node]]; + } + + bool Insert(VertexType node) { + if (nodeIsSelected_[node]) { + return false; // Node is already in the table. 
+ } + + size_t insertLocation; + if (!gaps_.empty()) { + insertLocation = gaps_.back(); + gaps_.pop_back(); + } else { + insertLocation = lastIdx_; + + if (insertLocation >= selectedNodes_.size()) { + const size_t oldSize = selectedNodes_.size(); + const size_t newSize = std::min(oldSize * 2, static_cast(graph_->NumVertices())); + + selectedNodes_.resize(newSize); + affinityTable_.resize(newSize); + + const unsigned numProcs = activeSchedule_->GetInstance().NumberOfProcessors(); + for (size_t i = oldSize; i < newSize; ++i) { + affinityTable_[i].resize(numProcs); + for (auto &row : affinityTable_[i]) { + row.resize(windowRange_); + } + } + } + lastIdx_++; + } + + nodeIsSelected_[node] = true; + selectedNodesIdx_[node] = insertLocation; + selectedNodes_[insertLocation] = node; + + return true; + } + + void Remove(VertexType node) { + assert(nodeIsSelected_[node]); + nodeIsSelected_[node] = false; + + gaps_.push_back(selectedNodesIdx_[node]); + } + + void ResetNodeSelection() { + nodeIsSelected_.assign(nodeIsSelected_.size(), false); + gaps_.clear(); + lastIdx_ = 0; + } + + void Clear() { + nodeIsSelected_.clear(); + selectedNodesIdx_.clear(); + affinityTable_.clear(); + selectedNodes_.clear(); + gaps_.clear(); + lastIdx_ = 0; + } + + void Trim() { + while (!gaps_.empty() && lastIdx_ > 0) { + size_t lastElementIdx = lastIdx_ - 1; + + // The last element could be a gap itself. If so, just shrink the size. + // We don't need to touch the `gaps` vector, as it will be cleared. + if (!nodeIsSelected_[selectedNodes_[lastElementIdx]]) { + lastIdx_--; + continue; + } + + size_t gapIdx = gaps_.back(); + gaps_.pop_back(); + + // If the gap we picked is now at or after the end, we can ignore it. 
+ if (gapIdx >= lastIdx_) { + continue; + } + + VertexType nodeToMove = selectedNodes_[lastElementIdx]; + + std::swap(affinityTable_[gapIdx], affinityTable_[lastElementIdx]); + std::swap(selectedNodes_[gapIdx], selectedNodes_[lastElementIdx]); + selectedNodesIdx_[nodeToMove] = gapIdx; + + lastIdx_--; + } + gaps_.clear(); + } +}; + +template +struct StaticAffinityTable { + constexpr static unsigned windowRange_ = 2 * windowSize + 1; + using VertexType = VertexIdxT; + + private: + const KlActiveScheduleT *activeSchedule_; + const GraphT *graph_; + + std::unordered_set selectedNodes_; + + std::vector>> affinityTable_; + + public: + void Initialize(const KlActiveScheduleT &sche, const std::size_t) { + activeSchedule_ = &sche; + graph_ = &(sche.GetInstance().GetComputationalDag()); + + affinityTable_.resize(graph_->NumVertices()); + const unsigned numProcs = sche.GetInstance().NumberOfProcessors(); + for (auto &table : affinityTable_) { + table.resize(numProcs); + for (auto &row : table) { + row.resize(windowRange_); + } + } + } + + inline std::vector GetSelectedNodes() const { return {selectedNodes_.begin(), selectedNodes_.end()}; } + + inline size_t size() const { return selectedNodes_.size(); } + + inline bool IsSelected(VertexType node) const { return selectedNodes_.find(node) != selectedNodes_.end(); } + + inline std::vector> &operator[](VertexType node) { return affinityTable_[node]; } + + inline std::vector> &At(VertexType node) { return affinityTable_[node]; } + + inline const std::vector> &At(VertexType node) const { return affinityTable_[node]; } + + inline std::vector> &GetAffinityTable(VertexType node) { return affinityTable_[node]; } + + bool Insert(VertexType node) { + const auto pair = selectedNodes_.insert(node); + return pair.second; + } + + void Remove(VertexType node) { selectedNodes_.erase(node); } + + void ResetNodeSelection() { selectedNodes_.clear(); } + + void Clear() { + affinityTable_.clear(); + selectedNodes_.clear(); + } + + void Trim() {} 
+}; + +template +struct VertexSelectionStrategy { + using EdgeType = EdgeDescT; + + const KlActiveScheduleT *activeSchedule_; + const GraphT *graph_; + std::mt19937 *gen_; + std::size_t selectionThreshold_ = 0; + unsigned strategyCounter_ = 0; + + std::vector> permutation_; + std::size_t permutationIdx_; + + unsigned maxWorkCounter_ = 0; + + inline void Initialize(const KlActiveScheduleT &sche, std::mt19937 &gen, const unsigned startStep, const unsigned endStep) { + activeSchedule_ = &sche; + graph_ = &(sche.GetInstance().GetComputationalDag()); + gen_ = &gen; + + permutation_.reserve(graph_->NumVertices() / activeSchedule_->NumSteps() * (endStep - startStep)); + } + + inline void Setup(const unsigned startStep, const unsigned endStep) { + maxWorkCounter_ = startStep; + strategyCounter_ = 0; + permutation_.clear(); + + const unsigned numProcs = activeSchedule_->GetInstance().NumberOfProcessors(); + for (unsigned step = startStep; step <= endStep; ++step) { + const auto &processorVertices = activeSchedule_->GetSetSchedule().stepProcessorVertices_[step]; + for (unsigned proc = 0; proc < numProcs; ++proc) { + for (const auto node : processorVertices[proc]) { + permutation_.push_back(node); + } + } + } + + permutationIdx_ = 0; + std::shuffle(permutation_.begin(), permutation_.end(), *gen_); + } + + void AddNeighboursToSelection(VertexIdxT node, ContainerT &nodes, const unsigned startStep, const unsigned endStep) { + for (const auto parent : graph_->Parents(node)) { + const unsigned parentStep = activeSchedule_->AssignedSuperstep(parent); + if (parentStep >= startStep && parentStep <= endStep) { + nodes.Insert(parent); + } + } + + for (const auto child : graph_->Children(node)) { + const unsigned childStep = activeSchedule_->AssignedSuperstep(child); + if (childStep >= startStep && childStep <= endStep) { + nodes.Insert(child); + } + } + } + + inline void SelectActiveNodes(ContainerT &nodeSelection, const unsigned startStep, const unsigned endStep) { + if 
(strategyCounter_ < 3) { + SelectNodesPermutationThreshold(selectionThreshold_, nodeSelection); + } else if (strategyCounter_ == 4) { + SelectNodesMaxWorkProc(selectionThreshold_, nodeSelection, startStep, endStep); + } + + strategyCounter_++; + strategyCounter_ %= 5; + } + + void SelectNodesViolations(ContainerT &nodeSelection, + std::unordered_set ¤tViolations, + const unsigned startStep, + const unsigned endStep) { + for (const auto &edge : currentViolations) { + const auto sourceV = Source(edge, *graph_); + const auto targetV = Target(edge, *graph_); + + const unsigned sourceStep = activeSchedule_->AssignedSuperstep(sourceV); + if (sourceStep >= startStep && sourceStep <= endStep) { + nodeSelection.Insert(sourceV); + } + + const unsigned targetStep = activeSchedule_->AssignedSuperstep(targetV); + if (targetStep >= startStep && targetStep <= endStep) { + nodeSelection.Insert(targetV); + } + } + } + + void SelectNodesPermutationThreshold(const std::size_t &threshold, ContainerT &nodeSelection) { + const size_t bound = std::min(threshold + permutationIdx_, permutation_.size()); + for (std::size_t i = permutationIdx_; i < bound; i++) { + nodeSelection.Insert(permutation_[i]); + } + + permutationIdx_ = bound; + if (permutationIdx_ + threshold >= permutation_.size()) { + permutationIdx_ = 0; + std::shuffle(permutation_.begin(), permutation_.end(), *gen_); + } + } + + void SelectNodesMaxWorkProc(const std::size_t &threshold, + ContainerT &nodeSelection, + const unsigned startStep, + const unsigned endStep) { + while (nodeSelection.size() < threshold) { + if (maxWorkCounter_ > endStep) { + maxWorkCounter_ = startStep; // wrap around + break; // stop after one full pass + } + + SelectNodesMaxWorkProcHelper(threshold - nodeSelection.size(), maxWorkCounter_, nodeSelection); + maxWorkCounter_++; + } + } + + void SelectNodesMaxWorkProcHelper(const std::size_t &threshold, unsigned step, ContainerT &nodeSelection) { + const unsigned numMaxWorkProc = 
activeSchedule_->workDatastructures_.stepMaxWorkProcessorCount_[step]; + for (unsigned idx = 0; idx < numMaxWorkProc; idx++) { + const unsigned proc = activeSchedule_->workDatastructures_.stepProcessorWork_[step][idx].proc_; + const std::unordered_set> stepProcVert + = activeSchedule_->GetSetSchedule().stepProcessorVertices_[step][proc]; + const size_t numInsert = std::min(threshold - nodeSelection.size(), stepProcVert.size()); + auto endIt = stepProcVert.begin(); + std::advance(endIt, numInsert); + std::for_each(stepProcVert.begin(), endIt, [&](const auto &val) { nodeSelection.Insert(val); }); + } + } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp deleted file mode 100644 index 07537551..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp +++ /dev/null @@ -1,533 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner -*/ - -#pragma once - -#include -#include -#include - -namespace osp { - -struct EagerCommCostPolicy { - using ValueType = unsigned; - - template - static inline void attribute_communication(DS &ds, - const comm_weight_t &cost, - const unsigned u_step, - const unsigned u_proc, - const unsigned v_proc, - const unsigned v_step, - const ValueType &val) { - ds.step_proc_receive(u_step, v_proc) += cost; - ds.step_proc_send(u_step, u_proc) += cost; - } - - template - static inline void unattribute_communication(DS &ds, - const comm_weight_t &cost, - const unsigned u_step, - const unsigned u_proc, - const unsigned v_proc, - const unsigned v_step, - const ValueType &val) { - ds.step_proc_receive(u_step, v_proc) -= cost; - ds.step_proc_send(u_step, u_proc) -= cost; - } - - static inline bool add_child(ValueType &val, unsigned step) { - val++; - return val == 1; - } - - static inline bool remove_child(ValueType &val, unsigned step) { - val--; - return val == 0; - } - - static inline void reset(ValueType &val) { val = 0; } - - static inline bool has_entry(const ValueType &val) { return val > 0; } - - static inline bool is_single_entry(const ValueType &val) { return val == 1; } - - template - static inline void calculate_delta_remove(const ValueType &val, - unsigned child_step, - unsigned parent_step, - unsigned parent_proc, - unsigned child_proc, - comm_weight_t cost, - DeltaTracker &dt) { - if (val == 1) { - dt.add(true, parent_step, child_proc, -cost); - dt.add(false, parent_step, parent_proc, -cost); - } - } - - template - static inline void calculate_delta_add(const ValueType &val, - unsigned child_step, - unsigned parent_step, - unsigned parent_proc, - unsigned child_proc, - comm_weight_t cost, - DeltaTracker &dt) { - if (val == 0) { - dt.add(true, parent_step, child_proc, cost); - dt.add(false, parent_step, parent_proc, cost); - } - } - - template - static inline void calculate_delta_outgoing( - const ValueType &val, unsigned node_step, unsigned node_proc, 
unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) { - if (val > 0) { - comm_weight_t total_cost = cost * val; - dt.add(true, node_step, child_proc, total_cost); - dt.add(false, node_step, node_proc, total_cost); - } - } -}; - -struct LazyCommCostPolicy { - using ValueType = std::vector; - - template - static inline void attribute_communication(DS &ds, - const comm_weight_t &cost, - const unsigned u_step, - const unsigned u_proc, - const unsigned v_proc, - const unsigned v_step, - const ValueType &val) { - // val contains v_step (already added). - // Check if v_step is the new minimum. - unsigned min_step = std::numeric_limits::max(); - for (unsigned s : val) { - min_step = std::min(min_step, s); - } - - if (min_step == v_step) { - // Check if it was strictly smaller than previous min. - unsigned prev_min = std::numeric_limits::max(); - for (size_t i = 0; i < val.size() - 1; ++i) { - prev_min = std::min(prev_min, val[i]); - } - - if (v_step < prev_min) { - if (prev_min != std::numeric_limits::max() && prev_min > 0) { - ds.step_proc_receive(prev_min - 1, v_proc) -= cost; - ds.step_proc_send(prev_min - 1, u_proc) -= cost; - } - if (v_step > 0) { - ds.step_proc_receive(v_step - 1, v_proc) += cost; - ds.step_proc_send(v_step - 1, u_proc) += cost; - } - } - } - } - - template - static inline void unattribute_communication(DS &ds, - const comm_weight_t &cost, - const unsigned u_step, - const unsigned u_proc, - const unsigned v_proc, - const unsigned v_step, - const ValueType &val) { - // val is state AFTER removal. - - if (val.empty()) { - // Removed the last child. - if (v_step > 0) { - ds.step_proc_receive(v_step - 1, v_proc) -= cost; - ds.step_proc_send(v_step - 1, u_proc) -= cost; - } - } else { - // Check if v_step was the unique minimum. - unsigned new_min = val[0]; - for (unsigned s : val) { - new_min = std::min(new_min, s); - } - - if (v_step < new_min) { - // v_step was the unique minimum. 
- if (v_step > 0) { - ds.step_proc_receive(v_step - 1, v_proc) -= cost; - ds.step_proc_send(v_step - 1, u_proc) -= cost; - } - if (new_min > 0) { - ds.step_proc_receive(new_min - 1, v_proc) += cost; - ds.step_proc_send(new_min - 1, u_proc) += cost; - } - } - } - } - - static inline bool add_child(ValueType &val, unsigned step) { - val.push_back(step); - if (val.size() == 1) { - return true; - } - unsigned min_s = val[0]; - for (unsigned s : val) { - min_s = std::min(min_s, s); - } - return step == min_s; - } - - static inline bool remove_child(ValueType &val, unsigned step) { - auto it = std::find(val.begin(), val.end(), step); - if (it != val.end()) { - val.erase(it); - if (val.empty()) { - return true; - } - unsigned new_min = val[0]; - for (unsigned s : val) { - new_min = std::min(new_min, s); - } - bool res = step < new_min; - return res; - } - return false; - } - - static inline void reset(ValueType &val) { val.clear(); } - - static inline bool has_entry(const ValueType &val) { return !val.empty(); } - - static inline bool is_single_entry(const ValueType &val) { return val.size() == 1; } - - template - static inline void calculate_delta_remove(const ValueType &val, - unsigned child_step, - unsigned parent_step, - unsigned parent_proc, - unsigned child_proc, - comm_weight_t cost, - DeltaTracker &dt) { - if (val.empty()) { - return; - } - unsigned min_s = val[0]; - for (unsigned s : val) { - min_s = std::min(min_s, s); - } - - if (child_step == min_s) { - int count = 0; - for (unsigned s : val) { - if (s == min_s) { - count++; - } - } - - if (count == 1) { - if (min_s > 0) { - dt.add(true, min_s - 1, child_proc, -cost); - dt.add(false, min_s - 1, parent_proc, -cost); - } - if (val.size() > 1) { - unsigned next_min = std::numeric_limits::max(); - for (unsigned s : val) { - if (s != min_s) { - next_min = std::min(next_min, s); - } - } - if (next_min != std::numeric_limits::max() && next_min > 0) { - dt.add(true, next_min - 1, child_proc, cost); - dt.add(false, 
next_min - 1, parent_proc, cost); - } - } - } - } - } - - template - static inline void calculate_delta_add(const ValueType &val, - unsigned child_step, - unsigned parent_step, - unsigned parent_proc, - unsigned child_proc, - comm_weight_t cost, - DeltaTracker &dt) { - if (val.empty()) { - if (child_step > 0) { - dt.add(true, child_step - 1, child_proc, cost); - dt.add(false, child_step - 1, parent_proc, cost); - } - } else { - unsigned min_s = val[0]; - for (unsigned s : val) { - min_s = std::min(min_s, s); - } - - if (child_step < min_s) { - if (min_s > 0) { - dt.add(true, min_s - 1, child_proc, -cost); - dt.add(false, min_s - 1, parent_proc, -cost); - } - if (child_step > 0) { - dt.add(true, child_step - 1, child_proc, cost); - dt.add(false, child_step - 1, parent_proc, cost); - } - } - } - } - - template - static inline void calculate_delta_outgoing( - const ValueType &val, unsigned node_step, unsigned node_proc, unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) { - for (unsigned s : val) { - if (s > 0) { - dt.add(true, s - 1, child_proc, cost); - dt.add(false, s - 1, node_proc, cost); - } - } - } -}; - -struct BufferedCommCostPolicy { - using ValueType = std::vector; - - template - static inline void attribute_communication(DS &ds, - const comm_weight_t &cost, - const unsigned u_step, - const unsigned u_proc, - const unsigned v_proc, - const unsigned v_step, - const ValueType &val) { - // Buffered: Send at u_step, Receive at v_step - 1. 
- - unsigned min_step = std::numeric_limits::max(); - for (unsigned s : val) { - min_step = std::min(min_step, s); - } - - if (min_step == v_step) { - unsigned prev_min = std::numeric_limits::max(); - for (size_t i = 0; i < val.size() - 1; ++i) { - prev_min = std::min(prev_min, val[i]); - } - - if (v_step < prev_min) { - if (prev_min != std::numeric_limits::max() && prev_min > 0) { - ds.step_proc_receive(prev_min - 1, v_proc) -= cost; - } - if (v_step > 0) { - ds.step_proc_receive(v_step - 1, v_proc) += cost; - } - } - } - - // Send side logic (u_step) - // If this is the FIRST child on this proc, add send cost. - if (val.size() == 1) { - ds.step_proc_send(u_step, u_proc) += cost; - } - } - - template - static inline void unattribute_communication(DS &ds, - const comm_weight_t &cost, - const unsigned u_step, - const unsigned u_proc, - const unsigned v_proc, - const unsigned v_step, - const ValueType &val) { - // val is state AFTER removal. - - if (val.empty()) { - // Removed last child. - ds.step_proc_send(u_step, u_proc) -= cost; // Send side - if (v_step > 0) { - ds.step_proc_receive(v_step - 1, v_proc) -= cost; // Recv side - } - } else { - // Check if v_step was unique minimum for Recv side. - unsigned new_min = val[0]; - for (unsigned s : val) { - new_min = std::min(new_min, s); - } - - if (v_step < new_min) { - if (v_step > 0) { - ds.step_proc_receive(v_step - 1, v_proc) -= cost; - } - if (new_min > 0) { - ds.step_proc_receive(new_min - 1, v_proc) += cost; - } - } - // Send side remains (val not empty). 
- } - } - - static inline bool add_child(ValueType &val, unsigned step) { - val.push_back(step); - if (val.size() == 1) { - return true; // Need update for send side - } - unsigned min_s = val[0]; - for (unsigned s : val) { - min_s = std::min(min_s, s); - } - return step == min_s; // Need update for recv side - } - - static inline bool remove_child(ValueType &val, unsigned step) { - auto it = std::find(val.begin(), val.end(), step); - if (it != val.end()) { - val.erase(it); - if (val.empty()) { - return true; // Need update for send side - } - unsigned new_min = val[0]; - for (unsigned s : val) { - new_min = std::min(new_min, s); - } - return step < new_min; // Need update for recv side - } - return false; - } - - static inline void reset(ValueType &val) { val.clear(); } - - static inline bool has_entry(const ValueType &val) { return !val.empty(); } - - static inline bool is_single_entry(const ValueType &val) { return val.size() == 1; } - - template - static inline void calculate_delta_remove(const ValueType &val, - unsigned child_step, - unsigned parent_step, - unsigned parent_proc, - unsigned child_proc, - comm_weight_t cost, - DeltaTracker &dt) { - // Lazy: Send and Recv are both at min(child_steps) - 1. - - if (val.empty()) { - return; - } - - unsigned min_s = val[0]; - for (unsigned s : val) { - min_s = std::min(min_s, s); - } - - if (child_step == min_s) { - int count = 0; - for (unsigned s : val) { - if (s == min_s) { - count++; - } - } - - if (count == 1) { - // Unique min being removed. 
- if (min_s > 0) { - dt.add(true, min_s - 1, child_proc, -cost); // Remove Recv - dt.add(false, min_s - 1, parent_proc, -cost); // Remove Send - } - - if (val.size() > 1) { - unsigned next_min = std::numeric_limits::max(); - for (unsigned s : val) { - if (s != min_s) { - next_min = std::min(next_min, s); - } - } - - if (next_min != std::numeric_limits::max() && next_min > 0) { - dt.add(true, next_min - 1, child_proc, cost); // Add Recv at new min - dt.add(false, next_min - 1, parent_proc, cost); // Add Send at new min - } - } - } - } - } - - template - static inline void calculate_delta_add(const ValueType &val, - unsigned child_step, - unsigned parent_step, - unsigned parent_proc, - unsigned child_proc, - comm_weight_t cost, - DeltaTracker &dt) { - // Lazy: Send and Recv are both at min(child_steps) - 1. - - if (val.empty()) { - // First child. - if (child_step > 0) { - dt.add(true, child_step - 1, child_proc, cost); - dt.add(false, child_step - 1, parent_proc, cost); - } - } else { - unsigned min_s = val[0]; - for (unsigned s : val) { - min_s = std::min(min_s, s); - } - - if (child_step < min_s) { - // New global minimum. - if (min_s > 0) { - dt.add(true, min_s - 1, child_proc, -cost); // Remove old Recv - dt.add(false, min_s - 1, parent_proc, -cost); // Remove old Send - } - if (child_step > 0) { - dt.add(true, child_step - 1, child_proc, cost); // Add new Recv - dt.add(false, child_step - 1, parent_proc, cost); // Add new Send - } - } - } - } - - template - static inline void calculate_delta_outgoing( - const ValueType &val, unsigned node_step, unsigned node_proc, unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) { - // Buffered Outgoing (Node -> Children) - // Node is parent (sender). Pays at node_step. - // Children are receivers. Pay at child_step - 1. - - // Send side: node_step. - // If val is not empty, we pay send cost ONCE. 
- if (!val.empty()) { - dt.add(false, node_step, node_proc, cost); - } - - // Recv side: iterate steps in val (child steps). - // But we only pay at min(val) - 1. - if (!val.empty()) { - unsigned min_s = val[0]; - for (unsigned s : val) { - min_s = std::min(min_s, s); - } - - if (min_s > 0) { - dt.add(true, min_s - 1, child_proc, cost); - } - } - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp deleted file mode 100644 index e86baada..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp +++ /dev/null @@ -1,121 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include -#include -#include - -namespace osp { - -template -struct DefaultHasEntry { - static inline bool has_entry(const T &val) { return val != 0; } -}; - -template -struct DefaultHasEntry> { - static inline bool has_entry(const std::vector &val) { return !val.empty(); } -}; - -/** - * @brief Generic container for tracking child processor assignments in a BSP schedule using vectors. - * - * This structure tracks information about children assigned to each processor. - * It uses a 2D vector for dense data. 
- */ -template > -struct generic_lambda_vector_container { - /** - * @brief Range adapter for iterating over non-zero/non-empty processor entries. - */ - class lambda_vector_range { - private: - const std::vector &vec_; - - public: - class lambda_vector_iterator { - using iterator_category = std::input_iterator_tag; - using value_type = std::pair; - using difference_type = std::ptrdiff_t; - using pointer = value_type *; - using reference = value_type &; - - private: - const std::vector &vec_; - unsigned index_; - - public: - lambda_vector_iterator(const std::vector &vec) : vec_(vec), index_(0) { - while (index_ < vec_.size() && !HasEntry::has_entry(vec_[index_])) { - ++index_; - } - } - - lambda_vector_iterator(const std::vector &vec, unsigned index) : vec_(vec), index_(index) {} - - lambda_vector_iterator &operator++() { - ++index_; - while (index_ < vec_.size() && !HasEntry::has_entry(vec_[index_])) { - ++index_; - } - return *this; - } - - value_type operator*() const { return std::make_pair(index_, vec_[index_]); } - - bool operator==(const lambda_vector_iterator &other) const { return index_ == other.index_; } - - bool operator!=(const lambda_vector_iterator &other) const { return !(*this == other); } - }; - - lambda_vector_range(const std::vector &vec) : vec_(vec) {} - - lambda_vector_iterator begin() { return lambda_vector_iterator(vec_); } - - lambda_vector_iterator end() { return lambda_vector_iterator(vec_, static_cast(vec_.size())); } - }; - - /// 2D vector: for each node, stores processor assignment info - std::vector> node_lambda_vec; - - /// Number of processors in the system - unsigned num_procs_ = 0; - - inline void initialize(const vertex_idx_t num_vertices, const unsigned num_procs) { - node_lambda_vec.assign(num_vertices, std::vector(num_procs)); - num_procs_ = num_procs; - } - - inline void reset_node(const vertex_idx_t node) { node_lambda_vec[node].assign(num_procs_, ValueType()); } - - inline void clear() { node_lambda_vec.clear(); } - - 
inline bool has_proc_entry(const vertex_idx_t node, const unsigned proc) const { - return HasEntry::has_entry(node_lambda_vec[node][proc]); - } - - inline ValueType &get_proc_entry(const vertex_idx_t node, const unsigned proc) { return node_lambda_vec[node][proc]; } - - inline ValueType get_proc_entry(const vertex_idx_t node, const unsigned proc) const { return node_lambda_vec[node][proc]; } - - inline auto iterate_proc_entries(const vertex_idx_t node) { return lambda_vector_range(node_lambda_vec[node]); } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp deleted file mode 100644 index 7fd3693f..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp +++ /dev/null @@ -1,688 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include - -#include "../kl_active_schedule.hpp" -#include "../kl_improver.hpp" -#include "max_comm_datastructure.hpp" - -namespace osp { - -// A lightweight helper to track deltas without hash maps or repeated allocations. -// Uses a dense vector for O(1) lookups and a sparse list for fast iteration/clearing. 
-template -struct FastDeltaTracker { - std::vector dense_vals; // Size: num_procs - std::vector dirty_procs; // List of modified indices - std::vector proc_dirty_index; // Map proc -> index in dirty_procs (num_procs if not dirty) - unsigned num_procs = 0; - - void initialize(unsigned n_procs) { - if (n_procs > num_procs) { - num_procs = n_procs; - dense_vals.resize(num_procs, 0); - dirty_procs.reserve(num_procs); - proc_dirty_index.resize(num_procs, num_procs); - } - } - - inline void add(unsigned proc, comm_weight_t val) { - if (val == 0) { - return; - } - - // If currently 0, it is becoming dirty - if (dense_vals[proc] == 0) { - proc_dirty_index[proc] = static_cast(dirty_procs.size()); - dirty_procs.push_back(proc); - } - - dense_vals[proc] += val; - - // If it returns to 0, remove it from dirty list (Swap and Pop for O(1)) - if (dense_vals[proc] == 0) { - unsigned idx = proc_dirty_index[proc]; - unsigned last_proc = dirty_procs.back(); - - // Move last element to the hole - dirty_procs[idx] = last_proc; - proc_dirty_index[last_proc] = idx; - - // Remove last - dirty_procs.pop_back(); - proc_dirty_index[proc] = num_procs; - } - } - - inline comm_weight_t get(unsigned proc) const { - if (proc < dense_vals.size()) { - return dense_vals[proc]; - } - return 0; - } - - inline void clear() { - for (unsigned p : dirty_procs) { - dense_vals[p] = 0; - proc_dirty_index[p] = num_procs; - } - dirty_procs.clear(); - } -}; - -template -struct kl_bsp_comm_cost_function { - using VertexType = vertex_idx_t; - using kl_move = kl_move_struct; - using kl_gain_update_info = kl_update_info; - using comm_weight_t = v_commw_t; - - constexpr static unsigned window_range = 2 * window_size + 1; - constexpr static bool is_max_comm_cost_function = true; - - kl_active_schedule *active_schedule; - CompatibleProcessorRange *proc_range; - const Graph_t *graph; - const BspInstance *instance; - - max_comm_datastructure> comm_ds; - - inline cost_t get_comm_multiplier() { return 1; } - - inline 
cost_t get_max_comm_weight() { return comm_ds.max_comm_weight; } - - inline cost_t get_max_comm_weight_multiplied() { return comm_ds.max_comm_weight; } - - inline const std::string name() const { return "bsp_comm"; } - - inline bool is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); } - - inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { - return (node_step < window_size + start_step) ? window_size - (node_step - start_step) : 0; - } - - inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { - return (node_step + window_size <= end_step) ? window_range : window_range - (node_step + window_size - end_step); - } - - void initialize(kl_active_schedule &sched, CompatibleProcessorRange &p_range) { - active_schedule = &sched; - proc_range = &p_range; - instance = &sched.getInstance(); - graph = &instance->getComputationalDag(); - - const unsigned num_steps = active_schedule->num_steps(); - comm_ds.initialize(*active_schedule); - } - - using pre_move_comm_data_t = pre_move_comm_data; - - inline pre_move_comm_data get_pre_move_comm_data(const kl_move &move) { - return comm_ds.get_pre_move_comm_data(move); - } - - void compute_send_receive_datastructures() { comm_ds.compute_comm_datastructures(0, active_schedule->num_steps() - 1); } - - template - cost_t compute_schedule_cost() { - if constexpr (compute_datastructures) { - compute_send_receive_datastructures(); - } - - cost_t total_cost = 0; - for (unsigned step = 0; step < active_schedule->num_steps(); step++) { - total_cost += active_schedule->get_step_max_work(step); - total_cost += comm_ds.step_max_comm(step) * instance->communicationCosts(); - } - - if (active_schedule->num_steps() > 1) { - total_cost += static_cast(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); - } - - return total_cost; - } - - cost_t compute_schedule_cost_test() { return compute_schedule_cost(); } - - void 
update_datastructure_after_move(const kl_move &move, const unsigned start_step, const unsigned end_step) { - comm_ds.update_datastructure_after_move(move, start_step, end_step); - } - - // Structure to hold thread-local scratchpads to avoid re-allocation. - struct ScratchData { - std::vector> send_deltas; // Size: num_steps - std::vector> recv_deltas; // Size: num_steps - - std::vector active_steps; // List of steps touched in current operation - std::vector step_is_active; // Fast lookup for active steps - - std::vector> child_cost_buffer; - - void init(unsigned n_steps, unsigned n_procs) { - if (send_deltas.size() < n_steps) { - send_deltas.resize(n_steps); - recv_deltas.resize(n_steps); - step_is_active.resize(n_steps, false); - active_steps.reserve(n_steps); - } - - for (auto &tracker : send_deltas) { - tracker.initialize(n_procs); - } - for (auto &tracker : recv_deltas) { - tracker.initialize(n_procs); - } - - child_cost_buffer.reserve(n_procs); - } - - void clear_all() { - for (unsigned step : active_steps) { - send_deltas[step].clear(); - recv_deltas[step].clear(); - step_is_active[step] = false; - } - active_steps.clear(); - child_cost_buffer.clear(); - } - - void mark_active(unsigned step) { - if (!step_is_active[step]) { - step_is_active[step] = true; - active_steps.push_back(step); - } - } - }; - - template - void compute_comm_affinity(VertexType node, - affinity_table_t &affinity_table_node, - const cost_t &penalty, - const cost_t &reward, - const unsigned start_step, - const unsigned end_step) { - // Use static thread_local scratchpad to avoid allocation in hot loop - static thread_local ScratchData scratch; - scratch.init(active_schedule->num_steps(), instance->numberOfProcessors()); - scratch.clear_all(); - - const unsigned node_step = active_schedule->assigned_superstep(node); - const unsigned node_proc = active_schedule->assigned_processor(node); - const unsigned window_bound = end_idx(node_step, end_step); - const unsigned node_start_idx = 
start_idx(node_step, start_step); - - for (const auto &target : instance->getComputationalDag().children(node)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - const unsigned target_proc = active_schedule->assigned_processor(target); - - if (target_step < node_step + (target_proc != node_proc)) { - const unsigned diff = node_step - target_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; - unsigned idx = node_start_idx; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] -= reward; - } - } - if (window_size >= diff && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= reward; - } - } else { - const unsigned diff = target_step - node_step; - unsigned idx = window_size + diff; - if (idx < window_bound && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= penalty; - } - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] += penalty; - } - } - } - } - - for (const auto &source : instance->getComputationalDag().parents(node)) { - const unsigned source_step = active_schedule->assigned_superstep(source); - const unsigned source_proc = active_schedule->assigned_processor(source); - - if (source_step < node_step + (source_proc == node_proc)) { - const unsigned diff = node_step - source_step; - const unsigned bound = window_size >= diff ? 
window_size - diff + 1 : 0; - unsigned idx = node_start_idx; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] += penalty; - } - } - if (idx - 1 < bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx - 1] -= penalty; - } - } else { - const unsigned diff = source_step - node_step; - unsigned idx = std::min(window_size + diff, window_bound); - if (idx < window_bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx] -= reward; - } - idx++; - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] -= reward; - } - } - } - } - - const comm_weight_t comm_w_node = graph->vertex_comm_weight(node); - const auto ¤t_vec_schedule = active_schedule->getVectorSchedule(); - - auto add_delta = [&](bool is_recv, unsigned step, unsigned proc, comm_weight_t val) { - if (val == 0) { - return; - } - if (step < active_schedule->num_steps()) { - scratch.mark_active(step); - if (is_recv) { - scratch.recv_deltas[step].add(proc, val); - } else { - scratch.send_deltas[step].add(proc, val); - } - } - }; - - // 1. 
Remove Node from Current State (Phase 1 - Invariant for all candidates) - - // Outgoing (Children) - // Child stops receiving from node_proc at node_step - auto node_lambda_entries = comm_ds.node_lambda_map.iterate_proc_entries(node); - comm_weight_t total_send_cost_removed = 0; - - for (const auto [proc, count] : node_lambda_entries) { - if (proc != node_proc) { - const comm_weight_t cost = comm_w_node * instance->sendCosts(node_proc, proc); - if (cost > 0) { - add_delta(true, node_step, proc, -cost); - total_send_cost_removed += cost; - } - } - } - if (total_send_cost_removed > 0) { - add_delta(false, node_step, node_proc, -total_send_cost_removed); - } - - // Incoming (Parents) - for (const auto &u : graph->parents(node)) { - const unsigned u_proc = active_schedule->assigned_processor(u); - const unsigned u_step = current_vec_schedule.assignedSuperstep(u); - const comm_weight_t comm_w_u = graph->vertex_comm_weight(u); - - if (u_proc != node_proc) { - if (comm_ds.node_lambda_map.get_proc_entry(u, node_proc) == 1) { - const comm_weight_t cost = comm_w_u * instance->sendCosts(u_proc, node_proc); - if (cost > 0) { - add_delta(true, u_step, node_proc, -cost); - add_delta(false, u_step, u_proc, -cost); - } - } - } - } - - // 2. Add Node to Target (Iterate candidates) - - for (const unsigned p_to : proc_range->compatible_processors_vertex(node)) { - // --- Part A: Incoming Edges (Parents -> p_to) --- - // These updates are specific to p_to but independent of s_to. - // We apply them, run the s_to loop, then revert them. 
- - for (const auto &u : graph->parents(node)) { - const unsigned u_proc = active_schedule->assigned_processor(u); - const unsigned u_step = current_vec_schedule.assignedSuperstep(u); - const comm_weight_t comm_w_u = graph->vertex_comm_weight(u); - - if (u_proc != p_to) { - bool already_sending_to_p_to = false; - unsigned count_on_p_to = comm_ds.node_lambda_map.get_proc_entry(u, p_to); - - if (p_to == node_proc) { - if (count_on_p_to > 0) { - count_on_p_to--; - } - } - - if (count_on_p_to > 0) { - already_sending_to_p_to = true; - } - - if (!already_sending_to_p_to) { - const comm_weight_t cost = comm_w_u * instance->sendCosts(u_proc, p_to); - if (cost > 0) { - add_delta(true, u_step, p_to, cost); - add_delta(false, u_step, u_proc, cost); - } - } - } - } - - // --- Part B: Outgoing Edges (Node -> Children) --- - // These depend on which processors children are on. - scratch.child_cost_buffer.clear(); - comm_weight_t total_send_cost_added = 0; - - for (const auto [v_proc, count] : comm_ds.node_lambda_map.iterate_proc_entries(node)) { - if (v_proc != p_to) { - const comm_weight_t cost = comm_w_node * instance->sendCosts(p_to, v_proc); - if (cost > 0) { - scratch.child_cost_buffer.push_back({v_proc, cost}); - total_send_cost_added += cost; - } - } - } - - // Iterate Window (s_to) - for (unsigned s_to_idx = node_start_idx; s_to_idx < window_bound; ++s_to_idx) { - unsigned s_to = node_step + s_to_idx - window_size; - - // Apply Outgoing Deltas for this specific step s_to - for (const auto &[v_proc, cost] : scratch.child_cost_buffer) { - add_delta(true, s_to, v_proc, cost); - } - - if (total_send_cost_added > 0) { - add_delta(false, s_to, p_to, total_send_cost_added); - } - - cost_t total_change = 0; - - // Only check steps that are active (modified in Phase 1, Part A, or Part B) - for (unsigned step : scratch.active_steps) { - // Check if dirty_procs is empty implies no change for this step - // FastDeltaTracker ensures dirty_procs is empty if all deltas summed to 0 - 
if (!scratch.send_deltas[step].dirty_procs.empty() || !scratch.recv_deltas[step].dirty_procs.empty()) { - total_change += calculate_step_cost_change(step, scratch.send_deltas[step], scratch.recv_deltas[step]); - } - } - - affinity_table_node[p_to][s_to_idx] += total_change * instance->communicationCosts(); - - // Revert Outgoing Deltas for s_to (Inverse of Apply) - for (const auto &[v_proc, cost] : scratch.child_cost_buffer) { - add_delta(true, s_to, v_proc, -cost); - } - if (total_send_cost_added > 0) { - add_delta(false, s_to, p_to, -total_send_cost_added); - } - } - - // Revert Incoming Deltas (Inverse of Part A) - for (const auto &u : graph->parents(node)) { - const unsigned u_proc = active_schedule->assigned_processor(u); - const unsigned u_step = current_vec_schedule.assignedSuperstep(u); - const comm_weight_t comm_w_u = graph->vertex_comm_weight(u); - - if (u_proc != p_to) { - bool already_sending_to_p_to = false; - unsigned count_on_p_to = comm_ds.node_lambda_map.get_proc_entry(u, p_to); - if (p_to == node_proc) { - if (count_on_p_to > 0) { - count_on_p_to--; - } - } - if (count_on_p_to > 0) { - already_sending_to_p_to = true; - } - - if (!already_sending_to_p_to) { - const comm_weight_t cost = comm_w_u * instance->sendCosts(u_proc, p_to); - if (cost > 0) { - add_delta(true, u_step, p_to, -cost); - add_delta(false, u_step, u_proc, -cost); - } - } - } - } - } - } - - comm_weight_t calculate_step_cost_change(unsigned step, - const FastDeltaTracker &delta_send, - const FastDeltaTracker &delta_recv) { - comm_weight_t old_max = comm_ds.step_max_comm(step); - comm_weight_t second_max = comm_ds.step_second_max_comm(step); - unsigned old_max_count = comm_ds.step_max_comm_count(step); - - comm_weight_t new_global_max = 0; - unsigned reduced_max_instances = 0; - - // 1. 
Check modified sends (Iterate sparse dirty list) - for (unsigned proc : delta_send.dirty_procs) { - comm_weight_t delta = delta_send.get(proc); - // delta cannot be 0 here due to FastDeltaTracker invariant - - comm_weight_t current_val = comm_ds.step_proc_send(step, proc); - comm_weight_t new_val = current_val + delta; - - if (new_val > new_global_max) { - new_global_max = new_val; - } - if (delta < 0 && current_val == old_max) { - reduced_max_instances++; - } - } - - // 2. Check modified receives (Iterate sparse dirty list) - for (unsigned proc : delta_recv.dirty_procs) { - comm_weight_t delta = delta_recv.get(proc); - - comm_weight_t current_val = comm_ds.step_proc_receive(step, proc); - comm_weight_t new_val = current_val + delta; - - if (new_val > new_global_max) { - new_global_max = new_val; - } - if (delta < 0 && current_val == old_max) { - reduced_max_instances++; - } - } - - // 3. Determine result - if (new_global_max > old_max) { - return new_global_max - old_max; - } - if (reduced_max_instances < old_max_count) { - return 0; - } - return std::max(new_global_max, second_max) - old_max; - } - - template - void update_node_comm_affinity(const kl_move &move, - thread_data_t &thread_data, - const cost_t &penalty, - const cost_t &reward, - std::map &, - std::vector &new_nodes) { - const unsigned start_step = thread_data.start_step; - const unsigned end_step = thread_data.end_step; - - for (const auto &target : instance->getComputationalDag().children(move.node)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - if (target_step < start_step || target_step > end_step) { - continue; - } - - if (thread_data.lock_manager.is_locked(target)) { - continue; - } - - if (not thread_data.affinity_table.is_selected(target)) { - new_nodes.push_back(target); - continue; - } - - const unsigned target_proc = active_schedule->assigned_processor(target); - const unsigned target_start_idx = start_idx(target_step, start_step); - auto &affinity_table = 
thread_data.affinity_table.at(target); - - if (move.from_step < target_step + (move.from_proc == target_proc)) { - const unsigned diff = target_step - move.from_step; - const unsigned bound = window_size >= diff ? window_size - diff + 1 : 0; - unsigned idx = target_start_idx; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table[p][idx] -= penalty; - } - } - - if (idx - 1 < bound && is_compatible(target, move.from_proc)) { - affinity_table[move.from_proc][idx - 1] += penalty; - } - - } else { - const unsigned diff = move.from_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(target, move.from_proc)) { - affinity_table[move.from_proc][idx] += reward; - } - - idx++; - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table[p][idx] += reward; - } - } - } - - if (move.to_step < target_step + (move.to_proc == target_proc)) { - unsigned idx = target_start_idx; - const unsigned diff = target_step - move.to_step; - const unsigned bound = window_size >= diff ? 
window_size - diff + 1 : 0; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table[p][idx] += penalty; - } - } - - if (idx - 1 < bound && is_compatible(target, move.to_proc)) { - affinity_table[move.to_proc][idx - 1] -= penalty; - } - - } else { - const unsigned diff = move.to_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(target, move.to_proc)) { - affinity_table[move.to_proc][idx] -= reward; - } - - idx++; - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table[p][idx] -= reward; - } - } - } - } - - for (const auto &source : instance->getComputationalDag().parents(move.node)) { - const unsigned source_step = active_schedule->assigned_superstep(source); - if (source_step < start_step || source_step > end_step) { - continue; - } - - if (thread_data.lock_manager.is_locked(source)) { - continue; - } - - if (not thread_data.affinity_table.is_selected(source)) { - new_nodes.push_back(source); - continue; - } - - const unsigned source_proc = active_schedule->assigned_processor(source); - const unsigned source_start_idx = start_idx(source_step, start_step); - const unsigned window_bound = end_idx(source_step, end_step); - auto &affinity_table_source = thread_data.affinity_table.at(source); - - if (move.from_step < source_step + (move.from_proc != source_proc)) { - const unsigned diff = source_step - move.from_step; - const unsigned bound = window_size > diff ? 
window_size - diff : 0; - unsigned idx = source_start_idx; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] += reward; - } - } - - if (window_size >= diff && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += reward; - } - - } else { - const unsigned diff = move.from_step - source_step; - unsigned idx = window_size + diff; - - if (idx < window_bound && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += penalty; - } - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] -= penalty; - } - } - } - - if (move.to_step < source_step + (move.to_proc != source_proc)) { - const unsigned diff = source_step - move.to_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; - unsigned idx = source_start_idx; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] -= reward; - } - } - - if (window_size >= diff && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= reward; - } - - } else { - const unsigned diff = move.to_step - source_step; - unsigned idx = window_size + diff; - - if (idx < window_bound && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= penalty; - } - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] += penalty; - } - } - } - } - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp deleted file mode 100644 index 898f2df4..00000000 --- 
a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp +++ /dev/null @@ -1,649 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include "../kl_active_schedule.hpp" -#include "../kl_improver.hpp" -#include "lambda_container.hpp" - -namespace osp { - -template -struct kl_hyper_total_comm_cost_function { - using VertexType = vertex_idx_t; - using kl_move = kl_move_struct; - using kl_gain_update_info = kl_update_info; - - constexpr static unsigned window_range = 2 * window_size + 1; - constexpr static bool is_max_comm_cost_function = false; - - kl_active_schedule *active_schedule; - - CompatibleProcessorRange *proc_range; - - const Graph_t *graph; - const BspInstance *instance; - - cost_t comm_multiplier = 1; - cost_t max_comm_weight = 0; - - lambda_vector_container node_lambda_map; - - inline cost_t get_comm_multiplier() { return comm_multiplier; } - - inline cost_t get_max_comm_weight() { return max_comm_weight; } - - inline cost_t get_max_comm_weight_multiplied() { return max_comm_weight * comm_multiplier; } - - const std::string name() const { return "toal_comm_cost"; } - - inline bool is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); } - - void initialize(kl_active_schedule &sched, CompatibleProcessorRange &p_range) { - 
active_schedule = &sched; - proc_range = &p_range; - instance = &sched.getInstance(); - graph = &instance->getComputationalDag(); - comm_multiplier = 1.0 / instance->numberOfProcessors(); - node_lambda_map.initialize(graph->num_vertices(), instance->numberOfProcessors()); - } - - struct empty_struct {}; - - using pre_move_comm_data_t = empty_struct; - - inline empty_struct get_pre_move_comm_data(const kl_move &) { return empty_struct(); } - - cost_t compute_schedule_cost() { - cost_t work_costs = 0; - for (unsigned step = 0; step < active_schedule->num_steps(); step++) { - work_costs += active_schedule->get_step_max_work(step); - } - - cost_t comm_costs = 0; - for (const auto vertex : graph->vertices()) { - const unsigned vertex_proc = active_schedule->assigned_processor(vertex); - const cost_t v_comm_cost = graph->vertex_comm_weight(vertex); - max_comm_weight = std::max(max_comm_weight, v_comm_cost); - - node_lambda_map.reset_node(vertex); - - for (const auto &target : instance->getComputationalDag().children(vertex)) { - const unsigned target_proc = active_schedule->assigned_processor(target); - - if (node_lambda_map.increase_proc_count(vertex, target_proc)) { - comm_costs - += v_comm_cost - * instance->communicationCosts(vertex_proc, target_proc); // is 0 if target_proc == vertex_proc - } - } - } - - return work_costs + comm_costs * comm_multiplier - + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); - } - - cost_t compute_schedule_cost_test() { - cost_t work_costs = 0; - for (unsigned step = 0; step < active_schedule->num_steps(); step++) { - work_costs += active_schedule->get_step_max_work(step); - } - - cost_t comm_costs = 0; - for (const auto vertex : graph->vertices()) { - const unsigned vertex_proc = active_schedule->assigned_processor(vertex); - const cost_t v_comm_cost = graph->vertex_comm_weight(vertex); - for (const auto lambdaproc_mult_pair : node_lambda_map.iterate_proc_entries(vertex)) { - const auto &lambda_proc = 
lambdaproc_mult_pair.first; - comm_costs += v_comm_cost * instance->communicationCosts(vertex_proc, lambda_proc); - } - } - - return work_costs + comm_costs * comm_multiplier - + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); - } - - inline void update_datastructure_after_move(const kl_move &move, const unsigned start_step, const unsigned end_step) { - if (move.to_proc != move.from_proc) { - for (const auto &source : instance->getComputationalDag().parents(move.node)) { - const unsigned source_step = active_schedule->assigned_superstep(source); - if (source_step < start_step || source_step > end_step) { - continue; - } - update_source_after_move(move, source); - } - } - } - - inline void update_source_after_move(const kl_move &move, VertexType source) { - node_lambda_map.decrease_proc_count(source, move.from_proc); - node_lambda_map.increase_proc_count(source, move.to_proc); - } - - template - void update_node_comm_affinity(const kl_move &move, - thread_data_t &thread_data, - const cost_t &penalty, - const cost_t &reward, - std::map &max_gain_recompute, - std::vector &new_nodes) { - const unsigned start_step = thread_data.start_step; - const unsigned end_step = thread_data.end_step; - - for (const auto &target : instance->getComputationalDag().children(move.node)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - if (target_step < start_step || target_step > end_step) { - continue; - } - - if (thread_data.lock_manager.is_locked(target)) { - continue; - } - - if (not thread_data.affinity_table.is_selected(target)) { - new_nodes.push_back(target); - continue; - } - - if (max_gain_recompute.find(target) != max_gain_recompute.end()) { - max_gain_recompute[target].full_update = true; - } else { - max_gain_recompute[target] = kl_gain_update_info(target, true); - } - - const unsigned target_proc = active_schedule->assigned_processor(target); - const unsigned target_start_idx = start_idx(target_step, 
start_step); - auto &affinity_table = thread_data.affinity_table.at(target); - - if (move.from_step < target_step + (move.from_proc == target_proc)) { - const unsigned diff = target_step - move.from_step; - const unsigned bound = window_size >= diff ? window_size - diff + 1 : 0; - unsigned idx = target_start_idx; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table[p][idx] -= penalty; - } - } - - if (idx - 1 < bound && is_compatible(target, move.from_proc)) { - affinity_table[move.from_proc][idx - 1] += penalty; - } - - } else { - const unsigned diff = move.from_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(target, move.from_proc)) { - affinity_table[move.from_proc][idx] += reward; - } - - idx++; - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table[p][idx] += reward; - } - } - } - - if (move.to_step < target_step + (move.to_proc == target_proc)) { - unsigned idx = target_start_idx; - const unsigned diff = target_step - move.to_step; - const unsigned bound = window_size >= diff ? 
window_size - diff + 1 : 0; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table[p][idx] += penalty; - } - } - - if (idx - 1 < bound && is_compatible(target, move.to_proc)) { - affinity_table[move.to_proc][idx - 1] -= penalty; - } - - } else { - const unsigned diff = move.to_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(target, move.to_proc)) { - affinity_table[move.to_proc][idx] -= reward; - } - - idx++; - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table[p][idx] -= reward; - } - } - } - - if (move.to_proc != move.from_proc) { - const cost_t comm_gain = graph->vertex_comm_weight(move.node) * comm_multiplier; - - const unsigned window_bound = end_idx(target_step, end_step); - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - if (p == target_proc) { - continue; - } - if (node_lambda_map.get_proc_entry(move.node, target_proc) == 1) { - for (unsigned idx = target_start_idx; idx < window_bound; idx++) { - const cost_t x = instance->communicationCosts(move.from_proc, target_proc) * comm_gain; - const cost_t y = instance->communicationCosts(move.to_proc, target_proc) * comm_gain; - affinity_table[p][idx] += x - y; - } - } - - if (node_lambda_map.has_no_proc_entry(move.node, p)) { - for (unsigned idx = target_start_idx; idx < window_bound; idx++) { - const cost_t x = instance->communicationCosts(move.from_proc, p) * comm_gain; - const cost_t y = instance->communicationCosts(move.to_proc, p) * comm_gain; - affinity_table[p][idx] -= x - y; - } - } - } - } - } - - for (const auto &source : instance->getComputationalDag().parents(move.node)) { - if (move.to_proc != move.from_proc) { - const unsigned source_proc = 
active_schedule->assigned_processor(source); - if (node_lambda_map.has_no_proc_entry(source, move.from_proc)) { - const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - - for (const auto &target : instance->getComputationalDag().children(source)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) - || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) { - continue; - } - - if (source_proc != move.from_proc && is_compatible(target, move.from_proc)) { - if (max_gain_recompute.find(target) != max_gain_recompute.end()) { // todo more specialized update - max_gain_recompute[target].full_update = true; - } else { - max_gain_recompute[target] = kl_gain_update_info(target, true); - } - - auto &affinity_table_target_from_proc = thread_data.affinity_table.at(target)[move.from_proc]; - const unsigned target_window_bound = end_idx(target_step, end_step); - const cost_t comm_aff = instance->communicationCosts(source_proc, move.from_proc) * comm_gain; - for (unsigned idx = start_idx(target_step, start_step); idx < target_window_bound; idx++) { - affinity_table_target_from_proc[idx] += comm_aff; - } - } - } - } else if (node_lambda_map.get_proc_entry(source, move.from_proc) == 1) { - const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - - for (const auto &target : instance->getComputationalDag().children(source)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) - || thread_data.lock_manager.is_locked(target) || (not thread_data.affinity_table.is_selected(target))) { - continue; - } - - const unsigned target_proc = active_schedule->assigned_processor(target); - if (target_proc == move.from_proc) { - if (max_gain_recompute.find(target) != max_gain_recompute.end()) 
{ // todo more specialized update - max_gain_recompute[target].full_update = true; - } else { - max_gain_recompute[target] = kl_gain_update_info(target, true); - } - - const unsigned target_start_idx = start_idx(target_step, start_step); - const unsigned target_window_bound = end_idx(target_step, end_step); - auto &affinity_table_target = thread_data.affinity_table.at(target); - const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc) * comm_gain; - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - if (p == target_proc) { - continue; - } - - for (unsigned idx = target_start_idx; idx < target_window_bound; idx++) { - affinity_table_target[p][idx] -= comm_aff; - } - } - break; // since node_lambda_map[source][move.from_proc] == 1 - } - } - } - - if (node_lambda_map.get_proc_entry(source, move.to_proc) == 1) { - const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - - for (const auto &target : instance->getComputationalDag().children(source)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) - || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) { - continue; - } - - if (source_proc != move.to_proc && is_compatible(target, move.to_proc)) { - if (max_gain_recompute.find(target) != max_gain_recompute.end()) { - max_gain_recompute[target].full_update = true; - } else { - max_gain_recompute[target] = kl_gain_update_info(target, true); - } - - const unsigned target_window_bound = end_idx(target_step, end_step); - auto &affinity_table_target_to_proc = thread_data.affinity_table.at(target)[move.to_proc]; - const cost_t comm_aff = instance->communicationCosts(source_proc, move.to_proc) * comm_gain; - for (unsigned idx = start_idx(target_step, start_step); idx < target_window_bound; idx++) { - affinity_table_target_to_proc[idx] -= comm_aff; - } 
- } - } - } else if (node_lambda_map.get_proc_entry(source, move.to_proc) == 2) { - for (const auto &target : instance->getComputationalDag().children(source)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) - || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) { - continue; - } - - const unsigned target_proc = active_schedule->assigned_processor(target); - if (target_proc == move.to_proc) { - if (source_proc != target_proc) { - if (max_gain_recompute.find(target) != max_gain_recompute.end()) { - max_gain_recompute[target].full_update = true; - } else { - max_gain_recompute[target] = kl_gain_update_info(target, true); - } - - const unsigned target_start_idx = start_idx(target_step, start_step); - const unsigned target_window_bound = end_idx(target_step, end_step); - auto &affinity_table_target = thread_data.affinity_table.at(target); - const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc) - * graph->vertex_comm_weight(source) * comm_multiplier; - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - if (p == target_proc) { - continue; - } - - for (unsigned idx = target_start_idx; idx < target_window_bound; idx++) { - affinity_table_target[p][idx] += comm_aff; - } - } - } - break; - } - } - } - } - - const unsigned source_step = active_schedule->assigned_superstep(source); - if (source_step < start_step || source_step > end_step) { - continue; - } - - if (thread_data.lock_manager.is_locked(source)) { - continue; - } - - if (not thread_data.affinity_table.is_selected(source)) { - new_nodes.push_back(source); - continue; - } - - if (max_gain_recompute.find(source) != max_gain_recompute.end()) { - max_gain_recompute[source].full_update = true; - } else { - max_gain_recompute[source] = kl_gain_update_info(source, true); - } - - const unsigned source_proc = 
active_schedule->assigned_processor(source); - const unsigned source_start_idx = start_idx(source_step, start_step); - const unsigned window_bound = end_idx(source_step, end_step); - auto &affinity_table_source = thread_data.affinity_table.at(source); - - if (move.from_step < source_step + (move.from_proc != source_proc)) { - const unsigned diff = source_step - move.from_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; - unsigned idx = source_start_idx; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] += reward; - } - } - - if (window_size >= diff && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += reward; - } - - } else { - const unsigned diff = move.from_step - source_step; - unsigned idx = window_size + diff; - - if (idx < window_bound && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += penalty; - } - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] -= penalty; - } - } - } - - if (move.to_step < source_step + (move.to_proc != source_proc)) { - const unsigned diff = source_step - move.to_step; - const unsigned bound = window_size > diff ? 
window_size - diff : 0; - unsigned idx = source_start_idx; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] -= reward; - } - } - - if (window_size >= diff && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= reward; - } - - } else { - const unsigned diff = move.to_step - source_step; - unsigned idx = window_size + diff; - - if (idx < window_bound && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= penalty; - } - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] += penalty; - } - } - } - - if (move.to_proc != move.from_proc) { - if (node_lambda_map.has_no_proc_entry(source, move.from_proc)) { - const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - if (p == source_proc) { - continue; - } - - const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.from_proc), - instance->communicationCosts(source_proc, move.from_proc), - comm_gain); - for (unsigned idx = source_start_idx; idx < window_bound; idx++) { - affinity_table_source[p][idx] -= comm_cost; - } - } - } - - if (node_lambda_map.get_proc_entry(source, move.to_proc) == 1) { - const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - if (p == source_proc) { - continue; - } - - const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.to_proc), - instance->communicationCosts(source_proc, move.to_proc), - comm_gain); - for (unsigned idx = source_start_idx; idx < window_bound; idx++) { - affinity_table_source[p][idx] += comm_cost; - } - } - } - } - } - } - - inline unsigned start_idx(const unsigned node_step, const 
unsigned start_step) { - return node_step < window_size + start_step ? window_size - (node_step - start_step) : 0; - } - - inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { - return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step); - } - - inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, - const v_commw_t &node_target_comm_cost, - const cost_t &comm_gain) { - return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain - : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; - } - - template - void compute_comm_affinity(VertexType node, - affinity_table_t &affinity_table_node, - const cost_t &penalty, - const cost_t &reward, - const unsigned start_step, - const unsigned end_step) { - const unsigned node_step = active_schedule->assigned_superstep(node); - const unsigned node_proc = active_schedule->assigned_processor(node); - const unsigned window_bound = end_idx(node_step, end_step); - const unsigned node_start_idx = start_idx(node_step, start_step); - - for (const auto &target : instance->getComputationalDag().children(node)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - const unsigned target_proc = active_schedule->assigned_processor(target); - - if (target_step < node_step + (target_proc != node_proc)) { - const unsigned diff = node_step - target_step; - const unsigned bound = window_size > diff ? 
window_size - diff : 0; - unsigned idx = node_start_idx; - - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] -= reward; - } - } - - if (window_size >= diff && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= reward; - } - - } else { - const unsigned diff = target_step - node_step; - unsigned idx = window_size + diff; - - if (idx < window_bound && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= penalty; - } - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] += penalty; - } - } - } - } // traget - - const cost_t comm_gain = graph->vertex_comm_weight(node) * comm_multiplier; - - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - if (p == node_proc) { - continue; - } - - for (const auto lambda_pair : node_lambda_map.iterate_proc_entries(node)) { - const auto &lambda_proc = lambda_pair.first; - const cost_t comm_cost = change_comm_cost( - instance->communicationCosts(p, lambda_proc), instance->communicationCosts(node_proc, lambda_proc), comm_gain); - for (unsigned idx = node_start_idx; idx < window_bound; idx++) { - affinity_table_node[p][idx] += comm_cost; - } - } - } - - for (const auto &source : instance->getComputationalDag().parents(node)) { - const unsigned source_step = active_schedule->assigned_superstep(source); - const unsigned source_proc = active_schedule->assigned_processor(source); - - if (source_step < node_step + (source_proc == node_proc)) { - const unsigned diff = node_step - source_step; - const unsigned bound = window_size >= diff ? 
window_size - diff + 1 : 0; - unsigned idx = node_start_idx; - - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] += penalty; - } - } - - if (idx - 1 < bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx - 1] -= penalty; - } - - } else { - const unsigned diff = source_step - node_step; - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx] -= reward; - } - - idx++; - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] -= reward; - } - } - } - - const cost_t source_comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - if (p == node_proc) { - continue; - } - - if (source_proc != node_proc && node_lambda_map.get_proc_entry(source, node_proc) == 1) { - for (unsigned idx = node_start_idx; idx < window_bound; idx++) { - affinity_table_node[p][idx] -= instance->communicationCosts(source_proc, node_proc) * source_comm_gain; - } - } - - if (source_proc != p && node_lambda_map.has_no_proc_entry(source, p)) { - for (unsigned idx = node_start_idx; idx < window_bound; idx++) { - affinity_table_node[p][idx] += instance->communicationCosts(source_proc, p) * source_comm_gain; - } - } - } - } // source - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp deleted file mode 100644 index a28e4640..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp +++ /dev/null @@ -1,451 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include "../kl_active_schedule.hpp" -#include "../kl_improver.hpp" - -namespace osp { - -template -struct kl_total_comm_cost_function { - using VertexType = vertex_idx_t; - using kl_move = kl_move_struct; - using kl_gain_update_info = kl_update_info; - - constexpr static bool is_max_comm_cost_function = false; - - constexpr static unsigned window_range = 2 * window_size + 1; - constexpr static bool use_node_communication_costs = use_node_communication_costs_arg || not has_edge_weights_v; - - kl_active_schedule *active_schedule; - - CompatibleProcessorRange *proc_range; - - const Graph_t *graph; - const BspInstance *instance; - - cost_t comm_multiplier = 1; - cost_t max_comm_weight = 0; - - inline cost_t get_comm_multiplier() { return comm_multiplier; } - - inline cost_t get_max_comm_weight() { return max_comm_weight; } - - inline cost_t get_max_comm_weight_multiplied() { return max_comm_weight * comm_multiplier; } - - const std::string name() const { return "toal_comm_cost"; } - - inline bool is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); } - - void initialize(kl_active_schedule &sched, CompatibleProcessorRange &p_range) { - active_schedule = &sched; - proc_range = &p_range; - instance = &sched.getInstance(); - graph = &instance->getComputationalDag(); - comm_multiplier = 1.0 
/ instance->numberOfProcessors(); - } - - struct empty_struct {}; - - using pre_move_comm_data_t = empty_struct; - - inline empty_struct get_pre_move_comm_data(const kl_move &) { return empty_struct(); } - - cost_t compute_schedule_cost_test() { return compute_schedule_cost(); } - - void update_datastructure_after_move(const kl_move &, const unsigned, const unsigned) {} - - cost_t compute_schedule_cost() { - cost_t work_costs = 0; - for (unsigned step = 0; step < active_schedule->num_steps(); step++) { - work_costs += active_schedule->get_step_max_work(step); - } - - cost_t comm_costs = 0; - for (const auto &edge : edges(*graph)) { - const auto &source_v = source(edge, *graph); - const auto &target_v = target(edge, *graph); - - const unsigned &source_proc = active_schedule->assigned_processor(source_v); - const unsigned &target_proc = active_schedule->assigned_processor(target_v); - - if (source_proc != target_proc) { - if constexpr (use_node_communication_costs) { - const cost_t source_comm_cost = graph->vertex_comm_weight(source_v); - max_comm_weight = std::max(max_comm_weight, source_comm_cost); - comm_costs += source_comm_cost * instance->communicationCosts(source_proc, target_proc); - } else { - const cost_t source_comm_cost = graph->edge_comm_weight(edge); - max_comm_weight = std::max(max_comm_weight, source_comm_cost); - comm_costs += source_comm_cost * instance->communicationCosts(source_proc, target_proc); - } - } - } - - return work_costs + comm_costs * comm_multiplier - + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); - } - - template - void update_node_comm_affinity(const kl_move &move, - thread_data_t &thread_data, - const cost_t &penalty, - const cost_t &reward, - std::map &max_gain_recompute, - std::vector &new_nodes) { - const unsigned &start_step = thread_data.start_step; - const unsigned &end_step = thread_data.end_step; - - for (const auto &target : instance->getComputationalDag().children(move.node)) { - const 
unsigned target_step = active_schedule->assigned_superstep(target); - if (target_step < start_step || target_step > end_step) { - continue; - } - - if (thread_data.lock_manager.is_locked(target)) { - continue; - } - - if (not thread_data.affinity_table.is_selected(target)) { - new_nodes.push_back(target); - continue; - } - - if (max_gain_recompute.find(target) != max_gain_recompute.end()) { - max_gain_recompute[target].full_update = true; - } else { - max_gain_recompute[target] = kl_gain_update_info(target, true); - } - - const unsigned target_proc = active_schedule->assigned_processor(target); - const unsigned target_start_idx = start_idx(target_step, start_step); - auto &affinity_table_target = thread_data.affinity_table.at(target); - - if (move.from_step < target_step + (move.from_proc == target_proc)) { - const unsigned diff = target_step - move.from_step; - const unsigned bound = window_size >= diff ? window_size - diff + 1 : 0; - unsigned idx = target_start_idx; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table_target[p][idx] -= penalty; - } - } - - if (idx - 1 < bound && is_compatible(target, move.from_proc)) { - affinity_table_target[move.from_proc][idx - 1] += penalty; - } - - } else { - const unsigned diff = move.from_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(target, move.from_proc)) { - affinity_table_target[move.from_proc][idx] += reward; - } - - idx++; - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table_target[p][idx] += reward; - } - } - } - - if (move.to_step < target_step + (move.to_proc == target_proc)) { - unsigned idx = target_start_idx; - const unsigned diff = target_step - move.to_step; - const unsigned bound = window_size >= diff ? 
window_size - diff + 1 : 0; - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table_target[p][idx] += penalty; - } - } - - if (idx - 1 < bound && is_compatible(target, move.to_proc)) { - affinity_table_target[move.to_proc][idx - 1] -= penalty; - } - - } else { - const unsigned diff = move.to_step - target_step; - const unsigned window_bound = end_idx(target_step, end_step); - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(target, move.to_proc)) { - affinity_table_target[move.to_proc][idx] -= reward; - } - - idx++; - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - affinity_table_target[p][idx] -= reward; - } - } - } - - if (move.to_proc != move.from_proc) { - const auto from_proc_target_comm_cost = instance->communicationCosts(move.from_proc, target_proc); - const auto to_proc_target_comm_cost = instance->communicationCosts(move.to_proc, target_proc); - - const cost_t comm_gain = graph->vertex_comm_weight(move.node) * comm_multiplier; - - unsigned idx = target_start_idx; - const unsigned window_bound = end_idx(target_step, end_step); - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - const auto x = change_comm_cost( - instance->communicationCosts(p, move.to_proc), to_proc_target_comm_cost, comm_gain); - const auto y = change_comm_cost( - instance->communicationCosts(p, move.from_proc), from_proc_target_comm_cost, comm_gain); - affinity_table_target[p][idx] += x - y; - } - } - } - } - - for (const auto &source : instance->getComputationalDag().parents(move.node)) { - const unsigned source_step = active_schedule->assigned_superstep(source); - if (source_step < start_step || source_step > end_step) { - continue; - } - - if (thread_data.lock_manager.is_locked(source)) { - continue; - } - - if (not 
thread_data.affinity_table.is_selected(source)) { - new_nodes.push_back(source); - continue; - } - - if (max_gain_recompute.find(source) != max_gain_recompute.end()) { - max_gain_recompute[source].full_update = true; - } else { - max_gain_recompute[source] = kl_gain_update_info(source, true); - } - - const unsigned source_proc = active_schedule->assigned_processor(source); - const unsigned window_bound = end_idx(source_step, end_step); - auto &affinity_table_source = thread_data.affinity_table.at(source); - - if (move.from_step < source_step + (move.from_proc != source_proc)) { - const unsigned diff = source_step - move.from_step; - const unsigned bound = window_size > diff ? window_size - diff : 0; - unsigned idx = start_idx(source_step, start_step); - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] += reward; - } - } - - if (window_size >= diff && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += reward; - } - - } else { - const unsigned diff = move.from_step - source_step; - unsigned idx = window_size + diff; - - if (idx < window_bound && is_compatible(source, move.from_proc)) { - affinity_table_source[move.from_proc][idx] += penalty; - } - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] -= penalty; - } - } - } - - if (move.to_step < source_step + (move.to_proc != source_proc)) { - const unsigned diff = source_step - move.to_step; - const unsigned bound = window_size > diff ? 
window_size - diff : 0; - unsigned idx = start_idx(source_step, start_step); - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] -= reward; - } - } - - if (window_size >= diff && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= reward; - } - - } else { - const unsigned diff = move.to_step - source_step; - unsigned idx = window_size + diff; - - if (idx < window_bound && is_compatible(source, move.to_proc)) { - affinity_table_source[move.to_proc][idx] -= penalty; - } - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - affinity_table_source[p][idx] += penalty; - } - } - } - - if (move.to_proc != move.from_proc) { - const auto from_proc_source_comm_cost = instance->communicationCosts(source_proc, move.from_proc); - const auto to_proc_source_comm_cost = instance->communicationCosts(source_proc, move.to_proc); - - const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - - unsigned idx = start_idx(source_step, start_step); - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - const cost_t x = change_comm_cost( - instance->communicationCosts(p, move.to_proc), to_proc_source_comm_cost, comm_gain); - const cost_t y = change_comm_cost( - instance->communicationCosts(p, move.from_proc), from_proc_source_comm_cost, comm_gain); - affinity_table_source[p][idx] += x - y; - } - } - } - } - } - - inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { - return (node_step < window_size + start_step) ? window_size - (node_step - start_step) : 0; - } - - inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { - return (node_step + window_size <= end_step) ? 
window_range : window_range - (node_step + window_size - end_step); - } - - inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, - const v_commw_t &node_target_comm_cost, - const cost_t &comm_gain) { - return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain - : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; - } - - template - void compute_comm_affinity(VertexType node, - affinity_table_t &affinity_table_node, - const cost_t &penalty, - const cost_t &reward, - const unsigned start_step, - const unsigned end_step) { - const unsigned node_step = active_schedule->assigned_superstep(node); - const unsigned node_proc = active_schedule->assigned_processor(node); - const unsigned window_bound = end_idx(node_step, end_step); - const unsigned node_start_idx = start_idx(node_step, start_step); - - for (const auto &target : instance->getComputationalDag().children(node)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - const unsigned target_proc = active_schedule->assigned_processor(target); - - if (target_step < node_step + (target_proc != node_proc)) { - const unsigned diff = node_step - target_step; - const unsigned bound = window_size > diff ? 
window_size - diff : 0; - unsigned idx = node_start_idx; - - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] -= reward; - } - } - - if (window_size >= diff && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= reward; - } - - } else { - const unsigned diff = target_step - node_step; - unsigned idx = window_size + diff; - - if (idx < window_bound && is_compatible(node, target_proc)) { - affinity_table_node[target_proc][idx] -= penalty; - } - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] += penalty; - } - } - } - - const cost_t comm_gain = graph->vertex_comm_weight(node) * comm_multiplier; - const auto node_target_comm_cost = instance->communicationCosts(node_proc, target_proc); - - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - const cost_t comm_cost - = change_comm_cost(instance->communicationCosts(p, target_proc), node_target_comm_cost, comm_gain); - for (unsigned idx = node_start_idx; idx < window_bound; idx++) { - affinity_table_node[p][idx] += comm_cost; - } - } - - } // traget - - for (const auto &source : instance->getComputationalDag().parents(node)) { - const unsigned source_step = active_schedule->assigned_superstep(source); - const unsigned source_proc = active_schedule->assigned_processor(source); - - if (source_step < node_step + (source_proc == node_proc)) { - const unsigned diff = node_step - source_step; - const unsigned bound = window_size >= diff ? 
window_size - diff + 1 : 0; - unsigned idx = node_start_idx; - - for (; idx < bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] += penalty; - } - } - - if (idx - 1 < bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx - 1] -= penalty; - } - - } else { - const unsigned diff = source_step - node_step; - unsigned idx = std::min(window_size + diff, window_bound); - - if (idx < window_bound && is_compatible(node, source_proc)) { - affinity_table_node[source_proc][idx] -= reward; - } - - idx++; - - for (; idx < window_bound; idx++) { - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - affinity_table_node[p][idx] -= reward; - } - } - } - - const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; - const auto source_node_comm_cost = instance->communicationCosts(source_proc, node_proc); - - for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - const cost_t comm_cost - = change_comm_cost(instance->communicationCosts(p, source_proc), source_node_comm_cost, comm_gain); - for (unsigned idx = node_start_idx; idx < window_bound; idx++) { - affinity_table_node[p][idx] += comm_cost; - } - } - } // source - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp deleted file mode 100644 index 8c76efe4..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp +++ /dev/null @@ -1,384 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include -#include -#include -#include - -#include "comm_cost_policies.hpp" -#include "generic_lambda_container.hpp" -#include "lambda_container.hpp" -#include "osp/bsp/model/BspInstance.hpp" - -namespace osp { - -template -struct pre_move_comm_data { - struct step_info { - comm_weight_t max_comm; - comm_weight_t second_max_comm; - unsigned max_comm_count; - }; - - std::unordered_map step_data; - - pre_move_comm_data() = default; - - void add_step(unsigned step, comm_weight_t max, comm_weight_t second, unsigned count) { - step_data[step] = {max, second, count}; - } - - bool get_step(unsigned step, step_info &info) const { - auto it = step_data.find(step); - if (it != step_data.end()) { - info = it->second; - return true; - } - return false; - } -}; - -template -struct max_comm_datastructure { - using comm_weight_t = v_commw_t; - using VertexType = vertex_idx_t; - using kl_move = kl_move_struct; - - const BspInstance *instance; - const kl_active_schedule_t *active_schedule; - - std::vector> step_proc_send_; - std::vector> step_proc_receive_; - - // Caches for fast cost calculation (Global Max/Second Max per step) - std::vector step_max_comm_cache; - std::vector step_second_max_comm_cache; - std::vector step_max_comm_count_cache; - - comm_weight_t max_comm_weight = 0; - - // Select the appropriate container type based on the policy's ValueType - using ContainerType = - typename std::conditional::value, - lambda_vector_container, - generic_lambda_vector_container>::type; - - 
ContainerType node_lambda_map; - - // Optimization: Scratchpad for update_datastructure_after_move to avoid allocations - std::vector affected_steps_list; - std::vector step_is_affected; - - inline comm_weight_t step_proc_send(unsigned step, unsigned proc) const { return step_proc_send_[step][proc]; } - - inline comm_weight_t &step_proc_send(unsigned step, unsigned proc) { return step_proc_send_[step][proc]; } - - inline comm_weight_t step_proc_receive(unsigned step, unsigned proc) const { return step_proc_receive_[step][proc]; } - - inline comm_weight_t &step_proc_receive(unsigned step, unsigned proc) { return step_proc_receive_[step][proc]; } - - inline comm_weight_t step_max_comm(unsigned step) const { return step_max_comm_cache[step]; } - - inline comm_weight_t step_second_max_comm(unsigned step) const { return step_second_max_comm_cache[step]; } - - inline unsigned step_max_comm_count(unsigned step) const { return step_max_comm_count_cache[step]; } - - inline void initialize(kl_active_schedule_t &kl_sched) { - active_schedule = &kl_sched; - instance = &active_schedule->getInstance(); - const unsigned num_steps = active_schedule->num_steps(); - const unsigned num_procs = instance->numberOfProcessors(); - max_comm_weight = 0; - - step_proc_send_.assign(num_steps, std::vector(num_procs, 0)); - step_proc_receive_.assign(num_steps, std::vector(num_procs, 0)); - - step_max_comm_cache.assign(num_steps, 0); - step_second_max_comm_cache.assign(num_steps, 0); - step_max_comm_count_cache.assign(num_steps, 0); - - node_lambda_map.initialize(instance->getComputationalDag().num_vertices(), num_procs); - - // Initialize scratchpad - step_is_affected.assign(num_steps, false); - affected_steps_list.reserve(num_steps); - } - - inline void clear() { - step_proc_send_.clear(); - step_proc_receive_.clear(); - step_max_comm_cache.clear(); - step_second_max_comm_cache.clear(); - step_max_comm_count_cache.clear(); - node_lambda_map.clear(); - affected_steps_list.clear(); - 
step_is_affected.clear(); - } - - inline void arrange_superstep_comm_data(const unsigned step) { - comm_weight_t max_send = 0; - comm_weight_t second_max_send = 0; - unsigned max_send_count = 0; - - const auto &sends = step_proc_send_[step]; - for (const auto val : sends) { - if (val > max_send) { - second_max_send = max_send; - max_send = val; - max_send_count = 1; - } else if (val == max_send) { - max_send_count++; - } else if (val > second_max_send) { - second_max_send = val; - } - } - - comm_weight_t max_receive = 0; - comm_weight_t second_max_receive = 0; - unsigned max_receive_count = 0; - - const auto &receives = step_proc_receive_[step]; - for (const auto val : receives) { - if (val > max_receive) { - second_max_receive = max_receive; - max_receive = val; - max_receive_count = 1; - } else if (val == max_receive) { - max_receive_count++; - } else if (val > second_max_receive) { - second_max_receive = val; - } - } - - const comm_weight_t global_max = std::max(max_send, max_receive); - step_max_comm_cache[step] = global_max; - - unsigned global_count = 0; - if (max_send == global_max) { - global_count += max_send_count; - } - if (max_receive == global_max) { - global_count += max_receive_count; - } - step_max_comm_count_cache[step] = global_count; - - comm_weight_t cand_send = (max_send == global_max) ? second_max_send : max_send; - comm_weight_t cand_recv = (max_receive == global_max) ? 
second_max_receive : max_receive; - - step_second_max_comm_cache[step] = std::max(cand_send, cand_recv); - } - - void recompute_max_send_receive(unsigned step) { arrange_superstep_comm_data(step); } - - inline pre_move_comm_data get_pre_move_comm_data(const kl_move &move) { - pre_move_comm_data data; - std::unordered_set affected_steps; - - affected_steps.insert(move.from_step); - affected_steps.insert(move.to_step); - - const auto &graph = instance->getComputationalDag(); - - for (const auto &parent : graph.parents(move.node)) { - affected_steps.insert(active_schedule->assigned_superstep(parent)); - } - - for (unsigned step : affected_steps) { - data.add_step(step, step_max_comm(step), step_second_max_comm(step), step_max_comm_count(step)); - } - - return data; - } - - void update_datastructure_after_move(const kl_move &move, unsigned, unsigned) { - const auto &graph = instance->getComputationalDag(); - - // Prepare Scratchpad (Avoids Allocations) --- - for (unsigned step : affected_steps_list) { - if (step < step_is_affected.size()) { - step_is_affected[step] = false; - } - } - affected_steps_list.clear(); - - auto mark_step = [&](unsigned step) { - if (step < step_is_affected.size() && !step_is_affected[step]) { - step_is_affected[step] = true; - affected_steps_list.push_back(step); - } - }; - - const VertexType node = move.node; - const unsigned from_step = move.from_step; - const unsigned to_step = move.to_step; - const unsigned from_proc = move.from_proc; - const unsigned to_proc = move.to_proc; - const comm_weight_t comm_w_node = graph.vertex_comm_weight(node); - - // Handle Node Movement (Outgoing Edges: Node -> Children) - - if (from_step != to_step) { - // Case 1: Node changes Step - for (const auto [proc, val] : node_lambda_map.iterate_proc_entries(node)) { - // A. 
Remove Old (Sender: from_proc, Receiver: proc) - if (proc != from_proc) { - const comm_weight_t cost = comm_w_node * instance->sendCosts(from_proc, proc); - if (cost > 0) { - CommPolicy::unattribute_communication(*this, cost, from_step, from_proc, proc, 0, val); - } - } - - // B. Add New (Sender: to_proc, Receiver: proc) - if (proc != to_proc) { - const comm_weight_t cost = comm_w_node * instance->sendCosts(to_proc, proc); - if (cost > 0) { - CommPolicy::attribute_communication(*this, cost, to_step, to_proc, proc, 0, val); - } - } - } - mark_step(from_step); - mark_step(to_step); - - } else if (from_proc != to_proc) { - // Case 2: Node stays in same Step, but changes Processor - - for (const auto [proc, val] : node_lambda_map.iterate_proc_entries(node)) { - // Remove Old (Sender: from_proc, Receiver: proc) - if (proc != from_proc) { - const comm_weight_t cost = comm_w_node * instance->sendCosts(from_proc, proc); - if (cost > 0) { - CommPolicy::unattribute_communication(*this, cost, from_step, from_proc, proc, 0, val); - } - } - - // Add New (Sender: to_proc, Receiver: proc) - if (proc != to_proc) { - const comm_weight_t cost = comm_w_node * instance->sendCosts(to_proc, proc); - if (cost > 0) { - CommPolicy::attribute_communication(*this, cost, from_step, to_proc, proc, 0, val); - } - } - } - mark_step(from_step); - } - - // Update Parents' Outgoing Communication (Parents → Node) - - for (const auto &parent : graph.parents(node)) { - const unsigned parent_step = active_schedule->assigned_superstep(parent); - // Fast boundary check - if (parent_step >= step_proc_send_.size()) { - continue; - } - - const unsigned parent_proc = active_schedule->assigned_processor(parent); - const comm_weight_t comm_w_parent = graph.vertex_comm_weight(parent); - - auto &val = node_lambda_map.get_proc_entry(parent, from_proc); - const bool removed_from_proc = CommPolicy::remove_child(val, from_step); - - // 1. 
Handle Removal from from_proc - if (removed_from_proc) { - if (from_proc != parent_proc) { - const comm_weight_t cost = comm_w_parent * instance->sendCosts(parent_proc, from_proc); - if (cost > 0) { - CommPolicy::unattribute_communication(*this, cost, parent_step, parent_proc, from_proc, from_step, val); - } - } - } - - auto &val_to = node_lambda_map.get_proc_entry(parent, to_proc); - const bool added_to_proc = CommPolicy::add_child(val_to, to_step); - - // 2. Handle Addition to to_proc - if (added_to_proc) { - if (to_proc != parent_proc) { - const comm_weight_t cost = comm_w_parent * instance->sendCosts(parent_proc, to_proc); - if (cost > 0) { - CommPolicy::attribute_communication(*this, cost, parent_step, parent_proc, to_proc, to_step, val_to); - } - } - } - - mark_step(parent_step); - } - - // Re-arrange Affected Steps - for (unsigned step : affected_steps_list) { - arrange_superstep_comm_data(step); - } - } - - void swap_steps(const unsigned step1, const unsigned step2) { - std::swap(step_proc_send_[step1], step_proc_send_[step2]); - std::swap(step_proc_receive_[step1], step_proc_receive_[step2]); - std::swap(step_max_comm_cache[step1], step_max_comm_cache[step2]); - std::swap(step_second_max_comm_cache[step1], step_second_max_comm_cache[step2]); - std::swap(step_max_comm_count_cache[step1], step_max_comm_count_cache[step2]); - } - - void reset_superstep(unsigned step) { - std::fill(step_proc_send_[step].begin(), step_proc_send_[step].end(), 0); - std::fill(step_proc_receive_[step].begin(), step_proc_receive_[step].end(), 0); - arrange_superstep_comm_data(step); - } - - void compute_comm_datastructures(unsigned start_step, unsigned end_step) { - for (unsigned step = start_step; step <= end_step; step++) { - std::fill(step_proc_send_[step].begin(), step_proc_send_[step].end(), 0); - std::fill(step_proc_receive_[step].begin(), step_proc_receive_[step].end(), 0); - } - - const auto &vec_sched = active_schedule->getVectorSchedule(); - const auto &graph = 
instance->getComputationalDag(); - - for (const auto &u : graph.vertices()) { - node_lambda_map.reset_node(u); - const unsigned u_proc = vec_sched.assignedProcessor(u); - const unsigned u_step = vec_sched.assignedSuperstep(u); - const comm_weight_t comm_w = graph.vertex_comm_weight(u); - max_comm_weight = std::max(max_comm_weight, comm_w); - - for (const auto &v : graph.children(u)) { - const unsigned v_proc = vec_sched.assignedProcessor(v); - const unsigned v_step = vec_sched.assignedSuperstep(v); - - const comm_weight_t comm_w_send_cost = (u_proc != v_proc) ? comm_w * instance->sendCosts(u_proc, v_proc) : 0; - - auto &val = node_lambda_map.get_proc_entry(u, v_proc); - if (CommPolicy::add_child(val, v_step)) { - if (u_proc != v_proc && comm_w_send_cost > 0) { - CommPolicy::attribute_communication(*this, comm_w_send_cost, u_step, u_proc, v_proc, v_step, val); - } - } - } - } - - for (unsigned step = start_step; step <= end_step; step++) { - if (step >= step_proc_send_.size()) { - continue; - } - arrange_superstep_comm_data(step); - } - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp deleted file mode 100644 index 2e6f3af0..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp +++ /dev/null @@ -1,723 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/model/IBspSchedule.hpp" -#include "osp/bsp/model/util/SetSchedule.hpp" -#include "osp/bsp/model/util/VectorSchedule.hpp" -#include "osp/bsp/scheduler/ImprovementScheduler.hpp" -#include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" -#include "osp/graph_algorithms/directed_graph_util.hpp" - -namespace osp { - -template -struct kl_move_struct { - vertex_idx_t node; - cost_t gain; - - unsigned from_proc; - unsigned from_step; - - unsigned to_proc; - unsigned to_step; - - kl_move_struct() : node(0), gain(0), from_proc(0), from_step(0), to_proc(0), to_step(0) {} - - kl_move_struct(vertex_idx_t _node, cost_t _gain, unsigned _from_proc, unsigned _from_step, unsigned _to_proc, unsigned _to_step) - : node(_node), gain(_gain), from_proc(_from_proc), from_step(_from_step), to_proc(_to_proc), to_step(_to_step) {} - - bool operator<(kl_move_struct const &rhs) const { - return (gain < rhs.gain) or (gain == rhs.gain and node > rhs.node); - } - - bool operator>(kl_move_struct const &rhs) const { - return (gain > rhs.gain) or (gain >= rhs.gain and node < rhs.node); - } - - kl_move_struct reverse_move() const { - return kl_move_struct(node, -gain, to_proc, to_step, from_proc, from_step); - } -}; - -template -struct pre_move_work_data { - work_weight_t from_step_max_work; - work_weight_t from_step_second_max_work; - unsigned from_step_max_work_processor_count; - - work_weight_t to_step_max_work; - work_weight_t to_step_second_max_work; - unsigned to_step_max_work_processor_count; - - pre_move_work_data() {} - - pre_move_work_data(work_weight_t from_step_max_work_, - work_weight_t from_step_second_max_work_, - unsigned from_step_max_work_processor_count_, - work_weight_t to_step_max_work_, - work_weight_t to_step_second_max_work_, - unsigned to_step_max_work_processor_count_) - : 
from_step_max_work(from_step_max_work_), - from_step_second_max_work(from_step_second_max_work_), - from_step_max_work_processor_count(from_step_max_work_processor_count_), - to_step_max_work(to_step_max_work_), - to_step_second_max_work(to_step_second_max_work_), - to_step_max_work_processor_count(to_step_max_work_processor_count_) {} -}; - -template -struct kl_active_schedule_work_datastructures { - using work_weight_t = v_workw_t; - - const BspInstance *instance; - const SetSchedule *set_schedule; - - struct weight_proc { - work_weight_t work; - unsigned proc; - - weight_proc() : work(0), proc(0) {} - - weight_proc(work_weight_t _work, unsigned _proc) : work(_work), proc(_proc) {} - - bool operator<(weight_proc const &rhs) const { return (work > rhs.work) or (work == rhs.work and proc < rhs.proc); } - }; - - std::vector> step_processor_work_; - std::vector> step_processor_position; - std::vector step_max_work_processor_count; - work_weight_t max_work_weight; - work_weight_t total_work_weight; - - inline work_weight_t step_max_work(unsigned step) const { return step_processor_work_[step][0].work; } - - inline work_weight_t step_second_max_work(unsigned step) const { - return step_processor_work_[step][step_max_work_processor_count[step]].work; - } - - inline work_weight_t step_proc_work(unsigned step, unsigned proc) const { - return step_processor_work_[step][step_processor_position[step][proc]].work; - } - - inline work_weight_t &step_proc_work(unsigned step, unsigned proc) { - return step_processor_work_[step][step_processor_position[step][proc]].work; - } - - template - inline pre_move_work_data get_pre_move_work_data(kl_move_struct move) { - return pre_move_work_data(step_max_work(move.from_step), - step_second_max_work(move.from_step), - step_max_work_processor_count[move.from_step], - step_max_work(move.to_step), - step_second_max_work(move.to_step), - step_max_work_processor_count[move.to_step]); - } - - inline void initialize(const SetSchedule &sched, 
const BspInstance &inst, unsigned num_steps) { - instance = &inst; - set_schedule = &sched; - max_work_weight = 0; - total_work_weight = 0; - step_processor_work_ - = std::vector>(num_steps, std::vector(instance->numberOfProcessors())); - step_processor_position - = std::vector>(num_steps, std::vector(instance->numberOfProcessors(), 0)); - step_max_work_processor_count = std::vector(num_steps, 0); - } - - inline void clear() { - step_processor_work_.clear(); - step_processor_position.clear(); - step_max_work_processor_count.clear(); - } - - inline void arrange_superstep_data(const unsigned step) { - std::sort(step_processor_work_[step].begin(), step_processor_work_[step].end()); - unsigned pos = 0; - const work_weight_t max_work_to = step_processor_work_[step][0].work; - - for (const auto &wp : step_processor_work_[step]) { - step_processor_position[step][wp.proc] = pos++; - - if (wp.work == max_work_to && pos < instance->numberOfProcessors()) { - step_max_work_processor_count[step] = pos; - } - } - } - - template - void apply_move(kl_move_struct move, work_weight_t work_weight) { - if (work_weight == 0) { - return; - } - - if (move.to_step != move.from_step) { - step_proc_work(move.to_step, move.to_proc) += work_weight; - step_proc_work(move.from_step, move.from_proc) -= work_weight; - - arrange_superstep_data(move.to_step); - arrange_superstep_data(move.from_step); - - // const work_weight_t prev_max_work_to = step_max_work(move.to_step); - // const work_weight_t new_weight_to = step_proc_work(move.to_step, move.to_proc) += work_weight; - - // if (prev_max_work_to < new_weight_to) { - // step_max_work_processor_count[move.to_step] = 1; - // } else if (prev_max_work_to == new_weight_to) { - // step_max_work_processor_count[move.to_step]++; - // } - - // unsigned to_proc_pos = step_processor_position[move.to_step][move.to_proc]; - - // while (to_proc_pos > 0 && step_processor_work_[move.to_step][to_proc_pos - 1].work < new_weight_to) { - // 
std::swap(step_processor_work_[move.to_step][to_proc_pos], step_processor_work_[move.to_step][to_proc_pos - - // 1]); std::swap(step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos].proc], - // step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos - 1].proc]); - // to_proc_pos--; - // } - - // const work_weight_t prev_max_work_from = step_max_work(move.from_step); - // const work_weight_t prev_weight_from = step_proc_work(move.from_step, move.from_proc); - // const work_weight_t new_weight_from = step_proc_work(move.from_step, move.from_proc) -= work_weight; - - // unsigned from_proc_pos = step_processor_position[move.from_step][move.from_proc]; - - // while (from_proc_pos < instance->numberOfProcessors() - 1 && step_processor_work_[move.from_step][from_proc_pos + - // 1].work > new_weight_from) { - // std::swap(step_processor_work_[move.from_step][from_proc_pos], - // step_processor_work_[move.from_step][from_proc_pos + 1]); - // std::swap(step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos].proc], - // step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos + 1].proc]); - // from_proc_pos++; - // } - - // if (prev_max_work_from == prev_weight_from) { - // step_max_work_processor_count[move.from_step]--; - // if (step_max_work_processor_count[move.from_step] == 0) { - // step_max_work_processor_count[move.from_step] = from_proc_pos; - // } - // } - - } else { - step_proc_work(move.to_step, move.to_proc) += work_weight; - step_proc_work(move.from_step, move.from_proc) -= work_weight; - arrange_superstep_data(move.to_step); - } - } - - void swap_steps(const unsigned step1, const unsigned step2) { - std::swap(step_processor_work_[step1], step_processor_work_[step2]); - std::swap(step_processor_position[step1], step_processor_position[step2]); - std::swap(step_max_work_processor_count[step1], step_max_work_processor_count[step2]); - 
} - - void override_next_superstep(unsigned step) { - const unsigned next_step = step + 1; - for (unsigned i = 0; i < instance->numberOfProcessors(); i++) { - step_processor_work_[next_step][i] = step_processor_work_[step][i]; - step_processor_position[next_step][i] = step_processor_position[step][i]; - } - step_max_work_processor_count[next_step] = step_max_work_processor_count[step]; - } - - void reset_superstep(unsigned step) { - for (unsigned i = 0; i < instance->numberOfProcessors(); i++) { - step_processor_work_[step][i] = {0, i}; - step_processor_position[step][i] = i; - } - step_max_work_processor_count[step] = instance->numberOfProcessors() - 1; - } - - void compute_work_datastructures(unsigned start_step, unsigned end_step) { - for (unsigned step = start_step; step <= end_step; step++) { - step_max_work_processor_count[step] = 0; - work_weight_t max_work = 0; - - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - step_processor_work_[step][proc].work = 0; - step_processor_work_[step][proc].proc = proc; - - for (const auto &node : set_schedule->step_processor_vertices[step][proc]) { - const work_weight_t vertex_work_weight = instance->getComputationalDag().vertex_work_weight(node); - total_work_weight += vertex_work_weight; - max_work_weight = std::max(vertex_work_weight, max_work_weight); - step_processor_work_[step][proc].work += vertex_work_weight; - } - - if (step_processor_work_[step][proc].work > max_work) { - max_work = step_processor_work_[step][proc].work; - step_max_work_processor_count[step] = 1; - } else if (step_processor_work_[step][proc].work == max_work - && step_max_work_processor_count[step] < (instance->numberOfProcessors() - 1)) { - step_max_work_processor_count[step]++; - } - } - - std::sort(step_processor_work_[step].begin(), step_processor_work_[step].end()); - unsigned pos = 0; - for (const auto &wp : step_processor_work_[step]) { - step_processor_position[step][wp.proc] = pos++; - } - } - } -}; - -template 
-struct thread_local_active_schedule_data { - using VertexType = vertex_idx_t; - using EdgeType = edge_desc_t; - - using kl_move = kl_move_struct; - - std::unordered_set current_violations; - std::vector applied_moves; - - cost_t cost = 0; - cost_t initial_cost = 0; - bool feasible = true; - - cost_t best_cost = 0; - unsigned best_schedule_idx = 0; - - std::unordered_map new_violations; - std::unordered_set resolved_violations; - - inline void initialize_cost(cost_t cost_) { - initial_cost = cost_; - cost = cost_; - best_cost = cost_; - feasible = true; - } - - inline void update_cost(cost_t change_in_cost) { - cost += change_in_cost; - - if (cost <= best_cost && feasible) { - best_cost = cost; - best_schedule_idx = static_cast(applied_moves.size()); - } - } -}; - -template -class kl_active_schedule { - private: - using VertexType = vertex_idx_t; - using EdgeType = edge_desc_t; - using kl_move = kl_move_struct; - using thread_data_t = thread_local_active_schedule_data; - - const BspInstance *instance; - - VectorSchedule vector_schedule; - SetSchedule set_schedule; - - cost_t cost = 0; - bool feasible = true; - - public: - virtual ~kl_active_schedule() = default; - - inline const BspInstance &getInstance() const { return *instance; } - - inline const VectorSchedule &getVectorSchedule() const { return vector_schedule; } - - inline VectorSchedule &getVectorSchedule() { return vector_schedule; } - - inline const SetSchedule &getSetSchedule() const { return set_schedule; } - - inline cost_t get_cost() { return cost; } - - inline bool is_feasible() { return feasible; } - - inline unsigned num_steps() const { return vector_schedule.numberOfSupersteps(); } - - inline unsigned assigned_processor(VertexType node) const { return vector_schedule.assignedProcessor(node); } - - inline unsigned assigned_superstep(VertexType node) const { return vector_schedule.assignedSuperstep(node); } - - inline v_workw_t get_step_max_work(unsigned step) const { return 
work_datastructures.step_max_work(step); } - - inline v_workw_t get_step_second_max_work(unsigned step) const { - return work_datastructures.step_second_max_work(step); - } - - inline std::vector &get_step_max_work_processor_count() { - return work_datastructures.step_max_work_processor_count; - } - - inline v_workw_t get_step_processor_work(unsigned step, unsigned proc) const { - return work_datastructures.step_proc_work(step, proc); - } - - inline pre_move_work_data> get_pre_move_work_data(kl_move move) { - return work_datastructures.get_pre_move_work_data(move); - } - - inline v_workw_t get_max_work_weight() { return work_datastructures.max_work_weight; } - - inline v_workw_t get_total_work_weight() { return work_datastructures.total_work_weight; } - - inline void set_cost(cost_t cost_) { cost = cost_; } - - constexpr static bool use_memory_constraint = is_local_search_memory_constraint_v; - - MemoryConstraint_t memory_constraint; - - kl_active_schedule_work_datastructures work_datastructures; - - inline v_workw_t get_step_total_work(unsigned step) const { - v_workw_t total_work = 0; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - total_work += get_step_processor_work(step, proc); - } - return total_work; - } - - void apply_move(kl_move move, thread_data_t &thread_data) { - vector_schedule.setAssignedProcessor(move.node, move.to_proc); - vector_schedule.setAssignedSuperstep(move.node, move.to_step); - - set_schedule.step_processor_vertices[move.from_step][move.from_proc].erase(move.node); - set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node); - - update_violations(move.node, thread_data); - thread_data.applied_moves.push_back(move); - - work_datastructures.apply_move(move, instance->getComputationalDag().vertex_work_weight(move.node)); - if constexpr (use_memory_constraint) { - memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step); - } - } - - template - 
void revert_to_best_schedule(unsigned start_move, - unsigned insert_step, - comm_datastructures_t &comm_datastructures, - thread_data_t &thread_data, - unsigned start_step, - unsigned &end_step) { - const unsigned bound = std::max(start_move, thread_data.best_schedule_idx); - revert_moves(bound, comm_datastructures, thread_data, start_step, end_step); - - if (start_move > thread_data.best_schedule_idx) { - swap_empty_step_bwd(++end_step, insert_step); - } - - revert_moves(thread_data.best_schedule_idx, comm_datastructures, thread_data, start_step, end_step); - -#ifdef KL_DEBUG - if (not thread_data.feasible) { - std::cout << "Reverted to best schedule with cost: " << thread_data.best_cost << " and " - << vector_schedule.number_of_supersteps << " supersteps" << std::endl; - } -#endif - - thread_data.applied_moves.clear(); - thread_data.best_schedule_idx = 0; - thread_data.current_violations.clear(); - thread_data.feasible = true; - thread_data.cost = thread_data.best_cost; - } - - template - void revert_schedule_to_bound(const size_t bound, - const cost_t new_cost, - const bool is_feasible, - comm_datastructures_t &comm_datastructures, - thread_data_t &thread_data, - unsigned start_step, - unsigned end_step) { - revert_moves(bound, comm_datastructures, thread_data, start_step, end_step); - - thread_data.current_violations.clear(); - thread_data.feasible = is_feasible; - thread_data.cost = new_cost; - } - - void compute_violations(thread_data_t &thread_data); - void compute_work_memory_datastructures(unsigned start_step, unsigned end_step); - void write_schedule(BspSchedule &schedule); - inline void initialize(const IBspSchedule &schedule); - inline void clear(); - void remove_empty_step(unsigned step); - void insert_empty_step(unsigned step); - void swap_empty_step_fwd(const unsigned step, const unsigned to_step); - void swap_empty_step_bwd(const unsigned to_step, const unsigned empty_step); - void swap_steps(const unsigned step1, const unsigned step2); - - private: 
- template - void revert_moves(const size_t bound, - comm_datastructures_t &comm_datastructures, - thread_data_t &thread_data, - unsigned start_step, - unsigned end_step) { - while (thread_data.applied_moves.size() > bound) { - const auto move = thread_data.applied_moves.back().reverse_move(); - thread_data.applied_moves.pop_back(); - - vector_schedule.setAssignedProcessor(move.node, move.to_proc); - vector_schedule.setAssignedSuperstep(move.node, move.to_step); - - set_schedule.step_processor_vertices[move.from_step][move.from_proc].erase(move.node); - set_schedule.step_processor_vertices[move.to_step][move.to_proc].insert(move.node); - work_datastructures.apply_move(move, instance->getComputationalDag().vertex_work_weight(move.node)); - comm_datastructures.update_datastructure_after_move(move, start_step, end_step); - if constexpr (use_memory_constraint) { - memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step); - } - } - } - - void update_violations(VertexType node, thread_data_t &thread_data) { - thread_data.new_violations.clear(); - thread_data.resolved_violations.clear(); - - const unsigned node_step = vector_schedule.assignedSuperstep(node); - const unsigned node_proc = vector_schedule.assignedProcessor(node); - - for (const auto &edge : out_edges(node, instance->getComputationalDag())) { - const auto &child = target(edge, instance->getComputationalDag()); - - if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { - if ((node_step > vector_schedule.assignedSuperstep(child)) - || (node_step == vector_schedule.assignedSuperstep(child) - && node_proc != vector_schedule.assignedProcessor(child))) { - thread_data.current_violations.insert(edge); - thread_data.new_violations[child] = edge; - } - } else { - if ((node_step < vector_schedule.assignedSuperstep(child)) - || (node_step == vector_schedule.assignedSuperstep(child) - && node_proc == vector_schedule.assignedProcessor(child))) { 
- thread_data.current_violations.erase(edge); - thread_data.resolved_violations.insert(edge); - } - } - } - - for (const auto &edge : in_edges(node, instance->getComputationalDag())) { - const auto &parent = source(edge, instance->getComputationalDag()); - - if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { - if ((node_step < vector_schedule.assignedSuperstep(parent)) - || (node_step == vector_schedule.assignedSuperstep(parent) - && node_proc != vector_schedule.assignedProcessor(parent))) { - thread_data.current_violations.insert(edge); - thread_data.new_violations[parent] = edge; - } - } else { - if ((node_step > vector_schedule.assignedSuperstep(parent)) - || (node_step == vector_schedule.assignedSuperstep(parent) - && node_proc == vector_schedule.assignedProcessor(parent))) { - thread_data.current_violations.erase(edge); - thread_data.resolved_violations.insert(edge); - } - } - } - -#ifdef KL_DEBUG - - if (thread_data.new_violations.size() > 0) { - std::cout << "New violations: " << std::endl; - for (const auto &edge : thread_data.new_violations) { - std::cout << "Edge: " << source(edge.second, instance->getComputationalDag()) << " -> " - << target(edge.second, instance->getComputationalDag()) << std::endl; - } - } - - if (thread_data.resolved_violations.size() > 0) { - std::cout << "Resolved violations: " << std::endl; - for (const auto &edge : thread_data.resolved_violations) { - std::cout << "Edge: " << source(edge, instance->getComputationalDag()) << " -> " - << target(edge, instance->getComputationalDag()) << std::endl; - } - } - -#endif - - if (thread_data.current_violations.size() > 0) { - thread_data.feasible = false; - } else { - thread_data.feasible = true; - } - } -}; - -template -void kl_active_schedule::clear() { - work_datastructures.clear(); - vector_schedule.clear(); - set_schedule.clear(); - if constexpr (use_memory_constraint) { - memory_constraint.clear(); - } -} - -template -void 
kl_active_schedule::compute_violations(thread_data_t &thread_data) { - thread_data.current_violations.clear(); - thread_data.feasible = true; - - for (const auto &edge : edges(instance->getComputationalDag())) { - const auto &source_v = source(edge, instance->getComputationalDag()); - const auto &target_v = target(edge, instance->getComputationalDag()); - - const unsigned source_proc = assigned_processor(source_v); - const unsigned target_proc = assigned_processor(target_v); - const unsigned source_step = assigned_superstep(source_v); - const unsigned target_step = assigned_superstep(target_v); - - if (source_step > target_step || (source_step == target_step && source_proc != target_proc)) { - thread_data.current_violations.insert(edge); - thread_data.feasible = false; - } - } -} - -template -void kl_active_schedule::initialize(const IBspSchedule &schedule) { - instance = &schedule.getInstance(); - vector_schedule = VectorSchedule(schedule); - set_schedule = SetSchedule(schedule); - work_datastructures.initialize(set_schedule, *instance, num_steps()); - - cost = 0; - feasible = true; - - if constexpr (use_memory_constraint) { - memory_constraint.initialize(set_schedule, vector_schedule); - } - - compute_work_memory_datastructures(0, num_steps() - 1); -} - -template -void kl_active_schedule::compute_work_memory_datastructures(unsigned start_step, - unsigned end_step) { - if constexpr (use_memory_constraint) { - memory_constraint.compute_memory_datastructure(start_step, end_step); - } - work_datastructures.compute_work_datastructures(start_step, end_step); -} - -template -void kl_active_schedule::write_schedule(BspSchedule &schedule) { - for (const auto v : instance->vertices()) { - schedule.setAssignedProcessor(v, vector_schedule.assignedProcessor(v)); - schedule.setAssignedSuperstep(v, vector_schedule.assignedSuperstep(v)); - } - schedule.updateNumberOfSupersteps(); -} - -template -void kl_active_schedule::remove_empty_step(unsigned step) { - for (unsigned i = 
step; i < num_steps() - 1; i++) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]) { - vector_schedule.setAssignedSuperstep(node, i); - } - } - std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i + 1]); - work_datastructures.swap_steps(i, i + 1); - if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i, i + 1); - } - } - vector_schedule.number_of_supersteps--; -} - -template -void kl_active_schedule::swap_empty_step_fwd(const unsigned step, const unsigned to_step) { - for (unsigned i = step; i < to_step; i++) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i + 1][proc]) { - vector_schedule.setAssignedSuperstep(node, i); - } - } - std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i + 1]); - work_datastructures.swap_steps(i, i + 1); - if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i, i + 1); - } - } -} - -template -void kl_active_schedule::insert_empty_step(unsigned step) { - unsigned i = vector_schedule.number_of_supersteps++; - - for (; i > step; i--) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[i - 1][proc]) { - vector_schedule.setAssignedSuperstep(node, i); - } - } - std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i - 1]); - work_datastructures.swap_steps(i - 1, i); - if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i - 1, i); - } - } -} - -template -void kl_active_schedule::swap_empty_step_bwd(const unsigned to_step, - const unsigned empty_step) { - unsigned i = to_step; - - for (; i > empty_step; i--) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : 
set_schedule.step_processor_vertices[i - 1][proc]) { - vector_schedule.setAssignedSuperstep(node, i); - } - } - std::swap(set_schedule.step_processor_vertices[i], set_schedule.step_processor_vertices[i - 1]); - work_datastructures.swap_steps(i - 1, i); - if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(i - 1, i); - } - } -} - -template -void kl_active_schedule::swap_steps(const unsigned step1, const unsigned step2) { - if (step1 == step2) { - return; - } - - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (const auto node : set_schedule.step_processor_vertices[step1][proc]) { - vector_schedule.setAssignedSuperstep(node, step2); - } - for (const auto node : set_schedule.step_processor_vertices[step2][proc]) { - vector_schedule.setAssignedSuperstep(node, step1); - } - } - std::swap(set_schedule.step_processor_vertices[step1], set_schedule.step_processor_vertices[step2]); - work_datastructures.swap_steps(step1, step2); - if constexpr (use_memory_constraint) { - memory_constraint.swap_steps(step1, step2); - } -} - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp deleted file mode 100644 index 5003d796..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp +++ /dev/null @@ -1,2003 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "kl_active_schedule.hpp" -#include "kl_util.hpp" -#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp" -#include "osp/auxiliary/misc.hpp" -#include "osp/bsp/model/util/CompatibleProcessorRange.hpp" -#include "osp/bsp/scheduler/ImprovementScheduler.hpp" -#include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" -#include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" -#include "osp/graph_algorithms/directed_graph_util.hpp" - -namespace osp { - -struct kl_parameter { - double time_quality = 0.8; - double superstep_remove_strength = 0.5; - unsigned num_parallel_loops = 4; - - unsigned max_inner_iterations_reset = 500; - unsigned max_no_improvement_iterations = 50; - - constexpr static unsigned abort_scatter_nodes_violation_threshold = 500; - constexpr static unsigned initial_violation_threshold = 250; - - unsigned max_no_vioaltions_removed_backtrack_reset; - unsigned remove_step_epocs; - unsigned node_max_step_selection_epochs; - unsigned max_no_vioaltions_removed_backtrack_for_remove_step_reset; - unsigned max_outer_iterations; - unsigned try_remove_step_after_num_outer_iterations; - unsigned min_inner_iter_reset; - - unsigned thread_min_range = 8; - unsigned thread_range_gap = 0; -}; - -template -struct kl_update_info { - VertexType node = 0; - - bool full_update = false; - bool update_from_step = false; - bool update_to_step = false; - bool update_entire_to_step = false; - bool update_entire_from_step = false; - - kl_update_info() = default; - - kl_update_info(VertexType n) : node(n), full_update(false), update_entire_to_step(false), update_entire_from_step(false) {} - - kl_update_info(VertexType n, bool full) - : node(n), full_update(full), update_entire_to_step(false), update_entire_from_step(false) {} -}; - -template 
-class kl_improver : public ImprovementScheduler { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph concept"); - static_assert(has_hashable_edge_desc_v, "Graph_t must satisfy the has_hashable_edge_desc concept"); - static_assert(is_computational_dag_v, "Graph_t must satisfy the computational_dag concept"); - - protected: - constexpr static unsigned window_range = 2 * window_size + 1; - constexpr static bool enable_quick_moves = true; - constexpr static bool enable_preresolving_violations = true; - constexpr static double EPSILON = 1e-9; - - using memw_t = v_memw_t; - using commw_t = v_commw_t; - using work_weight_t = v_workw_t; - using VertexType = vertex_idx_t; - using EdgeType = edge_desc_t; - - using kl_move = kl_move_struct; - using heap_datastructure = MaxPairingHeap; - using active_schedule_t = kl_active_schedule; - using node_selection_container_t = adaptive_affinity_table; - using kl_gain_update_info = kl_update_info; - - struct ThreadSearchContext { - unsigned thread_id = 0; - unsigned start_step = 0; - unsigned end_step = 0; - unsigned original_end_step = 0; - - vector_vertex_lock_manger lock_manager; - heap_datastructure max_gain_heap; - node_selection_container_t affinity_table; - std::vector> local_affinity_table; - reward_penalty_strategy reward_penalty_strat; - vertex_selection_strategy selection_strategy; - thread_local_active_schedule_data active_schedule_data; - - double average_gain = 0.0; - unsigned max_inner_iterations = 0; - unsigned no_improvement_iterations_reduce_penalty = 0; - unsigned min_inner_iter = 0; - unsigned no_improvement_iterations_increase_inner_iter = 0; - unsigned step_selection_epoch_counter = 0; - unsigned step_selection_counter = 0; - unsigned step_to_remove = 0; - unsigned local_search_start_step = 0; - unsigned unlock_edge_backtrack_counter = 0; - unsigned unlock_edge_backtrack_counter_reset = 0; - unsigned max_no_vioaltions_removed_backtrack = 0; - - inline unsigned num_steps() 
const { return end_step - start_step + 1; } - - inline unsigned start_idx(const unsigned node_step) const { - return node_step < start_step + window_size ? window_size - (node_step - start_step) : 0; - } - - inline unsigned end_idx(unsigned node_step) const { - return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step); - } - }; - - bool compute_with_time_limit = false; - - BspSchedule *input_schedule; - const Graph_t *graph; - const BspInstance *instance; - - CompatibleProcessorRange proc_range; - - kl_parameter parameters; - std::mt19937 gen; - - active_schedule_t active_schedule; - comm_cost_function_t comm_cost_f; - std::vector thread_data_vec; - std::vector thread_finished_vec; - - inline unsigned rel_step_idx(const unsigned node_step, const unsigned move_step) const { - return (move_step >= node_step) ? ((move_step - node_step) + window_size) : (window_size - (node_step - move_step)); - } - - inline bool is_compatible(VertexType node, unsigned proc) const { - return active_schedule.getInstance().isCompatible(node, proc); - } - - void set_start_step(const unsigned step, ThreadSearchContext &thread_data) { - thread_data.start_step = step; - thread_data.step_to_remove = step; - thread_data.step_selection_counter = step; - - thread_data.average_gain = 0.0; - thread_data.max_inner_iterations = parameters.max_inner_iterations_reset; - thread_data.no_improvement_iterations_reduce_penalty = parameters.max_no_improvement_iterations / 5; - thread_data.min_inner_iter = parameters.min_inner_iter_reset; - thread_data.step_selection_epoch_counter = 0; - thread_data.no_improvement_iterations_increase_inner_iter = 10; - thread_data.unlock_edge_backtrack_counter_reset = 0; - thread_data.unlock_edge_backtrack_counter = thread_data.unlock_edge_backtrack_counter_reset; - thread_data.max_no_vioaltions_removed_backtrack = parameters.max_no_vioaltions_removed_backtrack_reset; - } - - kl_move 
get_best_move(node_selection_container_t &affinity_table, - vector_vertex_lock_manger &lock_manager, - heap_datastructure &max_gain_heap) { - // To introduce non-determinism and help escape local optima, if there are multiple moves with the same - // top gain, we randomly select one. We check up to `local_max` ties. - const unsigned local_max = 50; - std::vector top_gain_nodes = max_gain_heap.get_top_keys(local_max); - - if (top_gain_nodes.empty()) { - // This case is guarded by the caller, but for safety: - top_gain_nodes.push_back(max_gain_heap.top()); - } - - std::uniform_int_distribution dis(0, top_gain_nodes.size() - 1); - const VertexType node = top_gain_nodes[dis(gen)]; - - kl_move best_move = max_gain_heap.get_value(node); - max_gain_heap.erase(node); - lock_manager.lock(node); - affinity_table.remove(node); - - return best_move; - } - - inline void process_other_steps_best_move(const unsigned idx, - const unsigned node_step, - const VertexType &node, - const cost_t affinity_current_proc_step, - cost_t &max_gain, - unsigned &max_proc, - unsigned &max_step, - const std::vector> &affinity_table_node) const { - for (const unsigned p : proc_range.compatible_processors_vertex(node)) { - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.can_move(node, p, node_step + idx - window_size)) { - continue; - } - } - - const cost_t gain = affinity_current_proc_step - affinity_table_node[p][idx]; - if (gain > max_gain) { - max_gain = gain; - max_proc = p; - max_step = idx; - } - } - } - - template - kl_move compute_best_move(VertexType node, - const std::vector> &affinity_table_node, - ThreadSearchContext &thread_data) { - const unsigned node_step = active_schedule.assigned_superstep(node); - const unsigned node_proc = active_schedule.assigned_processor(node); - - cost_t max_gain = std::numeric_limits::lowest(); - - unsigned max_proc = std::numeric_limits::max(); - unsigned max_step = std::numeric_limits::max(); - - 
const cost_t affinity_current_proc_step = affinity_table_node[node_proc][window_size]; - - unsigned idx = thread_data.start_idx(node_step); - for (; idx < window_size; idx++) { - process_other_steps_best_move( - idx, node_step, node, affinity_current_proc_step, max_gain, max_proc, max_step, affinity_table_node); - } - - if constexpr (move_to_same_super_step) { - for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - if (proc == node_proc) { - continue; - } - - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.can_move(node, proc, node_step + idx - window_size)) { - continue; - } - } - - const cost_t gain = affinity_current_proc_step - affinity_table_node[proc][window_size]; - if (gain > max_gain) { - max_gain = gain; - max_proc = proc; - max_step = idx; - } - } - } - - idx++; - - const unsigned bound = thread_data.end_idx(node_step); - for (; idx < bound; idx++) { - process_other_steps_best_move( - idx, node_step, node, affinity_current_proc_step, max_gain, max_proc, max_step, affinity_table_node); - } - - return kl_move(node, max_gain, node_proc, node_step, max_proc, node_step + max_step - window_size); - } - - kl_gain_update_info update_node_work_affinity_after_move(VertexType node, - kl_move move, - const pre_move_work_data &prev_work_data, - std::vector> &affinity_table_node) { - const unsigned node_step = active_schedule.assigned_superstep(node); - const work_weight_t vertex_weight = graph->vertex_work_weight(node); - - kl_gain_update_info update_info(node); - - if (move.from_step == move.to_step) { - const unsigned lower_bound = move.from_step > window_size ? 
move.from_step - window_size : 0; - if (lower_bound <= node_step && node_step <= move.from_step + window_size) { - update_info.update_from_step = true; - update_info.update_to_step = true; - - const work_weight_t prev_max_work = prev_work_data.from_step_max_work; - const work_weight_t prev_second_max_work = prev_work_data.from_step_second_max_work; - - if (node_step == move.from_step) { - const unsigned node_proc = active_schedule.assigned_processor(node); - const work_weight_t new_max_weight = active_schedule.get_step_max_work(move.from_step); - const work_weight_t new_second_max_weight = active_schedule.get_step_second_max_work(move.from_step); - const work_weight_t new_step_proc_work = active_schedule.get_step_processor_work(node_step, node_proc); - const work_weight_t prev_step_proc_work - = (node_proc == move.from_proc) ? new_step_proc_work + graph->vertex_work_weight(move.node) - : (node_proc == move.to_proc) ? new_step_proc_work - graph->vertex_work_weight(move.node) - : new_step_proc_work; - const bool prev_is_sole_max_processor = (prev_work_data.from_step_max_work_processor_count == 1) - && (prev_max_work == prev_step_proc_work); - const cost_t prev_node_proc_affinity - = prev_is_sole_max_processor ? std::min(vertex_weight, prev_max_work - prev_second_max_work) : 0.0; - const bool new_is_sole_max_processor = (active_schedule.get_step_max_work_processor_count()[node_step] == 1) - && (new_max_weight == new_step_proc_work); - const cost_t new_node_proc_affinity - = new_is_sole_max_processor ? 
std::min(vertex_weight, new_max_weight - new_second_max_weight) : 0.0; - - const cost_t diff = new_node_proc_affinity - prev_node_proc_affinity; - if (std::abs(diff) > EPSILON) { - update_info.full_update = true; - affinity_table_node[node_proc][window_size] += diff; // Use the pre-calculated diff - } - - if ((prev_max_work != new_max_weight) || update_info.full_update) { - update_info.update_entire_from_step = true; - - for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - if ((proc == node_proc) || (proc == move.from_proc) || (proc == move.to_proc)) { - continue; - } - - const work_weight_t new_weight - = vertex_weight + active_schedule.get_step_processor_work(node_step, proc); - const cost_t prev_other_affinity - = compute_same_step_affinity(prev_max_work, new_weight, prev_node_proc_affinity); - const cost_t other_affinity - = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); - - affinity_table_node[proc][window_size] += (other_affinity - prev_other_affinity); - } - } - - if (node_proc != move.from_proc && is_compatible(node, move.from_proc)) { - const work_weight_t prev_new_weight = vertex_weight - + active_schedule.get_step_processor_work(node_step, move.from_proc) - + graph->vertex_work_weight(move.node); - const cost_t prev_other_affinity - = compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity); - const work_weight_t new_weight - = vertex_weight + active_schedule.get_step_processor_work(node_step, move.from_proc); - const cost_t other_affinity - = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); - affinity_table_node[move.from_proc][window_size] += (other_affinity - prev_other_affinity); - } - - if (node_proc != move.to_proc && is_compatible(node, move.to_proc)) { - const work_weight_t prev_new_weight = vertex_weight - + active_schedule.get_step_processor_work(node_step, move.to_proc) - - graph->vertex_work_weight(move.node); - const cost_t 
prev_other_affinity - = compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity); - const work_weight_t new_weight - = vertex_weight + active_schedule.get_step_processor_work(node_step, move.to_proc); - const cost_t other_affinity - = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); - affinity_table_node[move.to_proc][window_size] += (other_affinity - prev_other_affinity); - } - - } else { - const work_weight_t new_max_weight = active_schedule.get_step_max_work(move.from_step); - const unsigned idx = rel_step_idx(node_step, move.from_step); - if (prev_max_work != new_max_weight) { - update_info.update_entire_from_step = true; - // update moving to all procs with special for move.from_proc - for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - const work_weight_t new_weight - = vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc); - if (proc == move.from_proc) { - const work_weight_t prev_new_weight - = vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc) - + graph->vertex_work_weight(move.node); - const cost_t prev_affinity - = prev_max_work < prev_new_weight - ? static_cast(prev_new_weight) - static_cast(prev_max_work) - : 0.0; - const cost_t new_affinity = new_max_weight < new_weight ? static_cast(new_weight) - - static_cast(new_max_weight) - : 0.0; - affinity_table_node[proc][idx] += new_affinity - prev_affinity; - } else if (proc == move.to_proc) { - const work_weight_t prev_new_weight - = vertex_weight + active_schedule.get_step_processor_work(move.to_step, proc) - - graph->vertex_work_weight(move.node); - const cost_t prev_affinity - = prev_max_work < prev_new_weight - ? static_cast(prev_new_weight) - static_cast(prev_max_work) - : 0.0; - const cost_t new_affinity = new_max_weight < new_weight ? 
static_cast(new_weight) - - static_cast(new_max_weight) - : 0.0; - affinity_table_node[proc][idx] += new_affinity - prev_affinity; - } else { - const cost_t prev_affinity = prev_max_work < new_weight ? static_cast(new_weight) - - static_cast(prev_max_work) - : 0.0; - const cost_t new_affinity = new_max_weight < new_weight ? static_cast(new_weight) - - static_cast(new_max_weight) - : 0.0; - affinity_table_node[proc][idx] += new_affinity - prev_affinity; - } - } - } else { - // update only move.from_proc and move.to_proc - if (is_compatible(node, move.from_proc)) { - const work_weight_t from_new_weight - = vertex_weight + active_schedule.get_step_processor_work(move.from_step, move.from_proc); - const work_weight_t from_prev_new_weight = from_new_weight + graph->vertex_work_weight(move.node); - const cost_t from_prev_affinity - = prev_max_work < from_prev_new_weight - ? static_cast(from_prev_new_weight) - static_cast(prev_max_work) - : 0.0; - - const cost_t from_new_affinity - = new_max_weight < from_new_weight - ? static_cast(from_new_weight) - static_cast(new_max_weight) - : 0.0; - affinity_table_node[move.from_proc][idx] += from_new_affinity - from_prev_affinity; - } - - if (is_compatible(node, move.to_proc)) { - const work_weight_t to_new_weight - = vertex_weight + active_schedule.get_step_processor_work(move.to_step, move.to_proc); - const work_weight_t to_prev_new_weight = to_new_weight - graph->vertex_work_weight(move.node); - const cost_t to_prev_affinity - = prev_max_work < to_prev_new_weight - ? static_cast(to_prev_new_weight) - static_cast(prev_max_work) - : 0.0; - - const cost_t to_new_affinity - = new_max_weight < to_new_weight - ? 
static_cast(to_new_weight) - static_cast(new_max_weight) - : 0.0; - affinity_table_node[move.to_proc][idx] += to_new_affinity - to_prev_affinity; - } - } - } - } - - } else { - const unsigned node_proc = active_schedule.assigned_processor(node); - process_work_update_step(node, - node_step, - node_proc, - vertex_weight, - move.from_step, - move.from_proc, - graph->vertex_work_weight(move.node), - prev_work_data.from_step_max_work, - prev_work_data.from_step_second_max_work, - prev_work_data.from_step_max_work_processor_count, - update_info.update_from_step, - update_info.update_entire_from_step, - update_info.full_update, - affinity_table_node); - process_work_update_step(node, - node_step, - node_proc, - vertex_weight, - move.to_step, - move.to_proc, - -graph->vertex_work_weight(move.node), - prev_work_data.to_step_max_work, - prev_work_data.to_step_second_max_work, - prev_work_data.to_step_max_work_processor_count, - update_info.update_to_step, - update_info.update_entire_to_step, - update_info.full_update, - affinity_table_node); - } - - return update_info; - } - - void process_work_update_step(VertexType node, - unsigned node_step, - unsigned node_proc, - work_weight_t vertex_weight, - unsigned move_step, - unsigned move_proc, - work_weight_t move_correction_node_weight, - const work_weight_t prev_move_step_max_work, - const work_weight_t prev_move_step_second_max_work, - unsigned prev_move_step_max_work_processor_count, - bool &update_step, - bool &update_entire_step, - bool &full_update, - std::vector> &affinity_table_node); - void update_node_work_affinity(node_selection_container_t &nodes, - kl_move move, - const pre_move_work_data &prev_work_data, - std::map &recompute_max_gain); - void update_best_move(VertexType node, - unsigned step, - unsigned proc, - node_selection_container_t &affinity_table, - ThreadSearchContext &thread_data); - void update_best_move(VertexType node, - unsigned step, - node_selection_container_t &affinity_table, - 
ThreadSearchContext &thread_data); - void update_max_gain(kl_move move, - std::map &recompute_max_gain, - ThreadSearchContext &thread_data); - void compute_work_affinity(VertexType node, - std::vector> &affinity_table_node, - ThreadSearchContext &thread_data); - - inline void recompute_node_max_gain(VertexType node, - node_selection_container_t &affinity_table, - ThreadSearchContext &thread_data) { - const auto best_move = compute_best_move(node, affinity_table[node], thread_data); - thread_data.max_gain_heap.update(node, best_move); - } - - inline cost_t compute_same_step_affinity(const work_weight_t &max_work_for_step, - const work_weight_t &new_weight, - const cost_t &node_proc_affinity) { - const cost_t max_work_after_removal = static_cast(max_work_for_step) - node_proc_affinity; - if (new_weight > max_work_after_removal) { - return new_weight - max_work_after_removal; - } - return 0.0; - } - - inline cost_t apply_move(kl_move move, ThreadSearchContext &thread_data) { - active_schedule.apply_move(move, thread_data.active_schedule_data); - comm_cost_f.update_datastructure_after_move(move, thread_data.start_step, thread_data.end_step); - cost_t change_in_cost = -move.gain; - change_in_cost += static_cast(thread_data.active_schedule_data.resolved_violations.size()) - * thread_data.reward_penalty_strat.reward; - change_in_cost -= static_cast(thread_data.active_schedule_data.new_violations.size()) - * thread_data.reward_penalty_strat.penalty; - -#ifdef KL_DEBUG - std::cout << "penalty: " << thread_data.reward_penalty_strat.penalty - << " num violations: " << thread_data.active_schedule_data.current_violations.size() - << " num new violations: " << thread_data.active_schedule_data.new_violations.size() - << ", num resolved violations: " << thread_data.active_schedule_data.resolved_violations.size() - << ", reward: " << thread_data.reward_penalty_strat.reward << std::endl; - std::cout << "apply move, previous cost: " << thread_data.active_schedule_data.cost - << ", 
new cost: " << thread_data.active_schedule_data.cost + change_in_cost << ", " - << (thread_data.active_schedule_data.feasible ? "feasible," : "infeasible,") << std::endl; -#endif - - thread_data.active_schedule_data.update_cost(change_in_cost); - - return change_in_cost; - } - - void run_quick_moves(unsigned &inner_iter, - ThreadSearchContext &thread_data, - const cost_t change_in_cost, - const VertexType best_move_node) { -#ifdef KL_DEBUG - std::cout << "Starting quick moves sequence." << std::endl; -#endif - inner_iter++; - - const size_t num_applied_moves = thread_data.active_schedule_data.applied_moves.size() - 1; - const cost_t saved_cost = thread_data.active_schedule_data.cost - change_in_cost; - - std::unordered_set local_lock; - local_lock.insert(best_move_node); - std::vector quick_moves_stack; - quick_moves_stack.reserve(10 + thread_data.active_schedule_data.new_violations.size() * 2); - - for (const auto &key_value_pair : thread_data.active_schedule_data.new_violations) { - const auto &key = key_value_pair.first; - quick_moves_stack.push_back(key); - } - - while (quick_moves_stack.size() > 0) { - auto next_node_to_move = quick_moves_stack.back(); - quick_moves_stack.pop_back(); - - thread_data.reward_penalty_strat.init_reward_penalty( - static_cast(thread_data.active_schedule_data.current_violations.size()) + 1.0); - compute_node_affinities(next_node_to_move, thread_data.local_affinity_table, thread_data); - kl_move best_quick_move = compute_best_move(next_node_to_move, thread_data.local_affinity_table, thread_data); - - local_lock.insert(next_node_to_move); - if (best_quick_move.gain <= std::numeric_limits::lowest()) { - continue; - } - -#ifdef KL_DEBUG - std::cout << " >>> move node " << best_quick_move.node << " with gain " << best_quick_move.gain - << ", from proc|step: " << best_quick_move.from_proc << "|" << best_quick_move.from_step - << " to: " << best_quick_move.to_proc << "|" << best_quick_move.to_step << std::endl; -#endif - - 
apply_move(best_quick_move, thread_data); - inner_iter++; - - if (thread_data.active_schedule_data.new_violations.size() > 0) { - bool abort = false; - - for (const auto &key_value_pair : thread_data.active_schedule_data.new_violations) { - const auto &key = key_value_pair.first; - if (local_lock.find(key) != local_lock.end()) { - abort = true; - break; - } - quick_moves_stack.push_back(key); - } - - if (abort) { - break; - } - - } else if (thread_data.active_schedule_data.feasible) { - break; - } - } - - if (!thread_data.active_schedule_data.feasible) { - active_schedule.revert_schedule_to_bound(num_applied_moves, - saved_cost, - true, - comm_cost_f, - thread_data.active_schedule_data, - thread_data.start_step, - thread_data.end_step); -#ifdef KL_DEBUG - std::cout << "Ending quick moves sequence with infeasible solution." << std::endl; -#endif - } -#ifdef KL_DEBUG - else { - std::cout << "Ending quick moves sequence with feasible solution." << std::endl; - } -#endif - - thread_data.affinity_table.trim(); - thread_data.max_gain_heap.clear(); - thread_data.reward_penalty_strat.init_reward_penalty(1.0); - insert_gain_heap(thread_data); // Re-initialize the heap with the current state - } - - void resolve_violations(ThreadSearchContext &thread_data) { - auto ¤t_violations = thread_data.active_schedule_data.current_violations; - unsigned num_violations = static_cast(current_violations.size()); - if (num_violations > 0) { -#ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", Starting preresolving violations with " << num_violations - << " initial violations" << std::endl; -#endif - thread_data.reward_penalty_strat.init_reward_penalty(static_cast(num_violations) + 1.0); - std::unordered_set local_lock; - unsigned num_iter = 0; - const unsigned min_iter = num_violations / 4; - while (not current_violations.empty()) { - std::uniform_int_distribution dis(0, current_violations.size() - 1); - auto it = current_violations.begin(); - std::advance(it, 
dis(gen)); - const auto &next_edge = *it; - const VertexType source_v = source(next_edge, *graph); - const VertexType target_v = target(next_edge, *graph); - const bool source_locked = local_lock.find(source_v) != local_lock.end(); - const bool target_locked = local_lock.find(target_v) != local_lock.end(); - - if (source_locked && target_locked) { -#ifdef KL_DEBUG_1 - std::cout << "source, target locked" << std::endl; -#endif - break; - } - - kl_move best_move; - if (source_locked || target_locked) { - const VertexType node = source_locked ? target_v : source_v; - compute_node_affinities(node, thread_data.local_affinity_table, thread_data); - best_move = compute_best_move(node, thread_data.local_affinity_table, thread_data); - } else { - compute_node_affinities(source_v, thread_data.local_affinity_table, thread_data); - kl_move best_source_v_move = compute_best_move(source_v, thread_data.local_affinity_table, thread_data); - compute_node_affinities(target_v, thread_data.local_affinity_table, thread_data); - kl_move best_target_v_move = compute_best_move(target_v, thread_data.local_affinity_table, thread_data); - best_move = best_target_v_move.gain > best_source_v_move.gain ? 
std::move(best_target_v_move) - : std::move(best_source_v_move); - } - - local_lock.insert(best_move.node); - if (best_move.gain <= std::numeric_limits::lowest()) { - continue; - } - - apply_move(best_move, thread_data); - thread_data.affinity_table.insert(best_move.node); -#ifdef KL_DEBUG_1 - std::cout << "move node " << best_move.node << " with gain " << best_move.gain - << ", from proc|step: " << best_move.from_proc << "|" << best_move.from_step - << " to: " << best_move.to_proc << "|" << best_move.to_step << std::endl; -#endif - const unsigned new_num_violations = static_cast(current_violations.size()); - if (new_num_violations == 0) { - break; - } - - if (thread_data.active_schedule_data.new_violations.size() > 0) { - for (const auto &vertex_edge_pair : thread_data.active_schedule_data.new_violations) { - const auto &vertex = vertex_edge_pair.first; - thread_data.affinity_table.insert(vertex); - } - } - - const double gain = static_cast(num_violations) - static_cast(new_num_violations); - num_violations = new_num_violations; - update_avg_gain(gain, num_iter++, thread_data.average_gain); -#ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", preresolving violations with " << num_violations - << " violations, " << num_iter << " #iterations, " << thread_data.average_gain << " average gain" - << std::endl; -#endif - if (num_iter > min_iter && thread_data.average_gain < 0.0) { - break; - } - } - thread_data.average_gain = 0.0; - } - } - - void run_local_search(ThreadSearchContext &thread_data) { -#ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id - << ", start local search, initial schedule cost: " << thread_data.active_schedule_data.cost << " with " - << thread_data.num_steps() << " supersteps." 
<< std::endl; -#endif - std::vector new_nodes; - std::vector unlock_nodes; - std::map recompute_max_gain; - - const auto start_time = std::chrono::high_resolution_clock::now(); - - unsigned no_improvement_iter_counter = 0; - unsigned outer_iter = 0; - - for (; outer_iter < parameters.max_outer_iterations; outer_iter++) { - cost_t initial_inner_iter_cost = thread_data.active_schedule_data.cost; - - reset_inner_search_structures(thread_data); - select_active_nodes(thread_data); - thread_data.reward_penalty_strat.init_reward_penalty( - static_cast(thread_data.active_schedule_data.current_violations.size()) + 1.0); - insert_gain_heap(thread_data); - - unsigned inner_iter = 0; - unsigned violation_removed_count = 0; - unsigned reset_counter = 0; - bool iter_inital_feasible = thread_data.active_schedule_data.feasible; - -#ifdef KL_DEBUG - std::cout << "------ start inner loop ------" << std::endl; - std::cout << "initial node selection: {"; - for (size_t i = 0; i < thread_data.affinity_table.size(); ++i) { - std::cout << thread_data.affinity_table.get_selected_nodes()[i] << ", "; - } - std::cout << "}" << std::endl; -#endif -#ifdef KL_DEBUG_1 - if (not iter_inital_feasible) { - std::cout << "initial solution not feasible, num violations: " - << thread_data.active_schedule_data.current_violations.size() - << ". 
Penalty: " << thread_data.reward_penalty_strat.penalty - << ", reward: " << thread_data.reward_penalty_strat.reward << std::endl; - } -#endif -#ifdef KL_DEBUG_COST_CHECK - active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { - std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() - << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; - } - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { - std::cout << "memory constraint not satisfied" << std::endl; - } - } -#endif - - while (inner_iter < thread_data.max_inner_iterations && thread_data.max_gain_heap.size() > 0) { - kl_move best_move - = get_best_move(thread_data.affinity_table, - thread_data.lock_manager, - thread_data.max_gain_heap); // locks best_move.node and removes it from node_selection - if (best_move.gain <= std::numeric_limits::lowest()) { - break; - } - update_avg_gain(best_move.gain, inner_iter, thread_data.average_gain); -#ifdef KL_DEBUG - std::cout << " >>> move node " << best_move.node << " with gain " << best_move.gain - << ", from proc|step: " << best_move.from_proc << "|" << best_move.from_step - << " to: " << best_move.to_proc << "|" << best_move.to_step << ",avg gain: " << thread_data.average_gain - << std::endl; -#endif - if (inner_iter > thread_data.min_inner_iter && thread_data.average_gain < 0.0) { -#ifdef KL_DEBUG - std::cout << "Negative average gain: " << thread_data.average_gain << ", end local search" << std::endl; -#endif - break; - } - -#ifdef KL_DEBUG - if (not active_schedule.getInstance().isCompatible(best_move.node, best_move.to_proc)) { - std::cout << "move to incompatibe node" << std::endl; - } 
-#endif - - const auto prev_work_data = active_schedule.get_pre_move_work_data(best_move); - const typename comm_cost_function_t::pre_move_comm_data_t prev_comm_data - = comm_cost_f.get_pre_move_comm_data(best_move); - const cost_t change_in_cost = apply_move(best_move, thread_data); -#ifdef KL_DEBUG_COST_CHECK - active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { - std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() - << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; - } - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { - std::cout << "memory constraint not satisfied" << std::endl; - } - } -#endif - if constexpr (enable_quick_moves) { - if (iter_inital_feasible && thread_data.active_schedule_data.new_violations.size() > 0) { - run_quick_moves(inner_iter, thread_data, change_in_cost, best_move.node); -#ifdef KL_DEBUG_COST_CHECK - active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { - std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() - << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" - << std::endl; - } - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { - std::cout << "memory constraint not satisfied" << std::endl; - } - } -#endif - continue; - } - } - - if 
(thread_data.active_schedule_data.current_violations.size() > 0) { - if (thread_data.active_schedule_data.resolved_violations.size() > 0) { - violation_removed_count = 0; - } else { - violation_removed_count++; - - if (violation_removed_count > 3) { - if (reset_counter < thread_data.max_no_vioaltions_removed_backtrack - && ((not iter_inital_feasible) - || (thread_data.active_schedule_data.cost < thread_data.active_schedule_data.best_cost))) { - thread_data.affinity_table.reset_node_selection(); - thread_data.max_gain_heap.clear(); - thread_data.lock_manager.clear(); - thread_data.selection_strategy.select_nodes_violations( - thread_data.affinity_table, - thread_data.active_schedule_data.current_violations, - thread_data.start_step, - thread_data.end_step); -#ifdef KL_DEBUG - std::cout << "Infeasible, and no violations resolved for 5 iterations, reset node selection" - << std::endl; -#endif - thread_data.reward_penalty_strat.init_reward_penalty( - static_cast(thread_data.active_schedule_data.current_violations.size())); - insert_gain_heap(thread_data); - - reset_counter++; - inner_iter++; - continue; - } else { -#ifdef KL_DEBUG - std::cout << "Infeasible, and no violations resolved for 5 iterations, end local search" - << std::endl; -#endif - break; - } - } - } - } - - if (is_local_search_blocked(thread_data)) { - if (not blocked_edge_strategy(best_move.node, unlock_nodes, thread_data)) { - break; - } - } - - thread_data.affinity_table.trim(); - update_affinities(best_move, thread_data, recompute_max_gain, new_nodes, prev_work_data, prev_comm_data); - - for (const auto v : unlock_nodes) { - thread_data.lock_manager.unlock(v); - } - new_nodes.insert(new_nodes.end(), unlock_nodes.begin(), unlock_nodes.end()); - unlock_nodes.clear(); - -#ifdef KL_DEBUG - std::cout << "recmopute max gain: {"; - for (const auto map_pair : recompute_max_gain) { - const auto &key = map_pair.first; - std::cout << key << ", "; - } - std::cout << "}" << std::endl; - std::cout << "new nodes: 
{"; - for (const auto v : new_nodes) { - std::cout << v << ", "; - } - std::cout << "}" << std::endl; -#endif -#ifdef KL_DEBUG_COST_CHECK - active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { - std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() - << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; - } - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { - std::cout << "memory constraint not satisfied" << std::endl; - } - } -#endif - update_max_gain(best_move, recompute_max_gain, thread_data); - insert_new_nodes_gain_heap(new_nodes, thread_data.affinity_table, thread_data); - - recompute_max_gain.clear(); - new_nodes.clear(); - - inner_iter++; - } - -#ifdef KL_DEBUG - std::cout << "--- end inner loop after " << inner_iter - << " inner iterations, gain heap size: " << thread_data.max_gain_heap.size() << ", outer iteraion " - << outer_iter << "/" << parameters.max_outer_iterations - << ", current cost: " << thread_data.active_schedule_data.cost << ", " - << (thread_data.active_schedule_data.feasible ? 
"feasible" : "infeasible") << std::endl; -#endif -#ifdef KL_DEBUG_1 - const unsigned num_steps_tmp = thread_data.end_step; -#endif - active_schedule.revert_to_best_schedule(thread_data.local_search_start_step, - thread_data.step_to_remove, - comm_cost_f, - thread_data.active_schedule_data, - thread_data.start_step, - thread_data.end_step); -#ifdef KL_DEBUG_1 - if (thread_data.local_search_start_step > 0) { - if (num_steps_tmp == thread_data.end_step) { - std::cout << "thread " << thread_data.thread_id << ", removing step " << thread_data.step_to_remove - << " succeded " << std::endl; - } else { - std::cout << "thread " << thread_data.thread_id << ", removing step " << thread_data.step_to_remove - << " failed " << std::endl; - } - } -#endif - -#ifdef KL_DEBUG_COST_CHECK - active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { - std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() - << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; - } - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { - std::cout << "memory constraint not satisfied" << std::endl; - } - } -#endif - - if (compute_with_time_limit) { - auto finish_time = std::chrono::high_resolution_clock::now(); - auto duration = std::chrono::duration_cast(finish_time - start_time).count(); - if (duration > ImprovementScheduler::timeLimitSeconds) { - break; - } - } - - if (other_threads_finished(thread_data.thread_id)) { -#ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", other threads finished, end local search" << std::endl; -#endif - break; - } - - if (initial_inner_iter_cost <= 
thread_data.active_schedule_data.cost) { - no_improvement_iter_counter++; - - if (no_improvement_iter_counter >= parameters.max_no_improvement_iterations) { -#ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", no improvement for " - << parameters.max_no_improvement_iterations << " iterations, end local search" << std::endl; -#endif - break; - } - } else { - no_improvement_iter_counter = 0; - } - - adjust_local_search_parameters(outer_iter, no_improvement_iter_counter, thread_data); - } - -#ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", local search end after " << outer_iter - << " outer iterations, current cost: " << thread_data.active_schedule_data.cost << " with " - << thread_data.num_steps() << " supersteps, vs serial cost " << active_schedule.get_total_work_weight() << "." - << std::endl; -#endif - thread_finished_vec[thread_data.thread_id] = true; - } - - bool other_threads_finished(const unsigned thread_id) { - const size_t num_threads = thread_finished_vec.size(); - if (num_threads == 1) { - return false; - } - - for (size_t i = 0; i < num_threads; i++) { - if (i != thread_id && !thread_finished_vec[i]) { - return false; - } - } - return true; - } - - inline void update_affinities(const kl_move &best_move, - ThreadSearchContext &thread_data, - std::map &recompute_max_gain, - std::vector &new_nodes, - const pre_move_work_data> &prev_work_data, - const typename comm_cost_function_t::pre_move_comm_data_t &prev_comm_data) { - if constexpr (comm_cost_function_t::is_max_comm_cost_function) { - comm_cost_f.update_node_comm_affinity( - best_move, - thread_data, - thread_data.reward_penalty_strat.penalty, - thread_data.reward_penalty_strat.reward, - recompute_max_gain, - new_nodes); // this only updated reward/penalty, collects new_nodes, and fills recompute_max_gain - - // Add nodes from affected steps to new_nodes - // { - // std::unordered_set steps_to_check; - // const unsigned num_steps = 
active_schedule.num_steps(); - - // auto add_steps_range = [&](unsigned center_step) { - // unsigned start = (center_step > window_size) ? center_step - window_size : 0; - // unsigned end = std::min(center_step + window_size, num_steps - 1); - - // // Constrain to thread range - // if (start < thread_data.start_step) - // start = thread_data.start_step; - // if (end > thread_data.end_step) - // end = thread_data.end_step; - - // for (unsigned s = start; s <= end; ++s) { - // steps_to_check.insert(s); - // } - // }; - - // add_steps_range(best_move.from_step); - // add_steps_range(best_move.to_step); - - // for (unsigned step : steps_to_check) { - // for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - // const auto &nodes_in_step = active_schedule.getSetSchedule().step_processor_vertices[step][proc]; - // for (const auto &node : nodes_in_step) { - // if (!thread_data.affinity_table.is_selected(node) && !thread_data.lock_manager.is_locked(node)) { - // new_nodes.push_back(node); - // } - // } - // } - // } - - // // Deduplicate new_nodes - // std::sort(new_nodes.begin(), new_nodes.end()); - // new_nodes.erase(std::unique(new_nodes.begin(), new_nodes.end()), new_nodes.end()); - // } - - // Determine the steps where max/second_max/max_count for work/comm changed - std::unordered_set changed_steps; - - // Check work changes for from_step - if (best_move.from_step == best_move.to_step) { - // Same step - check if max/second_max changed - const auto current_max = active_schedule.get_step_max_work(best_move.from_step); - const auto current_second_max = active_schedule.get_step_second_max_work(best_move.from_step); - const auto current_count = active_schedule.get_step_max_work_processor_count()[best_move.from_step]; - if (current_max != prev_work_data.from_step_max_work - || current_second_max != prev_work_data.from_step_second_max_work - || current_count != prev_work_data.from_step_max_work_processor_count) { - 
changed_steps.insert(best_move.from_step); - } - } else { - // Different steps - check both - const auto current_from_max = active_schedule.get_step_max_work(best_move.from_step); - const auto current_from_second_max = active_schedule.get_step_second_max_work(best_move.from_step); - const auto current_from_count = active_schedule.get_step_max_work_processor_count()[best_move.from_step]; - if (current_from_max != prev_work_data.from_step_max_work - || current_from_second_max != prev_work_data.from_step_second_max_work - || current_from_count != prev_work_data.from_step_max_work_processor_count) { - changed_steps.insert(best_move.from_step); - } - - const auto current_to_max = active_schedule.get_step_max_work(best_move.to_step); - const auto current_to_second_max = active_schedule.get_step_second_max_work(best_move.to_step); - const auto current_to_count = active_schedule.get_step_max_work_processor_count()[best_move.to_step]; - if (current_to_max != prev_work_data.to_step_max_work - || current_to_second_max != prev_work_data.to_step_second_max_work - || current_to_count != prev_work_data.to_step_max_work_processor_count) { - changed_steps.insert(best_move.to_step); - } - } - - for (const auto &[step, step_info] : prev_comm_data.step_data) { - typename comm_cost_function_t::pre_move_comm_data_t::step_info current_info; - // Query current values - const auto current_max = comm_cost_f.comm_ds.step_max_comm(step); - const auto current_second_max = comm_cost_f.comm_ds.step_second_max_comm(step); - const auto current_count = comm_cost_f.comm_ds.step_max_comm_count(step); - - if (current_max != step_info.max_comm || current_second_max != step_info.second_max_comm - || current_count != step_info.max_comm_count) { - changed_steps.insert(step); - } - } - - // Recompute affinities for all active nodes - const size_t active_count = thread_data.affinity_table.size(); - for (size_t i = 0; i < active_count; ++i) { - const VertexType node = 
thread_data.affinity_table.get_selected_nodes()[i]; - - // Determine if this node needs affinity recomputation - // A node needs recomputation if it's in or adjacent to changed steps - const unsigned node_step = active_schedule.assigned_superstep(node); - - // Calculate window bounds for this node once - const int node_lower_bound = static_cast(node_step) - static_cast(window_size); - const unsigned node_upper_bound = node_step + window_size; - - bool needs_update = false; - // Check if any changed step falls within the node's window - for (unsigned step : changed_steps) { - if (static_cast(step) >= node_lower_bound && step <= node_upper_bound) { - needs_update = true; - break; - } - } - - if (needs_update) { - auto &affinity_table_node = thread_data.affinity_table.get_affinity_table(node); - - // Reset affinity table entries to zero - const unsigned num_procs = active_schedule.getInstance().numberOfProcessors(); - for (unsigned p = 0; p < num_procs; ++p) { - for (unsigned idx = 0; idx < affinity_table_node[p].size(); ++idx) { - affinity_table_node[p][idx] = 0; - } - } - - compute_node_affinities(node, affinity_table_node, thread_data); - recompute_max_gain[node] = kl_gain_update_info(node, true); - } - } - } else { - update_node_work_affinity(thread_data.affinity_table, best_move, prev_work_data, recompute_max_gain); - comm_cost_f.update_node_comm_affinity(best_move, - thread_data, - thread_data.reward_penalty_strat.penalty, - thread_data.reward_penalty_strat.reward, - recompute_max_gain, - new_nodes); - } - } - - inline bool blocked_edge_strategy(VertexType node, std::vector &unlock_nodes, ThreadSearchContext &thread_data) { - if (thread_data.unlock_edge_backtrack_counter > 1) { - for (const auto vertex_edge_pair : thread_data.active_schedule_data.new_violations) { - const auto &e = vertex_edge_pair.second; - const auto source_v = source(e, *graph); - const auto target_v = target(e, *graph); - - if (node == source_v && 
thread_data.lock_manager.is_locked(target_v)) { - unlock_nodes.push_back(target_v); - } else if (node == target_v && thread_data.lock_manager.is_locked(source_v)) { - unlock_nodes.push_back(source_v); - } - } -#ifdef KL_DEBUG - std::cout << "Nodes of violated edge locked, backtrack counter: " << thread_data.unlock_edge_backtrack_counter - << std::endl; -#endif - thread_data.unlock_edge_backtrack_counter--; - return true; - } else { -#ifdef KL_DEBUG - std::cout << "Nodes of violated edge locked, end local search" << std::endl; -#endif - return false; // or reset local search and initalize with violating nodes - } - } - - inline void adjust_local_search_parameters(unsigned outer_iter, unsigned no_imp_counter, ThreadSearchContext &thread_data) { - if (no_imp_counter >= thread_data.no_improvement_iterations_reduce_penalty - && thread_data.reward_penalty_strat.initial_penalty > 1.0) { - thread_data.reward_penalty_strat.initial_penalty - = static_cast(std::floor(std::sqrt(thread_data.reward_penalty_strat.initial_penalty))); - thread_data.unlock_edge_backtrack_counter_reset += 1; - thread_data.no_improvement_iterations_reduce_penalty += 15; -#ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", no improvement for " - << thread_data.no_improvement_iterations_reduce_penalty << " iterations, reducing initial penalty to " - << thread_data.reward_penalty_strat.initial_penalty << std::endl; -#endif - } - - if (parameters.try_remove_step_after_num_outer_iterations > 0 - && ((outer_iter + 1) % parameters.try_remove_step_after_num_outer_iterations) == 0) { - thread_data.step_selection_epoch_counter = 0; - ; -#ifdef KL_DEBUG - std::cout << "reset remove epoc counter after " << outer_iter << " iterations." 
<< std::endl; -#endif - } - - if (no_imp_counter >= thread_data.no_improvement_iterations_increase_inner_iter) { - thread_data.min_inner_iter = static_cast(std::ceil(thread_data.min_inner_iter * 2.2)); - thread_data.no_improvement_iterations_increase_inner_iter += 20; -#ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", no improvement for " - << thread_data.no_improvement_iterations_increase_inner_iter << " iterations, increasing min inner iter to " - << thread_data.min_inner_iter << std::endl; -#endif - } - } - - bool is_local_search_blocked(ThreadSearchContext &thread_data); - void set_parameters(vertex_idx_t num_nodes); - void reset_inner_search_structures(ThreadSearchContext &thread_data) const; - void initialize_datastructures(BspSchedule &schedule); - void print_heap(heap_datastructure &max_gain_heap) const; - void cleanup_datastructures(); - void update_avg_gain(const cost_t gain, const unsigned num_iter, double &average_gain); - void insert_gain_heap(ThreadSearchContext &thread_data); - void insert_new_nodes_gain_heap(std::vector &new_nodes, - node_selection_container_t &nodes, - ThreadSearchContext &thread_data); - - inline void compute_node_affinities(VertexType node, - std::vector> &affinity_table_node, - ThreadSearchContext &thread_data) { - compute_work_affinity(node, affinity_table_node, thread_data); - comm_cost_f.compute_comm_affinity(node, - affinity_table_node, - thread_data.reward_penalty_strat.penalty, - thread_data.reward_penalty_strat.reward, - thread_data.start_step, - thread_data.end_step); - } - - void select_active_nodes(ThreadSearchContext &thread_data) { - if (select_nodes_check_remove_superstep(thread_data.step_to_remove, thread_data)) { - active_schedule.swap_empty_step_fwd(thread_data.step_to_remove, thread_data.end_step); - thread_data.end_step--; - thread_data.local_search_start_step = static_cast(thread_data.active_schedule_data.applied_moves.size()); - 
thread_data.active_schedule_data.update_cost(static_cast(-1.0 * instance->synchronisationCosts())); - - if constexpr (enable_preresolving_violations) { - resolve_violations(thread_data); - } - - if (thread_data.active_schedule_data.current_violations.size() > parameters.initial_violation_threshold) { - active_schedule.revert_to_best_schedule(thread_data.local_search_start_step, - thread_data.step_to_remove, - comm_cost_f, - thread_data.active_schedule_data, - thread_data.start_step, - thread_data.end_step); - } else { - thread_data.unlock_edge_backtrack_counter - = static_cast(thread_data.active_schedule_data.current_violations.size()); - thread_data.max_inner_iterations - = std::max(thread_data.unlock_edge_backtrack_counter * 5u, parameters.max_inner_iterations_reset); - thread_data.max_no_vioaltions_removed_backtrack - = parameters.max_no_vioaltions_removed_backtrack_for_remove_step_reset; -#ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", Trying to remove step " << thread_data.step_to_remove - << std::endl; -#endif - return; - } - } - // thread_data.step_to_remove = thread_data.start_step; - thread_data.local_search_start_step = 0; - thread_data.selection_strategy.select_active_nodes( - thread_data.affinity_table, thread_data.start_step, thread_data.end_step); - } - - bool check_remove_superstep(unsigned step); - bool select_nodes_check_remove_superstep(unsigned &step, ThreadSearchContext &thread_data); - - bool scatter_nodes_superstep(unsigned step, ThreadSearchContext &thread_data) { - assert(step <= thread_data.end_step && thread_data.start_step <= step); - bool abort = false; - - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - const std::vector step_proc_node_vec( - active_schedule.getSetSchedule().step_processor_vertices[step][proc].begin(), - active_schedule.getSetSchedule().step_processor_vertices[step][proc].end()); - for (const auto &node : step_proc_node_vec) { - 
thread_data.reward_penalty_strat.init_reward_penalty( - static_cast(thread_data.active_schedule_data.current_violations.size()) + 1.0); - compute_node_affinities(node, thread_data.local_affinity_table, thread_data); - kl_move best_move = compute_best_move(node, thread_data.local_affinity_table, thread_data); - - if (best_move.gain <= std::numeric_limits::lowest()) { - abort = true; - break; - } - - apply_move(best_move, thread_data); - if (thread_data.active_schedule_data.current_violations.size() - > parameters.abort_scatter_nodes_violation_threshold) { - abort = true; - break; - } - - thread_data.affinity_table.insert(node); - // thread_data.selection_strategy.add_neighbours_to_selection(node, thread_data.affinity_table, - // thread_data.start_step, thread_data.end_step); - if (thread_data.active_schedule_data.new_violations.size() > 0) { - for (const auto &vertex_edge_pair : thread_data.active_schedule_data.new_violations) { - const auto &vertex = vertex_edge_pair.first; - thread_data.affinity_table.insert(vertex); - } - } - -#ifdef KL_DEBUG - std::cout << "move node " << best_move.node << " with gain " << best_move.gain - << ", from proc|step: " << best_move.from_proc << "|" << best_move.from_step - << " to: " << best_move.to_proc << "|" << best_move.to_step << std::endl; -#endif - -#ifdef KL_DEBUG_COST_CHECK - active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { - std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() - << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; - } - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { - std::cout << "memory constraint not satisfied" << 
std::endl; - } - } -#endif - } - - if (abort) { - break; - } - } - - if (abort) { - active_schedule.revert_to_best_schedule( - 0, 0, comm_cost_f, thread_data.active_schedule_data, thread_data.start_step, thread_data.end_step); - thread_data.affinity_table.reset_node_selection(); - return false; - } - return true; - } - - void synchronize_active_schedule(const unsigned num_threads) { - if (num_threads == 1) { // single thread case - active_schedule.set_cost(thread_data_vec[0].active_schedule_data.cost); - active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - return; - } - - unsigned write_cursor = thread_data_vec[0].end_step + 1; - for (unsigned i = 1; i < num_threads; ++i) { - auto &thread = thread_data_vec[i]; - if (thread.start_step <= thread.end_step) { - for (unsigned j = thread.start_step; j <= thread.end_step; ++j) { - if (j != write_cursor) { - active_schedule.swap_steps(j, write_cursor); - } - write_cursor++; - } - } - } - active_schedule.getVectorSchedule().number_of_supersteps = write_cursor; - const cost_t new_cost = comm_cost_f.compute_schedule_cost(); - active_schedule.set_cost(new_cost); - } - - public: - kl_improver() : ImprovementScheduler() { - std::random_device rd; - gen = std::mt19937(rd()); - } - - explicit kl_improver(unsigned seed) : ImprovementScheduler() { gen = std::mt19937(seed); } - - virtual ~kl_improver() = default; - - virtual RETURN_STATUS improveSchedule(BspSchedule &schedule) override { - if (schedule.getInstance().numberOfProcessors() < 2) { - return RETURN_STATUS::BEST_FOUND; - } - - const unsigned num_threads = 1; - - thread_data_vec.resize(num_threads); - thread_finished_vec.assign(num_threads, true); - - set_parameters(schedule.getInstance().numberOfVertices()); - initialize_datastructures(schedule); - const cost_t initial_cost = active_schedule.get_cost(); - const unsigned num_steps = schedule.numberOfSupersteps(); - - set_start_step(0, thread_data_vec[0]); - thread_data_vec[0].end_step 
= (num_steps > 0) ? num_steps - 1 : 0; - - auto &thread_data = this->thread_data_vec[0]; - thread_data.active_schedule_data.initialize_cost(active_schedule.get_cost()); - thread_data.selection_strategy.setup(thread_data.start_step, thread_data.end_step); - run_local_search(thread_data); - - synchronize_active_schedule(num_threads); - - if (initial_cost > active_schedule.get_cost()) { - active_schedule.write_schedule(schedule); - cleanup_datastructures(); - return RETURN_STATUS::OSP_SUCCESS; - } else { - cleanup_datastructures(); - return RETURN_STATUS::BEST_FOUND; - } - } - - virtual RETURN_STATUS improveScheduleWithTimeLimit(BspSchedule &schedule) override { - compute_with_time_limit = true; - return improveSchedule(schedule); - } - - virtual void setTimeQualityParameter(const double time_quality) { this->parameters.time_quality = time_quality; } - - virtual void setSuperstepRemoveStrengthParameter(const double superstep_remove_strength) { - this->parameters.superstep_remove_strength = superstep_remove_strength; - } - - virtual std::string getScheduleName() const { return "kl_improver_" + comm_cost_f.name(); } -}; - -template -void kl_improver::set_parameters( - vertex_idx_t num_nodes) { - const unsigned log_num_nodes = (num_nodes > 1) ? static_cast(std::log(num_nodes)) : 1; - - // Total number of outer iterations. Proportional to sqrt N. - parameters.max_outer_iterations - = static_cast(std::sqrt(num_nodes) * (parameters.time_quality * 10.0) / parameters.num_parallel_loops); - - // Number of times to reset the search for violations before giving up. - parameters.max_no_vioaltions_removed_backtrack_reset = parameters.time_quality < 0.75 ? 1 - : parameters.time_quality < 1.0 ? 2 - : 3; - - // Parameters for the superstep removal heuristic. - parameters.max_no_vioaltions_removed_backtrack_for_remove_step_reset - = 3 + static_cast(parameters.superstep_remove_strength * 7); - parameters.node_max_step_selection_epochs = parameters.superstep_remove_strength < 0.75 ? 
1 - : parameters.superstep_remove_strength < 1.0 ? 2 - : 3; - parameters.remove_step_epocs = static_cast(parameters.superstep_remove_strength * 4.0); - - parameters.min_inner_iter_reset = static_cast(log_num_nodes + log_num_nodes * (1.0 + parameters.time_quality)); - - if (parameters.remove_step_epocs > 0) { - parameters.try_remove_step_after_num_outer_iterations = parameters.max_outer_iterations / parameters.remove_step_epocs; - } else { - // Effectively disable superstep removal if remove_step_epocs is 0. - parameters.try_remove_step_after_num_outer_iterations = parameters.max_outer_iterations + 1; - } - - unsigned i = 0; - for (auto &thread : thread_data_vec) { - thread.thread_id = i++; - // The number of nodes to consider in each inner iteration. Proportional to log(N). - thread.selection_strategy.selection_threshold - = static_cast(std::ceil(parameters.time_quality * 10 * log_num_nodes + log_num_nodes)); - } - -#ifdef KL_DEBUG_1 - std::cout << "kl set parameter, number of nodes: " << num_nodes << std::endl; - std::cout << "max outer iterations: " << parameters.max_outer_iterations << std::endl; - std::cout << "max inner iterations: " << parameters.max_inner_iterations_reset << std::endl; - std::cout << "no improvement iterations reduce penalty: " << thread_data_vec[0].no_improvement_iterations_reduce_penalty - << std::endl; - std::cout << "selction threshold: " << thread_data_vec[0].selection_strategy.selection_threshold << std::endl; - std::cout << "remove step epocs: " << parameters.remove_step_epocs << std::endl; - std::cout << "try remove step after num outer iterations: " << parameters.try_remove_step_after_num_outer_iterations - << std::endl; - std::cout << "number of parallel loops: " << parameters.num_parallel_loops << std::endl; -#endif -} - -template -void kl_improver::update_node_work_affinity( - node_selection_container_t &nodes, - kl_move move, - const pre_move_work_data &prev_work_data, - std::map &recompute_max_gain) { - const size_t 
active_count = nodes.size(); - - for (size_t i = 0; i < active_count; ++i) { - const VertexType node = nodes.get_selected_nodes()[i]; - - kl_gain_update_info update_info = update_node_work_affinity_after_move(node, move, prev_work_data, nodes.at(node)); - if (update_info.update_from_step || update_info.update_to_step) { - recompute_max_gain[node] = update_info; - } - } -} - -template -void kl_improver::update_max_gain( - kl_move move, std::map &recompute_max_gain, ThreadSearchContext &thread_data) { - for (auto &pair : recompute_max_gain) { - if (pair.second.full_update) { - recompute_node_max_gain(pair.first, thread_data.affinity_table, thread_data); - } else { - if (pair.second.update_entire_from_step) { - update_best_move(pair.first, move.from_step, thread_data.affinity_table, thread_data); - } else if (pair.second.update_from_step && is_compatible(pair.first, move.from_proc)) { - update_best_move(pair.first, move.from_step, move.from_proc, thread_data.affinity_table, thread_data); - } - - if (move.from_step != move.to_step || not pair.second.update_entire_from_step) { - if (pair.second.update_entire_to_step) { - update_best_move(pair.first, move.to_step, thread_data.affinity_table, thread_data); - } else if (pair.second.update_to_step && is_compatible(pair.first, move.to_proc)) { - update_best_move(pair.first, move.to_step, move.to_proc, thread_data.affinity_table, thread_data); - } - } - } - } -} - -template -void kl_improver::compute_work_affinity( - VertexType node, std::vector> &affinity_table_node, ThreadSearchContext &thread_data) { - const unsigned node_step = active_schedule.assigned_superstep(node); - const work_weight_t vertex_weight = graph->vertex_work_weight(node); - - unsigned step = (node_step > window_size) ? 
(node_step - window_size) : 0; - for (unsigned idx = thread_data.start_idx(node_step); idx < thread_data.end_idx(node_step); ++idx, ++step) { - if (idx == window_size) { - continue; - } - - const cost_t max_work_for_step = static_cast(active_schedule.get_step_max_work(step)); - - for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(step, proc); - const cost_t work_diff = static_cast(new_weight) - max_work_for_step; - affinity_table_node[proc][idx] = std::max(0.0, work_diff); - } - } - - const unsigned node_proc = active_schedule.assigned_processor(node); - const work_weight_t max_work_for_step = active_schedule.get_step_max_work(node_step); - const bool is_sole_max_processor = (active_schedule.get_step_max_work_processor_count()[node_step] == 1) - && (max_work_for_step == active_schedule.get_step_processor_work(node_step, node_proc)); - - const cost_t node_proc_affinity - = is_sole_max_processor ? 
std::min(vertex_weight, max_work_for_step - active_schedule.get_step_second_max_work(node_step)) - : 0.0; - affinity_table_node[node_proc][window_size] = node_proc_affinity; - - for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - if (proc == node_proc) { - continue; - } - - const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(node_step, proc); - affinity_table_node[proc][window_size] = compute_same_step_affinity(max_work_for_step, new_weight, node_proc_affinity); - } -} - -template -void kl_improver::process_work_update_step( - VertexType node, - unsigned node_step, - unsigned node_proc, - work_weight_t vertex_weight, - unsigned move_step, - unsigned move_proc, - work_weight_t move_correction_node_weight, - const work_weight_t prev_move_step_max_work, - const work_weight_t prev_move_step_second_max_work, - unsigned prev_move_step_max_work_processor_count, - bool &update_step, - bool &update_entire_step, - bool &full_update, - std::vector> &affinity_table_node) { - const unsigned lower_bound = move_step > window_size ? move_step - window_size : 0; - if (lower_bound <= node_step && node_step <= move_step + window_size) { - update_step = true; - if (node_step == move_step) { - const work_weight_t new_max_weight = active_schedule.get_step_max_work(move_step); - const work_weight_t new_second_max_weight = active_schedule.get_step_second_max_work(move_step); - const work_weight_t new_step_proc_work = active_schedule.get_step_processor_work(node_step, node_proc); - - const work_weight_t prev_step_proc_work = (node_proc == move_proc) ? new_step_proc_work + move_correction_node_weight - : new_step_proc_work; - const bool prev_is_sole_max_processor = (prev_move_step_max_work_processor_count == 1) - && (prev_move_step_max_work == prev_step_proc_work); - const cost_t prev_node_proc_affinity - = prev_is_sole_max_processor ? 
std::min(vertex_weight, prev_move_step_max_work - prev_move_step_second_max_work) - : 0.0; - - const bool new_is_sole_max_processor = (active_schedule.get_step_max_work_processor_count()[node_step] == 1) - && (new_max_weight == new_step_proc_work); - const cost_t new_node_proc_affinity - = new_is_sole_max_processor ? std::min(vertex_weight, new_max_weight - new_second_max_weight) : 0.0; - - const cost_t diff = new_node_proc_affinity - prev_node_proc_affinity; - const bool update_node_proc_affinity = std::abs(diff) > EPSILON; - if (update_node_proc_affinity) { - full_update = true; - affinity_table_node[node_proc][window_size] += diff; - } - - if ((prev_move_step_max_work != new_max_weight) || update_node_proc_affinity) { - update_entire_step = true; - - for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - if ((proc == node_proc) || (proc == move_proc)) { - continue; - } - - const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(node_step, proc); - const cost_t prev_other_affinity - = compute_same_step_affinity(prev_move_step_max_work, new_weight, prev_node_proc_affinity); - const cost_t other_affinity = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); - - affinity_table_node[proc][window_size] += (other_affinity - prev_other_affinity); - } - } - - if (node_proc != move_proc && is_compatible(node, move_proc)) { - const work_weight_t prev_new_weight - = vertex_weight + active_schedule.get_step_processor_work(node_step, move_proc) + move_correction_node_weight; - const cost_t prev_other_affinity - = compute_same_step_affinity(prev_move_step_max_work, prev_new_weight, prev_node_proc_affinity); - const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(node_step, move_proc); - const cost_t other_affinity = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); - - affinity_table_node[move_proc][window_size] += (other_affinity 
- prev_other_affinity); - } - - } else { - const work_weight_t new_max_weight = active_schedule.get_step_max_work(move_step); - const unsigned idx = rel_step_idx(node_step, move_step); - if (prev_move_step_max_work != new_max_weight) { - update_entire_step = true; - - // update moving to all procs with special for move_proc - for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(move_step, proc); - if (proc != move_proc) { - const cost_t prev_affinity - = prev_move_step_max_work < new_weight - ? static_cast(new_weight) - static_cast(prev_move_step_max_work) - : 0.0; - const cost_t new_affinity = new_max_weight < new_weight - ? static_cast(new_weight) - static_cast(new_max_weight) - : 0.0; - affinity_table_node[proc][idx] += new_affinity - prev_affinity; - - } else { - const work_weight_t prev_new_weight = vertex_weight - + active_schedule.get_step_processor_work(move_step, proc) - + move_correction_node_weight; - const cost_t prev_affinity - = prev_move_step_max_work < prev_new_weight - ? static_cast(prev_new_weight) - static_cast(prev_move_step_max_work) - : 0.0; - - const cost_t new_affinity = new_max_weight < new_weight - ? static_cast(new_weight) - static_cast(new_max_weight) - : 0.0; - affinity_table_node[proc][idx] += new_affinity - prev_affinity; - } - } - } else { - // update only move_proc - if (is_compatible(node, move_proc)) { - const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(move_step, move_proc); - const work_weight_t prev_new_weight = new_weight + move_correction_node_weight; - const cost_t prev_affinity - = prev_move_step_max_work < prev_new_weight - ? static_cast(prev_new_weight) - static_cast(prev_move_step_max_work) - : 0.0; - - const cost_t new_affinity = new_max_weight < new_weight - ? 
static_cast(new_weight) - static_cast(new_max_weight) - : 0.0; - affinity_table_node[move_proc][idx] += new_affinity - prev_affinity; - } - } - } - } -} - -template -bool kl_improver::select_nodes_check_remove_superstep( - unsigned &step_to_remove, ThreadSearchContext &thread_data) { - if (thread_data.step_selection_epoch_counter >= parameters.node_max_step_selection_epochs || thread_data.num_steps() < 3) { - return false; - } - - for (step_to_remove = thread_data.step_selection_counter; step_to_remove <= thread_data.end_step; step_to_remove++) { - assert(step_to_remove >= thread_data.start_step && step_to_remove <= thread_data.end_step); -#ifdef KL_DEBUG - std::cout << "Checking to remove step " << step_to_remove << "/" << thread_data.end_step << std::endl; -#endif - if (check_remove_superstep(step_to_remove)) { -#ifdef KL_DEBUG - std::cout << "Checking to scatter step " << step_to_remove << "/" << thread_data.end_step << std::endl; -#endif - assert(step_to_remove >= thread_data.start_step && step_to_remove <= thread_data.end_step); - if (scatter_nodes_superstep(step_to_remove, thread_data)) { - thread_data.step_selection_counter = step_to_remove + 1; - - if (thread_data.step_selection_counter > thread_data.end_step) { - thread_data.step_selection_counter = thread_data.start_step; - thread_data.step_selection_epoch_counter++; - } - return true; - } - } - } - - thread_data.step_selection_epoch_counter++; - thread_data.step_selection_counter = thread_data.start_step; - return false; -} - -template -bool kl_improver::check_remove_superstep(unsigned step) { - if (active_schedule.num_steps() < 2) { - return false; - } - - if (active_schedule.get_step_max_work(step) < instance->synchronisationCosts()) { - return true; - } - - return false; -} - -template -void kl_improver::reset_inner_search_structures( - ThreadSearchContext &thread_data) const { - thread_data.unlock_edge_backtrack_counter = thread_data.unlock_edge_backtrack_counter_reset; - 
thread_data.max_inner_iterations = parameters.max_inner_iterations_reset; - thread_data.max_no_vioaltions_removed_backtrack = parameters.max_no_vioaltions_removed_backtrack_reset; - thread_data.average_gain = 0.0; - thread_data.affinity_table.reset_node_selection(); - thread_data.max_gain_heap.clear(); - thread_data.lock_manager.clear(); -} - -template -bool kl_improver::is_local_search_blocked( - ThreadSearchContext &thread_data) { - for (const auto &pair : thread_data.active_schedule_data.new_violations) { - if (thread_data.lock_manager.is_locked(pair.first)) { - return true; - } - } - return false; -} - -template -void kl_improver::initialize_datastructures( - BspSchedule &schedule) { - input_schedule = &schedule; - instance = &schedule.getInstance(); - graph = &instance->getComputationalDag(); - - active_schedule.initialize(schedule); - - proc_range.initialize(*instance); - comm_cost_f.initialize(active_schedule, proc_range); - const cost_t initial_cost = comm_cost_f.compute_schedule_cost(); - active_schedule.set_cost(initial_cost); - - for (auto &t_data : thread_data_vec) { - t_data.affinity_table.initialize(active_schedule, t_data.selection_strategy.selection_threshold); - t_data.lock_manager.initialize(graph->num_vertices()); - t_data.reward_penalty_strat.initialize( - active_schedule, comm_cost_f.get_max_comm_weight_multiplied(), active_schedule.get_max_work_weight()); - t_data.selection_strategy.initialize(active_schedule, gen, t_data.start_step, t_data.end_step); - - t_data.local_affinity_table.resize(instance->numberOfProcessors()); - for (unsigned i = 0; i < instance->numberOfProcessors(); ++i) { - t_data.local_affinity_table[i].resize(window_range); - } - } -} - -template -void kl_improver::update_avg_gain(const cost_t gain, - const unsigned num_iter, - double &average_gain) { - average_gain = static_cast((average_gain * num_iter + gain)) / (num_iter + 1.0); -} - -template -void kl_improver::insert_gain_heap( - ThreadSearchContext &thread_data) { - 
const size_t active_count = thread_data.affinity_table.size(); - - for (size_t i = 0; i < active_count; ++i) { - const VertexType node = thread_data.affinity_table.get_selected_nodes()[i]; - compute_node_affinities(node, thread_data.affinity_table.at(node), thread_data); - const auto best_move = compute_best_move(node, thread_data.affinity_table[node], thread_data); - thread_data.max_gain_heap.push(node, best_move); - } -} - -template -void kl_improver::insert_new_nodes_gain_heap( - std::vector &new_nodes, node_selection_container_t &nodes, ThreadSearchContext &thread_data) { - for (const auto &node : new_nodes) { - nodes.insert(node); - compute_node_affinities(node, thread_data.affinity_table.at(node), thread_data); - const auto best_move = compute_best_move(node, thread_data.affinity_table[node], thread_data); - thread_data.max_gain_heap.push(node, best_move); - } -} - -template -void kl_improver::cleanup_datastructures() { - thread_data_vec.clear(); - active_schedule.clear(); -} - -template -void kl_improver::print_heap( - heap_datastructure &max_gain_heap) const { - if (max_gain_heap.is_empty()) { - std::cout << "heap is empty" << std::endl; - return; - } - heap_datastructure temp_heap = max_gain_heap; // requires copy constructor - - std::cout << "heap current size: " << temp_heap.size() << std::endl; - const auto &top_val = temp_heap.get_value(temp_heap.top()); - std::cout << "heap top node " << top_val.node << " gain " << top_val.gain << std::endl; - - unsigned count = 0; - while (!temp_heap.is_empty() && count++ < 15) { - const auto &val = temp_heap.get_value(temp_heap.top()); - std::cout << "node " << val.node << " gain " << val.gain << " to proc " << val.to_proc << " to step " << val.to_step - << std::endl; - temp_heap.pop(); - } -} - -template -void kl_improver::update_best_move( - VertexType node, unsigned step, unsigned proc, node_selection_container_t &affinity_table, ThreadSearchContext &thread_data) { - const unsigned node_proc = 
active_schedule.assigned_processor(node); - const unsigned node_step = active_schedule.assigned_superstep(node); - - if ((node_proc == proc) && (node_step == step)) { - return; - } - - kl_move node_move = thread_data.max_gain_heap.get_value(node); - cost_t max_gain = node_move.gain; - - unsigned max_proc = node_move.to_proc; - unsigned max_step = node_move.to_step; - - if ((max_step == step) && (max_proc == proc)) { - recompute_node_max_gain(node, affinity_table, thread_data); - } else { - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.can_move(node, proc, step)) { - return; - } - } - const unsigned idx = rel_step_idx(node_step, step); - const cost_t gain = affinity_table[node][node_proc][window_size] - affinity_table[node][proc][idx]; - if (gain > max_gain) { - max_gain = gain; - max_proc = proc; - max_step = step; - } - - const cost_t diff = max_gain - node_move.gain; - if ((std::abs(diff) > EPSILON) || (max_proc != node_move.to_proc) || (max_step != node_move.to_step)) { - node_move.gain = max_gain; - node_move.to_proc = max_proc; - node_move.to_step = max_step; - thread_data.max_gain_heap.update(node, node_move); - } - } -} - -template -void kl_improver::update_best_move( - VertexType node, unsigned step, node_selection_container_t &affinity_table, ThreadSearchContext &thread_data) { - const unsigned node_proc = active_schedule.assigned_processor(node); - const unsigned node_step = active_schedule.assigned_superstep(node); - - kl_move node_move = thread_data.max_gain_heap.get_value(node); - cost_t max_gain = node_move.gain; - - unsigned max_proc = node_move.to_proc; - unsigned max_step = node_move.to_step; - - if (max_step == step) { - recompute_node_max_gain(node, affinity_table, thread_data); - } else { - if (node_step != step) { - const unsigned idx = rel_step_idx(node_step, step); - for (const unsigned p : proc_range.compatible_processors_vertex(node)) { - if constexpr 
(active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.can_move(node, p, step)) { - continue; - } - } - const cost_t gain = affinity_table[node][node_proc][window_size] - affinity_table[node][p][idx]; - if (gain > max_gain) { - max_gain = gain; - max_proc = p; - max_step = step; - } - } - } else { - for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - if (proc == node_proc) { - continue; - } - if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.can_move(node, proc, step)) { - continue; - } - } - const cost_t gain = affinity_table[node][node_proc][window_size] - affinity_table[node][proc][window_size]; - if (gain > max_gain) { - max_gain = gain; - max_proc = proc; - max_step = step; - } - } - } - - const cost_t diff = max_gain - node_move.gain; - if ((std::abs(diff) > EPSILON) || (max_proc != node_move.to_proc) || (max_step != node_move.to_step)) { - node_move.gain = max_gain; - node_move.to_proc = max_proc; - node_move.to_step = max_step; - thread_data.max_gain_heap.update(node, node_move); - } - } -} - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_mt.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_mt.hpp deleted file mode 100644 index 1a825331..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_mt.hpp +++ /dev/null @@ -1,160 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include - -#include "kl_improver.hpp" - -namespace osp { - -template -class kl_improver_mt : public kl_improver { - protected: - unsigned max_num_threads = std::numeric_limits::max(); - - void set_thread_boundaries(const unsigned num_threads, const unsigned num_steps, bool last_thread_large_range) { - if (num_threads == 1) { - this->set_start_step(0, this->thread_data_vec[0]); - this->thread_data_vec[0].end_step = (num_steps > 0) ? num_steps - 1 : 0; - this->thread_data_vec[0].original_end_step = this->thread_data_vec[0].end_step; - return; - } else { - const unsigned total_gap_size = (num_threads - 1) * this->parameters.thread_range_gap; - const unsigned bonus = this->parameters.thread_min_range; - const unsigned steps_to_distribute = num_steps - total_gap_size - bonus; - const unsigned base_range = steps_to_distribute / num_threads; - const unsigned remainder = steps_to_distribute % num_threads; - const unsigned large_range_thread_idx = last_thread_large_range ? num_threads - 1 : 0; - - unsigned current_start_step = 0; - for (unsigned i = 0; i < num_threads; ++i) { - this->thread_finished_vec[i] = false; - this->set_start_step(current_start_step, this->thread_data_vec[i]); - unsigned current_range = base_range + (i < remainder ? 
1 : 0); - if (i == large_range_thread_idx) { - current_range += bonus; - } - - const unsigned end_step = current_start_step + current_range - 1; - this->thread_data_vec[i].end_step = end_step; - this->thread_data_vec[i].original_end_step = this->thread_data_vec[i].end_step; - current_start_step = end_step + 1 + this->parameters.thread_range_gap; -#ifdef KL_DEBUG_1 - std::cout << "thread " << i << ": start_step=" << this->thread_data_vec[i].start_step - << ", end_step=" << this->thread_data_vec[i].end_step << std::endl; -#endif - } - } - } - - void set_num_threads(unsigned &num_threads, const unsigned num_steps) { - unsigned max_allowed_threads = 0; - if (num_steps >= this->parameters.thread_min_range + this->parameters.thread_range_gap) { - const unsigned divisor = this->parameters.thread_min_range + this->parameters.thread_range_gap; - if (divisor > 0) { - // This calculation is based on the constraint that one thread's range is - // 'min_range' larger than the others, and all ranges are at least 'min_range'. 
- max_allowed_threads = (num_steps + this->parameters.thread_range_gap - this->parameters.thread_min_range) / divisor; - } else { - max_allowed_threads = num_steps; - } - } else if (num_steps >= this->parameters.thread_min_range) { - max_allowed_threads = 1; - } - - if (num_threads > max_allowed_threads) { - num_threads = max_allowed_threads; - } - - if (num_threads == 0) { - num_threads = 1; - } -#ifdef KL_DEBUG_1 - std::cout << "num threads: " << num_threads << " number of supersteps: " << num_steps - << ", max allowed threads: " << max_allowed_threads << std::endl; -#endif - } - - public: - kl_improver_mt() : kl_improver() {} - - explicit kl_improver_mt(unsigned seed) - : kl_improver(seed) {} - - virtual ~kl_improver_mt() = default; - - void set_max_num_threads(const unsigned num_threads) { max_num_threads = num_threads; } - - virtual RETURN_STATUS improveSchedule(BspSchedule &schedule) override { - if (schedule.getInstance().numberOfProcessors() < 2) { - return RETURN_STATUS::BEST_FOUND; - } - - unsigned num_threads = std::min(max_num_threads, static_cast(omp_get_max_threads())); - set_num_threads(num_threads, schedule.numberOfSupersteps()); - - this->thread_data_vec.resize(num_threads); - this->thread_finished_vec.assign(num_threads, true); - - if (num_threads == 1) { - this->parameters.num_parallel_loops - = 1; // no parallelization with one thread. 
Affects parameters.max_out_iteration calculation in set_parameters() - } - - this->set_parameters(schedule.getInstance().numberOfVertices()); - this->initialize_datastructures(schedule); - const cost_t initial_cost = this->active_schedule.get_cost(); - - for (size_t i = 0; i < this->parameters.num_parallel_loops; ++i) { - set_thread_boundaries(num_threads, schedule.numberOfSupersteps(), i % 2 == 0); - -#pragma omp parallel num_threads(num_threads) - { - const size_t thread_id = static_cast(omp_get_thread_num()); - auto &thread_data = this->thread_data_vec[thread_id]; - thread_data.active_schedule_data.initialize_cost(this->active_schedule.get_cost()); - thread_data.selection_strategy.setup(thread_data.start_step, thread_data.end_step); - this->run_local_search(thread_data); - } - - this->synchronize_active_schedule(num_threads); - if (num_threads > 1) { - this->active_schedule.set_cost(this->comm_cost_f.compute_schedule_cost()); - set_num_threads(num_threads, schedule.numberOfSupersteps()); - this->thread_finished_vec.resize(num_threads); - } - } - - if (initial_cost > this->active_schedule.get_cost()) { - this->active_schedule.write_schedule(schedule); - this->cleanup_datastructures(); - return RETURN_STATUS::OSP_SUCCESS; - } else { - this->cleanup_datastructures(); - return RETURN_STATUS::BEST_FOUND; - } - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp deleted file mode 100644 index 977e693f..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp +++ /dev/null @@ -1,165 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include "kl_improver.hpp" - -namespace osp { - -template -class kl_improver_test : public kl_improver { - using VertexType = vertex_idx_t; - using kl_move = kl_move_struct; - using heap_datastructure = MaxPairingHeap; - using active_schedule_t = kl_active_schedule; - using kl_gain_update_info = kl_update_info; - using node_selection_container_t = adaptive_affinity_table; - - public: - kl_improver_test() : kl_improver() { - this->thread_data_vec.resize(1); - this->thread_finished_vec.assign(1, true); - } - - virtual ~kl_improver_test() = default; - - active_schedule_t &get_active_schedule() { return this->active_schedule; } - - auto &get_affinity_table() { return this->thread_data_vec[0].affinity_table; } - - auto &get_comm_cost_f() { return this->comm_cost_f; } - - void setup_schedule(BspSchedule &schedule) { - this->thread_data_vec.resize(1); - this->set_parameters(schedule.getInstance().getComputationalDag().num_vertices()); - this->thread_data_vec[0].end_step = schedule.numberOfSupersteps() > 0 ? 
schedule.numberOfSupersteps() - 1 : 0; - this->initialize_datastructures(schedule); - this->thread_data_vec[0].active_schedule_data.initialize_cost(this->active_schedule.get_cost()); - } - - void apply_move_test(kl_move move) { this->apply_move(move, this->thread_data_vec[0]); } - - auto &get_max_gain_heap() { return this->thread_data_vec[0].max_gain_heap; } - - auto get_current_cost() { return this->thread_data_vec[0].active_schedule_data.cost; } - - bool is_feasible() { return this->thread_data_vec[0].active_schedule_data.feasible; } - - void compute_violations_test() { this->active_schedule.compute_violations(this->thread_data_vec[0].active_schedule_data); } - - node_selection_container_t &insert_gain_heap_test(const std::vector &n) { - this->thread_data_vec[0].reward_penalty_strat.penalty = 0.0; - this->thread_data_vec[0].reward_penalty_strat.reward = 0.0; - - this->thread_data_vec[0].affinity_table.initialize(this->active_schedule, n.size()); - for (const auto &node : n) { - this->thread_data_vec[0].affinity_table.insert(node); - } - - this->insert_gain_heap(this->thread_data_vec[0]); - - return this->thread_data_vec[0].affinity_table; - } - - node_selection_container_t &insert_gain_heap_test_penalty(const std::vector &n) { - this->thread_data_vec[0].affinity_table.initialize(this->active_schedule, n.size()); - for (const auto &node : n) { - this->thread_data_vec[0].affinity_table.insert(node); - } - this->thread_data_vec[0].reward_penalty_strat.penalty = 5.5; - this->thread_data_vec[0].reward_penalty_strat.reward = 0.0; - - this->insert_gain_heap(this->thread_data_vec[0]); - - return this->thread_data_vec[0].affinity_table; - } - - node_selection_container_t &insert_gain_heap_test_penalty_reward(const std::vector &n) { - this->thread_data_vec[0].affinity_table.initialize(this->active_schedule, n.size()); - for (const auto &node : n) { - this->thread_data_vec[0].affinity_table.insert(node); - } - - 
this->thread_data_vec[0].reward_penalty_strat.init_reward_penalty(); - this->thread_data_vec[0].reward_penalty_strat.reward = 15.0; - - this->insert_gain_heap(this->thread_data_vec[0]); - - return this->thread_data_vec[0].affinity_table; - } - - void update_affinity_table_test(kl_move best_move, node_selection_container_t &node_selection) { - std::map recompute_max_gain; - std::vector new_nodes; - - const auto prev_work_data = this->active_schedule.get_pre_move_work_data(best_move); - const auto prev_comm_data = this->comm_cost_f.get_pre_move_comm_data(best_move); - this->apply_move(best_move, this->thread_data_vec[0]); - - this->thread_data_vec[0].affinity_table.trim(); - this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data, prev_comm_data); - } - - auto run_inner_iteration_test() { - std::map recompute_max_gain; - std::vector new_nodes; - - this->print_heap(this->thread_data_vec[0].max_gain_heap); - - kl_move best_move = this->get_best_move( - this->thread_data_vec[0].affinity_table, - this->thread_data_vec[0].lock_manager, - this->thread_data_vec[0].max_gain_heap); // locks best_move.node and removes it from node_selection - -#ifdef KL_DEBUG - std::cout << "Best move: " << best_move.node << " gain: " << best_move.gain << ", from: " << best_move.from_step << "|" - << best_move.from_proc << " to: " << best_move.to_step << "|" << best_move.to_proc << std::endl; -#endif - - const auto prev_work_data = this->active_schedule.get_pre_move_work_data(best_move); - const auto prev_comm_data = this->comm_cost_f.get_pre_move_comm_data(best_move); - this->apply_move(best_move, this->thread_data_vec[0]); - - this->thread_data_vec[0].affinity_table.trim(); - this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data, prev_comm_data); - -#ifdef KL_DEBUG - std::cout << "New nodes: { "; - for (const auto v : new_nodes) { - std::cout << v << " "; - } - std::cout << "}" << 
std::endl; -#endif - - this->update_max_gain(best_move, recompute_max_gain, this->thread_data_vec[0]); - this->insert_new_nodes_gain_heap(new_nodes, this->thread_data_vec[0].affinity_table, this->thread_data_vec[0]); - - return recompute_max_gain; - } - - bool is_node_locked(VertexType node) const { return this->thread_data_vec[0].lock_manager.is_locked(node); } - - void get_active_schedule_test(BspSchedule &schedule) { this->active_schedule.write_schedule(schedule); } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp deleted file mode 100644 index 9727357f..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp +++ /dev/null @@ -1,89 +0,0 @@ - -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. 
Steiner -*/ - -#pragma once - -// #define KL_DEBUG -// #define KL_DEBUG_1 -// #define KL_DEBUG_COST_CHECK - -#include "comm_cost_modules/kl_bsp_comm_cost.hpp" -#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp" -#include "comm_cost_modules/kl_total_comm_cost.hpp" -#include "kl_improver.hpp" -#include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" - -namespace osp { - -using double_cost_t = double; - -template -using kl_total_comm_improver - = kl_improver, - MemoryConstraint_t, - window_size, - double_cost_t>; - -template , - unsigned window_size = 1, - bool use_node_communication_costs_arg = true> -using kl_total_comm_improver_local_mem_constr - = kl_improver, - MemoryConstraint_t, - window_size, - double_cost_t>; - -template -using kl_total_lambda_comm_improver - = kl_improver, - MemoryConstraint_t, - window_size, - double_cost_t>; - -template , unsigned window_size = 1> -using kl_total_lambda_comm_improver_local_mem_constr - = kl_improver, - MemoryConstraint_t, - window_size, - double_cost_t>; - -template -using kl_bsp_comm_improver = kl_improver, - MemoryConstraint_t, - window_size, - double_cost_t>; - -template , unsigned window_size = 1> -using kl_bsp_comm_improver_local_mem_constr - = kl_improver, - MemoryConstraint_t, - window_size, - double_cost_t>; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp deleted file mode 100644 index 1d70f3eb..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp +++ /dev/null @@ -1,53 +0,0 @@ - -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include "comm_cost_modules/kl_bsp_comm_cost.hpp" -#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp" -#include "comm_cost_modules/kl_total_comm_cost.hpp" -#include "kl_improver_mt.hpp" -#include "kl_include.hpp" - -namespace osp { - -template -using kl_total_comm_improver_mt - = kl_improver_mt, - MemoryConstraint_t, - window_size, - double>; - -template -using kl_total_lambda_comm_improver_mt - = kl_improver_mt, - MemoryConstraint_t, - window_size, - double>; - -template -using kl_bsp_comm_improver_mt - = kl_improver_mt, MemoryConstraint_t, window_size, double>; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp deleted file mode 100644 index 397f0a1f..00000000 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp +++ /dev/null @@ -1,439 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
- -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include - -#include "kl_active_schedule.hpp" - -namespace osp { - -template -struct reward_penalty_strategy { - kl_active_schedule_t *active_schedule; - cost_t max_weight; - - unsigned violations_threshold = 0; - cost_t initial_penalty = 10.0; - cost_t penalty = 0; - cost_t reward = 0; - - void initialize(kl_active_schedule_t &sched, const cost_t max_comm, const cost_t max_work) { - max_weight = std::max(max_work, max_comm * sched.getInstance().communicationCosts()); - active_schedule = &sched; - initial_penalty = static_cast(std::sqrt(max_weight)); - } - - void init_reward_penalty(double multiplier = 1.0) { - multiplier = std::min(multiplier, 10.0); - penalty = static_cast(initial_penalty * multiplier); - reward = static_cast(max_weight * multiplier); - } -}; - -template -struct set_vertex_lock_manger { - std::unordered_set locked_nodes; - - void initialize(size_t) {} - - void lock(VertexType node) { locked_nodes.insert(node); } - - void unlock(VertexType node) { locked_nodes.erase(node); } - - bool is_locked(VertexType node) { return locked_nodes.find(node) != locked_nodes.end(); } - - void clear() { locked_nodes.clear(); } -}; - -template -struct vector_vertex_lock_manger { - std::vector locked_nodes; - - void initialize(size_t num_nodes) { locked_nodes.resize(num_nodes); } - - void lock(VertexType node) { locked_nodes[node] = true; } - - void unlock(VertexType node) { locked_nodes[node] = false; } - - bool is_locked(VertexType node) { return locked_nodes[node]; } - - void clear() { locked_nodes.assign(locked_nodes.size(), false); } -}; - -template -struct adaptive_affinity_table { - constexpr static unsigned window_range = 2 * window_size + 1; - using VertexType = vertex_idx_t; - - private: - const kl_active_schedule_t *active_schedule; - const Graph_t *graph; - - std::vector node_is_selected; - std::vector selected_nodes_idx; - - std::vector>> affinity_table; 
- std::vector selected_nodes; - - std::vector gaps; - size_t last_idx; - - public: - void initialize(const kl_active_schedule_t &sche_, const std::size_t initial_table_size) { - active_schedule = &sche_; - graph = &(sche_.getInstance().getComputationalDag()); - - last_idx = 0; - - node_is_selected.resize(graph->num_vertices()); - selected_nodes_idx.resize(graph->num_vertices()); - selected_nodes.resize(initial_table_size); - - node_is_selected.assign(node_is_selected.size(), false); - - affinity_table.resize(initial_table_size); - const unsigned num_procs = sche_.getInstance().numberOfProcessors(); - for (auto &table : affinity_table) { - table.resize(num_procs); - for (auto &row : table) { - row.resize(window_range); - } - } - } - - inline std::vector &get_selected_nodes() { return selected_nodes; } - - inline const std::vector &get_selected_nodes() const { return selected_nodes; } - - inline size_t size() const { return last_idx - gaps.size(); } - - inline bool is_selected(VertexType node) const { return node_is_selected[node]; } - - inline const std::vector &get_selected_nodes_indices() const { return selected_nodes_idx; } - - inline size_t get_selected_nodes_idx(VertexType node) const { return selected_nodes_idx[node]; } - - inline std::vector> &operator[](VertexType node) { - assert(node_is_selected[node]); - return affinity_table[selected_nodes_idx[node]]; - } - - inline std::vector> &at(VertexType node) { - assert(node_is_selected[node]); - return affinity_table[selected_nodes_idx[node]]; - } - - inline const std::vector> &at(VertexType node) const { - assert(node_is_selected[node]); - return affinity_table[selected_nodes_idx[node]]; - } - - inline std::vector> &get_affinity_table(VertexType node) { - assert(node_is_selected[node]); - return affinity_table[selected_nodes_idx[node]]; - } - - bool insert(VertexType node) { - if (node_is_selected[node]) { - return false; // Node is already in the table. 
- } - - size_t insert_location; - if (!gaps.empty()) { - insert_location = gaps.back(); - gaps.pop_back(); - } else { - insert_location = last_idx; - - if (insert_location >= selected_nodes.size()) { - const size_t old_size = selected_nodes.size(); - const size_t new_size = std::min(old_size * 2, static_cast(graph->num_vertices())); - - selected_nodes.resize(new_size); - affinity_table.resize(new_size); - - const unsigned num_procs = active_schedule->getInstance().numberOfProcessors(); - for (size_t i = old_size; i < new_size; ++i) { - affinity_table[i].resize(num_procs); - for (auto &row : affinity_table[i]) { - row.resize(window_range); - } - } - } - last_idx++; - } - - node_is_selected[node] = true; - selected_nodes_idx[node] = insert_location; - selected_nodes[insert_location] = node; - - return true; - } - - void remove(VertexType node) { - assert(node_is_selected[node]); - node_is_selected[node] = false; - - gaps.push_back(selected_nodes_idx[node]); - } - - void reset_node_selection() { - node_is_selected.assign(node_is_selected.size(), false); - gaps.clear(); - last_idx = 0; - } - - void clear() { - node_is_selected.clear(); - selected_nodes_idx.clear(); - affinity_table.clear(); - selected_nodes.clear(); - gaps.clear(); - last_idx = 0; - } - - void trim() { - while (!gaps.empty() && last_idx > 0) { - size_t last_element_idx = last_idx - 1; - - // The last element could be a gap itself. If so, just shrink the size. - // We don't need to touch the `gaps` vector, as it will be cleared. - if (!node_is_selected[selected_nodes[last_element_idx]]) { - last_idx--; - continue; - } - - size_t gap_idx = gaps.back(); - gaps.pop_back(); - - // If the gap we picked is now at or after the end, we can ignore it. 
- if (gap_idx >= last_idx) { - continue; - } - - VertexType node_to_move = selected_nodes[last_element_idx]; - - std::swap(affinity_table[gap_idx], affinity_table[last_element_idx]); - std::swap(selected_nodes[gap_idx], selected_nodes[last_element_idx]); - selected_nodes_idx[node_to_move] = gap_idx; - - last_idx--; - } - gaps.clear(); - } -}; - -template -struct static_affinity_table { - constexpr static unsigned window_range = 2 * window_size + 1; - using VertexType = vertex_idx_t; - - private: - const kl_active_schedule_t *active_schedule; - const Graph_t *graph; - - std::unordered_set selected_nodes; - - std::vector>> affinity_table; - - public: - void initialize(const kl_active_schedule_t &sche_, const std::size_t) { - active_schedule = &sche_; - graph = &(sche_.getInstance().getComputationalDag()); - - affinity_table.resize(graph->num_vertices()); - const unsigned num_procs = sche_.getInstance().numberOfProcessors(); - for (auto &table : affinity_table) { - table.resize(num_procs); - for (auto &row : table) { - row.resize(window_range); - } - } - } - - inline std::vector get_selected_nodes() const { return {selected_nodes.begin(), selected_nodes.end()}; } - - inline size_t size() const { return selected_nodes.size(); } - - inline bool is_selected(VertexType node) const { return selected_nodes.find(node) != selected_nodes.end(); } - - inline std::vector> &operator[](VertexType node) { return affinity_table[node]; } - - inline std::vector> &at(VertexType node) { return affinity_table[node]; } - - inline const std::vector> &at(VertexType node) const { return affinity_table[node]; } - - inline std::vector> &get_affinity_table(VertexType node) { return affinity_table[node]; } - - bool insert(VertexType node) { - const auto pair = selected_nodes.insert(node); - return pair.second; - } - - void remove(VertexType node) { selected_nodes.erase(node); } - - void reset_node_selection() { selected_nodes.clear(); } - - void clear() { - affinity_table.clear(); - 
selected_nodes.clear(); - } - - void trim() {} -}; - -template -struct vertex_selection_strategy { - using EdgeType = edge_desc_t; - - const kl_active_schedule_t *active_schedule; - const Graph_t *graph; - std::mt19937 *gen; - std::size_t selection_threshold = 0; - unsigned strategy_counter = 0; - - std::vector> permutation; - std::size_t permutation_idx; - - unsigned max_work_counter = 0; - - inline void initialize(const kl_active_schedule_t &sche_, - std::mt19937 &gen_, - const unsigned start_step, - const unsigned end_step) { - active_schedule = &sche_; - graph = &(sche_.getInstance().getComputationalDag()); - gen = &gen_; - - permutation.reserve(graph->num_vertices() / active_schedule->num_steps() * (end_step - start_step)); - } - - inline void setup(const unsigned start_step, const unsigned end_step) { - max_work_counter = start_step; - strategy_counter = 0; - permutation.clear(); - - const unsigned num_procs = active_schedule->getInstance().numberOfProcessors(); - for (unsigned step = start_step; step <= end_step; ++step) { - const auto &processor_vertices = active_schedule->getSetSchedule().step_processor_vertices[step]; - for (unsigned proc = 0; proc < num_procs; ++proc) { - for (const auto node : processor_vertices[proc]) { - permutation.push_back(node); - } - } - } - - permutation_idx = 0; - std::shuffle(permutation.begin(), permutation.end(), *gen); - } - - void add_neighbours_to_selection(vertex_idx_t node, - container_t &nodes, - const unsigned start_step, - const unsigned end_step) { - for (const auto parent : graph->parents(node)) { - const unsigned parent_step = active_schedule->assigned_superstep(parent); - if (parent_step >= start_step && parent_step <= end_step) { - nodes.insert(parent); - } - } - - for (const auto child : graph->children(node)) { - const unsigned child_step = active_schedule->assigned_superstep(child); - if (child_step >= start_step && child_step <= end_step) { - nodes.insert(child); - } - } - } - - inline void 
select_active_nodes(container_t &node_selection, const unsigned start_step, const unsigned end_step) { - if (strategy_counter < 3) { - select_nodes_permutation_threshold(selection_threshold, node_selection); - } else if (strategy_counter == 4) { - select_nodes_max_work_proc(selection_threshold, node_selection, start_step, end_step); - } - - strategy_counter++; - strategy_counter %= 5; - } - - void select_nodes_violations(container_t &node_selection, - std::unordered_set ¤t_violations, - const unsigned start_step, - const unsigned end_step) { - for (const auto &edge : current_violations) { - const auto source_v = source(edge, *graph); - const auto target_v = target(edge, *graph); - - const unsigned source_step = active_schedule->assigned_superstep(source_v); - if (source_step >= start_step && source_step <= end_step) { - node_selection.insert(source_v); - } - - const unsigned target_step = active_schedule->assigned_superstep(target_v); - if (target_step >= start_step && target_step <= end_step) { - node_selection.insert(target_v); - } - } - } - - void select_nodes_permutation_threshold(const std::size_t &threshold, container_t &node_selection) { - const size_t bound = std::min(threshold + permutation_idx, permutation.size()); - for (std::size_t i = permutation_idx; i < bound; i++) { - node_selection.insert(permutation[i]); - } - - permutation_idx = bound; - if (permutation_idx + threshold >= permutation.size()) { - permutation_idx = 0; - std::shuffle(permutation.begin(), permutation.end(), *gen); - } - } - - void select_nodes_max_work_proc(const std::size_t &threshold, - container_t &node_selection, - const unsigned start_step, - const unsigned end_step) { - while (node_selection.size() < threshold) { - if (max_work_counter > end_step) { - max_work_counter = start_step; // wrap around - break; // stop after one full pass - } - - select_nodes_max_work_proc_helper(threshold - node_selection.size(), max_work_counter, node_selection); - max_work_counter++; - } - } - - 
void select_nodes_max_work_proc_helper(const std::size_t &threshold, unsigned step, container_t &node_selection) { - const unsigned num_max_work_proc = active_schedule->work_datastructures.step_max_work_processor_count[step]; - for (unsigned idx = 0; idx < num_max_work_proc; idx++) { - const unsigned proc = active_schedule->work_datastructures.step_processor_work_[step][idx].proc; - const std::unordered_set> step_proc_vert - = active_schedule->getSetSchedule().step_processor_vertices[step][proc]; - const size_t num_insert = std::min(threshold - node_selection.size(), step_proc_vert.size()); - auto end_it = step_proc_vert.begin(); - std::advance(end_it, num_insert); - std::for_each(step_proc_vert.begin(), end_it, [&](const auto &val) { node_selection.insert(val); }); - } - } -}; - -} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp index bc0ed8eb..bbbe7505 100644 --- a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp @@ -32,114 +32,112 @@ namespace osp { * */ template -struct is_local_search_memory_constraint : std::false_type {}; +struct IsLocalSearchMemoryConstraint : std::false_type {}; template -struct is_local_search_memory_constraint< +struct IsLocalSearchMemoryConstraint< T, - std::void_t().initialize(std::declval>(), - std::declval>())), - decltype(std::declval().apply_move(std::declval>(), - std::declval(), - std::declval(), - std::declval(), - std::declval())), - decltype(std::declval().compute_memory_datastructure(std::declval(), std::declval())), - decltype(std::declval().swap_steps(std::declval(), std::declval())), - decltype(std::declval().reset_superstep(std::declval())), - decltype(std::declval().override_superstep( + std::void_t().Initialize(std::declval>(), + std::declval>())), + 
decltype(std::declval().ApplyMove(std::declval>(), + std::declval(), + std::declval(), + std::declval(), + std::declval())), + decltype(std::declval().ComputeMemoryDatastructure(std::declval(), std::declval())), + decltype(std::declval().SwapSteps(std::declval(), std::declval())), + decltype(std::declval().ResetSuperstep(std::declval())), + decltype(std::declval().OverrideSuperstep( std::declval(), std::declval(), std::declval(), std::declval())), - decltype(std::declval().can_move( - std::declval>(), std::declval(), std::declval())), - decltype(std::declval().clear()), + decltype(std::declval().CanMove( + std::declval>(), std::declval(), std::declval())), + decltype(std::declval().Clear()), decltype(T())>> : std::true_type {}; template -inline constexpr bool is_local_search_memory_constraint_v = is_local_search_memory_constraint::value; +inline constexpr bool isLocalSearchMemoryConstraintV = IsLocalSearchMemoryConstraint::value; /** * @brief The default memory constraint type, no memory constraints apply. * */ -struct no_local_search_memory_constraint { - using Graph_impl_t = void; +struct NoLocalSearchMemoryConstraint { + using GraphImplT = void; }; /** * @brief A memory constraint module for local memory constraints. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. 
*/ -template -struct ls_local_memory_constraint { - using Graph_impl_t = Graph_t; +template +struct LsLocalMemoryConstraint { + using GraphImplT = GraphT; - const SetSchedule *set_schedule; - const Graph_t *graph; + const SetSchedule *setSchedule_; + const GraphT *graph_; - std::vector>> step_processor_memory; + std::vector>> stepProcessorMemory_; - ls_local_memory_constraint() : set_schedule(nullptr), graph(nullptr) {} + LsLocalMemoryConstraint() : setSchedule_(nullptr), graph_(nullptr) {} - inline void initialize(const SetSchedule &set_schedule_, const VectorSchedule &) { - if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL) { + inline void Initialize(const SetSchedule &setSchedule, const VectorSchedule &) { + if (setSchedule.GetInstance().GetArchitecture().GetMemoryConstraintType() != MemoryConstraintType::LOCAL) { throw std::invalid_argument("Memory constraint type is not LOCAL"); } - set_schedule = &set_schedule_; - graph = &set_schedule->getInstance().getComputationalDag(); - step_processor_memory = std::vector>>( - set_schedule->numberOfSupersteps(), - std::vector>(set_schedule->getInstance().numberOfProcessors(), 0)); + setSchedule_ = &setSchedule; + graph_ = &setSchedule_->GetInstance().GetComputationalDag(); + stepProcessorMemory_ = std::vector>>( + setSchedule_->NumberOfSupersteps(), std::vector>(setSchedule_->GetInstance().NumberOfProcessors(), 0)); } - inline void apply_move(vertex_idx_t vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, unsigned to_step) { - step_processor_memory[to_step][to_proc] += graph->vertex_mem_weight(vertex); - step_processor_memory[from_step][from_proc] -= graph->vertex_mem_weight(vertex); + inline void ApplyMove(VertexIdxT vertex, unsigned fromProc, unsigned fromStep, unsigned toProc, unsigned toStep) { + stepProcessorMemory_[toStep][toProc] += graph_->VertexMemWeight(vertex); + stepProcessorMemory_[fromStep][fromProc] -= 
graph_->VertexMemWeight(vertex); } - inline bool can_move(vertex_idx_t vertex, const unsigned proc, unsigned step) const { - return step_processor_memory[step][proc] + graph->vertex_mem_weight(vertex) - <= set_schedule->getInstance().getArchitecture().memoryBound(proc); + inline bool CanMove(VertexIdxT vertex, const unsigned proc, unsigned step) const { + return stepProcessorMemory_[step][proc] + graph_->VertexMemWeight(vertex) + <= setSchedule_->GetInstance().GetArchitecture().MemoryBound(proc); } - void swap_steps(const unsigned step1, const unsigned step2) { - std::swap(step_processor_memory[step1], step_processor_memory[step2]); + void SwapSteps(const unsigned step1, const unsigned step2) { + std::swap(stepProcessorMemory_[step1], stepProcessorMemory_[step2]); } - void compute_memory_datastructure(unsigned start_step, unsigned end_step) { - for (unsigned step = start_step; step <= end_step; step++) { - for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) { - step_processor_memory[step][proc] = 0; + void ComputeMemoryDatastructure(unsigned startStep, unsigned endStep) { + for (unsigned step = startStep; step <= endStep; step++) { + for (unsigned proc = 0; proc < setSchedule_->GetInstance().NumberOfProcessors(); proc++) { + stepProcessorMemory_[step][proc] = 0; - for (const auto &node : set_schedule->step_processor_vertices[step][proc]) { - step_processor_memory[step][proc] += graph->vertex_mem_weight(node); + for (const auto &node : setSchedule_->stepProcessorVertices_[step][proc]) { + stepProcessorMemory_[step][proc] += graph_->VertexMemWeight(node); } } } } - inline void clear() { step_processor_memory.clear(); } + inline void Clear() { stepProcessorMemory_.clear(); } - inline void forward_move(vertex_idx_t vertex, unsigned, unsigned, unsigned to_proc, unsigned to_step) { - step_processor_memory[to_step][to_proc] += graph->vertex_mem_weight(vertex); - // step_processor_memory[from_step][from_proc] -= 
graph->vertex_mem_weight(vertex); + inline void ForwardMove(VertexIdxT vertex, unsigned, unsigned, unsigned toProc, unsigned toStep) { + stepProcessorMemory_[toStep][toProc] += graph_->VertexMemWeight(vertex); } - inline void reset_superstep(unsigned step) { - for (unsigned proc = 0; proc < set_schedule->getInstance().getArchitecture().numberOfProcessors(); proc++) { - step_processor_memory[step][proc] = 0; + inline void ResetSuperstep(unsigned step) { + for (unsigned proc = 0; proc < setSchedule_->GetInstance().GetArchitecture().NumberOfProcessors(); proc++) { + stepProcessorMemory_[step][proc] = 0; } } - void override_superstep(unsigned step, unsigned proc, unsigned with_step, unsigned with_proc) { - step_processor_memory[step][proc] = step_processor_memory[with_step][with_proc]; + void OverrideSuperstep(unsigned step, unsigned proc, unsigned withStep, unsigned withProc) { + stepProcessorMemory_[step][proc] = stepProcessorMemory_[withStep][withProc]; } - bool satisfied_memory_constraint() const { - for (unsigned step = 0; step < set_schedule->numberOfSupersteps(); step++) { - for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) { - if (step_processor_memory[step][proc] > set_schedule->getInstance().getArchitecture().memoryBound(proc)) { + bool SatisfiedMemoryConstraint() const { + for (unsigned step = 0; step < setSchedule_->NumberOfSupersteps(); step++) { + for (unsigned proc = 0; proc < setSchedule_->GetInstance().NumberOfProcessors(); proc++) { + if (stepProcessorMemory_[step][proc] > setSchedule_->GetInstance().GetArchitecture().MemoryBound(proc)) { return false; } } @@ -148,115 +146,114 @@ struct ls_local_memory_constraint { } }; -template -struct ls_local_inc_edges_memory_constraint { - using Graph_impl_t = Graph_t; +template +struct LsLocalIncEdgesMemoryConstraint { + using GraphImplT = GraphT; - const SetSchedule *set_schedule; - const VectorSchedule *vector_schedule; - const Graph_t *graph; + const SetSchedule 
*setSchedule_; + const VectorSchedule *vectorSchedule_; + const GraphT *graph_; - std::vector>> step_processor_memory; - std::vector>>> step_processor_pred; + std::vector>> stepProcessorMemory_; + std::vector>>> stepProcessorPred_; - ls_local_inc_edges_memory_constraint() : set_schedule(nullptr), vector_schedule(nullptr), graph(nullptr) {} + LsLocalIncEdgesMemoryConstraint() : setSchedule_(nullptr), vectorSchedule_(nullptr), graph_(nullptr) {} - inline void initialize(const SetSchedule &set_schedule_, const VectorSchedule &vec_schedule_) { - if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES) { + inline void Initialize(const SetSchedule &setSchedule, const VectorSchedule &vecSchedule) { + if (setSchedule.GetInstance().GetArchitecture().GetMemoryConstraintType() != MemoryConstraintType::LOCAL_INC_EDGES) { throw std::invalid_argument("Memory constraint type is not LOCAL_INC_EDGES"); } - set_schedule = &set_schedule_; - vector_schedule = &vec_schedule_; - graph = &set_schedule->getInstance().getComputationalDag(); - step_processor_memory = std::vector>>( - set_schedule->numberOfSupersteps(), - std::vector>(set_schedule->getInstance().numberOfProcessors(), 0)); - step_processor_pred = std::vector>>>( - set_schedule->numberOfSupersteps(), - std::vector>>(set_schedule->getInstance().numberOfProcessors())); + setSchedule_ = &setSchedule; + vectorSchedule_ = &vecSchedule; + graph_ = &setSchedule_->GetInstance().GetComputationalDag(); + stepProcessorMemory_ = std::vector>>( + setSchedule_->NumberOfSupersteps(), std::vector>(setSchedule_->GetInstance().NumberOfProcessors(), 0)); + stepProcessorPred_ = std::vector>>>( + setSchedule_->NumberOfSupersteps(), + std::vector>>(setSchedule_->GetInstance().NumberOfProcessors())); } - inline void apply_move(vertex_idx_t vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, unsigned to_step) { - step_processor_memory[to_step][to_proc] += 
graph->vertex_comm_weight(vertex); - step_processor_memory[from_step][from_proc] -= graph->vertex_comm_weight(vertex); + inline void ApplyMove(VertexIdxT vertex, unsigned fromProc, unsigned fromStep, unsigned toProc, unsigned toStep) { + stepProcessorMemory_[toStep][toProc] += graph_->VertexCommWeight(vertex); + stepProcessorMemory_[fromStep][fromProc] -= graph_->VertexCommWeight(vertex); - for (const auto &pred : graph->parents(vertex)) { - if (vector_schedule->assignedSuperstep(pred) < to_step) { - auto pair = step_processor_pred[to_step][to_proc].insert(pred); + for (const auto &pred : graph_->Parents(vertex)) { + if (vectorSchedule_->AssignedSuperstep(pred) < toStep) { + auto pair = stepProcessorPred_[toStep][toProc].insert(pred); if (pair.second) { - step_processor_memory[to_step][to_proc] += graph->vertex_comm_weight(pred); + stepProcessorMemory_[toStep][toProc] += graph_->VertexCommWeight(pred); } } - if (vector_schedule->assignedSuperstep(pred) < from_step) { + if (vectorSchedule_->AssignedSuperstep(pred) < fromStep) { bool remove = true; - for (const auto &succ : graph->children(pred)) { + for (const auto &succ : graph_->Children(pred)) { if (succ == vertex) { continue; } - if (vector_schedule->assignedProcessor(succ) == from_proc - && vector_schedule->assignedSuperstep(succ) == from_step) { + if (vectorSchedule_->AssignedProcessor(succ) == fromProc + && vectorSchedule_->AssignedSuperstep(succ) == fromStep) { remove = false; break; } } if (remove) { - step_processor_memory[from_step][from_proc] -= graph->vertex_comm_weight(pred); - step_processor_pred[from_step][from_proc].erase(pred); + stepProcessorMemory_[fromStep][fromProc] -= graph_->VertexCommWeight(pred); + stepProcessorPred_[fromStep][fromProc].erase(pred); } } } - if (to_step != from_step) { - for (const auto &succ : graph->children(vertex)) { - if (to_step > from_step && vector_schedule->assignedSuperstep(succ) == to_step) { - if 
(step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].find( + if (toStep != fromStep) { + for (const auto &succ : graph_->Children(vertex)) { + if (toStep > fromStep && vectorSchedule_->AssignedSuperstep(succ) == toStep) { + if (stepProcessorPred_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)].find( vertex) - != step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + != stepProcessorPred_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)] .end()) { - step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] - -= graph->vertex_comm_weight(vertex); + stepProcessorMemory_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)] + -= graph_->VertexCommWeight(vertex); - step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].erase( + stepProcessorPred_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)].erase( vertex); } } - if (vector_schedule->assignedSuperstep(succ) > to_step) { + if (vectorSchedule_->AssignedSuperstep(succ) > toStep) { auto pair - = step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + = stepProcessorPred_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)] .insert(vertex); if (pair.second) { - step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] - += graph->vertex_comm_weight(vertex); + stepProcessorMemory_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)] + += graph_->VertexCommWeight(vertex); } } } } } - void swap_steps(const unsigned step1, const unsigned step2) { - std::swap(step_processor_memory[step1], step_processor_memory[step2]); - 
std::swap(step_processor_pred[step1], step_processor_pred[step2]); + void SwapSteps(const unsigned step1, const unsigned step2) { + std::swap(stepProcessorMemory_[step1], stepProcessorMemory_[step2]); + std::swap(stepProcessorPred_[step1], stepProcessorPred_[step2]); } - void compute_memory_datastructure(unsigned start_step, unsigned end_step) { - for (unsigned step = start_step; step <= end_step; step++) { - for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) { - step_processor_memory[step][proc] = 0; - step_processor_pred[step][proc].clear(); + void ComputeMemoryDatastructure(unsigned startStep, unsigned endStep) { + for (unsigned step = startStep; step <= endStep; step++) { + for (unsigned proc = 0; proc < setSchedule_->GetInstance().NumberOfProcessors(); proc++) { + stepProcessorMemory_[step][proc] = 0; + stepProcessorPred_[step][proc].clear(); - for (const auto &node : set_schedule->step_processor_vertices[step][proc]) { - step_processor_memory[step][proc] += graph->vertex_comm_weight(node); + for (const auto &node : setSchedule_->stepProcessorVertices_[step][proc]) { + stepProcessorMemory_[step][proc] += graph_->VertexCommWeight(node); - for (const auto &pred : graph->parents(node)) { - if (vector_schedule->assignedSuperstep(pred) < step) { - auto pair = step_processor_pred[step][proc].insert(pred); + for (const auto &pred : graph_->Parents(node)) { + if (vectorSchedule_->AssignedSuperstep(pred) < step) { + auto pair = stepProcessorPred_[step][proc].insert(pred); if (pair.second) { - step_processor_memory[step][proc] += graph->vertex_comm_weight(pred); + stepProcessorMemory_[step][proc] += graph_->VertexCommWeight(pred); } } } @@ -265,56 +262,54 @@ struct ls_local_inc_edges_memory_constraint { } } - inline void clear() { - step_processor_memory.clear(); - step_processor_pred.clear(); + inline void Clear() { + stepProcessorMemory_.clear(); + stepProcessorPred_.clear(); } - inline void reset_superstep(unsigned step) { - for 
(unsigned proc = 0; proc < set_schedule->getInstance().getArchitecture().numberOfProcessors(); proc++) { - step_processor_memory[step][proc] = 0; - step_processor_pred[step][proc].clear(); + inline void ResetSuperstep(unsigned step) { + for (unsigned proc = 0; proc < setSchedule_->GetInstance().GetArchitecture().NumberOfProcessors(); proc++) { + stepProcessorMemory_[step][proc] = 0; + stepProcessorPred_[step][proc].clear(); } } - void override_superstep(unsigned step, unsigned proc, unsigned with_step, unsigned with_proc) { - step_processor_memory[step][proc] = step_processor_memory[with_step][with_proc]; - step_processor_pred[step][proc] = step_processor_pred[with_step][with_proc]; + void OverrideSuperstep(unsigned step, unsigned proc, unsigned withStep, unsigned withProc) { + stepProcessorMemory_[step][proc] = stepProcessorMemory_[withStep][withProc]; + stepProcessorPred_[step][proc] = stepProcessorPred_[withStep][withProc]; } - inline bool can_move(vertex_idx_t vertex, const unsigned proc, unsigned step) const { - v_memw_t inc_memory = graph->vertex_comm_weight(vertex); - for (const auto &pred : graph->parents(vertex)) { - if (vector_schedule->assignedSuperstep(pred) < step) { - if (step_processor_pred[step][proc].find(pred) == step_processor_pred[step][proc].end()) { - inc_memory += graph->vertex_comm_weight(pred); + inline bool CanMove(VertexIdxT vertex, const unsigned proc, unsigned step) const { + VMemwT incMemory = graph_->VertexCommWeight(vertex); + for (const auto &pred : graph_->Parents(vertex)) { + if (vectorSchedule_->AssignedSuperstep(pred) < step) { + if (stepProcessorPred_[step][proc].find(pred) == stepProcessorPred_[step][proc].end()) { + incMemory += graph_->VertexCommWeight(pred); } } } - if (step > vector_schedule->assignedSuperstep(vertex)) { - if (step_processor_pred[step][proc].find(vertex) != step_processor_pred[step][proc].end()) { - inc_memory -= graph->vertex_comm_weight(vertex); + if (step > vectorSchedule_->AssignedSuperstep(vertex)) { 
+ if (stepProcessorPred_[step][proc].find(vertex) != stepProcessorPred_[step][proc].end()) { + incMemory -= graph_->VertexCommWeight(vertex); } } - if (step >= vector_schedule->assignedSuperstep(vertex)) { - return step_processor_memory[step][proc] + inc_memory - <= set_schedule->getInstance().getArchitecture().memoryBound(proc); + if (step >= vectorSchedule_->AssignedSuperstep(vertex)) { + return stepProcessorMemory_[step][proc] + incMemory <= setSchedule_->GetInstance().GetArchitecture().MemoryBound(proc); } - if (step_processor_memory[step][proc] + inc_memory > set_schedule->getInstance().getArchitecture().memoryBound(proc)) { + if (stepProcessorMemory_[step][proc] + incMemory > setSchedule_->GetInstance().GetArchitecture().MemoryBound(proc)) { return false; } - for (const auto &succ : graph->children(vertex)) { - const auto &succ_step = vector_schedule->assignedSuperstep(succ); - const auto &succ_proc = vector_schedule->assignedProcessor(succ); + for (const auto &succ : graph_->Children(vertex)) { + const auto &succStep = vectorSchedule_->AssignedSuperstep(succ); + const auto &succProc = vectorSchedule_->AssignedProcessor(succ); - if (succ_step == vector_schedule->assignedSuperstep(vertex) - and succ_proc != vector_schedule->assignedProcessor(vertex)) { - if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) - > set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) { + if (succStep == vectorSchedule_->AssignedSuperstep(vertex) and succProc != vectorSchedule_->AssignedProcessor(vertex)) { + if (stepProcessorMemory_[succStep][succProc] + graph_->VertexCommWeight(vertex) + > setSchedule_->GetInstance().GetArchitecture().MemoryBound(succProc)) { return false; } } @@ -324,120 +319,118 @@ struct ls_local_inc_edges_memory_constraint { } }; -template -struct ls_local_sources_inc_edges_memory_constraint { - using Graph_impl_t = Graph_t; +template +struct LsLocalSourcesIncEdgesMemoryConstraint { + using GraphImplT = GraphT; - 
const SetSchedule *set_schedule; - const VectorSchedule *vector_schedule; - const Graph_t *graph; + const SetSchedule *setSchedule_; + const VectorSchedule *vectorSchedule_; + const GraphT *graph_; - std::vector>> step_processor_memory; - std::vector>>> step_processor_pred; + std::vector>> stepProcessorMemory_; + std::vector>>> stepProcessorPred_; - ls_local_sources_inc_edges_memory_constraint() : set_schedule(nullptr), vector_schedule(nullptr), graph(nullptr) {} + LsLocalSourcesIncEdgesMemoryConstraint() : setSchedule_(nullptr), vectorSchedule_(nullptr), graph_(nullptr) {} - inline void swap_steps(const unsigned step1, const unsigned step2) { - std::swap(step_processor_memory[step1], step_processor_memory[step2]); - std::swap(step_processor_pred[step1], step_processor_pred[step2]); + inline void SwapSteps(const unsigned step1, const unsigned step2) { + std::swap(stepProcessorMemory_[step1], stepProcessorMemory_[step2]); + std::swap(stepProcessorPred_[step1], stepProcessorPred_[step2]); } - inline void initialize(const SetSchedule &set_schedule_, const VectorSchedule &vec_schedule_) { - if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() - != MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES) { + inline void Initialize(const SetSchedule &setSchedule, const VectorSchedule &vecSchedule) { + if (setSchedule.GetInstance().GetArchitecture().GetMemoryConstraintType() != MemoryConstraintType::LOCAL_SOURCES_INC_EDGES) { throw std::invalid_argument("Memory constraint type is not LOCAL_SOURCES_INC_EDGES"); } - set_schedule = &set_schedule_; - vector_schedule = &vec_schedule_; - graph = &set_schedule->getInstance().getComputationalDag(); - step_processor_memory = std::vector>>( - set_schedule->numberOfSupersteps(), - std::vector>(set_schedule->getInstance().numberOfProcessors(), 0)); - step_processor_pred = std::vector>>>( - set_schedule->numberOfSupersteps(), - std::vector>>(set_schedule->getInstance().numberOfProcessors())); + setSchedule_ = 
&setSchedule; + vectorSchedule_ = &vecSchedule; + graph_ = &setSchedule_->GetInstance().GetComputationalDag(); + stepProcessorMemory_ = std::vector>>( + setSchedule_->NumberOfSupersteps(), std::vector>(setSchedule_->GetInstance().NumberOfProcessors(), 0)); + stepProcessorPred_ = std::vector>>>( + setSchedule_->NumberOfSupersteps(), + std::vector>>(setSchedule_->GetInstance().NumberOfProcessors())); } - inline void apply_move(vertex_idx_t vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, unsigned to_step) { - if (is_source(vertex, *graph)) { - step_processor_memory[to_step][to_proc] += graph->vertex_mem_weight(vertex); - step_processor_memory[from_step][from_proc] -= graph->vertex_mem_weight(vertex); + inline void ApplyMove(VertexIdxT vertex, unsigned fromProc, unsigned fromStep, unsigned toProc, unsigned toStep) { + if (IsSource(vertex, *graph_)) { + stepProcessorMemory_[toStep][toProc] += graph_->VertexMemWeight(vertex); + stepProcessorMemory_[fromStep][fromProc] -= graph_->VertexMemWeight(vertex); } - for (const auto &pred : graph->parents(vertex)) { - if (vector_schedule->assignedSuperstep(pred) < to_step) { - auto pair = step_processor_pred[to_step][to_proc].insert(pred); + for (const auto &pred : graph_->Parents(vertex)) { + if (vectorSchedule_->AssignedSuperstep(pred) < toStep) { + auto pair = stepProcessorPred_[toStep][toProc].insert(pred); if (pair.second) { - step_processor_memory[to_step][to_proc] += graph->vertex_comm_weight(pred); + stepProcessorMemory_[toStep][toProc] += graph_->VertexCommWeight(pred); } } - if (vector_schedule->assignedSuperstep(pred) < from_step) { + if (vectorSchedule_->AssignedSuperstep(pred) < fromStep) { bool remove = true; - for (const auto &succ : graph->children(pred)) { + for (const auto &succ : graph_->Children(pred)) { if (succ == vertex) { continue; } - if (vector_schedule->assignedProcessor(succ) == from_proc - && vector_schedule->assignedSuperstep(succ) == from_step) { + if 
(vectorSchedule_->AssignedProcessor(succ) == fromProc + && vectorSchedule_->AssignedSuperstep(succ) == fromStep) { remove = false; break; } } if (remove) { - step_processor_memory[from_step][from_proc] -= graph->vertex_comm_weight(pred); - step_processor_pred[from_step][from_proc].erase(pred); + stepProcessorMemory_[fromStep][fromProc] -= graph_->VertexCommWeight(pred); + stepProcessorPred_[fromStep][fromProc].erase(pred); } } } - if (to_step != from_step) { - for (const auto &succ : graph->children(vertex)) { - if (to_step > from_step && vector_schedule->assignedSuperstep(succ) == to_step) { - if (step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].find( + if (toStep != fromStep) { + for (const auto &succ : graph_->Children(vertex)) { + if (toStep > fromStep && vectorSchedule_->AssignedSuperstep(succ) == toStep) { + if (stepProcessorPred_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)].find( vertex) - != step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + != stepProcessorPred_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)] .end()) { - step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] - -= graph->vertex_comm_weight(vertex); + stepProcessorMemory_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)] + -= graph_->VertexCommWeight(vertex); - step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].erase( + stepProcessorPred_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)].erase( vertex); } } - if (vector_schedule->assignedSuperstep(succ) > to_step) { + if (vectorSchedule_->AssignedSuperstep(succ) > toStep) { auto pair - = step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + 
= stepProcessorPred_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)] .insert(vertex); if (pair.second) { - step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] - += graph->vertex_comm_weight(vertex); + stepProcessorMemory_[vectorSchedule_->AssignedSuperstep(succ)][vectorSchedule_->AssignedProcessor(succ)] + += graph_->VertexCommWeight(vertex); } } } } } - void compute_memory_datastructure(unsigned start_step, unsigned end_step) { - for (unsigned step = start_step; step <= end_step; step++) { - for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) { - step_processor_memory[step][proc] = 0; - step_processor_pred[step][proc].clear(); + void ComputeMemoryDatastructure(unsigned startStep, unsigned endStep) { + for (unsigned step = startStep; step <= endStep; step++) { + for (unsigned proc = 0; proc < setSchedule_->GetInstance().NumberOfProcessors(); proc++) { + stepProcessorMemory_[step][proc] = 0; + stepProcessorPred_[step][proc].clear(); - for (const auto &node : set_schedule->step_processor_vertices[step][proc]) { - if (is_source(node, *graph)) { - step_processor_memory[step][proc] += graph->vertex_mem_weight(node); + for (const auto &node : setSchedule_->stepProcessorVertices_[step][proc]) { + if (IsSource(node, *graph_)) { + stepProcessorMemory_[step][proc] += graph_->VertexMemWeight(node); } - for (const auto &pred : graph->parents(node)) { - if (vector_schedule->assignedSuperstep(pred) < step) { - auto pair = step_processor_pred[step][proc].insert(pred); + for (const auto &pred : graph_->Parents(node)) { + if (vectorSchedule_->AssignedSuperstep(pred) < step) { + auto pair = stepProcessorPred_[step][proc].insert(pred); if (pair.second) { - step_processor_memory[step][proc] += graph->vertex_comm_weight(pred); + stepProcessorMemory_[step][proc] += graph_->VertexCommWeight(pred); } } } @@ -446,69 +439,68 @@ struct 
ls_local_sources_inc_edges_memory_constraint { } } - inline void clear() { - step_processor_memory.clear(); - step_processor_pred.clear(); + inline void Clear() { + stepProcessorMemory_.clear(); + stepProcessorPred_.clear(); } - inline void reset_superstep(unsigned step) { - for (unsigned proc = 0; proc < set_schedule->getInstance().getArchitecture().numberOfProcessors(); proc++) { - step_processor_memory[step][proc] = 0; - step_processor_pred[step][proc].clear(); + inline void ResetSuperstep(unsigned step) { + for (unsigned proc = 0; proc < setSchedule_->GetInstance().GetArchitecture().NumberOfProcessors(); proc++) { + stepProcessorMemory_[step][proc] = 0; + stepProcessorPred_[step][proc].clear(); } } - void override_superstep(unsigned step, unsigned proc, unsigned with_step, unsigned with_proc) { - step_processor_memory[step][proc] = step_processor_memory[with_step][with_proc]; - step_processor_pred[step][proc] = step_processor_pred[with_step][with_proc]; + void OverrideSuperstep(unsigned step, unsigned proc, unsigned withStep, unsigned withProc) { + stepProcessorMemory_[step][proc] = stepProcessorMemory_[withStep][withProc]; + stepProcessorPred_[step][proc] = stepProcessorPred_[withStep][withProc]; } - inline bool can_move(vertex_idx_t vertex, const unsigned proc, unsigned step) const { - v_memw_t inc_memory = 0; + inline bool CanMove(VertexIdxT vertex, const unsigned proc, unsigned step) const { + VMemwT incMemory = 0; - if (is_source(vertex, *graph)) { - inc_memory += graph->vertex_mem_weight(vertex); + if (IsSource(vertex, *graph_)) { + incMemory += graph_->VertexMemWeight(vertex); } - for (const auto &pred : graph->parents(vertex)) { - if (vector_schedule->assignedSuperstep(pred) < step) { - if (step_processor_pred[step][proc].find(pred) == step_processor_pred[step][proc].end()) { - inc_memory += graph->vertex_comm_weight(pred); + for (const auto &pred : graph_->Parents(vertex)) { + if (vectorSchedule_->AssignedSuperstep(pred) < step) { + if 
(stepProcessorPred_[step][proc].find(pred) == stepProcessorPred_[step][proc].end()) { + incMemory += graph_->VertexCommWeight(pred); } } } - if (vector_schedule->assignedSuperstep(vertex) < step) { - if (step_processor_pred[step][proc].find(vertex) != step_processor_pred[step][proc].end()) { - inc_memory -= graph->vertex_comm_weight(vertex); + if (vectorSchedule_->AssignedSuperstep(vertex) < step) { + if (stepProcessorPred_[step][proc].find(vertex) != stepProcessorPred_[step][proc].end()) { + incMemory -= graph_->VertexCommWeight(vertex); } } - if (vector_schedule->assignedSuperstep(vertex) <= step) { - return step_processor_memory[step][proc] + inc_memory - <= set_schedule->getInstance().getArchitecture().memoryBound(proc); + if (vectorSchedule_->AssignedSuperstep(vertex) <= step) { + return stepProcessorMemory_[step][proc] + incMemory <= setSchedule_->GetInstance().GetArchitecture().MemoryBound(proc); } - if (step_processor_memory[step][proc] + inc_memory > set_schedule->getInstance().getArchitecture().memoryBound(proc)) { + if (stepProcessorMemory_[step][proc] + incMemory > setSchedule_->GetInstance().GetArchitecture().MemoryBound(proc)) { return false; } - for (const auto &succ : graph->children(vertex)) { - const auto &succ_step = vector_schedule->assignedSuperstep(succ); - const auto &succ_proc = vector_schedule->assignedProcessor(succ); + for (const auto &succ : graph_->Children(vertex)) { + const auto &succStep = vectorSchedule_->AssignedSuperstep(succ); + const auto &succProc = vectorSchedule_->AssignedProcessor(succ); - if (succ_step == vector_schedule->assignedSuperstep(vertex)) { - if (vector_schedule->assignedProcessor(vertex) != succ_proc || (not is_source(vertex, *graph))) { - if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) - > set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) { + if (succStep == vectorSchedule_->AssignedSuperstep(vertex)) { + if (vectorSchedule_->AssignedProcessor(vertex) != 
succProc || (not IsSource(vertex, *graph_))) { + if (stepProcessorMemory_[succStep][succProc] + graph_->VertexCommWeight(vertex) + > setSchedule_->GetInstance().GetArchitecture().MemoryBound(succProc)) { return false; } } else { - if (is_source(vertex, *graph)) { - if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) - - graph->vertex_mem_weight(vertex) - > set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) { + if (IsSource(vertex, *graph_)) { + if (stepProcessorMemory_[succStep][succProc] + graph_->VertexCommWeight(vertex) + - graph_->VertexMemWeight(vertex) + > setSchedule_->GetInstance().GetArchitecture().MemoryBound(succProc)) { return false; } } diff --git a/include/osp/bsp/scheduler/MaxBspScheduler.hpp b/include/osp/bsp/scheduler/MaxBspScheduler.hpp index 5a78e382..c41ad364 100644 --- a/include/osp/bsp/scheduler/MaxBspScheduler.hpp +++ b/include/osp/bsp/scheduler/MaxBspScheduler.hpp @@ -31,38 +31,38 @@ namespace osp { * The Scheduler class provides a common interface for scheduling scheduler in the BSP scheduling system. * It defines methods for setting and getting the time limit, as well as computing schedules. */ -template -class MaxBspScheduler : public Scheduler { +template +class MaxBspScheduler : public Scheduler { public: - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); + static_assert(isComputationalDagV, "BspSchedule can only be used with computational DAGs."); /** * @brief Get the name of the scheduling algorithm. * @return The name of the scheduling algorithm. */ - virtual std::string getScheduleName() const override = 0; + virtual std::string GetScheduleName() const override = 0; /** * @brief Compute a BSP schedule for the given BSP instance. * @param instance The BSP instance for which to compute the schedule. * @return A pair containing the return status and the computed schedule. 
*/ - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - MaxBspSchedule tmpSched(schedule.getInstance()); - RETURN_STATUS status = computeSchedule(tmpSched); + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + MaxBspSchedule tmpSched(schedule.GetInstance()); + ReturnStatus status = ComputeSchedule(tmpSched); schedule = tmpSched; return status; } - virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) override { - MaxBspScheduleCS tmpSchedule(schedule.getInstance()); - auto result = computeScheduleCS(tmpSchedule); - if (result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND) { + virtual ReturnStatus ComputeScheduleCS(BspScheduleCS &schedule) override { + MaxBspScheduleCS tmpSchedule(schedule.GetInstance()); + auto result = ComputeScheduleCS(tmpSchedule); + if (result == ReturnStatus::OSP_SUCCESS || result == ReturnStatus::BEST_FOUND) { schedule = tmpSchedule; - schedule.setAutoCommunicationSchedule(); + schedule.SetAutoCommunicationSchedule(); return result; } else { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } } @@ -71,15 +71,15 @@ class MaxBspScheduler : public Scheduler { * @param instance The BSP instance for which to compute the schedule. * @return A pair containing the return status and the computed schedule. 
*/ - virtual RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) = 0; + virtual ReturnStatus ComputeSchedule(MaxBspSchedule &schedule) = 0; - virtual RETURN_STATUS computeScheduleCS(MaxBspScheduleCS &schedule) { - auto result = computeSchedule(schedule); - if (result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND) { - // schedule.setAutoCommunicationSchedule(); + virtual ReturnStatus ComputeScheduleCS(MaxBspScheduleCS &schedule) { + auto result = ComputeSchedule(schedule); + if (result == ReturnStatus::OSP_SUCCESS || result == ReturnStatus::BEST_FOUND) { + // schedule.SetAutoCommunicationSchedule(); return result; } else { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } }; }; diff --git a/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp b/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp index abd2a1a4..defd6737 100644 --- a/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp +++ b/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp @@ -32,212 +32,210 @@ limitations under the License. 
namespace osp { -template -class MultilevelCoarseAndSchedule : public Scheduler { +template +class MultilevelCoarseAndSchedule : public Scheduler { private: - const BspInstance *original_inst; + const BspInstance *originalInst_; protected: - inline const BspInstance *getOriginalInstance() const { return original_inst; }; + inline const BspInstance *GetOriginalInstance() const { return originalInst_; }; - Scheduler *sched; - ImprovementScheduler *improver; + Scheduler *sched_; + ImprovementScheduler *improver_; - MultilevelCoarser *ml_coarser; - long int active_graph; - std::unique_ptr> active_instance; - std::unique_ptr> active_schedule; + MultilevelCoarser *mlCoarser_; + long int activeGraph_; + std::unique_ptr> activeInstance_; + std::unique_ptr> activeSchedule_; - RETURN_STATUS compute_initial_schedule(); - RETURN_STATUS expand_active_schedule(); - RETURN_STATUS expand_active_schedule_to_original_schedule(BspSchedule &schedule); - RETURN_STATUS improve_active_schedule(); - RETURN_STATUS run_expansions(BspSchedule &schedule); + ReturnStatus ComputeInitialSchedule(); + ReturnStatus ExpandActiveSchedule(); + ReturnStatus ExpandActiveScheduleToOriginalSchedule(BspSchedule &schedule); + ReturnStatus ImproveActiveSchedule(); + ReturnStatus RunExpansions(BspSchedule &schedule); - void clear_computation_data(); + void ClearComputationData(); public: MultilevelCoarseAndSchedule() - : Scheduler(), original_inst(nullptr), sched(nullptr), improver(nullptr), ml_coarser(nullptr), active_graph(-1L) { + : Scheduler(), originalInst_(nullptr), sched_(nullptr), improver_(nullptr), mlCoarser_(nullptr), activeGraph_(-1L) { }; - MultilevelCoarseAndSchedule(Scheduler &sched_, MultilevelCoarser &ml_coarser_) - : Scheduler(), - original_inst(nullptr), - sched(&sched_), - improver(nullptr), - ml_coarser(&ml_coarser_), - active_graph(-1L) {}; - MultilevelCoarseAndSchedule(Scheduler &sched_, - ImprovementScheduler &improver_, - MultilevelCoarser &ml_coarser_) - : Scheduler(), - 
original_inst(nullptr), - sched(&sched_), - improver(&improver_), - ml_coarser(&ml_coarser_), - active_graph(-1L) {}; + MultilevelCoarseAndSchedule(Scheduler &sched, MultilevelCoarser &mlCoarser) + : Scheduler(), + originalInst_(nullptr), + sched_(&sched), + improver_(nullptr), + mlCoarser_(&mlCoarser), + activeGraph_(-1L) {}; + MultilevelCoarseAndSchedule(Scheduler &sched, + ImprovementScheduler &improver, + MultilevelCoarser &mlCoarser) + : Scheduler(), + originalInst_(nullptr), + sched_(&sched), + improver_(&improver), + mlCoarser_(&mlCoarser), + activeGraph_(-1L) {}; virtual ~MultilevelCoarseAndSchedule() = default; - inline void setInitialScheduler(Scheduler &sched_) { sched = &sched_; }; + inline void SetInitialScheduler(Scheduler &sched) { sched_ = &sched; }; - inline void setImprovementScheduler(ImprovementScheduler &improver_) { improver = &improver_; }; + inline void SetImprovementScheduler(ImprovementScheduler &improver) { improver_ = &improver; }; - inline void setMultilevelCoarser(MultilevelCoarser &ml_coarser_) { ml_coarser = &ml_coarser_; }; + inline void SetMultilevelCoarser(MultilevelCoarser &mlCoarser) { mlCoarser_ = &mlCoarser; }; - RETURN_STATUS computeSchedule(BspSchedule &schedule) override; + ReturnStatus ComputeSchedule(BspSchedule &schedule) override; - std::string getScheduleName() const override { - if (improver == nullptr) { - return "C:" + ml_coarser->getCoarserName() + "-S:" + sched->getScheduleName(); + std::string GetScheduleName() const override { + if (improver_ == nullptr) { + return "C:" + mlCoarser_->GetCoarserName() + "-S:" + sched_->GetScheduleName(); } else { - return "C:" + ml_coarser->getCoarserName() + "-S:" + sched->getScheduleName() + "-I:" + improver->getScheduleName(); + return "C:" + mlCoarser_->GetCoarserName() + "-S:" + sched_->GetScheduleName() + "-I:" + improver_->GetScheduleName(); } }; }; -template -RETURN_STATUS MultilevelCoarseAndSchedule::compute_initial_schedule() { - active_graph = 
static_cast(ml_coarser->dag_history.size()); - active_graph--; +template +ReturnStatus MultilevelCoarseAndSchedule::ComputeInitialSchedule() { + activeGraph_ = static_cast(mlCoarser_->dagHistory_.size()); + activeGraph_--; - assert((active_graph >= 0L) && "Must have done at least one coarsening!"); + assert((activeGraph_ >= 0L) && "Must have done at least one coarsening!"); - RETURN_STATUS status; + ReturnStatus status; - active_instance = std::make_unique>( - *(ml_coarser->dag_history.at(static_cast(active_graph))), original_inst->getArchitecture()); - active_schedule = std::make_unique>(*active_instance); - status = sched->computeSchedule(*active_schedule); - assert(active_schedule->satisfiesPrecedenceConstraints()); + activeInstance_ = std::make_unique>( + *(mlCoarser_->dagHistory_.at(static_cast(activeGraph_))), originalInst_->GetArchitecture()); + activeSchedule_ = std::make_unique>(*activeInstance_); + status = sched_->ComputeSchedule(*activeSchedule_); + assert(activeSchedule_->SatisfiesPrecedenceConstraints()); - RETURN_STATUS ret = improve_active_schedule(); + ReturnStatus ret = ImproveActiveSchedule(); status = std::max(ret, status); return status; } -template -RETURN_STATUS MultilevelCoarseAndSchedule::improve_active_schedule() { - if (improver) { - if (active_instance->getComputationalDag().num_vertices() == 0) { - return RETURN_STATUS::OSP_SUCCESS; +template +ReturnStatus MultilevelCoarseAndSchedule::ImproveActiveSchedule() { + if (improver_) { + if (activeInstance_->GetComputationalDag().NumVertices() == 0) { + return ReturnStatus::OSP_SUCCESS; } - return improver->improveSchedule(*active_schedule); + return improver_->ImproveSchedule(*activeSchedule_); } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } -template -RETURN_STATUS MultilevelCoarseAndSchedule::expand_active_schedule() { - assert((active_graph > 0L) && (static_cast(active_graph) < ml_coarser->dag_history.size())); +template +ReturnStatus 
MultilevelCoarseAndSchedule::ExpandActiveSchedule() { + assert((activeGraph_ > 0L) && (static_cast(activeGraph_) < mlCoarser_->dagHistory_.size())); - std::unique_ptr> expanded_instance = std::make_unique>( - *(ml_coarser->dag_history.at(static_cast(active_graph) - 1)), original_inst->getArchitecture()); - std::unique_ptr> expanded_schedule - = std::make_unique>(*expanded_instance); + std::unique_ptr> expandedInstance = std::make_unique>( + *(mlCoarser_->dagHistory_.at(static_cast(activeGraph_) - 1)), originalInst_->GetArchitecture()); + std::unique_ptr> expandedSchedule = std::make_unique>(*expandedInstance); - for (const auto &node : expanded_instance->getComputationalDag().vertices()) { - expanded_schedule->setAssignedProcessor( + for (const auto &node : expandedInstance->GetComputationalDag().Vertices()) { + expandedSchedule->SetAssignedProcessor( node, - active_schedule->assignedProcessor(ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); - expanded_schedule->setAssignedSuperstep( + activeSchedule_->AssignedProcessor(mlCoarser_->contractionMaps_.at(static_cast(activeGraph_))->at(node))); + expandedSchedule->SetAssignedSuperstep( node, - active_schedule->assignedSuperstep(ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); + activeSchedule_->AssignedSuperstep(mlCoarser_->contractionMaps_.at(static_cast(activeGraph_))->at(node))); } - assert(expanded_schedule->satisfiesPrecedenceConstraints()); + assert(expandedSchedule->SatisfiesPrecedenceConstraints()); - // std::cout << "exp_inst: " << expanded_instance.get() << " n: " << expanded_instance->numberOfVertices() << " m: - // " << expanded_instance->getComputationalDag().num_edges() << std::endl; std::cout << "exp_sched: " << - // &expanded_schedule->getInstance() << " n: " << expanded_schedule->getInstance().numberOfVertices() << " m: " << - // expanded_schedule->getInstance().getComputationalDag().num_edges() << std::endl; + // std::cout << "exp_inst: " << 
expanded_instance.get() << " n: " << expanded_instance->NumberOfVertices() << " m: + // " << expanded_instance->GetComputationalDag().NumEdges() << std::endl; std::cout << "exp_sched: " << + // &expanded_schedule->GetInstance() << " n: " << expanded_schedule->GetInstance().NumberOfVertices() << " m: " << + // expanded_schedule->GetInstance().GetComputationalDag().NumEdges() << std::endl; - active_graph--; - std::swap(expanded_instance, active_instance); - std::swap(expanded_schedule, active_schedule); + activeGraph_--; + std::swap(expandedInstance, activeInstance_); + std::swap(expandedSchedule, activeSchedule_); - // std::cout << "act_inst: " << active_instance.get() << " n: " << active_instance->numberOfVertices() << " m: " << - // active_instance->getComputationalDag().num_edges() << std::endl; std::cout << "act_sched: " << - // &active_schedule->getInstance() << " n: " << active_schedule->getInstance().numberOfVertices() << " m: " << - // active_schedule->getInstance().getComputationalDag().num_edges() << std::endl; + // std::cout << "act_inst: " << active_instance.get() << " n: " << active_instance->NumberOfVertices() << " m: " << + // active_instance->GetComputationalDag().NumEdges() << std::endl; std::cout << "act_sched: " << + // &active_schedule->GetInstance() << " n: " << active_schedule->GetInstance().NumberOfVertices() << " m: " << + // active_schedule->GetInstance().GetComputationalDag().NumEdges() << std::endl; - assert(active_schedule->satisfiesPrecedenceConstraints()); - return RETURN_STATUS::OSP_SUCCESS; + assert(activeSchedule_->SatisfiesPrecedenceConstraints()); + return ReturnStatus::OSP_SUCCESS; } -template -RETURN_STATUS MultilevelCoarseAndSchedule::expand_active_schedule_to_original_schedule( - BspSchedule &schedule) { - assert(active_graph == 0L); +template +ReturnStatus MultilevelCoarseAndSchedule::ExpandActiveScheduleToOriginalSchedule(BspSchedule &schedule) { + assert(activeGraph_ == 0L); - for (const auto &node : 
getOriginalInstance()->getComputationalDag().vertices()) { - schedule.setAssignedProcessor( + for (const auto &node : GetOriginalInstance()->GetComputationalDag().Vertices()) { + schedule.SetAssignedProcessor( node, - active_schedule->assignedProcessor(ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); - schedule.setAssignedSuperstep( + activeSchedule_->AssignedProcessor(mlCoarser_->contractionMaps_.at(static_cast(activeGraph_))->at(node))); + schedule.SetAssignedSuperstep( node, - active_schedule->assignedSuperstep(ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); + activeSchedule_->AssignedSuperstep(mlCoarser_->contractionMaps_.at(static_cast(activeGraph_))->at(node))); } - active_graph--; - active_instance = std::unique_ptr>(); - active_schedule = std::unique_ptr>(); + activeGraph_--; + activeInstance_ = std::unique_ptr>(); + activeSchedule_ = std::unique_ptr>(); - assert(schedule.satisfiesPrecedenceConstraints()); + assert(schedule.SatisfiesPrecedenceConstraints()); - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } -template -RETURN_STATUS MultilevelCoarseAndSchedule::run_expansions(BspSchedule &schedule) { - assert(active_graph >= 0L && static_cast(active_graph) == ml_coarser->dag_history.size() - 1); +template +ReturnStatus MultilevelCoarseAndSchedule::RunExpansions(BspSchedule &schedule) { + assert(activeGraph_ >= 0L && static_cast(activeGraph_) == mlCoarser_->dagHistory_.size() - 1); - RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; + ReturnStatus status = ReturnStatus::OSP_SUCCESS; - while (active_graph > 0L) { - status = std::max(status, expand_active_schedule()); - status = std::max(status, improve_active_schedule()); + while (activeGraph_ > 0L) { + status = std::max(status, ExpandActiveSchedule()); + status = std::max(status, ImproveActiveSchedule()); } - status = std::max(status, expand_active_schedule_to_original_schedule(schedule)); + status = std::max(status, 
ExpandActiveScheduleToOriginalSchedule(schedule)); return status; } -template -void MultilevelCoarseAndSchedule::clear_computation_data() { - active_graph = -1L; - active_instance = std::unique_ptr>(); - active_schedule = std::unique_ptr>(); +template +void MultilevelCoarseAndSchedule::ClearComputationData() { + activeGraph_ = -1L; + activeInstance_ = std::unique_ptr>(); + activeSchedule_ = std::unique_ptr>(); } -template -RETURN_STATUS MultilevelCoarseAndSchedule::computeSchedule(BspSchedule &schedule) { - clear_computation_data(); +template +ReturnStatus MultilevelCoarseAndSchedule::ComputeSchedule(BspSchedule &schedule) { + ClearComputationData(); - original_inst = &schedule.getInstance(); + originalInst_ = &schedule.GetInstance(); - RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; + ReturnStatus status = ReturnStatus::OSP_SUCCESS; - status = std::max(status, ml_coarser->run(*original_inst)); + status = std::max(status, mlCoarser_->Run(*originalInst_)); - if constexpr (std::is_same_v) { - if (ml_coarser->dag_history.size() == 0) { - status = std::max(status, sched->computeSchedule(schedule)); + if constexpr (std::is_same_v) { + if (mlCoarser_->dagHistory_.size() == 0) { + status = std::max(status, sched_->ComputeSchedule(schedule)); } else { - status = std::max(status, compute_initial_schedule()); - status = std::max(status, run_expansions(schedule)); + status = std::max(status, ComputeInitialSchedule()); + status = std::max(status, RunExpansions(schedule)); } } else { - assert(ml_coarser->dag_history.size() > 0); + assert(mlCoarser_->dagHistory_.size() > 0); - status = std::max(status, compute_initial_schedule()); - status = std::max(status, run_expansions(schedule)); + status = std::max(status, ComputeInitialSchedule()); + status = std::max(status, RunExpansions(schedule)); } - assert(active_graph == -1L); + assert(activeGraph_ == -1L); - clear_computation_data(); + ClearComputationData(); return status; } diff --git a/include/osp/bsp/scheduler/Scheduler.hpp 
b/include/osp/bsp/scheduler/Scheduler.hpp index 53ee1529..c9c12e20 100644 --- a/include/osp/bsp/scheduler/Scheduler.hpp +++ b/include/osp/bsp/scheduler/Scheduler.hpp @@ -36,9 +36,9 @@ namespace osp { * It specifies the contract for computing standard BSP schedules (BspSchedule) and communication-aware schedules * (BspScheduleCS). */ -template +template class Scheduler { - static_assert(is_computational_dag_v, "Scheduler can only be used with computational DAGs."); + static_assert(isComputationalDagV, "Scheduler can only be used with computational DAGs."); public: /** @@ -55,7 +55,7 @@ class Scheduler { * @brief Get the name of the scheduling algorithm. * @return The name of the scheduling algorithm. */ - virtual std::string getScheduleName() const = 0; + virtual std::string GetScheduleName() const = 0; /** * @brief Computes a BSP schedule for the given BSP instance. @@ -64,28 +64,28 @@ class Scheduler { * the specific scheduling logic. It modifies the passed BspSchedule object. * * @param schedule The BspSchedule object to be computed. It contains the BspInstance. - * @return RETURN_STATUS::OSP_SUCCESS if a schedule was successfully computed, - * RETURN_STATUS::ERROR if an error occurred, or other status codes as appropriate. + * @return ReturnStatus::OSP_SUCCESS if a schedule was successfully computed, + * ReturnStatus::ERROR if an error occurred, or other status codes as appropriate. */ - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) = 0; + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) = 0; /** * @brief Computes a BSP schedule with communication schedule (CS). * - * This method provides a default implementation that first computes the basic BSP schedule using computeSchedule(). - * If successful, it then calls setAutoCommunicationSchedule() on the schedule to set a communication schedule. + * This method provides a default implementation that first computes the basic BSP schedule using ComputeSchedule(). 
+ * If successful, it then calls SetAutoCommunicationSchedule() on the schedule to set a communication schedule. * * @param schedule The BspScheduleCS object to be computed. It contains the BspInstance. - * @return RETURN_STATUS::OSP_SUCCESS or RETURN_STATUS::BEST_FOUND if a schedule was successfully computed, - * RETURN_STATUS::ERROR if an error occurred, or other status codes as appropriate. + * @return ReturnStatus::OSP_SUCCESS or ReturnStatus::BEST_FOUND if a schedule was successfully computed, + * ReturnStatus::ERROR if an error occurred, or other status codes as appropriate. */ - virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) { - auto result = computeSchedule(schedule); - if (result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND) { - schedule.setAutoCommunicationSchedule(); + virtual ReturnStatus ComputeScheduleCS(BspScheduleCS &schedule) { + auto result = ComputeSchedule(schedule); + if (result == ReturnStatus::OSP_SUCCESS || result == ReturnStatus::BEST_FOUND) { + schedule.SetAutoCommunicationSchedule(); return result; } else { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } } }; diff --git a/include/osp/bsp/scheduler/Serial.hpp b/include/osp/bsp/scheduler/Serial.hpp index cde1b706..1482e804 100644 --- a/include/osp/bsp/scheduler/Serial.hpp +++ b/include/osp/bsp/scheduler/Serial.hpp @@ -34,126 +34,126 @@ namespace osp { * smallest number of supersteps. * */ -template -class Serial : public Scheduler { +template +class Serial : public Scheduler { public: /** * @brief Default constructor for Serial. */ - Serial() : Scheduler() {} + Serial() : Scheduler() {} /** * @brief Default destructor for Serial. 
*/ ~Serial() override = default; - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); - const auto &dag = instance.getComputationalDag(); - const auto num_vertices = dag.num_vertices(); + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); + const auto &dag = instance.GetComputationalDag(); + const auto numVertices = dag.NumVertices(); - if (num_vertices == 0) { - return RETURN_STATUS::OSP_SUCCESS; + if (numVertices == 0) { + return ReturnStatus::OSP_SUCCESS; } - const auto &arch = instance.getArchitecture(); + const auto &arch = instance.GetArchitecture(); // Select one processor of each type - std::vector chosen_procs; - if (arch.getNumberOfProcessorTypes() > 0) { - std::vector type_seen(arch.getNumberOfProcessorTypes(), false); - for (unsigned p = 0; p < arch.numberOfProcessors(); ++p) { - if (!type_seen[arch.processorType(p)]) { - chosen_procs.push_back(p); - type_seen[arch.processorType(p)] = true; + std::vector chosenProcs; + if (arch.GetNumberOfProcessorTypes() > 0) { + std::vector typeSeen(arch.GetNumberOfProcessorTypes(), false); + for (unsigned p = 0; p < arch.NumberOfProcessors(); ++p) { + if (!typeSeen[arch.ProcessorType(p)]) { + chosenProcs.push_back(p); + typeSeen[arch.ProcessorType(p)] = true; } } } - if (chosen_procs.empty()) { - return RETURN_STATUS::ERROR; + if (chosenProcs.empty()) { + return ReturnStatus::ERROR; } - const unsigned num_node_types = dag.num_vertex_types(); - std::vector> node_type_compatible_processors(num_node_types); + const unsigned numNodeTypes = dag.NumVertexTypes(); + std::vector> nodeTypeCompatibleProcessors(numNodeTypes); - for (v_type_t type = 0; type < num_node_types; ++type) { - for (const auto &p : chosen_procs) { - if (instance.isCompatibleType(type, instance.processorType(p))) { - node_type_compatible_processors[type].push_back(p); + for (VTypeT type = 0; type < numNodeTypes; ++type) { + for 
(const auto &p : chosenProcs) { + if (instance.IsCompatibleType(type, instance.ProcessorType(p))) { + nodeTypeCompatibleProcessors[type].push_back(p); } } } - std::vector> in_degree(num_vertices); - std::deque> ready_nodes; - std::deque> deferred_nodes; + std::vector> inDegree(numVertices); + std::deque> readyNodes; + std::deque> deferredNodes; - for (const auto &v : dag.vertices()) { - schedule.setAssignedProcessor(v, std::numeric_limits::max()); - schedule.setAssignedSuperstep(v, std::numeric_limits::max()); - in_degree[v] = dag.in_degree(v); - if (in_degree[v] == 0) { - ready_nodes.push_back(v); + for (const auto &v : dag.Vertices()) { + schedule.SetAssignedProcessor(v, std::numeric_limits::max()); + schedule.SetAssignedSuperstep(v, std::numeric_limits::max()); + inDegree[v] = dag.InDegree(v); + if (inDegree[v] == 0) { + readyNodes.push_back(v); } } - vertex_idx_t scheduled_nodes_count = 0; - unsigned current_superstep = 0; + VertexIdxT scheduledNodesCount = 0; + unsigned currentSuperstep = 0; - while (scheduled_nodes_count < num_vertices) { - while (not ready_nodes.empty()) { - vertex_idx_t v = ready_nodes.front(); - ready_nodes.pop_front(); + while (scheduledNodesCount < numVertices) { + while (not readyNodes.empty()) { + VertexIdxT v = readyNodes.front(); + readyNodes.pop_front(); bool scheduled = false; - unsigned v_type = 0; - if constexpr (has_typed_vertices_v) { - v_type = dag.vertex_type(v); + unsigned vType = 0; + if constexpr (hasTypedVerticesV) { + vType = dag.VertexType(v); } - for (const auto &p : node_type_compatible_processors[v_type]) { - bool parents_compatible = true; - for (const auto &parent : dag.parents(v)) { - if (schedule.assignedSuperstep(parent) == current_superstep && schedule.assignedProcessor(parent) != p) { - parents_compatible = false; + for (const auto &p : nodeTypeCompatibleProcessors[vType]) { + bool parentsCompatible = true; + for (const auto &parent : dag.Parents(v)) { + if (schedule.AssignedSuperstep(parent) == 
currentSuperstep && schedule.AssignedProcessor(parent) != p) { + parentsCompatible = false; break; } } - if (parents_compatible) { - schedule.setAssignedProcessor(v, p); - schedule.setAssignedSuperstep(v, current_superstep); + if (parentsCompatible) { + schedule.SetAssignedProcessor(v, p); + schedule.SetAssignedSuperstep(v, currentSuperstep); scheduled = true; - ++scheduled_nodes_count; + ++scheduledNodesCount; break; } } if (not scheduled) { - deferred_nodes.push_back(v); + deferredNodes.push_back(v); } else { - for (const auto &child : dag.children(v)) { - if (--in_degree[child] == 0) { - ready_nodes.push_back(child); + for (const auto &child : dag.Children(v)) { + if (--inDegree[child] == 0) { + readyNodes.push_back(child); } } } } - if (scheduled_nodes_count < num_vertices) { - current_superstep++; - ready_nodes.insert(ready_nodes.end(), deferred_nodes.begin(), deferred_nodes.end()); - deferred_nodes.clear(); + if (scheduledNodesCount < numVertices) { + currentSuperstep++; + readyNodes.insert(readyNodes.end(), deferredNodes.begin(), deferredNodes.end()); + deferredNodes.clear(); } } - schedule.setNumberOfSupersteps(current_superstep + 1); - return RETURN_STATUS::OSP_SUCCESS; + schedule.SetNumberOfSupersteps(currentSuperstep + 1); + return ReturnStatus::OSP_SUCCESS; } - std::string getScheduleName() const override { return "Serial"; } + std::string GetScheduleName() const override { return "Serial"; } }; } // namespace osp diff --git a/include/osp/coarser/BspScheduleCoarser.hpp b/include/osp/coarser/BspScheduleCoarser.hpp index a65e92e5..eea9d7d7 100644 --- a/include/osp/coarser/BspScheduleCoarser.hpp +++ b/include/osp/coarser/BspScheduleCoarser.hpp @@ -31,13 +31,13 @@ namespace osp { * @brief Abstract base class for coarsening ComputationalDags. 
* */ -template -class BspScheduleCoarser : public CoarserGenContractionMap { +template +class BspScheduleCoarser : public CoarserGenContractionMap { private: - const BspSchedule *schedule; + const BspSchedule *schedule_; public: - BspScheduleCoarser(const BspSchedule &_schedule) : schedule(&_schedule) {} + BspScheduleCoarser(const BspSchedule &schedule) : schedule_(&schedule) {} /** * @brief Destructor for the Coarser class. @@ -48,58 +48,58 @@ class BspScheduleCoarser : public CoarserGenContractionMap>> &vertex_map, - // std::vector> &reverse_vertex_map) override { + // virtual bool coarseDag(const GraphTIn &dag_in, GraphTOut &dag_out, + // std::vector>> &vertex_map, + // std::vector> &reverse_vertex_map) override { - virtual std::vector> generate_vertex_contraction_map(const Graph_t_in &dag_in) override { - using VertexType_in = vertex_idx_t; - using VertexType_out = vertex_idx_t; + virtual std::vector> GenerateVertexContractionMap(const GraphTIn &dagIn) override { + using VertexTypeIn = VertexIdxT; + using VertexTypeOut = VertexIdxT; - assert(&dag_in == &schedule->getInstance().getComputationalDag()); - assert(schedule->satisfiesPrecedenceConstraints()); + assert(&dagIn == &schedule_->GetInstance().GetComputationalDag()); + assert(schedule_->SatisfiesPrecedenceConstraints()); - SetSchedule set_schedule(*schedule); - std::vector reverse_vertex_map(dag_in.num_vertices(), 0); - std::vector> vertex_map; + SetSchedule setSchedule(*schedule_); + std::vector reverseVertexMap(dagIn.NumVertices(), 0); + std::vector> vertexMap; - bool schedule_respects_types = true; + bool scheduleRespectsTypes = true; - for (unsigned step = 0; step < schedule->numberOfSupersteps(); step++) { - for (unsigned proc = 0; proc < schedule->getInstance().numberOfProcessors(); proc++) { - if (set_schedule.step_processor_vertices[step][proc].size() > 0) { - v_workw_t total_work = 0; - v_memw_t total_memory = 0; - v_commw_t total_communication = 0; + for (unsigned step = 0; step < 
schedule_->NumberOfSupersteps(); step++) { + for (unsigned proc = 0; proc < schedule_->GetInstance().NumberOfProcessors(); proc++) { + if (setSchedule.stepProcessorVertices_[step][proc].size() > 0) { + VWorkwT totalWork = 0; + VMemwT totalMemory = 0; + VCommwT totalCommunication = 0; - vertex_map.push_back(std::vector()); + vertexMap.push_back(std::vector()); - v_type_t type = dag_in.vertex_type(*(set_schedule.step_processor_vertices[step][proc].begin())); - bool homogeneous_types = true; + VTypeT type = dagIn.VertexType(*(setSchedule.stepProcessorVertices_[step][proc].begin())); + bool homogeneousTypes = true; - for (const auto &vertex : set_schedule.step_processor_vertices[step][proc]) { - if (dag_in.vertex_type(vertex) != type) { - homogeneous_types = false; + for (const auto &vertex : setSchedule.stepProcessorVertices_[step][proc]) { + if (dagIn.VertexType(vertex) != type) { + homogeneousTypes = false; } - vertex_map.back().push_back(vertex); - reverse_vertex_map[vertex] = vertex_map.size() - 1; + vertexMap.back().push_back(vertex); + reverseVertexMap[vertex] = vertexMap.size() - 1; - total_work += dag_in.vertex_work_weight(vertex); - total_communication += dag_in.vertex_comm_weight(vertex); - total_memory += dag_in.vertex_mem_weight(vertex); + totalWork += dagIn.VertexWorkWeight(vertex); + totalCommunication += dagIn.VertexCommWeight(vertex); + totalMemory += dagIn.VertexMemWeight(vertex); } - if (schedule_respects_types) { - schedule_respects_types = homogeneous_types; + if (scheduleRespectsTypes) { + scheduleRespectsTypes = homogeneousTypes; } } } } - return reverse_vertex_map; + return reverseVertexMap; } }; diff --git a/include/osp/coarser/Coarser.hpp b/include/osp/coarser/Coarser.hpp index 8ea9c400..5009a411 100644 --- a/include/osp/coarser/Coarser.hpp +++ b/include/osp/coarser/Coarser.hpp @@ -35,20 +35,20 @@ namespace osp { * @brief Abstract base class for coarsening ComputationalDags. 
* */ -template +template class Coarser { - static_assert(is_computational_dag_v, "Graph_t_in must be a computational DAG"); - static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, - "Graph_t_out must be a (direct) constructable computational DAG"); + static_assert(isComputationalDagV, "GraphTIn must be a computational DAG"); + static_assert(isConstructableCdagV || isDirectConstructableCdagV, + "GraphTOut must be a (direct) constructable computational DAG"); // probably too strict, need to be refined. - // maybe add concept for when Gtaph_t2 is constructable/coarseable from Graph_t_in - static_assert(std::is_same_v, v_workw_t>, - "Graph_t_in and Graph_t_out must have the same work weight type"); - static_assert(std::is_same_v, v_memw_t>, - "Graph_t_in and Graph_t_out must have the same memory weight type"); - static_assert(std::is_same_v, v_commw_t>, - "Graph_t_in and Graph_t_out must have the same communication weight type"); + // maybe add concept for when Gtaph_t2 is constructable/coarseable from GraphTIn + static_assert(std::is_same_v, VWorkwT>, + "GraphTIn and GraphTOut must have the same work weight type"); + static_assert(std::is_same_v, VMemwT>, + "GraphTIn and GraphTOut must have the same memory weight type"); + static_assert(std::is_same_v, VCommwT>, + "GraphTIn and GraphTOut must have the same communication weight type"); public: /** @@ -59,16 +59,14 @@ class Coarser { * @param vertex_contraction_map Output mapping from dag_in to coarsened_dag. * @return A status code indicating the success or failure of the coarsening operation. */ - virtual bool coarsenDag(const Graph_t_in &dag_in, - Graph_t_out &coarsened_dag, - std::vector> &vertex_contraction_map) + virtual bool CoarsenDag(const GraphTIn &dagIn, GraphTOut &coarsenedDag, std::vector> &vertexContractionMap) = 0; /** * @brief Get the name of the coarsening algorithm. * @return A human-readable name of the coarsening algorithm, typically used for identification or logging purposes. 
*/ - virtual std::string getCoarserName() const = 0; + virtual std::string GetCoarserName() const = 0; /** * @brief Destructor for the Coarser class. @@ -81,18 +79,18 @@ class Coarser { * @brief Abstract base class for coarsening ComputationalDags. * */ -template -class CoarserGenContractionMap : public Coarser { +template +class CoarserGenContractionMap : public Coarser { public: - virtual std::vector> generate_vertex_contraction_map(const Graph_t_in &dag_in) = 0; + virtual std::vector> GenerateVertexContractionMap(const GraphTIn &dagIn) = 0; - virtual bool coarsenDag(const Graph_t_in &dag_in, - Graph_t_out &coarsened_dag, - std::vector> &vertex_contraction_map) override { - vertex_contraction_map = dag_in.num_vertices() == 0 ? std::vector>() - : generate_vertex_contraction_map(dag_in); + virtual bool CoarsenDag(const GraphTIn &dagIn, + GraphTOut &coarsenedDag, + std::vector> &vertexContractionMap) override { + vertexContractionMap = dagIn.NumVertices() == 0 ? std::vector>() + : GenerateVertexContractionMap(dagIn); - return coarser_util::construct_coarse_dag(dag_in, coarsened_dag, vertex_contraction_map); + return coarser_util::ConstructCoarseDag(dagIn, coarsenedDag, vertexContractionMap); } /** @@ -106,34 +104,34 @@ class CoarserGenContractionMap : public Coarser { * @brief Abstract base class for coarsening ComputationalDags. 
* */ -template -class CoarserGenExpansionMap : public Coarser { +template +class CoarserGenExpansionMap : public Coarser { public: - virtual std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in) = 0; + virtual std::vector>> GenerateVertexExpansionMap(const GraphTIn &dagIn) = 0; - virtual bool coarsenDag(const Graph_t_in &dag_in, - Graph_t_out &coarsened_dag, - std::vector> &vertex_contraction_map) override { - if (dag_in.num_vertices() == 0) { - vertex_contraction_map = std::vector>(); + virtual bool CoarsenDag(const GraphTIn &dagIn, + GraphTOut &coarsenedDag, + std::vector> &vertexContractionMap) override { + if (dagIn.NumVertices() == 0) { + vertexContractionMap = std::vector>(); return true; } - std::vector>> vertex_expansion_map = generate_vertex_expansion_map(dag_in); - assert(coarser_util::check_valid_expansion_map(vertex_expansion_map)); + std::vector>> vertexExpansionMap = GenerateVertexExpansionMap(dagIn); + assert(coarser_util::CheckValidExpansionMap(vertexExpansionMap)); - coarser_util::reorder_expansion_map(dag_in, vertex_expansion_map); + coarser_util::ReorderExpansionMap(dagIn, vertexExpansionMap); - vertex_contraction_map = coarser_util::invert_vertex_expansion_map(vertex_expansion_map); + vertexContractionMap = coarser_util::InvertVertexExpansionMap(vertexExpansionMap); - return coarser_util::construct_coarse_dag(dag_in, coarsened_dag, vertex_contraction_map); + return coarser_util::ConstructCoarseDag(dagIn, coarsenedDag, vertexContractionMap); } /** * @brief Get the name of the coarsening algorithm. * @return A human-readable name of the coarsening algorithm, typically used for identification or logging purposes. */ - virtual std::string getCoarserName() const override = 0; + virtual std::string GetCoarserName() const override = 0; /** * @brief Destructor for the CoarserGenExpansionMap class. 
diff --git a/include/osp/coarser/MultilevelCoarser.hpp b/include/osp/coarser/MultilevelCoarser.hpp index 4baecd3d..47bed189 100644 --- a/include/osp/coarser/MultilevelCoarser.hpp +++ b/include/osp/coarser/MultilevelCoarser.hpp @@ -30,251 +30,247 @@ limitations under the License. namespace osp { -template +template class MultilevelCoarseAndSchedule; -template -class MultilevelCoarser : public Coarser { - friend class MultilevelCoarseAndSchedule; +template +class MultilevelCoarser : public Coarser { + friend class MultilevelCoarseAndSchedule; private: - const Graph_t *original_graph; + const GraphT *originalGraph_; protected: - inline const Graph_t *getOriginalGraph() const { return original_graph; }; + inline const GraphT *GetOriginalGraph() const { return originalGraph_; }; - std::vector> dag_history; - std::vector>>> contraction_maps; + std::vector> dagHistory_; + std::vector>>> contractionMaps_; - RETURN_STATUS add_contraction(const std::vector> &contraction_map); - RETURN_STATUS add_contraction(std::vector> &&contraction_map); - RETURN_STATUS add_contraction(const std::vector> &contraction_map, - const Graph_t_coarse &contracted_graph); - RETURN_STATUS add_contraction(std::vector> &&contraction_map, Graph_t_coarse &&contracted_graph); - void add_identity_contraction(); + ReturnStatus AddContraction(const std::vector> &contractionMap); + ReturnStatus AddContraction(std::vector> &&contractionMap); + ReturnStatus AddContraction(const std::vector> &contractionMap, const GraphTCoarse &contractedGraph); + ReturnStatus AddContraction(std::vector> &&contractionMap, GraphTCoarse &&contractedGraph); + void AddIdentityContraction(); - std::vector> getCombinedContractionMap() const; + std::vector> GetCombinedContractionMap() const; - virtual RETURN_STATUS run_contractions() = 0; - void compactify_dag_history(); + virtual ReturnStatus RunContractions() = 0; + void CompactifyDagHistory(); - void clear_computation_data(); + void ClearComputationData(); public: - 
MultilevelCoarser() : original_graph(nullptr) {}; - MultilevelCoarser(const Graph_t &graph) : original_graph(&graph) {}; + MultilevelCoarser() : originalGraph_(nullptr) {}; + MultilevelCoarser(const GraphT &graph) : originalGraph_(&graph) {}; virtual ~MultilevelCoarser() = default; - bool coarsenDag(const Graph_t &dag_in, - Graph_t_coarse &coarsened_dag, - std::vector> &vertex_contraction_map) override; + bool CoarsenDag(const GraphT &dagIn, + GraphTCoarse &coarsenedDag, + std::vector> &vertexContractionMap) override; - RETURN_STATUS run(const Graph_t &graph); - RETURN_STATUS run(const BspInstance &inst); + ReturnStatus Run(const GraphT &graph); + ReturnStatus Run(const BspInstance &inst); - virtual std::string getCoarserName() const override = 0; + virtual std::string GetCoarserName() const override = 0; }; -template -RETURN_STATUS MultilevelCoarser::run(const Graph_t &graph) { - clear_computation_data(); - original_graph = &graph; +template +ReturnStatus MultilevelCoarser::Run(const GraphT &graph) { + ClearComputationData(); + originalGraph_ = &graph; - RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; - status = std::max(status, run_contractions()); + ReturnStatus status = ReturnStatus::OSP_SUCCESS; + status = std::max(status, RunContractions()); - if (dag_history.size() == 0) { - add_identity_contraction(); + if (dagHistory_.size() == 0) { + AddIdentityContraction(); } return status; } -template -RETURN_STATUS MultilevelCoarser::run(const BspInstance &inst) { - return run(inst.getComputationalDag()); +template +ReturnStatus MultilevelCoarser::Run(const BspInstance &inst) { + return Run(inst.GetComputationalDag()); } -template -void MultilevelCoarser::clear_computation_data() { - dag_history.clear(); - dag_history.shrink_to_fit(); +template +void MultilevelCoarser::ClearComputationData() { + dagHistory_.clear(); + dagHistory_.shrink_to_fit(); - contraction_maps.clear(); - contraction_maps.shrink_to_fit(); + contractionMaps_.clear(); + 
contractionMaps_.shrink_to_fit(); } -template -void MultilevelCoarser::compactify_dag_history() { - if (dag_history.size() < 3) { +template +void MultilevelCoarser::CompactifyDagHistory() { + if (dagHistory_.size() < 3) { return; } - size_t dag_indx_first = dag_history.size() - 2; - size_t map_indx_first = contraction_maps.size() - 2; + size_t dagIndxFirst = dagHistory_.size() - 2; + size_t mapIndxFirst = contractionMaps_.size() - 2; - size_t dag_indx_second = dag_history.size() - 1; - size_t map_indx_second = contraction_maps.size() - 1; + size_t dagIndxSecond = dagHistory_.size() - 1; + size_t mapIndxSecond = contractionMaps_.size() - 1; - if ((static_cast(dag_history[dag_indx_first - 1]->num_vertices()) - / static_cast(dag_history[dag_indx_second - 1]->num_vertices())) + if ((static_cast(dagHistory_[dagIndxFirst - 1]->NumVertices()) + / static_cast(dagHistory_[dagIndxSecond - 1]->NumVertices())) > 1.25) { return; } // Compute combined contraction_map - std::unique_ptr>> combi_contraction_map - = std::make_unique>>(contraction_maps[map_indx_first]->size()); - for (std::size_t vert = 0; vert < contraction_maps[map_indx_first]->size(); ++vert) { - combi_contraction_map->at(vert) = contraction_maps[map_indx_second]->at(contraction_maps[map_indx_first]->at(vert)); + std::unique_ptr>> combiContractionMap + = std::make_unique>>(contractionMaps_[mapIndxFirst]->size()); + for (std::size_t vert = 0; vert < contractionMaps_[mapIndxFirst]->size(); ++vert) { + combiContractionMap->at(vert) = contractionMaps_[mapIndxSecond]->at(contractionMaps_[mapIndxFirst]->at(vert)); } // Delete ComputationalDag - auto dag_it = dag_history.begin(); - std::advance(dag_it, dag_indx_first); - dag_history.erase(dag_it); + auto dagIt = dagHistory_.begin(); + std::advance(dagIt, dagIndxFirst); + dagHistory_.erase(dagIt); // Delete contraction map - auto contr_map_it = contraction_maps.begin(); - std::advance(contr_map_it, map_indx_second); - contraction_maps.erase(contr_map_it); + auto 
contrMapIt = contractionMaps_.begin(); + std::advance(contrMapIt, mapIndxSecond); + contractionMaps_.erase(contrMapIt); // Replace contraction map - contraction_maps[map_indx_first] = std::move(combi_contraction_map); + contractionMaps_[mapIndxFirst] = std::move(combiContractionMap); } -template -RETURN_STATUS MultilevelCoarser::add_contraction( - const std::vector> &contraction_map) { - std::unique_ptr new_graph = std::make_unique(); +template +ReturnStatus MultilevelCoarser::AddContraction(const std::vector> &contractionMap) { + std::unique_ptr newGraph = std::make_unique(); - contraction_maps.emplace_back(contraction_map); + contractionMaps_.emplace_back(contractionMap); bool success = false; - if (dag_history.size() == 0) { - success = coarser_util::construct_coarse_dag( - *(getOriginalGraph()), *new_graph, *(contraction_maps.back())); + if (dagHistory_.size() == 0) { + success = coarser_util::ConstructCoarseDag( + *(GetOriginalGraph()), *newGraph, *(contractionMaps_.back())); } else { - success = coarser_util::construct_coarse_dag( - *(dag_history.back()), *new_graph, *(contraction_maps.back())); + success = coarser_util::ConstructCoarseDag( + *(dagHistory_.back()), *newGraph, *(contractionMaps_.back())); } - dag_history.emplace_back(std::move(new_graph)); + dagHistory_.emplace_back(std::move(newGraph)); if (success) { - compactify_dag_history(); - return RETURN_STATUS::OSP_SUCCESS; + CompactifyDagHistory(); + return ReturnStatus::OSP_SUCCESS; } else { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } } -template -RETURN_STATUS MultilevelCoarser::add_contraction( - std::vector> &&contraction_map) { - std::unique_ptr new_graph = std::make_unique(); +template +ReturnStatus MultilevelCoarser::AddContraction(std::vector> &&contractionMap) { + std::unique_ptr newGraph = std::make_unique(); - std::unique_ptr>> contr_map_ptr( - new std::vector>(std::move(contraction_map))); - contraction_maps.emplace_back(std::move(contr_map_ptr)); + std::unique_ptr>> 
contrMapPtr( + new std::vector>(std::forward>>(contractionMap))); + contractionMaps_.emplace_back(std::move(contrMapPtr)); bool success = false; - if (dag_history.size() == 0) { - success = coarser_util::construct_coarse_dag( - *(getOriginalGraph()), *new_graph, *(contraction_maps.back())); + if (dagHistory_.size() == 0) { + success = coarser_util::ConstructCoarseDag( + *(GetOriginalGraph()), *newGraph, *(contractionMaps_.back())); } else { - success = coarser_util::construct_coarse_dag( - *(dag_history.back()), *new_graph, *(contraction_maps.back())); + success = coarser_util::ConstructCoarseDag( + *(dagHistory_.back()), *newGraph, *(contractionMaps_.back())); } - dag_history.emplace_back(std::move(new_graph)); + dagHistory_.emplace_back(std::move(newGraph)); if (success) { - compactify_dag_history(); - return RETURN_STATUS::OSP_SUCCESS; + CompactifyDagHistory(); + return ReturnStatus::OSP_SUCCESS; } else { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } } -template -RETURN_STATUS MultilevelCoarser::add_contraction( - const std::vector> &contraction_map, const Graph_t_coarse &contracted_graph) { - std::unique_ptr graph_ptr(new Graph_t_coarse(contracted_graph)); - dag_history.emplace_back(std::move(graph_ptr)); +template +ReturnStatus MultilevelCoarser::AddContraction(const std::vector> &contractionMap, + const GraphTCoarse &contractedGraph) { + std::unique_ptr graphPtr(new GraphTCoarse(contractedGraph)); + dagHistory_.emplace_back(std::move(graphPtr)); - std::unique_ptr>> contr_map_ptr( - new std::vector>(contraction_map)); - contraction_maps.emplace_back(std::move(contr_map_ptr)); + std::unique_ptr>> contrMapPtr(new std::vector>(contractionMap)); + contractionMaps_.emplace_back(std::move(contrMapPtr)); - compactify_dag_history(); - return RETURN_STATUS::OSP_SUCCESS; + CompactifyDagHistory(); + return ReturnStatus::OSP_SUCCESS; } -template -RETURN_STATUS MultilevelCoarser::add_contraction( - std::vector> &&contraction_map, Graph_t_coarse 
&&contracted_graph) { - std::unique_ptr graph_ptr(new Graph_t_coarse(std::move(contracted_graph))); - dag_history.emplace_back(std::move(graph_ptr)); +template +ReturnStatus MultilevelCoarser::AddContraction(std::vector> &&contractionMap, + GraphTCoarse &&contractedGraph) { + std::unique_ptr graphPtr(new GraphTCoarse(std::forward(contractedGraph))); + dagHistory_.emplace_back(std::move(graphPtr)); - std::unique_ptr>> contr_map_ptr( - new std::vector>(std::move(contraction_map))); - contraction_maps.emplace_back(std::move(contr_map_ptr)); + std::unique_ptr>> contrMapPtr( + new std::vector>(std::forward>>(contractionMap))); + contractionMaps_.emplace_back(std::move(contrMapPtr)); - compactify_dag_history(); - return RETURN_STATUS::OSP_SUCCESS; + CompactifyDagHistory(); + return ReturnStatus::OSP_SUCCESS; } -template -std::vector> MultilevelCoarser::getCombinedContractionMap() const { - std::vector> combinedContractionMap(original_graph->num_vertices()); +template +std::vector> MultilevelCoarser::GetCombinedContractionMap() const { + std::vector> combinedContractionMap(originalGraph_->NumVertices()); std::iota(combinedContractionMap.begin(), combinedContractionMap.end(), 0); - for (std::size_t j = 0; j < contraction_maps.size(); ++j) { + for (std::size_t j = 0; j < contractionMaps_.size(); ++j) { for (std::size_t i = 0; i < combinedContractionMap.size(); ++i) { - combinedContractionMap[i] = contraction_maps[j]->at(combinedContractionMap[i]); + combinedContractionMap[i] = contractionMaps_[j]->at(combinedContractionMap[i]); } } return combinedContractionMap; } -template -bool MultilevelCoarser::coarsenDag(const Graph_t &dag_in, - Graph_t_coarse &coarsened_dag, - std::vector> &vertex_contraction_map) { - clear_computation_data(); +template +bool MultilevelCoarser::CoarsenDag(const GraphT &dagIn, + GraphTCoarse &coarsenedDag, + std::vector> &vertexContractionMap) { + ClearComputationData(); - RETURN_STATUS status = run(dag_in); + ReturnStatus status = Run(dagIn); - if 
(status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + if (status != ReturnStatus::OSP_SUCCESS && status != ReturnStatus::BEST_FOUND) { return false; } - assert(dag_history.size() != 0); - coarsened_dag = *(dag_history.back()); + assert(dagHistory_.size() != 0); + coarsenedDag = *(dagHistory_.back()); - vertex_contraction_map = getCombinedContractionMap(); + vertexContractionMap = GetCombinedContractionMap(); return true; } -template -void MultilevelCoarser::add_identity_contraction() { - std::size_t n_vert; - if (dag_history.size() == 0) { - n_vert = static_cast(original_graph->num_vertices()); +template +void MultilevelCoarser::AddIdentityContraction() { + std::size_t nVert; + if (dagHistory_.size() == 0) { + nVert = static_cast(originalGraph_->NumVertices()); } else { - n_vert = static_cast(dag_history.back()->num_vertices()); + nVert = static_cast(dagHistory_.back()->NumVertices()); } - std::vector> contraction_map(n_vert); - std::iota(contraction_map.begin(), contraction_map.end(), 0); + std::vector> contractionMap(nVert); + std::iota(contractionMap.begin(), contractionMap.end(), 0); - add_contraction(std::move(contraction_map)); - compactify_dag_history(); + AddContraction(std::move(contractionMap)); + CompactifyDagHistory(); } } // end namespace osp diff --git a/include/osp/coarser/Sarkar/Sarkar.hpp b/include/osp/coarser/Sarkar/Sarkar.hpp index 672fc6d2..0af5a9e9 100644 --- a/include/osp/coarser/Sarkar/Sarkar.hpp +++ b/include/osp/coarser/Sarkar/Sarkar.hpp @@ -34,7 +34,7 @@ limitations under the License. 
namespace osp { -namespace SarkarParams { +namespace sarkar_params { enum class Mode { LINES, @@ -49,69 +49,68 @@ enum class Mode { HOMOGENEOUS_BUFFER }; -template +template struct Parameters { - double geomDecay{0.875}; - double leniency{0.0}; - Mode mode{Mode::LINES}; - commCostType commCost{static_cast(0)}; - commCostType maxWeight{std::numeric_limits::max()}; - commCostType smallWeightThreshold{std::numeric_limits::lowest()}; - bool useTopPoset{true}; + double geomDecay_{0.875}; + double leniency_{0.0}; + Mode mode_{Mode::LINES}; + CommCostType commCost_{static_cast(0)}; + CommCostType maxWeight_{std::numeric_limits::max()}; + CommCostType smallWeightThreshold_{std::numeric_limits::lowest()}; + bool useTopPoset_{true}; }; -} // end namespace SarkarParams +} // namespace sarkar_params -template -class Sarkar : public CoarserGenExpansionMap { +template +class Sarkar : public CoarserGenExpansionMap { private: - SarkarParams::Parameters> params; - - std::vector> getBotPosetMap(const Graph_t_in &graph) const; - std::vector> getTopDistance(v_workw_t commCost, const Graph_t_in &graph) const; - std::vector> getBotDistance(v_workw_t commCost, const Graph_t_in &graph) const; - - vertex_idx_t singleContraction(v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const; - vertex_idx_t allChildrenContraction(v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const; - vertex_idx_t someChildrenContraction(v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const; - vertex_idx_t allParentsContraction(v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const; - vertex_idx_t someParentsContraction(v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const; - vertex_idx_t levelContraction(v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const; - - vertex_idx_t 
homogeneous_buffer_merge(v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const; - std::vector homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const; - - std::vector computeNodeHashes(const Graph_t_in &graph, - const std::vector> &vertexPoset, - const std::vector> &dist) const; + sarkar_params::Parameters> params_; + + std::vector> GetBotPosetMap(const GraphTIn &graph) const; + std::vector> GetTopDistance(VWorkwT commCost, const GraphTIn &graph) const; + std::vector> GetBotDistance(VWorkwT commCost, const GraphTIn &graph) const; + + VertexIdxT SingleContraction(VWorkwT commCost, + const GraphTIn &graph, + std::vector>> &expansionMapOutput) const; + VertexIdxT AllChildrenContraction(VWorkwT commCost, + const GraphTIn &graph, + std::vector>> &expansionMapOutput) const; + VertexIdxT SomeChildrenContraction(VWorkwT commCost, + const GraphTIn &graph, + std::vector>> &expansionMapOutput) const; + VertexIdxT AllParentsContraction(VWorkwT commCost, + const GraphTIn &graph, + std::vector>> &expansionMapOutput) const; + VertexIdxT SomeParentsContraction(VWorkwT commCost, + const GraphTIn &graph, + std::vector>> &expansionMapOutput) const; + VertexIdxT LevelContraction(VWorkwT commCost, + const GraphTIn &graph, + std::vector>> &expansionMapOutput) const; + + VertexIdxT HomogeneousBufferMerge(VWorkwT commCost, + const GraphTIn &graph, + std::vector>> &expansionMapOutput) const; + std::vector HomogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const; + + std::vector ComputeNodeHashes(const GraphTIn &graph, + const std::vector> &vertexPoset, + const std::vector> &dist) const; public: - virtual std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in) override; - std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in, - vertex_idx_t &diff); + virtual std::vector>> GenerateVertexExpansionMap(const GraphTIn &dagIn) override; + 
std::vector>> GenerateVertexExpansionMap(const GraphTIn &dagIn, VertexIdxT &diff); - inline void setParameters(const SarkarParams::Parameters> ¶ms_) { params = params_; }; + inline void SetParameters(const sarkar_params::Parameters> ¶ms) { params_ = params; }; - inline SarkarParams::Parameters> &getParameters() { return params; }; + inline sarkar_params::Parameters> &GetParameters() { return params_; }; - inline const SarkarParams::Parameters> &getParameters() const { return params; }; + inline const sarkar_params::Parameters> &GetParameters() const { return params_; }; - Sarkar(SarkarParams::Parameters> params_ = SarkarParams::Parameters>()) - : params(params_) {}; + Sarkar(sarkar_params::Parameters> params = sarkar_params::Parameters>()) + : params_(params) {}; Sarkar(const Sarkar &) = default; Sarkar(Sarkar &&) = default; @@ -119,14 +118,14 @@ class Sarkar : public CoarserGenExpansionMap { Sarkar &operator=(Sarkar &&) = default; virtual ~Sarkar() override = default; - std::string getCoarserName() const override { return "Sarkar"; } + std::string GetCoarserName() const override { return "Sarkar"; } }; -template -std::vector> Sarkar::getBotPosetMap(const Graph_t_in &graph) const { - std::vector> botPosetMap = get_bottom_node_distance>(graph); +template +std::vector> Sarkar::GetBotPosetMap(const GraphTIn &graph) const { + std::vector> botPosetMap = GetBottomNodeDistance>(graph); - vertex_idx_t max = *std::max_element(botPosetMap.begin(), botPosetMap.end()); + VertexIdxT max = *std::max_element(botPosetMap.begin(), botPosetMap.end()); ++max; for (std::size_t i = 0; i < botPosetMap.size(); i++) { @@ -136,60 +135,58 @@ std::vector> Sarkar::getBotPos return botPosetMap; } -template -std::vector> Sarkar::getTopDistance(v_workw_t commCost, - const Graph_t_in &graph) const { - std::vector> topDist(graph.num_vertices(), 0); +template +std::vector> Sarkar::GetTopDistance(VWorkwT commCost, + const GraphTIn &graph) const { + std::vector> topDist(graph.NumVertices(), 0); - for 
(const auto &vertex : GetTopOrder(graph)) { - v_workw_t max_temp = 0; + for (const auto &vertex : GetTopOrder(graph)) { + VWorkwT maxTemp = 0; - for (const auto &j : graph.parents(vertex)) { - max_temp = std::max(max_temp, topDist[j]); + for (const auto &j : graph.Parents(vertex)) { + maxTemp = std::max(maxTemp, topDist[j]); } - if (graph.in_degree(vertex) > 0) { - max_temp += commCost; + if (graph.InDegree(vertex) > 0) { + maxTemp += commCost; } - topDist[vertex] = max_temp + graph.vertex_work_weight(vertex); + topDist[vertex] = maxTemp + graph.VertexWorkWeight(vertex); } return topDist; } -template -std::vector> Sarkar::getBotDistance(v_workw_t commCost, - const Graph_t_in &graph) const { - std::vector> botDist(graph.num_vertices(), 0); +template +std::vector> Sarkar::GetBotDistance(VWorkwT commCost, + const GraphTIn &graph) const { + std::vector> botDist(graph.NumVertices(), 0); - for (const auto &vertex : GetTopOrderReverse(graph)) { - v_workw_t max_temp = 0; + for (const auto &vertex : GetTopOrderReverse(graph)) { + VWorkwT maxTemp = 0; - for (const auto &j : graph.children(vertex)) { - max_temp = std::max(max_temp, botDist[j]); + for (const auto &j : graph.Children(vertex)) { + maxTemp = std::max(maxTemp, botDist[j]); } - if (graph.out_degree(vertex) > 0) { - max_temp += commCost; + if (graph.OutDegree(vertex) > 0) { + maxTemp += commCost; } - botDist[vertex] = max_temp + graph.vertex_work_weight(vertex); + botDist[vertex] = maxTemp + graph.VertexWorkWeight(vertex); } return botDist; } -template -vertex_idx_t Sarkar::singleContraction( - v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const { - using VertexType = vertex_idx_t; +template +VertexIdxT Sarkar::SingleContraction( + VWorkwT commCost, const GraphTIn &graph, std::vector>> &expansionMapOutput) const { + using VertexType = VertexIdxT; assert(expansionMapOutput.size() == 0); - const std::vector> vertexPoset - = params.useTopPoset ? 
get_top_node_distance>(graph) : getBotPosetMap(graph); - const std::vector> topDist = getTopDistance(commCost, graph); - const std::vector> botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset + = params_.useTopPoset_ ? GetTopNodeDistance>(graph) : GetBotPosetMap(graph); + const std::vector> topDist = GetTopDistance(commCost, graph); + const std::vector> botDist = GetBotDistance(commCost, graph); auto cmp = [](const std::tuple &lhs, const std::tuple &rhs) { return (std::get<0>(lhs) > std::get<0>(rhs)) @@ -199,10 +196,10 @@ vertex_idx_t Sarkar::singleContraction( }; std::set, decltype(cmp)> edgePriority(cmp); - for (const VertexType &edgeSrc : graph.vertices()) { - for (const VertexType &edgeTgt : graph.children(edgeSrc)) { - if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(edgeSrc) != graph.vertex_type(edgeTgt)) { + for (const VertexType &edgeSrc : graph.Vertices()) { + for (const VertexType &edgeTgt : graph.Children(edgeSrc)) { + if constexpr (hasTypedVerticesV) { + if (graph.VertexType(edgeSrc) != graph.VertexType(edgeTgt)) { continue; } } @@ -210,42 +207,42 @@ vertex_idx_t Sarkar::singleContraction( if (vertexPoset[edgeSrc] + 1 != vertexPoset[edgeTgt]) { continue; } - if (topDist[edgeSrc] + commCost + graph.vertex_work_weight(edgeTgt) != topDist[edgeTgt]) { + if (topDist[edgeSrc] + commCost + graph.VertexWorkWeight(edgeTgt) != topDist[edgeTgt]) { continue; } - if (botDist[edgeTgt] + commCost + graph.vertex_work_weight(edgeSrc) != botDist[edgeSrc]) { + if (botDist[edgeTgt] + commCost + graph.VertexWorkWeight(edgeSrc) != botDist[edgeSrc]) { continue; } - if (graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt) > params.maxWeight) { + if (graph.VertexWorkWeight(edgeSrc) + graph.VertexWorkWeight(edgeTgt) > params_.maxWeight_) { continue; } - v_workw_t maxPath = topDist[edgeSrc] + botDist[edgeTgt] + commCost; - v_workw_t maxParentDist = 0; - v_workw_t maxChildDist = 0; + VWorkwT maxPath = topDist[edgeSrc] + 
botDist[edgeTgt] + commCost; + VWorkwT maxParentDist = 0; + VWorkwT maxChildDist = 0; - for (const auto &par : graph.parents(edgeSrc)) { + for (const auto &par : graph.Parents(edgeSrc)) { maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } - for (const auto &par : graph.parents(edgeTgt)) { + for (const auto &par : graph.Parents(edgeTgt)) { if (par == edgeSrc) { continue; } maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } - for (const auto &chld : graph.children(edgeSrc)) { + for (const auto &chld : graph.Children(edgeSrc)) { if (chld == edgeTgt) { continue; } maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } - for (const auto &chld : graph.children(edgeTgt)) { + for (const auto &chld : graph.Children(edgeTgt)) { maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } - v_workw_t newMaxPath - = maxParentDist + maxChildDist + graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt); + VWorkwT newMaxPath + = maxParentDist + maxChildDist + graph.VertexWorkWeight(edgeSrc) + graph.VertexWorkWeight(edgeTgt); long savings = static_cast(maxPath) - static_cast(newMaxPath); // cannot have leniency here as it may destroy symmetries @@ -255,14 +252,13 @@ vertex_idx_t Sarkar::singleContraction( } } - std::vector partitionedSourceFlag(graph.num_vertices(), false); - std::vector partitionedTargetFlag(graph.num_vertices(), false); + std::vector partitionedSourceFlag(graph.NumVertices(), false); + std::vector partitionedTargetFlag(graph.NumVertices(), false); - vertex_idx_t maxCorseningNum - = graph.num_vertices() - - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + VertexIdxT maxCorseningNum + = graph.NumVertices() - static_cast>(static_cast(graph.NumVertices()) * params_.geomDecay_); - vertex_idx_t counter = 0; + VertexIdxT counter = 0; long minSave = std::numeric_limits::lowest(); for (auto prioIter = edgePriority.begin(); prioIter != edgePriority.end(); prioIter++) { const long 
&edgeSave = std::get<0>(*prioIter); @@ -289,14 +285,14 @@ vertex_idx_t Sarkar::singleContraction( } bool shouldSkipSrc = false; - for (const VertexType &chld : graph.children(edgeSrc)) { + for (const VertexType &chld : graph.Children(edgeSrc)) { if ((vertexPoset[chld] == vertexPoset[edgeSrc] + 1) && partitionedTargetFlag[chld]) { shouldSkipSrc = true; break; } } bool shouldSkipTgt = false; - for (const VertexType &par : graph.parents(edgeTgt)) { + for (const VertexType &par : graph.Parents(edgeTgt)) { if ((vertexPoset[par] + 1 == vertexPoset[edgeTgt]) && partitionedSourceFlag[par]) { shouldSkipTgt = true; break; @@ -316,8 +312,8 @@ vertex_idx_t Sarkar::singleContraction( partitionedTargetFlag[edgeTgt] = true; } - expansionMapOutput.reserve(graph.num_vertices() - counter); - for (const VertexType &vert : graph.vertices()) { + expansionMapOutput.reserve(graph.NumVertices() - counter); + for (const VertexType &vert : graph.Vertices()) { if (partitionedSourceFlag[vert]) { continue; } @@ -331,32 +327,30 @@ vertex_idx_t Sarkar::singleContraction( return counter; } -template -vertex_idx_t Sarkar::allChildrenContraction( - v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const { - using VertexType = vertex_idx_t; +template +VertexIdxT Sarkar::AllChildrenContraction( + VWorkwT commCost, const GraphTIn &graph, std::vector>> &expansionMapOutput) const { + using VertexType = VertexIdxT; assert(expansionMapOutput.size() == 0); - const std::vector> vertexPoset = get_top_node_distance>(graph); - const std::vector> topDist = getTopDistance(commCost, graph); - const std::vector> botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset = GetTopNodeDistance>(graph); + const std::vector> topDist = GetTopDistance(commCost, graph); + const std::vector> botDist = GetBotDistance(commCost, graph); auto cmp = [](const std::pair &lhs, const std::pair &rhs) { return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < 
rhs.second)); }; std::set, decltype(cmp)> vertPriority(cmp); - for (const VertexType &groupHead : graph.vertices()) { - if (graph.out_degree(groupHead) < 2) { + for (const VertexType &groupHead : graph.Vertices()) { + if (graph.OutDegree(groupHead) < 2) { continue; } bool shouldSkip = false; - if constexpr (has_typed_vertices_v) { - for (const VertexType &groupFoot : graph.children(groupHead)) { - if (graph.vertex_type(groupHead) != graph.vertex_type(groupFoot)) { + if constexpr (hasTypedVerticesV) { + for (const VertexType &groupFoot : graph.Children(groupHead)) { + if (graph.VertexType(groupHead) != graph.VertexType(groupFoot)) { shouldSkip = true; break; } @@ -365,7 +359,7 @@ vertex_idx_t Sarkar::allChildrenContraction if (shouldSkip) { continue; } - for (const VertexType &groupFoot : graph.children(groupHead)) { + for (const VertexType &groupFoot : graph.Children(groupHead)) { if (vertexPoset[groupFoot] != vertexPoset[groupHead] + 1) { shouldSkip = true; break; @@ -374,27 +368,27 @@ vertex_idx_t Sarkar::allChildrenContraction if (shouldSkip) { continue; } - v_workw_t combined_weight = graph.vertex_work_weight(groupHead); - for (const VertexType &groupFoot : graph.children(groupHead)) { - combined_weight += graph.vertex_work_weight(groupFoot); + VWorkwT combinedWeight = graph.VertexWorkWeight(groupHead); + for (const VertexType &groupFoot : graph.Children(groupHead)) { + combinedWeight += graph.VertexWorkWeight(groupFoot); } - if (combined_weight > params.maxWeight) { + if (combinedWeight > params_.maxWeight_) { continue; } - v_workw_t maxPath = topDist[groupHead] + botDist[groupHead] - graph.vertex_work_weight(groupHead); - for (const VertexType &chld : graph.children(groupHead)) { - maxPath = std::max(maxPath, topDist[chld] + botDist[chld] - graph.vertex_work_weight(chld)); + VWorkwT maxPath = topDist[groupHead] + botDist[groupHead] - graph.VertexWorkWeight(groupHead); + for (const VertexType &chld : graph.Children(groupHead)) { + maxPath = std::max(maxPath, 
topDist[chld] + botDist[chld] - graph.VertexWorkWeight(chld)); } - v_workw_t maxParentDist = 0; - v_workw_t maxChildDist = 0; + VWorkwT maxParentDist = 0; + VWorkwT maxChildDist = 0; - for (const VertexType &par : graph.parents(groupHead)) { + for (const VertexType &par : graph.Parents(groupHead)) { maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } - for (const VertexType &groupFoot : graph.children(groupHead)) { - for (const VertexType &par : graph.parents(groupFoot)) { + for (const VertexType &groupFoot : graph.Children(groupHead)) { + for (const VertexType &par : graph.Parents(groupFoot)) { if (par == groupHead) { continue; } @@ -402,30 +396,29 @@ vertex_idx_t Sarkar::allChildrenContraction } } - for (const VertexType &groupFoot : graph.children(groupHead)) { - for (const VertexType &chld : graph.children(groupFoot)) { + for (const VertexType &groupFoot : graph.Children(groupHead)) { + for (const VertexType &chld : graph.Children(groupFoot)) { maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } } - v_workw_t newMaxPath = maxParentDist + maxChildDist + graph.vertex_work_weight(groupHead); - for (const VertexType &groupFoot : graph.children(groupHead)) { - newMaxPath += graph.vertex_work_weight(groupFoot); + VWorkwT newMaxPath = maxParentDist + maxChildDist + graph.VertexWorkWeight(groupHead); + for (const VertexType &groupFoot : graph.Children(groupHead)) { + newMaxPath += graph.VertexWorkWeight(groupFoot); } long savings = static_cast(maxPath) - static_cast(newMaxPath); - if (savings + static_cast(params.leniency * static_cast(maxPath)) >= 0) { + if (savings + static_cast(params_.leniency_ * static_cast(maxPath)) >= 0) { vertPriority.emplace(savings, groupHead); } } - std::vector partitionedFlag(graph.num_vertices(), false); + std::vector partitionedFlag(graph.NumVertices(), false); - vertex_idx_t maxCorseningNum - = graph.num_vertices() - - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + VertexIdxT 
maxCorseningNum + = graph.NumVertices() - static_cast>(static_cast(graph.NumVertices()) * params_.geomDecay_); - vertex_idx_t counter = 0; + VertexIdxT counter = 0; long minSave = std::numeric_limits::lowest(); for (auto prioIter = vertPriority.begin(); prioIter != vertPriority.end(); prioIter++) { const long &vertSave = prioIter->first; @@ -441,7 +434,7 @@ vertex_idx_t Sarkar::allChildrenContraction continue; } bool shouldSkip = false; - for (const VertexType &groupFoot : graph.children(groupHead)) { + for (const VertexType &groupFoot : graph.Children(groupHead)) { if (partitionedFlag[groupFoot]) { shouldSkip = true; break; @@ -453,24 +446,24 @@ vertex_idx_t Sarkar::allChildrenContraction // Adding to partition std::vector part; - part.reserve(1 + graph.out_degree(groupHead)); + part.reserve(1 + graph.OutDegree(groupHead)); part.emplace_back(groupHead); - for (const VertexType &groupFoot : graph.children(groupHead)) { + for (const VertexType &groupFoot : graph.Children(groupHead)) { part.emplace_back(groupFoot); } expansionMapOutput.emplace_back(std::move(part)); - counter += static_cast>(graph.out_degree(groupHead)); + counter += static_cast>(graph.OutDegree(groupHead)); if (counter > maxCorseningNum) { minSave = vertSave; } partitionedFlag[groupHead] = true; - for (const VertexType &groupFoot : graph.children(groupHead)) { + for (const VertexType &groupFoot : graph.Children(groupHead)) { partitionedFlag[groupFoot] = true; } } - for (const VertexType &vert : graph.vertices()) { + for (const VertexType &vert : graph.Vertices()) { if (partitionedFlag[vert]) { continue; } @@ -480,32 +473,30 @@ vertex_idx_t Sarkar::allChildrenContraction return counter; } -template -vertex_idx_t Sarkar::allParentsContraction( - v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const { - using VertexType = vertex_idx_t; +template +VertexIdxT Sarkar::AllParentsContraction( + VWorkwT commCost, const GraphTIn &graph, std::vector>> &expansionMapOutput) 
const { + using VertexType = VertexIdxT; assert(expansionMapOutput.size() == 0); - const std::vector> vertexPoset = getBotPosetMap(graph); - const std::vector> topDist = getTopDistance(commCost, graph); - const std::vector> botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset = GetBotPosetMap(graph); + const std::vector> topDist = GetTopDistance(commCost, graph); + const std::vector> botDist = GetBotDistance(commCost, graph); auto cmp = [](const std::pair &lhs, const std::pair &rhs) { return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); }; std::set, decltype(cmp)> vertPriority(cmp); - for (const VertexType &groupFoot : graph.vertices()) { - if (graph.in_degree(groupFoot) < 2) { + for (const VertexType &groupFoot : graph.Vertices()) { + if (graph.InDegree(groupFoot) < 2) { continue; } bool shouldSkip = false; - if constexpr (has_typed_vertices_v) { - for (const VertexType &groupHead : graph.parents(groupFoot)) { - if (graph.vertex_type(groupHead) != graph.vertex_type(groupFoot)) { + if constexpr (hasTypedVerticesV) { + for (const VertexType &groupHead : graph.Parents(groupFoot)) { + if (graph.VertexType(groupHead) != graph.VertexType(groupFoot)) { shouldSkip = true; break; } @@ -514,7 +505,7 @@ vertex_idx_t Sarkar::allParentsContraction( if (shouldSkip) { continue; } - for (const VertexType &groupHead : graph.parents(groupFoot)) { + for (const VertexType &groupHead : graph.Parents(groupFoot)) { if (vertexPoset[groupFoot] != vertexPoset[groupHead] + 1) { shouldSkip = true; break; @@ -523,27 +514,27 @@ vertex_idx_t Sarkar::allParentsContraction( if (shouldSkip) { continue; } - v_workw_t combined_weight = graph.vertex_work_weight(groupFoot); - for (const VertexType &groupHead : graph.parents(groupFoot)) { - combined_weight += graph.vertex_work_weight(groupHead); + VWorkwT combinedWeight = graph.VertexWorkWeight(groupFoot); + for (const VertexType &groupHead : graph.Parents(groupFoot)) { + combinedWeight += 
graph.VertexWorkWeight(groupHead); } - if (combined_weight > params.maxWeight) { + if (combinedWeight > params_.maxWeight_) { continue; } - v_workw_t maxPath = topDist[groupFoot] + botDist[groupFoot] - graph.vertex_work_weight(groupFoot); - for (const VertexType &par : graph.parents(groupFoot)) { - maxPath = std::max(maxPath, topDist[par] + botDist[par] - graph.vertex_work_weight(par)); + VWorkwT maxPath = topDist[groupFoot] + botDist[groupFoot] - graph.VertexWorkWeight(groupFoot); + for (const VertexType &par : graph.Parents(groupFoot)) { + maxPath = std::max(maxPath, topDist[par] + botDist[par] - graph.VertexWorkWeight(par)); } - v_workw_t maxParentDist = 0; - v_workw_t maxChildDist = 0; + VWorkwT maxParentDist = 0; + VWorkwT maxChildDist = 0; - for (const VertexType &child : graph.children(groupFoot)) { + for (const VertexType &child : graph.Children(groupFoot)) { maxChildDist = std::max(maxChildDist, botDist[child] + commCost); } - for (const VertexType &groupHead : graph.parents(groupFoot)) { - for (const VertexType &chld : graph.children(groupHead)) { + for (const VertexType &groupHead : graph.Parents(groupFoot)) { + for (const VertexType &chld : graph.Children(groupHead)) { if (chld == groupFoot) { continue; } @@ -551,30 +542,29 @@ vertex_idx_t Sarkar::allParentsContraction( } } - for (const VertexType &groupHead : graph.parents(groupFoot)) { - for (const VertexType &par : graph.parents(groupHead)) { + for (const VertexType &groupHead : graph.Parents(groupFoot)) { + for (const VertexType &par : graph.Parents(groupHead)) { maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } } - v_workw_t newMaxPath = maxParentDist + maxChildDist + graph.vertex_work_weight(groupFoot); - for (const VertexType &groupHead : graph.parents(groupFoot)) { - newMaxPath += graph.vertex_work_weight(groupHead); + VWorkwT newMaxPath = maxParentDist + maxChildDist + graph.VertexWorkWeight(groupFoot); + for (const VertexType &groupHead : graph.Parents(groupFoot)) { + 
newMaxPath += graph.VertexWorkWeight(groupHead); } long savings = maxPath - newMaxPath; - if (savings + static_cast(params.leniency * static_cast(maxPath)) >= 0) { + if (savings + static_cast(params_.leniency_ * static_cast(maxPath)) >= 0) { vertPriority.emplace(savings, groupFoot); } } - std::vector partitionedFlag(graph.num_vertices(), false); + std::vector partitionedFlag(graph.NumVertices(), false); - vertex_idx_t maxCorseningNum - = graph.num_vertices() - - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + VertexIdxT maxCorseningNum + = graph.NumVertices() - static_cast>(static_cast(graph.NumVertices()) * params_.geomDecay_); - vertex_idx_t counter = 0; + VertexIdxT counter = 0; long minSave = std::numeric_limits::lowest(); for (auto prioIter = vertPriority.begin(); prioIter != vertPriority.end(); prioIter++) { const long &vertSave = prioIter->first; @@ -590,7 +580,7 @@ vertex_idx_t Sarkar::allParentsContraction( continue; } bool shouldSkip = false; - for (const VertexType &groupHead : graph.parents(groupFoot)) { + for (const VertexType &groupHead : graph.Parents(groupFoot)) { if (partitionedFlag[groupHead]) { shouldSkip = true; break; @@ -602,24 +592,24 @@ vertex_idx_t Sarkar::allParentsContraction( // Adding to partition std::vector part; - part.reserve(1 + graph.in_degree(groupFoot)); + part.reserve(1 + graph.InDegree(groupFoot)); part.emplace_back(groupFoot); - for (const VertexType &groupHead : graph.parents(groupFoot)) { + for (const VertexType &groupHead : graph.Parents(groupFoot)) { part.emplace_back(groupHead); } expansionMapOutput.emplace_back(std::move(part)); - counter += static_cast>(graph.in_degree(groupFoot)); + counter += static_cast>(graph.InDegree(groupFoot)); if (counter > maxCorseningNum) { minSave = vertSave; } partitionedFlag[groupFoot] = true; - for (const VertexType &groupHead : graph.parents(groupFoot)) { + for (const VertexType &groupHead : graph.Parents(groupFoot)) { partitionedFlag[groupHead] = true; } } - for 
(const VertexType &vert : graph.vertices()) { + for (const VertexType &vert : graph.Vertices()) { if (partitionedFlag[vert]) { continue; } @@ -629,45 +619,42 @@ vertex_idx_t Sarkar::allParentsContraction( return counter; } -template -std::vector>> Sarkar::generate_vertex_expansion_map( - const Graph_t_in &dag_in, vertex_idx_t &diff) { - std::vector>> expansionMap; +template +std::vector>> Sarkar::GenerateVertexExpansionMap(const GraphTIn &dagIn, + VertexIdxT &diff) { + std::vector>> expansionMap; // std::cout << "Mode: " << static_cast(params.mode) << "\n"; - switch (params.mode) { - case SarkarParams::Mode::LINES: { - diff = singleContraction(params.commCost, dag_in, expansionMap); + switch (params_.mode_) { + case sarkar_params::Mode::LINES: { + diff = SingleContraction(params_.commCost_, dagIn, expansionMap); } break; - case SarkarParams::Mode::FAN_IN_FULL: { - diff = allParentsContraction(params.commCost, dag_in, expansionMap); + case sarkar_params::Mode::FAN_IN_FULL: { + diff = AllParentsContraction(params_.commCost_, dagIn, expansionMap); } break; - case SarkarParams::Mode::FAN_IN_PARTIAL: { - diff = someParentsContraction(params.commCost, dag_in, expansionMap); + case sarkar_params::Mode::FAN_IN_PARTIAL: { + diff = SomeParentsContraction(params_.commCost_, dagIn, expansionMap); } break; - case SarkarParams::Mode::FAN_OUT_FULL: { - diff = allChildrenContraction(params.commCost, dag_in, expansionMap); + case sarkar_params::Mode::FAN_OUT_FULL: { + diff = AllChildrenContraction(params_.commCost_, dagIn, expansionMap); } break; - case SarkarParams::Mode::FAN_OUT_PARTIAL: { - diff = someChildrenContraction(params.commCost, dag_in, expansionMap); + case sarkar_params::Mode::FAN_OUT_PARTIAL: { + diff = SomeChildrenContraction(params_.commCost_, dagIn, expansionMap); } break; - case SarkarParams::Mode::LEVEL_EVEN: { - diff = levelContraction(params.commCost, dag_in, expansionMap); + case sarkar_params::Mode::LEVEL_EVEN: + case sarkar_params::Mode::LEVEL_ODD: { + diff 
= LevelContraction(params_.commCost_, dagIn, expansionMap); } break; - case SarkarParams::Mode::LEVEL_ODD: { - diff = levelContraction(params.commCost, dag_in, expansionMap); - } break; - - case SarkarParams::Mode::FAN_IN_BUFFER: - case SarkarParams::Mode::FAN_OUT_BUFFER: - case SarkarParams::Mode::HOMOGENEOUS_BUFFER: { - diff = homogeneous_buffer_merge(params.commCost, dag_in, expansionMap); + case sarkar_params::Mode::FAN_IN_BUFFER: + case sarkar_params::Mode::FAN_OUT_BUFFER: + case sarkar_params::Mode::HOMOGENEOUS_BUFFER: { + diff = HomogeneousBufferMerge(params_.commCost_, dagIn, expansionMap); } break; default: { @@ -683,41 +670,38 @@ std::vector>> Sarkar -std::vector>> Sarkar::generate_vertex_expansion_map( - const Graph_t_in &dag_in) { - vertex_idx_t dummy; - return generate_vertex_expansion_map(dag_in, dummy); +template +std::vector>> Sarkar::GenerateVertexExpansionMap(const GraphTIn &dagIn) { + VertexIdxT dummy; + return GenerateVertexExpansionMap(dagIn, dummy); } -template -vertex_idx_t Sarkar::someChildrenContraction( - v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const { - using VertexType = vertex_idx_t; +template +VertexIdxT Sarkar::SomeChildrenContraction( + VWorkwT commCost, const GraphTIn &graph, std::vector>> &expansionMapOutput) const { + using VertexType = VertexIdxT; assert(expansionMapOutput.size() == 0); - const std::vector> vertexPoset = get_top_node_distance>(graph); - const std::vector> topDist = getTopDistance(commCost, graph); - const std::vector> botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset = GetTopNodeDistance>(graph); + const std::vector> topDist = GetTopDistance(commCost, graph); + const std::vector> botDist = GetBotDistance(commCost, graph); auto cmp = [](const std::pair> &lhs, const std::pair> &rhs) { return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); }; std::set>, decltype(cmp)> vertPriority(cmp); - for (const VertexType 
&groupHead : graph.vertices()) { - if (graph.out_degree(groupHead) < 2) { + for (const VertexType &groupHead : graph.Vertices()) { + if (graph.OutDegree(groupHead) < 2) { continue; } - auto cmp_chld = [&topDist, &botDist](const VertexType &lhs, const VertexType &rhs) { + auto cmpChld = [&topDist, &botDist](const VertexType &lhs, const VertexType &rhs) { return (topDist[lhs] < topDist[rhs]) || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] > botDist[rhs])) || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] == botDist[rhs]) && (lhs < rhs)); }; - std::set childrenPriority(cmp_chld); - for (const VertexType &chld : graph.children(groupHead)) { + std::set childrenPriority(cmpChld); + for (const VertexType &chld : graph.Children(groupHead)) { if (vertexPoset[chld] == vertexPoset[groupHead] + 1) { childrenPriority.emplace(chld); } @@ -726,63 +710,62 @@ vertex_idx_t Sarkar::someChildrenContractio continue; } - std::vector::const_iterator, - typename std::set::const_iterator>> - admissble_children_groups; - for (auto chld_iter_start = childrenPriority.cbegin(); chld_iter_start != childrenPriority.cend();) { - if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(groupHead) != graph.vertex_type(*chld_iter_start)) { - ++chld_iter_start; + std::vector::const_iterator, + typename std::set::const_iterator>> + admissbleChildrenGroups; + for (auto chldIterStart = childrenPriority.cbegin(); chldIterStart != childrenPriority.cend();) { + if constexpr (hasTypedVerticesV) { + if (graph.VertexType(groupHead) != graph.VertexType(*chldIterStart)) { + ++chldIterStart; continue; } } - const v_workw_t t_dist = topDist[*chld_iter_start]; - const v_workw_t b_dist = botDist[*chld_iter_start]; - auto chld_iter_end = chld_iter_start; - while (chld_iter_end != childrenPriority.cend() && t_dist == topDist[*chld_iter_end] - && b_dist == botDist[*chld_iter_end]) { - if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(groupHead) != graph.vertex_type(*chld_iter_end)) { + const 
VWorkwT tDist = topDist[*chldIterStart]; + const VWorkwT bDist = botDist[*chldIterStart]; + auto chldIterEnd = chldIterStart; + while (chldIterEnd != childrenPriority.cend() && tDist == topDist[*chldIterEnd] && bDist == botDist[*chldIterEnd]) { + if constexpr (hasTypedVerticesV) { + if (graph.VertexType(groupHead) != graph.VertexType(*chldIterEnd)) { break; } } - ++chld_iter_end; + ++chldIterEnd; } - admissble_children_groups.emplace_back(chld_iter_start, chld_iter_end); - chld_iter_start = chld_iter_end; + admissbleChildrenGroups.emplace_back(chldIterStart, chldIterEnd); + chldIterStart = chldIterEnd; } std::vector contractionEnsemble; std::set contractionChildrenSet; - contractionEnsemble.reserve(1 + graph.out_degree(groupHead)); + contractionEnsemble.reserve(1 + graph.OutDegree(groupHead)); contractionEnsemble.emplace_back(groupHead); - v_workw_t added_weight = graph.vertex_work_weight(groupHead); + VWorkwT addedWeight = graph.VertexWorkWeight(groupHead); - for (std::size_t i = 0U; i < admissble_children_groups.size(); ++i) { - const auto &first = admissble_children_groups[i].first; - const auto &last = admissble_children_groups[i].second; + for (std::size_t i = 0U; i < admissbleChildrenGroups.size(); ++i) { + const auto &first = admissbleChildrenGroups[i].first; + const auto &last = admissbleChildrenGroups[i].second; for (auto it = first; it != last; ++it) { contractionEnsemble.emplace_back(*it); contractionChildrenSet.emplace(*it); - added_weight += graph.vertex_work_weight(*it); + addedWeight += graph.VertexWorkWeight(*it); } - if (added_weight > params.maxWeight) { + if (addedWeight > params_.maxWeight_) { break; } - v_workw_t maxPath = 0; + VWorkwT maxPath = 0; for (const VertexType &vert : contractionEnsemble) { - maxPath = std::max(maxPath, topDist[vert] + botDist[vert] - graph.vertex_work_weight(vert)); + maxPath = std::max(maxPath, topDist[vert] + botDist[vert] - graph.VertexWorkWeight(vert)); } - v_workw_t maxParentDist = 0; - v_workw_t maxChildDist = 
0; + VWorkwT maxParentDist = 0; + VWorkwT maxChildDist = 0; for (const VertexType &vert : contractionEnsemble) { - for (const VertexType &par : graph.parents(vert)) { + for (const VertexType &par : graph.Parents(vert)) { if (par == groupHead) { continue; } @@ -790,38 +773,37 @@ vertex_idx_t Sarkar::someChildrenContractio } } - for (const VertexType &chld : graph.children(groupHead)) { + for (const VertexType &chld : graph.Children(groupHead)) { if (contractionChildrenSet.find(chld) == contractionChildrenSet.end()) { maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } } for (std::size_t j = 1; j < contractionEnsemble.size(); j++) { - for (const VertexType &chld : graph.children(contractionEnsemble[j])) { + for (const VertexType &chld : graph.Children(contractionEnsemble[j])) { maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } } - v_workw_t newMaxPath = maxParentDist + maxChildDist; + VWorkwT newMaxPath = maxParentDist + maxChildDist; for (const VertexType &vert : contractionEnsemble) { - newMaxPath += graph.vertex_work_weight(vert); + newMaxPath += graph.VertexWorkWeight(vert); } long savings = static_cast(maxPath) - static_cast(newMaxPath); - if (savings + static_cast(params.leniency * static_cast(maxPath)) >= 0) { + if (savings + static_cast(params_.leniency_ * static_cast(maxPath)) >= 0) { vertPriority.emplace(savings, contractionEnsemble); } } } - std::vector partitionedFlag(graph.num_vertices(), false); - std::vector partitionedHeadFlag(graph.num_vertices(), false); + std::vector partitionedFlag(graph.NumVertices(), false); + std::vector partitionedHeadFlag(graph.NumVertices(), false); - vertex_idx_t maxCorseningNum - = graph.num_vertices() - - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + VertexIdxT maxCorseningNum + = graph.NumVertices() - static_cast>(static_cast(graph.NumVertices()) * params_.geomDecay_); - vertex_idx_t counter = 0; + VertexIdxT counter = 0; long minSave = 
std::numeric_limits::lowest(); for (auto prioIter = vertPriority.begin(); prioIter != vertPriority.end(); prioIter++) { const long &vertSave = prioIter->first; @@ -845,7 +827,7 @@ vertex_idx_t Sarkar::someChildrenContractio continue; } - for (const VertexType &chld : graph.children(groupHead)) { + for (const VertexType &chld : graph.Children(groupHead)) { if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), chld) == contractionEnsemble.cend()) && (vertexPoset[chld] == vertexPoset[groupHead] + 1)) { if ((partitionedFlag[chld]) && (!partitionedHeadFlag[chld])) { @@ -860,7 +842,7 @@ vertex_idx_t Sarkar::someChildrenContractio // Adding to partition expansionMapOutput.emplace_back(contractionEnsemble); - counter += static_cast>(contractionEnsemble.size()) - 1; + counter += static_cast>(contractionEnsemble.size()) - 1; if (counter > maxCorseningNum) { minSave = vertSave; } @@ -870,7 +852,7 @@ vertex_idx_t Sarkar::someChildrenContractio } } - for (const VertexType &vert : graph.vertices()) { + for (const VertexType &vert : graph.Vertices()) { if (partitionedFlag[vert]) { continue; } @@ -880,34 +862,32 @@ vertex_idx_t Sarkar::someChildrenContractio return counter; } -template -vertex_idx_t Sarkar::someParentsContraction( - v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const { - using VertexType = vertex_idx_t; +template +VertexIdxT Sarkar::SomeParentsContraction( + VWorkwT commCost, const GraphTIn &graph, std::vector>> &expansionMapOutput) const { + using VertexType = VertexIdxT; assert(expansionMapOutput.size() == 0); - const std::vector> vertexPoset = getBotPosetMap(graph); - const std::vector> topDist = getTopDistance(commCost, graph); - const std::vector> botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset = GetBotPosetMap(graph); + const std::vector> topDist = GetTopDistance(commCost, graph); + const std::vector> botDist = GetBotDistance(commCost, graph); auto cmp = [](const std::pair> 
&lhs, const std::pair> &rhs) { return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); }; std::set>, decltype(cmp)> vertPriority(cmp); - for (const VertexType &groupFoot : graph.vertices()) { - if (graph.in_degree(groupFoot) < 2) { + for (const VertexType &groupFoot : graph.Vertices()) { + if (graph.InDegree(groupFoot) < 2) { continue; } - auto cmp_par = [&topDist, &botDist](const VertexType &lhs, const VertexType &rhs) { + auto cmpPar = [&topDist, &botDist](const VertexType &lhs, const VertexType &rhs) { return (botDist[lhs] < botDist[rhs]) || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] > topDist[rhs])) || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] == topDist[rhs]) && (lhs < rhs)); }; - std::set parentsPriority(cmp_par); - for (const VertexType &par : graph.parents(groupFoot)) { + std::set parentsPriority(cmpPar); + for (const VertexType &par : graph.Parents(groupFoot)) { if (vertexPoset[par] + 1 == vertexPoset[groupFoot]) { parentsPriority.emplace(par); } @@ -916,62 +896,62 @@ vertex_idx_t Sarkar::someParentsContraction continue; } - std::vector::const_iterator, - typename std::set::const_iterator>> - admissble_parent_groups; - for (auto par_iter_start = parentsPriority.cbegin(); par_iter_start != parentsPriority.cend();) { - if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(groupFoot) != graph.vertex_type(*par_iter_start)) { - ++par_iter_start; + std::vector::const_iterator, + typename std::set::const_iterator>> + admissbleParentGroups; + for (auto parIterStart = parentsPriority.cbegin(); parIterStart != parentsPriority.cend();) { + if constexpr (hasTypedVerticesV) { + if (graph.VertexType(groupFoot) != graph.VertexType(*parIterStart)) { + ++parIterStart; continue; } } - const v_workw_t t_dist = topDist[*par_iter_start]; - const v_workw_t b_dist = botDist[*par_iter_start]; - auto par_iter_end = par_iter_start; - while (par_iter_end != parentsPriority.cend() && t_dist == topDist[*par_iter_end] && b_dist == 
botDist[*par_iter_end]) { - if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(groupFoot) != graph.vertex_type(*par_iter_end)) { + const VWorkwT tDist = topDist[*parIterStart]; + const VWorkwT bDist = botDist[*parIterStart]; + auto parIterEnd = parIterStart; + while (parIterEnd != parentsPriority.cend() && tDist == topDist[*parIterEnd] && bDist == botDist[*parIterEnd]) { + if constexpr (hasTypedVerticesV) { + if (graph.VertexType(groupFoot) != graph.VertexType(*parIterEnd)) { break; } } - ++par_iter_end; + ++parIterEnd; } - admissble_parent_groups.emplace_back(par_iter_start, par_iter_end); - par_iter_start = par_iter_end; + admissbleParentGroups.emplace_back(parIterStart, parIterEnd); + parIterStart = parIterEnd; } std::vector contractionEnsemble; std::set contractionParentsSet; - contractionEnsemble.reserve(1 + graph.in_degree(groupFoot)); + contractionEnsemble.reserve(1 + graph.InDegree(groupFoot)); contractionEnsemble.emplace_back(groupFoot); - v_workw_t added_weight = graph.vertex_work_weight(groupFoot); + VWorkwT addedWeight = graph.VertexWorkWeight(groupFoot); - for (std::size_t i = 0U; i < admissble_parent_groups.size(); ++i) { - const auto &first = admissble_parent_groups[i].first; - const auto &last = admissble_parent_groups[i].second; + for (std::size_t i = 0U; i < admissbleParentGroups.size(); ++i) { + const auto &first = admissbleParentGroups[i].first; + const auto &last = admissbleParentGroups[i].second; for (auto it = first; it != last; ++it) { contractionEnsemble.emplace_back(*it); contractionParentsSet.emplace(*it); - added_weight += graph.vertex_work_weight(*it); + addedWeight += graph.VertexWorkWeight(*it); } - if (added_weight > params.maxWeight) { + if (addedWeight > params_.maxWeight_) { break; } - v_workw_t maxPath = 0; + VWorkwT maxPath = 0; for (const VertexType &vert : contractionEnsemble) { - maxPath = std::max(maxPath, topDist[vert] + botDist[vert] - graph.vertex_work_weight(vert)); + maxPath = std::max(maxPath, topDist[vert] 
+ botDist[vert] - graph.VertexWorkWeight(vert)); } - v_workw_t maxParentDist = 0; - v_workw_t maxChildDist = 0; + VWorkwT maxParentDist = 0; + VWorkwT maxChildDist = 0; for (const VertexType &vert : contractionEnsemble) { - for (const VertexType &chld : graph.children(vert)) { + for (const VertexType &chld : graph.Children(vert)) { if (chld == groupFoot) { continue; } @@ -979,38 +959,37 @@ vertex_idx_t Sarkar::someParentsContraction } } - for (const VertexType &par : graph.parents(groupFoot)) { + for (const VertexType &par : graph.Parents(groupFoot)) { if (contractionParentsSet.find(par) == contractionParentsSet.end()) { maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } } for (std::size_t j = 1; j < contractionEnsemble.size(); j++) { - for (const VertexType &par : graph.parents(contractionEnsemble[j])) { + for (const VertexType &par : graph.Parents(contractionEnsemble[j])) { maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } } - v_workw_t newMaxPath = maxParentDist + maxChildDist; + VWorkwT newMaxPath = maxParentDist + maxChildDist; for (const VertexType &vert : contractionEnsemble) { - newMaxPath += graph.vertex_work_weight(vert); + newMaxPath += graph.VertexWorkWeight(vert); } long savings = static_cast(maxPath) - static_cast(newMaxPath); - if (savings + static_cast(params.leniency * static_cast(maxPath)) >= 0) { + if (savings + static_cast(params_.leniency_ * static_cast(maxPath)) >= 0) { vertPriority.emplace(savings, contractionEnsemble); } } } - std::vector partitionedFlag(graph.num_vertices(), false); - std::vector partitionedFootFlag(graph.num_vertices(), false); + std::vector partitionedFlag(graph.NumVertices(), false); + std::vector partitionedFootFlag(graph.NumVertices(), false); - vertex_idx_t maxCorseningNum - = graph.num_vertices() - - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + VertexIdxT maxCorseningNum + = graph.NumVertices() - static_cast>(static_cast(graph.NumVertices()) * 
params_.geomDecay_); - vertex_idx_t counter = 0; + VertexIdxT counter = 0; long minSave = std::numeric_limits::lowest(); for (auto prioIter = vertPriority.begin(); prioIter != vertPriority.end(); prioIter++) { const long &vertSave = prioIter->first; @@ -1034,7 +1013,7 @@ vertex_idx_t Sarkar::someParentsContraction continue; } - for (const VertexType &par : graph.parents(groupFoot)) { + for (const VertexType &par : graph.Parents(groupFoot)) { if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), par) == contractionEnsemble.cend()) && (vertexPoset[par] + 1 == vertexPoset[groupFoot])) { if ((partitionedFlag[par]) && (!partitionedFootFlag[par])) { @@ -1049,7 +1028,7 @@ vertex_idx_t Sarkar::someParentsContraction // Adding to partition expansionMapOutput.emplace_back(contractionEnsemble); - counter += static_cast>(contractionEnsemble.size()) - 1; + counter += static_cast>(contractionEnsemble.size()) - 1; if (counter > maxCorseningNum) { minSave = vertSave; } @@ -1059,7 +1038,7 @@ vertex_idx_t Sarkar::someParentsContraction } } - for (const VertexType &vert : graph.vertices()) { + for (const VertexType &vert : graph.Vertices()) { if (partitionedFlag[vert]) { continue; } @@ -1069,83 +1048,81 @@ vertex_idx_t Sarkar::someParentsContraction return counter; } -template -vertex_idx_t Sarkar::levelContraction( - v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const { - using VertexType = vertex_idx_t; +template +VertexIdxT Sarkar::LevelContraction( + VWorkwT commCost, const GraphTIn &graph, std::vector>> &expansionMapOutput) const { + using VertexType = VertexIdxT; assert(expansionMapOutput.size() == 0); - const std::vector> vertexPoset - = params.useTopPoset ? get_top_node_distance>(graph) : getBotPosetMap(graph); - const std::vector> topDist = getTopDistance(commCost, graph); - const std::vector> botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset + = params_.useTopPoset_ ? 
GetTopNodeDistance>(graph) : GetBotPosetMap(graph); + const std::vector> topDist = GetTopDistance(commCost, graph); + const std::vector> botDist = GetBotDistance(commCost, graph); auto cmp = [](const std::pair> &lhs, const std::pair> &rhs) { return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); }; std::set>, decltype(cmp)> vertPriority(cmp); - const vertex_idx_t minLevel = *std::min_element(vertexPoset.cbegin(), vertexPoset.cend()); - const vertex_idx_t maxLevel = *std::max_element(vertexPoset.cbegin(), vertexPoset.cend()); + const VertexIdxT minLevel = *std::min_element(vertexPoset.cbegin(), vertexPoset.cend()); + const VertexIdxT maxLevel = *std::max_element(vertexPoset.cbegin(), vertexPoset.cend()); - const vertex_idx_t parity = params.mode == SarkarParams::Mode::LEVEL_EVEN ? 0 : 1; + const VertexIdxT parity = params_.mode_ == sarkar_params::Mode::LEVEL_EVEN ? 0 : 1; - std::vector>> levels(maxLevel - minLevel + 1); - for (const VertexType &vert : graph.vertices()) { + std::vector>> levels(maxLevel - minLevel + 1); + for (const VertexType &vert : graph.Vertices()) { levels[vertexPoset[vert] - minLevel].emplace_back(vert); } - for (vertex_idx_t headLevel = minLevel + parity; headLevel < maxLevel; headLevel += 2) { - const vertex_idx_t footLevel = headLevel + 1; + for (VertexIdxT headLevel = minLevel + parity; headLevel < maxLevel; headLevel += 2) { + const VertexIdxT footLevel = headLevel + 1; - const std::vector> &headVertices = levels[headLevel - minLevel]; - const std::vector> &footVertices = levels[footLevel - minLevel]; + const std::vector> &headVertices = levels[headLevel - minLevel]; + const std::vector> &footVertices = levels[footLevel - minLevel]; - Union_Find_Universe, v_memw_t> uf; + UnionFindUniverse, VMemwT> uf; for (const VertexType &vert : headVertices) { - uf.add_object(vert, graph.vertex_work_weight(vert)); + uf.AddObject(vert, graph.VertexWorkWeight(vert)); } for (const VertexType &vert : footVertices) { - 
uf.add_object(vert, graph.vertex_work_weight(vert)); + uf.AddObject(vert, graph.VertexWorkWeight(vert)); } for (const VertexType &srcVert : headVertices) { - for (const VertexType &tgtVert : graph.children(srcVert)) { + for (const VertexType &tgtVert : graph.Children(srcVert)) { if (vertexPoset[tgtVert] != footLevel) { continue; } - if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(srcVert) != graph.vertex_type(tgtVert)) { + if constexpr (hasTypedVerticesV) { + if (graph.VertexType(srcVert) != graph.VertexType(tgtVert)) { continue; } } - uf.join_by_name(srcVert, tgtVert); + uf.JoinByName(srcVert, tgtVert); } } - std::vector> components = uf.get_connected_components(); + std::vector> components = uf.GetConnectedComponents(); for (std::vector &comp : components) { if (comp.size() < 2) { continue; } - if (uf.get_weight_of_component_by_name(comp.at(0)) > params.maxWeight) { + if (uf.GetWeightOfComponentByName(comp.at(0)) > params_.maxWeight_) { continue; } std::sort(comp.begin(), comp.end()); - v_workw_t maxPath = std::numeric_limits>::lowest(); + VWorkwT maxPath = std::numeric_limits>::lowest(); for (const VertexType &vert : comp) { - maxPath = std::max(maxPath, topDist[vert] + botDist[vert] - graph.vertex_work_weight(vert)); + maxPath = std::max(maxPath, topDist[vert] + botDist[vert] - graph.VertexWorkWeight(vert)); } - v_workw_t maxParentDist = 0; + VWorkwT maxParentDist = 0; for (const VertexType &vert : comp) { - for (const VertexType &par : graph.parents(vert)) { + for (const VertexType &par : graph.Parents(vert)) { if (std::binary_search(comp.cbegin(), comp.cend(), par)) { continue; } @@ -1154,9 +1131,9 @@ vertex_idx_t Sarkar::levelContraction( } } - v_workw_t maxChildDist = 0; + VWorkwT maxChildDist = 0; for (const VertexType &vert : comp) { - for (const VertexType &chld : graph.children(vert)) { + for (const VertexType &chld : graph.Children(vert)) { if (std::binary_search(comp.cbegin(), comp.cend(), chld)) { continue; } @@ -1165,26 +1142,25 @@ 
vertex_idx_t Sarkar::levelContraction( } } - v_workw_t newMaxPath = maxParentDist + maxChildDist; + VWorkwT newMaxPath = maxParentDist + maxChildDist; for (const VertexType &vert : comp) { - newMaxPath += graph.vertex_work_weight(vert); + newMaxPath += graph.VertexWorkWeight(vert); } long savings = static_cast(maxPath) - static_cast(newMaxPath); - if (savings + static_cast(params.leniency * static_cast(maxPath)) >= 0) { + if (savings + static_cast(params_.leniency_ * static_cast(maxPath)) >= 0) { vertPriority.emplace(savings, comp); } } } - std::vector partitionedFlag(graph.num_vertices(), false); + std::vector partitionedFlag(graph.NumVertices(), false); - vertex_idx_t maxCorseningNum - = graph.num_vertices() - - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + VertexIdxT maxCorseningNum + = graph.NumVertices() - static_cast>(static_cast(graph.NumVertices()) * params_.geomDecay_); - vertex_idx_t counter = 0; + VertexIdxT counter = 0; long minSave = std::numeric_limits::lowest(); for (auto prioIter = vertPriority.cbegin(); prioIter != vertPriority.cend(); prioIter++) { const long &compSave = prioIter->first; @@ -1200,13 +1176,13 @@ vertex_idx_t Sarkar::levelContraction( bool shouldSkipFoot = false; for (const VertexType &vert : comp) { if (((vertexPoset[vert] - minLevel - parity) % 2) == 0) { // head vertex - for (const VertexType &chld : graph.children(vert)) { + for (const VertexType &chld : graph.Children(vert)) { if ((vertexPoset[chld] == vertexPoset[vert] + 1) && partitionedFlag[chld]) { shouldSkipHead = true; } } } else { // foot vertex - for (const VertexType &par : graph.parents(vert)) { + for (const VertexType &par : graph.Parents(vert)) { if ((vertexPoset[par] + 1 == vertexPoset[vert]) && partitionedFlag[par]) { shouldSkipFoot = true; } @@ -1220,7 +1196,7 @@ vertex_idx_t Sarkar::levelContraction( // Adding to partition expansionMapOutput.emplace_back(comp); - counter += static_cast>(comp.size() - 1); + counter += 
static_cast>(comp.size() - 1); if (counter > maxCorseningNum) { minSave = compSave; } @@ -1230,8 +1206,8 @@ vertex_idx_t Sarkar::levelContraction( } } - expansionMapOutput.reserve(graph.num_vertices() - counter); - for (const VertexType &vert : graph.vertices()) { + expansionMapOutput.reserve(graph.NumVertices() - counter); + for (const VertexType &vert : graph.Vertices()) { if (partitionedFlag[vert]) { continue; } @@ -1242,35 +1218,35 @@ vertex_idx_t Sarkar::levelContraction( return counter; } -template -std::vector Sarkar::computeNodeHashes(const Graph_t_in &graph, - const std::vector> &vertexPoset, - const std::vector> &dist) const { - using VertexType = vertex_idx_t; +template +std::vector Sarkar::ComputeNodeHashes(const GraphTIn &graph, + const std::vector> &vertexPoset, + const std::vector> &dist) const { + using VertexType = VertexIdxT; - std::vector hashes(graph.num_vertices()); - for (const VertexType &vert : graph.vertices()) { + std::vector hashes(graph.NumVertices()); + for (const VertexType &vert : graph.Vertices()) { std::size_t &hash = hashes[vert]; - hash = std::hash>{}(graph.vertex_work_weight(vert)); - hash_combine(hash, vertexPoset[vert]); - hash_combine(hash, dist[vert]); - if constexpr (has_typed_vertices_v) { - hash_combine(hash, graph.vertex_type(vert)); + hash = std::hash>{}(graph.VertexWorkWeight(vert)); + HashCombine(hash, vertexPoset[vert]); + HashCombine(hash, dist[vert]); + if constexpr (hasTypedVerticesV) { + HashCombine(hash, graph.VertexType(vert)); } } return hashes; } -template -std::vector Sarkar::homogeneousMerge(const std::size_t number, - const std::size_t minSize, - const std::size_t maxSize) const { +template +std::vector Sarkar::HomogeneousMerge(const std::size_t number, + const std::size_t minSize, + const std::size_t maxSize) const { assert(minSize <= maxSize); assert(number > 0); std::size_t bestDiv = 1U; - for (std::size_t div : divisorsList(number)) { + for (std::size_t div : DivisorsList(number)) { if (div > maxSize) { 
continue; } @@ -1294,7 +1270,7 @@ std::vector Sarkar::homogeneousMerge(const return std::vector(bins, number / bins); } - std::size_t score = std::min(divisorsList(number / bins).size(), divisorsList((number / bins) + 1).size()); + std::size_t score = std::min(DivisorsList(number / bins).size(), DivisorsList((number / bins) + 1).size()); if (score >= bestScore) { bestScore = score; bestBins = bins; @@ -1317,68 +1293,66 @@ std::vector Sarkar::homogeneousMerge(const return groups; } -template -vertex_idx_t Sarkar::homogeneous_buffer_merge( - v_workw_t commCost, - const Graph_t_in &graph, - std::vector>> &expansionMapOutput) const { - using VertexType = vertex_idx_t; +template +VertexIdxT Sarkar::HomogeneousBufferMerge( + VWorkwT commCost, const GraphTIn &graph, std::vector>> &expansionMapOutput) const { + using VertexType = VertexIdxT; assert(expansionMapOutput.size() == 0); - const std::vector> vertexTopPoset = get_top_node_distance>(graph); - const std::vector> vertexBotPoset = getBotPosetMap(graph); - const std::vector> topDist = getTopDistance(commCost, graph); - const std::vector> botDist = getBotDistance(commCost, graph); + const std::vector> vertexTopPoset = GetTopNodeDistance>(graph); + const std::vector> vertexBotPoset = GetBotPosetMap(graph); + const std::vector> topDist = GetTopDistance(commCost, graph); + const std::vector> botDist = GetBotDistance(commCost, graph); - std::vector hashValuesCombined(graph.num_vertices(), 1729U); + std::vector hashValuesCombined(graph.NumVertices(), 1729U); - if (params.mode == SarkarParams::Mode::FAN_OUT_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) { - const std::vector hashValues = computeNodeHashes(graph, vertexTopPoset, topDist); + if (params_.mode_ == sarkar_params::Mode::FAN_OUT_BUFFER || params_.mode_ == sarkar_params::Mode::HOMOGENEOUS_BUFFER) { + const std::vector hashValues = ComputeNodeHashes(graph, vertexTopPoset, topDist); std::vector hashValuesWithParents = hashValues; - for (const 
VertexType &par : graph.vertices()) { - for (const VertexType &chld : graph.children(par)) { - hash_combine(hashValuesWithParents[chld], hashValues[par]); + for (const VertexType &par : graph.Vertices()) { + for (const VertexType &chld : graph.Children(par)) { + HashCombine(hashValuesWithParents[chld], hashValues[par]); } } - for (const VertexType &vert : graph.vertices()) { - hash_combine(hashValuesCombined[vert], hashValuesWithParents[vert]); + for (const VertexType &vert : graph.Vertices()) { + HashCombine(hashValuesCombined[vert], hashValuesWithParents[vert]); } } - if (params.mode == SarkarParams::Mode::FAN_IN_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) { - const std::vector hashValues = computeNodeHashes(graph, vertexBotPoset, botDist); + if (params_.mode_ == sarkar_params::Mode::FAN_IN_BUFFER || params_.mode_ == sarkar_params::Mode::HOMOGENEOUS_BUFFER) { + const std::vector hashValues = ComputeNodeHashes(graph, vertexBotPoset, botDist); std::vector hashValuesWithChildren = hashValues; - for (const VertexType &chld : graph.vertices()) { - for (const VertexType &par : graph.parents(chld)) { - hash_combine(hashValuesWithChildren[par], hashValues[chld]); + for (const VertexType &chld : graph.Vertices()) { + for (const VertexType &par : graph.Parents(chld)) { + HashCombine(hashValuesWithChildren[par], hashValues[chld]); } } - for (const VertexType &vert : graph.vertices()) { - hash_combine(hashValuesCombined[vert], hashValuesWithChildren[vert]); + for (const VertexType &vert : graph.Vertices()) { + HashCombine(hashValuesCombined[vert], hashValuesWithChildren[vert]); } } std::unordered_map> orbits; - for (const VertexType &vert : graph.vertices()) { - if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) { + for (const VertexType &vert : graph.Vertices()) { + if (graph.VertexWorkWeight(vert) > params_.smallWeightThreshold_) { continue; } const std::size_t hash = hashValuesCombined[vert]; - auto found_iter = orbits.find(hash); - if 
(found_iter == orbits.end()) { + auto foundIter = orbits.find(hash); + if (foundIter == orbits.end()) { orbits.emplace(std::piecewise_construct, std::forward_as_tuple(hash), - std::forward_as_tuple(std::initializer_list>{vert})); + std::forward_as_tuple(std::initializer_list>{vert})); } else { - found_iter->second.emplace(vert); + foundIter->second.emplace(vert); } } - vertex_idx_t counter = 0; - std::vector partitionedFlag(graph.num_vertices(), false); + VertexIdxT counter = 0; + std::vector partitionedFlag(graph.NumVertices(), false); - for (const VertexType &vert : graph.vertices()) { - if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) { + for (const VertexType &vert : graph.Vertices()) { + if (graph.VertexWorkWeight(vert) > params_.smallWeightThreshold_) { continue; } if (partitionedFlag[vert]) { @@ -1391,15 +1365,15 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer } std::set parents; - if (params.mode == SarkarParams::Mode::FAN_OUT_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) { - for (const VertexType &par : graph.parents(vert)) { + if (params_.mode_ == sarkar_params::Mode::FAN_OUT_BUFFER || params_.mode_ == sarkar_params::Mode::HOMOGENEOUS_BUFFER) { + for (const VertexType &par : graph.Parents(vert)) { parents.emplace(par); } } std::set children; - if (params.mode == SarkarParams::Mode::FAN_IN_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) { - for (const VertexType &chld : graph.children(vert)) { + if (params_.mode_ == sarkar_params::Mode::FAN_IN_BUFFER || params_.mode_ == sarkar_params::Mode::HOMOGENEOUS_BUFFER) { + for (const VertexType &chld : graph.Children(vert)) { children.emplace(chld); } } @@ -1412,7 +1386,7 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer if (vertexBotPoset[vertCandidate] != vertexBotPoset[vert]) { continue; } - if (graph.vertex_work_weight(vertCandidate) != graph.vertex_work_weight(vert)) { + if (graph.VertexWorkWeight(vertCandidate) != graph.VertexWorkWeight(vert)) { continue; } if 
(topDist[vertCandidate] != topDist[vert]) { @@ -1421,15 +1395,15 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer if (botDist[vertCandidate] != botDist[vert]) { continue; } - if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(vertCandidate) != graph.vertex_type(vert)) { + if constexpr (hasTypedVerticesV) { + if (graph.VertexType(vertCandidate) != graph.VertexType(vert)) { continue; } } - if (params.mode == SarkarParams::Mode::FAN_OUT_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) { + if (params_.mode_ == sarkar_params::Mode::FAN_OUT_BUFFER || params_.mode_ == sarkar_params::Mode::HOMOGENEOUS_BUFFER) { std::set candidateParents; - for (const VertexType &par : graph.parents(vertCandidate)) { + for (const VertexType &par : graph.Parents(vertCandidate)) { candidateParents.emplace(par); } if (candidateParents != parents) { @@ -1437,9 +1411,9 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer } } - if (params.mode == SarkarParams::Mode::FAN_IN_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) { + if (params_.mode_ == sarkar_params::Mode::FAN_IN_BUFFER || params_.mode_ == sarkar_params::Mode::HOMOGENEOUS_BUFFER) { std::set candidateChildren; - for (const VertexType &chld : graph.children(vertCandidate)) { + for (const VertexType &chld : graph.Children(vertCandidate)) { candidateChildren.emplace(chld); } if (candidateChildren != children) { @@ -1453,18 +1427,18 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer continue; } - const v_workw_t desiredVerticesInGroup = graph.vertex_work_weight(vert) == 0 - ? std::numeric_limits>::lowest() - : params.smallWeightThreshold / graph.vertex_work_weight(vert); - const v_workw_t maxVerticesInGroup = graph.vertex_work_weight(vert) == 0 - ? std::numeric_limits>::max() - : params.maxWeight / graph.vertex_work_weight(vert); + const VWorkwT desiredVerticesInGroup = graph.VertexWorkWeight(vert) == 0 + ? 
std::numeric_limits>::lowest() + : params_.smallWeightThreshold_ / graph.VertexWorkWeight(vert); + const VWorkwT maxVerticesInGroup = graph.VertexWorkWeight(vert) == 0 + ? std::numeric_limits>::max() + : params_.maxWeight_ / graph.VertexWorkWeight(vert); const std::size_t minDesiredSize = desiredVerticesInGroup < 2 ? 2U : static_cast(desiredVerticesInGroup); const std::size_t maxDesiredSize = std::max(minDesiredSize, std::min(minDesiredSize * 2U, static_cast(maxVerticesInGroup))); - std::vector groups = homogeneousMerge(secureOrb.size(), minDesiredSize, maxDesiredSize); + std::vector groups = HomogeneousMerge(secureOrb.size(), minDesiredSize, maxDesiredSize); auto secureOrbIter = secureOrb.begin(); for (std::size_t groupSize : groups) { @@ -1482,7 +1456,7 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer } } - for (const VertexType &vert : graph.vertices()) { + for (const VertexType &vert : graph.Vertices()) { if (partitionedFlag[vert]) { continue; } diff --git a/include/osp/coarser/Sarkar/SarkarMul.hpp b/include/osp/coarser/Sarkar/SarkarMul.hpp index a89bd1bf..86793b99 100644 --- a/include/osp/coarser/Sarkar/SarkarMul.hpp +++ b/include/osp/coarser/Sarkar/SarkarMul.hpp @@ -24,280 +24,280 @@ limitations under the License. 
namespace osp { -namespace SarkarParams { +namespace sarkar_params { enum class BufferMergeMode { OFF, FAN_IN, FAN_OUT, HOMOGENEOUS, FULL }; -template +template struct MulParameters { - std::size_t seed{42U}; - double geomDecay{0.875}; - double leniency{0.0}; - std::vector commCostVec{std::initializer_list{}}; - commCostType maxWeight{std::numeric_limits::max()}; - commCostType smallWeightThreshold{std::numeric_limits::lowest()}; - unsigned max_num_iteration_without_changes{3U}; - BufferMergeMode buffer_merge_mode{BufferMergeMode::OFF}; + std::size_t seed_{42U}; + double geomDecay_{0.875}; + double leniency_{0.0}; + std::vector commCostVec_{std::initializer_list{}}; + CommCostType maxWeight_{std::numeric_limits::max()}; + CommCostType smallWeightThreshold_{std::numeric_limits::lowest()}; + unsigned maxNumIterationWithoutChanges_{3U}; + BufferMergeMode bufferMergeMode_{BufferMergeMode::OFF}; }; -} // end namespace SarkarParams +} // namespace sarkar_params -template -class SarkarMul : public MultilevelCoarser { +template +class SarkarMul : public MultilevelCoarser { private: - bool first_coarsen{true}; - Thue_Morse_Sequence thue_coin{42U}; - Biased_Random balanced_random{42U}; + bool firstCoarsen_{true}; + ThueMorseSequence thueCoin_{42U}; + BiasedRandom balancedRandom_{42U}; // Multilevel coarser parameters - SarkarParams::MulParameters> ml_params; + sarkar_params::MulParameters> mlParams_; // Coarser parameters - SarkarParams::Parameters> params; + sarkar_params::Parameters> params_; // Initial coarser - Sarkar coarser_initial; + Sarkar coarserInitial_; // Subsequent coarser - Sarkar coarser_secondary; + Sarkar coarserSecondary_; - void setSeed(); - void initParams(); - void updateParams(); + void SetSeed(); + void InitParams(); + void UpdateParams(); - RETURN_STATUS run_single_contraction_mode(vertex_idx_t &diff_vertices); - RETURN_STATUS run_buffer_merges(); - RETURN_STATUS run_contractions(v_workw_t commCost); - RETURN_STATUS run_contractions() override; + 
ReturnStatus RunSingleContractionMode(VertexIdxT &diffVertices); + ReturnStatus RunBufferMerges(); + ReturnStatus RunContractions(VWorkwT commCost); + ReturnStatus RunContractions() override; public: - void setParameters(SarkarParams::MulParameters> ml_params_) { - ml_params = std::move(ml_params_); - setSeed(); - initParams(); + void SetParameters(sarkar_params::MulParameters> mlParams) { + mlParams_ = std::move(mlParams); + SetSeed(); + InitParams(); }; - std::string getCoarserName() const { return "Sarkar"; }; + std::string GetCoarserName() const override { return "Sarkar"; }; }; -template -void SarkarMul::setSeed() { +template +void SarkarMul::SetSeed() { constexpr std::size_t seedReduction = 4096U; - thue_coin = Thue_Morse_Sequence(ml_params.seed % seedReduction); - balanced_random = Biased_Random(ml_params.seed); + thueCoin_ = ThueMorseSequence(mlParams_.seed_ % seedReduction); + balancedRandom_ = BiasedRandom(mlParams_.seed_); } -template -void SarkarMul::initParams() { - first_coarsen = true; +template +void SarkarMul::InitParams() { + firstCoarsen_ = true; - params.geomDecay = ml_params.geomDecay; - params.leniency = ml_params.leniency; - params.maxWeight = ml_params.maxWeight; - params.smallWeightThreshold = ml_params.smallWeightThreshold; + params_.geomDecay_ = mlParams_.geomDecay_; + params_.leniency_ = mlParams_.leniency_; + params_.maxWeight_ = mlParams_.maxWeight_; + params_.smallWeightThreshold_ = mlParams_.smallWeightThreshold_; - if (ml_params.commCostVec.empty()) { - v_workw_t syncCosts = 128; - syncCosts = std::max(syncCosts, static_cast>(1)); + if (mlParams_.commCostVec_.empty()) { + VWorkwT syncCosts = 128; + syncCosts = std::max(syncCosts, static_cast>(1)); - while (syncCosts >= static_cast>(1)) { - ml_params.commCostVec.emplace_back(syncCosts); + while (syncCosts >= static_cast>(1)) { + mlParams_.commCostVec_.emplace_back(syncCosts); syncCosts /= 2; } } - std::sort(ml_params.commCostVec.begin(), ml_params.commCostVec.end()); + 
std::sort(mlParams_.commCostVec_.begin(), mlParams_.commCostVec_.end()); - updateParams(); + UpdateParams(); } -template -void SarkarMul::updateParams() { - coarser_initial.setParameters(params); - coarser_secondary.setParameters(params); +template +void SarkarMul::UpdateParams() { + coarserInitial_.SetParameters(params_); + coarserSecondary_.SetParameters(params_); } -template -RETURN_STATUS SarkarMul::run_single_contraction_mode(vertex_idx_t &diff_vertices) { - RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; +template +ReturnStatus SarkarMul::RunSingleContractionMode(VertexIdxT &diffVertices) { + ReturnStatus status = ReturnStatus::OSP_SUCCESS; - vertex_idx_t current_num_vertices; - if (first_coarsen) { - current_num_vertices = MultilevelCoarser::getOriginalGraph()->num_vertices(); + VertexIdxT currentNumVertices; + if (firstCoarsen_) { + currentNumVertices = MultilevelCoarser::GetOriginalGraph()->NumVertices(); } else { - current_num_vertices = MultilevelCoarser::dag_history.back()->num_vertices(); + currentNumVertices = MultilevelCoarser::dagHistory_.back()->NumVertices(); } - Graph_t_coarse coarsened_dag; - std::vector> contraction_map; - bool coarsen_success; + GraphTCoarse coarsenedDag; + std::vector> contractionMap; + bool coarsenSuccess; - if (first_coarsen) { - coarsen_success = coarser_initial.coarsenDag( - *(MultilevelCoarser::getOriginalGraph()), coarsened_dag, contraction_map); - first_coarsen = false; + if (firstCoarsen_) { + coarsenSuccess = coarserInitial_.CoarsenDag( + *(MultilevelCoarser::GetOriginalGraph()), coarsenedDag, contractionMap); + firstCoarsen_ = false; } else { - coarsen_success = coarser_secondary.coarsenDag( - *(MultilevelCoarser::dag_history.back()), coarsened_dag, contraction_map); + coarsenSuccess = coarserSecondary_.CoarsenDag( + *(MultilevelCoarser::dagHistory_.back()), coarsenedDag, contractionMap); } - if (!coarsen_success) { - status = RETURN_STATUS::ERROR; + if (!coarsenSuccess) { + status = ReturnStatus::ERROR; } status 
= std::max( - status, MultilevelCoarser::add_contraction(std::move(contraction_map), std::move(coarsened_dag))); + status, MultilevelCoarser::AddContraction(std::move(contractionMap), std::move(coarsenedDag))); - vertex_idx_t new_num_vertices = MultilevelCoarser::dag_history.back()->num_vertices(); - diff_vertices = current_num_vertices - new_num_vertices; + VertexIdxT newNumVertices = MultilevelCoarser::dagHistory_.back()->NumVertices(); + diffVertices = currentNumVertices - newNumVertices; return status; } -template -RETURN_STATUS SarkarMul::run_contractions(v_workw_t commCost) { - RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; - vertex_idx_t diff = 0; +template +ReturnStatus SarkarMul::RunContractions(VWorkwT commCost) { + ReturnStatus status = ReturnStatus::OSP_SUCCESS; + VertexIdxT diff = 0; - params.commCost = commCost; - updateParams(); + params_.commCost_ = commCost; + UpdateParams(); - unsigned outer_no_change = 0; - while (outer_no_change < ml_params.max_num_iteration_without_changes) { - unsigned inner_no_change = 0; - bool outer_change = false; + unsigned outerNoChange = 0; + while (outerNoChange < mlParams_.maxNumIterationWithoutChanges_) { + unsigned innerNoChange = 0; + bool outerChange = false; // Lines - while (inner_no_change < ml_params.max_num_iteration_without_changes) { - params.mode = SarkarParams::Mode::LINES; - params.useTopPoset = thue_coin.get_flip(); - updateParams(); + while (innerNoChange < mlParams_.maxNumIterationWithoutChanges_) { + params_.mode_ = sarkar_params::Mode::LINES; + params_.useTopPoset_ = thueCoin_.GetFlip(); + UpdateParams(); - status = std::max(status, run_single_contraction_mode(diff)); + status = std::max(status, RunSingleContractionMode(diff)); if (diff > 0) { - outer_change = true; - inner_no_change = 0; + outerChange = true; + innerNoChange = 0; } else { - inner_no_change++; + innerNoChange++; } } - inner_no_change = 0; + innerNoChange = 0; // Partial Fans - while (inner_no_change < 
ml_params.max_num_iteration_without_changes) { - params.mode = thue_coin.get_flip() ? SarkarParams::Mode::FAN_IN_PARTIAL : SarkarParams::Mode::FAN_OUT_PARTIAL; - updateParams(); + while (innerNoChange < mlParams_.maxNumIterationWithoutChanges_) { + params_.mode_ = thueCoin_.GetFlip() ? sarkar_params::Mode::FAN_IN_PARTIAL : sarkar_params::Mode::FAN_OUT_PARTIAL; + UpdateParams(); - status = std::max(status, run_single_contraction_mode(diff)); + status = std::max(status, RunSingleContractionMode(diff)); if (diff > 0) { - outer_change = true; - inner_no_change = 0; + outerChange = true; + innerNoChange = 0; } else { - inner_no_change++; + innerNoChange++; } } - inner_no_change = 0; + innerNoChange = 0; // Full Fans - while (inner_no_change < ml_params.max_num_iteration_without_changes) { - params.mode = thue_coin.get_flip() ? SarkarParams::Mode::FAN_IN_FULL : SarkarParams::Mode::FAN_OUT_FULL; - updateParams(); + while (innerNoChange < mlParams_.maxNumIterationWithoutChanges_) { + params_.mode_ = thueCoin_.GetFlip() ? sarkar_params::Mode::FAN_IN_FULL : sarkar_params::Mode::FAN_OUT_FULL; + UpdateParams(); - status = std::max(status, run_single_contraction_mode(diff)); + status = std::max(status, RunSingleContractionMode(diff)); if (diff > 0) { - outer_change = true; - inner_no_change = 0; + outerChange = true; + innerNoChange = 0; } else { - inner_no_change++; + innerNoChange++; } } - inner_no_change = 0; + innerNoChange = 0; // Levels - while (inner_no_change < ml_params.max_num_iteration_without_changes) { - params.mode = thue_coin.get_flip() ? SarkarParams::Mode::LEVEL_EVEN : SarkarParams::Mode::LEVEL_ODD; - params.useTopPoset = balanced_random.get_flip(); - updateParams(); + while (innerNoChange < mlParams_.maxNumIterationWithoutChanges_) { + params_.mode_ = thueCoin_.GetFlip() ? 
sarkar_params::Mode::LEVEL_EVEN : sarkar_params::Mode::LEVEL_ODD; + params_.useTopPoset_ = balancedRandom_.GetFlip(); + UpdateParams(); - status = std::max(status, run_single_contraction_mode(diff)); + status = std::max(status, RunSingleContractionMode(diff)); if (diff > 0) { - outer_change = true; - inner_no_change = 0; + outerChange = true; + innerNoChange = 0; } else { - inner_no_change++; + innerNoChange++; } } - if (outer_change) { - outer_no_change = 0; + if (outerChange) { + outerNoChange = 0; } else { - outer_no_change++; + outerNoChange++; } } return status; } -template -RETURN_STATUS SarkarMul::run_buffer_merges() { - RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; - - unsigned no_change = 0; - while (no_change < ml_params.max_num_iteration_without_changes) { - vertex_idx_t diff = 0; - if ((ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::HOMOGENEOUS) - || (ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::FULL && diff == 0)) { - params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER; - updateParams(); - status = std::max(status, run_single_contraction_mode(diff)); +template +ReturnStatus SarkarMul::RunBufferMerges() { + ReturnStatus status = ReturnStatus::OSP_SUCCESS; + + unsigned noChange = 0; + while (noChange < mlParams_.maxNumIterationWithoutChanges_) { + VertexIdxT diff = 0; + if ((mlParams_.bufferMergeMode_ == sarkar_params::BufferMergeMode::HOMOGENEOUS) + || (mlParams_.bufferMergeMode_ == sarkar_params::BufferMergeMode::FULL && diff == 0)) { + params_.mode_ = sarkar_params::Mode::HOMOGENEOUS_BUFFER; + UpdateParams(); + status = std::max(status, RunSingleContractionMode(diff)); } - if (ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::FAN_IN) { - params.mode = SarkarParams::Mode::FAN_IN_BUFFER; - updateParams(); - status = std::max(status, run_single_contraction_mode(diff)); + if (mlParams_.bufferMergeMode_ == sarkar_params::BufferMergeMode::FAN_IN) { + params_.mode_ = sarkar_params::Mode::FAN_IN_BUFFER; + 
UpdateParams(); + status = std::max(status, RunSingleContractionMode(diff)); } - if (ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::FAN_OUT) { - params.mode = SarkarParams::Mode::FAN_OUT_BUFFER; - updateParams(); - status = std::max(status, run_single_contraction_mode(diff)); + if (mlParams_.bufferMergeMode_ == sarkar_params::BufferMergeMode::FAN_OUT) { + params_.mode_ = sarkar_params::Mode::FAN_OUT_BUFFER; + UpdateParams(); + status = std::max(status, RunSingleContractionMode(diff)); } - if (ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::FULL && diff == 0) { - const bool flip = thue_coin.get_flip(); - params.mode = flip ? SarkarParams::Mode::FAN_IN_BUFFER : SarkarParams::Mode::FAN_OUT_BUFFER; - updateParams(); - status = std::max(status, run_single_contraction_mode(diff)); + if (mlParams_.bufferMergeMode_ == sarkar_params::BufferMergeMode::FULL && diff == 0) { + const bool flip = thueCoin_.GetFlip(); + params_.mode_ = flip ? sarkar_params::Mode::FAN_IN_BUFFER : sarkar_params::Mode::FAN_OUT_BUFFER; + UpdateParams(); + status = std::max(status, RunSingleContractionMode(diff)); if (diff == 0) { - params.mode = (!flip) ? SarkarParams::Mode::FAN_IN_BUFFER : SarkarParams::Mode::FAN_OUT_BUFFER; - updateParams(); - status = std::max(status, run_single_contraction_mode(diff)); + params_.mode_ = (!flip) ? 
sarkar_params::Mode::FAN_IN_BUFFER : sarkar_params::Mode::FAN_OUT_BUFFER; + UpdateParams(); + status = std::max(status, RunSingleContractionMode(diff)); } } if (diff > 0) { - no_change = 0; - status = std::max(status, run_contractions(ml_params.commCostVec.back())); + noChange = 0; + status = std::max(status, RunContractions(mlParams_.commCostVec_.back())); } else { - no_change++; + noChange++; } } return status; } -template -RETURN_STATUS SarkarMul::run_contractions() { - initParams(); +template +ReturnStatus SarkarMul::RunContractions() { + InitParams(); - RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; + ReturnStatus status = ReturnStatus::OSP_SUCCESS; - for (const v_workw_t commCost : ml_params.commCostVec) { - status = std::max(status, run_contractions(commCost)); + for (const VWorkwT commCost : mlParams_.commCostVec_) { + status = std::max(status, RunContractions(commCost)); } - if (ml_params.buffer_merge_mode != SarkarParams::BufferMergeMode::OFF) { - status = std::max(status, run_buffer_merges()); + if (mlParams_.bufferMergeMode_ != sarkar_params::BufferMergeMode::OFF) { + status = std::max(status, RunBufferMerges()); } return status; diff --git a/include/osp/coarser/SquashA/SquashA.hpp b/include/osp/coarser/SquashA/SquashA.hpp index f41b3c5c..177361cf 100644 --- a/include/osp/coarser/SquashA/SquashA.hpp +++ b/include/osp/coarser/SquashA/SquashA.hpp @@ -30,190 +30,188 @@ limitations under the License. 
namespace osp { -namespace SquashAParams { +namespace squash_a_params { enum class Mode { EDGE_WEIGHT, TRIANGLES }; struct Parameters { - double geom_decay_num_nodes{17.0 / 16.0}; - double poisson_par{0.0}; - unsigned noise{0U}; - std::pair edge_sort_ratio{3, 2}; - unsigned num_rep_without_node_decrease{4}; - double temperature_multiplier{1.125}; - unsigned number_of_temperature_increases{14}; - Mode mode{Mode::EDGE_WEIGHT}; - bool use_structured_poset{false}; - bool use_top_poset{true}; + double geomDecayNumNodes_{17.0 / 16.0}; + double poissonPar_{0.0}; + unsigned noise_{0U}; + std::pair edgeSortRatio_{3, 2}; + unsigned numRepWithoutNodeDecrease_{4}; + double temperatureMultiplier_{1.125}; + unsigned numberOfTemperatureIncreases_{14}; + Mode mode_{Mode::EDGE_WEIGHT}; + bool useStructuredPoset_{false}; + bool useTopPoset_{true}; }; -} // end namespace SquashAParams +} // namespace squash_a_params -template -class SquashA : public CoarserGenExpansionMap { +template +class SquashA : public CoarserGenExpansionMap { private: - SquashAParams::Parameters params; + squash_a_params::Parameters params_; - std::vector generate_poset_in_map(const Graph_t_in &dag_in); + std::vector GeneratePosetInMap(const GraphTIn &dagIn); template - std::vector>> gen_exp_map_from_contractable_edges( - const std::multiset, T>, CMP> &edge_weights, - const std::vector &poset_int_mapping, - const Graph_t_in &dag_in) { + std::vector>> GenExpMapFromContractableEdges( + const std::multiset, T>, CMP> &edgeWeights, + const std::vector &posetIntMapping, + const GraphTIn &dagIn) { static_assert(std::is_arithmetic_v, "T must be of arithmetic type!"); - auto lower_third_it = edge_weights.begin(); - std::advance(lower_third_it, edge_weights.size() / 3); - T lower_third_wt = std::max(lower_third_it->second, static_cast(1)); // Could be 0 + auto lowerThirdIt = edgeWeights.begin(); + std::advance(lowerThirdIt, edgeWeights.size() / 3); + T lowerThirdWt = std::max(lowerThirdIt->second, static_cast(1)); // 
Could be 0 - Union_Find_Universe, vertex_idx_t, v_workw_t, v_memw_t> - connected_components; - for (const auto &vert : dag_in.vertices()) { - connected_components.add_object(vert, dag_in.vertex_work_weight(vert), dag_in.vertex_mem_weight(vert)); + UnionFindUniverse, VertexIdxT, VWorkwT, VMemwT> connectedComponents; + for (const auto &vert : dagIn.Vertices()) { + connectedComponents.AddObject(vert, dagIn.VertexWorkWeight(vert), dagIn.VertexMemWeight(vert)); } - std::vector merged_nodes(dag_in.num_vertices(), false); + std::vector mergedNodes(dagIn.NumVertices(), false); - vertex_idx_t num_nodes_decrease = 0; - vertex_idx_t num_nodes_aim - = dag_in.num_vertices() - - static_cast>(static_cast(dag_in.num_vertices()) / params.geom_decay_num_nodes); + VertexIdxT numNodesDecrease = 0; + VertexIdxT numNodesAim + = dagIn.NumVertices() + - static_cast>(static_cast(dagIn.NumVertices()) / params_.geomDecayNumNodes_); double temperature = 1; - unsigned temperature_increase_iteration = 0; - while (num_nodes_decrease < num_nodes_aim && temperature_increase_iteration <= params.number_of_temperature_increases) { - for (const auto &wt_edge : edge_weights) { - const auto &edge_d = wt_edge.first; - const vertex_idx_t edge_source = source(edge_d, dag_in); - const vertex_idx_t edge_target = target(edge_d, dag_in); + unsigned temperatureIncreaseIteration = 0; + while (numNodesDecrease < numNodesAim && temperatureIncreaseIteration <= params_.numberOfTemperatureIncreases_) { + for (const auto &wtEdge : edgeWeights) { + const auto &edgeD = wtEdge.first; + const VertexIdxT edgeSource = Source(edgeD, dagIn); + const VertexIdxT edgeTarget = Target(edgeD, dagIn); // Previously merged - if (merged_nodes[edge_source]) { + if (mergedNodes[edgeSource]) { continue; } - if (merged_nodes[edge_target]) { + if (mergedNodes[edgeTarget]) { continue; } // weight check - if (connected_components.get_weight_of_component_by_name(edge_source) - + 
connected_components.get_weight_of_component_by_name(edge_target) - > static_cast(lower_third_wt) * temperature) { + if (connectedComponents.GetWeightOfComponentByName(edgeSource) + + connectedComponents.GetWeightOfComponentByName(edgeTarget) + > static_cast(lowerThirdWt) * temperature) { continue; } // no loops criteria check - bool check_failed = false; + bool checkFailed = false; // safety check - this should already be the case - assert(abs(poset_int_mapping[edge_source] - poset_int_mapping[edge_target]) <= 1); + assert(abs(posetIntMapping[edgeSource] - posetIntMapping[edgeTarget]) <= 1); // Checks over all affected edges // In edges first - for (const auto &node : dag_in.parents(edge_source)) { - if (node == edge_target) { + for (const auto &node : dagIn.Parents(edgeSource)) { + if (node == edgeTarget) { continue; } - if (!merged_nodes[node]) { + if (!mergedNodes[node]) { continue; } - if (poset_int_mapping[edge_source] >= poset_int_mapping[node] + 2) { + if (posetIntMapping[edgeSource] >= posetIntMapping[node] + 2) { continue; } - check_failed = true; + checkFailed = true; break; } - if (check_failed) { + if (checkFailed) { continue; } // Out edges first - for (const auto &node : dag_in.children(edge_source)) { - if (node == edge_target) { + for (const auto &node : dagIn.Children(edgeSource)) { + if (node == edgeTarget) { continue; } - if (!merged_nodes[node]) { + if (!mergedNodes[node]) { continue; } - if (poset_int_mapping[node] >= poset_int_mapping[edge_source] + 2) { + if (posetIntMapping[node] >= posetIntMapping[edgeSource] + 2) { continue; } - check_failed = true; + checkFailed = true; break; } - if (check_failed) { + if (checkFailed) { continue; } // In edges second - for (const auto &node : dag_in.parents(edge_target)) { - if (node == edge_source) { + for (const auto &node : dagIn.Parents(edgeTarget)) { + if (node == edgeSource) { continue; } - if (!merged_nodes[node]) { + if (!mergedNodes[node]) { continue; } - if (poset_int_mapping[edge_target] >= 
poset_int_mapping[node] + 2) { + if (posetIntMapping[edgeTarget] >= posetIntMapping[node] + 2) { continue; } - check_failed = true; + checkFailed = true; break; } - if (check_failed) { + if (checkFailed) { continue; } // Out edges second - for (const auto &node : dag_in.children(edge_target)) { - if (node == edge_source) { + for (const auto &node : dagIn.Children(edgeTarget)) { + if (node == edgeSource) { continue; } - if (!merged_nodes[node]) { + if (!mergedNodes[node]) { continue; } - if (poset_int_mapping[node] >= poset_int_mapping[edge_target] + 2) { + if (posetIntMapping[node] >= posetIntMapping[edgeTarget] + 2) { continue; } - check_failed = true; + checkFailed = true; break; } - if (check_failed) { + if (checkFailed) { continue; } // merging - connected_components.join_by_name(edge_source, edge_target); - merged_nodes[edge_source] = true; - merged_nodes[edge_target] = true; - num_nodes_decrease++; + connectedComponents.JoinByName(edgeSource, edgeTarget); + mergedNodes[edgeSource] = true; + mergedNodes[edgeTarget] = true; + numNodesDecrease++; } - temperature *= params.temperature_multiplier; - temperature_increase_iteration++; + temperature *= params_.temperatureMultiplier_; + temperatureIncreaseIteration++; } // Getting components to contract and adding graph contraction - std::vector>> partition_vec; + std::vector>> partitionVec; - vertex_idx_t min_node_decrease - = dag_in.num_vertices() - - static_cast>(static_cast(dag_in.num_vertices()) - / std::pow(params.geom_decay_num_nodes, 0.25)); - if (num_nodes_decrease > 0 && num_nodes_decrease >= min_node_decrease) { - partition_vec = connected_components.get_connected_components(); + VertexIdxT minNodeDecrease = dagIn.NumVertices() + - static_cast>(static_cast(dagIn.NumVertices()) + / std::pow(params_.geomDecayNumNodes_, 0.25)); + if (numNodesDecrease > 0 && numNodesDecrease >= minNodeDecrease) { + partitionVec = connectedComponents.GetConnectedComponents(); } else { - 
partition_vec.reserve(dag_in.num_vertices()); - for (const auto &vert : dag_in.vertices()) { - std::vector> vect; + partitionVec.reserve(dagIn.NumVertices()); + for (const auto &vert : dagIn.Vertices()) { + std::vector> vect; vect.push_back(vert); - partition_vec.emplace_back(vect); + partitionVec.emplace_back(vect); } } - return partition_vec; + return partitionVec; } public: - virtual std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in) override; + virtual std::vector>> GenerateVertexExpansionMap(const GraphTIn &dagIn) override; - SquashA(SquashAParams::Parameters params_ = SquashAParams::Parameters()) : params(params_) {}; + SquashA(squash_a_params::Parameters params = squash_a_params::Parameters()) : params_(params) {}; SquashA(const SquashA &) = default; SquashA(SquashA &&) = default; @@ -221,79 +219,75 @@ class SquashA : public CoarserGenExpansionMap { SquashA &operator=(SquashA &&) = default; virtual ~SquashA() override = default; - inline SquashAParams::Parameters &getParams() { return params; } + inline squash_a_params::Parameters &GetParams() { return params_; } - inline void setParams(SquashAParams::Parameters params_) { params = params_; } + inline void SetParams(squash_a_params::Parameters params) { params_ = params; } - std::string getCoarserName() const override { return "SquashA"; } + std::string GetCoarserName() const override { return "SquashA"; } }; -template -std::vector SquashA::generate_poset_in_map(const Graph_t_in &dag_in) { - std::vector poset_int_mapping; - if (!params.use_structured_poset) { - poset_int_mapping = get_strict_poset_integer_map(params.noise, params.poisson_par, dag_in); +template +std::vector SquashA::GeneratePosetInMap(const GraphTIn &dagIn) { + std::vector posetIntMapping; + if (!params_.useStructuredPoset_) { + posetIntMapping = GetStrictPosetIntegerMap(params_.noise_, params_.poissonPar_, dagIn); } else { - if (params.use_top_poset) { - poset_int_mapping = get_top_node_distance(dag_in); + if 
(params_.useTopPoset_) { + posetIntMapping = GetTopNodeDistance(dagIn); } else { - std::vector bot_dist = get_bottom_node_distance(dag_in); - poset_int_mapping.resize(bot_dist.size()); - for (std::size_t i = 0; i < bot_dist.size(); i++) { - poset_int_mapping[i] = -bot_dist[i]; + std::vector botDist = GetBottomNodeDistance(dagIn); + posetIntMapping.resize(botDist.size()); + for (std::size_t i = 0; i < botDist.size(); i++) { + posetIntMapping[i] = -botDist[i]; } } } - return poset_int_mapping; + return posetIntMapping; } -template -std::vector>> SquashA::generate_vertex_expansion_map( - const Graph_t_in &dag_in) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t_in must satisfy the directed_graph_edge_desc concept"); - static_assert(is_computational_dag_edge_desc_v, - "Graph_t_in must satisfy the is_computational_dag_edge_desc concept"); - // static_assert(has_hashable_edge_desc_v, "Graph_t_in must have hashable edge descriptors"); - - std::vector poset_int_mapping = generate_poset_in_map(dag_in); - - if constexpr (has_edge_weights_v) { - if (params.mode == SquashAParams::Mode::EDGE_WEIGHT) { - auto edge_w_cmp - = [](const std::pair, e_commw_t> &lhs, - const std::pair, e_commw_t> &rhs) { return lhs.second < rhs.second; }; - std::multiset, e_commw_t>, decltype(edge_w_cmp)> edge_weights(edge_w_cmp); +template +std::vector>> SquashA::GenerateVertexExpansionMap(const GraphTIn &dagIn) { + static_assert(isDirectedGraphEdgeDescV, "GraphTIn must satisfy the directed_graph_edge_desc concept"); + static_assert(isComputationalDagEdgeDescV, "GraphTIn must satisfy the is_computational_dag_edge_desc concept"); + // static_assert(hasHashableEdgeDescV, "GraphTIn must have hashable edge descriptors"); + + std::vector posetIntMapping = GeneratePosetInMap(dagIn); + + if constexpr (hasEdgeWeightsV) { + if (params_.mode_ == squash_a_params::Mode::EDGE_WEIGHT) { + auto edgeWCmp = [](const std::pair, ECommwT> &lhs, + const std::pair, ECommwT> &rhs) { return lhs.second < rhs.second; 
}; + std::multiset, ECommwT>, decltype(edgeWCmp)> edgeWeights(edgeWCmp); { - std::vector> contractable_edges - = get_contractable_edges_from_poset_int_map(poset_int_mapping, dag_in); - for (const auto &edge : contractable_edges) { - if constexpr (has_edge_weights_v) { - edge_weights.emplace(edge, dag_in.edge_comm_weight(edge)); + std::vector> contractableEdges + = GetContractableEdgesFromPosetIntMap(posetIntMapping, dagIn); + for (const auto &edge : contractableEdges) { + if constexpr (hasEdgeWeightsV) { + edgeWeights.emplace(edge, dagIn.EdgeCommWeight(edge)); } else { - edge_weights.emplace(edge, dag_in.vertex_comm_weight(source(edge, dag_in))); + edgeWeights.emplace(edge, dagIn.VertexCommWeight(Source(edge, dagIn))); } } } - return gen_exp_map_from_contractable_edges, decltype(edge_w_cmp)>( - edge_weights, poset_int_mapping, dag_in); + return GenExpMapFromContractableEdges, decltype(edgeWCmp)>(edgeWeights, posetIntMapping, dagIn); } } - if (params.mode == SquashAParams::Mode::TRIANGLES) { - auto edge_w_cmp = [](const std::pair, std::size_t> &lhs, - const std::pair, std::size_t> &rhs) { return lhs.second < rhs.second; }; - std::multiset, std::size_t>, decltype(edge_w_cmp)> edge_weights(edge_w_cmp); + if (params_.mode_ == squash_a_params::Mode::TRIANGLES) { + auto edgeWCmp = [](const std::pair, std::size_t> &lhs, + const std::pair, std::size_t> &rhs) { return lhs.second < rhs.second; }; + std::multiset, std::size_t>, decltype(edgeWCmp)> edgeWeights(edgeWCmp); { - std::vector> contractable_edges - = get_contractable_edges_from_poset_int_map(poset_int_mapping, dag_in); - for (const auto &edge : contractable_edges) { - std::size_t num_common_triangles = num_common_parents(dag_in, source(edge, dag_in), target(edge, dag_in)); - num_common_triangles += num_common_children(dag_in, source(edge, dag_in), target(edge, dag_in)); - edge_weights.emplace(edge, num_common_triangles); + std::vector> contractableEdges + = GetContractableEdgesFromPosetIntMap(posetIntMapping, dagIn); 
+ for (const auto &edge : contractableEdges) { + std::size_t numCommonTriangles = NumCommonParents(dagIn, Source(edge, dagIn), Target(edge, dagIn)); + numCommonTriangles += NumCommonChildren(dagIn, Source(edge, dagIn), Target(edge, dagIn)); + edgeWeights.emplace(edge, numCommonTriangles); } } - return gen_exp_map_from_contractable_edges(edge_weights, poset_int_mapping, dag_in); + return GenExpMapFromContractableEdges(edgeWeights, posetIntMapping, dagIn); } else { throw std::runtime_error("Edge sorting mode not recognised."); diff --git a/include/osp/coarser/SquashA/SquashAMul.hpp b/include/osp/coarser/SquashA/SquashAMul.hpp index f238d576..2d0c85fb 100644 --- a/include/osp/coarser/SquashA/SquashAMul.hpp +++ b/include/osp/coarser/SquashA/SquashAMul.hpp @@ -24,82 +24,86 @@ limitations under the License. namespace osp { -template -class SquashAMul : public MultilevelCoarser { +template +class SquashAMul : public MultilevelCoarser { private: - vertex_idx_t min_nodes{1}; - Thue_Morse_Sequence thue_coin{}; - Biased_Random balanced_random{}; + VertexIdxT minNodes_{1}; + ThueMorseSequence thueCoin_{}; + BiasedRandom balancedRandom_{}; // Coarser Params - SquashAParams::Parameters params; + squash_a_params::Parameters params_; // Initial coarser - SquashA coarser_initial; + SquashA coarserInitial_; // Subsequent coarser - SquashA coarser_secondary; + SquashA coarserSecondary_; - void updateParams(); + void UpdateParams(); - RETURN_STATUS run_contractions() override; + ReturnStatus RunContractions() override; public: - void setParams(SquashAParams::Parameters params_) { params = params_; }; + void SetParams(squash_a_params::Parameters params) { params_ = params; }; - void setMinimumNumberVertices(vertex_idx_t num) { min_nodes = num; }; + void SetMinimumNumberVertices(VertexIdxT num) { minNodes_ = num; }; - std::string getCoarserName() const { return "SquashA"; }; + std::string GetCoarserName() const override { return "SquashA"; }; }; -template -void 
SquashAMul::updateParams() { - params.use_structured_poset = thue_coin.get_flip(); - params.use_top_poset = balanced_random.get_flip(); +template +void SquashAMul::UpdateParams() { + params_.useStructuredPoset_ = thueCoin_.GetFlip(); + params_.useTopPoset_ = balancedRandom_.GetFlip(); - coarser_initial.setParams(params); - coarser_secondary.setParams(params); + coarserInitial_.SetParams(params_); + coarserSecondary_.SetParams(params_); } -template -RETURN_STATUS SquashAMul::run_contractions() { - RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; +template +ReturnStatus SquashAMul::RunContractions() { + ReturnStatus status = ReturnStatus::OSP_SUCCESS; - Biased_Random_with_side_bias coin(params.edge_sort_ratio); + BiasedRandomWithSideBias coin(params_.edgeSortRatio_); - bool first_coarsen = true; - unsigned no_change_in_a_row = 0; - vertex_idx_t current_num_vertices = MultilevelCoarser::getOriginalGraph()->num_vertices(); + bool firstCoarsen = true; + unsigned noChangeInARow = 0; + VertexIdxT currentNumVertices; + if (MultilevelCoarser::GetOriginalGraph()) { + currentNumVertices = MultilevelCoarser::GetOriginalGraph()->NumVertices(); + } else { + return ReturnStatus::ERROR; + } - while (no_change_in_a_row < params.num_rep_without_node_decrease && current_num_vertices > min_nodes) { - updateParams(); + while (noChangeInARow < params_.numRepWithoutNodeDecrease_ && currentNumVertices > minNodes_) { + UpdateParams(); - Graph_t_coarse coarsened_dag; - std::vector> contraction_map; - bool coarsen_success; + GraphTCoarse coarsenedDag; + std::vector> contractionMap; + bool coarsenSuccess; - if (first_coarsen) { - coarsen_success = coarser_initial.coarsenDag( - *(MultilevelCoarser::getOriginalGraph()), coarsened_dag, contraction_map); - first_coarsen = false; + if (firstCoarsen) { + coarsenSuccess = coarserInitial_.CoarsenDag( + *(MultilevelCoarser::GetOriginalGraph()), coarsenedDag, contractionMap); + firstCoarsen = false; } else { - coarsen_success = 
coarser_secondary.coarsenDag( - *(MultilevelCoarser::dag_history.back()), coarsened_dag, contraction_map); + coarsenSuccess = coarserSecondary_.CoarsenDag( + *(MultilevelCoarser::dagHistory_.back()), coarsenedDag, contractionMap); } - if (!coarsen_success) { - status = RETURN_STATUS::ERROR; + if (!coarsenSuccess) { + status = ReturnStatus::ERROR; } status = std::max( - status, - MultilevelCoarser::add_contraction(std::move(contraction_map), std::move(coarsened_dag))); + status, MultilevelCoarser::AddContraction(std::move(contractionMap), std::move(coarsenedDag))); - vertex_idx_t new_num_vertices = MultilevelCoarser::dag_history.back()->num_vertices(); + VertexIdxT newNumVertices = MultilevelCoarser::dagHistory_.back()->NumVertices(); - if (new_num_vertices == current_num_vertices) { - no_change_in_a_row++; + if (newNumVertices == currentNumVertices) { + noChangeInARow++; } else { - no_change_in_a_row = 0; - current_num_vertices = new_num_vertices; + noChangeInARow = 0; + currentNumVertices = newNumVertices; } } diff --git a/include/osp/coarser/StepByStep/StepByStepCoarser.hpp b/include/osp/coarser/StepByStep/StepByStepCoarser.hpp index 4f655d62..afa98cfc 100644 --- a/include/osp/coarser/StepByStep/StepByStepCoarser.hpp +++ b/include/osp/coarser/StepByStep/StepByStepCoarser.hpp @@ -28,152 +28,151 @@ limitations under the License. 
namespace osp { -template -class StepByStepCoarser : public CoarserGenContractionMap { - using vertex_idx = vertex_idx_t; +template +class StepByStepCoarser : public CoarserGenContractionMap { + using VertexIdx = VertexIdxT; - using vertex_type_t_or_default - = std::conditional_t, v_type_t, unsigned>; - using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; + using VertexTypeTOrDefault = std::conditional_t, VTypeT, unsigned>; + using EdgeCommwTOrDefault = std::conditional_t, ECommwT, VCommwT>; - using boost_graph_t - = boost_graph, v_commw_t, v_memw_t, vertex_type_t_or_default, edge_commw_t_or_default>; + using BoostGraphT = BoostGraph, VCommwT, VMemwT, VertexTypeTOrDefault, EdgeCommwTOrDefault>; public: - enum COARSENING_STRATEGY { EDGE_BY_EDGE, BOTTOM_LEVEL_CLUSTERS }; + enum CoarseningStrategy { EDGE_BY_EDGE, BOTTOM_LEVEL_CLUSTERS }; - enum PROBLEM_TYPE { SCHEDULING, PEBBLING }; + enum ProblemType { SCHEDULING, PEBBLING }; struct EdgeToContract { - std::pair edge; - v_workw_t work_weight; - v_commw_t comm_weight; + std::pair edge_; + VWorkwT workWeight_; + VCommwT commWeight_; - EdgeToContract(const vertex_idx source, - const vertex_idx target, - const v_workw_t work_weight_, - const v_commw_t comm_weight_) - : edge(source, target), work_weight(work_weight_), comm_weight(comm_weight_) {} + EdgeToContract(const VertexIdx source, + const VertexIdx target, + const VWorkwT workWeight, + const VCommwT commWeight) + : edge_(source, target), workWeight_(workWeight), commWeight_(commWeight) {} bool operator<(const EdgeToContract &other) const { - return (work_weight < other.work_weight || (work_weight == other.work_weight && comm_weight < other.comm_weight)); + return (workWeight_ < other.workWeight_ || (workWeight_ == other.workWeight_ && commWeight_ < other.commWeight_) + || (workWeight_ == other.workWeight_ && commWeight_ == other.commWeight_ && edge_ < other.edge_)); } }; private: - std::vector> contractionHistory; + std::vector> 
contractionHistory_; - COARSENING_STRATEGY coarsening_strategy = COARSENING_STRATEGY::EDGE_BY_EDGE; - PROBLEM_TYPE problem_type = PROBLEM_TYPE::SCHEDULING; + CoarseningStrategy coarseningStrategy_ = CoarseningStrategy::EDGE_BY_EDGE; + ProblemType problemType_ = ProblemType::SCHEDULING; - unsigned target_nr_of_nodes = 0; + unsigned targetNrOfNodes_ = 0; - Graph_t G_full; - boost_graph_t G_coarse; + GraphT gFull_; + BoostGraphT gCoarse_; - std::vector> contains; + std::vector> contains_; - std::map, v_commw_t> edgeWeights; - std::map, v_commw_t> contractable; - std::vector node_valid; - std::vector top_order_idx; + std::map, VCommwT> edgeWeights_; + std::map, VCommwT> contractable_; + std::vector nodeValid_; + std::vector topOrderIdx_; - v_memw_t fast_mem_capacity = std::numeric_limits>::max(); // for pebbling + VMemwT fastMemCapacity_ = std::numeric_limits>::max(); // for pebbling // Utility functions for coarsening in general - void ContractSingleEdge(std::pair edge); + void ContractSingleEdge(std::pair edge); void ComputeFilteredTopOrderIdx(); void InitializeContractableEdges(); - bool isContractable(std::pair edge) const; - std::set getContractableChildren(vertex_idx node) const; - std::set getContractableParents(vertex_idx node) const; - void updateDistantEdgeContractibility(std::pair edge); + bool IsContractable(std::pair edge) const; + std::set GetContractableChildren(VertexIdx node) const; + std::set GetContractableParents(VertexIdx node) const; + void UpdateDistantEdgeContractibility(std::pair edge); - std::pair PickEdgeToContract(const std::vector &candidates) const; + std::pair PickEdgeToContract(const std::vector &candidates) const; std::vector CreateEdgeCandidateList() const; // Utility functions for cluster coarsening - std::vector> ClusterCoarsen() const; + std::vector> ClusterCoarsen() const; std::vector ComputeFilteredTopLevel() const; // Utility functions for coarsening in a pebbling problem - bool IncontractableForPebbling(const std::pair &) const; 
+ bool IncontractableForPebbling(const std::pair &) const; void MergeSourcesInPebbling(); // Utility for contracting into final format - void SetIdVector(std::vector> &new_vertex_id) const; - static std::vector GetFilteredTopOrderIdx(const Graph_t &G, const std::vector &is_valid); + void SetIdVector(std::vector> &newVertexId) const; + static std::vector GetFilteredTopOrderIdx(const GraphT &g, const std::vector &isValid); public: virtual ~StepByStepCoarser() = default; - virtual std::string getCoarserName() const override { return "StepByStepCoarsening"; } + virtual std::string GetCoarserName() const override { return "StepByStepCoarsening"; } // DAG coarsening - virtual std::vector> generate_vertex_contraction_map(const Graph_t &dag_in) override; + virtual std::vector> GenerateVertexContractionMap(const GraphT &dagIn) override; // Coarsening for pebbling problems - leaves source nodes intact, considers memory bound - void coarsenForPebbling(const Graph_t &dag_in, Graph_t &coarsened_dag, std::vector> &new_vertex_id); + void CoarsenForPebbling(const GraphT &dagIn, GraphT &coarsenedDag, std::vector> &newVertexId); - void setCoarseningStrategy(COARSENING_STRATEGY strategy_) { coarsening_strategy = strategy_; } + void SetCoarseningStrategy(CoarseningStrategy strategy) { coarseningStrategy_ = strategy; } - void setTargetNumberOfNodes(const unsigned nr_nodes_) { target_nr_of_nodes = nr_nodes_; } + void SetTargetNumberOfNodes(const unsigned nrNodes) { targetNrOfNodes_ = nrNodes; } - void setFastMemCapacity(const v_memw_t capacity_) { fast_mem_capacity = capacity_; } + void SetFastMemCapacity(const VMemwT capacity) { fastMemCapacity_ = capacity; } - std::vector> getContractionHistory() const { return contractionHistory; } + std::vector> GetContractionHistory() const { return contractionHistory_; } - std::vector GetIntermediateIDs(vertex_idx until_which_step) const; - Graph_t Contract(const std::vector> &new_vertex_id) const; + std::vector GetIntermediateIDs(VertexIdx 
untilWhichStep) const; + GraphT Contract(const std::vector> &newVertexId) const; - const Graph_t &getOriginalDag() const { return G_full; } + const GraphT &GetOriginalDag() const { return gFull_; } }; -// template -// bool StepByStepCoarser::coarseDag(const Graph_t& dag_in, Graph_t &dag_out, -// std::vector>> &old_vertex_ids, -// std::vector> &new_vertex_id) +// template +// bool StepByStepCoarser::coarseDag(const GraphT& dag_in, GraphT &dag_out, +// std::vector>> &old_vertex_ids, +// std::vector> &new_vertex_id) -template -std::vector> StepByStepCoarser::generate_vertex_contraction_map(const Graph_t &dag_in) { - const unsigned N = static_cast(dag_in.num_vertices()); +template +std::vector> StepByStepCoarser::GenerateVertexContractionMap(const GraphT &dagIn) { + const unsigned n = static_cast(dagIn.NumVertices()); - G_full = dag_in; - for (vertex_idx node = G_coarse.num_vertices(); node > 0;) { + gFull_ = dagIn; + for (VertexIdx node = gCoarse_.NumVertices(); node > 0;) { --node; - G_coarse.remove_vertex(node); + gCoarse_.RemoveVertex(node); } - constructComputationalDag(G_full, G_coarse); + ConstructComputationalDag(gFull_, gCoarse_); - contractionHistory.clear(); + contractionHistory_.clear(); // target nr of nodes must be reasonable - if (target_nr_of_nodes == 0 || target_nr_of_nodes > N) { - target_nr_of_nodes = std::max(N / 2, 1U); + if (targetNrOfNodes_ == 0 || targetNrOfNodes_ > n) { + targetNrOfNodes_ = std::max(n / 2, 1U); } // list of original node indices contained in each contracted node - contains.clear(); - contains.resize(N); + contains_.clear(); + contains_.resize(n); - node_valid.clear(); - node_valid.resize(N, true); + nodeValid_.clear(); + nodeValid_.resize(n, true); - for (vertex_idx node = 0; node < N; ++node) { - contains[node].insert(node); + for (VertexIdx node = 0; node < n; ++node) { + contains_[node].insert(node); } // used for original, slow coarsening - edgeWeights.clear(); - contractable.clear(); + edgeWeights_.clear(); + 
contractable_.clear(); - if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) { + if (coarseningStrategy_ == CoarseningStrategy::EDGE_BY_EDGE) { // Init edge weights - for (vertex_idx node = 0; node < N; ++node) { - for (vertex_idx succ : G_full.children(node)) { - edgeWeights[std::make_pair(node, succ)] = G_full.vertex_comm_weight(node); + for (VertexIdx node = 0; node < n; ++node) { + for (VertexIdx succ : gFull_.Children(node)) { + edgeWeights_[std::make_pair(node, succ)] = gFull_.VertexCommWeight(node); } } @@ -181,23 +180,23 @@ std::vector> StepByStepCoarser::generate_vertex_c InitializeContractableEdges(); } - for (unsigned NrOfNodes = N; NrOfNodes > target_nr_of_nodes;) { + for (unsigned nrOfNodes = n; nrOfNodes > targetNrOfNodes_;) { // Single contraction step - std::vector> edgesToContract; + std::vector> edgesToContract; // choose edges to contract in this step - if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) { + if (coarseningStrategy_ == CoarseningStrategy::EDGE_BY_EDGE) { std::vector candidates = CreateEdgeCandidateList(); if (candidates.empty()) { std::cout << "Error: no more edges to contract" << std::endl; break; } - std::pair chosenEdge = PickEdgeToContract(candidates); + std::pair chosenEdge = PickEdgeToContract(candidates); edgesToContract.push_back(chosenEdge); // Update far-away edges that become uncontractable now - updateDistantEdgeContractibility(chosenEdge); + UpdateDistantEdgeContractibility(chosenEdge); } else { edgesToContract = ClusterCoarsen(); } @@ -207,150 +206,150 @@ std::vector> StepByStepCoarser::generate_vertex_c } // contract these edges - for (const std::pair &edge : edgesToContract) { - if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) { + for (const std::pair &edge : edgesToContract) { + if (coarseningStrategy_ == CoarseningStrategy::EDGE_BY_EDGE) { // Update contractable edges - edge.b - for (vertex_idx pred : G_coarse.parents(edge.second)) { - contractable.erase(std::make_pair(pred, 
edge.second)); + for (VertexIdx pred : gCoarse_.Parents(edge.second)) { + contractable_.erase(std::make_pair(pred, edge.second)); } - for (vertex_idx succ : G_coarse.children(edge.second)) { - contractable.erase(std::make_pair(edge.second, succ)); + for (VertexIdx succ : gCoarse_.Children(edge.second)) { + contractable_.erase(std::make_pair(edge.second, succ)); } } ContractSingleEdge(edge); - node_valid[edge.second] = false; + nodeValid_[edge.second] = false; - if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) { + if (coarseningStrategy_ == CoarseningStrategy::EDGE_BY_EDGE) { ComputeFilteredTopOrderIdx(); // Update contractable edges - edge.a - std::set contractableParents = getContractableParents(edge.first); - for (vertex_idx pred : G_coarse.parents(edge.first)) { + std::set contractableParents = GetContractableParents(edge.first); + for (VertexIdx pred : gCoarse_.Parents(edge.first)) { if (contractableParents.find(pred) != contractableParents.end()) { - contractable[std::make_pair(pred, edge.first)] = edgeWeights[std::make_pair(pred, edge.first)]; + contractable_[std::make_pair(pred, edge.first)] = edgeWeights_[std::make_pair(pred, edge.first)]; } else { - contractable.erase(std::make_pair(pred, edge.first)); + contractable_.erase(std::make_pair(pred, edge.first)); } } - std::set contractableChildren = getContractableChildren(edge.first); - for (vertex_idx succ : G_coarse.children(edge.first)) { + std::set contractableChildren = GetContractableChildren(edge.first); + for (VertexIdx succ : gCoarse_.Children(edge.first)) { if (contractableChildren.find(succ) != contractableChildren.end()) { - contractable[std::make_pair(edge.first, succ)] = edgeWeights[std::make_pair(edge.first, succ)]; + contractable_[std::make_pair(edge.first, succ)] = edgeWeights_[std::make_pair(edge.first, succ)]; } else { - contractable.erase(std::make_pair(edge.first, succ)); + contractable_.erase(std::make_pair(edge.first, succ)); } } } - --NrOfNodes; - if (NrOfNodes == 
target_nr_of_nodes) { + --nrOfNodes; + if (nrOfNodes == targetNrOfNodes_) { break; } } } - if (problem_type == PROBLEM_TYPE::PEBBLING) { + if (problemType_ == ProblemType::PEBBLING) { MergeSourcesInPebbling(); } - std::vector> new_vertex_id; - SetIdVector(new_vertex_id); + std::vector> newVertexId; + SetIdVector(newVertexId); - return new_vertex_id; + return newVertexId; } -template -void StepByStepCoarser::ContractSingleEdge(std::pair edge) { - G_coarse.set_vertex_work_weight(edge.first, G_coarse.vertex_work_weight(edge.first) + G_coarse.vertex_work_weight(edge.second)); - G_coarse.set_vertex_work_weight(edge.second, 0); +template +void StepByStepCoarser::ContractSingleEdge(std::pair edge) { + gCoarse_.SetVertexWorkWeight(edge.first, gCoarse_.VertexWorkWeight(edge.first) + gCoarse_.VertexWorkWeight(edge.second)); + gCoarse_.SetVertexWorkWeight(edge.second, 0); - G_coarse.set_vertex_comm_weight(edge.first, G_coarse.vertex_comm_weight(edge.first) + G_coarse.vertex_comm_weight(edge.second)); - G_coarse.set_vertex_comm_weight(edge.second, 0); + gCoarse_.SetVertexCommWeight(edge.first, gCoarse_.VertexCommWeight(edge.first) + gCoarse_.VertexCommWeight(edge.second)); + gCoarse_.SetVertexCommWeight(edge.second, 0); - G_coarse.set_vertex_mem_weight(edge.first, G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second)); - G_coarse.set_vertex_mem_weight(edge.second, 0); + gCoarse_.SetVertexMemWeight(edge.first, gCoarse_.VertexMemWeight(edge.first) + gCoarse_.VertexMemWeight(edge.second)); + gCoarse_.SetVertexMemWeight(edge.second, 0); - contractionHistory.emplace_back(edge.first, edge.second); + contractionHistory_.emplace_back(edge.first, edge.second); // process incoming edges - std::set parents_of_source; - for (vertex_idx pred : G_coarse.parents(edge.first)) { - parents_of_source.insert(pred); + std::set parentsOfSource; + for (VertexIdx pred : gCoarse_.Parents(edge.first)) { + parentsOfSource.insert(pred); } - for (vertex_idx pred : 
G_coarse.parents(edge.second)) { + for (VertexIdx pred : gCoarse_.Parents(edge.second)) { if (pred == edge.first) { continue; } - if (parents_of_source.find(pred) != parents_of_source.end()) // combine edges + if (parentsOfSource.find(pred) != parentsOfSource.end()) // combine edges { - edgeWeights[std::make_pair(pred, edge.first)] = 0; - for (vertex_idx node : contains[pred]) { - for (vertex_idx succ : G_coarse.children(node)) { + edgeWeights_[std::make_pair(pred, edge.first)] = 0; + for (VertexIdx node : contains_[pred]) { + for (VertexIdx succ : gCoarse_.Children(node)) { if (succ == edge.first || succ == edge.second) { - edgeWeights[std::make_pair(pred, edge.first)] += G_full.vertex_comm_weight(node); + edgeWeights_[std::make_pair(pred, edge.first)] += gFull_.VertexCommWeight(node); } } } - edgeWeights.erase(std::make_pair(pred, edge.second)); + edgeWeights_.erase(std::make_pair(pred, edge.second)); } else // add incoming edge { - G_coarse.add_edge(pred, edge.first); - edgeWeights[std::make_pair(pred, edge.first)] = edgeWeights[std::make_pair(pred, edge.second)]; + gCoarse_.AddEdge(pred, edge.first); + edgeWeights_[std::make_pair(pred, edge.first)] = edgeWeights_[std::make_pair(pred, edge.second)]; } } // process outgoing edges - std::set children_of_source; - for (vertex_idx succ : G_coarse.children(edge.first)) { - children_of_source.insert(succ); + std::set childrenOfSource; + for (VertexIdx succ : gCoarse_.Children(edge.first)) { + childrenOfSource.insert(succ); } - for (vertex_idx succ : G_coarse.children(edge.second)) { - if (children_of_source.find(succ) != children_of_source.end()) // combine edges + for (VertexIdx succ : gCoarse_.Children(edge.second)) { + if (childrenOfSource.find(succ) != childrenOfSource.end()) // combine edges { - edgeWeights[std::make_pair(edge.first, succ)] += edgeWeights[std::make_pair(edge.second, succ)]; - edgeWeights.erase(std::make_pair(edge.second, succ)); + edgeWeights_[std::make_pair(edge.first, succ)] += 
edgeWeights_[std::make_pair(edge.second, succ)]; + edgeWeights_.erase(std::make_pair(edge.second, succ)); } else // add outgoing edge { - G_coarse.add_edge(edge.first, succ); - edgeWeights[std::make_pair(edge.first, succ)] = edgeWeights[std::make_pair(edge.second, succ)]; + gCoarse_.AddEdge(edge.first, succ); + edgeWeights_[std::make_pair(edge.first, succ)] = edgeWeights_[std::make_pair(edge.second, succ)]; } } - G_coarse.clear_vertex(edge.second); + gCoarse_.ClearVertex(edge.second); - for (vertex_idx node : contains[edge.second]) { - contains[edge.first].insert(node); + for (VertexIdx node : contains_[edge.second]) { + contains_[edge.first].insert(node); } - contains[edge.second].clear(); + contains_[edge.second].clear(); } -template -bool StepByStepCoarser::isContractable(std::pair edge) const { - std::deque Queue; - std::set visited; - for (vertex_idx succ : G_coarse.children(edge.first)) { - if (node_valid[succ] && top_order_idx[succ] < top_order_idx[edge.second]) { - Queue.push_back(succ); +template +bool StepByStepCoarser::IsContractable(std::pair edge) const { + std::deque queue; + std::set visited; + for (VertexIdx succ : gCoarse_.Children(edge.first)) { + if (nodeValid_[succ] && topOrderIdx_[succ] < topOrderIdx_[edge.second]) { + queue.push_back(succ); visited.insert(succ); } } - while (!Queue.empty()) { - const vertex_idx node = Queue.front(); - Queue.pop_front(); - for (vertex_idx succ : G_coarse.children(node)) { + while (!queue.empty()) { + const VertexIdx node = queue.front(); + queue.pop_front(); + for (VertexIdx succ : gCoarse_.Children(node)) { if (succ == edge.second) { return false; } - if (node_valid[succ] && top_order_idx[succ] < top_order_idx[edge.second] && visited.count(succ) == 0) { - Queue.push_back(succ); + if (nodeValid_[succ] && topOrderIdx_[succ] < topOrderIdx_[edge.second] && visited.count(succ) == 0) { + queue.push_back(succ); visited.insert(succ); } } @@ -358,164 +357,164 @@ bool StepByStepCoarser::isContractable(std::pair 
-std::set> StepByStepCoarser::getContractableChildren(const vertex_idx node) const { - std::deque Queue; - std::set visited; - std::set succ_contractable; - vertex_idx topOrderMax = top_order_idx[node]; +template +std::set> StepByStepCoarser::GetContractableChildren(const VertexIdx node) const { + std::deque queue; + std::set visited; + std::set succContractable; + VertexIdx topOrderMax = topOrderIdx_[node]; - for (vertex_idx succ : G_coarse.children(node)) { - if (node_valid[succ]) { - succ_contractable.insert(succ); + for (VertexIdx succ : gCoarse_.Children(node)) { + if (nodeValid_[succ]) { + succContractable.insert(succ); } - if (top_order_idx[succ] > topOrderMax) { - topOrderMax = top_order_idx[succ]; + if (topOrderIdx_[succ] > topOrderMax) { + topOrderMax = topOrderIdx_[succ]; } - if (node_valid[succ]) { - Queue.push_back(succ); + if (nodeValid_[succ]) { + queue.push_back(succ); visited.insert(succ); } } - while (!Queue.empty()) { - const vertex_idx node_local = Queue.front(); - Queue.pop_front(); - for (vertex_idx succ : G_coarse.children(node_local)) { - succ_contractable.erase(succ); + while (!queue.empty()) { + const VertexIdx nodeLocal = queue.front(); + queue.pop_front(); + for (VertexIdx succ : gCoarse_.Children(nodeLocal)) { + succContractable.erase(succ); - if (node_valid[succ] && top_order_idx[succ] < topOrderMax && visited.count(succ) == 0) { - Queue.push_back(succ); + if (nodeValid_[succ] && topOrderIdx_[succ] < topOrderMax && visited.count(succ) == 0) { + queue.push_back(succ); visited.insert(succ); } } } - return succ_contractable; + return succContractable; } -template -std::set> StepByStepCoarser::getContractableParents(const vertex_idx node) const { - std::deque Queue; - std::set visited; - std::set pred_contractable; - vertex_idx topOrderMin = top_order_idx[node]; +template +std::set> StepByStepCoarser::GetContractableParents(const VertexIdx node) const { + std::deque queue; + std::set visited; + std::set predContractable; + VertexIdx 
topOrderMin = topOrderIdx_[node]; - for (vertex_idx pred : G_coarse.parents(node)) { - if (node_valid[pred]) { - pred_contractable.insert(pred); + for (VertexIdx pred : gCoarse_.Parents(node)) { + if (nodeValid_[pred]) { + predContractable.insert(pred); } - if (top_order_idx[pred] < topOrderMin) { - topOrderMin = top_order_idx[pred]; + if (topOrderIdx_[pred] < topOrderMin) { + topOrderMin = topOrderIdx_[pred]; } - if (node_valid[pred]) { - Queue.push_back(pred); + if (nodeValid_[pred]) { + queue.push_back(pred); visited.insert(pred); } } - while (!Queue.empty()) { - const vertex_idx node_local = Queue.front(); - Queue.pop_front(); - for (vertex_idx pred : G_coarse.parents(node_local)) { - pred_contractable.erase(pred); + while (!queue.empty()) { + const VertexIdx nodeLocal = queue.front(); + queue.pop_front(); + for (VertexIdx pred : gCoarse_.Parents(nodeLocal)) { + predContractable.erase(pred); - if (node_valid[pred] && top_order_idx[pred] > topOrderMin && visited.count(pred) == 0) { - Queue.push_back(pred); + if (nodeValid_[pred] && topOrderIdx_[pred] > topOrderMin && visited.count(pred) == 0) { + queue.push_back(pred); visited.insert(pred); } } } - return pred_contractable; + return predContractable; } -template -void StepByStepCoarser::InitializeContractableEdges() { +template +void StepByStepCoarser::InitializeContractableEdges() { ComputeFilteredTopOrderIdx(); - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - std::set succ_contractable = getContractableChildren(node); - for (vertex_idx succ : succ_contractable) { - contractable[std::make_pair(node, succ)] = G_full.vertex_comm_weight(node); + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + std::set succContractable = GetContractableChildren(node); + for (VertexIdx succ : succContractable) { + contractable_[std::make_pair(node, succ)] = gFull_.VertexCommWeight(node); } } } -template -void StepByStepCoarser::updateDistantEdgeContractibility(std::pair edge) { - 
std::unordered_set ancestors, descendant; - std::deque Queue; - for (vertex_idx succ : G_coarse.children(edge.first)) { +template +void StepByStepCoarser::UpdateDistantEdgeContractibility(std::pair edge) { + std::unordered_set ancestors, descendant; + std::deque queue; + for (VertexIdx succ : gCoarse_.Children(edge.first)) { if (succ != edge.second) { - Queue.push_back(succ); + queue.push_back(succ); descendant.insert(succ); } } - while (!Queue.empty()) { - const vertex_idx node = Queue.front(); - Queue.pop_front(); - for (vertex_idx succ : G_coarse.children(node)) { + while (!queue.empty()) { + const VertexIdx node = queue.front(); + queue.pop_front(); + for (VertexIdx succ : gCoarse_.Children(node)) { if (descendant.count(succ) == 0) { - Queue.push_back(succ); + queue.push_back(succ); descendant.insert(succ); } } } - for (vertex_idx pred : G_coarse.parents(edge.second)) { + for (VertexIdx pred : gCoarse_.Parents(edge.second)) { if (pred != edge.first) { - Queue.push_back(pred); + queue.push_back(pred); ancestors.insert(pred); } } - while (!Queue.empty()) { - const vertex_idx node = Queue.front(); - Queue.pop_front(); - for (vertex_idx pred : G_coarse.parents(node)) { + while (!queue.empty()) { + const VertexIdx node = queue.front(); + queue.pop_front(); + for (VertexIdx pred : gCoarse_.Parents(node)) { if (ancestors.count(pred) == 0) { - Queue.push_back(pred); + queue.push_back(pred); ancestors.insert(pred); } } } - for (const vertex_idx node : ancestors) { - for (const vertex_idx succ : G_coarse.children(node)) { + for (const VertexIdx node : ancestors) { + for (const VertexIdx succ : gCoarse_.Children(node)) { if (descendant.count(succ) > 0) { - contractable.erase(std::make_pair(node, succ)); + contractable_.erase(std::make_pair(node, succ)); } } } } -template -std::vector::EdgeToContract> StepByStepCoarser::CreateEdgeCandidateList() const { +template +std::vector::EdgeToContract> StepByStepCoarser::CreateEdgeCandidateList() const { std::vector candidates; - 
for (auto it = contractable.cbegin(); it != contractable.cend(); ++it) { - if (problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(it->first)) { + for (auto it = contractable_.cbegin(); it != contractable_.cend(); ++it) { + if (problemType_ == ProblemType::PEBBLING && IncontractableForPebbling(it->first)) { continue; } candidates.emplace_back( - it->first.first, it->first.second, contains[it->first.first].size() + contains[it->first.second].size(), it->second); + it->first.first, it->first.second, contains_[it->first.first].size() + contains_[it->first.second].size(), it->second); } std::sort(candidates.begin(), candidates.end()); return candidates; } -template -std::pair, vertex_idx_t> StepByStepCoarser::PickEdgeToContract( +template +std::pair, VertexIdxT> StepByStepCoarser::PickEdgeToContract( const std::vector &candidates) const { size_t limit = (candidates.size() + 2) / 3; - v_workw_t limitCardinality = candidates[limit].work_weight; - while (limit < candidates.size() - 1 && candidates[limit + 1].work_weight == limitCardinality) { + VWorkwT limitCardinality = candidates[limit].workWeight_; + while (limit < candidates.size() - 1 && candidates[limit + 1].workWeight_ == limitCardinality) { ++limit; } @@ -527,13 +526,13 @@ std::pair, vertex_idx_t> StepByStepCoarser candidates[best].comm_weight) { + if (candidates[idx].commWeight_ > candidates[best].commWeight_) { best = idx; } } chosen = candidates[best]; - return chosen.edge; + return chosen.edge_; } /** @@ -542,24 +541,24 @@ std::pair, vertex_idx_t> StepByStepCoarser -std::vector, vertex_idx_t>> StepByStepCoarser::ClusterCoarsen() const { - std::vector singleton(G_full.num_vertices(), true); - std::vector leader(G_full.num_vertices()); - std::vector weight(G_full.num_vertices()); - std::vector nrBadNeighbors(G_full.num_vertices()); - std::vector leaderBadNeighbors(G_full.num_vertices()); - - std::vector minTopLevel(G_full.num_vertices()); - std::vector maxTopLevel(G_full.num_vertices()); - 
std::vector clusterNewID(G_full.num_vertices()); - - std::vector> contractionSteps; +template +std::vector, VertexIdxT>> StepByStepCoarser::ClusterCoarsen() const { + std::vector singleton(gFull_.NumVertices(), true); + std::vector leader(gFull_.NumVertices()); + std::vector weight(gFull_.NumVertices()); + std::vector nrBadNeighbors(gFull_.NumVertices()); + std::vector leaderBadNeighbors(gFull_.NumVertices()); + + std::vector minTopLevel(gFull_.NumVertices()); + std::vector maxTopLevel(gFull_.NumVertices()); + std::vector clusterNewID(gFull_.NumVertices()); + + std::vector> contractionSteps; std::vector topLevel = ComputeFilteredTopLevel(); - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - if (node_valid[node]) { + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + if (nodeValid_[node]) { leader[node] = node; - weight[node] = 1 /*G_coarse.vertex_work_weight(node)*/; + weight[node] = 1 /*gCoarse_.vertex_work_weight(node)*/; nrBadNeighbors[node] = 0; leaderBadNeighbors[node] = UINT_MAX; clusterNewID[node] = node; @@ -568,8 +567,8 @@ std::vector, vertex_idx_t>> StepByStepC } } - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - if (!node_valid[node] || !singleton[node]) { + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + if (!nodeValid_[node] || !singleton[node]) { continue; } @@ -577,8 +576,8 @@ std::vector, vertex_idx_t>> StepByStepC continue; } - std::vector validNeighbors; - for (vertex_idx pred : G_coarse.parents(node)) { + std::vector validNeighbors; + for (VertexIdx pred : gCoarse_.Parents(node)) { // direct check of condition 1 if (topLevel[node] < maxTopLevel[leader[pred]] - 1 || topLevel[node] > minTopLevel[leader[pred]] + 1) { continue; @@ -593,13 +592,13 @@ std::vector, vertex_idx_t>> StepByStepC } // check viability for pebbling - if (problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(pred, node))) { + if (problemType_ == ProblemType::PEBBLING && 
IncontractableForPebbling(std::make_pair(pred, node))) { continue; } validNeighbors.push_back(pred); } - for (vertex_idx succ : G_coarse.children(node)) { + for (VertexIdx succ : gCoarse_.Children(node)) { // direct check of condition 1 if (topLevel[node] < maxTopLevel[leader[succ]] - 1 || topLevel[node] > minTopLevel[leader[succ]] + 1) { continue; @@ -614,36 +613,36 @@ std::vector, vertex_idx_t>> StepByStepC } // check viability for pebbling - if (problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(node, succ))) { + if (problemType_ == ProblemType::PEBBLING && IncontractableForPebbling(std::make_pair(node, succ))) { continue; } validNeighbors.push_back(succ); } - vertex_idx bestNeighbor = std::numeric_limits::max(); - for (vertex_idx neigh : validNeighbors) { - if (bestNeighbor == std::numeric_limits::max() || weight[leader[neigh]] < weight[leader[bestNeighbor]]) { + VertexIdx bestNeighbor = std::numeric_limits::max(); + for (VertexIdx neigh : validNeighbors) { + if (bestNeighbor == std::numeric_limits::max() || weight[leader[neigh]] < weight[leader[bestNeighbor]]) { bestNeighbor = neigh; } } - if (bestNeighbor == std::numeric_limits::max()) { + if (bestNeighbor == std::numeric_limits::max()) { continue; } - vertex_idx newLead = leader[bestNeighbor]; + VertexIdx newLead = leader[bestNeighbor]; leader[node] = newLead; weight[newLead] += weight[node]; - bool is_parent = false; - for (vertex_idx pred : G_coarse.parents(node)) { + bool isParent = false; + for (VertexIdx pred : gCoarse_.Parents(node)) { if (pred == bestNeighbor) { - is_parent = true; + isParent = true; } } - if (is_parent) { + if (isParent) { contractionSteps.emplace_back(clusterNewID[newLead], node); } else { contractionSteps.emplace_back(node, clusterNewID[newLead]); @@ -653,7 +652,7 @@ std::vector, vertex_idx_t>> StepByStepC minTopLevel[newLead] = std::min(minTopLevel[newLead], topLevel[node]); maxTopLevel[newLead] = std::max(maxTopLevel[newLead], topLevel[node]); - 
for (vertex_idx pred : G_coarse.parents(node)) { + for (VertexIdx pred : gCoarse_.Parents(node)) { if (std::abs(static_cast(topLevel[pred]) - static_cast(maxTopLevel[newLead])) != 1 && std::abs(static_cast(topLevel[pred]) - static_cast(minTopLevel[newLead])) != 1) { continue; @@ -666,7 +665,7 @@ std::vector, vertex_idx_t>> StepByStepC ++nrBadNeighbors[pred]; } } - for (vertex_idx succ : G_coarse.children(node)) { + for (VertexIdx succ : gCoarse_.Children(node)) { if (std::abs(static_cast(topLevel[succ]) - static_cast(maxTopLevel[newLead])) != 1 && std::abs(static_cast(topLevel[succ]) - static_cast(minTopLevel[newLead])) != 1) { continue; @@ -681,7 +680,7 @@ std::vector, vertex_idx_t>> StepByStepC } if (singleton[bestNeighbor]) { - for (vertex_idx pred : G_coarse.parents(bestNeighbor)) { + for (VertexIdx pred : gCoarse_.Parents(bestNeighbor)) { if (std::abs(static_cast(topLevel[pred]) - static_cast(maxTopLevel[newLead])) != 1 && std::abs(static_cast(topLevel[pred]) - static_cast(minTopLevel[newLead])) != 1) { continue; @@ -694,7 +693,7 @@ std::vector, vertex_idx_t>> StepByStepC ++nrBadNeighbors[pred]; } } - for (vertex_idx succ : G_coarse.children(bestNeighbor)) { + for (VertexIdx succ : gCoarse_.Children(bestNeighbor)) { if (std::abs(static_cast(topLevel[succ]) - static_cast(maxTopLevel[newLead])) != 1 && std::abs(static_cast(topLevel[succ]) - static_cast(minTopLevel[newLead])) != 1) { continue; @@ -715,197 +714,195 @@ std::vector, vertex_idx_t>> StepByStepC return contractionSteps; } -template -std::vector StepByStepCoarser::ComputeFilteredTopLevel() const { - std::vector TopLevel(G_full.num_vertices()); - for (const vertex_idx node : top_sort_view(G_coarse)) { - if (!node_valid[node]) { +template +std::vector StepByStepCoarser::ComputeFilteredTopLevel() const { + std::vector topLevel(gFull_.NumVertices()); + for (const VertexIdx node : TopSortView(gCoarse_)) { + if (!nodeValid_[node]) { continue; } - TopLevel[node] = 0; - for (const vertex_idx pred : 
G_coarse.parents(node)) { - TopLevel[node] = std::max(TopLevel[node], TopLevel[pred] + 1); + topLevel[node] = 0; + for (const VertexIdx pred : gCoarse_.Parents(node)) { + topLevel[node] = std::max(topLevel[node], topLevel[pred] + 1); } } - return TopLevel; + return topLevel; } -template -void StepByStepCoarser::ComputeFilteredTopOrderIdx() { - top_order_idx = GetFilteredTopOrderIdx(G_coarse, node_valid); +template +void StepByStepCoarser::ComputeFilteredTopOrderIdx() { + topOrderIdx_ = GetFilteredTopOrderIdx(gCoarse_, nodeValid_); } -template -std::vector> StepByStepCoarser::GetFilteredTopOrderIdx(const Graph_t &G, - const std::vector &is_valid) { - std::vector top_order = GetFilteredTopOrder(is_valid, G); - std::vector idx(G.num_vertices()); - for (vertex_idx node = 0; node < top_order.size(); ++node) { - idx[top_order[node]] = node; +template +std::vector> StepByStepCoarser::GetFilteredTopOrderIdx(const GraphT &g, + const std::vector &isValid) { + std::vector topOrder = GetFilteredTopOrder(isValid, g); + std::vector idx(g.NumVertices()); + for (VertexIdx node = 0; node < topOrder.size(); ++node) { + idx[topOrder[node]] = node; } return idx; } -template -void StepByStepCoarser::coarsenForPebbling(const Graph_t &dag_in, - Graph_t &coarsened_dag, - std::vector> &new_vertex_id) { - problem_type = PROBLEM_TYPE::PEBBLING; - coarsening_strategy = COARSENING_STRATEGY::EDGE_BY_EDGE; - - unsigned nr_sources = 0; - for (vertex_idx node = 0; node < dag_in.num_vertices(); ++node) { - if (dag_in.in_degree(node) == 0) { - ++nr_sources; +template +void StepByStepCoarser::CoarsenForPebbling(const GraphT &dagIn, + GraphT &coarsenedDag, + std::vector> &newVertexId) { + problemType_ = ProblemType::PEBBLING; + coarseningStrategy_ = CoarseningStrategy::EDGE_BY_EDGE; + + unsigned nrSources = 0; + for (VertexIdx node = 0; node < dagIn.NumVertices(); ++node) { + if (dagIn.InDegree(node) == 0) { + ++nrSources; } } - target_nr_of_nodes = std::max(target_nr_of_nodes, nr_sources + 1); + 
targetNrOfNodes_ = std::max(targetNrOfNodes_, nrSources + 1); - CoarserGenContractionMap::coarsenDag(dag_in, coarsened_dag, new_vertex_id); + CoarserGenContractionMap::CoarsenDag(dagIn, coarsenedDag, newVertexId); } -template -bool StepByStepCoarser::IncontractableForPebbling(const std::pair &edge) const { - if (G_coarse.in_degree(edge.first) == 0) { +template +bool StepByStepCoarser::IncontractableForPebbling(const std::pair &edge) const { + if (gCoarse_.InDegree(edge.first) == 0) { return true; } - v_memw_t sum_weight = G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second); - std::set parents; - for (vertex_idx pred : G_coarse.parents(edge.first)) { + VMemwT sumWeight = gCoarse_.VertexMemWeight(edge.first) + gCoarse_.VertexMemWeight(edge.second); + std::set parents; + for (VertexIdx pred : gCoarse_.Parents(edge.first)) { parents.insert(pred); } - for (vertex_idx pred : G_coarse.parents(edge.second)) { + for (VertexIdx pred : gCoarse_.Parents(edge.second)) { if (pred != edge.first) { parents.insert(pred); } } - for (vertex_idx node : parents) { - sum_weight += G_coarse.vertex_mem_weight(node); + for (VertexIdx node : parents) { + sumWeight += gCoarse_.VertexMemWeight(node); } - if (sum_weight > fast_mem_capacity) { + if (sumWeight > fastMemCapacity_) { return true; } - std::set children; - for (vertex_idx succ : G_coarse.children(edge.second)) { + std::set children; + for (VertexIdx succ : gCoarse_.Children(edge.second)) { children.insert(succ); } - for (vertex_idx succ : G_coarse.children(edge.first)) { + for (VertexIdx succ : gCoarse_.Children(edge.first)) { if (succ != edge.second) { children.insert(succ); } } - for (vertex_idx child : children) { - sum_weight = G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second) - + G_coarse.vertex_mem_weight(child); - for (vertex_idx pred : G_coarse.parents(child)) { + for (VertexIdx child : children) { + sumWeight = gCoarse_.VertexMemWeight(edge.first) + 
gCoarse_.VertexMemWeight(edge.second) + gCoarse_.VertexMemWeight(child); + for (VertexIdx pred : gCoarse_.Parents(child)) { if (pred != edge.first && pred != edge.second) { - sum_weight += G_coarse.vertex_mem_weight(pred); + sumWeight += gCoarse_.VertexMemWeight(pred); } } - if (sum_weight > fast_mem_capacity) { + if (sumWeight > fastMemCapacity_) { return true; } } return false; } -template -void StepByStepCoarser::MergeSourcesInPebbling() { +template +void StepByStepCoarser::MergeSourcesInPebbling() { // initialize memory requirement sums to check viability later - std::vector> memory_sum(G_coarse.num_vertices(), 0); - std::vector sources; - for (vertex_idx node = 0; node < G_coarse.num_vertices(); ++node) { - if (!node_valid[node]) { + std::vector> memorySum(gCoarse_.NumVertices(), 0); + std::vector sources; + for (VertexIdx node = 0; node < gCoarse_.NumVertices(); ++node) { + if (!nodeValid_[node]) { continue; } - if (G_coarse.in_degree(node) > 0) { - memory_sum[node] = G_coarse.vertex_mem_weight(node); - for (vertex_idx pred : G_coarse.parents(node)) { - memory_sum[node] += G_coarse.vertex_mem_weight(pred); + if (gCoarse_.InDegree(node) > 0) { + memorySum[node] = gCoarse_.VertexMemWeight(node); + for (VertexIdx pred : gCoarse_.Parents(node)) { + memorySum[node] += gCoarse_.VertexMemWeight(pred); } } else { sources.push_back(node); } } - std::set invalidated_sources; - bool could_merge = true; - while (could_merge) { - could_merge = false; + std::set invalidatedSources; + bool couldMerge = true; + while (couldMerge) { + couldMerge = false; for (unsigned idx1 = 0; idx1 < sources.size(); ++idx1) { - vertex_idx source_a = sources[idx1]; - if (invalidated_sources.find(source_a) != invalidated_sources.end()) { + VertexIdx sourceA = sources[idx1]; + if (invalidatedSources.find(sourceA) != invalidatedSources.end()) { continue; } for (unsigned idx2 = idx1 + 1; idx2 < sources.size(); ++idx2) { - vertex_idx source_b = sources[idx2]; - if 
(invalidated_sources.find(source_b) != invalidated_sources.end()) { + VertexIdx sourceB = sources[idx2]; + if (invalidatedSources.find(sourceB) != invalidatedSources.end()) { continue; } - // check if we can merge source_a and source_b - std::set a_children, b_children; - for (vertex_idx succ : G_coarse.children(source_a)) { - a_children.insert(succ); + // check if we can merge sourceA and sourceB + std::set aChildren, bChildren; + for (VertexIdx succ : gCoarse_.Children(sourceA)) { + aChildren.insert(succ); } - for (vertex_idx succ : G_coarse.children(source_b)) { - b_children.insert(succ); + for (VertexIdx succ : gCoarse_.Children(sourceB)) { + bChildren.insert(succ); } - std::set only_a, only_b, both; - for (vertex_idx succ : G_coarse.children(source_a)) { - if (b_children.find(succ) == b_children.end()) { - only_a.insert(succ); + std::set onlyA, onlyB, both; + for (VertexIdx succ : gCoarse_.Children(sourceA)) { + if (bChildren.find(succ) == bChildren.end()) { + onlyA.insert(succ); } else { both.insert(succ); } } - for (vertex_idx succ : G_coarse.children(source_b)) { - if (a_children.find(succ) == a_children.end()) { - only_b.insert(succ); + for (VertexIdx succ : gCoarse_.Children(sourceB)) { + if (aChildren.find(succ) == aChildren.end()) { + onlyB.insert(succ); } } - bool violates_constraint = false; - for (vertex_idx node : only_a) { - if (memory_sum[node] + G_coarse.vertex_mem_weight(source_b) > fast_mem_capacity) { - violates_constraint = true; + bool violatesConstraint = false; + for (VertexIdx node : onlyA) { + if (memorySum[node] + gCoarse_.VertexMemWeight(sourceB) > fastMemCapacity_) { + violatesConstraint = true; } } - for (vertex_idx node : only_b) { - if (memory_sum[node] + G_coarse.vertex_mem_weight(source_a) > fast_mem_capacity) { - violates_constraint = true; + for (VertexIdx node : onlyB) { + if (memorySum[node] + gCoarse_.VertexMemWeight(sourceA) > fastMemCapacity_) { + violatesConstraint = true; } } - if (violates_constraint) { + if 
(violatesConstraint) { continue; } - // check if we want to merge source_a and source_b - double sim_diff = (only_a.size() + only_b.size() == 0) ? 0.0001 - : static_cast(only_a.size() + only_b.size()); - double ratio = static_cast(both.size()) / sim_diff; + // check if we want to merge sourceA and sourceB + double simDiff = (onlyA.size() + onlyB.size() == 0) ? 0.0001 : static_cast(onlyA.size() + onlyB.size()); + double ratio = static_cast(both.size()) / simDiff; if (ratio > 2) { - ContractSingleEdge(std::make_pair(source_a, source_b)); - invalidated_sources.insert(source_b); - could_merge = true; + ContractSingleEdge(std::make_pair(sourceA, sourceB)); + invalidatedSources.insert(sourceB); + couldMerge = true; - for (vertex_idx node : only_a) { - memory_sum[node] += G_coarse.vertex_mem_weight(source_b); + for (VertexIdx node : onlyA) { + memorySum[node] += gCoarse_.VertexMemWeight(sourceB); } - for (vertex_idx node : only_b) { - memory_sum[node] += G_coarse.vertex_mem_weight(source_a); + for (VertexIdx node : onlyB) { + memorySum[node] += gCoarse_.VertexMemWeight(sourceA); } } } @@ -913,111 +910,111 @@ void StepByStepCoarser::MergeSourcesInPebbling() { } } -template -Graph_t StepByStepCoarser::Contract(const std::vector> &new_vertex_id) const { - Graph_t G_contracted; - std::vector is_valid(G_full.num_vertices(), false); - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - is_valid[new_vertex_id[node]] = true; +template +GraphT StepByStepCoarser::Contract(const std::vector> &newVertexId) const { + GraphT gContracted; + std::vector isValid(gFull_.NumVertices(), false); + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + isValid[newVertexId[node]] = true; } - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - if (is_valid[node]) { - G_contracted.add_vertex(0, 0, 0, 0); + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + if (isValid[node]) { + gContracted.AddVertex(0, 0, 0, 0); } } - for (vertex_idx node 
= 0; node < G_full.num_vertices(); ++node) { - G_contracted.set_vertex_work_weight( - new_vertex_id[node], G_contracted.vertex_work_weight(new_vertex_id[node]) + G_full.vertex_work_weight(node)); - G_contracted.set_vertex_comm_weight( - new_vertex_id[node], G_contracted.vertex_comm_weight(new_vertex_id[node]) + G_full.vertex_comm_weight(node)); - G_contracted.set_vertex_mem_weight(new_vertex_id[node], - G_contracted.vertex_mem_weight(new_vertex_id[node]) + G_full.vertex_mem_weight(node)); - G_contracted.set_vertex_type(new_vertex_id[node], G_full.vertex_type(node)); + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + gContracted.SetVertexWorkWeight(newVertexId[node], + gContracted.VertexWorkWeight(newVertexId[node]) + gFull_.VertexWorkWeight(node)); + gContracted.SetVertexCommWeight(newVertexId[node], + gContracted.VertexCommWeight(newVertexId[node]) + gFull_.VertexCommWeight(node)); + gContracted.SetVertexMemWeight(newVertexId[node], + gContracted.VertexMemWeight(newVertexId[node]) + gFull_.VertexMemWeight(node)); + gContracted.SetVertexType(newVertexId[node], gFull_.VertexType(node)); } - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - for (const auto &out_edge : out_edges(node, G_full)) { - const vertex_idx succ = target(out_edge, G_full); + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + for (const auto &outEdge : OutEdges(node, gFull_)) { + const VertexIdx succ = Target(outEdge, gFull_); - if (new_vertex_id[node] == new_vertex_id[succ]) { + if (newVertexId[node] == newVertexId[succ]) { continue; } - if constexpr (has_edge_weights_v) { - const auto pair = edge_desc(new_vertex_id[node], new_vertex_id[succ], G_contracted); + if constexpr (hasEdgeWeightsV) { + const auto pair = EdgeDesc(newVertexId[node], newVertexId[succ], gContracted); if (pair.second) { - G_contracted.set_edge_comm_weight( - pair.first, G_contracted.edge_comm_weight(pair.first) + G_full.edge_comm_weight(out_edge)); + 
gContracted.SetEdgeCommWeight(pair.first, + gContracted.EdgeCommWeight(pair.first) + gFull_.EdgeCommWeight(outEdge)); } else { - G_contracted.add_edge(new_vertex_id[node], new_vertex_id[succ], G_full.edge_comm_weight(out_edge)); + gContracted.AddEdge(newVertexId[node], newVertexId[succ], gFull_.EdgeCommWeight(outEdge)); } } else { - if (not edge(new_vertex_id[node], new_vertex_id[succ], G_contracted)) { - G_contracted.add_edge(new_vertex_id[node], new_vertex_id[succ]); + if (not Edge(newVertexId[node], newVertexId[succ], gContracted)) { + gContracted.AddEdge(newVertexId[node], newVertexId[succ]); } } } } - return G_contracted; + return gContracted; } -template -void StepByStepCoarser::SetIdVector(std::vector> &new_vertex_id) const { - new_vertex_id.clear(); - new_vertex_id.resize(G_full.num_vertices()); +template +void StepByStepCoarser::SetIdVector(std::vector> &newVertexId) const { + newVertexId.clear(); + newVertexId.resize(gFull_.NumVertices()); - new_vertex_id = GetIntermediateIDs(contractionHistory.size()); + newVertexId = GetIntermediateIDs(contractionHistory_.size()); } -template -std::vector> StepByStepCoarser::GetIntermediateIDs(vertex_idx until_which_step) const { - std::vector target(G_full.num_vertices()), pointsTo(G_full.num_vertices(), std::numeric_limits::max()); +template +std::vector> StepByStepCoarser::GetIntermediateIDs(VertexIdx untilWhichStep) const { + std::vector target(gFull_.NumVertices()), pointsTo(gFull_.NumVertices(), std::numeric_limits::max()); - for (vertex_idx iterate = 0; iterate < contractionHistory.size() && iterate < until_which_step; ++iterate) { - const std::pair &contractionStep = contractionHistory[iterate]; + for (VertexIdx iterate = 0; iterate < contractionHistory_.size() && iterate < untilWhichStep; ++iterate) { + const std::pair &contractionStep = contractionHistory_[iterate]; pointsTo[contractionStep.second] = contractionStep.first; } - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { + for (VertexIdx 
node = 0; node < gFull_.NumVertices(); ++node) { target[node] = node; - while (pointsTo[target[node]] != std::numeric_limits::max()) { + while (pointsTo[target[node]] != std::numeric_limits::max()) { target[node] = pointsTo[target[node]]; } } - if (contractionHistory.empty() || until_which_step == 0) { + if (contractionHistory_.empty() || untilWhichStep == 0) { return target; } - std::vector is_valid(G_full.num_vertices(), false); - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - is_valid[target[node]] = true; + std::vector isValid(gFull_.NumVertices(), false); + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + isValid[target[node]] = true; } - std::vector new_id(G_full.num_vertices()); - vertex_idx current_index = 0; - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - if (is_valid[node]) { - new_id[node] = current_index++; + std::vector newId(gFull_.NumVertices()); + VertexIdx currentIndex = 0; + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + if (isValid[node]) { + newId[node] = currentIndex++; } } - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - target[node] = new_id[target[node]]; + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + target[node] = newId[target[node]]; } - boost_graph_t temp_dag; - temp_dag = Contract(target); - std::vector all_valid(temp_dag.num_vertices(), true); - std::vector top_idx = GetFilteredTopOrderIdx(temp_dag, all_valid); + BoostGraphT tempDag; + tempDag = Contract(target); + std::vector allValid(tempDag.NumVertices(), true); + std::vector topIdx = GetFilteredTopOrderIdx(tempDag, allValid); - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { - target[node] = top_idx[target[node]]; + for (VertexIdx node = 0; node < gFull_.NumVertices(); ++node) { + target[node] = topIdx[target[node]]; } return target; diff --git a/include/osp/coarser/coarser_util.hpp b/include/osp/coarser/coarser_util.hpp index e2b1fa31..aec1a48e 
100644 --- a/include/osp/coarser/coarser_util.hpp +++ b/include/osp/coarser/coarser_util.hpp @@ -34,198 +34,186 @@ limitations under the License. namespace osp { namespace coarser_util { -template -bool check_valid_contraction_map(const std::vector> &vertex_contraction_map) { - std::set> image(vertex_contraction_map.cbegin(), vertex_contraction_map.cend()); - const vertex_idx_t image_size = static_cast>(image.size()); - return std::all_of(image.cbegin(), image.cend(), [image_size](const vertex_idx_t &vert) { - return (vert >= static_cast>(0)) && (vert < image_size); +template +bool CheckValidContractionMap(const std::vector> &vertexContractionMap) { + std::set> image(vertexContractionMap.cbegin(), vertexContractionMap.cend()); + const VertexIdxT imageSize = static_cast>(image.size()); + + return std::all_of(image.cbegin(), image.cend(), [imageSize](const VertexIdxT &vert) { + return (vert >= static_cast>(0)) && (vert < imageSize); }); } template -struct acc_sum { +struct AccSum { T operator()(const T &a, const T &b) { return a + b; } }; template -struct acc_max { +struct AccMax { T operator()(const T &a, const T &b) { return std::max(a, b); } }; /** * @brief Coarsens the input computational DAG into a simplified version. * - * @param dag_in The input computational DAG to be coarsened. It is expected to be a valid graph structure. - * @param coarsened_dag The output computational DAG after coarsening. It will be populated by this method. - * @param vertex_contraction_map Output mapping from dag_in to coarsened_dag. + * @param dagIn The input computational DAG to be coarsened. It is expected to be a valid graph structure. + * @param coarsenedDag The output computational DAG after coarsening. It will be populated by this method. + * @param vertexContractionMap Output mapping from dagIn to coarsenedDag. * @return A status code indicating the success or failure of the coarsening operation. 
*/ -template -bool construct_coarse_dag(const Graph_t_in &dag_in, - Graph_t_out &coarsened_dag, - const std::vector> &vertex_contraction_map) { - static_assert(is_directed_graph_v && is_directed_graph_v, +template +bool ConstructCoarseDag(const GraphTIn &dagIn, + GraphTOut &coarsenedDag, + const std::vector> &vertexContractionMap) { + static_assert(isDirectedGraphV && isDirectedGraphV, "Graph types need to satisfy the is_directed_graph concept."); - static_assert(is_computational_dag_v, "Graph_t_in must be a computational DAG"); - static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, - "Graph_t_out must be a (direct) constructable computational DAG"); + static_assert(isComputationalDagV, "GraphTIn must be a computational DAG"); + static_assert(isConstructableCdagV || isDirectConstructableCdagV, + "GraphTOut must be a (direct) constructable computational DAG"); - assert(check_valid_contraction_map(vertex_contraction_map)); + assert(CheckValidContractionMap(vertexContractionMap)); - if (vertex_contraction_map.size() == 0) { - coarsened_dag = Graph_t_out(); + if (vertexContractionMap.size() == 0) { + coarsenedDag = GraphTOut(); return true; } - if constexpr (is_direct_constructable_cdag_v) { - const vertex_idx_t num_vert_quotient - = (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; + if constexpr (isDirectConstructableCdagV) { + const VertexIdxT numVertQuotient + = (*std::max_element(vertexContractionMap.cbegin(), vertexContractionMap.cend())) + 1; - std::set, vertex_idx_t>> quotient_edges; + std::set, VertexIdxT>> quotientEdges; - for (const vertex_idx_t &vert : dag_in.vertices()) { - for (const vertex_idx_t &chld : dag_in.children(vert)) { - if (vertex_contraction_map[vert] == vertex_contraction_map[chld]) { + for (const VertexIdxT &vert : dagIn.Vertices()) { + for (const VertexIdxT &chld : dagIn.Children(vert)) { + if (vertexContractionMap[vert] == vertexContractionMap[chld]) { continue; } - 
quotient_edges.emplace(vertex_contraction_map[vert], vertex_contraction_map[chld]); + quotientEdges.emplace(vertexContractionMap[vert], vertexContractionMap[chld]); } } - coarsened_dag = Graph_t_out(num_vert_quotient, quotient_edges); + coarsenedDag = GraphTOut(numVertQuotient, quotientEdges); - if constexpr (has_vertex_weights_v && is_modifiable_cdag_vertex_v) { - static_assert(std::is_same_v, v_workw_t>, + if constexpr (hasVertexWeightsV && isModifiableCdagVertexV) { + static_assert(std::is_same_v, VWorkwT>, "Work weight types of in-graph and out-graph must be the same."); - static_assert(std::is_same_v, v_commw_t>, + static_assert(std::is_same_v, VCommwT>, "Vertex communication types of in-graph and out-graph must be the same."); - static_assert(std::is_same_v, v_memw_t>, + static_assert(std::is_same_v, VMemwT>, "Memory weight types of in-graph and out-graph must be the same."); - for (const vertex_idx_t &vert : coarsened_dag.vertices()) { - coarsened_dag.set_vertex_work_weight(vert, 0); - coarsened_dag.set_vertex_comm_weight(vert, 0); - coarsened_dag.set_vertex_mem_weight(vert, 0); + for (const VertexIdxT &vert : coarsenedDag.Vertices()) { + coarsenedDag.SetVertexWorkWeight(vert, 0); + coarsenedDag.SetVertexCommWeight(vert, 0); + coarsenedDag.SetVertexMemWeight(vert, 0); } - for (const vertex_idx_t &vert : dag_in.vertices()) { - coarsened_dag.set_vertex_work_weight( - vertex_contraction_map[vert], - v_work_acc_method()(coarsened_dag.vertex_work_weight(vertex_contraction_map[vert]), - dag_in.vertex_work_weight(vert))); - - coarsened_dag.set_vertex_comm_weight( - vertex_contraction_map[vert], - v_comm_acc_method()(coarsened_dag.vertex_comm_weight(vertex_contraction_map[vert]), - dag_in.vertex_comm_weight(vert))); - - coarsened_dag.set_vertex_mem_weight( - vertex_contraction_map[vert], - v_mem_acc_method()(coarsened_dag.vertex_mem_weight(vertex_contraction_map[vert]), - dag_in.vertex_mem_weight(vert))); + for (const VertexIdxT &vert : dagIn.Vertices()) { + 
coarsenedDag.SetVertexWorkWeight( + vertexContractionMap[vert], + VWorkAccMethod()(coarsenedDag.VertexWorkWeight(vertexContractionMap[vert]), dagIn.VertexWorkWeight(vert))); + + coarsenedDag.SetVertexCommWeight( + vertexContractionMap[vert], + VCommAccMethod()(coarsenedDag.VertexCommWeight(vertexContractionMap[vert]), dagIn.VertexCommWeight(vert))); + + coarsenedDag.SetVertexMemWeight( + vertexContractionMap[vert], + VMemAccMethod()(coarsenedDag.VertexMemWeight(vertexContractionMap[vert]), dagIn.VertexMemWeight(vert))); } } - if constexpr (has_typed_vertices_v && is_modifiable_cdag_typed_vertex_v) { - static_assert(std::is_same_v, v_type_t>, + if constexpr (hasTypedVerticesV && isModifiableCdagTypedVertexV) { + static_assert(std::is_same_v, VTypeT>, "Vertex type types of in graph and out graph must be the same!"); - for (const vertex_idx_t &vert : dag_in.vertices()) { - coarsened_dag.set_vertex_type(vertex_contraction_map[vert], dag_in.vertex_type(vert)); + for (const VertexIdxT &vert : dagIn.Vertices()) { + coarsenedDag.SetVertexType(vertexContractionMap[vert], dagIn.VertexType(vert)); } - // assert(std::all_of(dag_in.vertices().begin(), dag_in.vertices().end(), - // [&dag_in, &vertex_contraction_map, &coarsened_dag](const auto &vert){ return - // dag_in.vertex_type(vert) == coarsened_dag.vertex_type(vertex_contraction_map[vert]); }) - // && "Contracted vertices must be of the same type"); } - if constexpr (has_edge_weights_v && is_modifiable_cdag_comm_edge_v) { - static_assert(std::is_same_v, e_commw_t>, + if constexpr (hasEdgeWeightsV && isModifiableCdagCommEdgeV) { + static_assert(std::is_same_v, ECommwT>, "Edge weight type of in graph and out graph must be the same!"); - for (const auto &edge : edges(coarsened_dag)) { - coarsened_dag.set_edge_comm_weight(edge, 0); + for (const auto &edge : Edges(coarsenedDag)) { + coarsenedDag.SetEdgeCommWeight(edge, 0); } - for (const auto &ori_edge : edges(dag_in)) { - vertex_idx_t src = 
vertex_contraction_map[source(ori_edge, dag_in)]; - vertex_idx_t tgt = vertex_contraction_map[target(ori_edge, dag_in)]; + for (const auto &oriEdge : Edges(dagIn)) { + VertexIdxT src = vertexContractionMap[Source(oriEdge, dagIn)]; + VertexIdxT tgt = vertexContractionMap[Target(oriEdge, dagIn)]; if (src == tgt) { continue; } - const auto [cont_edge, found] = edge_desc(src, tgt, coarsened_dag); + const auto [contEdge, found] = EdgeDesc(src, tgt, coarsenedDag); assert(found && "The edge should already exist"); - coarsened_dag.set_edge_comm_weight( - cont_edge, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge))); + coarsenedDag.SetEdgeCommWeight( + contEdge, ECommAccMethod()(coarsenedDag.EdgeCommWeight(contEdge), dagIn.EdgeCommWeight(oriEdge))); } } return true; } - if constexpr (is_constructable_cdag_v) { - coarsened_dag = Graph_t_out(); + if constexpr (isConstructableCdagV) { + coarsenedDag = GraphTOut(); - const vertex_idx_t num_vert_quotient - = (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; + const VertexIdxT numVertQuotient + = (*std::max_element(vertexContractionMap.cbegin(), vertexContractionMap.cend())) + 1; - for (vertex_idx_t vert = 0; vert < num_vert_quotient; ++vert) { - coarsened_dag.add_vertex(0, 0, 0); + for (VertexIdxT vert = 0; vert < numVertQuotient; ++vert) { + coarsenedDag.AddVertex(0, 0, 0); } - for (const vertex_idx_t &vert : dag_in.vertices()) { - coarsened_dag.set_vertex_work_weight(vertex_contraction_map[vert], - v_work_acc_method()(coarsened_dag.vertex_work_weight(vertex_contraction_map[vert]), - dag_in.vertex_work_weight(vert))); + for (const VertexIdxT &vert : dagIn.Vertices()) { + coarsenedDag.SetVertexWorkWeight( + vertexContractionMap[vert], + VWorkAccMethod()(coarsenedDag.VertexWorkWeight(vertexContractionMap[vert]), dagIn.VertexWorkWeight(vert))); - coarsened_dag.set_vertex_comm_weight(vertex_contraction_map[vert], - 
v_comm_acc_method()(coarsened_dag.vertex_comm_weight(vertex_contraction_map[vert]), - dag_in.vertex_comm_weight(vert))); + coarsenedDag.SetVertexCommWeight( + vertexContractionMap[vert], + VCommAccMethod()(coarsenedDag.VertexCommWeight(vertexContractionMap[vert]), dagIn.VertexCommWeight(vert))); - coarsened_dag.set_vertex_mem_weight( - vertex_contraction_map[vert], - v_mem_acc_method()(coarsened_dag.vertex_mem_weight(vertex_contraction_map[vert]), dag_in.vertex_mem_weight(vert))); + coarsenedDag.SetVertexMemWeight( + vertexContractionMap[vert], + VMemAccMethod()(coarsenedDag.VertexMemWeight(vertexContractionMap[vert]), dagIn.VertexMemWeight(vert))); } - if constexpr (has_typed_vertices_v && is_constructable_cdag_typed_vertex_v) { - static_assert(std::is_same_v, v_type_t>, + if constexpr (hasTypedVerticesV && isModifiableCdagTypedVertexV) { + static_assert(std::is_same_v, VTypeT>, "Vertex type types of in graph and out graph must be the same!"); - for (const vertex_idx_t &vert : dag_in.vertices()) { - coarsened_dag.set_vertex_type(vertex_contraction_map[vert], dag_in.vertex_type(vert)); + for (const VertexIdxT &vert : dagIn.Vertices()) { + coarsenedDag.SetVertexType(vertexContractionMap[vert], dagIn.VertexType(vert)); } - // assert(std::all_of(dag_in.vertices().begin(), dag_in.vertices().end(), - // [&dag_in, &vertex_contraction_map, &coarsened_dag](const auto &vert){ return - // dag_in.vertex_type(vert) == coarsened_dag.vertex_type(vertex_contraction_map[vert]); }) - // && "Contracted vertices must be of the same type"); } - for (const vertex_idx_t &vert : dag_in.vertices()) { - for (const vertex_idx_t &chld : dag_in.children(vert)) { - if (vertex_contraction_map[vert] == vertex_contraction_map[chld]) { + for (const VertexIdxT &vert : dagIn.Vertices()) { + for (const VertexIdxT &chld : dagIn.Children(vert)) { + if (vertexContractionMap[vert] == vertexContractionMap[chld]) { continue; } - if constexpr (has_edge_weights_v && is_constructable_cdag_comm_edge_v) { - 
static_assert(std::is_same_v, e_commw_t>, + if constexpr (hasEdgeWeightsV && isModifiableCdagCommEdgeV) { + static_assert(std::is_same_v, ECommwT>, "Edge weight type of in graph and out graph must be the same!"); - edge_desc_t ori_edge = edge_desc(vert, chld, dag_in).first; - const auto pair = edge_desc(vertex_contraction_map[vert], vertex_contraction_map[chld], coarsened_dag); + EdgeDescT oriEdge = EdgeDesc(vert, chld, dagIn).first; + const auto pair = EdgeDesc(vertexContractionMap[vert], vertexContractionMap[chld], coarsenedDag); if (pair.second) { - coarsened_dag.set_edge_comm_weight( - pair.first, - e_comm_acc_method()(coarsened_dag.edge_comm_weight(pair.first), dag_in.edge_comm_weight(ori_edge))); + coarsenedDag.SetEdgeCommWeight( + pair.first, ECommAccMethod()(coarsenedDag.EdgeCommWeight(pair.first), dagIn.EdgeCommWeight(oriEdge))); } else { - coarsened_dag.add_edge( - vertex_contraction_map[vert], vertex_contraction_map[chld], dag_in.edge_comm_weight(ori_edge)); + coarsenedDag.AddEdge(vertexContractionMap[vert], vertexContractionMap[chld], dagIn.EdgeCommWeight(oriEdge)); } } else { - if (not edge(vertex_contraction_map[vert], vertex_contraction_map[chld], coarsened_dag)) { - coarsened_dag.add_edge(vertex_contraction_map[vert], vertex_contraction_map[chld]); + if (not Edge(vertexContractionMap[vert], vertexContractionMap[chld], coarsenedDag)) { + coarsenedDag.AddEdge(vertexContractionMap[vert], vertexContractionMap[chld]); } } } @@ -235,146 +223,137 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, return false; } -template >, - typename v_comm_acc_method = acc_sum>, - typename v_mem_acc_method = acc_sum>, - typename e_comm_acc_method = acc_sum>> -bool construct_coarse_dag(const Graph_t_in &dag_in, - Graph_t_out &coarsened_dag, - std::vector> &vertex_contraction_map) { - if constexpr (is_Compact_Sparse_Graph_reorder_v) { - static_assert(is_directed_graph_v && is_directed_graph_v, +template >, + typename VCommAccMethod = AccSum>, + typename VMemAccMethod 
= AccSum>, + typename ECommAccMethod = AccSum>> +bool ConstructCoarseDag(const GraphTIn &dagIn, GraphTOut &coarsenedDag, std::vector> &vertexContractionMap) { + if constexpr (isCompactSparseGraphReorderV) { + static_assert(isDirectedGraphV && isDirectedGraphV, "Graph types need to satisfy the is_directed_graph concept."); - static_assert(is_computational_dag_v, "Graph_t_in must be a computational DAG"); - static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, - "Graph_t_out must be a (direct) constructable computational DAG"); + static_assert(isComputationalDagV, "GraphTIn must be a computational DAG"); + static_assert(isConstructableCdagV || isDirectConstructableCdagV, + "GraphTOut must be a (direct) constructable computational DAG"); - assert(check_valid_contraction_map(vertex_contraction_map)); + assert(CheckValidContractionMap(vertexContractionMap)); - if (vertex_contraction_map.size() == 0) { - coarsened_dag = Graph_t_out(); + if (vertexContractionMap.size() == 0) { + coarsenedDag = GraphTOut(); return true; } - const vertex_idx_t num_vert_quotient - = (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; + const VertexIdxT numVertQuotient + = (*std::max_element(vertexContractionMap.cbegin(), vertexContractionMap.cend())) + 1; - std::set, vertex_idx_t>> quotient_edges; + std::set, VertexIdxT>> quotientEdges; - for (const vertex_idx_t &vert : dag_in.vertices()) { - for (const vertex_idx_t &chld : dag_in.children(vert)) { - if (vertex_contraction_map[vert] == vertex_contraction_map[chld]) { + for (const VertexIdxT &vert : dagIn.Vertices()) { + for (const VertexIdxT &chld : dagIn.Children(vert)) { + if (vertexContractionMap[vert] == vertexContractionMap[chld]) { continue; } - quotient_edges.emplace(vertex_contraction_map[vert], vertex_contraction_map[chld]); + quotientEdges.emplace(vertexContractionMap[vert], vertexContractionMap[chld]); } } - coarsened_dag = Graph_t_out(num_vert_quotient, quotient_edges); + 
coarsenedDag = GraphTOut(numVertQuotient, quotientEdges); - const auto &pushforward_map = coarsened_dag.get_pushforward_permutation(); - std::vector> combined_expansion_map(dag_in.num_vertices()); - for (const auto &vert : dag_in.vertices()) { - combined_expansion_map[vert] = pushforward_map[vertex_contraction_map[vert]]; + const auto &pushforwardMap = coarsenedDag.GetPushforwardPermutation(); + std::vector> combinedExpansionMap(dagIn.NumVertices()); + for (const auto &vert : dagIn.Vertices()) { + combinedExpansionMap[vert] = pushforwardMap[vertexContractionMap[vert]]; } - if constexpr (has_vertex_weights_v && is_modifiable_cdag_vertex_v) { - static_assert(std::is_same_v, v_workw_t>, + if constexpr (hasVertexWeightsV && isModifiableCdagVertexV) { + static_assert(std::is_same_v, VWorkwT>, "Work weight types of in-graph and out-graph must be the same."); - static_assert(std::is_same_v, v_commw_t>, + static_assert(std::is_same_v, VCommwT>, "Vertex communication types of in-graph and out-graph must be the same."); - static_assert(std::is_same_v, v_memw_t>, + static_assert(std::is_same_v, VMemwT>, "Memory weight types of in-graph and out-graph must be the same."); - for (const vertex_idx_t &vert : coarsened_dag.vertices()) { - coarsened_dag.set_vertex_work_weight(vert, 0); - coarsened_dag.set_vertex_comm_weight(vert, 0); - coarsened_dag.set_vertex_mem_weight(vert, 0); + for (const VertexIdxT &vert : coarsenedDag.Vertices()) { + coarsenedDag.SetVertexWorkWeight(vert, 0); + coarsenedDag.SetVertexCommWeight(vert, 0); + coarsenedDag.SetVertexMemWeight(vert, 0); } - for (const vertex_idx_t &vert : dag_in.vertices()) { - coarsened_dag.set_vertex_work_weight( - vertex_contraction_map[vert], - v_work_acc_method()(coarsened_dag.vertex_work_weight(combined_expansion_map[vert]), - dag_in.vertex_work_weight(vert))); - - coarsened_dag.set_vertex_comm_weight( - vertex_contraction_map[vert], - v_comm_acc_method()(coarsened_dag.vertex_comm_weight(combined_expansion_map[vert]), - 
dag_in.vertex_comm_weight(vert))); - - coarsened_dag.set_vertex_mem_weight( - vertex_contraction_map[vert], - v_mem_acc_method()(coarsened_dag.vertex_mem_weight(combined_expansion_map[vert]), - dag_in.vertex_mem_weight(vert))); + for (const VertexIdxT &vert : dagIn.Vertices()) { + coarsenedDag.SetVertexWorkWeight( + vertexContractionMap[vert], + VWorkAccMethod()(coarsenedDag.VertexWorkWeight(combinedExpansionMap[vert]), dagIn.VertexWorkWeight(vert))); + + coarsenedDag.SetVertexCommWeight( + vertexContractionMap[vert], + VCommAccMethod()(coarsenedDag.VertexCommWeight(combinedExpansionMap[vert]), dagIn.VertexCommWeight(vert))); + + coarsenedDag.SetVertexMemWeight( + vertexContractionMap[vert], + VMemAccMethod()(coarsenedDag.VertexMemWeight(combinedExpansionMap[vert]), dagIn.VertexMemWeight(vert))); } } - if constexpr (has_typed_vertices_v && is_modifiable_cdag_typed_vertex_v) { - static_assert(std::is_same_v, v_type_t>, + if constexpr (hasTypedVerticesV && isModifiableCdagTypedVertexV) { + static_assert(std::is_same_v, VTypeT>, "Vertex type types of in graph and out graph must be the same!"); - for (const vertex_idx_t &vert : dag_in.vertices()) { - coarsened_dag.set_vertex_type(vertex_contraction_map[vert], dag_in.vertex_type(vert)); + for (const VertexIdxT &vert : dagIn.Vertices()) { + coarsenedDag.SetVertexType(vertexContractionMap[vert], dagIn.VertexType(vert)); } - // assert(std::all_of(dag_in.vertices().begin(), dag_in.vertices().end(), - // [&dag_in, &vertex_contraction_map, &coarsened_dag](const auto &vert){ return - // dag_in.vertex_type(vert) == coarsened_dag.vertex_type(vertex_contraction_map[vert]); }) - // && "Contracted vertices must be of the same type"); } - if constexpr (has_edge_weights_v && has_edge_weights_v) { - static_assert(std::is_same_v, e_commw_t>, + if constexpr (hasEdgeWeightsV && hasEdgeWeightsV) { + static_assert(std::is_same_v, ECommwT>, "Edge weight type of in graph and out graph must be the same!"); - for (const auto &ori_edge : 
edges(dag_in)) { - vertex_idx_t src = vertex_contraction_map[source(ori_edge, dag_in)]; - vertex_idx_t tgt = vertex_contraction_map[target(ori_edge, dag_in)]; + for (const auto &oriEdge : Edges(dagIn)) { + VertexIdxT src = vertexContractionMap[Source(oriEdge, dagIn)]; + VertexIdxT tgt = vertexContractionMap[Target(oriEdge, dagIn)]; if (src == tgt) { continue; } - coarsened_dag.set_edge_comm_weight(src, tgt, 0); + coarsenedDag.SetEdgeCommWeight(src, tgt, 0); } - for (const auto &ori_edge : edges(dag_in)) { - vertex_idx_t src = vertex_contraction_map[source(ori_edge, dag_in)]; - vertex_idx_t tgt = vertex_contraction_map[target(ori_edge, dag_in)]; + for (const auto &oriEdge : Edges(dagIn)) { + VertexIdxT src = vertexContractionMap[Source(oriEdge, dagIn)]; + VertexIdxT tgt = vertexContractionMap[Target(oriEdge, dagIn)]; if (src == tgt) { continue; } - const auto cont_edge = coarsened_dag.edge(pushforward_map[src], pushforward_map[tgt]); - assert(source(cont_edge, coarsened_dag) == pushforward_map[src] - && target(cont_edge, coarsened_dag) == pushforward_map[tgt]); - coarsened_dag.set_edge_comm_weight( - src, tgt, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge))); + const auto contEdge = coarsenedDag.Edge(pushforwardMap[src], pushforwardMap[tgt]); + assert(Source(contEdge, coarsenedDag) == pushforwardMap[src] + && Target(contEdge, coarsenedDag) == pushforwardMap[tgt]); + coarsenedDag.SetEdgeCommWeight( + src, tgt, ECommAccMethod()(coarsenedDag.EdgeCommWeight(contEdge), dagIn.EdgeCommWeight(oriEdge))); } } - std::swap(vertex_contraction_map, combined_expansion_map); + std::swap(vertexContractionMap, combinedExpansionMap); return true; } else { - return construct_coarse_dag( - dag_in, coarsened_dag, static_cast> &>(vertex_contraction_map)); + return ConstructCoarseDag( + dagIn, coarsenedDag, static_cast> &>(vertexContractionMap)); } } -template -bool check_valid_expansion_map(const std::vector>> &vertex_expansion_map) { 
+template +bool CheckValidExpansionMap(const std::vector>> &vertexExpansionMap) { std::size_t cntr = 0; std::vector preImage; - for (const std::vector> &group : vertex_expansion_map) { + for (const std::vector> &group : vertexExpansionMap) { if (group.size() == 0) { return false; } - for (const vertex_idx_t vert : group) { - if (vert < static_cast>(0)) { + for (const VertexIdxT vert : group) { + if (vert < static_cast>(0)) { return false; } @@ -394,141 +373,138 @@ bool check_valid_expansion_map(const std::vector -std::vector>> invert_vertex_contraction_map( - const std::vector> &vertex_contraction_map) { - assert(check_valid_contraction_map(vertex_contraction_map)); +template +std::vector>> InvertVertexContractionMap( + const std::vector> &vertexContractionMap) { + assert(CheckValidContractionMap(vertexContractionMap)); - vertex_idx_t num_vert - = vertex_contraction_map.size() == 0 - ? 0 - : *std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend()) + 1; + VertexIdxT numVert + = vertexContractionMap.size() == 0 ? 
0 : *std::max_element(vertexContractionMap.cbegin(), vertexContractionMap.cend()) + 1; - std::vector>> expansion_map(num_vert); + std::vector>> expansionMap(numVert); - for (std::size_t i = 0; i < vertex_contraction_map.size(); ++i) { - expansion_map[vertex_contraction_map[i]].push_back(i); + for (std::size_t i = 0; i < vertexContractionMap.size(); ++i) { + expansionMap[vertexContractionMap[i]].push_back(i); } - return expansion_map; + return expansionMap; } -template -std::vector> invert_vertex_expansion_map( - const std::vector>> &vertex_expansion_map) { - assert(check_valid_expansion_map(vertex_expansion_map)); +template +std::vector> InvertVertexExpansionMap(const std::vector>> &vertexExpansionMap) { + assert(CheckValidExpansionMap(vertexExpansionMap)); - vertex_idx_t num_vert = 0; - for (const auto &group : vertex_expansion_map) { - for (const vertex_idx_t &vert : group) { - num_vert = std::max(num_vert, vert + 1); + VertexIdxT numVert = 0; + for (const auto &group : vertexExpansionMap) { + for (const VertexIdxT &vert : group) { + numVert = std::max(numVert, vert + 1); } } - std::vector> vertex_contraction_map(num_vert); - for (std::size_t i = 0; i < vertex_expansion_map.size(); i++) { - for (const vertex_idx_t &vert : vertex_expansion_map[i]) { - vertex_contraction_map[vert] = static_cast>(i); + std::vector> vertexContractionMap(numVert); + for (std::size_t i = 0; i < vertexExpansionMap.size(); i++) { + for (const VertexIdxT &vert : vertexExpansionMap[i]) { + vertexContractionMap[vert] = static_cast>(i); } } - return vertex_contraction_map; + return vertexContractionMap; } -template -void reorder_expansion_map(const Graph_t_in &graph, std::vector>> &vertex_expansion_map) { - assert(check_valid_expansion_map(vertex_expansion_map)); +template +void ReorderExpansionMap(const GraphTIn &graph, std::vector>> &vertexExpansionMap) { + assert(CheckValidExpansionMap(vertexExpansionMap)); - std::vector vertex_contraction_map(graph.num_vertices()); - for (std::size_t i = 
0; i < vertex_expansion_map.size(); i++) { - for (const vertex_idx_t &vert : vertex_expansion_map[i]) { - vertex_contraction_map[vert] = i; + std::vector vertexContractionMap(graph.NumVertices()); + for (std::size_t i = 0; i < vertexExpansionMap.size(); i++) { + for (const VertexIdxT &vert : vertexExpansionMap[i]) { + vertexContractionMap[vert] = i; } } - std::vector prec(vertex_expansion_map.size(), 0); - for (const auto &vert : graph.vertices()) { - for (const auto &par : graph.parents(vert)) { - if (vertex_contraction_map.at(par) != vertex_contraction_map.at(vert)) { - prec[vertex_contraction_map.at(vert)] += 1; + std::vector prec(vertexExpansionMap.size(), 0); + for (const auto &vert : graph.Vertices()) { + for (const auto &par : graph.Parents(vert)) { + if (vertexContractionMap.at(par) != vertexContractionMap.at(vert)) { + prec[vertexContractionMap.at(vert)] += 1; } } } - for (auto &comp : vertex_expansion_map) { + for (auto &comp : vertexExpansionMap) { std::nth_element(comp.begin(), comp.begin(), comp.end()); } - auto cmp = [&vertex_expansion_map](const std::size_t &lhs, const std::size_t &rhs) { - return vertex_expansion_map[lhs] > vertex_expansion_map[rhs]; // because priority queue is a max_priority queue + auto cmp = [&vertexExpansionMap](const std::size_t &lhs, const std::size_t &rhs) { + return vertexExpansionMap[lhs] > vertexExpansionMap[rhs]; // because priority queue is a max_priority queue }; std::priority_queue, decltype(cmp)> ready(cmp); std::vector topOrder; - topOrder.reserve(vertex_expansion_map.size()); - for (std::size_t i = 0; i < vertex_expansion_map.size(); ++i) { + topOrder.reserve(vertexExpansionMap.size()); + for (std::size_t i = 0; i < vertexExpansionMap.size(); ++i) { if (prec[i] == 0) { ready.emplace(i); } } while (!ready.empty()) { - const std::size_t next_group = ready.top(); + const std::size_t nextGroup = ready.top(); ready.pop(); - topOrder.emplace_back(next_group); - - for (const auto &vert : vertex_expansion_map[next_group]) 
{ - for (const auto &chld : graph.children(vert)) { - if (vertex_contraction_map.at(vert) != vertex_contraction_map.at(chld)) { - prec[vertex_contraction_map.at(chld)] -= 1; - if (prec[vertex_contraction_map.at(chld)] == 0) { - ready.emplace(vertex_contraction_map.at(chld)); + topOrder.emplace_back(nextGroup); + + for (const auto &vert : vertexExpansionMap[nextGroup]) { + for (const auto &chld : graph.Children(vert)) { + if (vertexContractionMap.at(vert) != vertexContractionMap.at(chld)) { + prec[vertexContractionMap.at(chld)] -= 1; + if (prec[vertexContractionMap.at(chld)] == 0) { + ready.emplace(vertexContractionMap.at(chld)); } } } } } - assert(topOrder.size() == vertex_expansion_map.size()); + assert(topOrder.size() == vertexExpansionMap.size()); - inverse_permute_inplace(vertex_expansion_map, topOrder); + InversePermuteInplace(vertexExpansionMap, topOrder); return; } -template -bool pull_back_schedule(const BspSchedule &schedule_in, - const std::vector>> &vertex_map, - BspSchedule &schedule_out) { - for (unsigned v = 0; v < vertex_map.size(); ++v) { - const auto proc = schedule_in.assignedProcessor(v); - const auto step = schedule_in.assignedSuperstep(v); - - for (const auto &u : vertex_map[v]) { - schedule_out.setAssignedSuperstep(u, step); - schedule_out.setAssignedProcessor(u, proc); +template +bool PullBackSchedule(const BspSchedule &scheduleIn, + const std::vector>> &vertexMap, + BspSchedule &scheduleOut) { + for (unsigned v = 0; v < vertexMap.size(); ++v) { + const auto proc = scheduleIn.AssignedProcessor(v); + const auto step = scheduleIn.AssignedSuperstep(v); + + for (const auto &u : vertexMap[v]) { + scheduleOut.SetAssignedSuperstep(u, step); + scheduleOut.SetAssignedProcessor(u, proc); } } return true; } -template -bool pull_back_schedule(const BspSchedule &schedule_in, - const std::vector> &reverse_vertex_map, - BspSchedule &schedule_out) { - for (unsigned idx = 0; idx < reverse_vertex_map.size(); ++idx) { - const auto &v = reverse_vertex_map[idx]; 
+template +bool PullBackSchedule(const BspSchedule &scheduleIn, + const std::vector> &reverseVertexMap, + BspSchedule &scheduleOut) { + for (unsigned idx = 0; idx < reverseVertexMap.size(); ++idx) { + const auto &v = reverseVertexMap[idx]; - schedule_out.setAssignedSuperstep(idx, schedule_in.assignedSuperstep(v)); - schedule_out.setAssignedProcessor(idx, schedule_in.assignedProcessor(v)); + scheduleOut.SetAssignedSuperstep(idx, scheduleIn.AssignedSuperstep(v)); + scheduleOut.SetAssignedProcessor(idx, scheduleIn.AssignedProcessor(v)); } return true; } template -std::vector compose_vertex_contraction_map(const std::vector &firstMap, - const std::vector &secondMap) { +std::vector ComposeVertexContractionMap(const std::vector &firstMap, + const std::vector &secondMap) { static_assert(std::is_integral_v); std::vector composedMap(firstMap.size()); diff --git a/include/osp/coarser/funnel/FunnelBfs.hpp b/include/osp/coarser/funnel/FunnelBfs.hpp index 07ba93d0..1ebdbcb5 100644 --- a/include/osp/coarser/funnel/FunnelBfs.hpp +++ b/include/osp/coarser/funnel/FunnelBfs.hpp @@ -29,168 +29,168 @@ namespace osp { * (from outside the group) * */ -template -class FunnelBfs : public CoarserGenExpansionMap { +template +class FunnelBfs : public CoarserGenExpansionMap { public: /** * @brief Parameters for Funnel coarsener * */ - struct FunnelBfs_parameters { - bool funnel_incoming; + struct FunnelBfsParameters { + bool funnelIncoming_; - bool use_approx_transitive_reduction; + bool useApproxTransitiveReduction_; - v_workw_t max_work_weight; - v_memw_t max_memory_weight; + VWorkwT maxWorkWeight_; + VMemwT maxMemoryWeight_; - unsigned max_depth; + unsigned maxDepth_; - FunnelBfs_parameters(v_workw_t max_work_weight_ = std::numeric_limits>::max(), - v_memw_t max_memory_weight_ = std::numeric_limits>::max(), - unsigned max_depth_ = std::numeric_limits::max(), - bool funnel_incoming_ = true, - bool use_approx_transitive_reduction_ = true) - : funnel_incoming(funnel_incoming_), - 
use_approx_transitive_reduction(use_approx_transitive_reduction_), - max_work_weight(max_work_weight_), - max_memory_weight(max_memory_weight_), - max_depth(max_depth_) {}; + FunnelBfsParameters(VWorkwT maxWorkWeight = std::numeric_limits>::max(), + VMemwT maxMemoryWeight = std::numeric_limits>::max(), + unsigned maxDepth = std::numeric_limits::max(), + bool funnelIncoming = true, + bool useApproxTransitiveReduction = true) + : funnelIncoming_(funnelIncoming), + useApproxTransitiveReduction_(useApproxTransitiveReduction), + maxWorkWeight_(maxWorkWeight), + maxMemoryWeight_(maxMemoryWeight), + maxDepth_(maxDepth) {}; - ~FunnelBfs_parameters() = default; + ~FunnelBfsParameters() = default; }; - FunnelBfs(FunnelBfs_parameters parameters_ = FunnelBfs_parameters()) : parameters(parameters_) {} + FunnelBfs(FunnelBfsParameters parameters = FunnelBfsParameters()) : parameters_(parameters) {} virtual ~FunnelBfs() = default; - virtual std::vector>> generate_vertex_expansion_map(const Graph_t_in &graph) override { - if constexpr (use_architecture_memory_contraints) { - if (max_memory_per_vertex_type.size() < graph.num_vertex_types()) { + virtual std::vector>> GenerateVertexExpansionMap(const GraphTIn &graph) override { + if constexpr (useArchitectureMemoryContraints) { + if (maxMemoryPerVertexType_.size() < graph.NumVertexTypes()) { throw std::runtime_error("FunnelBfs: max_memory_per_vertex_type has insufficient size."); } } - std::vector>> partition; + std::vector>> partition; - if (parameters.funnel_incoming) { - run_in_contraction(graph, partition); + if (parameters_.funnelIncoming_) { + RunInContraction(graph, partition); } else { - run_out_contraction(graph, partition); + RunOutContraction(graph, partition); } return partition; } - std::string getCoarserName() const override { return "FunnelBfs"; } + std::string GetCoarserName() const override { return "FunnelBfs"; } - std::vector> &get_max_memory_per_vertex_type() { return max_memory_per_vertex_type; } + std::vector> 
&GetMaxMemoryPerVertexType() { return maxMemoryPerVertexType_; } private: - FunnelBfs_parameters parameters; + FunnelBfsParameters parameters_; - std::vector> max_memory_per_vertex_type; + std::vector> maxMemoryPerVertexType_; - void run_in_contraction(const Graph_t_in &graph, std::vector>> &partition) { - using vertex_idx_t = vertex_idx_t; + void RunInContraction(const GraphTIn &graph, std::vector>> &partition) { + using VertexIdxT = VertexIdxT; - const std::unordered_set> edge_mask = parameters.use_approx_transitive_reduction - ? long_edges_in_triangles_parallel(graph) - : std::unordered_set>(); + const std::unordered_set> edgeMask = parameters_.useApproxTransitiveReduction_ + ? LongEdgesInTrianglesParallel(graph) + : std::unordered_set>(); - std::vector visited(graph.num_vertices(), false); + std::vector visited(graph.NumVertices(), false); - const std::vector top_order = GetTopOrder(graph); + const std::vector topOrder = GetTopOrder(graph); - for (auto rev_top_it = top_order.rbegin(); rev_top_it != top_order.crend(); rev_top_it++) { - const vertex_idx_t &bottom_node = *rev_top_it; + for (auto revTopIt = topOrder.rbegin(); revTopIt != topOrder.crend(); revTopIt++) { + const VertexIdxT &bottomNode = *revTopIt; - if (visited[bottom_node]) { + if (visited[bottomNode]) { continue; } - v_workw_t work_weight_of_group = 0; - v_memw_t memory_weight_of_group = 0; + VWorkwT workWeightOfGroup = 0; + VMemwT memoryWeightOfGroup = 0; - std::unordered_map children_not_in_group; - std::vector group; + std::unordered_map childrenNotInGroup; + std::vector group; - std::deque vertex_processing_fifo({bottom_node}); - std::deque next_vertex_processing_fifo; + std::deque vertexProcessingFifo({bottomNode}); + std::deque nextVertexProcessingFifo; - unsigned depth_counter = 0; + unsigned depthCounter = 0; - while ((not vertex_processing_fifo.empty()) || (not next_vertex_processing_fifo.empty())) { - if (vertex_processing_fifo.empty()) { - vertex_processing_fifo = 
next_vertex_processing_fifo; - next_vertex_processing_fifo.clear(); - depth_counter++; - if (depth_counter > parameters.max_depth) { + while ((not vertexProcessingFifo.empty()) || (not nextVertexProcessingFifo.empty())) { + if (vertexProcessingFifo.empty()) { + vertexProcessingFifo = nextVertexProcessingFifo; + nextVertexProcessingFifo.clear(); + depthCounter++; + if (depthCounter > parameters_.maxDepth_) { break; } } - vertex_idx_t active_node = vertex_processing_fifo.front(); - vertex_processing_fifo.pop_front(); + VertexIdxT activeNode = vertexProcessingFifo.front(); + vertexProcessingFifo.pop_front(); - if (graph.vertex_type(active_node) != graph.vertex_type(bottom_node)) { + if (graph.VertexType(activeNode) != graph.VertexType(bottomNode)) { continue; } - if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight) { + if (workWeightOfGroup + graph.VertexWorkWeight(activeNode) > parameters_.maxWorkWeight_) { continue; } - if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight) { + if (memoryWeightOfGroup + graph.VertexMemWeight(activeNode) > parameters_.maxMemoryWeight_) { continue; } - if constexpr (use_architecture_memory_contraints) { - if (memory_weight_of_group + graph.vertex_mem_weight(active_node) - > max_memory_per_vertex_type[graph.vertex_type(bottom_node)]) { + if constexpr (useArchitectureMemoryContraints) { + if (memoryWeightOfGroup + graph.VertexMemWeight(activeNode) + > maxMemoryPerVertexType_[graph.VertexType(bottomNode)]) { continue; } } - group.emplace_back(active_node); - work_weight_of_group += graph.vertex_work_weight(active_node); - memory_weight_of_group += graph.vertex_mem_weight(active_node); + group.emplace_back(activeNode); + workWeightOfGroup += graph.VertexWorkWeight(activeNode); + memoryWeightOfGroup += graph.VertexMemWeight(activeNode); - for (const auto &in_edge : in_edges(active_node, graph)) { - if (parameters.use_approx_transitive_reduction && 
(edge_mask.find(in_edge) != edge_mask.cend())) { + for (const auto &inEdge : InEdges(activeNode, graph)) { + if (parameters_.useApproxTransitiveReduction_ && (edgeMask.find(inEdge) != edgeMask.cend())) { continue; } - const vertex_idx_t &par = source(in_edge, graph); + const VertexIdxT &par = Source(inEdge, graph); - if (children_not_in_group.find(par) != children_not_in_group.cend()) { - children_not_in_group[par] -= 1; + if (childrenNotInGroup.find(par) != childrenNotInGroup.cend()) { + childrenNotInGroup[par] -= 1; } else { - if (parameters.use_approx_transitive_reduction) { - children_not_in_group[par] = 0; + if (parameters_.useApproxTransitiveReduction_) { + childrenNotInGroup[par] = 0; - for (const auto out_edge : out_edges(par, graph)) { - if (edge_mask.find(out_edge) != edge_mask.cend()) { + for (const auto outEdge : OutEdges(par, graph)) { + if (edgeMask.find(outEdge) != edgeMask.cend()) { continue; } - children_not_in_group[par] += 1; + childrenNotInGroup[par] += 1; } } else { - children_not_in_group[par] = graph.out_degree(par); + childrenNotInGroup[par] = graph.OutDegree(par); } - children_not_in_group[par] -= 1; + childrenNotInGroup[par] -= 1; } } - for (const auto &in_edge : in_edges(active_node, graph)) { - if (parameters.use_approx_transitive_reduction && (edge_mask.find(in_edge) != edge_mask.cend())) { + for (const auto &inEdge : InEdges(activeNode, graph)) { + if (parameters_.useApproxTransitiveReduction_ && (edgeMask.find(inEdge) != edgeMask.cend())) { continue; } - const vertex_idx_t &par = source(in_edge, graph); - if (children_not_in_group[par] == 0) { - next_vertex_processing_fifo.emplace_back(par); + const VertexIdxT &par = Source(inEdge, graph); + if (childrenNotInGroup[par] == 0) { + nextVertexProcessingFifo.emplace_back(par); } } } @@ -203,102 +203,102 @@ class FunnelBfs : public CoarserGenExpansionMap { } } - void run_out_contraction(const Graph_t_in &graph, std::vector>> &partition) { - using vertex_idx_t = vertex_idx_t; + void 
RunOutContraction(const GraphTIn &graph, std::vector>> &partition) { + using VertexIdxT = VertexIdxT; - const std::unordered_set> edge_mask = parameters.use_approx_transitive_reduction - ? long_edges_in_triangles_parallel(graph) - : std::unordered_set>(); + const std::unordered_set> edgeMask = parameters_.useApproxTransitiveReduction_ + ? LongEdgesInTrianglesParallel(graph) + : std::unordered_set>(); - std::vector visited(graph.num_vertices(), false); + std::vector visited(graph.NumVertices(), false); - for (const auto &top_node : top_sort_view(graph)) { - if (visited[top_node]) { + for (const auto &topNode : TopSortView(graph)) { + if (visited[topNode]) { continue; } - v_workw_t work_weight_of_group = 0; - v_memw_t memory_weight_of_group = 0; + VWorkwT workWeightOfGroup = 0; + VMemwT memoryWeightOfGroup = 0; - std::unordered_map parents_not_in_group; - std::vector group; + std::unordered_map parentsNotInGroup; + std::vector group; - std::deque vertex_processing_fifo({top_node}); - std::deque next_vertex_processing_fifo; + std::deque vertexProcessingFifo({topNode}); + std::deque nextVertexProcessingFifo; - unsigned depth_counter = 0; + unsigned depthCounter = 0; - while ((not vertex_processing_fifo.empty()) || (not next_vertex_processing_fifo.empty())) { - if (vertex_processing_fifo.empty()) { - vertex_processing_fifo = next_vertex_processing_fifo; - next_vertex_processing_fifo.clear(); - depth_counter++; - if (depth_counter > parameters.max_depth) { + while ((not vertexProcessingFifo.empty()) || (not nextVertexProcessingFifo.empty())) { + if (vertexProcessingFifo.empty()) { + vertexProcessingFifo = nextVertexProcessingFifo; + nextVertexProcessingFifo.clear(); + depthCounter++; + if (depthCounter > parameters_.maxDepth_) { break; } } - vertex_idx_t active_node = vertex_processing_fifo.front(); - vertex_processing_fifo.pop_front(); + VertexIdxT activeNode = vertexProcessingFifo.front(); + vertexProcessingFifo.pop_front(); - if (graph.vertex_type(active_node) != 
graph.vertex_type(top_node)) { + if (graph.VertexType(activeNode) != graph.VertexType(topNode)) { continue; } - if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight) { + if (workWeightOfGroup + graph.VertexWorkWeight(activeNode) > parameters_.maxWorkWeight_) { continue; } - if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight) { + if (memoryWeightOfGroup + graph.VertexMemWeight(activeNode) > parameters_.maxMemoryWeight_) { continue; } - if constexpr (use_architecture_memory_contraints) { - if (memory_weight_of_group + graph.vertex_mem_weight(active_node) - > max_memory_per_vertex_type[graph.vertex_type(top_node)]) { + if constexpr (useArchitectureMemoryContraints) { + if (memoryWeightOfGroup + graph.VertexMemWeight(activeNode) + > maxMemoryPerVertexType_[graph.VertexType(topNode)]) { continue; } } - group.emplace_back(active_node); - work_weight_of_group += graph.vertex_work_weight(active_node); - memory_weight_of_group += graph.vertex_mem_weight(active_node); + group.emplace_back(activeNode); + workWeightOfGroup += graph.VertexWorkWeight(activeNode); + memoryWeightOfGroup += graph.VertexMemWeight(activeNode); - for (const auto &out_edge : out_edges(active_node, graph)) { - if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend())) { + for (const auto &outEdge : OutEdges(activeNode, graph)) { + if (parameters_.useApproxTransitiveReduction_ && (edgeMask.find(outEdge) != edgeMask.cend())) { continue; } - const vertex_idx_t &child = target(out_edge, graph); + const VertexIdxT &child = Target(outEdge, graph); - if (parents_not_in_group.find(child) != parents_not_in_group.cend()) { - parents_not_in_group[child] -= 1; + if (parentsNotInGroup.find(child) != parentsNotInGroup.cend()) { + parentsNotInGroup[child] -= 1; } else { - if (parameters.use_approx_transitive_reduction) { - parents_not_in_group[child] = 0; + if 
(parameters_.useApproxTransitiveReduction_) { + parentsNotInGroup[child] = 0; - for (const auto in_edge : in_edges(child, graph)) { - if (edge_mask.find(in_edge) != edge_mask.cend()) { + for (const auto inEdge : InEdges(child, graph)) { + if (edgeMask.find(inEdge) != edgeMask.cend()) { continue; } - parents_not_in_group[child] += 1; + parentsNotInGroup[child] += 1; } } else { - parents_not_in_group[child] = graph.in_degree(child); + parentsNotInGroup[child] = graph.InDegree(child); } - parents_not_in_group[child] -= 1; + parentsNotInGroup[child] -= 1; } } - for (const auto &out_edge : out_edges(active_node, graph)) { - if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend())) { + for (const auto &outEdge : OutEdges(activeNode, graph)) { + if (parameters_.useApproxTransitiveReduction_ && (edgeMask.find(outEdge) != edgeMask.cend())) { continue; } - const vertex_idx_t &child = target(out_edge, graph); - if (parents_not_in_group[child] == 0) { - next_vertex_processing_fifo.emplace_back(child); + const VertexIdxT &child = Target(outEdge, graph); + if (parentsNotInGroup[child] == 0) { + nextVertexProcessingFifo.emplace_back(child); } } } diff --git a/include/osp/coarser/hdagg/hdagg_coarser.hpp b/include/osp/coarser/hdagg/hdagg_coarser.hpp index 7d206187..d319a4da 100644 --- a/include/osp/coarser/hdagg/hdagg_coarser.hpp +++ b/include/osp/coarser/hdagg/hdagg_coarser.hpp @@ -26,163 +26,163 @@ limitations under the License. 
namespace osp { -template -class hdagg_coarser : public CoarserGenContractionMap { - static_assert(is_directed_graph_edge_desc_v, "Graph_t_in must satisfy the directed_graph edge desc concept"); - static_assert(has_hashable_edge_desc_v, "Graph_t_in must satisfy the has_hashable_edge_desc concept"); - static_assert(has_typed_vertices_v, "Graph_t_in must have typed vertices"); +template +class HdaggCoarser : public CoarserGenContractionMap { + static_assert(isDirectedGraphEdgeDescV, "GraphTIn must satisfy the directed_graph edge desc concept"); + static_assert(hasHashableEdgeDescV, "GraphTIn must satisfy the HasHashableEdgeDesc concept"); + static_assert(hasTypedVerticesV, "GraphTIn must have typed vertices"); private: - using VertexType_in = vertex_idx_t; - using VertexType_out = vertex_idx_t; + using VertexTypeIn = VertexIdxT; + using VertexTypeOut = VertexIdxT; protected: - v_workw_t work_threshold = std::numeric_limits>::max(); - v_memw_t memory_threshold = std::numeric_limits>::max(); - v_commw_t communication_threshold = std::numeric_limits>::max(); + VWorkwT workThreshold_ = std::numeric_limits>::max(); + VMemwT memoryThreshold_ = std::numeric_limits>::max(); + VCommwT communicationThreshold_ = std::numeric_limits>::max(); - std::size_t super_node_size_threshold = std::numeric_limits::max(); + std::size_t superNodeSizeThreshold_ = std::numeric_limits::max(); - // MEMORY_CONSTRAINT_TYPE memory_constraint_type = NONE; + // MemoryConstraintType memory_constraint_type = NONE; // internal data strauctures - v_memw_t current_memory = 0; - v_workw_t current_work = 0; - v_commw_t current_communication = 0; - VertexType_out current_super_node_idx = 0; - v_type_t current_v_type = 0; - - void add_new_super_node(const Graph_t_in &dag_in, VertexType_in node) { - v_memw_t node_mem = dag_in.vertex_mem_weight(node); - - current_memory = node_mem; - current_work = dag_in.vertex_work_weight(node); - current_communication = dag_in.vertex_comm_weight(node); - current_v_type = 
dag_in.vertex_type(node); + VMemwT currentMemory_ = 0; + VWorkwT currentWork_ = 0; + VCommwT currentCommunication_ = 0; + VertexTypeOut currentSuperNodeIdx_ = 0; + VTypeT currentVType_ = 0; + + void AddNewSuperNode(const GraphTIn &dagIn, VertexTypeIn node) { + VMemwT nodeMem = dagIn.VertexMemWeight(node); + + currentMemory_ = nodeMem; + currentWork_ = dagIn.VertexWorkWeight(node); + currentCommunication_ = dagIn.VertexCommWeight(node); + currentVType_ = dagIn.VertexType(node); } public: - hdagg_coarser() {}; + HdaggCoarser() {}; - virtual ~hdagg_coarser() = default; + virtual ~HdaggCoarser() = default; - virtual std::string getCoarserName() const override { return "hdagg_coarser"; }; + virtual std::string GetCoarserName() const override { return "hdagg_coarser"; }; - virtual std::vector> generate_vertex_contraction_map(const Graph_t_in &dag_in) override { - std::vector visited(dag_in.num_vertices(), false); - std::vector reverse_vertex_map(dag_in.num_vertices()); + virtual std::vector> GenerateVertexContractionMap(const GraphTIn &dagIn) override { + std::vector visited(dagIn.NumVertices(), false); + std::vector reverseVertexMap(dagIn.NumVertices()); - std::vector> vertex_map; + std::vector> vertexMap; - auto edge_mask = long_edges_in_triangles(dag_in); - const auto edge_mast_end = edge_mask.cend(); + auto edgeMask = LongEdgesInTriangles(dagIn); + const auto edgeMastEnd = edgeMask.cend(); - for (const auto &sink : sink_vertices_view(dag_in)) { - vertex_map.push_back(std::vector({sink})); + for (const auto &sink : SinkVerticesView(dagIn)) { + vertexMap.push_back(std::vector({sink})); } - std::size_t part_ind = 0; - std::size_t partition_size = vertex_map.size(); - while (part_ind < partition_size) { - std::size_t vert_ind = 0; - std::size_t part_size = vertex_map[part_ind].size(); + std::size_t partInd = 0; + std::size_t partitionSize = vertexMap.size(); + while (partInd < partitionSize) { + std::size_t vertInd = 0; + std::size_t partSize = vertexMap[partInd].size(); 
- add_new_super_node(dag_in, vertex_map[part_ind][vert_ind]); + AddNewSuperNode(dagIn, vertexMap[partInd][vertInd]); - while (vert_ind < part_size) { - const VertexType_in vert = vertex_map[part_ind][vert_ind]; - reverse_vertex_map[vert] = current_super_node_idx; - bool indegree_one = true; + while (vertInd < partSize) { + const VertexTypeIn vert = vertexMap[partInd][vertInd]; + reverseVertexMap[vert] = currentSuperNodeIdx_; + bool indegreeOne = true; - for (const auto &in_edge : in_edges(vert, dag_in)) { - if (edge_mask.find(in_edge) != edge_mast_end) { + for (const auto &inEdge : InEdges(vert, dagIn)) { + if (edgeMask.find(inEdge) != edgeMastEnd) { continue; } unsigned count = 0; - for (const auto &out_edge : out_edges(source(in_edge, dag_in), dag_in)) { - if (edge_mask.find(out_edge) != edge_mast_end) { + for (const auto &outEdge : OutEdges(Source(inEdge, dagIn), dagIn)) { + if (edgeMask.find(outEdge) != edgeMastEnd) { continue; } count++; if (count > 1) { - indegree_one = false; + indegreeOne = false; break; } } - if (not indegree_one) { + if (not indegreeOne) { break; } } - if (indegree_one) { - for (const auto &in_edge : in_edges(vert, dag_in)) { - if (edge_mask.find(in_edge) != edge_mast_end) { + if (indegreeOne) { + for (const auto &inEdge : InEdges(vert, dagIn)) { + if (edgeMask.find(inEdge) != edgeMastEnd) { continue; } - const auto &edge_source = source(in_edge, dag_in); + const auto &edgeSource = Source(inEdge, dagIn); - v_memw_t node_mem = dag_in.vertex_mem_weight(edge_source); + VMemwT nodeMem = dagIn.VertexMemWeight(edgeSource); - if (((current_memory + node_mem > memory_threshold) - || (current_work + dag_in.vertex_work_weight(edge_source) > work_threshold) - || (vertex_map[part_ind].size() >= super_node_size_threshold) - || (current_communication + dag_in.vertex_comm_weight(edge_source) > communication_threshold)) + if (((currentMemory_ + nodeMem > memoryThreshold_) + || (currentWork_ + dagIn.VertexWorkWeight(edgeSource) > workThreshold_) + || 
(vertexMap[partInd].size() >= superNodeSizeThreshold_) + || (currentCommunication_ + dagIn.VertexCommWeight(edgeSource) > communicationThreshold_)) || // or node type changes - (current_v_type != dag_in.vertex_type(edge_source))) { - if (!visited[edge_source]) { - vertex_map.push_back(std::vector({edge_source})); - partition_size++; - visited[edge_source] = true; + (currentVType_ != dagIn.VertexType(edgeSource))) { + if (!visited[edgeSource]) { + vertexMap.push_back(std::vector({edgeSource})); + partitionSize++; + visited[edgeSource] = true; } } else { - current_memory += node_mem; - current_work += dag_in.vertex_work_weight(edge_source); - current_communication += dag_in.vertex_comm_weight(edge_source); + currentMemory_ += nodeMem; + currentWork_ += dagIn.VertexWorkWeight(edgeSource); + currentCommunication_ += dagIn.VertexCommWeight(edgeSource); - vertex_map[part_ind].push_back(edge_source); - part_size++; + vertexMap[partInd].push_back(edgeSource); + partSize++; } } } else { - for (const auto &in_edge : in_edges(vert, dag_in)) { - if (edge_mask.find(in_edge) != edge_mast_end) { + for (const auto &inEdge : InEdges(vert, dagIn)) { + if (edgeMask.find(inEdge) != edgeMastEnd) { continue; } - const auto &edge_source = source(in_edge, dag_in); + const auto &edgeSource = Source(inEdge, dagIn); - if (!visited[edge_source]) { - vertex_map.push_back(std::vector({edge_source})); - partition_size++; - visited[edge_source] = true; + if (!visited[edgeSource]) { + vertexMap.push_back(std::vector({edgeSource})); + partitionSize++; + visited[edgeSource] = true; } } } - vert_ind++; + vertInd++; } - part_ind++; + partInd++; } - return reverse_vertex_map; + return reverseVertexMap; } - inline void set_work_threshold(v_workw_t work_threshold_) { work_threshold = work_threshold_; } + inline void SetWorkThreshold(VWorkwT workThreshold) { workThreshold_ = workThreshold; } - inline void set_memory_threshold(v_memw_t memory_threshold_) { memory_threshold = memory_threshold_; } + inline 
void SetMemoryThreshold(VMemwT memoryThreshold) { memoryThreshold_ = memoryThreshold; } - inline void set_communication_threshold(v_commw_t communication_threshold_) { - communication_threshold = communication_threshold_; + inline void SetCommunicationThreshold(VCommwT communicationThreshold) { + communicationThreshold_ = communicationThreshold; } - inline void set_super_node_size_threshold(std::size_t super_node_size_threshold_) { - super_node_size_threshold = super_node_size_threshold_; + inline void SetSuperNodeSizeThreshold(std::size_t superNodeSizeThreshold) { + superNodeSizeThreshold_ = superNodeSizeThreshold; } }; diff --git a/include/osp/coarser/top_order/top_order_coarser.hpp b/include/osp/coarser/top_order/top_order_coarser.hpp index deaf476f..d3b2d9cc 100644 --- a/include/osp/coarser/top_order/top_order_coarser.hpp +++ b/include/osp/coarser/top_order/top_order_coarser.hpp @@ -26,53 +26,52 @@ limitations under the License. namespace osp { -template > (*top_sort_func)(const Graph_t_in &)> -class top_order_coarser : public Coarser { +template > (*TopSortFunc)(const GraphTIn &)> +class TopOrderCoarser : public Coarser { private: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; // parameters - v_workw_t work_threshold = std::numeric_limits>::max(); - v_memw_t memory_threshold = std::numeric_limits>::max(); - v_commw_t communication_threshold = std::numeric_limits>::max(); - unsigned degree_threshold = std::numeric_limits::max(); - unsigned node_dist_threshold = std::numeric_limits::max(); - VertexType super_node_size_threshold = std::numeric_limits::max(); + VWorkwT workThreshold_ = std::numeric_limits>::max(); + VMemwT memoryThreshold_ = std::numeric_limits>::max(); + VCommwT communicationThreshold_ = std::numeric_limits>::max(); + unsigned degreeThreshold_ = std::numeric_limits::max(); + unsigned nodeDistThreshold_ = std::numeric_limits::max(); + VertexType superNodeSizeThreshold_ = std::numeric_limits::max(); // internal data strauctures 
- v_memw_t current_memory = 0; - v_workw_t current_work = 0; - v_commw_t current_communication = 0; - VertexType current_super_node_idx = 0; - - void finish_super_node_add_edges(const Graph_t_in &dag_in, - Graph_t_out &dag_out, - const std::vector &nodes, - std::vector> &reverse_vertex_map) { - dag_out.set_vertex_mem_weight(current_super_node_idx, current_memory); - dag_out.set_vertex_work_weight(current_super_node_idx, current_work); - dag_out.set_vertex_comm_weight(current_super_node_idx, current_communication); + VMemwT currentMemory_ = 0; + VWorkwT currentWork_ = 0; + VCommwT currentCommunication_ = 0; + VertexType currentSuperNodeIdx_ = 0; + + void FinishSuperNodeAddEdges(const GraphTIn &dagIn, + GraphTOut &dagOut, + const std::vector &nodes, + std::vector> &reverseVertexMap) { + dagOut.SetVertexMemWeight(currentSuperNodeIdx_, currentMemory_); + dagOut.SetVertexWorkWeight(currentSuperNodeIdx_, currentWork_); + dagOut.SetVertexCommWeight(currentSuperNodeIdx_, currentCommunication_); for (const auto &node : nodes) { - if constexpr (has_edge_weights_v && has_edge_weights_v) { - for (const auto &in_edge : in_edges(node, dag_in)) { - const VertexType parent_rev = reverse_vertex_map[source(in_edge, dag_in)]; - if (parent_rev != current_super_node_idx && parent_rev != std::numeric_limits::max()) { - auto pair = edge_desc(parent_rev, current_super_node_idx, dag_out); + if constexpr (hasEdgeWeightsV && hasEdgeWeightsV) { + for (const auto &inEdge : InEdges(node, dagIn)) { + const VertexType parentRev = reverseVertexMap[Source(inEdge, dagIn)]; + if (parentRev != currentSuperNodeIdx_ && parentRev != std::numeric_limits::max()) { + auto pair = EdgeDesc(parentRev, currentSuperNodeIdx_, dagOut); if (pair.second) { - dag_out.set_edge_comm_weight(pair.first, - dag_out.edge_comm_weight(pair.first) + dag_in.edge_comm_weight(in_edge)); + dagOut.SetEdgeCommWeight(pair.first, dagOut.EdgeCommWeight(pair.first) + dagIn.EdgeCommWeight(inEdge)); } else { - dag_out.add_edge(parent_rev, 
current_super_node_idx, dag_in.edge_comm_weight(in_edge)); + dagOut.AddEdge(parentRev, currentSuperNodeIdx_, dagIn.EdgeCommWeight(inEdge)); } } } } else { - for (const auto &parent : dag_in.parents(node)) { - const VertexType parent_rev = reverse_vertex_map[parent]; - if (parent_rev != current_super_node_idx && parent_rev != std::numeric_limits::max()) { - if (not edge(parent_rev, current_super_node_idx, dag_out)) { - dag_out.add_edge(parent_rev, current_super_node_idx); + for (const auto &parent : dagIn.Parents(node)) { + const VertexType parentRev = reverseVertexMap[parent]; + if (parentRev != currentSuperNodeIdx_ && parentRev != std::numeric_limits::max()) { + if (not Edge(parentRev, currentSuperNodeIdx_, dagOut)) { + dagOut.AddEdge(parentRev, currentSuperNodeIdx_); } } } @@ -80,7 +79,7 @@ class top_order_coarser : public Coarser { } } - void add_new_super_node(const Graph_t_in &dag_in, Graph_t_out &dag_out, VertexType node) { + void AddNewSuperNode(const GraphTIn &dagIn, GraphTOut &dagOut, VertexType node) { // int node_mem = dag_in.nodeMemoryWeight(node); // if (memory_constraint_type == LOCAL_INC_EDGES_2) { @@ -90,68 +89,63 @@ class top_order_coarser : public Coarser { // } // } - current_memory = dag_in.vertex_mem_weight(node); - current_work = dag_in.vertex_work_weight(node); - current_communication = dag_in.vertex_comm_weight(node); + currentMemory_ = dagIn.VertexMemWeight(node); + currentWork_ = dagIn.VertexWorkWeight(node); + currentCommunication_ = dagIn.VertexCommWeight(node); - if constexpr (is_computational_dag_typed_vertices_v && is_computational_dag_typed_vertices_v) { - current_super_node_idx - = dag_out.add_vertex(current_work, current_communication, current_memory, dag_in.vertex_type(node)); + if constexpr (isComputationalDagTypedVerticesV && isComputationalDagTypedVerticesV) { + currentSuperNodeIdx_ = dagOut.AddVertex(currentWork_, currentCommunication_, currentMemory_, dagIn.VertexType(node)); } else { - current_super_node_idx = 
dag_out.add_vertex(current_work, current_communication, current_memory); + currentSuperNodeIdx_ = dagOut.AddVertex(currentWork_, currentCommunication_, currentMemory_); } } public: - top_order_coarser() {}; - virtual ~top_order_coarser() = default; + TopOrderCoarser() {}; + virtual ~TopOrderCoarser() = default; - inline void set_degree_threshold(unsigned degree_threshold_) { degree_threshold = degree_threshold_; } + inline void SetDegreeThreshold(unsigned degreeThreshold) { degreeThreshold_ = degreeThreshold; } - inline void set_work_threshold(v_workw_t work_threshold_) { work_threshold = work_threshold_; } + inline void SetWorkThreshold(VWorkwT workThreshold) { workThreshold_ = workThreshold; } - inline void set_memory_threshold(v_memw_t memory_threshold_) { memory_threshold = memory_threshold_; } + inline void SetMemoryThreshold(VMemwT memoryThreshold) { memoryThreshold_ = memoryThreshold; } - inline void set_communication_threshold(v_commw_t communication_threshold_) { - communication_threshold = communication_threshold_; + inline void SetCommunicationThreshold(VCommwT communicationThreshold) { + communicationThreshold_ = communicationThreshold; } - inline void set_super_node_size_threshold(VertexType super_node_size_threshold_) { - super_node_size_threshold = super_node_size_threshold_; - } + inline void SetSuperNodeSizeThreshold(VertexType superNodeSizeThreshold) { superNodeSizeThreshold_ = superNodeSizeThreshold; } - inline void set_node_dist_threshold(unsigned node_dist_threshold_) { node_dist_threshold = node_dist_threshold_; } + inline void SetNodeDistThreshold(unsigned nodeDistThreshold) { nodeDistThreshold_ = nodeDistThreshold; } - // inline void set_memory_constraint_type(MEMORY_CONSTRAINT_TYPE memory_constraint_type_) { memory_constraint_type = + // inline void set_memory_constraint_type(MemoryConstraintType memory_constraint_type_) { memory_constraint_type = // memory_constraint_type_; } - virtual std::string getCoarserName() const override { return 
"top_order_coarser"; }; + virtual std::string GetCoarserName() const override { return "top_order_coarser"; }; - virtual bool coarsenDag(const Graph_t_in &dag_in, - Graph_t_out &dag_out, - std::vector> &reverse_vertex_map) override { - assert(dag_out.num_vertices() == 0); - if (dag_in.num_vertices() == 0) { - reverse_vertex_map = std::vector>(); + virtual bool CoarsenDag(const GraphTIn &dagIn, GraphTOut &dagOut, std::vector> &reverseVertexMap) override { + assert(dagOut.NumVertices() == 0); + if (dagIn.NumVertices() == 0) { + reverseVertexMap = std::vector>(); return true; } - std::vector top_ordering = top_sort_func(dag_in); + std::vector topOrdering = TopSortFunc(dagIn); - std::vector source_node_dist = get_top_node_distance(dag_in); + std::vector sourceNodeDist = GetTopNodeDistance(dagIn); - reverse_vertex_map.resize(dag_in.num_vertices(), std::numeric_limits::max()); + reverseVertexMap.resize(dagIn.NumVertices(), std::numeric_limits::max()); - std::vector> vertex_map; - vertex_map.push_back(std::vector({top_ordering[0]})); + std::vector> vertexMap; + vertexMap.push_back(std::vector({topOrdering[0]})); - add_new_super_node(dag_in, dag_out, top_ordering[0]); - reverse_vertex_map[top_ordering[0]] = current_super_node_idx; + AddNewSuperNode(dagIn, dagOut, topOrdering[0]); + reverseVertexMap[topOrdering[0]] = currentSuperNodeIdx_; - for (size_t i = 1; i < top_ordering.size(); i++) { - const auto v = top_ordering[i]; + for (size_t i = 1; i < topOrdering.size(); i++) { + const auto v = topOrdering[i]; - // int node_mem = dag_in.vertex_mem_weight(v); + // int node_mem = dag_in.VertexMemWeight(v); // if (memory_constraint_type == LOCAL_INC_EDGES_2) { @@ -160,51 +154,50 @@ class top_order_coarser : public Coarser { // } // } - const unsigned dist = source_node_dist[v] - source_node_dist[top_ordering[i - 1]]; + const unsigned dist = sourceNodeDist[v] - sourceNodeDist[topOrdering[i - 1]]; // start new super node if thresholds are exceeded - if (((current_memory + 
dag_in.vertex_mem_weight(v) > memory_threshold) - || (current_work + dag_in.vertex_work_weight(v) > work_threshold) - || (vertex_map.back().size() >= super_node_size_threshold) - || (current_communication + dag_in.vertex_comm_weight(v) > communication_threshold)) - || (dist > node_dist_threshold) || + if (((currentMemory_ + dagIn.VertexMemWeight(v) > memoryThreshold_) + || (currentWork_ + dagIn.VertexWorkWeight(v) > workThreshold_) + || (vertexMap.back().size() >= superNodeSizeThreshold_) + || (currentCommunication_ + dagIn.VertexCommWeight(v) > communicationThreshold_)) + || (dist > nodeDistThreshold_) || // or prev node high out degree - (dag_in.out_degree(top_ordering[i - 1]) > degree_threshold)) { - finish_super_node_add_edges(dag_in, dag_out, vertex_map.back(), reverse_vertex_map); - vertex_map.push_back(std::vector({v})); - add_new_super_node(dag_in, dag_out, v); + (dagIn.OutDegree(topOrdering[i - 1]) > degreeThreshold_)) { + FinishSuperNodeAddEdges(dagIn, dagOut, vertexMap.back(), reverseVertexMap); + vertexMap.push_back(std::vector({v})); + AddNewSuperNode(dagIn, dagOut, v); } else { // grow current super node - if constexpr (is_computational_dag_typed_vertices_v - && is_computational_dag_typed_vertices_v) { - if (dag_out.vertex_type(current_super_node_idx) != dag_in.vertex_type(v)) { - finish_super_node_add_edges(dag_in, dag_out, vertex_map.back(), reverse_vertex_map); - vertex_map.push_back(std::vector({v})); - add_new_super_node(dag_in, dag_out, v); + if constexpr (isComputationalDagTypedVerticesV && isComputationalDagTypedVerticesV) { + if (dagOut.VertexType(currentSuperNodeIdx_) != dagIn.VertexType(v)) { + FinishSuperNodeAddEdges(dagIn, dagOut, vertexMap.back(), reverseVertexMap); + vertexMap.push_back(std::vector({v})); + AddNewSuperNode(dagIn, dagOut, v); } else { - current_memory += dag_in.vertex_mem_weight(v); - current_work += dag_in.vertex_work_weight(v); - current_communication += dag_in.vertex_comm_weight(v); + currentMemory_ += 
dagIn.VertexMemWeight(v); + currentWork_ += dagIn.VertexWorkWeight(v); + currentCommunication_ += dagIn.VertexCommWeight(v); - vertex_map.back().push_back(v); + vertexMap.back().push_back(v); } } else { - current_memory += dag_in.vertex_mem_weight(v); - current_work += dag_in.vertex_work_weight(v); - current_communication += dag_in.vertex_comm_weight(v); + currentMemory_ += dagIn.VertexMemWeight(v); + currentWork_ += dagIn.VertexWorkWeight(v); + currentCommunication_ += dagIn.VertexCommWeight(v); - vertex_map.back().push_back(v); + vertexMap.back().push_back(v); } } - reverse_vertex_map[v] = current_super_node_idx; + reverseVertexMap[v] = currentSuperNodeIdx_; } - if (!vertex_map.back().empty()) { - finish_super_node_add_edges(dag_in, dag_out, vertex_map.back(), reverse_vertex_map); + if (!vertexMap.back().empty()) { + FinishSuperNodeAddEdges(dagIn, dagOut, vertexMap.back(), reverseVertexMap); } return true; diff --git a/include/osp/concepts/computational_dag_concept.hpp b/include/osp/concepts/computational_dag_concept.hpp index 29ca517e..c02f1568 100644 --- a/include/osp/concepts/computational_dag_concept.hpp +++ b/include/osp/concepts/computational_dag_concept.hpp @@ -46,33 +46,33 @@ namespace osp { * @brief Concept to check if a graph has vertex weights. * * Requires validation of: - * - `vertex_work_weight(v)`: Returns arithmetic type. - * - `vertex_comm_weight(v)`: Returns arithmetic type. - * - `vertex_mem_weight(v)`: Returns arithmetic type. + * - `VertexWorkWeight(v)`: Returns arithmetic type. + * - `VertexCommWeight(v)`: Returns arithmetic type. + * - `VertexMemWeight(v)`: Returns arithmetic type. * * @tparam T The graph type. 
*/ template -struct has_vertex_weights : std::false_type {}; +struct HasVertexWeights : std::false_type {}; template -struct has_vertex_weights().vertex_work_weight(std::declval>())), - decltype(std::declval().vertex_comm_weight(std::declval>())), - decltype(std::declval().vertex_mem_weight(std::declval>()))>> - : std::conjunction().vertex_work_weight(std::declval>()))>, - std::is_arithmetic().vertex_comm_weight(std::declval>()))>, - std::is_arithmetic().vertex_mem_weight(std::declval>()))>> {}; +struct HasVertexWeights().VertexWorkWeight(std::declval>())), + decltype(std::declval().VertexCommWeight(std::declval>())), + decltype(std::declval().VertexMemWeight(std::declval>()))>> + : std::conjunction().VertexWorkWeight(std::declval>()))>, + std::is_arithmetic().VertexCommWeight(std::declval>()))>, + std::is_arithmetic().VertexMemWeight(std::declval>()))>> {}; template -inline constexpr bool has_vertex_weights_v = has_vertex_weights::value; +inline constexpr bool hasVertexWeightsV = HasVertexWeights::value; /** * @brief Concept to check if a graph has typed vertices. * * Requires validation of: - * - `vertex_type(v)`: Returns an integral type representing the type of vertex `v`. - * - `num_vertex_types()`: Returns the total number of distinct vertex types. + * - `VertexType(v)`: Returns an integral type representing the type of vertex `v`. + * - `NumVertexTypes()`: Returns the total number of distinct vertex types. * * This is useful for scheduling on heterogeneous resources where tasks (vertices) * may be compatible only with certain processor types. @@ -80,39 +80,39 @@ inline constexpr bool has_vertex_weights_v = has_vertex_weights::value; * @tparam T The graph type. 
*/ template -struct has_typed_vertices : std::false_type {}; +struct HasTypedVertices : std::false_type {}; template -struct has_typed_vertices().vertex_type(std::declval>())), - decltype(std::declval().num_vertex_types())>> - : std::conjunction().vertex_type(std::declval>()))>, - std::is_integral().num_vertex_types())>> {}; +struct HasTypedVertices< + T, + std::void_t().VertexType(std::declval>())), decltype(std::declval().NumVertexTypes())>> + : std::conjunction().VertexType(std::declval>()))>, + std::is_integral().NumVertexTypes())>> {}; template -inline constexpr bool has_typed_vertices_v = has_typed_vertices::value; +inline constexpr bool hasTypedVerticesV = HasTypedVertices::value; /** * @brief Concept to check if edges have communication weights. * * Requires: * - The graph must satisfy `is_directed_graph_edge_desc` (supports edge descriptors). - * - `edge_comm_weight(e)`: Returns an arithmetic type for a given edge descriptor `e`. + * - `EdgeCommWeight(e)`: Returns an arithmetic type for a given edge descriptor `e`. * * @tparam T The graph type. */ template -struct has_edge_weights : std::false_type {}; +struct HasEdgeWeights : std::false_type {}; template -struct has_edge_weights::directed_edge_descriptor, - decltype(std::declval().edge_comm_weight(std::declval>()))>> - : std::conjunction().edge_comm_weight(std::declval>()))>, - is_directed_graph_edge_desc> {}; +struct HasEdgeWeights::DirectedEdgeDescriptor, + decltype(std::declval().EdgeCommWeight(std::declval>()))>> + : std::conjunction().EdgeCommWeight(std::declval>()))>, + IsDirectedGraphEdgeDesc> {}; template -inline constexpr bool has_edge_weights_v = has_edge_weights::value; +inline constexpr bool hasEdgeWeightsV = HasEdgeWeights::value; /** * @brief Concept for a basic computational DAG. @@ -124,13 +124,13 @@ inline constexpr bool has_edge_weights_v = has_edge_weights::value; * @tparam T The graph type. 
*/ template -struct is_computational_dag : std::false_type {}; +struct IsComputationalDag : std::false_type {}; template -struct is_computational_dag> : std::conjunction, has_vertex_weights> {}; +struct IsComputationalDag> : std::conjunction, HasVertexWeights> {}; template -inline constexpr bool is_computational_dag_v = is_computational_dag::value; +inline constexpr bool isComputationalDagV = IsComputationalDag::value; /** * @brief Concept for a computational DAG with typed vertices. @@ -140,13 +140,13 @@ inline constexpr bool is_computational_dag_v = is_computational_dag::value; * @tparam T The graph type. */ template -struct is_computational_dag_typed_vertices : std::false_type {}; +struct IsComputationalDagTypedVertices : std::false_type {}; template -struct is_computational_dag_typed_vertices> : std::conjunction, has_typed_vertices> {}; +struct IsComputationalDagTypedVertices> : std::conjunction, HasTypedVertices> {}; template -inline constexpr bool is_computational_dag_typed_vertices_v = is_computational_dag_typed_vertices::value; +inline constexpr bool isComputationalDagTypedVerticesV = IsComputationalDagTypedVertices::value; /** * @brief Concept for a computational DAG that supports explicit edge descriptors. @@ -157,14 +157,13 @@ inline constexpr bool is_computational_dag_typed_vertices_v = is_computational_d * @tparam T The graph type. */ template -struct is_computational_dag_edge_desc : std::false_type {}; +struct IsComputationalDagEdgeDesc : std::false_type {}; template -struct is_computational_dag_edge_desc> - : std::conjunction, is_computational_dag> {}; +struct IsComputationalDagEdgeDesc> : std::conjunction, IsComputationalDag> {}; template -inline constexpr bool is_computational_dag_edge_desc_v = is_computational_dag_edge_desc::value; +inline constexpr bool isComputationalDagEdgeDescV = IsComputationalDagEdgeDesc::value; /** * @brief Concept for a computational DAG with both typed vertices and edge descriptors. 
@@ -174,13 +173,13 @@ inline constexpr bool is_computational_dag_edge_desc_v = is_computational_dag_ed * @tparam T The graph type. */ template -struct is_computational_dag_typed_vertices_edge_desc : std::false_type {}; +struct IsComputationalDagTypedVerticesEdgeDesc : std::false_type {}; template -struct is_computational_dag_typed_vertices_edge_desc> - : std::conjunction, is_computational_dag_typed_vertices> {}; +struct IsComputationalDagTypedVerticesEdgeDesc> + : std::conjunction, IsComputationalDagTypedVertices> {}; template -inline constexpr bool is_computational_dag_typed_vertices_edge_desc_v = is_computational_dag_typed_vertices_edge_desc::value; +inline constexpr bool isComputationalDagTypedVerticesEdgeDescV = IsComputationalDagTypedVerticesEdgeDesc::value; } // namespace osp diff --git a/include/osp/concepts/constructable_computational_dag_concept.hpp b/include/osp/concepts/constructable_computational_dag_concept.hpp index 0caa561e..84754de8 100644 --- a/include/osp/concepts/constructable_computational_dag_concept.hpp +++ b/include/osp/concepts/constructable_computational_dag_concept.hpp @@ -40,24 +40,24 @@ namespace osp { * @brief Concept to check if vertex weights are modifiable. * * Requires: - * - `set_vertex_work_weight(v, w)` - * - `set_vertex_comm_weight(v, w)` - * - `set_vertex_mem_weight(v, w)` + * - `SetVertexWorkWeight(v, w)` + * - `SetVertexCommWeight(v, w)` + * - `SetVertexMemWeight(v, w)` * * Also requires the graph to be default constructible, copy/move constructible, and assignable. * * @tparam T The graph type. 
*/ template -struct is_modifiable_cdag_vertex : std::false_type {}; +struct IsModifiableCdagVertex : std::false_type {}; template -struct is_modifiable_cdag_vertex< +struct IsModifiableCdagVertex< T, - std::void_t().set_vertex_work_weight(std::declval>(), std::declval>())), - decltype(std::declval().set_vertex_comm_weight(std::declval>(), std::declval>())), - decltype(std::declval().set_vertex_mem_weight(std::declval>(), std::declval>()))>> - : std::conjunction, + std::void_t().SetVertexWorkWeight(std::declval>(), std::declval>())), + decltype(std::declval().SetVertexCommWeight(std::declval>(), std::declval>())), + decltype(std::declval().SetVertexMemWeight(std::declval>(), std::declval>()))>> + : std::conjunction, std::is_default_constructible, std::is_copy_constructible, std::is_move_constructible, @@ -65,131 +65,130 @@ struct is_modifiable_cdag_vertex< std::is_move_assignable> {}; template -inline constexpr bool is_modifiable_cdag_vertex_v = is_modifiable_cdag_vertex::value; +inline constexpr bool isModifiableCdagVertexV = IsModifiableCdagVertex::value; /** * @brief Concept to check if vertices can be added to the graph. * * Requires: - * - `add_vertex(work_weight, comm_weight, mem_weight)` + * - `AddVertex(work_weight, comm_weight, mem_weight)` * - Constructibility from `vertex_idx_t` (for reserving size). * * @tparam T The graph type. 
*/ template -struct is_constructable_cdag_vertex : std::false_type {}; +struct IsConstructableCdagVertex : std::false_type {}; template -struct is_constructable_cdag_vertex().add_vertex( - std::declval>(), std::declval>(), std::declval>()))>> - : std::conjunction, std::is_constructible>> {}; +struct IsConstructableCdagVertex().AddVertex( + std::declval>(), std::declval>(), std::declval>()))>> + : std::conjunction, std::is_constructible>> {}; template -inline constexpr bool is_constructable_cdag_vertex_v = is_constructable_cdag_vertex::value; +inline constexpr bool isConstructableCdagVertexV = IsConstructableCdagVertex::value; /** * @brief Concept to check if vertex types are modifiable. * * Requires: - * - `set_vertex_type(v, type)` + * - `SetVertexType(v, type)` * * @tparam T The graph type. */ template -struct is_modifiable_cdag_typed_vertex : std::false_type {}; +struct IsModifiableCdagTypedVertex : std::false_type {}; template -struct is_modifiable_cdag_typed_vertex< +struct IsModifiableCdagTypedVertex< T, - std::void_t().set_vertex_type(std::declval>(), std::declval>()))>> - : std::conjunction, is_computational_dag_typed_vertices> {}; // for default node type + std::void_t().SetVertexType(std::declval>(), std::declval>()))>> + : std::conjunction, IsComputationalDagTypedVertices> {}; // for default node type template -inline constexpr bool is_modifiable_cdag_typed_vertex_v = is_modifiable_cdag_typed_vertex::value; +inline constexpr bool isModifiableCdagTypedVertexV = IsModifiableCdagTypedVertex::value; /** * @brief Concept to check if typed vertices can be added. * * Requires: - * - `add_vertex(work, comm, mem, type)` + * - `AddVertex(work, comm, mem, type)` * * @tparam T The graph type. 
*/ template -struct is_constructable_cdag_typed_vertex : std::false_type {}; +struct IsConstructableCdagTypedVertex : std::false_type {}; template -struct is_constructable_cdag_typed_vertex< +struct IsConstructableCdagTypedVertex< T, - std::void_t().add_vertex( - std::declval>(), std::declval>(), std::declval>(), std::declval>()))>> - : std::conjunction, is_modifiable_cdag_typed_vertex> {}; // for default node type + std::void_t().AddVertex( + std::declval>(), std::declval>(), std::declval>(), std::declval>()))>> + : std::conjunction, IsModifiableCdagTypedVertex> {}; // for default node type template -inline constexpr bool is_constructable_cdag_typed_vertex_v = is_constructable_cdag_typed_vertex::value; +inline constexpr bool isConstructableCdagTypedVertexV = IsConstructableCdagTypedVertex::value; /** * @brief Concept to check if edges can be added (unweighted). * * Requires: - * - `add_edge(source, target)` + * - `AddEdge(source, target)` * * @tparam T The graph type. */ template -struct is_constructable_cdag_edge : std::false_type {}; +struct IsConstructableCdagEdge : std::false_type {}; template -struct is_constructable_cdag_edge< +struct IsConstructableCdagEdge< T, - std::void_t().add_edge(std::declval>(), std::declval>()))>> - : is_directed_graph {}; + std::void_t().AddEdge(std::declval>(), std::declval>()))>> + : IsDirectedGraph {}; template -inline constexpr bool is_constructable_cdag_edge_v = is_constructable_cdag_edge::value; +inline constexpr bool isConstructableCdagEdgeV = IsConstructableCdagEdge::value; /** * @brief Concept to check if edge communication weights are modifiable. * * Requires: - * - `set_edge_comm_weight(edge, weight)` + * - `SetEdgeCommWeight(edge, weight)` * * @tparam T The graph type. 
*/ template -struct is_modifiable_cdag_comm_edge : std::false_type {}; +struct IsModifiableCdagCommEdge : std::false_type {}; template -struct is_modifiable_cdag_comm_edge< +struct IsModifiableCdagCommEdge< T, - std::void_t().set_edge_comm_weight(std::declval>(), std::declval>()))>> - : std::conjunction> {}; // for default edge weight + std::void_t().SetEdgeCommWeight(std::declval>(), std::declval>()))>> + : std::conjunction> {}; // for default edge weight template -inline constexpr bool is_modifiable_cdag_comm_edge_v = is_modifiable_cdag_comm_edge::value; +inline constexpr bool isModifiableCdagCommEdgeV = IsModifiableCdagCommEdge::value; /** * @brief Concept to check if weighted edges can be added. * * Requires: - * - `add_edge(source, target, weight)` + * - `AddEdge(source, target, weight)` * * @tparam T The graph type. */ template -struct is_constructable_cdag_comm_edge : std::false_type {}; +struct IsConstructableCdagCommEdge : std::false_type {}; template -struct is_constructable_cdag_comm_edge< - T, - std::void_t().add_edge( - std::declval>(), std::declval>(), std::declval>()))>> - : std::conjunction, is_computational_dag_edge_desc, is_modifiable_cdag_comm_edge> { +struct IsConstructableCdagCommEdge().AddEdge( + std::declval>(), std::declval>(), std::declval>()))>> + : std::conjunction, IsComputationalDagEdgeDesc, IsModifiableCdagCommEdge> { }; // for default edge weight template -inline constexpr bool is_constructable_cdag_comm_edge_v = is_constructable_cdag_comm_edge::value; +inline constexpr bool isConstructableCdagCommEdgeV = IsConstructableCdagCommEdge::value; /** * @brief Concept for a fully constructable computational DAG. @@ -199,20 +198,20 @@ inline constexpr bool is_constructable_cdag_comm_edge_v = is_constructable_cdag_ * @tparam T The graph type. 
*/ template -struct is_constructable_cdag : std::false_type {}; +struct IsConstructableCdag : std::false_type {}; template -struct is_constructable_cdag> - : std::conjunction, is_constructable_cdag_vertex, is_constructable_cdag_edge> {}; +struct IsConstructableCdag> + : std::conjunction, IsConstructableCdagVertex, IsConstructableCdagEdge> {}; template -inline constexpr bool is_constructable_cdag_v = is_constructable_cdag::value; +inline constexpr bool isConstructableCdagV = IsConstructableCdag::value; /** * @brief Helper trait to check if a graph can be directly constructed from a vertex count and a set of edges. */ template -inline constexpr bool is_direct_constructable_cdag_v - = std::is_constructible, std::set, vertex_idx_t>>>::value; +inline constexpr bool isDirectConstructableCdagV + = std::is_constructible, std::set, VertexIdxT>>>::value; } // namespace osp diff --git a/include/osp/concepts/directed_graph_concept.hpp b/include/osp/concepts/directed_graph_concept.hpp index 0bd65d77..aaa537ad 100644 --- a/include/osp/concepts/directed_graph_concept.hpp +++ b/include/osp/concepts/directed_graph_concept.hpp @@ -33,15 +33,15 @@ namespace osp { * * A type `T` satisfies `is_directed_graph` if it provides the following API: * - * - **vertices()**: Returns a range of all vertices in the graph. - * - **num_vertices()**: Returns the total number of vertices as an integral type. - * - **num_edges()**: Returns the total number of edges as an integral type. + * - **Vertices()**: Returns a range of all vertices in the graph. + * - **NumVertices()**: Returns the total number of vertices as an integral type. + * - **NumEdges()**: Returns the total number of edges as an integral type. * - **parents(v)**: Returns a range of parent vertices for a given vertex `v`. - * - `v` must be of type `vertex_idx_t`. + * - `v` must be of type `VertexIdxT`. * - **children(v)**: Returns a range of child vertices for a given vertex `v`. - * - `v` must be of type `vertex_idx_t`. 
- * - **in_degree(v)**: Returns the number of incoming edges for vertex `v` as an integral type. - * - **out_degree(v)**: Returns the number of outgoing edges for vertex `v` as an integral type. + * - `v` must be of type `VertexIdxT`. + * - **InDegree(v)**: Returns the number of incoming edges for vertex `v` as an integral type. + * - **OutDegree(v)**: Returns the number of outgoing edges for vertex `v` as an integral type. * * This concept ensures that any graph implementation passed to OSP algorithms exposes * the necessary structural information for processing. @@ -52,28 +52,28 @@ namespace osp { * @tparam T The graph type to check against the concept. */ template -struct is_directed_graph : std::false_type {}; +struct IsDirectedGraph : std::false_type {}; template -struct is_directed_graph::vertex_idx, - decltype(std::declval().vertices()), - decltype(std::declval().num_vertices()), - decltype(std::declval().num_edges()), - decltype(std::declval().parents(std::declval>())), - decltype(std::declval().children(std::declval>())), - decltype(std::declval().in_degree(std::declval>())), - decltype(std::declval().out_degree(std::declval>()))>> - : std::conjunction().vertices()), vertex_idx_t>, - std::is_integral().num_vertices())>, - std::is_integral().num_edges())>, - is_input_range_of().parents(std::declval>())), vertex_idx_t>, - is_input_range_of().children(std::declval>())), vertex_idx_t>, - std::is_integral().in_degree(std::declval>()))>, - std::is_integral().out_degree(std::declval>()))>> {}; +struct IsDirectedGraph::VertexIdx, + decltype(std::declval().Vertices()), + decltype(std::declval().NumVertices()), + decltype(std::declval().NumEdges()), + decltype(std::declval().Parents(std::declval>())), + decltype(std::declval().Children(std::declval>())), + decltype(std::declval().InDegree(std::declval>())), + decltype(std::declval().OutDegree(std::declval>()))>> + : std::conjunction().Vertices()), VertexIdxT>, + std::is_integral().NumVertices())>, + 
std::is_integral().NumEdges())>, + IsInputRangeOf().Parents(std::declval>())), VertexIdxT>, + IsInputRangeOf().Children(std::declval>())), VertexIdxT>, + std::is_integral().InDegree(std::declval>()))>, + std::is_integral().OutDegree(std::declval>()))>> {}; template -inline constexpr bool is_directed_graph_v = is_directed_graph::value; +inline constexpr bool isDirectedGraphV = IsDirectedGraph::value; /** * @brief Concept for an edge list structure. @@ -86,28 +86,28 @@ inline constexpr bool is_directed_graph_v = is_directed_graph::value; * @tparam v_type The vertex type. * @tparam e_type The size type (usually integral). */ -template -struct is_edge_list_type : std::false_type {}; +template +struct IsEdgeListType : std::false_type {}; -template -struct is_edge_list_type< +template +struct IsEdgeListType< T, - v_type, - e_type, + VType, + EType, std::void_t().begin()), decltype(std::declval().end()), decltype(std::declval().size()), typename std::iterator_traits()))>::value_type, - decltype(std::declval()))>::value_type>().source), - decltype(std::declval()))>::value_type>().target)>> + decltype(std::declval()))>::value_type>().source_), + decltype(std::declval()))>::value_type>().target_)>> : std::conjunction< - std::is_same()))>::value_type>().source), - v_type>, - std::is_same()))>::value_type>().target), - v_type>, - std::is_same().size()), e_type>> {}; + std::is_same()))>::value_type>().source_), + VType>, + std::is_same()))>::value_type>().target_), + VType>, + std::is_same().size()), EType>> {}; -template -inline constexpr bool is_edge_list_type_v = is_edge_list_type::value; +template +inline constexpr bool isEdgeListTypeV = IsEdgeListType::value; } // namespace osp diff --git a/include/osp/concepts/directed_graph_edge_desc_concept.hpp b/include/osp/concepts/directed_graph_edge_desc_concept.hpp index d3a8227b..93612c96 100644 --- a/include/osp/concepts/directed_graph_edge_desc_concept.hpp +++ b/include/osp/concepts/directed_graph_edge_desc_concept.hpp @@ -41,63 
+41,63 @@ namespace osp { /** * @brief Default implementation to get the source vertex of an edge. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. * @param edge The edge descriptor. * @return The source vertex index. */ -template -inline vertex_idx_t source(const directed_edge &edge, const Graph_t &) { - return edge.source; +template +inline VertexIdxT Source(const DirectedEdge &edge, const GraphT &) { + return edge.source_; } /** * @brief Default implementation to get the target vertex of an edge. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. * @param edge The edge descriptor. * @return The target vertex index. */ -template -inline vertex_idx_t target(const directed_edge &edge, const Graph_t &) { - return edge.target; +template +inline VertexIdxT Target(const DirectedEdge &edge, const GraphT &) { + return edge.target_; } /** * @brief Get a view of all edges in the graph. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. * @param graph The graph instance. * @return An `edge_view` allowing iteration over all edges. */ -template -inline edge_view edges(const Graph_t &graph) { - return edge_view(graph); +template +inline EdgeView Edges(const GraphT &graph) { + return EdgeView(graph); } /** * @brief Get a view of outgoing edges from a vertex. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. * @param u The source vertex index. * @param graph The graph instance. * @return An `out_edge_view` allowing iteration over outgoing edges from `u`. */ -template -inline OutEdgeView out_edges(vertex_idx_t u, const Graph_t &graph) { - return OutEdgeView(graph, u); +template +inline OutEdgeView OutEdges(VertexIdxT u, const GraphT &graph) { + return OutEdgeView(graph, u); } /** * @brief Get a view of incoming edges to a vertex. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. * @param v The target vertex index. * @param graph The graph instance. 
* @return An `in_edge_view` allowing iteration over incoming edges to `v`. */ -template -inline InEdgeView in_edges(vertex_idx_t v, const Graph_t &graph) { - return InEdgeView(graph, v); +template +inline InEdgeView InEdges(VertexIdxT v, const GraphT &graph) { + return InEdgeView(graph, v); } /** @@ -106,34 +106,34 @@ inline InEdgeView in_edges(vertex_idx_t v, const Graph_t &grap * Checks if a type `T` satisfies the requirements of a directed graph that also * supports edge descriptors, including: * - Validity of `directed_graph_edge_desc_traits`. - * - Existence of `edges()`, `out_edges()`, and `in_edges()` functions returning input ranges of edge descriptors. + * - Existence of `Edges()`, `OutEdges()`, and `InEdges()` functions returning input ranges of edge descriptors. * - Existence of `source()` and `target()` functions mapping edge descriptors to vertex indices. * - Default and copy constructibility of the edge descriptor type. * * @tparam T The graph type to check. */ template -struct is_directed_graph_edge_desc : std::false_type {}; +struct IsDirectedGraphEdgeDesc : std::false_type {}; template -struct is_directed_graph_edge_desc::directed_edge_descriptor, - decltype(edges(std::declval())), - decltype(out_edges(std::declval>(), std::declval())), - decltype(in_edges(std::declval>(), std::declval())), - decltype(source(std::declval>(), std::declval())), - decltype(target(std::declval>(), std::declval()))>> - : std::conjunction, - std::is_default_constructible>, - std::is_copy_constructible>, - is_input_range_of())), edge_desc_t>, - is_input_range_of>(), std::declval())), edge_desc_t>, - is_input_range_of>(), std::declval())), edge_desc_t>, - std::is_same>(), std::declval())), vertex_idx_t>, - std::is_same>(), std::declval())), vertex_idx_t>> {}; +struct IsDirectedGraphEdgeDesc::DirectedEdgeDescriptor, + decltype(Edges(std::declval())), + decltype(OutEdges(std::declval>(), std::declval())), + decltype(InEdges(std::declval>(), std::declval())), + 
decltype(Source(std::declval>(), std::declval())), + decltype(Target(std::declval>(), std::declval()))>> + : std::conjunction, + std::is_default_constructible>, + std::is_copy_constructible>, + IsInputRangeOf())), EdgeDescT>, + IsInputRangeOf>(), std::declval())), EdgeDescT>, + IsInputRangeOf>(), std::declval())), EdgeDescT>, + std::is_same>(), std::declval())), VertexIdxT>, + std::is_same>(), std::declval())), VertexIdxT>> {}; template -inline constexpr bool is_directed_graph_edge_desc_v = is_directed_graph_edge_desc::value; +inline constexpr bool isDirectedGraphEdgeDescV = IsDirectedGraphEdgeDesc::value; /** * @brief Specialization for graphs that define a directed_edge_descriptor that can be used as a key in a hash table. @@ -143,18 +143,18 @@ inline constexpr bool is_directed_graph_edge_desc_v = is_directed_graph_edge_des * @tparam T The graph type. */ template -struct has_hashable_edge_desc : std::false_type {}; +struct HasHashableEdgeDesc : std::false_type {}; template -struct has_hashable_edge_desc>{}(std::declval>())), - decltype(std::declval>() == std::declval>()), - decltype(std::declval>() != std::declval>())>> - : std::conjunction, - std::is_default_constructible>, - std::is_copy_constructible>> {}; +struct HasHashableEdgeDesc>{}(std::declval>())), + decltype(std::declval>() == std::declval>()), + decltype(std::declval>() != std::declval>())>> + : std::conjunction, + std::is_default_constructible>, + std::is_copy_constructible>> {}; template -inline constexpr bool has_hashable_edge_desc_v = has_hashable_edge_desc::value; +inline constexpr bool hasHashableEdgeDescV = HasHashableEdgeDesc::value; } // namespace osp diff --git a/include/osp/concepts/graph_traits.hpp b/include/osp/concepts/graph_traits.hpp index 05b86a56..dee28098 100644 --- a/include/osp/concepts/graph_traits.hpp +++ b/include/osp/concepts/graph_traits.hpp @@ -19,6 +19,7 @@ limitations under the License. 
#pragma once #include "iterator_concepts.hpp" +#include "osp/auxiliary/hash_util.hpp" /** * @file graph_traits.hpp @@ -39,65 +40,65 @@ namespace osp { * otherwise they inherit from `std::false_type`. */ template -struct has_vertex_idx_tmember : std::false_type {}; +struct HasVertexIdxTmember : std::false_type {}; template -struct has_vertex_idx_tmember> : std::true_type {}; +struct HasVertexIdxTmember> : std::true_type {}; template -struct has_edge_desc_tmember : std::false_type {}; +struct HasEdgeDescTmember : std::false_type {}; template -struct has_edge_desc_tmember> : std::true_type {}; +struct HasEdgeDescTmember> : std::true_type {}; template -struct has_vertex_work_weight_tmember : std::false_type {}; +struct HasVertexWorkWeightTmember : std::false_type {}; template -struct has_vertex_work_weight_tmember> : std::true_type {}; +struct HasVertexWorkWeightTmember> : std::true_type {}; template -struct has_vertex_comm_weight_tmember : std::false_type {}; +struct HasVertexCommWeightTmember : std::false_type {}; template -struct has_vertex_comm_weight_tmember> : std::true_type {}; +struct HasVertexCommWeightTmember> : std::true_type {}; template -struct has_vertex_mem_weight_tmember : std::false_type {}; +struct HasVertexMemWeightTmember : std::false_type {}; template -struct has_vertex_mem_weight_tmember> : std::true_type {}; +struct HasVertexMemWeightTmember> : std::true_type {}; template -struct has_vertex_type_tmember : std::false_type {}; +struct HasVertexTypeTmember : std::false_type {}; template -struct has_vertex_type_tmember> : std::true_type {}; +struct HasVertexTypeTmember> : std::true_type {}; template -struct has_edge_comm_weight_tmember : std::false_type {}; +struct HasEdgeCommWeightTmember : std::false_type {}; template -struct has_edge_comm_weight_tmember> : std::true_type {}; +struct HasEdgeCommWeightTmember> : std::true_type {}; /** * @brief Core traits for any directed graph type. 
* - * Requires that the graph type `T` defines a `vertex_idx` type member. + * Requires that the graph type `T` defines a `VertexIdx` type member. * * @tparam T The graph type. */ template -struct directed_graph_traits { - static_assert(has_vertex_idx_tmember::value, "graph must have vertex_idx"); - using vertex_idx = typename T::vertex_idx; +struct DirectedGraphTraits { + static_assert(HasVertexIdxTmember::value, "graph must have VertexIdx"); + using VertexIdx = typename T::VertexIdx; }; /** * @brief Alias to easily access the vertex index type of a graph. */ template -using vertex_idx_t = typename directed_graph_traits::vertex_idx; +using VertexIdxT = typename DirectedGraphTraits::VertexIdx; /** * @brief A default edge descriptor for directed graphs. @@ -105,26 +106,26 @@ using vertex_idx_t = typename directed_graph_traits::vertex_idx; * This struct is used when the graph type does not provide its own edge descriptor. * It simply holds the source and target vertex indices. * - * @tparam Graph_t The graph type. + * @tparam GraphT The graph type. 
*/ -template -struct directed_edge { - vertex_idx_t source; - vertex_idx_t target; +template +struct DirectedEdge { + VertexIdxT source_; + VertexIdxT target_; - bool operator==(const directed_edge &other) const { return source == other.source && target == other.target; } + bool operator==(const DirectedEdge &other) const { return source_ == other.source_ && target_ == other.target_; } - bool operator!=(const directed_edge &other) const { return !(*this == other); } + bool operator!=(const DirectedEdge &other) const { return !(*this == other); } - directed_edge() : source(0), target(0) {} + DirectedEdge() : source_(0), target_(0) {} - directed_edge(const directed_edge &other) = default; - directed_edge(directed_edge &&other) = default; - directed_edge &operator=(const directed_edge &other) = default; - directed_edge &operator=(directed_edge &&other) = default; - ~directed_edge() = default; + DirectedEdge(const DirectedEdge &other) = default; + DirectedEdge(DirectedEdge &&other) = default; + DirectedEdge &operator=(const DirectedEdge &other) = default; + DirectedEdge &operator=(DirectedEdge &&other) = default; + ~DirectedEdge() = default; - directed_edge(vertex_idx_t src, vertex_idx_t tgt) : source(src), target(tgt) {} + DirectedEdge(VertexIdxT src, VertexIdxT tgt) : source_(src), target_(tgt) {} }; /** @@ -132,72 +133,72 @@ struct directed_edge { * * If the graph defines `directed_edge_descriptor`, it is extracted; otherwise, `directed_edge` is used as a default implementation. 
*/ -template -struct directed_graph_edge_desc_traits_helper { - using directed_edge_descriptor = directed_edge; +template +struct DirectedGraphEdgeDescTraitsHelper { + using DirectedEdgeDescriptor = DirectedEdge; }; template -struct directed_graph_edge_desc_traits_helper { - using directed_edge_descriptor = typename T::directed_edge_descriptor; +struct DirectedGraphEdgeDescTraitsHelper { + using DirectedEdgeDescriptor = typename T::DirectedEdgeDescriptor; }; template -struct directed_graph_edge_desc_traits { - using directed_edge_descriptor = - typename directed_graph_edge_desc_traits_helper::value>::directed_edge_descriptor; +struct DirectedGraphEdgeDescTraits { + using DirectedEdgeDescriptor = + typename DirectedGraphEdgeDescTraitsHelper::value>::DirectedEdgeDescriptor; }; template -using edge_desc_t = typename directed_graph_edge_desc_traits::directed_edge_descriptor; +using EdgeDescT = typename DirectedGraphEdgeDescTraits::DirectedEdgeDescriptor; /** * @brief Traits for computational Directed Acyclic Graphs (DAGs). * * Computational DAGs extend basic graphs by adding requirements for weight types: - * - `vertex_work_weight_type`: Represents computational cost of a task. - * - `vertex_comm_weight_type`: Represents data size/communication cost. - * - `vertex_mem_weight_type`: Represents memory usage of a task. + * - `VertexWorkWeightType`: Represents computational cost of a task. + * - `VertexCommWeightType`: Represents data size/communication cost. + * - `VertexMemWeightType`: Represents memory usage of a task. * * @tparam T The computational DAG type. 
*/ template -struct computational_dag_traits { - static_assert(has_vertex_work_weight_tmember::value, "cdag must have vertex work weight type"); - static_assert(has_vertex_comm_weight_tmember::value, "cdag must have vertex comm weight type"); - static_assert(has_vertex_mem_weight_tmember::value, "cdag must have vertex mem weight type"); - - using vertex_work_weight_type = typename T::vertex_work_weight_type; - using vertex_comm_weight_type = typename T::vertex_comm_weight_type; - using vertex_mem_weight_type = typename T::vertex_mem_weight_type; +struct ComputationalDagTraits { + static_assert(HasVertexWorkWeightTmember::value, "cdag must have vertex work weight type"); + static_assert(HasVertexCommWeightTmember::value, "cdag must have vertex comm weight type"); + static_assert(HasVertexMemWeightTmember::value, "cdag must have vertex mem weight type"); + + using VertexWorkWeightType = typename T::VertexWorkWeightType; + using VertexCommWeightType = typename T::VertexCommWeightType; + using VertexMemWeightType = typename T::VertexMemWeightType; }; template -using v_workw_t = typename computational_dag_traits::vertex_work_weight_type; +using VWorkwT = typename ComputationalDagTraits::VertexWorkWeightType; template -using v_commw_t = typename computational_dag_traits::vertex_comm_weight_type; +using VCommwT = typename ComputationalDagTraits::VertexCommWeightType; template -using v_memw_t = typename computational_dag_traits::vertex_mem_weight_type; +using VMemwT = typename ComputationalDagTraits::VertexMemWeightType; /** * @brief Traits to extract the vertex type of a computational DAG, if defined. * - * If the DAG defines `vertex_type_type`, it is extracted; otherwise, `void` is used. + * If the DAG defines `VertexTypeType`, it is extracted; otherwise, `void` is used. 
*/ template -struct computational_dag_typed_vertices_traits { - using vertex_type_type = void; +struct ComputationalDagTypedVerticesTraits { + using VertexTypeType = void; }; template -struct computational_dag_typed_vertices_traits> { - using vertex_type_type = typename T::vertex_type_type; +struct ComputationalDagTypedVerticesTraits> { + using VertexTypeType = typename T::VertexTypeType; }; template -using v_type_t = typename computational_dag_typed_vertices_traits::vertex_type_type; +using VTypeT = typename ComputationalDagTypedVerticesTraits::VertexTypeType; /** * @brief Traits to extract the edge communication weight type of a computational DAG, if defined. @@ -205,17 +206,17 @@ using v_type_t = typename computational_dag_typed_vertices_traits::vertex_typ * If the DAG defines `edge_comm_weight_type`, it is extracted; otherwise, `void` is used. */ template -struct computational_dag_edge_desc_traits { - using edge_comm_weight_type = void; +struct ComputationalDagEdgeDescTraits { + using EdgeCommWeightType = void; }; template -struct computational_dag_edge_desc_traits> { - using edge_comm_weight_type = typename T::edge_comm_weight_type; +struct ComputationalDagEdgeDescTraits> { + using EdgeCommWeightType = typename T::EdgeCommWeightType; }; template -using e_commw_t = typename computational_dag_edge_desc_traits::edge_comm_weight_type; +using ECommwT = typename ComputationalDagEdgeDescTraits::EdgeCommWeightType; // ----------------------------------------------------------------------------- // Property Traits @@ -227,40 +228,40 @@ using e_commw_t = typename computational_dag_edge_desc_traits::edge_comm_weig * to optimize the algorithm. 
*/ template -struct has_vertices_in_top_order_trait : std::false_type {}; +struct HasVerticesInTopOrderTrait : std::false_type {}; template -struct has_vertices_in_top_order_trait> - : std::bool_constant && T::vertices_in_top_order> {}; +struct HasVerticesInTopOrderTrait> + : std::bool_constant && T::verticesInTopOrder_> {}; template -inline constexpr bool has_vertices_in_top_order_v = has_vertices_in_top_order_trait::value; +inline constexpr bool hasVerticesInTopOrderV = HasVerticesInTopOrderTrait::value; /** * @brief Check if a graph guarantees children of a vertex are stored/iterated in vertex index order. */ template -struct has_children_in_vertex_order_trait : std::false_type {}; +struct HasChildrenInVertexOrderTrait : std::false_type {}; template -struct has_children_in_vertex_order_trait> - : std::bool_constant && T::children_in_vertex_order> {}; +struct HasChildrenInVertexOrderTrait> + : std::bool_constant && T::childrenInVertexOrder_> {}; template -inline constexpr bool has_children_in_vertex_order_v = has_children_in_vertex_order_trait::value; +inline constexpr bool hasChildrenInVertexOrderV = HasChildrenInVertexOrderTrait::value; /** * @brief Check if a graph guarantees parents of a vertex are stored/iterated in vertex index order. 
*/ template -struct has_parents_in_vertex_order_trait : std::false_type {}; +struct HasParentsInVertexOrderTrait : std::false_type {}; template -struct has_parents_in_vertex_order_trait> - : std::bool_constant && T::parents_in_vertex_order> {}; +struct HasParentsInVertexOrderTrait> + : std::bool_constant && T::parentsInVertexOrder_> {}; template -inline constexpr bool has_parents_in_vertex_order_v = has_parents_in_vertex_order_trait::value; +inline constexpr bool hasParentsInVertexOrderV = HasParentsInVertexOrderTrait::value; } // namespace osp @@ -270,12 +271,13 @@ inline constexpr bool has_parents_in_vertex_order_v = has_parents_in_vertex_orde * This specialization provides a hash function for osp::directed_edge, which is used in hash-based containers like * std::unordered_set and std::unordered_map. */ -template -struct std::hash> { - std::size_t operator()(const osp::directed_edge &p) const noexcept { +template +struct std::hash> { + std::size_t operator()(const osp::DirectedEdge &p) const noexcept { // Combine hashes of source and target - std::size_t h1 = std::hash>{}(p.source); - std::size_t h2 = std::hash>{}(p.target); - return h1 ^ (h2 << 1); // Simple hash combining + std::size_t h1 = std::hash>{}(p.source_); + std::size_t h2 = std::hash>{}(p.target_); + osp::HashCombine(h1, h2); + return h1; } }; diff --git a/include/osp/concepts/iterator_concepts.hpp b/include/osp/concepts/iterator_concepts.hpp index bb827ae6..ecb10502 100644 --- a/include/osp/concepts/iterator_concepts.hpp +++ b/include/osp/concepts/iterator_concepts.hpp @@ -44,19 +44,19 @@ namespace osp { * @tparam T The type to check. 
*/ template -struct is_forward_iterator : std::false_type {}; +struct IsForwardIterator : std::false_type {}; template -struct is_forward_iterator::difference_type, - typename std::iterator_traits::value_type, - typename std::iterator_traits::pointer, - typename std::iterator_traits::reference, - typename std::iterator_traits::iterator_category>> +struct IsForwardIterator::difference_type, + typename std::iterator_traits::value_type, + typename std::iterator_traits::pointer, + typename std::iterator_traits::reference, + typename std::iterator_traits::iterator_category>> : std::conjunction::iterator_category>> {}; template -inline constexpr bool is_forward_iterator_v = is_forward_iterator::value; +inline constexpr bool isForwardIteratorV = IsForwardIterator::value; /** * @brief Checks if a type is a range of forward iterators with a specific value type. @@ -70,16 +70,16 @@ inline constexpr bool is_forward_iterator_v = is_forward_iterator::value; * @tparam ValueType The expected value type of the range. */ template -struct is_forward_range_of : std::false_type {}; +struct IsForwardRangeOf : std::false_type {}; template -struct is_forward_range_of())), decltype(std::end(std::declval()))>> - : std::conjunction()))>, +struct IsForwardRangeOf())), decltype(std::end(std::declval()))>> + : std::conjunction()))>, std::is_same()))>::value_type>> { }; template -inline constexpr bool is_forward_range_of_v = is_forward_range_of::value; +inline constexpr bool isForwardRangeOfV = IsForwardRangeOf::value; /** * @brief Checks if a type is a container (sized forward range). @@ -93,14 +93,14 @@ inline constexpr bool is_forward_range_of_v = is_forward_range_of: * @tparam ValueType The expected value type of the container. 
*/ template -struct is_container_of : std::false_type {}; +struct IsContainerOf : std::false_type {}; template -struct is_container_of()))>> - : std::conjunction> {}; +struct IsContainerOf()))>> + : std::conjunction> {}; template -inline constexpr bool is_container_of_v = is_container_of::value; +inline constexpr bool isContainerOfV = IsContainerOf::value; /** * @brief Checks if a type is an input iterator. @@ -114,19 +114,19 @@ inline constexpr bool is_container_of_v = is_container_of::value; * @tparam T The type to check. */ template -struct is_input_iterator : std::false_type {}; +struct IsInputIterator : std::false_type {}; template -struct is_input_iterator::difference_type, - typename std::iterator_traits::value_type, - typename std::iterator_traits::pointer, - typename std::iterator_traits::reference, - typename std::iterator_traits::iterator_category>> +struct IsInputIterator::difference_type, + typename std::iterator_traits::value_type, + typename std::iterator_traits::pointer, + typename std::iterator_traits::reference, + typename std::iterator_traits::iterator_category>> : std::conjunction::iterator_category>> {}; template -inline constexpr bool is_input_iterator_v = is_input_iterator::value; +inline constexpr bool isInputIteratorV = IsInputIterator::value; /** * @brief Checks if a type is a range of input iterators with a specific value type. @@ -140,15 +140,15 @@ inline constexpr bool is_input_iterator_v = is_input_iterator::value; * @tparam ValueType The expected value type of the range. 
*/ template -struct is_input_range_of : std::false_type {}; +struct IsInputRangeOf : std::false_type {}; template -struct is_input_range_of())), decltype(std::end(std::declval()))>> - : std::conjunction()))>, +struct IsInputRangeOf())), decltype(std::end(std::declval()))>> + : std::conjunction()))>, std::is_same()))>::value_type>> { }; template -inline constexpr bool is_input_range_of_v = is_input_range_of::value; +inline constexpr bool isInputRangeOfV = IsInputRangeOf::value; } // namespace osp diff --git a/include/osp/concepts/specific_graph_impl.hpp b/include/osp/concepts/specific_graph_impl.hpp index 810211c3..f2ac4b86 100644 --- a/include/osp/concepts/specific_graph_impl.hpp +++ b/include/osp/concepts/specific_graph_impl.hpp @@ -30,25 +30,25 @@ limitations under the License. namespace osp { /** - * @brief Trait to check if a graph type is a `Compact_Sparse_Graph`. + * @brief Trait to check if a graph type is a `CompactSparseGraph`. * * @tparam T The graph type. */ template -struct is_Compact_Sparse_Graph : std::false_type {}; +struct IsCompactSparseGraph : std::false_type {}; template -inline constexpr bool is_Compact_Sparse_Graph_v = is_Compact_Sparse_Graph::value; +inline constexpr bool isCompactSparseGraphV = IsCompactSparseGraph::value; /** - * @brief Trait to check if a graph type is a `Compact_Sparse_Graph` that supports reordering. + * @brief Trait to check if a graph type is a `CompactSparseGraph` that supports reordering. * * @tparam T The graph type. 
*/ template -struct is_Compact_Sparse_Graph_reorder : std::false_type {}; +struct IsCompactSparseGraphReorder : std::false_type {}; template -inline constexpr bool is_Compact_Sparse_Graph_reorder_v = is_Compact_Sparse_Graph_reorder::value; +inline constexpr bool isCompactSparseGraphReorderV = IsCompactSparseGraphReorder::value; } // namespace osp diff --git a/include/osp/dag_divider/AbstractWavefrontScheduler.hpp b/include/osp/dag_divider/AbstractWavefrontScheduler.hpp index 627ee33d..f306c53f 100644 --- a/include/osp/dag_divider/AbstractWavefrontScheduler.hpp +++ b/include/osp/dag_divider/AbstractWavefrontScheduler.hpp @@ -33,90 +33,90 @@ namespace osp { * @class AbstractWavefrontScheduler * @brief Base class for schedulers that operate on wavefronts of a DAG. */ -template -class AbstractWavefrontScheduler : public Scheduler { +template +class AbstractWavefrontScheduler : public Scheduler { protected: - IDagDivider *divider; - Scheduler *scheduler; - static constexpr bool enable_debug_prints = true; + IDagDivider *divider_; + Scheduler *scheduler_; + static constexpr bool enableDebugPrints_ = true; /** * @brief Distributes processors proportionally, ensuring active components get at least one if possible. * @param allocation A reference to the vector that will be filled with the processor allocation. * @return True if the scarcity case was hit (fewer processors than active components), false otherwise. 
*/ - bool distributeProcessors(unsigned total_processors_of_type, - const std::vector &work_weights, + bool DistributeProcessors(unsigned totalProcessorsOfType, + const std::vector &workWeights, std::vector &allocation) const { - allocation.assign(work_weights.size(), 0); - double total_work = std::accumulate(work_weights.begin(), work_weights.end(), 0.0); - if (total_work <= 1e-9 || total_processors_of_type == 0) { + allocation.assign(workWeights.size(), 0); + double totalWork = std::accumulate(workWeights.begin(), workWeights.end(), 0.0); + if (totalWork <= 1e-9 || totalProcessorsOfType == 0) { return false; } - std::vector active_indices; - for (size_t i = 0; i < work_weights.size(); ++i) { - if (work_weights[i] > 1e-9) { - active_indices.push_back(i); + std::vector activeIndices; + for (size_t i = 0; i < workWeights.size(); ++i) { + if (workWeights[i] > 1e-9) { + activeIndices.push_back(i); } } - if (active_indices.empty()) { + if (activeIndices.empty()) { return false; } - size_t num_active_components = active_indices.size(); - unsigned remaining_procs = total_processors_of_type; + size_t numActiveComponents = activeIndices.size(); + unsigned remainingProcs = totalProcessorsOfType; // --- Stage 1: Guarantee at least one processor if possible (anti-starvation) --- - if (total_processors_of_type >= num_active_components) { + if (totalProcessorsOfType >= numActiveComponents) { // Abundance case: Give one processor to each active component first. - for (size_t idx : active_indices) { + for (size_t idx : activeIndices) { allocation[idx] = 1; } - remaining_procs -= static_cast(num_active_components); + remainingProcs -= static_cast(numActiveComponents); } else { // Scarcity case: Not enough processors for each active component. 
- std::vector> sorted_work; - for (size_t idx : active_indices) { - sorted_work.push_back({work_weights[idx], idx}); + std::vector> sortedWork; + for (size_t idx : activeIndices) { + sortedWork.push_back({workWeights[idx], idx}); } - std::sort(sorted_work.rbegin(), sorted_work.rend()); - for (unsigned i = 0; i < remaining_procs; ++i) { - allocation[sorted_work[i].second]++; + std::sort(sortedWork.rbegin(), sortedWork.rend()); + for (unsigned i = 0; i < remainingProcs; ++i) { + allocation[sortedWork[i].second]++; } return true; // Scarcity case was hit. } // --- Stage 2: Proportional Distribution of Remaining Processors --- - if (remaining_procs > 0) { - std::vector adjusted_work_weights; - double adjusted_total_work = 0; + if (remainingProcs > 0) { + std::vector adjustedWorkWeights; + double adjustedTotalWork = 0; - double work_per_proc = total_work / static_cast(total_processors_of_type); + double workPerProc = totalWork / static_cast(totalProcessorsOfType); - for (size_t idx : active_indices) { - double adjusted_work = std::max(0.0, work_weights[idx] - work_per_proc); - adjusted_work_weights.push_back(adjusted_work); - adjusted_total_work += adjusted_work; + for (size_t idx : activeIndices) { + double adjustedWork = std::max(0.0, workWeights[idx] - workPerProc); + adjustedWorkWeights.push_back(adjustedWork); + adjustedTotalWork += adjustedWork; } - if (adjusted_total_work > 1e-9) { + if (adjustedTotalWork > 1e-9) { std::vector> remainders; - unsigned allocated_count = 0; - - for (size_t i = 0; i < active_indices.size(); ++i) { - double exact_share = (adjusted_work_weights[i] / adjusted_total_work) * remaining_procs; - unsigned additional_alloc = static_cast(std::floor(exact_share)); - allocation[active_indices[i]] += additional_alloc; // Add to the base allocation of 1 - remainders.push_back({exact_share - additional_alloc, active_indices[i]}); - allocated_count += additional_alloc; + unsigned allocatedCount = 0; + + for (size_t i = 0; i < activeIndices.size(); 
++i) { + double exactShare = (adjustedWorkWeights[i] / adjustedTotalWork) * remainingProcs; + unsigned additionalAlloc = static_cast(std::floor(exactShare)); + allocation[activeIndices[i]] += additionalAlloc; // Add to the base allocation of 1 + remainders.push_back({exactShare - additionalAlloc, activeIndices[i]}); + allocatedCount += additionalAlloc; } std::sort(remainders.rbegin(), remainders.rend()); - unsigned remainder_processors = remaining_procs - allocated_count; - for (unsigned i = 0; i < remainder_processors; ++i) { + unsigned remainderProcessors = remainingProcs - allocatedCount; + for (unsigned i = 0; i < remainderProcessors; ++i) { if (i < remainders.size()) { allocation[remainders[i].second]++; } @@ -126,37 +126,37 @@ class AbstractWavefrontScheduler : public Scheduler { return false; // Scarcity case was not hit. } - BspArchitecture createSubArchitecture(const BspArchitecture &original_arch, - const std::vector &sub_dag_proc_types) const { + BspArchitecture CreateSubArchitecture(const BspArchitecture &originalArch, + const std::vector &subDagProcTypes) const { // The calculation is now inside the assert, so it only happens in debug builds. 
- assert(std::accumulate(sub_dag_proc_types.begin(), sub_dag_proc_types.end(), 0u) > 0 + assert(std::accumulate(subDagProcTypes.begin(), subDagProcTypes.end(), 0u) > 0 && "Attempted to create a sub-architecture with zero processors."); - BspArchitecture sub_architecture(original_arch); - std::vector> sub_dag_processor_memory(original_arch.getProcessorTypeCount().size(), - std::numeric_limits>::max()); - for (unsigned i = 0; i < original_arch.numberOfProcessors(); ++i) { - sub_dag_processor_memory[original_arch.processorType(i)] - = std::min(original_arch.memoryBound(i), sub_dag_processor_memory[original_arch.processorType(i)]); + BspArchitecture subArchitecture(originalArch); + std::vector> subDagProcessorMemory(originalArch.GetProcessorTypeCount().size(), + std::numeric_limits>::max()); + for (unsigned i = 0; i < originalArch.NumberOfProcessors(); ++i) { + subDagProcessorMemory[originalArch.ProcessorType(i)] + = std::min(originalArch.MemoryBound(i), subDagProcessorMemory[originalArch.ProcessorType(i)]); } - sub_architecture.SetProcessorsConsequTypes(sub_dag_proc_types, sub_dag_processor_memory); - return sub_architecture; + subArchitecture.SetProcessorsConsequTypes(subDagProcTypes, subDagProcessorMemory); + return subArchitecture; } - bool validateWorkDistribution(const std::vector &sub_dags, const BspInstance &instance) const { - const auto &original_arch = instance.getArchitecture(); - for (const auto &rep_sub_dag : sub_dags) { - const double total_rep_work = sumOfVerticesWorkWeights(rep_sub_dag); + bool ValidateWorkDistribution(const std::vector &subDags, const BspInstance &instance) const { + const auto &originalArch = instance.GetArchitecture(); + for (const auto &repSubDag : subDags) { + const double totalRepWork = SumOfVerticesWorkWeights(repSubDag); - double sum_of_compatible_works_for_rep = 0.0; - for (unsigned type_idx = 0; type_idx < original_arch.getNumberOfProcessorTypes(); ++type_idx) { - sum_of_compatible_works_for_rep += 
sumOfCompatibleWorkWeights(rep_sub_dag, instance, type_idx); + double sumOfCompatibleWorksForRep = 0.0; + for (unsigned typeIdx = 0; typeIdx < originalArch.GetNumberOfProcessorTypes(); ++typeIdx) { + sumOfCompatibleWorksForRep += SumOfCompatibleWorkWeights(repSubDag, instance, typeIdx); } - if (sum_of_compatible_works_for_rep > total_rep_work + 1e-9) { - if constexpr (enable_debug_prints) { - std::cerr << "ERROR: Sum of compatible work (" << sum_of_compatible_works_for_rep << ") exceeds total work (" - << total_rep_work << ") for a sub-dag. Aborting." << std::endl; + if (sumOfCompatibleWorksForRep > totalRepWork + 1e-9) { + if constexpr (enableDebugPrints_) { + std::cerr << "ERROR: Sum of compatible work (" << sumOfCompatibleWorksForRep << ") exceeds total work (" + << totalRepWork << ") for a sub-dag. Aborting." << std::endl; } return false; } @@ -165,7 +165,7 @@ class AbstractWavefrontScheduler : public Scheduler { } public: - AbstractWavefrontScheduler(IDagDivider &div, Scheduler &sched) : divider(&div), scheduler(&sched) {} + AbstractWavefrontScheduler(IDagDivider &div, Scheduler &sched) : divider_(&div), scheduler_(&sched) {} }; } // namespace osp diff --git a/include/osp/dag_divider/ConnectedComponentDivider.hpp b/include/osp/dag_divider/ConnectedComponentDivider.hpp index 7edc747f..e0fabca5 100644 --- a/include/osp/dag_divider/ConnectedComponentDivider.hpp +++ b/include/osp/dag_divider/ConnectedComponentDivider.hpp @@ -27,156 +27,156 @@ limitations under the License. 
namespace osp { -template -class ConnectedComponentDivider : public IDagDivider { - static_assert(is_computational_dag_v, "Graph must be a computational DAG"); - static_assert(is_computational_dag_v, "Constr_Graph_t must be a computational DAG"); - static_assert(is_constructable_cdag_v, "Constr_Graph_t must satisfy the constructable_cdag_vertex concept"); - static_assert(std::is_same_v, vertex_idx_t>, - "Graph_t and Constr_Graph_t must have the same vertex_idx types"); +template +class ConnectedComponentDivider : public IDagDivider { + static_assert(isComputationalDagV, "Graph must be a computational DAG"); + static_assert(isComputationalDagV, "ConstrGraphT must be a computational DAG"); + static_assert(isConstructableCdagV, "ConstrGraphT must satisfy the constructable_cdag_vertex concept"); + static_assert(std::is_same_v, VertexIdxT>, + "GraphT and ConstrGraphT must have the same VertexIdx types"); private: - using vertex_idx = vertex_idx_t; + using VertexIdx = VertexIdxT; - std::vector sub_dags; + std::vector subDags_; // For each component: local_idx -> global vertex - std::vector> vertex_mapping; + std::vector> vertexMapping_; // Global vertex -> local index - std::vector vertex_map; + std::vector vertexMap_; // Global vertex -> component id - std::vector component; + std::vector component_; public: - inline std::vector &get_sub_dags() { return sub_dags; } + inline std::vector &GetSubDags() { return subDags_; } - inline const std::vector &get_sub_dags() const { return sub_dags; } + inline const std::vector &GetSubDags() const { return subDags_; } - inline const std::vector> &get_vertex_mapping() const { return vertex_mapping; } + inline const std::vector> &GetVertexMapping() const { return vertexMapping_; } - inline const std::vector &get_component() const { return component; } + inline const std::vector &GetComponent() const { return component_; } - inline const std::vector &get_vertex_map() const { return vertex_map; } + inline const std::vector 
&GetVertexMap() const { return vertexMap_; } - virtual std::vector>>> divide(const Graph_t &dag) override { - if (dag.num_vertices() == 0) { + virtual std::vector>>> Divide(const GraphT &dag) override { + if (dag.NumVertices() == 0) { return {}; } - bool has_more_than_one_connected_component = compute_connected_components(dag); + bool hasMoreThanOneConnectedComponent = ComputeConnectedComponents(dag); - std::vector>>> vertex_maps(1); + std::vector>>> vertexMaps(1); - if (has_more_than_one_connected_component) { - vertex_maps[0].resize(sub_dags.size()); - for (unsigned i = 0; i < sub_dags.size(); ++i) { - vertex_maps[0][i].resize(sub_dags[i].num_vertices()); + if (hasMoreThanOneConnectedComponent) { + vertexMaps[0].resize(subDags_.size()); + for (unsigned i = 0; i < subDags_.size(); ++i) { + vertexMaps[0][i].resize(subDags_[i].NumVertices()); } - for (const auto &v : dag.vertices()) { - vertex_maps[0][component[v]][vertex_map[v]] = v; + for (const auto &v : dag.Vertices()) { + vertexMaps[0][component_[v]][vertexMap_[v]] = v; } } else { - sub_dags.resize(1); - sub_dags[0] = dag; - vertex_mapping.resize(1); - vertex_mapping[0].resize(dag.num_vertices()); - vertex_map.resize(dag.num_vertices()); - - vertex_maps[0].resize(1); - vertex_maps[0][0].resize(dag.num_vertices()); - for (const auto &v : dag.vertices()) { - vertex_maps[0][0][v] = v; - vertex_map[v] = v; - vertex_mapping[0][v] = v; + subDags_.resize(1); + subDags_[0] = dag; + vertexMapping_.resize(1); + vertexMapping_[0].resize(dag.NumVertices()); + vertexMap_.resize(dag.NumVertices()); + + vertexMaps[0].resize(1); + vertexMaps[0][0].resize(dag.NumVertices()); + for (const auto &v : dag.Vertices()) { + vertexMaps[0][0][v] = v; + vertexMap_[v] = v; + vertexMapping_[0][v] = v; } } - return vertex_maps; + return vertexMaps; } - std::vector>>> compute_vertex_maps(const Graph_t &dag) { - std::vector>>> vertex_maps(1); + std::vector>>> ComputeVertexMaps(const GraphT &dag) { + std::vector>>> vertexMaps(1); - 
vertex_maps[0].resize(sub_dags.size()); - for (unsigned i = 0; i < sub_dags.size(); ++i) { - vertex_maps[0][i].resize(sub_dags[i].num_vertices()); + vertexMaps[0].resize(subDags_.size()); + for (unsigned i = 0; i < subDags_.size(); ++i) { + vertexMaps[0][i].resize(subDags_[i].NumVertices()); } - for (const auto &v : dag.vertices()) { - vertex_maps[0][component[v]][vertex_map[v]] = v; + for (const auto &v : dag.Vertices()) { + vertexMaps[0][component_[v]][vertexMap_[v]] = v; } - return vertex_maps; + return vertexMaps; } - bool compute_connected_components(const Graph_t &dag) { + bool ComputeConnectedComponents(const GraphT &dag) { // Clear previous state - sub_dags.clear(); - vertex_mapping.clear(); - vertex_map.clear(); - component.assign(dag.num_vertices(), std::numeric_limits::max()); + subDags_.clear(); + vertexMapping_.clear(); + vertexMap_.clear(); + component_.assign(dag.NumVertices(), std::numeric_limits::max()); - if (dag.num_vertices() == 0) { + if (dag.NumVertices() == 0) { return false; } - unsigned component_id = 0; - for (const auto &v : dag.vertices()) { - if (component[v] == std::numeric_limits::max()) { - component[v] = component_id; + unsigned componentId = 0; + for (const auto &v : dag.Vertices()) { + if (component_[v] == std::numeric_limits::max()) { + component_[v] = componentId; // BFS for weakly connected component - std::queue q; + std::queue q; q.push(v); while (!q.empty()) { - vertex_idx current = q.front(); + VertexIdx current = q.front(); q.pop(); - for (const auto &child : dag.children(current)) { - if (component[child] == std::numeric_limits::max()) { + for (const auto &child : dag.Children(current)) { + if (component_[child] == std::numeric_limits::max()) { q.push(child); - component[child] = component_id; + component_[child] = componentId; } } - for (const auto &parent : dag.parents(current)) { - if (component[parent] == std::numeric_limits::max()) { + for (const auto &parent : dag.Parents(current)) { + if (component_[parent] == 
std::numeric_limits::max()) { q.push(parent); - component[parent] = component_id; + component_[parent] = componentId; } } } - ++component_id; + ++componentId; } } - if (component_id == 1) { + if (componentId == 1) { // Single component: no need to build sub_dags or maps return false; } - sub_dags = create_induced_subgraphs(dag, component); + subDags_ = CreateInducedSubgraphs(dag, component_); // Create the mappings between global and local vertex indices. - vertex_mapping.resize(sub_dags.size()); - vertex_map.resize(dag.num_vertices()); + vertexMapping_.resize(subDags_.size()); + vertexMap_.resize(dag.NumVertices()); - std::vector current_index_in_subdag(sub_dags.size(), 0); - for (const auto &v : dag.vertices()) { - unsigned comp_id = component[v]; - vertex_idx local_idx = current_index_in_subdag[comp_id]++; - vertex_map[v] = local_idx; + std::vector currentIndexInSubdag(subDags_.size(), 0); + for (const auto &v : dag.Vertices()) { + unsigned compId = component_[v]; + VertexIdx localIdx = currentIndexInSubdag[compId]++; + vertexMap_[v] = localIdx; - if (vertex_mapping[comp_id].empty()) { - vertex_mapping[comp_id].resize(sub_dags[comp_id].num_vertices()); + if (vertexMapping_[compId].empty()) { + vertexMapping_[compId].resize(subDags_[compId].NumVertices()); } - vertex_mapping[comp_id][local_idx] = v; + vertexMapping_[compId][localIdx] = v; } return true; diff --git a/include/osp/dag_divider/ConnectedComponentScheduler.hpp b/include/osp/dag_divider/ConnectedComponentScheduler.hpp index 7d6cdece..1f960867 100644 --- a/include/osp/dag_divider/ConnectedComponentScheduler.hpp +++ b/include/osp/dag_divider/ConnectedComponentScheduler.hpp @@ -24,59 +24,57 @@ limitations under the License. 
namespace osp { -template -class ConnectedComponentScheduler : public Scheduler { - Scheduler *scheduler; +template +class ConnectedComponentScheduler : public Scheduler { + Scheduler *scheduler_; public: - ConnectedComponentScheduler(Scheduler &_scheduler) : scheduler(&_scheduler) {} + ConnectedComponentScheduler(Scheduler &scheduler) : scheduler_(&scheduler) {} - std::string getScheduleName() const override { return "SubDagScheduler"; } + std::string GetScheduleName() const override { return "SubDagScheduler"; } - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); - const Graph_t &dag = instance.getComputationalDag(); - ConnectedComponentDivider partitioner; + const GraphT &dag = instance.GetComputationalDag(); + ConnectedComponentDivider partitioner; - partitioner.divide(dag); + partitioner.Divide(dag); - v_workw_t total_work_weight = sumOfVerticesWorkWeights(dag); + VWorkwT totalWorkWeight = SumOfVerticesWorkWeights(dag); - unsigned num_processors_offset = 0; + unsigned numProcessorsOffset = 0; - for (std::size_t i = 0; i < partitioner.get_sub_dags().size(); i++) { - const auto &sub_dag = partitioner.get_sub_dags()[i]; - const auto &mapping = partitioner.get_vertex_mapping()[i]; + for (std::size_t i = 0; i < partitioner.GetSubDags().size(); i++) { + const auto &subDag = partitioner.GetSubDags()[i]; + const auto &mapping = partitioner.GetVertexMapping()[i]; - v_workw_t sub_dag_work_weight = sumOfVerticesWorkWeights(sub_dag); + VWorkwT subDagWorkWeight = SumOfVerticesWorkWeights(subDag); - BspInstance sub_instance(sub_dag, instance.getArchitecture()); - BspArchitecture &sub_architecture = sub_instance.getArchitecture(); + BspInstance subInstance(subDag, instance.GetArchitecture()); + BspArchitecture &subArchitecture = subInstance.GetArchitecture(); - const double sub_dag_work_weight_percent 
- = static_cast(sub_dag_work_weight) / static_cast(total_work_weight); - const unsigned sub_dag_processors - = static_cast(sub_dag_work_weight_percent * sub_architecture.numberOfProcessors()); + const double subDagWorkWeightPercent = static_cast(subDagWorkWeight) / static_cast(totalWorkWeight); + const unsigned subDagProcessors = static_cast(subDagWorkWeightPercent * subArchitecture.NumberOfProcessors()); - sub_architecture.setNumberOfProcessors(sub_dag_processors); + subArchitecture.SetNumberOfProcessors(subDagProcessors); - BspSchedule sub_schedule(sub_instance); - auto status = scheduler->computeSchedule(sub_schedule); + BspSchedule subSchedule(subInstance); + auto status = scheduler_->ComputeSchedule(subSchedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + if (status != ReturnStatus::OSP_SUCCESS && status != ReturnStatus::BEST_FOUND) { return status; } - for (const auto &v : sub_instance.vertices()) { - schedule.setAssignedProcessor(mapping.at(v), sub_schedule.assignedProcessor(v) + num_processors_offset); - schedule.setAssignedSuperstep(mapping.at(v), sub_schedule.assignedSuperstep(v)); + for (const auto &v : subInstance.Vertices()) { + schedule.SetAssignedProcessor(mapping.at(v), subSchedule.AssignedProcessor(v) + numProcessorsOffset); + schedule.SetAssignedSuperstep(mapping.at(v), subSchedule.AssignedSuperstep(v)); } - num_processors_offset += sub_architecture.numberOfProcessors(); + numProcessorsOffset += subArchitecture.NumberOfProcessors(); } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } }; diff --git a/include/osp/dag_divider/DagDivider.hpp b/include/osp/dag_divider/DagDivider.hpp index e89c2b01..4ec96411 100644 --- a/include/osp/dag_divider/DagDivider.hpp +++ b/include/osp/dag_divider/DagDivider.hpp @@ -29,9 +29,9 @@ namespace osp { * @brief Divides the wavefronts of a computational DAG into consecutive groups or sections. 
* */ -template +template class IDagDivider { - static_assert(is_directed_graph_v, "Graph must be directed"); + static_assert(isDirectedGraphV, "Graph must be directed"); public: virtual ~IDagDivider() = default; @@ -47,7 +47,7 @@ class IDagDivider { * @return const std::vector>>& * A constant reference to the vertex maps. */ - virtual std::vector>>> divide(const Graph_t &dag) = 0; + virtual std::vector>>> Divide(const GraphT &dag) = 0; }; } // namespace osp diff --git a/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp b/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp index 5b61736f..a0b33345 100644 --- a/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp +++ b/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp @@ -25,300 +25,296 @@ namespace osp { * @class IsomorphicWavefrontComponentScheduler * @brief Schedules wavefronts by grouping isomorphic components. */ -template -class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler { +template +class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler { public: - IsomorphicWavefrontComponentScheduler(IDagDivider &div, Scheduler &scheduler) - : AbstractWavefrontScheduler(div, scheduler) {} + IsomorphicWavefrontComponentScheduler(IDagDivider &div, Scheduler &scheduler) + : AbstractWavefrontScheduler(div, scheduler) {} - std::string getScheduleName() const override { return "IsomorphicWavefrontComponentScheduler"; } + std::string GetScheduleName() const override { return "IsomorphicWavefrontComponentScheduler"; } - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); - const auto &original_arch = instance.getArchitecture(); + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); + const auto &originalArch = instance.GetArchitecture(); - std::vector> 
global_ids_by_type(original_arch.getNumberOfProcessorTypes()); - for (unsigned i = 0; i < original_arch.numberOfProcessors(); ++i) { - global_ids_by_type[original_arch.processorType(i)].push_back(i); + std::vector> globalIdsByType(originalArch.GetNumberOfProcessorTypes()); + for (unsigned i = 0; i < originalArch.NumberOfProcessors(); ++i) { + globalIdsByType[originalArch.ProcessorType(i)].push_back(i); } - IsomorphismGroups iso_groups; - std::vector>>> vertex_maps - = this->divider->divide(instance.getComputationalDag()); - iso_groups.compute_isomorphism_groups(vertex_maps, instance.getComputationalDag()); + IsomorphismGroups isoGroups; + std::vector>>> vertexMaps + = this->divider_->Divide(instance.GetComputationalDag()); + isoGroups.ComputeIsomorphismGroups(vertexMaps, instance.GetComputationalDag()); - unsigned superstep_offset = 0; - for (std::size_t i = 0; i < vertex_maps.size(); ++i) { - if (this->enable_debug_prints) { + unsigned superstepOffset = 0; + for (std::size_t i = 0; i < vertexMaps.size(); ++i) { + if (this->enableDebugPrints_) { std::cout << "\n--- Processing Wavefront Set " << i << " ---" << std::endl; } - unsigned supersteps_in_set = 0; - auto status = process_wavefront_set(schedule, - vertex_maps[i], - iso_groups.get_isomorphism_groups()[i], - iso_groups.get_isomorphism_groups_subgraphs()[i], - global_ids_by_type, - superstep_offset, - supersteps_in_set); - if (status != RETURN_STATUS::OSP_SUCCESS) { + unsigned superstepsInSet = 0; + auto status = this->ProcessWavefrontSet(schedule, + vertexMaps[i], + isoGroups.GetIsomorphismGroups()[i], + isoGroups.GetIsomorphismGroupsSubgraphs()[i], + globalIdsByType, + superstepOffset, + superstepsInSet); + if (status != ReturnStatus::OSP_SUCCESS) { return status; } - superstep_offset += supersteps_in_set; + superstepOffset += superstepsInSet; } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } private: - RETURN_STATUS process_wavefront_set(BspSchedule &schedule, - const std::vector>> 
&vertex_map_for_set, - const std::vector> &iso_groups_for_set, - const std::vector &subgraphs_for_set, - const std::vector> &global_ids_by_type, - unsigned superstep_offset, - unsigned &supersteps_in_set) { - const auto &instance = schedule.getInstance(); - const auto &original_arch = instance.getArchitecture(); - const auto &original_proc_type_count = original_arch.getProcessorTypeCount(); - - if constexpr (this->enable_debug_prints) { - std::cout << " Found " << iso_groups_for_set.size() << " isomorphism groups in this wavefront set." << std::endl; + ReturnStatus ProcessWavefrontSet(BspSchedule &schedule, + const std::vector>> &vertexMapForSet, + const std::vector> &isoGroupsForSet, + const std::vector &subgraphsForSet, + const std::vector> &globalIdsByType, + unsigned superstepOffset, + unsigned &superstepsInSet) { + const auto &instance = schedule.GetInstance(); + const auto &originalArch = instance.GetArchitecture(); + const auto &originalProcTypeCount = originalArch.GetProcessorTypeCount(); + + if constexpr (this->enableDebugPrints_) { + std::cout << " Found " << isoGroupsForSet.size() << " isomorphism groups in this wavefront set." 
<< std::endl; } // Calculate work for each isomorphism group - std::vector> group_work_by_type(iso_groups_for_set.size(), - std::vector(original_proc_type_count.size(), 0.0)); - - for (std::size_t j = 0; j < iso_groups_for_set.size(); ++j) { - const constr_graph_t &rep_sub_dag = subgraphs_for_set[j]; - for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) { - const double rep_work_for_type = sumOfCompatibleWorkWeights(rep_sub_dag, instance, type_idx); - group_work_by_type[j][type_idx] = rep_work_for_type * static_cast(iso_groups_for_set[j].size()); + std::vector> groupWorkByType(isoGroupsForSet.size(), + std::vector(originalProcTypeCount.size(), 0.0)); + + for (std::size_t j = 0; j < isoGroupsForSet.size(); ++j) { + const ConstrGraphT &repSubDag = subgraphsForSet[j]; + for (unsigned typeIdx = 0; typeIdx < originalProcTypeCount.size(); ++typeIdx) { + const double repWorkForType = SumOfCompatibleWorkWeights(repSubDag, instance, typeIdx); + groupWorkByType[j][typeIdx] = repWorkForType * static_cast(isoGroupsForSet[j].size()); } } - assert(this->validateWorkDistribution(subgraphs_for_set, instance)); + assert(this->ValidateWorkDistribution(subgraphsForSet, instance)); // Distribute processors among isomorphism groups - std::vector> group_proc_allocations(iso_groups_for_set.size(), - std::vector(original_proc_type_count.size())); + std::vector> groupProcAllocations(isoGroupsForSet.size(), + std::vector(originalProcTypeCount.size())); - for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) { - std::vector work_for_this_type; - for (size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) { - work_for_this_type.push_back(group_work_by_type[group_idx][type_idx]); + for (unsigned typeIdx = 0; typeIdx < originalProcTypeCount.size(); ++typeIdx) { + std::vector workForThisType; + for (size_t groupIdx = 0; groupIdx < isoGroupsForSet.size(); ++groupIdx) { + 
workForThisType.push_back(groupWorkByType[groupIdx][typeIdx]); } - std::vector type_allocation; - bool starvation_hit - = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation); + std::vector typeAllocation; + bool starvationHit = this->DistributeProcessors(originalProcTypeCount[typeIdx], workForThisType, typeAllocation); - if (starvation_hit) { - if constexpr (this->enable_debug_prints) { - std::cerr << "ERROR: Processor starvation detected for type " << type_idx + if (starvationHit) { + if constexpr (this->enableDebugPrints_) { + std::cerr << "ERROR: Processor starvation detected for type " << typeIdx << ". Not enough processors to assign one to each active isomorphism group." << std::endl; } - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } - for (size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) { - group_proc_allocations[group_idx][type_idx] = type_allocation[group_idx]; + for (size_t groupIdx = 0; groupIdx < isoGroupsForSet.size(); ++groupIdx) { + groupProcAllocations[groupIdx][typeIdx] = typeAllocation[groupIdx]; } } // Schedule each group - unsigned max_supersteps = 0; - std::vector proc_type_offsets(original_arch.getNumberOfProcessorTypes(), 0); - - std::vector num_supersteps_per_iso_group(iso_groups_for_set.size()); - - for (std::size_t j = 0; j < iso_groups_for_set.size(); ++j) { - unsigned supersteps_for_group = 0; - auto status = schedule_isomorphism_group(schedule, - vertex_map_for_set, - iso_groups_for_set[j], - subgraphs_for_set[j], - group_proc_allocations[j], - global_ids_by_type, - proc_type_offsets, - superstep_offset, - supersteps_for_group); - if (status != RETURN_STATUS::OSP_SUCCESS) { + unsigned maxSupersteps = 0; + std::vector procTypeOffsets(originalArch.GetNumberOfProcessorTypes(), 0); + + std::vector numSuperstepsPerIsoGroup(isoGroupsForSet.size()); + + for (std::size_t j = 0; j < isoGroupsForSet.size(); ++j) { + unsigned superstepsForGroup = 0; + auto 
status = this->ScheduleIsomorphismGroup(schedule, + vertexMapForSet, + isoGroupsForSet[j], + subgraphsForSet[j], + groupProcAllocations[j], + globalIdsByType, + procTypeOffsets, + superstepOffset, + superstepsForGroup); + if (status != ReturnStatus::OSP_SUCCESS) { return status; } - num_supersteps_per_iso_group[j] = supersteps_for_group; - max_supersteps = std::max(max_supersteps, supersteps_for_group); + numSuperstepsPerIsoGroup[j] = superstepsForGroup; + maxSupersteps = std::max(maxSupersteps, superstepsForGroup); // Advance offsets for the next group - for (size_t k = 0; k < group_proc_allocations[j].size(); ++k) { - proc_type_offsets[k] += group_proc_allocations[j][k]; + for (size_t k = 0; k < groupProcAllocations[j].size(); ++k) { + procTypeOffsets[k] += groupProcAllocations[j][k]; } } - for (std::size_t j = 0; j < iso_groups_for_set.size(); ++j) { - num_supersteps_per_iso_group[j] = max_supersteps - num_supersteps_per_iso_group[j]; + for (std::size_t j = 0; j < isoGroupsForSet.size(); ++j) { + numSuperstepsPerIsoGroup[j] = maxSupersteps - numSuperstepsPerIsoGroup[j]; - if (num_supersteps_per_iso_group[j] > 0) { // This is the padding - const auto &group_members = iso_groups_for_set[j]; - for (const auto &original_comp_idx : group_members) { - const auto &component_vertices = vertex_map_for_set[original_comp_idx]; - for (const auto &vertex : component_vertices) { - schedule.setAssignedSuperstep(vertex, schedule.assignedSuperstep(vertex) + num_supersteps_per_iso_group[j]); + if (numSuperstepsPerIsoGroup[j] > 0) { // This is the padding + const auto &groupMembers = isoGroupsForSet[j]; + for (const auto &originalCompIdx : groupMembers) { + const auto &componentVertices = vertexMapForSet[originalCompIdx]; + for (const auto &vertex : componentVertices) { + schedule.SetAssignedSuperstep(vertex, schedule.AssignedSuperstep(vertex) + numSuperstepsPerIsoGroup[j]); } } } } - supersteps_in_set = max_supersteps; - return RETURN_STATUS::OSP_SUCCESS; + superstepsInSet = 
maxSupersteps; + return ReturnStatus::OSP_SUCCESS; } - RETURN_STATUS schedule_isomorphism_group(BspSchedule &schedule, - const std::vector>> &vertex_map_for_set, - const std::vector &group_members, - const constr_graph_t &rep_sub_dag, - const std::vector &procs_for_group, - const std::vector> &global_ids_by_type, - const std::vector &proc_type_offsets, - unsigned superstep_offset, - unsigned &supersteps_for_group) { - const auto &instance = schedule.getInstance(); - const auto &original_arch = instance.getArchitecture(); - const size_t num_members = group_members.size(); - supersteps_for_group = 0; - - bool scarcity_found = false; - if (num_members > 0) { - for (unsigned type_idx = 0; type_idx < procs_for_group.size(); ++type_idx) { - if (procs_for_group[type_idx] % num_members != 0) { - scarcity_found = true; + ReturnStatus ScheduleIsomorphismGroup(BspSchedule &schedule, + const std::vector>> &vertexMapForSet, + const std::vector &groupMembers, + const ConstrGraphT &repSubDag, + const std::vector &procsForGroup, + const std::vector> &globalIdsByType, + const std::vector &procTypeOffsets, + unsigned superstepOffset, + unsigned &superstepsForGroup) { + const auto &instance = schedule.GetInstance(); + const auto &originalArch = instance.GetArchitecture(); + const size_t numMembers = groupMembers.size(); + superstepsForGroup = 0; + + bool scarcityFound = false; + if (numMembers > 0) { + for (unsigned typeIdx = 0; typeIdx < procsForGroup.size(); ++typeIdx) { + if (procsForGroup[typeIdx] % numMembers != 0) { + scarcityFound = true; break; } } } - if (scarcity_found) { + if (scarcityFound) { // --- SCARCITY/INDIVISIBLE CASE: Schedule sequentially on the shared processor block --- - if constexpr (this->enable_debug_prints) { - std::cout << " Group with " << num_members << " members: Scarcity/Indivisible case. Scheduling sequentially." + if constexpr (this->enableDebugPrints_) { + std::cout << " Group with " << numMembers << " members: Scarcity/Indivisible case. 
Scheduling sequentially." << std::endl; } - BspInstance sub_instance(rep_sub_dag, this->createSubArchitecture(original_arch, procs_for_group)); - sub_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); - auto &sub_architecture = sub_instance.getArchitecture(); + BspInstance subInstance(repSubDag, this->CreateSubArchitecture(originalArch, procsForGroup)); + subInstance.setNodeProcessorCompatibility(instance.GetProcessorCompatibilityMatrix()); + auto &subArchitecture = subInstance.GetArchitecture(); - if constexpr (this->enable_debug_prints) { + if constexpr (this->enableDebugPrints_) { std::cout << " Sub-architecture for sequential scheduling: { "; - for (unsigned type_idx = 0; type_idx < sub_architecture.getNumberOfProcessorTypes(); ++type_idx) { - std::cout << "Type " << type_idx << ": " << sub_architecture.getProcessorTypeCount()[type_idx] << "; "; + for (unsigned typeIdx = 0; typeIdx < subArchitecture.GetNumberOfProcessorTypes(); ++typeIdx) { + std::cout << "Type " << typeIdx << ": " << subArchitecture.GetProcessorTypeCount()[typeIdx] << "; "; } std::cout << "}" << std::endl; } - unsigned sequential_superstep_offset = 0; - for (const auto &group_member_idx : group_members) { - BspSchedule sub_schedule(sub_instance); - auto status = this->scheduler->computeSchedule(sub_schedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + unsigned sequentialSuperstepOffset = 0; + for (const auto &groupMemberIdx : groupMembers) { + BspSchedule subSchedule(subInstance); + auto status = this->scheduler_->ComputeSchedule(subSchedule); + if (status != ReturnStatus::OSP_SUCCESS && status != ReturnStatus::BEST_FOUND) { return status; } - const auto sub_proc_type_count = sub_architecture.getProcessorTypeCount(); - std::vector sub_proc_type_corrections(sub_architecture.getNumberOfProcessorTypes(), 0); - for (std::size_t k = 1; k < sub_proc_type_corrections.size(); ++k) { - sub_proc_type_corrections[k] = 
sub_proc_type_corrections[k - 1] + sub_proc_type_count[k - 1]; + const auto subProcTypeCount = subArchitecture.GetProcessorTypeCount(); + std::vector subProcTypeCorrections(subArchitecture.GetNumberOfProcessorTypes(), 0); + for (std::size_t k = 1; k < subProcTypeCorrections.size(); ++k) { + subProcTypeCorrections[k] = subProcTypeCorrections[k - 1] + subProcTypeCount[k - 1]; } - std::vector> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(), - vertex_map_for_set[group_member_idx].end()); - std::sort(sorted_component_vertices.begin(), sorted_component_vertices.end()); - - vertex_idx_t subdag_vertex = 0; - for (const auto &vertex : sorted_component_vertices) { - const unsigned proc_in_sub_sched = sub_schedule.assignedProcessor(subdag_vertex); - const unsigned proc_type = sub_architecture.processorType(proc_in_sub_sched); - const unsigned local_proc_id_within_type = proc_in_sub_sched - sub_proc_type_corrections[proc_type]; - unsigned global_proc_id - = global_ids_by_type[proc_type][proc_type_offsets[proc_type] + local_proc_id_within_type]; - - schedule.setAssignedProcessor(vertex, global_proc_id); - schedule.setAssignedSuperstep( - vertex, superstep_offset + sequential_superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex)); - subdag_vertex++; + std::vector> sortedComponentVertices(vertexMapForSet[groupMemberIdx].begin(), + vertexMapForSet[groupMemberIdx].end()); + std::sort(sortedComponentVertices.begin(), sortedComponentVertices.end()); + + VertexIdxT subdagVertex = 0; + for (const auto &vertex : sortedComponentVertices) { + const unsigned procInSubSched = subSchedule.AssignedProcessor(subdagVertex); + const unsigned procType = subArchitecture.ProcessorType(procInSubSched); + const unsigned localProcIdWithinType = procInSubSched - subProcTypeCorrections[procType]; + unsigned globalProcId = globalIdsByType[procType][procTypeOffsets[procType] + localProcIdWithinType]; + + schedule.SetAssignedProcessor(vertex, globalProcId); + 
schedule.SetAssignedSuperstep( + vertex, superstepOffset + sequentialSuperstepOffset + subSchedule.AssignedSuperstep(subdagVertex)); + subdagVertex++; } - sequential_superstep_offset += sub_schedule.numberOfSupersteps(); + sequentialSuperstepOffset += subSchedule.NumberOfSupersteps(); } - supersteps_for_group = sequential_superstep_offset; + superstepsForGroup = sequentialSuperstepOffset; } else { // --- ABUNDANCE/DIVISIBLE CASE: Replicate Schedule --- - if constexpr (this->enable_debug_prints) { - std::cout << " Group with " << num_members << " members: Abundance/Divisible case. Replicating schedule." + if constexpr (this->enableDebugPrints_) { + std::cout << " Group with " << numMembers << " members: Abundance/Divisible case. Replicating schedule." << std::endl; } - std::vector single_sub_dag_proc_types = procs_for_group; - if (num_members > 0) { - for (auto &count : single_sub_dag_proc_types) { - count /= static_cast(num_members); + std::vector singleSubDagProcTypes = procsForGroup; + if (numMembers > 0) { + for (auto &count : singleSubDagProcTypes) { + count /= static_cast(numMembers); } } - BspInstance sub_instance(rep_sub_dag, - this->createSubArchitecture(original_arch, single_sub_dag_proc_types)); - sub_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); + BspInstance subInstance(repSubDag, this->CreateSubArchitecture(originalArch, singleSubDagProcTypes)); + subInstance.SetNodeProcessorCompatibility(instance.GetProcessorCompatibilityMatrix()); - if constexpr (this->enable_debug_prints) { - const auto &sub_arch = sub_instance.getArchitecture(); + if constexpr (this->enableDebugPrints_) { + const auto &subArch = subInstance.GetArchitecture(); std::cout << " Sub-architecture for replication (per member): { "; - for (unsigned type_idx = 0; type_idx < sub_arch.getNumberOfProcessorTypes(); ++type_idx) { - std::cout << "Type " << type_idx << ": " << sub_arch.getProcessorTypeCount()[type_idx] << "; "; + for (unsigned typeIdx = 0; 
typeIdx < subArch.GetNumberOfProcessorTypes(); ++typeIdx) { + std::cout << "Type " << typeIdx << ": " << subArch.GetProcessorTypeCount()[typeIdx] << "; "; } std::cout << "}" << std::endl; } - BspSchedule sub_schedule(sub_instance); - auto status = this->scheduler->computeSchedule(sub_schedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + BspSchedule subSchedule(subInstance); + auto status = this->scheduler_->ComputeSchedule(subSchedule); + if (status != ReturnStatus::OSP_SUCCESS && status != ReturnStatus::BEST_FOUND) { return status; } - const auto sub_proc_type_count = sub_schedule.getInstance().getArchitecture().getProcessorTypeCount(); - std::vector sub_proc_type_corrections(sub_proc_type_count.size(), 0); - for (std::size_t k = 1; k < sub_proc_type_corrections.size(); ++k) { - sub_proc_type_corrections[k] = sub_proc_type_corrections[k - 1] + sub_proc_type_count[k - 1]; + const auto subProcTypeCount = subSchedule.GetInstance().GetArchitecture().GetProcessorTypeCount(); + std::vector subProcTypeCorrections(subProcTypeCount.size(), 0); + for (std::size_t k = 1; k < subProcTypeCorrections.size(); ++k) { + subProcTypeCorrections[k] = subProcTypeCorrections[k - 1] + subProcTypeCount[k - 1]; } - std::vector current_member_proc_offsets = proc_type_offsets; - for (const auto &group_member_idx : group_members) { - std::vector> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(), - vertex_map_for_set[group_member_idx].end()); - std::sort(sorted_component_vertices.begin(), sorted_component_vertices.end()); - - vertex_idx_t subdag_vertex = 0; - for (const auto &vertex : sorted_component_vertices) { - const unsigned proc_in_sub_sched = sub_schedule.assignedProcessor(subdag_vertex); - const unsigned proc_type = sub_schedule.getInstance().getArchitecture().processorType(proc_in_sub_sched); - const unsigned local_proc_id_within_type = proc_in_sub_sched - sub_proc_type_corrections[proc_type]; - unsigned global_proc_id 
- = global_ids_by_type[proc_type][current_member_proc_offsets[proc_type] + local_proc_id_within_type]; - - schedule.setAssignedProcessor(vertex, global_proc_id); - schedule.setAssignedSuperstep(vertex, superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex)); - subdag_vertex++; + std::vector currentMemberProcOffsets = procTypeOffsets; + for (const auto &groupMemberIdx : groupMembers) { + std::vector> sortedComponentVertices(vertexMapForSet[groupMemberIdx].begin(), + vertexMapForSet[groupMemberIdx].end()); + std::sort(sortedComponentVertices.begin(), sortedComponentVertices.end()); + + VertexIdxT subdagVertex = 0; + for (const auto &vertex : sortedComponentVertices) { + const unsigned procInSubSched = subSchedule.AssignedProcessor(subdagVertex); + const unsigned procType = subSchedule.GetInstance().GetArchitecture().ProcessorType(procInSubSched); + const unsigned localProcIdWithinType = procInSubSched - subProcTypeCorrections[procType]; + unsigned globalProcId = globalIdsByType[procType][currentMemberProcOffsets[procType] + localProcIdWithinType]; + + schedule.SetAssignedProcessor(vertex, globalProcId); + schedule.SetAssignedSuperstep(vertex, superstepOffset + subSchedule.AssignedSuperstep(subdagVertex)); + subdagVertex++; } - for (size_t k = 0; k < sub_proc_type_count.size(); ++k) { - current_member_proc_offsets[k] += sub_proc_type_count[k]; + for (size_t k = 0; k < subProcTypeCount.size(); ++k) { + currentMemberProcOffsets[k] += subProcTypeCount[k]; } } - supersteps_for_group = sub_schedule.numberOfSupersteps(); + superstepsForGroup = subSchedule.NumberOfSupersteps(); } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } }; -template -using IsomorphicWavefrontComponentScheduler_def_int_t = IsomorphicWavefrontComponentScheduler; +template +using IsomorphicWavefrontComponentSchedulerDefIntT = IsomorphicWavefrontComponentScheduler; } // namespace osp diff --git a/include/osp/dag_divider/IsomorphismGroups.hpp 
b/include/osp/dag_divider/IsomorphismGroups.hpp index 46b91c3d..2d01f72b 100644 --- a/include/osp/dag_divider/IsomorphismGroups.hpp +++ b/include/osp/dag_divider/IsomorphismGroups.hpp @@ -27,24 +27,24 @@ limitations under the License. namespace osp { -template +template class IsomorphismGroups { private: - std::vector>> isomorphism_groups; + std::vector>> isomorphismGroups_; - std::vector> isomorphism_groups_subgraphs; + std::vector> isomorphismGroupsSubgraphs_; - void print_isomorphism_groups() const { + void PrintIsomorphismGroups() const { std::cout << "Isomorphism groups: " << std::endl; - for (std::size_t i = 0; i < isomorphism_groups.size(); i++) { + for (std::size_t i = 0; i < isomorphismGroups_.size(); i++) { std::cout << "Level " << i << std::endl; - for (size_t j = 0; j < isomorphism_groups[i].size(); j++) { - std::cout << "Group " << j << " of size " << isomorphism_groups_subgraphs[i][j].num_vertices() << " : "; + for (size_t j = 0; j < isomorphismGroups_[i].size(); j++) { + std::cout << "Group " << j << " of size " << isomorphismGroupsSubgraphs_[i][j].NumVertices() << " : "; // ComputationalDagWriter writer(isomorphism_groups_subgraphs[i][j]); // writer.write_dot("isomorphism_group_" + std::to_string(i) + "_" + std::to_string(j) + ".dot"); - for (const auto &vertex : isomorphism_groups[i][j]) { + for (const auto &vertex : isomorphismGroups_[i][j]) { std::cout << vertex << " "; } std::cout << std::endl; @@ -69,9 +69,9 @@ class IsomorphismGroups { * @return const std::vector>>& * A constant reference to the vertex maps. */ - const std::vector>> &get_isomorphism_groups() const { return isomorphism_groups; } + const std::vector>> &GetIsomorphismGroups() const { return isomorphismGroups_; } - std::vector>> &get_isomorphism_groups() { return isomorphism_groups; } + std::vector>> &GetIsomorphismGroups() { return isomorphismGroups_; } /** * @brief Retrieves the isomorphism groups subgraphs. 
@@ -82,14 +82,12 @@ class IsomorphismGroups { * - The second dimension represents the groups of isomorphic connected components. * - The third dimension contains the subgraph of the isomorphism group. * - * @return const std::vector>& A constant reference + * @return const std::vector>& A constant reference * to the isomorphism groups subgraphs. */ - const std::vector> &get_isomorphism_groups_subgraphs() const { - return isomorphism_groups_subgraphs; - } + const std::vector> &GetIsomorphismGroupsSubgraphs() const { return isomorphismGroupsSubgraphs_; } - std::vector> &get_isomorphism_groups_subgraphs() { return isomorphism_groups_subgraphs; } + std::vector> &GetIsomorphismGroupsSubgraphs() { return isomorphismGroupsSubgraphs_; } /** * @brief Computes the isomorphism map for a computed division of the current DAG. @@ -98,102 +96,102 @@ class IsomorphismGroups { * * Reqires the dag to be divided before calling this function. */ - void compute_isomorphism_groups(std::vector>>> &vertex_maps, const Graph_t &dag) { - isomorphism_groups = std::vector>>(vertex_maps.size()); + void ComputeIsomorphismGroups(std::vector>>> &vertexMaps, const GraphT &dag) { + isomorphismGroups_ = std::vector>>(vertexMaps.size()); - isomorphism_groups_subgraphs = std::vector>(vertex_maps.size()); + isomorphismGroupsSubgraphs_ = std::vector>(vertexMaps.size()); - for (size_t i = 0; i < vertex_maps.size(); i++) { - for (std::size_t j = 0; j < vertex_maps[i].size(); j++) { - Constr_Graph_t current_subgraph; - create_induced_subgraph(dag, current_subgraph, vertex_maps[i][j]); + for (size_t i = 0; i < vertexMaps.size(); i++) { + for (std::size_t j = 0; j < vertexMaps[i].size(); j++) { + ConstrGraphT currentSubgraph; + CreateInducedSubgraph(dag, currentSubgraph, vertexMaps[i][j]); - bool isomorphism_group_found = false; - for (size_t k = 0; k < isomorphism_groups[i].size(); k++) { - if (are_isomorphic_by_merkle_hash(isomorphism_groups_subgraphs[i][k], current_subgraph)) { - 
isomorphism_groups[i][k].emplace_back(j); - isomorphism_group_found = true; + bool isomorphismGroupFound = false; + for (size_t k = 0; k < isomorphismGroups_[i].size(); k++) { + if (AreIsomorphicByMerkleHash(isomorphismGroupsSubgraphs_[i][k], currentSubgraph)) { + isomorphismGroups_[i][k].emplace_back(j); + isomorphismGroupFound = true; break; } } - if (!isomorphism_group_found) { - isomorphism_groups[i].emplace_back(std::vector{j}); - isomorphism_groups_subgraphs[i].emplace_back(std::move(current_subgraph)); + if (!isomorphismGroupFound) { + isomorphismGroups_[i].emplace_back(std::vector{j}); + isomorphismGroupsSubgraphs_[i].emplace_back(std::move(currentSubgraph)); } } } - print_isomorphism_groups(); + PrintIsomorphismGroups(); } /** * @brief Merges large isomorphism groups to avoid resource scarcity in the scheduler. - * * @param vertex_maps The original vertex maps, which will be modified in place. + * * @param vertexMaps The original vertex maps, which will be modified in place. * @param dag The full computational DAG. * @param merge_threshold If a group has more members than this, it will be merged. * @param target_group_count The number of larger groups to create from a single large group. */ - void merge_large_isomorphism_groups(std::vector>>> &vertex_maps, - const Graph_t &dag, - size_t merge_threshold, - size_t target_group_count = 8) { + void MergeLargeIsomorphismGroups(std::vector>>> &vertexMaps, + const GraphT &dag, + size_t mergeThreshold, + size_t targetGroupCount = 8) { // Ensure the merge logic is sound: the threshold must be larger than the target. 
- assert(merge_threshold > target_group_count); + assert(mergeThreshold > targetGroupCount); - for (size_t i = 0; i < isomorphism_groups.size(); ++i) { - std::vector>> new_vertex_maps_for_level; - std::vector> new_iso_groups_for_level; - std::vector new_iso_subgraphs_for_level; + for (size_t i = 0; i < isomorphismGroups_.size(); ++i) { + std::vector>> newVertexMapsForLevel; + std::vector> newIsoGroupsForLevel; + std::vector newIsoSubgraphsForLevel; - size_t new_component_idx = 0; + size_t newComponentIdx = 0; - for (size_t j = 0; j < isomorphism_groups[i].size(); ++j) { - const auto &group = isomorphism_groups[i][j]; + for (size_t j = 0; j < isomorphismGroups_[i].size(); ++j) { + const auto &group = isomorphismGroups_[i][j]; - if (group.size() <= merge_threshold) { + if (group.size() <= mergeThreshold) { // This group is small enough, copy it over as is. - std::vector new_group; - for (const auto &original_comp_idx : group) { - new_vertex_maps_for_level.push_back(vertex_maps[i][original_comp_idx]); - new_group.push_back(new_component_idx++); + std::vector newGroup; + for (const auto &originalCompIdx : group) { + newVertexMapsForLevel.push_back(vertexMaps[i][originalCompIdx]); + newGroup.push_back(newComponentIdx++); } - new_iso_groups_for_level.push_back(new_group); - new_iso_subgraphs_for_level.push_back(isomorphism_groups_subgraphs[i][j]); + newIsoGroupsForLevel.push_back(newGroup); + newIsoSubgraphsForLevel.push_back(isomorphismGroupsSubgraphs_[i][j]); } else { // This group is too large and needs to be merged. - std::cout << "Merging iso group of size " << group.size() << " into " << target_group_count << " new groups." + std::cout << "Merging iso group of size " << group.size() << " into " << targetGroupCount << " new groups." 
<< std::endl; - size_t base_mult = group.size() / target_group_count; - size_t remainder = group.size() % target_group_count; + size_t baseMult = group.size() / targetGroupCount; + size_t remainder = group.size() % targetGroupCount; - std::vector new_merged_group_indices; - size_t current_original_idx = 0; + std::vector newMergedGroupIndices; + size_t currentOriginalIdx = 0; - for (size_t k = 0; k < target_group_count; ++k) { - std::vector> merged_component; - size_t num_to_merge = base_mult + (k < remainder ? 1 : 0); + for (size_t k = 0; k < targetGroupCount; ++k) { + std::vector> mergedComponent; + size_t numToMerge = baseMult + (k < remainder ? 1 : 0); - for (size_t m = 0; m < num_to_merge; ++m) { - const auto &original_comp = vertex_maps[i][group[current_original_idx++]]; - merged_component.insert(merged_component.end(), original_comp.begin(), original_comp.end()); + for (size_t m = 0; m < numToMerge; ++m) { + const auto &originalComp = vertexMaps[i][group[currentOriginalIdx++]]; + mergedComponent.insert(mergedComponent.end(), originalComp.begin(), originalComp.end()); } - std::sort(merged_component.begin(), merged_component.end()); - new_vertex_maps_for_level.push_back(merged_component); - new_merged_group_indices.push_back(new_component_idx++); + std::sort(mergedComponent.begin(), mergedComponent.end()); + newVertexMapsForLevel.push_back(mergedComponent); + newMergedGroupIndices.push_back(newComponentIdx++); } - new_iso_groups_for_level.push_back(new_merged_group_indices); - Constr_Graph_t new_rep_subgraph; - create_induced_subgraph(dag, new_rep_subgraph, new_vertex_maps_for_level.back()); - new_iso_subgraphs_for_level.push_back(new_rep_subgraph); + newIsoGroupsForLevel.push_back(newMergedGroupIndices); + ConstrGraphT newRepSubgraph; + CreateInducedSubgraph(dag, newRepSubgraph, newVertexMapsForLevel.back()); + newIsoSubgraphsForLevel.push_back(newRepSubgraph); } } // Replace the old level data with the new, potentially merged data. 
- vertex_maps[i] = new_vertex_maps_for_level; - isomorphism_groups[i] = new_iso_groups_for_level; - isomorphism_groups_subgraphs[i] = new_iso_subgraphs_for_level; + vertexMaps[i] = newVertexMapsForLevel; + isomorphismGroups_[i] = newIsoGroupsForLevel; + isomorphismGroupsSubgraphs_[i] = newIsoSubgraphsForLevel; } // print_isomorphism_groups(); } diff --git a/include/osp/dag_divider/WavefrontComponentScheduler.hpp b/include/osp/dag_divider/WavefrontComponentScheduler.hpp index a38d801f..28a7ee88 100644 --- a/include/osp/dag_divider/WavefrontComponentScheduler.hpp +++ b/include/osp/dag_divider/WavefrontComponentScheduler.hpp @@ -24,129 +24,126 @@ namespace osp { * @class WavefrontComponentScheduler * @brief Schedules wavefronts by treating each component individually. */ -template -class WavefrontComponentScheduler : public AbstractWavefrontScheduler { +template +class WavefrontComponentScheduler : public AbstractWavefrontScheduler { public: - WavefrontComponentScheduler(IDagDivider &div, Scheduler &scheduler_) - : AbstractWavefrontScheduler(div, scheduler_) {} + WavefrontComponentScheduler(IDagDivider &div, Scheduler &scheduler) + : AbstractWavefrontScheduler(div, scheduler) {} - std::string getScheduleName() const override { return "WavefrontComponentScheduler"; } + std::string GetScheduleName() const override { return "WavefrontComponentScheduler"; } - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); - const auto &original_arch = instance.getArchitecture(); - const auto &original_proc_type_count = original_arch.getProcessorTypeCount(); - const auto &computational_dag = instance.getComputationalDag(); + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); + const auto &originalArch = instance.GetArchitecture(); + const auto &originalProcTypeCount = originalArch.GetProcessorTypeCount(); + const auto &computationalDag = 
instance.GetComputationalDag(); - std::vector> global_ids_by_type(original_arch.getNumberOfProcessorTypes()); - for (unsigned i = 0; i < original_arch.numberOfProcessors(); ++i) { - global_ids_by_type[original_arch.processorType(i)].push_back(i); + std::vector> globalIdsByType(originalArch.GetNumberOfProcessorTypes()); + for (unsigned i = 0; i < originalArch.NumberOfProcessors(); ++i) { + globalIdsByType[originalArch.ProcessorType(i)].push_back(i); } - auto vertex_maps = this->divider->divide(computational_dag); - unsigned superstep_offset = 0; + auto vertexMaps = this->divider_->Divide(computationalDag); + unsigned superstepOffset = 0; - for (std::size_t i = 0; i < vertex_maps.size(); ++i) { // For each wavefront set - if (this->enable_debug_prints) { + for (std::size_t i = 0; i < vertexMaps.size(); ++i) { // For each wavefront set + if (this->enableDebugPrints_) { std::cout << "\n--- Processing Wavefront Set " << i << " (No Isomorphism) ---" << std::endl; } - const auto &components = vertex_maps[i]; - std::vector sub_dags(components.size()); - std::vector> work_by_type(components.size(), - std::vector(original_proc_type_count.size(), 0.0)); + const auto &components = vertexMaps[i]; + std::vector subDags(components.size()); + std::vector> workByType(components.size(), std::vector(originalProcTypeCount.size(), 0.0)); for (size_t j = 0; j < components.size(); ++j) { - create_induced_subgraph(computational_dag, sub_dags[j], components[j]); - for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) { - work_by_type[j][type_idx] = sumOfCompatibleWorkWeights(sub_dags[j], instance, type_idx); + CreateInducedSubgraph(computationalDag, subDags[j], components[j]); + for (unsigned typeIdx = 0; typeIdx < originalProcTypeCount.size(); ++typeIdx) { + workByType[j][typeIdx] = SumOfCompatibleWorkWeights(subDags[j], instance, typeIdx); } } - assert(this->validateWorkDistribution(sub_dags, instance)); + assert(this->ValidateWorkDistribution(subDags, 
instance)); // Distribute Processors - std::vector> proc_allocations(components.size(), - std::vector(original_proc_type_count.size())); - for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) { - std::vector work_for_this_type(components.size()); - for (size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) { - work_for_this_type[comp_idx] = work_by_type[comp_idx][type_idx]; + std::vector> procAllocations(components.size(), + std::vector(originalProcTypeCount.size())); + for (unsigned typeIdx = 0; typeIdx < originalProcTypeCount.size(); ++typeIdx) { + std::vector workForThisType(components.size()); + for (size_t compIdx = 0; compIdx < components.size(); ++compIdx) { + workForThisType[compIdx] = workByType[compIdx][typeIdx]; } - std::vector type_allocation; - bool starvation_hit - = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation); + std::vector typeAllocation; + bool starvationHit = this->DistributeProcessors(originalProcTypeCount[typeIdx], workForThisType, typeAllocation); - if (starvation_hit) { - if constexpr (this->enable_debug_prints) { - std::cerr << "ERROR: Processor starvation detected for type " << type_idx << " in wavefront set " << i + if (starvationHit) { + if constexpr (this->enableDebugPrints_) { + std::cerr << "ERROR: Processor starvation detected for type " << typeIdx << " in wavefront set " << i << ". Not enough processors to assign one to each active component." 
<< std::endl; } - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } - for (size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) { - proc_allocations[comp_idx][type_idx] = type_allocation[comp_idx]; + for (size_t compIdx = 0; compIdx < components.size(); ++compIdx) { + procAllocations[compIdx][typeIdx] = typeAllocation[compIdx]; } } - unsigned max_number_supersteps = 0; - std::vector proc_type_offsets(original_arch.getNumberOfProcessorTypes(), 0); + unsigned maxNumberSupersteps = 0; + std::vector procTypeOffsets(originalArch.GetNumberOfProcessorTypes(), 0); for (std::size_t j = 0; j < components.size(); ++j) { - BspArchitecture sub_architecture = this->createSubArchitecture(original_arch, proc_allocations[j]); - if constexpr (this->enable_debug_prints) { + BspArchitecture subArchitecture = this->CreateSubArchitecture(originalArch, procAllocations[j]); + if constexpr (this->enableDebugPrints_) { std::cout << " Component " << j << " sub-architecture: { "; - for (unsigned type_idx = 0; type_idx < sub_architecture.getNumberOfProcessorTypes(); ++type_idx) { - std::cout << "Type " << type_idx << ": " << sub_architecture.getProcessorTypeCount()[type_idx] << "; "; + for (unsigned typeIdx = 0; typeIdx < subArchitecture.GetNumberOfProcessorTypes(); ++typeIdx) { + std::cout << "Type " << typeIdx << ": " << subArchitecture.GetProcessorTypeCount()[typeIdx] << "; "; } std::cout << "}" << std::endl; } - BspInstance sub_instance(sub_dags[j], sub_architecture); - sub_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); + BspInstance subInstance(subDags[j], subArchitecture); + subInstance.SetNodeProcessorCompatibility(instance.GetProcessorCompatibilityMatrix()); - BspSchedule sub_schedule(sub_instance); - const auto status = this->scheduler->computeSchedule(sub_schedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + BspSchedule subSchedule(subInstance); + const auto status = 
this->scheduler_->ComputeSchedule(subSchedule); + if (status != ReturnStatus::OSP_SUCCESS && status != ReturnStatus::BEST_FOUND) { return status; } - const auto sub_proc_type_count = sub_architecture.getProcessorTypeCount(); - std::vector sub_proc_type_corrections(sub_architecture.getNumberOfProcessorTypes(), 0); - for (std::size_t k = 1; k < sub_proc_type_corrections.size(); ++k) { - sub_proc_type_corrections[k] = sub_proc_type_corrections[k - 1] + sub_proc_type_count[k - 1]; + const auto subProcTypeCount = subArchitecture.GetProcessorTypeCount(); + std::vector subProcTypeCorrections(subArchitecture.GetNumberOfProcessorTypes(), 0); + for (std::size_t k = 1; k < subProcTypeCorrections.size(); ++k) { + subProcTypeCorrections[k] = subProcTypeCorrections[k - 1] + subProcTypeCount[k - 1]; } - vertex_idx_t subdag_vertex = 0; - std::vector> sorted_component_vertices(components[j].begin(), components[j].end()); - std::sort(sorted_component_vertices.begin(), sorted_component_vertices.end()); + VertexIdxT subdagVertex = 0; + std::vector> sortedComponentVertices(components[j].begin(), components[j].end()); + std::sort(sortedComponentVertices.begin(), sortedComponentVertices.end()); - for (const auto &vertex : sorted_component_vertices) { - const unsigned proc_in_sub_sched = sub_schedule.assignedProcessor(subdag_vertex); - const unsigned proc_type = sub_architecture.processorType(proc_in_sub_sched); - const unsigned local_proc_id_within_type = proc_in_sub_sched - sub_proc_type_corrections[proc_type]; - unsigned global_proc_id - = global_ids_by_type[proc_type][proc_type_offsets[proc_type] + local_proc_id_within_type]; + for (const auto &vertex : sortedComponentVertices) { + const unsigned procInSubSched = subSchedule.AssignedProcessor(subdagVertex); + const unsigned procType = subArchitecture.ProcessorType(procInSubSched); + const unsigned localProcIdWithinType = procInSubSched - subProcTypeCorrections[procType]; + unsigned globalProcId = 
globalIdsByType[procType][procTypeOffsets[procType] + localProcIdWithinType]; - schedule.setAssignedProcessor(vertex, global_proc_id); - schedule.setAssignedSuperstep(vertex, superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex)); - subdag_vertex++; + schedule.SetAssignedProcessor(vertex, globalProcId); + schedule.SetAssignedSuperstep(vertex, superstepOffset + subSchedule.AssignedSuperstep(subdagVertex)); + subdagVertex++; } - for (size_t k = 0; k < sub_proc_type_count.size(); ++k) { - proc_type_offsets[k] += sub_proc_type_count[k]; + for (size_t k = 0; k < subProcTypeCount.size(); ++k) { + procTypeOffsets[k] += subProcTypeCount[k]; } - max_number_supersteps = std::max(max_number_supersteps, sub_schedule.numberOfSupersteps()); + maxNumberSupersteps = std::max(maxNumberSupersteps, subSchedule.NumberOfSupersteps()); } - superstep_offset += max_number_supersteps; + superstepOffset += maxNumberSupersteps; } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } }; -template -using WavefrontComponentScheduler_def_int_t = WavefrontComponentScheduler; +template +using WavefrontComponentSchedulerDefIntT = WavefrontComponentScheduler; } // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp index d7a461b0..83225b22 100644 --- a/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp @@ -33,356 +33,353 @@ limitations under the License. 
namespace osp { struct SubgraphSchedule { - double makespan; - std::vector> node_assigned_worker_per_type; - std::vector was_trimmed; + double makespan_; + std::vector> nodeAssignedWorkerPerType_; + std::vector wasTrimmed_; }; -template +template class EftSubgraphScheduler { public: EftSubgraphScheduler() = default; - SubgraphSchedule run(const BspInstance &instance, + SubgraphSchedule Run(const BspInstance &instance, const std::vector &multiplicities, - const std::vector>> &required_proc_types, - const std::vector &max_num_procs) { - prepare_for_scheduling(instance, multiplicities, required_proc_types, max_num_procs); - return execute_schedule(instance); + const std::vector>> &requiredProcTypes, + const std::vector &maxNumProcs) { + PrepareForScheduling(instance, multiplicities, requiredProcTypes, maxNumProcs); + return ExecuteSchedule(instance); } - void setMinWorkPerProcessor(const v_workw_t min_work_per_processor) { - min_work_per_processor_ = min_work_per_processor; - } + void SetMinWorkPerProcessor(const VWorkwT minWorkPerProcessor) { minWorkPerProcessor_ = minWorkPerProcessor; } private: - static constexpr bool verbose = false; + static constexpr bool verbose_ = false; - using job_id_t = vertex_idx_t; + using JobIdT = VertexIdxT; - v_workw_t min_work_per_processor_ = 2000; + VWorkwT minWorkPerProcessor_ = 2000; enum class JobStatus { WAITING, READY, RUNNING, COMPLETED }; struct Job { - job_id_t id; + JobIdT id_; - std::vector> required_proc_types; - v_workw_t total_work; - unsigned multiplicity = 1; - unsigned max_num_procs = 1; + std::vector> requiredProcTypes_; + VWorkwT totalWork_; + unsigned multiplicity_ = 1; + unsigned maxNumProcs_ = 1; - job_id_t in_degree_current = 0; + JobIdT inDegreeCurrent_ = 0; - JobStatus status = JobStatus::WAITING; - v_workw_t upward_rank = 0.0; + JobStatus status_ = JobStatus::WAITING; + VWorkwT upwardRank_ = 0.0; // --- Execution Tracking Members --- - std::vector assigned_workers; - double start_time = -1.0; - double 
finish_time = -1.0; + std::vector assignedWorkers_; + double startTime_ = -1.0; + double finishTime_ = -1.0; }; // Custom comparator for storing Job pointers in the ready set, sorted by rank. struct JobPtrCompare { bool operator()(const Job *lhs, const Job *rhs) const { - if (lhs->upward_rank != rhs->upward_rank) { - return lhs->upward_rank > rhs->upward_rank; + if (lhs->upwardRank_ != rhs->upwardRank_) { + return lhs->upwardRank_ > rhs->upwardRank_; } - return lhs->id > rhs->id; // Tie-breaking + return lhs->id_ > rhs->id_; // Tie-breaking } }; std::vector jobs_; - std::set ready_jobs_; - - void prepare_for_scheduling(const BspInstance &instance, - const std::vector &multiplicities, - const std::vector>> &required_proc_types, - const std::vector &max_num_procs) { - jobs_.resize(instance.numberOfVertices()); - if constexpr (verbose) { + std::set readyJobs_; + + void PrepareForScheduling(const BspInstance &instance, + const std::vector &multiplicities, + const std::vector>> &requiredProcTypes, + const std::vector &maxNumProcs) { + jobs_.resize(instance.NumberOfVertices()); + if constexpr (verbose_) { std::cout << "--- Preparing for Subgraph Scheduling ---" << std::endl; } - const auto &graph = instance.getComputationalDag(); - const size_t num_worker_types = instance.getArchitecture().getProcessorTypeCount().size(); + const auto &graph = instance.GetComputationalDag(); + const size_t numWorkerTypes = instance.GetArchitecture().GetProcessorTypeCount().size(); - calculate_upward_ranks(graph); + CalculateUpwardRanks(graph); - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << "Initializing jobs..." 
<< std::endl; } - job_id_t idx = 0; + JobIdT idx = 0; for (auto &job : jobs_) { - job.id = idx; - job.in_degree_current = graph.in_degree(idx); - if (job.in_degree_current == 0) { - job.status = JobStatus::READY; - ready_jobs_.insert(&job); + job.id_ = idx; + job.inDegreeCurrent_ = graph.InDegree(idx); + if (job.inDegreeCurrent_ == 0) { + job.status_ = JobStatus::READY; + readyJobs_.insert(&job); } else { - job.status = JobStatus::WAITING; + job.status_ = JobStatus::WAITING; } - job.total_work = graph.vertex_work_weight(idx); - job.max_num_procs - = std::min(max_num_procs[idx], - static_cast((job.total_work + min_work_per_processor_ - 1) / min_work_per_processor_)); - job.multiplicity = std::min(multiplicities[idx], job.max_num_procs); - job.required_proc_types = required_proc_types[idx]; - job.assigned_workers.resize(num_worker_types, 0); - job.start_time = -1.0; - job.finish_time = -1.0; - - if constexpr (verbose) { - std::cout << " - Job " << idx << ": rank=" << job.upward_rank << ", mult=" << job.multiplicity - << ", max_procs=" << job.max_num_procs << ", work=" << job.total_work - << ", status=" << (job.status == JobStatus::READY ? "READY" : "WAITING") << std::endl; + job.totalWork_ = graph.VertexWorkWeight(idx); + job.maxNumProcs_ = std::min( + maxNumProcs[idx], static_cast((job.totalWork_ + minWorkPerProcessor_ - 1) / minWorkPerProcessor_)); + job.multiplicity_ = std::min(multiplicities[idx], job.maxNumProcs_); + job.requiredProcTypes_ = requiredProcTypes[idx]; + job.assignedWorkers_.resize(numWorkerTypes, 0); + job.startTime_ = -1.0; + job.finishTime_ = -1.0; + + if constexpr (verbose_) { + std::cout << " - Job " << idx << ": rank=" << job.upwardRank_ << ", mult=" << job.multiplicity_ + << ", max_procs=" << job.maxNumProcs_ << ", work=" << job.totalWork_ + << ", status=" << (job.status_ == JobStatus::READY ? 
"READY" : "WAITING") << std::endl; } idx++; } } - void calculate_upward_ranks(const Graph_t &graph) { - const auto reverse_top_order = GetTopOrderReverse(graph); + void CalculateUpwardRanks(const GraphT &graph) { + const auto reverseTopOrder = GetTopOrderReverse(graph); - for (const auto &vertex : reverse_top_order) { - v_workw_t max_successor_rank = 0.0; - for (const auto &child : graph.children(vertex)) { - max_successor_rank = std::max(max_successor_rank, jobs_.at(child).upward_rank); + for (const auto &vertex : reverseTopOrder) { + VWorkwT maxSuccessorRank = 0.0; + for (const auto &child : graph.Children(vertex)) { + maxSuccessorRank = std::max(maxSuccessorRank, jobs_.at(child).upwardRank_); } Job &job = jobs_.at(vertex); - job.upward_rank = graph.vertex_work_weight(vertex) + max_successor_rank; + job.upwardRank_ = graph.VertexWorkWeight(vertex) + maxSuccessorRank; } } - SubgraphSchedule execute_schedule(const BspInstance &instance) { - double current_time = 0.0; - std::vector available_workers = instance.getArchitecture().getProcessorTypeCount(); - const size_t num_worker_types = available_workers.size(); - std::vector running_jobs; - unsigned completed_count = 0; - const auto &graph = instance.getComputationalDag(); + SubgraphSchedule ExecuteSchedule(const BspInstance &instance) { + double currentTime = 0.0; + std::vector availableWorkers = instance.GetArchitecture().GetProcessorTypeCount(); + const size_t numWorkerTypes = availableWorkers.size(); + std::vector runningJobs; + unsigned completedCount = 0; + const auto &graph = instance.GetComputationalDag(); - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << "\n--- Subgraph Scheduling Execution Started ---" << std::endl; std::cout << "Total jobs: " << jobs_.size() << std::endl; std::cout << "Initial available workers: "; - for (size_t i = 0; i < num_worker_types; ++i) { - std::cout << "T" << i << ":" << available_workers[i] << " "; + for (size_t i = 0; i < numWorkerTypes; ++i) { + std::cout << 
"T" << i << ":" << availableWorkers[i] << " "; } std::cout << std::endl; } - while (completed_count < jobs_.size()) { - if constexpr (verbose) { - std::cout << "\n[T=" << current_time << "] --- New Scheduling Step ---" << std::endl; - std::cout << "Completed jobs: " << completed_count << "/" << jobs_.size() << std::endl; + while (completedCount < jobs_.size()) { + if constexpr (verbose_) { + std::cout << "\n[T=" << currentTime << "] --- New Scheduling Step ---" << std::endl; + std::cout << "Completed jobs: " << completedCount << "/" << jobs_.size() << std::endl; std::cout << "Available workers: "; - for (size_t i = 0; i < num_worker_types; ++i) { - std::cout << "T" << i << ":" << available_workers[i] << " "; + for (size_t i = 0; i < numWorkerTypes; ++i) { + std::cout << "T" << i << ":" << availableWorkers[i] << " "; } std::cout << std::endl; - std::cout << "Ready queue size: " << ready_jobs_.size() << ". Running jobs: " << running_jobs.size() << std::endl; + std::cout << "Ready queue size: " << readyJobs_.size() << ". Running jobs: " << runningJobs.size() << std::endl; } - std::vector jobs_to_start; - v_workw_t total_runnable_priority = 0.0; + std::vector jobsToStart; + VWorkwT totalRunnablePriority = 0.0; // Iterate through ready jobs and assign minimum resources if available. 
- for (const Job *job_ptr : ready_jobs_) { - Job &job = jobs_[job_ptr->id]; - bool can_start = true; - for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { - if (job.required_proc_types[type_idx] > 0 && available_workers[type_idx] < job.multiplicity) { - can_start = false; + for (const Job *jobPtr : readyJobs_) { + Job &job = jobs_[jobPtr->id_]; + bool canStart = true; + for (size_t typeIdx = 0; typeIdx < numWorkerTypes; ++typeIdx) { + if (job.requiredProcTypes_[typeIdx] > 0 && availableWorkers[typeIdx] < job.multiplicity_) { + canStart = false; break; } } - if (can_start) { - jobs_to_start.push_back(&job); - total_runnable_priority += job.upward_rank; - for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { - if (job.required_proc_types[type_idx] > 0) { - job.assigned_workers[type_idx] = job.multiplicity; - available_workers[type_idx] -= job.multiplicity; + if (canStart) { + jobsToStart.push_back(&job); + totalRunnablePriority += job.upwardRank_; + for (size_t typeIdx = 0; typeIdx < numWorkerTypes; ++typeIdx) { + if (job.requiredProcTypes_[typeIdx] > 0) { + job.assignedWorkers_[typeIdx] = job.multiplicity_; + availableWorkers[typeIdx] -= job.multiplicity_; } } } } - if (!jobs_to_start.empty()) { - if constexpr (verbose) { - std::cout << "Allocating workers to " << jobs_to_start.size() << " runnable jobs..." << std::endl; + if (!jobsToStart.empty()) { + if constexpr (verbose_) { + std::cout << "Allocating workers to " << jobsToStart.size() << " runnable jobs..." << std::endl; } // Distribute remaining workers proportionally among the jobs that just started. 
- const std::vector remaining_workers_pool = available_workers; - for (Job *job_ptr : jobs_to_start) { - Job &job = *job_ptr; - for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { - if (job.required_proc_types[type_idx] > 0) { - const unsigned current_total_assigned - = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u); - const unsigned max_additional_workers - = (job.max_num_procs > current_total_assigned) ? (job.max_num_procs - current_total_assigned) : 0; + const std::vector remainingWorkersPool = availableWorkers; + for (Job *jobPtr : jobsToStart) { + Job &job = *jobPtr; + for (size_t typeIdx = 0; typeIdx < numWorkerTypes; ++typeIdx) { + if (job.requiredProcTypes_[typeIdx] > 0) { + const unsigned currentTotalAssigned + = std::accumulate(job.assignedWorkers_.begin(), job.assignedWorkers_.end(), 0u); + const unsigned maxAdditionalWorkers + = (job.maxNumProcs_ > currentTotalAssigned) ? (job.maxNumProcs_ - currentTotalAssigned) : 0; const double proportion - = (total_runnable_priority > 0) - ? (static_cast(job.upward_rank) / static_cast(total_runnable_priority)) - : (1.0 / static_cast(jobs_to_start.size())); - const unsigned proportional_share - = static_cast(static_cast(remaining_workers_pool[type_idx]) * proportion); - const unsigned num_proportional_chunks - = (job.multiplicity > 0) ? proportional_share / job.multiplicity : 0; - const unsigned num_available_chunks - = (job.multiplicity > 0) ? available_workers[type_idx] / job.multiplicity : 0; - const unsigned num_chunks_allowed_by_max - = (job.multiplicity > 0) ? max_additional_workers / job.multiplicity : 0; - const unsigned num_chunks_to_assign - = std::min({num_proportional_chunks, num_available_chunks, num_chunks_allowed_by_max}); - const unsigned assigned = num_chunks_to_assign * job.multiplicity; - job.assigned_workers[type_idx] += assigned; - available_workers[type_idx] -= assigned; + = (totalRunnablePriority > 0) + ? 
(static_cast(job.upwardRank_) / static_cast(totalRunnablePriority)) + : (1.0 / static_cast(jobsToStart.size())); + const unsigned proportionalShare + = static_cast(static_cast(remainingWorkersPool[typeIdx]) * proportion); + const unsigned numProportionalChunks = (job.multiplicity_ > 0) ? proportionalShare / job.multiplicity_ + : 0; + const unsigned numAvailableChunks + = (job.multiplicity_ > 0) ? availableWorkers[typeIdx] / job.multiplicity_ : 0; + const unsigned numChunksAllowedByMax + = (job.multiplicity_ > 0) ? maxAdditionalWorkers / job.multiplicity_ : 0; + const unsigned numChunksToAssign + = std::min({numProportionalChunks, numAvailableChunks, numChunksAllowedByMax}); + const unsigned assigned = numChunksToAssign * job.multiplicity_; + job.assignedWorkers_[typeIdx] += assigned; + availableWorkers[typeIdx] -= assigned; } } } // Greedily assign any remaining workers to the highest-rank jobs that can take them. - for (Job *job_ptr : jobs_to_start) { - Job &job = *job_ptr; - for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { - if (job.required_proc_types[type_idx] > 0 && job.multiplicity > 0) { - const unsigned current_total_assigned - = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u); - const unsigned max_additional_workers - = (job.max_num_procs > current_total_assigned) ? 
(job.max_num_procs - current_total_assigned) : 0; - const unsigned num_available_chunks = available_workers[type_idx] / job.multiplicity; - const unsigned num_chunks_allowed_by_max = max_additional_workers / job.multiplicity; - const unsigned assigned = std::min(num_available_chunks, num_chunks_allowed_by_max) * job.multiplicity; - job.assigned_workers[type_idx] += assigned; - available_workers[type_idx] -= assigned; + for (Job *jobPtr : jobsToStart) { + Job &job = *jobPtr; + for (size_t typeIdx = 0; typeIdx < numWorkerTypes; ++typeIdx) { + if (job.requiredProcTypes_[typeIdx] > 0 && job.multiplicity_ > 0) { + const unsigned currentTotalAssigned + = std::accumulate(job.assignedWorkers_.begin(), job.assignedWorkers_.end(), 0u); + const unsigned maxAdditionalWorkers + = (job.maxNumProcs_ > currentTotalAssigned) ? (job.maxNumProcs_ - currentTotalAssigned) : 0; + const unsigned numAvailableChunks = availableWorkers[typeIdx] / job.multiplicity_; + const unsigned numChunksAllowedByMax = maxAdditionalWorkers / job.multiplicity_; + const unsigned assigned = std::min(numAvailableChunks, numChunksAllowedByMax) * job.multiplicity_; + job.assignedWorkers_[typeIdx] += assigned; + availableWorkers[typeIdx] -= assigned; } } } - for (Job *job_ptr : jobs_to_start) { - Job &job = *job_ptr; + for (Job *jobPtr : jobsToStart) { + Job &job = *jobPtr; - job.status = JobStatus::RUNNING; - job.start_time = current_time; + job.status_ = JobStatus::RUNNING; + job.startTime_ = currentTime; // Calculate finish time based on total work and total assigned workers. - unsigned total_assigned_workers = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u); - double exec_time = (total_assigned_workers > 0) - ? 
static_cast(job.total_work) / static_cast(total_assigned_workers) - : 0.0; - job.finish_time = current_time + exec_time; - - running_jobs.push_back(job.id); - ready_jobs_.erase(&job); + unsigned totalAssignedWorkers = std::accumulate(job.assignedWorkers_.begin(), job.assignedWorkers_.end(), 0u); + double execTime = (totalAssignedWorkers > 0) + ? static_cast(job.totalWork_) / static_cast(totalAssignedWorkers) + : 0.0; + job.finishTime_ = currentTime + execTime; + + runningJobs.push_back(job.id_); + readyJobs_.erase(&job); } } // 2. ADVANCE TIME - if (running_jobs.empty() && completed_count < jobs_.size()) { - std::cerr << "Error: Deadlock detected. No running jobs and " << jobs_.size() - completed_count + if (runningJobs.empty() && completedCount < jobs_.size()) { + std::cerr << "Error: Deadlock detected. No running jobs and " << jobs_.size() - completedCount << " jobs incomplete." << std::endl; - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << "Deadlock! Ready queue:" << std::endl; - for (const auto *ready_job_ptr : ready_jobs_) { - const Job &job = *ready_job_ptr; - std::cout << " - Job " << job.id << " (mult " << job.multiplicity << ") needs workers: "; - for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { - if (job.required_proc_types[type_idx] > 0) { - std::cout << "T" << type_idx << ":" << job.multiplicity << " "; + for (const auto *readyJobPtr : readyJobs_) { + const Job &job = *readyJobPtr; + std::cout << " - Job " << job.id_ << " (mult " << job.multiplicity_ << ") needs workers: "; + for (size_t typeIdx = 0; typeIdx < numWorkerTypes; ++typeIdx) { + if (job.requiredProcTypes_[typeIdx] > 0) { + std::cout << "T" << typeIdx << ":" << job.multiplicity_ << " "; } } std::cout << std::endl; } std::cout << "Available workers: "; - for (size_t i = 0; i < num_worker_types; ++i) { - std::cout << "T" << i << ":" << available_workers[i] << " "; + for (size_t i = 0; i < numWorkerTypes; ++i) { + std::cout << "T" << i << ":" << 
availableWorkers[i] << " "; } std::cout << std::endl; } SubgraphSchedule result; - result.makespan = -1.0; + result.makespan_ = -1.0; return result; } - if (running_jobs.empty()) { + if (runningJobs.empty()) { break; // All jobs are done } - double next_event_time = std::numeric_limits::max(); - for (job_id_t id : running_jobs) { - next_event_time = std::min(next_event_time, jobs_.at(id).finish_time); + double nextEventTime = std::numeric_limits::max(); + for (JobIdT id : runningJobs) { + nextEventTime = std::min(nextEventTime, jobs_.at(id).finishTime_); } - if constexpr (verbose) { - std::cout << "Advancing time from " << current_time << " to " << next_event_time << std::endl; + if constexpr (verbose_) { + std::cout << "Advancing time from " << currentTime << " to " << nextEventTime << std::endl; } - current_time = next_event_time; + currentTime = nextEventTime; // 3. PROCESS COMPLETED JOBS - auto it = running_jobs.begin(); - while (it != running_jobs.end()) { + auto it = runningJobs.begin(); + while (it != runningJobs.end()) { Job &job = jobs_.at(*it); - if (job.finish_time <= current_time) { - job.status = JobStatus::COMPLETED; - if constexpr (verbose) { - std::cout << "Job " << job.id << " finished at T=" << current_time << std::endl; + if (job.finishTime_ <= currentTime) { + job.status_ = JobStatus::COMPLETED; + if constexpr (verbose_) { + std::cout << "Job " << job.id_ << " finished at T=" << currentTime << std::endl; } // Release workers - for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { - available_workers[type_idx] += job.assigned_workers[type_idx]; + for (size_t typeIdx = 0; typeIdx < numWorkerTypes; ++typeIdx) { + availableWorkers[typeIdx] += job.assignedWorkers_[typeIdx]; } - completed_count++; + completedCount++; // Update successors - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << " - Updating successors..." 
<< std::endl; } - for (const auto &successor_id : graph.children(job.id)) { - Job &successor_job = jobs_.at(successor_id); - successor_job.in_degree_current--; - if (successor_job.in_degree_current == 0) { - successor_job.status = JobStatus::READY; - ready_jobs_.insert(&successor_job); - if constexpr (verbose) { - std::cout << " - Successor " << successor_job.id << " is now READY." << std::endl; + for (const auto &successorId : graph.Children(job.id_)) { + Job &successorJob = jobs_.at(successorId); + successorJob.inDegreeCurrent_--; + if (successorJob.inDegreeCurrent_ == 0) { + successorJob.status_ = JobStatus::READY; + readyJobs_.insert(&successorJob); + if constexpr (verbose_) { + std::cout << " - Successor " << successorJob.id_ << " is now READY." << std::endl; } } } - it = running_jobs.erase(it); // Remove from running list + it = runningJobs.erase(it); // Remove from running list } else { ++it; } } } - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << "\n--- Subgraph Scheduling Finished ---" << std::endl; - std::cout << "Final Makespan: " << current_time << std::endl; + std::cout << "Final Makespan: " << currentTime << std::endl; std::cout << "Job Summary:" << std::endl; for (const auto &job : jobs_) { - std::cout << " - Job " << job.id << ": Multiplicity=" << job.multiplicity << ", Max Procs=" << job.max_num_procs - << ", Work=" << job.total_work << ", Start=" << job.start_time << ", Finish=" << job.finish_time + std::cout << " - Job " << job.id_ << ": Multiplicity=" << job.multiplicity_ << ", Max Procs=" << job.maxNumProcs_ + << ", Work=" << job.totalWork_ << ", Start=" << job.startTime_ << ", Finish=" << job.finishTime_ << ", Workers=["; - for (size_t i = 0; i < job.assigned_workers.size(); ++i) { - std::cout << "T" << i << ":" << job.assigned_workers[i] << (i == job.assigned_workers.size() - 1 ? 
"" : ", "); + for (size_t i = 0; i < job.assignedWorkers_.size(); ++i) { + std::cout << "T" << i << ":" << job.assignedWorkers_[i] << (i == job.assignedWorkers_.size() - 1 ? "" : ", "); } std::cout << "]" << std::endl; } } SubgraphSchedule result; - result.makespan = current_time; - result.node_assigned_worker_per_type.resize(jobs_.size()); + result.makespan_ = currentTime; + result.nodeAssignedWorkerPerType_.resize(jobs_.size()); for (const auto &job : jobs_) { - result.node_assigned_worker_per_type[job.id].resize(num_worker_types); - for (size_t i = 0; i < num_worker_types; ++i) { - result.node_assigned_worker_per_type[job.id][i] - = (job.assigned_workers[i] + job.multiplicity - 1) / job.multiplicity; + result.nodeAssignedWorkerPerType_[job.id_].resize(numWorkerTypes); + for (size_t i = 0; i < numWorkerTypes; ++i) { + result.nodeAssignedWorkerPerType_[job.id_][i] + = (job.assignedWorkers_[i] + job.multiplicity_ - 1) / job.multiplicity_; } } return result; diff --git a/include/osp/dag_divider/isomorphism_divider/HashComputer.hpp b/include/osp/dag_divider/isomorphism_divider/HashComputer.hpp index 6436ef37..27229e21 100644 --- a/include/osp/dag_divider/isomorphism_divider/HashComputer.hpp +++ b/include/osp/dag_divider/isomorphism_divider/HashComputer.hpp @@ -32,19 +32,19 @@ namespace osp { * * @tparam index_type The type used for indexing vertices in the graph. 
*/ -template +template class HashComputer { public: virtual ~HashComputer() = default; - virtual std::size_t get_vertex_hash(const index_type &v) const = 0; - virtual const std::vector &get_vertex_hashes() const = 0; - virtual std::size_t num_orbits() const = 0; + virtual std::size_t GetVertexHash(const IndexType &v) const = 0; + virtual const std::vector &GetVertexHashes() const = 0; + virtual std::size_t NumOrbits() const = 0; - virtual const std::vector &get_orbit(const index_type &v) const = 0; - virtual const std::unordered_map> &get_orbits() const = 0; + virtual const std::vector &GetOrbit(const IndexType &v) const = 0; + virtual const std::unordered_map> &GetOrbits() const = 0; - virtual const std::vector &get_orbit_from_hash(const std::size_t &hash) const = 0; + virtual const std::vector &GetOrbitFromHash(const std::size_t &hash) const = 0; }; } // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp index 507fa12e..df0ed090 100644 --- a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp @@ -47,526 +47,519 @@ namespace osp { * dynamically switch between a standard BSP scheduler and a specialized TrimmedGroupScheduler * for these trimmed groups. * - * @tparam Graph_t The type of the input computational DAG. - * @tparam Constr_Graph_t The type of the constructable computational DAG used for internal representations. + * @tparam GraphT The type of the input computational DAG. + * @tparam ConstrGraphT The type of the constructable computational DAG used for internal representations. 
*/ -template +template class IsomorphicSubgraphScheduler { - static_assert(is_computational_dag_v, "Graph must be a computational DAG"); - static_assert(is_computational_dag_v, "Constr_Graph_t must be a computational DAG"); - static_assert(is_constructable_cdag_v, "Constr_Graph_t must satisfy the constructable_cdag_vertex concept"); - static_assert(std::is_same_v, vertex_idx_t>, - "Graph_t and Constr_Graph_t must have the same vertex_idx types"); + static_assert(isComputationalDagV, "Graph must be a computational DAG"); + static_assert(isComputationalDagV, "ConstrGraphT must be a computational DAG"); + static_assert(isConstructableCdagV, "ConstrGraphT must satisfy the constructable_cdag_vertex concept"); + static_assert(std::is_same_v, VertexIdxT>, + "GraphT and ConstrGraphT must have the same VertexIdx types"); private: - static constexpr bool verbose = false; - const HashComputer> *hash_computer_; + static constexpr bool verbose_ = false; + const HashComputer> *hashComputer_; size_t symmetry_ = 4; - Scheduler *bsp_scheduler_; - bool use_max_group_size_ = false; - unsigned max_group_size_ = 0; - bool plot_dot_graphs_ = false; - v_workw_t work_threshold_ = 10; - v_workw_t critical_path_threshold_ = 10; - double orbit_lock_ratio_ = 0.4; - double natural_breaks_count_percentage_ = 0.1; - bool merge_different_node_types = true; - bool allow_use_trimmed_scheduler = true; - bool use_max_bsp = false; - bool use_adaptive_symmetry_threshold = true; + Scheduler *bspScheduler_; + bool useMaxGroupSize_ = false; + unsigned maxGroupSize_ = 0; + bool plotDotGraphs_ = false; + VWorkwT workThreshold_ = 10; + VWorkwT criticalPathThreshold_ = 10; + double orbitLockRatio_ = 0.4; + double naturalBreaksCountPercentage_ = 0.1; + bool mergeDifferentNodeTypes_ = true; + bool allowUseTrimmedScheduler_ = true; + bool useMaxBsp_ = false; + bool useAdaptiveSymmetryThreshold_ = true; public: - explicit IsomorphicSubgraphScheduler(Scheduler &bsp_scheduler) - : hash_computer_(nullptr), 
bsp_scheduler_(&bsp_scheduler), plot_dot_graphs_(false) {} + IsomorphicSubgraphScheduler(Scheduler &bspScheduler) + : hashComputer_(nullptr), bspScheduler_(&bspScheduler), plotDotGraphs_(false) {} - IsomorphicSubgraphScheduler(Scheduler &bsp_scheduler, const HashComputer> &hash_computer) - : hash_computer_(&hash_computer), bsp_scheduler_(&bsp_scheduler), plot_dot_graphs_(false) {} + IsomorphicSubgraphScheduler(Scheduler &bspScheduler, const HashComputer> &hashComputer) + : hashComputer_(&hashComputer), bspScheduler_(&bspScheduler), plotDotGraphs_(false) {} virtual ~IsomorphicSubgraphScheduler() {} - void setMergeDifferentTypes(bool flag) { merge_different_node_types = flag; } + void SetMergeDifferentTypes(bool flag) { mergeDifferentNodeTypes_ = flag; } - void setWorkThreshold(v_workw_t work_threshold) { work_threshold_ = work_threshold; } + void SetWorkThreshold(VWorkwT workThreshold) { workThreshold_ = workThreshold; } - void setCriticalPathThreshold(v_workw_t critical_path_threshold) { - critical_path_threshold_ = critical_path_threshold; - } + void SetCriticalPathThreshold(VWorkwT criticalPathThreshold) { criticalPathThreshold_ = criticalPathThreshold; } - void setOrbitLockRatio(double orbit_lock_ratio) { orbit_lock_ratio_ = orbit_lock_ratio; } + void SetOrbitLockRatio(double orbitLockRatio) { orbitLockRatio_ = orbitLockRatio; } - void setNaturalBreaksCountPercentage(double natural_breaks_count_percentage) { - natural_breaks_count_percentage_ = natural_breaks_count_percentage; + void SetNaturalBreaksCountPercentage(double naturalBreaksCountPercentage) { + naturalBreaksCountPercentage_ = naturalBreaksCountPercentage; } - void setAllowTrimmedScheduler(bool flag) { allow_use_trimmed_scheduler = flag; } + void SetAllowTrimmedScheduler(bool flag) { allowUseTrimmedScheduler_ = flag; } - void set_plot_dot_graphs(bool plot) { plot_dot_graphs_ = plot; } + void SetPlotDotGraphs(bool plot) { plotDotGraphs_ = plot; } - void disable_use_max_group_size() { use_max_group_size_ 
= false; } + void DisableUseMaxGroupSize() { useMaxGroupSize_ = false; } - void setUseMaxBsp(bool flag) { use_max_bsp = flag; } + void SetUseMaxBsp(bool flag) { useMaxBsp_ = flag; } - void enable_use_max_group_size(const unsigned max_group_size) { - use_max_group_size_ = true; - max_group_size_ = max_group_size; + void EnableUseMaxGroupSize(const unsigned maxGroupSize) { + useMaxGroupSize_ = true; + maxGroupSize_ = maxGroupSize; } - void setEnableAdaptiveSymmetryThreshold() { use_adaptive_symmetry_threshold = true; } + void SetEnableAdaptiveSymmetryThreshold() { useAdaptiveSymmetryThreshold_ = true; } - void setUseStaticSymmetryLevel(size_t static_symmetry_level) { - use_adaptive_symmetry_threshold = false; - symmetry_ = static_symmetry_level; + void SetUseStaticSymmetryLevel(size_t staticSymmetryLevel) { + useAdaptiveSymmetryThreshold_ = false; + symmetry_ = staticSymmetryLevel; } - std::vector> compute_partition(const BspInstance &instance) { - OrbitGraphProcessor orbit_processor; - orbit_processor.set_work_threshold(work_threshold_); - orbit_processor.setMergeDifferentNodeTypes(merge_different_node_types); - orbit_processor.setCriticalPathThreshold(critical_path_threshold_); - orbit_processor.setLockRatio(orbit_lock_ratio_); - orbit_processor.setNaturalBreaksCountPercentage(natural_breaks_count_percentage_); - if (not use_adaptive_symmetry_threshold) { - orbit_processor.setUseStaticSymmetryLevel(symmetry_); + std::vector> ComputePartition(const BspInstance &instance) { + OrbitGraphProcessor orbitProcessor; + orbitProcessor.SetWorkThreshold(workThreshold_); + orbitProcessor.SetMergeDifferentNodeTypes(mergeDifferentNodeTypes_); + orbitProcessor.SetCriticalPathThreshold(criticalPathThreshold_); + orbitProcessor.SetLockRatio(orbitLockRatio_); + orbitProcessor.SetNaturalBreaksCountPercentage(naturalBreaksCountPercentage_); + if (not useAdaptiveSymmetryThreshold_) { + orbitProcessor.SetUseStaticSymmetryLevel(symmetry_); } - std::unique_ptr>> local_hasher; - if 
(!hash_computer_) { - local_hasher = std::make_unique, true>>( - instance.getComputationalDag(), instance.getComputationalDag()); - hash_computer_ = local_hasher.get(); + std::unique_ptr>> localHasher; + if (!hashComputer_) { + localHasher = std::make_unique, true>>( + instance.GetComputationalDag(), instance.GetComputationalDag()); + hashComputer_ = localHasher.get(); } - orbit_processor.discover_isomorphic_groups(instance.getComputationalDag(), *hash_computer_); + orbitProcessor.DiscoverIsomorphicGroups(instance.GetComputationalDag(), *hashComputer_); - auto isomorphic_groups = orbit_processor.get_final_groups(); + auto isomorphicGroups = orbitProcessor.GetFinalGroups(); - std::vector was_trimmed(isomorphic_groups.size(), false); - trim_subgraph_groups(isomorphic_groups, instance, was_trimmed); // Apply trimming and record which groups were affected + std::vector wasTrimmed(isomorphicGroups.size(), false); + TrimSubgraphGroups(isomorphicGroups, instance, wasTrimmed); // Apply trimming and record which groups were affected - auto input = prepare_subgraph_scheduling_input(instance, isomorphic_groups, was_trimmed); + auto input = PrepareSubgraphSchedulingInput(instance, isomorphicGroups, wasTrimmed); - EftSubgraphScheduler etf_scheduler; - SubgraphSchedule subgraph_schedule - = etf_scheduler.run(input.instance, input.multiplicities, input.required_proc_types, input.max_num_processors); - subgraph_schedule.was_trimmed = std::move(was_trimmed); // Pass through trimming info + EftSubgraphScheduler etfScheduler; + SubgraphSchedule subgraphSchedule + = etfScheduler.Run(input.instance_, input.multiplicities_, input.requiredProcTypes_, input.maxNumProcessors_); + subgraphSchedule.wasTrimmed_ = std::move(wasTrimmed); // Pass through trimming info - std::vector> partition(instance.numberOfVertices(), 0); - schedule_isomorphic_group(instance, isomorphic_groups, subgraph_schedule, partition); + std::vector> partition(instance.NumberOfVertices(), 0); + 
ScheduleIsomorphicGroup(instance, isomorphicGroups, subgraphSchedule, partition); - if (plot_dot_graphs_) { + if (plotDotGraphs_) { auto now = std::chrono::system_clock::now(); - auto in_time_t = std::chrono::system_clock::to_time_t(now); + auto inTimeT = std::chrono::system_clock::to_time_t(now); std::stringstream ss; - ss << std::put_time(std::localtime(&in_time_t), "%Y%m%d_%H%M%S"); + ss << std::put_time(std::localtime(&inTimeT), "%Y%m%d_%H%M%S"); std::string timestamp = ss.str() + "_"; DotFileWriter writer; - writer.write_colored_graph( - timestamp + "isomorphic_groups.dot", instance.getComputationalDag(), orbit_processor.get_final_contraction_map()); - writer.write_colored_graph( - timestamp + "orbits_colored.dot", instance.getComputationalDag(), orbit_processor.get_contraction_map()); - writer.write_graph(timestamp + "iso_groups_contracted.dot", input.instance.getComputationalDag()); - writer.write_colored_graph(timestamp + "graph_partition.dot", instance.getComputationalDag(), partition); - Constr_Graph_t corase_graph; - coarser_util::construct_coarse_dag(instance.getComputationalDag(), corase_graph, partition); - writer.write_graph(timestamp + "block_graph.dot", corase_graph); + writer.WriteColoredGraph( + timestamp + "isomorphic_groups.dot", instance.GetComputationalDag(), orbitProcessor.GetFinalContractionMap()); + writer.WriteColoredGraph( + timestamp + "orbits_colored.dot", instance.GetComputationalDag(), orbitProcessor.GetContractionMap()); + writer.WriteGraph(timestamp + "iso_groups_contracted.dot", input.instance_.GetComputationalDag()); + writer.WriteColoredGraph(timestamp + "graph_partition.dot", instance.GetComputationalDag(), partition); + ConstrGraphT coarseGraph; + coarser_util::ConstructCoarseDag(instance.GetComputationalDag(), coarseGraph, partition); + writer.WriteGraph(timestamp + "block_graph.dot", coarseGraph); } return partition; } protected: - template - struct subgraph_scheduler_input { - BspInstance instance; - std::vector 
multiplicities; - std::vector max_num_processors; - std::vector>> required_proc_types; + template + struct SubgraphSchedulerInput { + BspInstance instance_; + std::vector multiplicities_; + std::vector maxNumProcessors_; + std::vector>> requiredProcTypes_; }; - void trim_subgraph_groups(std::vector::Group> &isomorphic_groups, - const BspInstance &instance, - std::vector &was_trimmed) { - if constexpr (verbose) { + void TrimSubgraphGroups(std::vector::Group> &isomorphicGroups, + const BspInstance &instance, + std::vector &wasTrimmed) { + if constexpr (verbose_) { std::cout << "\n--- Trimming Isomorphic Subgraph Groups ---" << std::endl; } - for (size_t group_idx = 0; group_idx < isomorphic_groups.size(); ++group_idx) { - auto &group = isomorphic_groups[group_idx]; - const unsigned group_size = static_cast(group.size()); - if (group_size <= 1) { + for (size_t groupIdx = 0; groupIdx < isomorphicGroups.size(); ++groupIdx) { + auto &group = isomorphicGroups[groupIdx]; + const unsigned groupSize = static_cast(group.size()); + if (groupSize <= 1) { continue; } - unsigned effective_min_proc_type_count = 0; + unsigned effectiveMinProcTypeCount = 0; - if (use_max_group_size_) { - if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size - << "): Using fixed max_group_size_ = " << max_group_size_ << " for trimming." << std::endl; + if (useMaxGroupSize_) { + if constexpr (verbose_) { + std::cout << "Group " << groupIdx << " (size " << groupSize + << "): Using fixed max_group_size_ = " << maxGroupSize_ << " for trimming." 
<< std::endl; } - effective_min_proc_type_count = max_group_size_; + effectiveMinProcTypeCount = maxGroupSize_; } else { // Determine if the group consists of a single node type - bool is_single_type_group = true; - v_type_t common_node_type = 0; - - if constexpr (has_typed_vertices_v) { - if (!group.subgraphs.empty() && !group.subgraphs[0].empty()) { - common_node_type = instance.getComputationalDag().vertex_type(group.subgraphs[0][0]); - const auto &rep_subgraph = group.subgraphs[0]; - for (const auto &vertex : rep_subgraph) { - if (instance.getComputationalDag().vertex_type(vertex) != common_node_type) { - is_single_type_group = false; + bool isSingleTypeGroup = true; + VTypeT commonNodeType = 0; + + if constexpr (hasTypedVerticesV) { + if (!group.subgraphs_.empty() && !group.subgraphs_[0].empty()) { + commonNodeType = instance.GetComputationalDag().VertexType(group.subgraphs_[0][0]); + const auto &repSubgraph = group.subgraphs_[0]; + for (const auto &vertex : repSubgraph) { + if (instance.GetComputationalDag().VertexType(vertex) != commonNodeType) { + isSingleTypeGroup = false; break; } } } else { - is_single_type_group = false; + isSingleTypeGroup = false; } } else { - is_single_type_group = false; + isSingleTypeGroup = false; } - if (is_single_type_group) { + if (isSingleTypeGroup) { // Dynamically determine min_proc_type_count based on compatible processors for this type - unsigned min_compatible_processors = std::numeric_limits::max(); - const auto &proc_type_counts = instance.getArchitecture().getProcessorTypeCount(); - - bool found_compatible_processor = false; - for (unsigned proc_type_idx = 0; proc_type_idx < proc_type_counts.size(); ++proc_type_idx) { - if (instance.isCompatibleType(common_node_type, proc_type_idx)) { - min_compatible_processors = std::min(min_compatible_processors, proc_type_counts[proc_type_idx]); - found_compatible_processor = true; + unsigned minCompatibleProcessors = std::numeric_limits::max(); + const auto &procTypeCounts = 
instance.GetArchitecture().GetProcessorTypeCount(); + + bool foundCompatibleProcessor = false; + for (unsigned procTypeIdx = 0; procTypeIdx < procTypeCounts.size(); ++procTypeIdx) { + if (instance.IsCompatibleType(commonNodeType, procTypeIdx)) { + minCompatibleProcessors = std::min(minCompatibleProcessors, procTypeCounts[procTypeIdx]); + foundCompatibleProcessor = true; } } - if (found_compatible_processor) { - if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" - << common_node_type << "). Min compatible processors: " << min_compatible_processors << "." + if (foundCompatibleProcessor) { + if constexpr (verbose_) { + std::cout << "Group " << groupIdx << " (size " << groupSize << "): Single node type (" + << commonNodeType << "). Min compatible processors: " << minCompatibleProcessors << "." << std::endl; } - effective_min_proc_type_count = min_compatible_processors; + effectiveMinProcTypeCount = minCompatibleProcessors; } else { - if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" - << common_node_type << ") but no compatible processors found. Disabling trimming." + if constexpr (verbose_) { + std::cout << "Group " << groupIdx << " (size " << groupSize << "): Single node type (" + << commonNodeType << ") but no compatible processors found. Disabling trimming." << std::endl; } // If no compatible processors found for this type, effectively disable trimming for this group. - effective_min_proc_type_count = 1; + effectiveMinProcTypeCount = 1; } } else { // Fallback to a default min_proc_type_count if not a single-type group or no typed vertices. 
- const auto &type_count = instance.getArchitecture().getProcessorTypeCount(); - if (type_count.empty()) { - effective_min_proc_type_count = 0; + const auto &typeCount = instance.GetArchitecture().GetProcessorTypeCount(); + if (typeCount.empty()) { + effectiveMinProcTypeCount = 0; } - effective_min_proc_type_count = *std::min_element(type_count.begin(), type_count.end()); - if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size + effectiveMinProcTypeCount = *std::min_element(typeCount.begin(), typeCount.end()); + if constexpr (verbose_) { + std::cout << "Group " << groupIdx << " (size " << groupSize << "): Multi-type or untyped group. Using default min_proc_type_count: " - << effective_min_proc_type_count << "." << std::endl; + << effectiveMinProcTypeCount << "." << std::endl; } } } // Ensure effective_min_proc_type_count is at least 1 for valid GCD calculation. - if (effective_min_proc_type_count == 0) { - effective_min_proc_type_count = 1; + if (effectiveMinProcTypeCount == 0) { + effectiveMinProcTypeCount = 1; } // If effective_min_proc_type_count is 1, no trimming is needed as gcd(X, 1) = 1. - if (effective_min_proc_type_count <= 1) { + if (effectiveMinProcTypeCount <= 1) { continue; } - unsigned gcd = std::gcd(group_size, effective_min_proc_type_count); + unsigned gcd = std::gcd(groupSize, effectiveMinProcTypeCount); - if (gcd < group_size) { - if constexpr (verbose) { - std::cout << " -> Trimming group " << group_idx << ". GCD(" << group_size << ", " - << effective_min_proc_type_count << ") = " << gcd << ". Merging " << group_size / gcd - << " subgraphs at a time." << std::endl; + if (gcd < groupSize) { + if constexpr (verbose_) { + std::cout << " -> Trimming group " << groupIdx << ". GCD(" << groupSize << ", " << effectiveMinProcTypeCount + << ") = " << gcd << ". Merging " << groupSize / gcd << " subgraphs at a time." 
<< std::endl; } - if (allow_use_trimmed_scheduler) { + if (allowUseTrimmedScheduler_) { gcd = 1; } - was_trimmed[group_idx] = true; - const unsigned merge_size = group_size / gcd; - std::vector>> new_subgraphs; - new_subgraphs.reserve(gcd); + wasTrimmed[groupIdx] = true; + const unsigned mergeSize = groupSize / gcd; + std::vector>> newSubgraphs; + newSubgraphs.reserve(gcd); - size_t original_sg_cursor = 0; + size_t originalSgCursor = 0; for (unsigned j = 0; j < gcd; ++j) { - std::vector> merged_sg_vertices; + std::vector> mergedSgVertices; // Estimate capacity for efficiency. Assuming subgraphs have similar sizes. - if (!group.subgraphs.empty()) { - merged_sg_vertices.reserve(group.subgraphs[0].size() * merge_size); + if (!group.subgraphs_.empty()) { + mergedSgVertices.reserve(group.subgraphs_[0].size() * mergeSize); } - for (unsigned k = 0; k < merge_size; ++k) { - const auto &sg_to_merge_vertices = group.subgraphs[original_sg_cursor]; - original_sg_cursor++; - merged_sg_vertices.insert( - merged_sg_vertices.end(), sg_to_merge_vertices.begin(), sg_to_merge_vertices.end()); + for (unsigned k = 0; k < mergeSize; ++k) { + const auto &sgToMergeVertices = group.subgraphs_[originalSgCursor]; + originalSgCursor++; + mergedSgVertices.insert(mergedSgVertices.end(), sgToMergeVertices.begin(), sgToMergeVertices.end()); } - new_subgraphs.push_back(std::move(merged_sg_vertices)); + newSubgraphs.push_back(std::move(mergedSgVertices)); } - group.subgraphs = std::move(new_subgraphs); + group.subgraphs_ = std::move(newSubgraphs); } else { - if constexpr (verbose) { - std::cout << " -> No trim needed for group " << group_idx << "." << std::endl; + if constexpr (verbose_) { + std::cout << " -> No trim needed for group " << groupIdx << "." 
<< std::endl; } - was_trimmed[group_idx] = false; + wasTrimmed[groupIdx] = false; } } } - subgraph_scheduler_input prepare_subgraph_scheduling_input( - const BspInstance &original_instance, - const std::vector::Group> &isomorphic_groups, - const std::vector &was_trimmed) { - subgraph_scheduler_input result; - result.instance.getArchitecture() = original_instance.getArchitecture(); - const unsigned num_proc_types = original_instance.getArchitecture().getNumberOfProcessorTypes(); - - result.multiplicities.resize(isomorphic_groups.size()); - result.max_num_processors.resize(isomorphic_groups.size()); - result.required_proc_types.resize(isomorphic_groups.size()); - std::vector> contraction_map(original_instance.numberOfVertices()); - - size_t coarse_node_idx = 0; - for (const auto &group : isomorphic_groups) { - result.max_num_processors[coarse_node_idx] = static_cast(group.size() * group.subgraphs[0].size()); - result.multiplicities[coarse_node_idx] = (was_trimmed[coarse_node_idx] && allow_use_trimmed_scheduler) - ? 
1 - : static_cast(group.subgraphs.size()); - result.required_proc_types[coarse_node_idx].assign(num_proc_types, 0); - - for (const auto &subgraph : group.subgraphs) { + SubgraphSchedulerInput PrepareSubgraphSchedulingInput( + const BspInstance &originalInstance, + const std::vector::Group> &isomorphicGroups, + const std::vector &wasTrimmed) { + SubgraphSchedulerInput result; + result.instance_.GetArchitecture() = originalInstance.GetArchitecture(); + const unsigned numProcTypes = originalInstance.GetArchitecture().GetNumberOfProcessorTypes(); + + result.multiplicities_.resize(isomorphicGroups.size()); + result.maxNumProcessors_.resize(isomorphicGroups.size()); + result.requiredProcTypes_.resize(isomorphicGroups.size()); + std::vector> contractionMap(originalInstance.NumberOfVertices()); + + size_t coarseNodeIdx = 0; + for (const auto &group : isomorphicGroups) { + result.maxNumProcessors_[coarseNodeIdx] = static_cast(group.size() * group.subgraphs_[0].size()); + result.multiplicities_[coarseNodeIdx] + = (wasTrimmed[coarseNodeIdx] && allowUseTrimmedScheduler_) ? 
1 : static_cast(group.subgraphs_.size()); + result.requiredProcTypes_[coarseNodeIdx].assign(numProcTypes, 0); + + for (const auto &subgraph : group.subgraphs_) { for (const auto &vertex : subgraph) { - contraction_map[vertex] = static_cast>(coarse_node_idx); - const auto vertex_work = original_instance.getComputationalDag().vertex_work_weight(vertex); - const auto vertex_type = original_instance.getComputationalDag().vertex_type(vertex); - for (unsigned j = 0; j < num_proc_types; ++j) { - if (original_instance.isCompatibleType(vertex_type, j)) { - result.required_proc_types[coarse_node_idx][j] += vertex_work; + contractionMap[vertex] = static_cast>(coarseNodeIdx); + const auto vertexWork = originalInstance.GetComputationalDag().VertexWorkWeight(vertex); + const auto vertexType = originalInstance.GetComputationalDag().VertexType(vertex); + for (unsigned j = 0; j < numProcTypes; ++j) { + if (originalInstance.IsCompatibleType(vertexType, j)) { + result.requiredProcTypes_[coarseNodeIdx][j] += vertexWork; } } } } - ++coarse_node_idx; + ++coarseNodeIdx; } - coarser_util::construct_coarse_dag( - original_instance.getComputationalDag(), result.instance.getComputationalDag(), contraction_map); + coarser_util::ConstructCoarseDag( + originalInstance.GetComputationalDag(), result.instance_.GetComputationalDag(), contractionMap); - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << "\n--- Preparing Subgraph Scheduling Input ---\n"; - std::cout << "Found " << isomorphic_groups.size() << " isomorphic groups to schedule as coarse nodes.\n"; - for (size_t j = 0; j < isomorphic_groups.size(); ++j) { - std::cout << " - Coarse Node " << j << " (from " << isomorphic_groups[j].subgraphs.size() + std::cout << "Found " << isomorphicGroups.size() << " isomorphic groups to schedule as coarse nodes.\n"; + for (size_t j = 0; j < isomorphicGroups.size(); ++j) { + std::cout << " - Coarse Node " << j << " (from " << isomorphicGroups[j].subgraphs_.size() << " isomorphic 
subgraphs):\n"; - std::cout << " - Multiplicity for scheduling: " << result.multiplicities[j] << "\n"; - std::cout << " - Total Work (in coarse graph): " << result.instance.getComputationalDag().vertex_work_weight(j) + std::cout << " - Multiplicity for scheduling: " << result.multiplicities_[j] << "\n"; + std::cout << " - Total Work (in coarse graph): " << result.instance_.GetComputationalDag().VertexWorkWeight(j) << "\n"; std::cout << " - Required Processor Types: "; - for (unsigned k = 0; k < num_proc_types; ++k) { - std::cout << result.required_proc_types[j][k] << " "; + for (unsigned k = 0; k < numProcTypes; ++k) { + std::cout << result.requiredProcTypes_[j][k] << " "; } std::cout << "\n"; - std::cout << " - Max number of processors: " << result.max_num_processors[j] << "\n"; + std::cout << " - Max number of processors: " << result.maxNumProcessors_[j] << "\n"; } } return result; } - void schedule_isomorphic_group(const BspInstance &instance, - const std::vector::Group> &isomorphic_groups, - const SubgraphSchedule &sub_sched, - std::vector> &partition) { - vertex_idx_t current_partition_idx = 0; + void ScheduleIsomorphicGroup(const BspInstance &instance, + const std::vector::Group> &isomorphicGroups, + const SubgraphSchedule &subSched, + std::vector> &partition) { + VertexIdxT currentPartitionIdx = 0; - for (size_t group_idx = 0; group_idx < isomorphic_groups.size(); ++group_idx) { - const auto &group = isomorphic_groups[group_idx]; - if (group.subgraphs.empty()) { + for (size_t groupIdx = 0; groupIdx < isomorphicGroups.size(); ++groupIdx) { + const auto &group = isomorphicGroups[groupIdx]; + if (group.subgraphs_.empty()) { continue; } // Schedule the Representative Subgraph to get a BSP schedule pattern --- - auto rep_subgraph_vertices_sorted = group.subgraphs[0]; - std::sort(rep_subgraph_vertices_sorted.begin(), rep_subgraph_vertices_sorted.end()); - - BspInstance representative_instance; - auto rep_global_to_local_map = create_induced_subgraph_map( - 
instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted); - - representative_instance.getArchitecture() = instance.getArchitecture(); - const auto &procs_for_group = sub_sched.node_assigned_worker_per_type[group_idx]; - std::vector> mem_weights(procs_for_group.size(), 0); - for (unsigned proc_type = 0; proc_type < procs_for_group.size(); ++proc_type) { - mem_weights[proc_type] - = static_cast>(instance.getArchitecture().maxMemoryBoundProcType(proc_type)); + auto repSubgraphVerticesSorted = group.subgraphs_[0]; + std::sort(repSubgraphVerticesSorted.begin(), repSubgraphVerticesSorted.end()); + + BspInstance representativeInstance; + auto repGlobalToLocalMap = CreateInducedSubgraphMap( + instance.GetComputationalDag(), representativeInstance.GetComputationalDag(), repSubgraphVerticesSorted); + + representativeInstance.GetArchitecture() = instance.GetArchitecture(); + const auto &procsForGroup = subSched.nodeAssignedWorkerPerType_[groupIdx]; + std::vector> memWeights(procsForGroup.size(), 0); + for (unsigned procType = 0; procType < procsForGroup.size(); ++procType) { + memWeights[procType] + = static_cast>(instance.GetArchitecture().MaxMemoryBoundProcType(procType)); } - representative_instance.getArchitecture().SetProcessorsConsequTypes(procs_for_group, mem_weights); - representative_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); + representativeInstance.GetArchitecture().SetProcessorsConsequTypes(procsForGroup, memWeights); + representativeInstance.SetNodeProcessorCompatibility(instance.GetProcessorCompatibilityMatrix()); // --- Decide which scheduler to use --- - unsigned min_non_zero_procs = std::numeric_limits::max(); - for (const auto &proc_count : procs_for_group) { - if (proc_count > 0) { - min_non_zero_procs = std::min(min_non_zero_procs, proc_count); + unsigned minNonZeroProcs = std::numeric_limits::max(); + for (const auto &procCount : procsForGroup) { + if (procCount > 
0) { + minNonZeroProcs = std::min(minNonZeroProcs, procCount); } } - bool use_trimmed_scheduler = sub_sched.was_trimmed[group_idx] && min_non_zero_procs > 1 && allow_use_trimmed_scheduler; + bool useTrimmedScheduler = subSched.wasTrimmed_[groupIdx] && minNonZeroProcs > 1 && allowUseTrimmedScheduler_; - Scheduler *scheduler_for_group_ptr; - std::unique_ptr> trimmed_scheduler_owner; - if (use_trimmed_scheduler) { - if constexpr (verbose) { - std::cout << "Using TrimmedGroupScheduler for group " << group_idx << std::endl; + Scheduler *schedulerForGroupPtr; + std::unique_ptr> trimmedSchedulerOwner; + if (useTrimmedScheduler) { + if constexpr (verbose_) { + std::cout << "Using TrimmedGroupScheduler for group " << groupIdx << std::endl; } - trimmed_scheduler_owner - = std::make_unique>(*bsp_scheduler_, min_non_zero_procs); - scheduler_for_group_ptr = trimmed_scheduler_owner.get(); + trimmedSchedulerOwner = std::make_unique>(*bspScheduler_, minNonZeroProcs); + schedulerForGroupPtr = trimmedSchedulerOwner.get(); } else { - if constexpr (verbose) { - std::cout << "Using standard BSP scheduler for group " << group_idx << std::endl; + if constexpr (verbose_) { + std::cout << "Using standard BSP scheduler for group " << groupIdx << std::endl; } - scheduler_for_group_ptr = bsp_scheduler_; + schedulerForGroupPtr = bspScheduler_; } // --- Schedule the representative to get the pattern --- - BspSchedule bsp_schedule(representative_instance); - - if constexpr (verbose) { - std::cout << "--- Scheduling representative for group " << group_idx << " ---" << std::endl; - std::cout << " Number of subgraphs in group: " << group.subgraphs.size() << std::endl; - const auto &rep_dag = representative_instance.getComputationalDag(); - std::cout << " Representative subgraph size: " << rep_dag.num_vertices() << " vertices" << std::endl; - std::vector node_type_counts(rep_dag.num_vertex_types(), 0); - for (const auto &v : rep_dag.vertices()) { - node_type_counts[rep_dag.vertex_type(v)]++; + 
BspSchedule bspSchedule(representativeInstance); + + if constexpr (verbose_) { + std::cout << "--- Scheduling representative for group " << groupIdx << " ---" << std::endl; + std::cout << " Number of subgraphs in group: " << group.subgraphs_.size() << std::endl; + const auto &repDag = representativeInstance.GetComputationalDag(); + std::cout << " Representative subgraph size: " << repDag.NumVertices() << " vertices" << std::endl; + std::vector nodeTypeCounts(repDag.NumVertexTypes(), 0); + for (const auto &v : repDag.Vertices()) { + nodeTypeCounts[repDag.VertexType(v)]++; } std::cout << " Node type counts: "; - for (size_t type_idx = 0; type_idx < node_type_counts.size(); ++type_idx) { - if (node_type_counts[type_idx] > 0) { - std::cout << "T" << type_idx << ":" << node_type_counts[type_idx] << " "; + for (size_t typeIdx = 0; typeIdx < nodeTypeCounts.size(); ++typeIdx) { + if (nodeTypeCounts[typeIdx] > 0) { + std::cout << "T" << typeIdx << ":" << nodeTypeCounts[typeIdx] << " "; } } std::cout << std::endl; - const auto &sub_arch = representative_instance.getArchitecture(); + const auto &subArch = representativeInstance.GetArchitecture(); std::cout << " Sub-architecture for scheduling:" << std::endl; - std::cout << " Processors: " << sub_arch.numberOfProcessors() << std::endl; + std::cout << " Processors: " << subArch.NumberOfProcessors() << std::endl; std::cout << " Processor types counts: "; - const auto &type_counts = sub_arch.getProcessorTypeCount(); - for (size_t type_idx = 0; type_idx < type_counts.size(); ++type_idx) { - std::cout << "T" << type_idx << ":" << type_counts[type_idx] << " "; + const auto &typeCounts = subArch.GetProcessorTypeCount(); + for (size_t typeIdx = 0; typeIdx < typeCounts.size(); ++typeIdx) { + std::cout << "T" << typeIdx << ":" << typeCounts[typeIdx] << " "; } std::cout << std::endl; - std::cout << " Sync cost: " << sub_arch.synchronisationCosts() - << ", Comm cost: " << sub_arch.communicationCosts() << std::endl; + std::cout << " Sync 
cost: " << subArch.SynchronisationCosts() + << ", Comm cost: " << subArch.CommunicationCosts() << std::endl; } - scheduler_for_group_ptr->computeSchedule(bsp_schedule); + schedulerForGroupPtr->ComputeSchedule(bspSchedule); - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << " Schedule satisfies precedence constraints: "; - std::cout << bsp_schedule.satisfiesPrecedenceConstraints() << std::endl; + std::cout << bspSchedule.SatisfiesPrecedenceConstraints() << std::endl; std::cout << " Schedule satisfies node type constraints: "; - std::cout << bsp_schedule.satisfiesNodeTypeConstraints() << std::endl; + std::cout << bspSchedule.SatisfiesNodeTypeConstraints() << std::endl; } - if (plot_dot_graphs_) { - const auto &rep_dag = bsp_schedule.getInstance().getComputationalDag(); - std::vector colors(rep_dag.num_vertices()); - std::map, unsigned> proc_ss_to_color; - unsigned next_color = 0; + if (plotDotGraphs_) { + const auto &repDag = bspSchedule.GetInstance().GetComputationalDag(); + std::vector colors(repDag.NumVertices()); + std::map, unsigned> procSsToColor; + unsigned nextColor = 0; - for (const auto &v : rep_dag.vertices()) { - const auto assignment = std::make_pair(bsp_schedule.assignedProcessor(v), bsp_schedule.assignedSuperstep(v)); - if (proc_ss_to_color.find(assignment) == proc_ss_to_color.end()) { - proc_ss_to_color[assignment] = next_color++; + for (const auto &v : repDag.Vertices()) { + const auto assignment = std::make_pair(bspSchedule.AssignedProcessor(v), bspSchedule.AssignedSuperstep(v)); + if (procSsToColor.find(assignment) == procSsToColor.end()) { + procSsToColor[assignment] = nextColor++; } - colors[v] = proc_ss_to_color[assignment]; + colors[v] = procSsToColor[assignment]; } auto now = std::chrono::system_clock::now(); - auto in_time_t = std::chrono::system_clock::to_time_t(now); + auto inTimeT = std::chrono::system_clock::to_time_t(now); std::stringstream ss; - ss << std::put_time(std::localtime(&in_time_t), "%Y%m%d_%H%M%S"); + ss << 
std::put_time(std::localtime(&inTimeT), "%Y%m%d_%H%M%S"); std::string timestamp = ss.str() + "_"; DotFileWriter writer; - writer.write_colored_graph(timestamp + "iso_group_rep_" + std::to_string(group_idx) + ".dot", rep_dag, colors); + writer.WriteColoredGraph(timestamp + "iso_group_rep_" + std::to_string(groupIdx) + ".dot", repDag, colors); } - const bool max_bsp = use_max_bsp && (representative_instance.getComputationalDag().num_edges() == 0) - && (representative_instance.getComputationalDag().vertex_type(0) == 0); + const bool maxBsp = useMaxBsp_ && (representativeInstance.GetComputationalDag().NumEdges() == 0) + && (representativeInstance.GetComputationalDag().VertexType(0) == 0); // Build data structures for applying the pattern --- // Map (superstep, processor) -> relative partition ID - std::map, vertex_idx_t> sp_proc_to_relative_partition; - vertex_idx_t num_partitions_per_subgraph = 0; - for (vertex_idx_t j = 0; j < static_cast>(rep_subgraph_vertices_sorted.size()); ++j) { - auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(j), bsp_schedule.assignedProcessor(j)); + std::map, VertexIdxT> spProcToRelativePartition; + VertexIdxT numPartitionsPerSubgraph = 0; + for (VertexIdxT j = 0; j < static_cast>(repSubgraphVerticesSorted.size()); ++j) { + auto spPair = std::make_pair(bspSchedule.AssignedSuperstep(j), bspSchedule.AssignedProcessor(j)); - if (max_bsp) { - sp_pair = std::make_pair(j, 0); + if (maxBsp) { + spPair = std::make_pair(j, 0); } - if (sp_proc_to_relative_partition.find(sp_pair) == sp_proc_to_relative_partition.end()) { - sp_proc_to_relative_partition[sp_pair] = num_partitions_per_subgraph++; + if (spProcToRelativePartition.find(spPair) == spProcToRelativePartition.end()) { + spProcToRelativePartition[spPair] = numPartitionsPerSubgraph++; } } // Pre-compute hashes for the representative to use for mapping - MerkleHashComputer rep_hasher(representative_instance.getComputationalDag()); + MerkleHashComputer 
repHasher(representativeInstance.GetComputationalDag()); // Replicate the schedule pattern for ALL subgraphs in the group --- - for (vertex_idx_t i = 0; i < static_cast>(group.subgraphs.size()); ++i) { - auto current_subgraph_vertices_sorted = group.subgraphs[i]; - std::sort(current_subgraph_vertices_sorted.begin(), current_subgraph_vertices_sorted.end()); + for (VertexIdxT i = 0; i < static_cast>(group.subgraphs_.size()); ++i) { + auto currentSubgraphVerticesSorted = group.subgraphs_[i]; + std::sort(currentSubgraphVerticesSorted.begin(), currentSubgraphVerticesSorted.end()); // Map from a vertex in the current subgraph to its corresponding local index (0, 1, ...) in the representative's schedule - std::unordered_map, vertex_idx_t> current_vertex_to_rep_local_idx; + std::unordered_map, VertexIdxT> currentVertexToRepLocalIdx; if (i == 0) { // The first subgraph is the representative itself - current_vertex_to_rep_local_idx = std::move(rep_global_to_local_map); + currentVertexToRepLocalIdx = std::move(repGlobalToLocalMap); } else { // For other subgraphs, build the isomorphic mapping - Constr_Graph_t current_subgraph_graph; - create_induced_subgraph( - instance.getComputationalDag(), current_subgraph_graph, current_subgraph_vertices_sorted); + ConstrGraphT currentSubgraphGraph; + CreateInducedSubgraph(instance.GetComputationalDag(), currentSubgraphGraph, currentSubgraphVerticesSorted); - MerkleHashComputer current_hasher(current_subgraph_graph); + MerkleHashComputer currentHasher(currentSubgraphGraph); - for (const auto &[hash, rep_orbit_nodes] : rep_hasher.get_orbits()) { - const auto ¤t_orbit_nodes = current_hasher.get_orbit_from_hash(hash); - for (size_t k = 0; k < rep_orbit_nodes.size(); ++k) { + for (const auto &[hash, repOrbitNodes] : repHasher.GetOrbits()) { + const auto ¤tOrbitNodes = currentHasher.GetOrbitFromHash(hash); + for (size_t k = 0; k < repOrbitNodes.size(); ++k) { // Map: current_subgraph_vertex -> representative_subgraph_local_idx - 
current_vertex_to_rep_local_idx[current_subgraph_vertices_sorted[current_orbit_nodes[k]]] - = static_cast>(rep_orbit_nodes[k]); + currentVertexToRepLocalIdx[currentSubgraphVerticesSorted[currentOrbitNodes[k]]] + = static_cast>(repOrbitNodes[k]); } } } // Apply the partition pattern - for (const auto ¤t_vertex : current_subgraph_vertices_sorted) { - const auto rep_local_idx = current_vertex_to_rep_local_idx.at(current_vertex); - auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(rep_local_idx), - bsp_schedule.assignedProcessor(rep_local_idx)); + for (const auto ¤tVertex : currentSubgraphVerticesSorted) { + const auto repLocalIdx = currentVertexToRepLocalIdx.at(currentVertex); + auto spPair + = std::make_pair(bspSchedule.AssignedSuperstep(repLocalIdx), bspSchedule.AssignedProcessor(repLocalIdx)); - if (max_bsp) { - sp_pair = std::make_pair(rep_local_idx, 0); + if (maxBsp) { + spPair = std::make_pair(repLocalIdx, 0); } - partition[current_vertex] = current_partition_idx + sp_proc_to_relative_partition.at(sp_pair); + partition[currentVertex] = currentPartitionIdx + spProcToRelativePartition.at(spPair); } - current_partition_idx += num_partitions_per_subgraph; + currentPartitionIdx += numPartitionsPerSubgraph; } } } diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp index 9b7c6856..956531df 100644 --- a/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp +++ b/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp @@ -35,24 +35,24 @@ namespace osp { * This class uses a backtracking algorithm pruned by Merkle hashes to * efficiently find the vertex-to-vertex mapping. * - * @tparam Graph_t The original graph type (for global vertex IDs). - * @tparam Constr_Graph_t The subgraph/contracted graph type. + * @tparam GraphT The original graph type (for global vertex IDs). + * @tparam ConstrGraphT The subgraph/contracted graph type. 
*/ -template +template class IsomorphismMapper { - using VertexC = vertex_idx_t; // Local vertex ID - using VertexG = vertex_idx_t; // Global vertex ID + using VertexC = VertexIdxT; // Local vertex ID + using VertexG = VertexIdxT; // Global vertex ID - const Constr_Graph_t &rep_graph; - const MerkleHashComputer rep_hasher; + const ConstrGraphT &repGraph_; + const MerkleHashComputer repHasher_; public: /** * @brief Constructs an IsomorphismMapper. * @param representative_graph The subgraph to use as the "pattern". */ - IsomorphismMapper(const Constr_Graph_t &representative_graph) - : rep_graph(representative_graph), rep_hasher(representative_graph), num_vertices(representative_graph.num_vertices()) {} + IsomorphismMapper(const ConstrGraphT &representativeGraph) + : repGraph_(representativeGraph), repHasher_(representativeGraph), numVertices_(representativeGraph.NumVertices()) {} virtual ~IsomorphismMapper() = default; @@ -64,146 +64,130 @@ class IsomorphismMapper { * @param current_graph The new isomorphic subgraph. * @return A map from `current_local_vertex_id` -> `representative_local_vertex_id`. */ - std::unordered_map find_mapping(const Constr_Graph_t ¤t_graph) const { - if (current_graph.num_vertices() != num_vertices) { + std::unordered_map FindMapping(const ConstrGraphT ¤tGraph) const { + if (currentGraph.NumVertices() != numVertices_) { throw std::runtime_error("IsomorphismMapper: Graph sizes do not match."); } - if (num_vertices == 0) { + if (numVertices_ == 0) { return {}; } // 1. Compute hashes and orbits for the current graph. - MerkleHashComputer current_hasher(current_graph); - const auto &rep_orbits = rep_hasher.get_orbits(); - const auto ¤t_orbits = current_hasher.get_orbits(); + MerkleHashComputer currentHasher(currentGraph); + const auto &repOrbits = repHasher_.GetOrbits(); + const auto ¤tOrbits = currentHasher.GetOrbits(); // 2. Verify that the orbit structures are identical. 
- if (rep_orbits.size() != current_orbits.size()) { + if (repOrbits.size() != currentOrbits.size()) { throw std::runtime_error("IsomorphismMapper: Graphs have a different number of orbits."); } - for (const auto &[hash, rep_orbit_nodes] : rep_orbits) { - auto it = current_orbits.find(hash); - if (it == current_orbits.end() || it->second.size() != rep_orbit_nodes.size()) { + for (const auto &[hash, repOrbitNodes] : repOrbits) { + auto it = currentOrbits.find(hash); + if (it == currentOrbits.end() || it->second.size() != repOrbitNodes.size()) { throw std::runtime_error("IsomorphismMapper: Mismatched orbit structure between graphs."); } } // 3. Iteratively map all components of the graph. - std::vector map_current_to_rep(num_vertices, std::numeric_limits::max()); - std::vector rep_is_mapped(num_vertices, false); - std::vector current_is_mapped(num_vertices, false); - size_t mapped_count = 0; + std::vector mapCurrentToRep(numVertices_, std::numeric_limits::max()); + std::vector repIsMapped(numVertices_, false); + std::vector currentIsMapped(numVertices_, false); + size_t mappedCount = 0; - while (mapped_count < num_vertices) { + while (mappedCount < numVertices_) { std::queue> q; // Find an unmapped vertex in the representative graph to seed the next component traversal. - VertexC rep_seed = std::numeric_limits::max(); - for (VertexC i = 0; i < num_vertices; ++i) { - if (!rep_is_mapped[i]) { - rep_seed = i; + VertexC repSeed = std::numeric_limits::max(); + for (VertexC i = 0; i < numVertices_; ++i) { + if (!repIsMapped[i]) { + repSeed = i; break; } } - if (rep_seed == std::numeric_limits::max()) { - break; // Should be unreachable if mapped_count < num_vertices + if (repSeed == std::numeric_limits::max()) { + break; // Should be unreachable if mapped_count < NumVertices } // Find a corresponding unmapped vertex in the current graph's orbit. 
- const auto &candidates = current_orbits.at(rep_hasher.get_vertex_hash(rep_seed)); - VertexC current_seed = std::numeric_limits::max(); // Should always be found + const auto &candidates = currentOrbits.at(repHasher_.GetVertexHash(repSeed)); + VertexC currentSeed = std::numeric_limits::max(); // Should always be found for (const auto &candidate : candidates) { - if (!current_is_mapped[candidate]) { - current_seed = candidate; + if (!currentIsMapped[candidate]) { + currentSeed = candidate; break; } } - if (current_seed == std::numeric_limits::max()) { + if (currentSeed == std::numeric_limits::max()) { throw std::runtime_error("IsomorphismMapper: Could not find an unmapped candidate to seed component mapping."); } // Seed the queue and start the traversal for this component. - q.push({rep_seed, current_seed}); - map_current_to_rep[rep_seed] = current_seed; - rep_is_mapped[rep_seed] = true; - current_is_mapped[current_seed] = true; - mapped_count++; + q.push({repSeed, currentSeed}); + mapCurrentToRep[repSeed] = currentSeed; + repIsMapped[repSeed] = true; + currentIsMapped[currentSeed] = true; + mappedCount++; while (!q.empty()) { auto [u_rep, u_curr] = q.front(); q.pop(); // Match neighbors (both parents and children) - match_neighbors(current_graph, - current_hasher, - u_rep, - u_curr, - map_current_to_rep, - rep_is_mapped, - current_is_mapped, - mapped_count, - q, - true); - match_neighbors(current_graph, - current_hasher, - u_rep, - u_curr, - map_current_to_rep, - rep_is_mapped, - current_is_mapped, - mapped_count, - q, - false); + MatchNeighbors( + currentGraph, currentHasher, u_rep, u_curr, mapCurrentToRep, repIsMapped, currentIsMapped, mappedCount, q, true); + MatchNeighbors( + currentGraph, currentHasher, u_rep, u_curr, mapCurrentToRep, repIsMapped, currentIsMapped, mappedCount, q, false); } } - if (mapped_count != num_vertices) { + if (mappedCount != numVertices_) { throw std::runtime_error("IsomorphismMapper: Failed to map all vertices."); } // 4. 
Return the inverted map. - std::unordered_map current_local_to_rep_local; - current_local_to_rep_local.reserve(num_vertices); - for (VertexC i = 0; i < num_vertices; ++i) { - current_local_to_rep_local[map_current_to_rep[i]] = i; + std::unordered_map currentLocalToRepLocal; + currentLocalToRepLocal.reserve(numVertices_); + for (VertexC i = 0; i < numVertices_; ++i) { + currentLocalToRepLocal[mapCurrentToRep[i]] = i; } - return current_local_to_rep_local; + return currentLocalToRepLocal; } private: - const size_t num_vertices; - - void match_neighbors(const Constr_Graph_t ¤t_graph, - const MerkleHashComputer ¤t_hasher, - VertexC u_rep, - VertexC u_curr, - std::vector &map_current_to_rep, - std::vector &rep_is_mapped, - std::vector ¤t_is_mapped, - size_t &mapped_count, - std::queue> &q, - bool match_children) const { - const auto &rep_neighbors_range = match_children ? rep_graph.children(u_rep) : rep_graph.parents(u_rep); - const auto &curr_neighbors_range = match_children ? current_graph.children(u_curr) : current_graph.parents(u_curr); - - for (const auto &v_rep : rep_neighbors_range) { - if (rep_is_mapped[v_rep]) { + const size_t numVertices_; + + void MatchNeighbors(const ConstrGraphT ¤tGraph, + const MerkleHashComputer ¤tHasher, + VertexC uRep, + VertexC uCurr, + std::vector &mapCurrentToRep, + std::vector &repIsMapped, + std::vector ¤tIsMapped, + size_t &mappedCount, + std::queue> &q, + bool matchChildren) const { + const auto &repNeighborsRange = matchChildren ? repGraph_.Children(uRep) : repGraph_.Parents(uRep); + const auto &currNeighborsRange = matchChildren ? 
currentGraph.Children(uCurr) : currentGraph.Parents(uCurr); + + for (const auto &vRep : repNeighborsRange) { + if (repIsMapped[vRep]) { continue; } - for (const auto &v_curr : curr_neighbors_range) { - if (current_is_mapped[v_curr]) { + for (const auto &vCurr : currNeighborsRange) { + if (currentIsMapped[vCurr]) { continue; } - if (rep_hasher.get_vertex_hash(v_rep) == current_hasher.get_vertex_hash(v_curr)) { - map_current_to_rep[v_rep] = v_curr; - rep_is_mapped[v_rep] = true; - current_is_mapped[v_curr] = true; - mapped_count++; - q.push({v_rep, v_curr}); - break; // Found a match for v_rep, move to the next rep neighbor. + if (repHasher_.GetVertexHash(vRep) == currentHasher.GetVertexHash(vCurr)) { + mapCurrentToRep[vRep] = vCurr; + repIsMapped[vRep] = true; + currentIsMapped[vCurr] = true; + mappedCount++; + q.push({vRep, vCurr}); + break; // Found a match for vRep, move to the next rep neighbor. } } } diff --git a/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp b/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp index f7bc7106..030fa15f 100644 --- a/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp +++ b/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp @@ -38,107 +38,107 @@ namespace osp { * and the sorted hashes of its parents (or children, depending on the `forward` template parameter). * This allows for the identification of structurally isomorphic subgraphs. * - * @tparam Graph_t The type of the graph, must satisfy the `directed_graph` concept. - * @tparam node_hash_func_t A functor that computes a hash for a single node. - * Defaults to `uniform_node_hash_func`. + * @tparam GraphT The type of the graph, must satisfy the `directed_graph` concept. + * @tparam NodeHashFuncT A functor that computes a hash for a single node. + * Defaults to `UniformNodeHashFunc`. * @tparam forward If true, hashes are computed based on parents (top-down). 
* If false, hashes are computed based on children (bottom-up). */ -template >, bool forward = true> -class MerkleHashComputer : public HashComputer> { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - static_assert(std::is_invocable_r>::value, - "node_hash_func_t must be invocable with one vertex_idx_t argument and return std::size_t."); +template >, bool forward = true> +class MerkleHashComputer : public HashComputer> { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + static_assert(std::is_invocable_r>::value, + "NodeHashFuncT must be invocable with one VertexIdxT argument and return std::size_t."); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::vector vertex_hashes; - std::unordered_map> orbits; + std::vector vertexHashes_; + std::unordered_map> orbits_; - node_hash_func_t node_hash_func; + NodeHashFuncT nodeHashFunc_; - inline void compute_hashes_helper(const VertexType &v, std::vector &parent_child_hashes) { - std::sort(parent_child_hashes.begin(), parent_child_hashes.end()); + inline void ComputeHashesHelper(const VertexType &v, std::vector &parentChildHashes) { + std::sort(parentChildHashes.begin(), parentChildHashes.end()); - std::size_t hash = node_hash_func(v); - for (const auto &pc_hash : parent_child_hashes) { - hash_combine(hash, pc_hash); + std::size_t hash = nodeHashFunc_(v); + for (const auto &pcHash : parentChildHashes) { + HashCombine(hash, pcHash); } - vertex_hashes[v] = hash; + vertexHashes_[v] = hash; - if (orbits.find(hash) == orbits.end()) { - orbits[hash] = {v}; + if (orbits_.find(hash) == orbits_.end()) { + orbits_[hash] = {v}; } else { - orbits[hash].push_back(v); + orbits_[hash].push_back(v); } } template - std::enable_if_t compute_hashes(const Graph_t &graph) { - vertex_hashes.resize(graph.num_vertices()); + std::enable_if_t ComputeHashes(const GraphT &graph) { + vertexHashes_.resize(graph.NumVertices()); - for (const VertexType &v : 
top_sort_view(graph)) { - std::vector parent_hashes; - for (const VertexType &parent : graph.parents(v)) { - parent_hashes.push_back(vertex_hashes[parent]); + for (const VertexType &v : TopSortView(graph)) { + std::vector parentHashes; + for (const VertexType &parent : graph.Parents(v)) { + parentHashes.push_back(vertexHashes_[parent]); } - compute_hashes_helper(v, parent_hashes); + ComputeHashesHelper(v, parentHashes); } } template - std::enable_if_t compute_hashes(const Graph_t &graph) { - vertex_hashes.resize(graph.num_vertices()); + std::enable_if_t ComputeHashes(const GraphT &graph) { + vertexHashes_.resize(graph.NumVertices()); - const auto top_sort = GetTopOrderReverse(graph); - for (auto it = top_sort.cbegin(); it != top_sort.cend(); ++it) { + const auto topSort = GetTopOrderReverse(graph); + for (auto it = topSort.cbegin(); it != topSort.cend(); ++it) { const VertexType &v = *it; - std::vector child_hashes; - for (const VertexType &child : graph.children(v)) { - child_hashes.push_back(vertex_hashes[child]); + std::vector childHashes; + for (const VertexType &child : graph.Children(v)) { + childHashes.push_back(vertexHashes_[child]); } - compute_hashes_helper(v, child_hashes); + ComputeHashesHelper(v, childHashes); } } public: template - MerkleHashComputer(const Graph_t &graph_, Args &&...args) - : HashComputer(), node_hash_func(std::forward(args)...) { - compute_hashes(graph_); + MerkleHashComputer(const GraphT &graph, Args &&...args) + : HashComputer(), nodeHashFunc_(std::forward(args)...) 
{ + ComputeHashes(graph); } virtual ~MerkleHashComputer() override = default; - inline std::size_t get_vertex_hash(const VertexType &v) const override { return vertex_hashes[v]; } + inline std::size_t GetVertexHash(const VertexType &v) const override { return vertexHashes_[v]; } - inline const std::vector &get_vertex_hashes() const override { return vertex_hashes; } + inline const std::vector &GetVertexHashes() const override { return vertexHashes_; } - inline std::size_t num_orbits() const override { return orbits.size(); } + inline std::size_t NumOrbits() const override { return orbits_.size(); } - inline const std::vector &get_orbit(const VertexType &v) const override { - return this->get_orbit_from_hash(this->get_vertex_hash(v)); + inline const std::vector &GetOrbit(const VertexType &v) const override { + return this->GetOrbitFromHash(this->GetVertexHash(v)); } - inline const std::unordered_map> &get_orbits() const override { return orbits; } + inline const std::unordered_map> &GetOrbits() const override { return orbits_; } - inline const std::vector &get_orbit_from_hash(const std::size_t &hash) const override { return orbits.at(hash); } + inline const std::vector &GetOrbitFromHash(const std::size_t &hash) const override { return orbits_.at(hash); } }; -template >, bool Forward = true> -bool are_isomorphic_by_merkle_hash(const Graph_t &g1, const Graph_t &g2) { +template >, bool forward = true> +bool AreIsomorphicByMerkleHash(const GraphT &g1, const GraphT &g2) { // Basic check: Different numbers of vertices or edges mean they can't be isomorphic. 
- if (g1.num_vertices() != g2.num_vertices() || g1.num_edges() != g2.num_edges()) { + if (g1.NumVertices() != g2.NumVertices() || g1.NumEdges() != g2.NumEdges()) { return false; } // --- Compute Hashes in the Specified Direction --- - MerkleHashComputer hash1(g1); - MerkleHashComputer hash2(g2); + MerkleHashComputer hash1(g1); + MerkleHashComputer hash2(g2); - const auto &orbits1 = hash1.get_orbits(); - const auto &orbits2 = hash2.get_orbits(); + const auto &orbits1 = hash1.GetOrbits(); + const auto &orbits2 = hash2.GetOrbits(); if (orbits1.size() != orbits2.size()) { return false; @@ -146,10 +146,10 @@ bool are_isomorphic_by_merkle_hash(const Graph_t &g1, const Graph_t &g2) { for (const auto &pair : orbits1) { const std::size_t hash = pair.first; - const auto &orbit_vec = pair.second; + const auto &orbitVec = pair.second; auto it = orbits2.find(hash); - if (it == orbits2.end() || it->second.size() != orbit_vec.size()) { + if (it == orbits2.end() || it->second.size() != orbitVec.size()) { return false; } } @@ -157,23 +157,23 @@ bool are_isomorphic_by_merkle_hash(const Graph_t &g1, const Graph_t &g2) { return true; } -template -struct bwd_merkle_node_hash_func { - MerkleHashComputer>, false> bw_merkle_hash; +template +struct BwdMerkleNodeHashFunc { + MerkleHashComputer>, false> bwMerkleHash_; - bwd_merkle_node_hash_func(const Graph_t &graph) : bw_merkle_hash(graph) {} + BwdMerkleNodeHashFunc(const GraphT &graph) : bwMerkleHash_(graph) {} - std::size_t operator()(const vertex_idx_t &v) const { return bw_merkle_hash.get_vertex_hash(v); } + std::size_t operator()(const VertexIdxT &v) const { return bwMerkleHash_.GetVertexHash(v); } }; -template -struct precom_bwd_merkle_node_hash_func { - MerkleHashComputer>, false> bw_merkle_hash; +template +struct PrecomBwdMerkleNodeHashFunc { + MerkleHashComputer>, false> bwMerkleHash_; - precom_bwd_merkle_node_hash_func(const Graph_t &graph, const std::vector &node_hashes) - : bw_merkle_hash(graph, node_hashes) {} + 
PrecomBwdMerkleNodeHashFunc(const GraphT &graph, const std::vector &nodeHashes) + : bwMerkleHash_(graph, nodeHashes) {} - std::size_t operator()(const vertex_idx_t &v) const { return bw_merkle_hash.get_vertex_hash(v); } + std::size_t operator()(const VertexIdxT &v) const { return bwMerkleHash_.GetVertexHash(v); } }; } // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp b/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp index 03bff72d..070521a0 100644 --- a/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp +++ b/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp @@ -42,7 +42,7 @@ namespace osp { * It then partitions the DAG by grouping all nodes with the same hash into an "orbit". * A coarse graph is constructed where each node represents one such orbit. */ -template +template class OrbitGraphProcessor { public: /** @@ -63,159 +63,160 @@ class OrbitGraphProcessor { NATURAL_BREAKS }; - static_assert(is_computational_dag_v, "Graph must be a computational DAG"); - static_assert(is_computational_dag_v, "Constr_Graph_t must be a computational DAG"); - static_assert(is_constructable_cdag_v, "Constr_Graph_t must satisfy the constructable_cdag_vertex concept"); - static_assert(std::is_same_v, vertex_idx_t>, - "Graph_t and Constr_Graph_t must have the same vertex_idx types"); + static_assert(isComputationalDagV, "Graph must be a computational DAG"); + static_assert(isComputationalDagV, "ConstrGraphT must be a computational DAG"); + static_assert(isConstructableCdagV, "ConstrGraphT must satisfy the constructable_cdag_vertex concept"); + static_assert(std::is_same_v, VertexIdxT>, + "GraphT and ConstrGraphT must have the same VertexIdx types"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - static constexpr bool verbose = false; + static constexpr bool verbose_ = false; // Represents a group of isomorphic subgraphs, corresponding to a single node in a 
coarse graph. struct Group { // Each vector of vertices represents one of the isomorphic subgraphs in this group. - std::vector> subgraphs; + std::vector> subgraphs_; - inline size_t size() const { return subgraphs.size(); } + inline size_t size() const { return subgraphs_.size(); } }; private: // Results from the first (orbit) coarsening step - Constr_Graph_t coarse_graph_; - std::vector contraction_map_; + ConstrGraphT coarseGraph_; + std::vector contractionMap_; // Results from the second (custom) coarsening step - Constr_Graph_t final_coarse_graph_; - std::vector final_contraction_map_; - std::vector final_groups_; - size_t current_symmetry; + ConstrGraphT finalCoarseGraph_; + std::vector finalContractionMap_; + std::vector finalGroups_; + size_t currentSymmetry_; - size_t min_symmetry_ = 2; // min symmetry threshold - v_workw_t work_threshold_ = 0; - v_workw_t critical_path_threshold_ = 0; - bool merge_different_node_types_ = true; - double lock_orbit_ratio = 0.5; + size_t minSymmetry_ = 2; // min symmetry threshold + VWorkwT workThreshold_ = 0; + VWorkwT criticalPathThreshold_ = 0; + bool mergeDifferentNodeTypes_ = true; + double lockOrbitRatio_ = 0.5; - SymmetryLevelHeuristic symmetry_level_heuristic_ = SymmetryLevelHeuristic::NATURAL_BREAKS; - std::vector work_percentiles_ = {0.50, 0.75}; - double natural_breaks_count_percentage_ = 0.2; + SymmetryLevelHeuristic symmetryLevelHeuristic_ = SymmetryLevelHeuristic::NATURAL_BREAKS; + std::vector workPercentiles_ = {0.50, 0.75}; + double naturalBreaksCountPercentage_ = 0.2; - bool use_adaptive_symmetry_threshold_ = true; + bool useAdaptiveSymmetryThreshold_ = true; struct PairHasher { template std::size_t operator()(const std::pair &p) const { auto h1 = std::hash{}(p.first); auto h2 = std::hash{}(p.second); - return h1 ^ (h2 << 1); + HashCombine(h1, h2); + return h1; } }; - std::unordered_set, PairHasher> non_viable_edges_cache_; - std::unordered_set, PairHasher> non_viable_crit_path_edges_cache_; + 
std::unordered_set, PairHasher> nonViableEdgesCache_; + std::unordered_set, PairHasher> nonViableCritPathEdgesCache_; /** * @brief Simulates the merge of node v into u and returns the resulting temporary graph. */ - std::pair> simulate_merge(VertexType u, - VertexType v, - const Constr_Graph_t ¤t_coarse_graph) const { - std::vector temp_contraction_map(current_coarse_graph.num_vertices()); - VertexType new_idx = 0; - for (VertexType i = 0; i < static_cast(temp_contraction_map.size()); ++i) { + std::pair> SimulateMerge(VertexType u, + VertexType v, + const ConstrGraphT ¤tCoarseGraph) const { + std::vector tempContractionMap(currentCoarseGraph.NumVertices()); + VertexType newIdx = 0; + for (VertexType i = 0; i < static_cast(tempContractionMap.size()); ++i) { if (i != v) { - temp_contraction_map[i] = new_idx++; + tempContractionMap[i] = newIdx++; } } - temp_contraction_map[v] = temp_contraction_map[u]; + tempContractionMap[v] = tempContractionMap[u]; - Constr_Graph_t temp_coarse_graph; - coarser_util::construct_coarse_dag(current_coarse_graph, temp_coarse_graph, temp_contraction_map); + ConstrGraphT tempCoarseGraph; + coarser_util::ConstructCoarseDag(currentCoarseGraph, tempCoarseGraph, tempContractionMap); - return {std::move(temp_coarse_graph), std::move(temp_contraction_map)}; + return {std::move(tempCoarseGraph), std::move(tempContractionMap)}; } /** * @brief Commits a merge operation by updating the graph state. 
*/ - void commit_merge(VertexType u, - VertexType v, - Constr_Graph_t &&next_coarse_graph, - const std::vector &group_remap, - std::vector> &&new_subgraphs, - Constr_Graph_t ¤t_coarse_graph, - std::vector ¤t_groups, - std::vector ¤t_contraction_map) { - current_coarse_graph = std::move(next_coarse_graph); + void CommitMerge(VertexType u, + VertexType v, + ConstrGraphT &&nextCoarseGraph, + const std::vector &groupRemap, + std::vector> &&newSubgraphs, + ConstrGraphT ¤tCoarseGraph, + std::vector ¤tGroups, + std::vector ¤tContractionMap) { + currentCoarseGraph = std::move(nextCoarseGraph); // Update caches for new vertex indices - std::unordered_set, PairHasher> next_non_viable_edges; - for (const auto &non_viable_edge : non_viable_edges_cache_) { - const VertexType old_u = non_viable_edge.first; - const VertexType old_v = non_viable_edge.second; - const VertexType new_u = group_remap[old_u]; - const VertexType new_v = group_remap[old_v]; - - if (old_u != v && old_v != v && new_u != new_v) { - next_non_viable_edges.insert({new_u, new_v}); + std::unordered_set, PairHasher> nextNonViableEdges; + for (const auto &nonViableEdge : nonViableEdgesCache_) { + const VertexType oldU = nonViableEdge.first; + const VertexType oldV = nonViableEdge.second; + const VertexType newU = groupRemap[oldU]; + const VertexType newV = groupRemap[oldV]; + + if (oldU != v && oldV != v && newU != newV) { + nextNonViableEdges.insert({newU, newV}); } } - non_viable_edges_cache_ = std::move(next_non_viable_edges); + nonViableEdgesCache_ = std::move(nextNonViableEdges); - std::unordered_set, PairHasher> next_non_viable_crit_path_edges; - for (const auto &non_viable_edge : non_viable_crit_path_edges_cache_) { - const VertexType old_u = non_viable_edge.first; - const VertexType old_v = non_viable_edge.second; - const VertexType new_u = group_remap[old_u]; - const VertexType new_v = group_remap[old_v]; + std::unordered_set, PairHasher> nextNonViableCritPathEdges; + for (const auto &nonViableEdge : 
nonViableCritPathEdgesCache_) { + const VertexType oldU = nonViableEdge.first; + const VertexType oldV = nonViableEdge.second; + const VertexType newU = groupRemap[oldU]; + const VertexType newV = groupRemap[oldV]; - if (old_u != v && old_v != v && new_u != new_v) { - next_non_viable_crit_path_edges.insert({new_u, new_v}); + if (oldU != v && oldV != v && newU != newV) { + nextNonViableCritPathEdges.insert({newU, newV}); } } - non_viable_crit_path_edges_cache_ = std::move(next_non_viable_crit_path_edges); + nonViableCritPathEdgesCache_ = std::move(nextNonViableCritPathEdges); // Update groups - std::vector next_groups(current_coarse_graph.num_vertices()); - for (VertexType i = 0; i < static_cast(current_groups.size()); ++i) { + std::vector nextGroups(currentCoarseGraph.NumVertices()); + for (VertexType i = 0; i < static_cast(currentGroups.size()); ++i) { if (i != u && i != v) { - next_groups[group_remap[i]] = std::move(current_groups[i]); + nextGroups[groupRemap[i]] = std::move(currentGroups[i]); } } - next_groups[group_remap[u]].subgraphs = std::move(new_subgraphs); - current_groups = std::move(next_groups); + nextGroups[groupRemap[u]].subgraphs_ = std::move(newSubgraphs); + currentGroups = std::move(nextGroups); // Update main contraction map - for (VertexType &node_map : current_contraction_map) { - node_map = group_remap[node_map]; + for (VertexType &nodeMap : currentContractionMap) { + nodeMap = groupRemap[nodeMap]; } } /** * @brief Merges small orbits based on work threshold (final cleanup pass). 
*/ - void merge_small_orbits(const Graph_t &original_dag, - Constr_Graph_t ¤t_coarse_graph, - std::vector ¤t_groups, - std::vector ¤t_contraction_map, - const v_workw_t work_threshold, - const v_workw_t path_threshold = 0) { + void MergeSmallOrbits(const GraphT &originalDag, + ConstrGraphT ¤tCoarseGraph, + std::vector ¤tGroups, + std::vector ¤tContractionMap, + const VWorkwT workThreshold, + const VWorkwT pathThreshold = 0) { bool changed = true; while (changed) { - const std::vector> vertexPoset - = get_top_node_distance>(current_coarse_graph); - const std::vector> vertexBotPoset - = get_bottom_node_distance>(current_coarse_graph); + const std::vector> vertexPoset + = GetTopNodeDistance>(currentCoarseGraph); + const std::vector> vertexBotPoset + = GetBottomNodeDistance>(currentCoarseGraph); changed = false; - for (const auto u : current_coarse_graph.vertices()) { - for (const auto v : current_coarse_graph.children(u)) { - if constexpr (has_typed_vertices_v) { - if (not merge_different_node_types_) { - if (current_coarse_graph.vertex_type(u) != current_coarse_graph.vertex_type(v)) { - if constexpr (verbose) { + for (const auto u : currentCoarseGraph.Vertices()) { + for (const auto v : currentCoarseGraph.Children(u)) { + if constexpr (hasTypedVerticesV) { + if (not mergeDifferentNodeTypes_) { + if (currentCoarseGraph.VertexType(u) != currentCoarseGraph.VertexType(v)) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " not viable (different node types)\n"; } continue; @@ -223,29 +224,29 @@ class OrbitGraphProcessor { } } - if (non_viable_edges_cache_.count({u, v}) || non_viable_crit_path_edges_cache_.count({u, v})) { - if constexpr (verbose) { + if (nonViableEdgesCache_.count({u, v}) || nonViableCritPathEdgesCache_.count({u, v})) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " already checked. 
Skipping.\n"; } continue; } - const v_workw_t u_work_weight = current_coarse_graph.vertex_work_weight(u); - const v_workw_t v_work_weight = current_coarse_graph.vertex_work_weight(v); - const v_workw_t v_threshold - = work_threshold * static_cast>(current_groups[v].size()); - const v_workw_t u_threshold - = work_threshold * static_cast>(current_groups[u].size()); + const VWorkwT uWorkWeight = currentCoarseGraph.VertexWorkWeight(u); + const VWorkwT vWorkWeight = currentCoarseGraph.VertexWorkWeight(v); + const VWorkwT vThreshold + = workThreshold * static_cast>(currentGroups[v].size()); + const VWorkwT uThreshold + = workThreshold * static_cast>(currentGroups[u].size()); - if (u_work_weight > u_threshold && v_work_weight > v_threshold) { - if constexpr (verbose) { + if (uWorkWeight > uThreshold && vWorkWeight > vThreshold) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " not viable (work threshold)\n"; } continue; } if ((vertexPoset[u] + 1 != vertexPoset[v]) && (vertexBotPoset[u] != 1 + vertexBotPoset[v])) { - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " not viable poset. 
poste v: " << vertexBotPoset[v] << " poste u: " << vertexBotPoset[u] << "\n"; @@ -253,46 +254,46 @@ class OrbitGraphProcessor { continue; } - std::vector> new_subgraphs; - const bool merge_is_valid = is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs); + std::vector> newSubgraphs; + const bool mergeIsValid = IsMergeViable(originalDag, currentGroups[u], currentGroups[v], newSubgraphs); - if (!merge_is_valid) { - if constexpr (verbose) { + if (!mergeIsValid) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " and " << v << " not viable (error in is_merge_viable)\n"; } - non_viable_edges_cache_.insert({u, v}); + nonViableEdgesCache_.insert({u, v}); continue; } - auto [temp_coarse_graph, temp_contraction_map] = simulate_merge(u, v, current_coarse_graph); + auto [tempCoarseGraph, tempContractionMap] = SimulateMerge(u, v, currentCoarseGraph); - if (critical_path_weight(temp_coarse_graph) - > (path_threshold * static_cast>(new_subgraphs.size()) - + critical_path_weight(current_coarse_graph))) { - if constexpr (verbose) { - std::cout << " - Merge of " << u << " and " << v << " increases critical path. Old cirtical path: " - << critical_path_weight(current_coarse_graph) - << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + " - << path_threshold * static_cast>(new_subgraphs.size()) << "\n"; + if (CriticalPathWeight(tempCoarseGraph) + > (pathThreshold * static_cast>(newSubgraphs.size()) + + CriticalPathWeight(currentCoarseGraph))) { + if constexpr (verbose_) { + std::cout << " - Merge of " << u << " and " << v + << " increases critical path. 
Old cirtical path: " << CriticalPathWeight(currentCoarseGraph) + << " new critical path: " << CriticalPathWeight(tempCoarseGraph) << " + " + << pathThreshold * static_cast>(newSubgraphs.size()) << "\n"; } - non_viable_crit_path_edges_cache_.insert({u, v}); + nonViableCritPathEdgesCache_.insert({u, v}); continue; } - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << " - Merging " << v << " into " << u << ". New coarse graph has " - << temp_coarse_graph.num_vertices() << " nodes.\n"; + << tempCoarseGraph.NumVertices() << " nodes.\n"; } - commit_merge(u, - v, - std::move(temp_coarse_graph), - temp_contraction_map, - std::move(new_subgraphs), - current_coarse_graph, - current_groups, - current_contraction_map); + CommitMerge(u, + v, + std::move(tempCoarseGraph), + tempContractionMap, + std::move(newSubgraphs), + currentCoarseGraph, + currentGroups, + currentContractionMap); changed = true; break; @@ -307,31 +308,31 @@ class OrbitGraphProcessor { /** * @brief Deprecated non-adaptive merge function. 
*/ - void contract_edges(const Graph_t &original_dag, - Constr_Graph_t ¤t_coarse_graph, - std::vector ¤t_groups, - std::vector ¤t_contraction_map, - const bool merge_symmetry_narrowing, - const bool merge_different_node_types, - const v_workw_t path_threshold = 0) { + void ContractEdges(const GraphT &originalDag, + ConstrGraphT ¤tCoarseGraph, + std::vector ¤tGroups, + std::vector ¤tContractionMap, + const bool mergeSymmetryNarrowing, + const bool mergeDifferentNodeTypes, + const VWorkwT pathThreshold = 0) { bool changed = true; while (changed) { - const std::vector> vertexPoset - = get_top_node_distance>(current_coarse_graph); - const std::vector> vertexBotPoset - = get_bottom_node_distance>(current_coarse_graph); + const std::vector> vertexPoset + = GetTopNodeDistance>(currentCoarseGraph); + const std::vector> vertexBotPoset + = GetBottomNodeDistance>(currentCoarseGraph); changed = false; - for (const auto &edge : edges(current_coarse_graph)) { - VertexType u = source(edge, current_coarse_graph); - VertexType v = target(edge, current_coarse_graph); + for (const auto &edge : Edges(currentCoarseGraph)) { + VertexType u = Source(edge, currentCoarseGraph); + VertexType v = Target(edge, currentCoarseGraph); - if (non_viable_edges_cache_.count({u, v}) || non_viable_crit_path_edges_cache_.count({u, v})) { + if (nonViableEdgesCache_.count({u, v}) || nonViableCritPathEdgesCache_.count({u, v})) { continue; } - if constexpr (has_typed_vertices_v) { - if (not merge_different_node_types) { - if (current_coarse_graph.vertex_type(u) != current_coarse_graph.vertex_type(v)) { + if constexpr (hasTypedVerticesV) { + if (not mergeDifferentNodeTypes) { + if (currentCoarseGraph.VertexType(u) != currentCoarseGraph.VertexType(v)) { continue; } } @@ -340,46 +341,45 @@ class OrbitGraphProcessor { continue; } - std::vector> new_subgraphs; - const std::size_t u_size = current_groups[u].size(); - const std::size_t v_size = current_groups[v].size(); - const bool merge_is_valid = 
is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs); - const std::size_t new_size = new_subgraphs.size(); + std::vector> newSubgraphs; + const std::size_t uSize = currentGroups[u].size(); + const std::size_t vSize = currentGroups[v].size(); + const bool mergeIsValid = IsMergeViable(originalDag, currentGroups[u], currentGroups[v], newSubgraphs); + const std::size_t newSize = newSubgraphs.size(); - const bool merge_viable = (new_size >= current_symmetry); - const bool both_below_symmetry_threshold = (u_size < current_symmetry) && (v_size < current_symmetry); + const bool mergeViable = (newSize >= currentSymmetry_); + const bool bothBelowSymmetryThreshold = (uSize < currentSymmetry_) && (vSize < currentSymmetry_); - if (!merge_is_valid) { - non_viable_edges_cache_.insert({u, v}); + if (!mergeIsValid) { + nonViableEdgesCache_.insert({u, v}); continue; } - if (!merge_viable && !both_below_symmetry_threshold) { - non_viable_edges_cache_.insert({u, v}); + if (!mergeViable && !bothBelowSymmetryThreshold) { + nonViableEdgesCache_.insert({u, v}); continue; } - if (not merge_symmetry_narrowing) { - if (new_size < std::min(u_size, v_size)) { + if (not mergeSymmetryNarrowing) { + if (newSize < std::min(uSize, vSize)) { continue; } } - auto [temp_coarse_graph, temp_contraction_map] = simulate_merge(u, v, current_coarse_graph); + auto [tempCoarseGraph, tempContractionMap] = SimulateMerge(u, v, currentCoarseGraph); - if (critical_path_weight(temp_coarse_graph) - > (path_threshold * static_cast>(new_subgraphs.size()) - + critical_path_weight(current_coarse_graph))) { - non_viable_crit_path_edges_cache_.insert({u, v}); + if (CriticalPathWeight(tempCoarseGraph) > (pathThreshold * static_cast>(newSubgraphs.size()) + + CriticalPathWeight(currentCoarseGraph))) { + nonViableCritPathEdgesCache_.insert({u, v}); continue; } - commit_merge(u, - v, - std::move(temp_coarse_graph), - temp_contraction_map, - std::move(new_subgraphs), - current_coarse_graph, - 
current_groups, - current_contraction_map); + CommitMerge(u, + v, + std::move(tempCoarseGraph), + tempContractionMap, + std::move(newSubgraphs), + currentCoarseGraph, + currentGroups, + currentContractionMap); changed = true; break; } @@ -389,37 +389,37 @@ class OrbitGraphProcessor { /** * @brief Core adaptive merging function. */ - void contract_edges_adpative_sym(const Graph_t &original_dag, - Constr_Graph_t ¤t_coarse_graph, - std::vector ¤t_groups, - std::vector ¤t_contraction_map, - const bool merge_different_node_types, - const bool merge_below_threshold, - const std::vector> &lock_threshold_per_type, - const v_workw_t path_threshold = 0) { + void ContractEdgesAdpativeSym(const GraphT &originalDag, + ConstrGraphT ¤tCoarseGraph, + std::vector ¤tGroups, + std::vector ¤tContractionMap, + const bool mergeDifferentNodeTypes, + const bool mergeBelowThreshold, + const std::vector> &lockThresholdPerType, + const VWorkwT pathThreshold = 0) { bool changed = true; while (changed) { - const std::vector> vertexPoset - = get_top_node_distance>(current_coarse_graph); - const std::vector> vertexBotPoset - = get_bottom_node_distance>(current_coarse_graph); + const std::vector> vertexPoset + = GetTopNodeDistance>(currentCoarseGraph); + const std::vector> vertexBotPoset + = GetBottomNodeDistance>(currentCoarseGraph); changed = false; - for (const auto &edge : edges(current_coarse_graph)) { - VertexType u = source(edge, current_coarse_graph); - VertexType v = target(edge, current_coarse_graph); + for (const auto &edge : Edges(currentCoarseGraph)) { + VertexType u = Source(edge, currentCoarseGraph); + VertexType v = Target(edge, currentCoarseGraph); - if (non_viable_edges_cache_.count({u, v}) || non_viable_crit_path_edges_cache_.count({u, v})) { - if constexpr (verbose) { + if (nonViableEdgesCache_.count({u, v}) || nonViableCritPathEdgesCache_.count({u, v})) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " already checked. 
Skipping.\n"; } continue; } - if constexpr (has_typed_vertices_v) { - if (not merge_different_node_types) { - if (current_coarse_graph.vertex_type(u) != current_coarse_graph.vertex_type(v)) { - if constexpr (verbose) { + if constexpr (hasTypedVerticesV) { + if (not mergeDifferentNodeTypes) { + if (currentCoarseGraph.VertexType(u) != currentCoarseGraph.VertexType(v)) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " not viable (different node types)\n"; } continue; @@ -428,114 +428,112 @@ class OrbitGraphProcessor { } if ((vertexPoset[u] + 1 != vertexPoset[v]) && (vertexBotPoset[u] != 1 + vertexBotPoset[v])) { - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " not viable poset. poste v: " << vertexBotPoset[v] << " poste u: " << vertexBotPoset[u] << "\n"; } continue; } - std::vector> new_subgraphs; - const std::size_t u_size = current_groups[u].size(); - const std::size_t v_size = current_groups[v].size(); + std::vector> newSubgraphs; + const std::size_t uSize = currentGroups[u].size(); + const std::size_t vSize = currentGroups[v].size(); - const bool merge_is_valid = is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs); - const std::size_t new_size = new_subgraphs.size(); + const bool mergeIsValid = IsMergeViable(originalDag, currentGroups[u], currentGroups[v], newSubgraphs); + const std::size_t newSize = newSubgraphs.size(); - if (!merge_is_valid) { - if constexpr (verbose) { + if (!mergeIsValid) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " and " << v << " not viable (error in is_merge_viable)\n"; } - non_viable_edges_cache_.insert({u, v}); + nonViableEdgesCache_.insert({u, v}); continue; } - const bool merge_viable = (new_size >= current_symmetry); - const bool both_below_minimal_threshold = merge_below_threshold && (u_size < min_symmetry_) - && (v_size < min_symmetry_); + const bool mergeViable = (newSize 
>= currentSymmetry_); + const bool bothBelowMinimalThreshold = mergeBelowThreshold && (uSize < minSymmetry_) && (vSize < minSymmetry_); - if (!merge_viable && !both_below_minimal_threshold) { - if constexpr (verbose) { + if (!mergeViable && !bothBelowMinimalThreshold) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " not viable (Symmetry Threshold)\n"; - std::cout << " - u_sym: " << u_size << ", v_sym: " << v_size << " -> new_sym: " << new_size - << " (current_threshold: " << current_symmetry << ", global_min_threshold: " << min_symmetry_ + std::cout << " - u_sym: " << uSize << ", v_sym: " << vSize << " -> new_sym: " << newSize + << " (current_threshold: " << currentSymmetry_ << ", global_min_threshold: " << minSymmetry_ << ")\n"; } - non_viable_edges_cache_.insert({u, v}); + nonViableEdgesCache_.insert({u, v}); continue; } - v_type_t u_type = 0; - v_type_t v_type = 0; - if (not merge_different_node_types && has_typed_vertices_v) { - u_type = current_coarse_graph.vertex_type(u); - v_type = current_coarse_graph.vertex_type(v); + VTypeT uType = 0; + VTypeT vType = 0; + if (not mergeDifferentNodeTypes && hasTypedVerticesV) { + uType = currentCoarseGraph.VertexType(u); + vType = currentCoarseGraph.VertexType(v); } - const bool u_is_significant = (u_size >= min_symmetry_) - && (current_coarse_graph.vertex_work_weight(u) > lock_threshold_per_type[u_type]); - const bool v_is_significant = (v_size >= min_symmetry_) - && (current_coarse_graph.vertex_work_weight(v) > lock_threshold_per_type[v_type]); + const bool uIsSignificant = (uSize >= minSymmetry_) + && (currentCoarseGraph.VertexWorkWeight(u) > lockThresholdPerType[uType]); + const bool vIsSignificant = (vSize >= minSymmetry_) + && (currentCoarseGraph.VertexWorkWeight(v) > lockThresholdPerType[vType]); - if (u_is_significant && v_is_significant) { + if (uIsSignificant && vIsSignificant) { // Both are significant --- - if (new_size < std::min(u_size, v_size)) { - if constexpr (verbose) { 
+ if (newSize < std::min(uSize, vSize)) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " not viable (Symmetry Narrowing below min of two significant nodes)\n"; - std::cout << " - u_sym: " << u_size << ", v_sym: " << v_size << " -> new_sym: " << new_size << "\n"; + std::cout << " - u_sym: " << uSize << ", v_sym: " << vSize << " -> new_sym: " << newSize << "\n"; } - non_viable_edges_cache_.insert({u, v}); + nonViableEdgesCache_.insert({u, v}); continue; } - } else if (u_is_significant || v_is_significant) { + } else if (uIsSignificant || vIsSignificant) { // Exactly one is significant --- - const std::size_t significant_node_size = u_is_significant ? u_size : v_size; + const std::size_t significantNodeSize = uIsSignificant ? uSize : vSize; - if (new_size < significant_node_size) { - if constexpr (verbose) { + if (newSize < significantNodeSize) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v << " not viable (Symmetry Narrowing of a single significant node)\n"; - std::cout << " - u_sym: " << u_size << " (sig: " << u_is_significant << ")" - << ", v_sym: " << v_size << " (sig: " << v_is_significant << ")" - << " -> new_sym: " << new_size << "\n"; + std::cout << " - u_sym: " << uSize << " (sig: " << uIsSignificant << ")" + << ", v_sym: " << vSize << " (sig: " << vIsSignificant << ")" + << " -> new_sym: " << newSize << "\n"; } - non_viable_edges_cache_.insert({u, v}); + nonViableEdgesCache_.insert({u, v}); continue; } } // Critical Path Check - auto [temp_coarse_graph, temp_contraction_map] = simulate_merge(u, v, current_coarse_graph); + auto [tempCoarseGraph, tempContractionMap] = SimulateMerge(u, v, currentCoarseGraph); - if (critical_path_weight(temp_coarse_graph) - > (path_threshold * static_cast>(new_subgraphs.size()) - + critical_path_weight(current_coarse_graph))) { - if constexpr (verbose) { + if (CriticalPathWeight(tempCoarseGraph) > (pathThreshold * static_cast>(newSubgraphs.size()) + + 
CriticalPathWeight(currentCoarseGraph))) { + if constexpr (verbose_) { std::cout << " - Merge of " << u << " and " << v - << " increases critical path. Old cirtical path: " << critical_path_weight(current_coarse_graph) - << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + " - << path_threshold * static_cast>(new_subgraphs.size()) << "\n"; + << " increases critical path. Old cirtical path: " << CriticalPathWeight(currentCoarseGraph) + << " new critical path: " << CriticalPathWeight(tempCoarseGraph) << " + " + << pathThreshold * static_cast>(newSubgraphs.size()) << "\n"; } - non_viable_crit_path_edges_cache_.insert({u, v}); + nonViableCritPathEdgesCache_.insert({u, v}); continue; } // Commit Merge - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << " - Merging " << v << " into " << u << ". New coarse graph has " - << temp_coarse_graph.num_vertices() << " nodes.\n"; + << tempCoarseGraph.NumVertices() << " nodes.\n"; } - commit_merge(u, - v, - std::move(temp_coarse_graph), - temp_contraction_map, - std::move(new_subgraphs), - current_coarse_graph, - current_groups, - current_contraction_map); + CommitMerge(u, + v, + std::move(tempCoarseGraph), + tempContractionMap, + std::move(newSubgraphs), + currentCoarseGraph, + currentGroups, + currentContractionMap); changed = true; break; @@ -546,121 +544,118 @@ class OrbitGraphProcessor { public: explicit OrbitGraphProcessor() {} - void setMergeDifferentNodeTypes(bool flag) { merge_different_node_types_ = flag; } + void SetMergeDifferentNodeTypes(bool flag) { mergeDifferentNodeTypes_ = flag; } - void set_work_threshold(v_workw_t work_threshold) { work_threshold_ = work_threshold; } + void SetWorkThreshold(VWorkwT workThreshold) { workThreshold_ = workThreshold; } - void setCriticalPathThreshold(v_workw_t critical_path_threshold) { - critical_path_threshold_ = critical_path_threshold; - } + void SetCriticalPathThreshold(VWorkwT criticalPathThreshold) { criticalPathThreshold_ = 
criticalPathThreshold; } - void setLockRatio(double lock_ratio) { lock_orbit_ratio = lock_ratio; } + void SetLockRatio(double lockRatio) { lockOrbitRatio_ = lockRatio; } - void setSymmetryLevelHeuristic(SymmetryLevelHeuristic heuristic) { symmetry_level_heuristic_ = heuristic; } + void SetSymmetryLevelHeuristic(SymmetryLevelHeuristic heuristic) { symmetryLevelHeuristic_ = heuristic; } - void setWorkPercentiles(const std::vector &percentiles) { - work_percentiles_ = percentiles; - std::sort(work_percentiles_.begin(), work_percentiles_.end()); + void SetWorkPercentiles(const std::vector &percentiles) { + workPercentiles_ = percentiles; + std::sort(workPercentiles_.begin(), workPercentiles_.end()); } - void setUseStaticSymmetryLevel(size_t static_symmetry_level) { - symmetry_level_heuristic_ = SymmetryLevelHeuristic::NATURAL_BREAKS; - use_adaptive_symmetry_threshold_ = false; - current_symmetry = static_symmetry_level; + void SetUseStaticSymmetryLevel(size_t staticSymmetryLevel) { + symmetryLevelHeuristic_ = SymmetryLevelHeuristic::NATURAL_BREAKS; + useAdaptiveSymmetryThreshold_ = false; + currentSymmetry_ = staticSymmetryLevel; } - void setNaturalBreaksCountPercentage(double percentage) { natural_breaks_count_percentage_ = percentage; } + void SetNaturalBreaksCountPercentage(double percentage) { naturalBreaksCountPercentage_ = percentage; } /** * @brief Discovers isomorphic groups (orbits) and constructs a coarse graph. 
*/ - void discover_isomorphic_groups(const Graph_t &dag, const HashComputer &hasher) { - coarse_graph_ = Constr_Graph_t(); - contraction_map_.clear(); - final_coarse_graph_ = Constr_Graph_t(); - final_contraction_map_.clear(); - final_groups_.clear(); - non_viable_edges_cache_.clear(); - non_viable_crit_path_edges_cache_.clear(); - - if (dag.num_vertices() == 0) { + void DiscoverIsomorphicGroups(const GraphT &dag, const HashComputer &hasher) { + coarseGraph_ = ConstrGraphT(); + contractionMap_.clear(); + finalCoarseGraph_ = ConstrGraphT(); + finalContractionMap_.clear(); + finalGroups_.clear(); + nonViableEdgesCache_.clear(); + nonViableCritPathEdgesCache_.clear(); + + if (dag.NumVertices() == 0) { return; } - const auto &orbits = hasher.get_orbits(); + const auto &orbits = hasher.GetOrbits(); - contraction_map_.assign(dag.num_vertices(), 0); - VertexType coarse_node_idx = 0; + contractionMap_.assign(dag.NumVertices(), 0); + VertexType coarseNodeIdx = 0; - for (const auto &hash_vertices_pair : orbits) { - const auto &vertices = hash_vertices_pair.second; + for (const auto &hashVerticesPair : orbits) { + const auto &vertices = hashVerticesPair.second; for (const auto v : vertices) { - contraction_map_[v] = coarse_node_idx; + contractionMap_[v] = coarseNodeIdx; } - coarse_node_idx++; + coarseNodeIdx++; } - std::vector> work_per_vertex_type; - work_per_vertex_type.resize(merge_different_node_types_ ? 1U : dag.num_vertex_types(), 0); + std::vector> workPerVertexType; + workPerVertexType.resize(mergeDifferentNodeTypes_ ? 
1U : dag.NumVertexTypes(), 0); - std::map orbit_size_counts; - std::map> work_per_orbit_size; - v_workw_t total_work = 0; + std::map orbitSizeCounts; + std::map> workPerOrbitSize; + VWorkwT totalWork = 0; for (const auto &[hash, vertices] : orbits) { - const size_t orbit_size = vertices.size(); + const size_t orbitSize = vertices.size(); - if (orbit_size == 1U) { + if (orbitSize == 1U) { continue; // exclude single node orbits from total work } - orbit_size_counts[orbit_size]++; + orbitSizeCounts[orbitSize]++; - v_workw_t orbit_work = 0; + VWorkwT orbitWork = 0; for (const auto v : vertices) { - orbit_work += dag.vertex_work_weight(v); + orbitWork += dag.VertexWorkWeight(v); } - if (not merge_different_node_types_ && has_typed_vertices_v) { - work_per_vertex_type[dag.vertex_type(vertices[0])] += orbit_work; + if (not mergeDifferentNodeTypes_ && hasTypedVerticesV) { + workPerVertexType[dag.VertexType(vertices[0])] += orbitWork; } else { - work_per_vertex_type[0] += orbit_work; + workPerVertexType[0] += orbitWork; } - work_per_orbit_size[orbit_size] += orbit_work; - total_work += orbit_work; + workPerOrbitSize[orbitSize] += orbitWork; + totalWork += orbitWork; } - std::vector> lock_threshold_per_type(work_per_vertex_type.size()); - for (size_t i = 0; i < work_per_vertex_type.size(); ++i) { - lock_threshold_per_type[i] = static_cast>(lock_orbit_ratio * work_per_vertex_type[i]); + std::vector> lockThresholdPerType(workPerVertexType.size()); + for (size_t i = 0; i < workPerVertexType.size(); ++i) { + lockThresholdPerType[i] = static_cast>(lockOrbitRatio_ * workPerVertexType[i]); } - std::vector rel_acc_work_per_orbit_size; - std::vector symmetry_levels_to_test - = compute_symmetry_levels(rel_acc_work_per_orbit_size, work_per_orbit_size, total_work, orbit_size_counts); + std::vector relAccWorkPerOrbitSize; + std::vector symmetryLevelsToTest + = ComputeSymmetryLevels(relAccWorkPerOrbitSize, workPerOrbitSize, totalWork, orbitSizeCounts); - if constexpr (verbose) { + if 
constexpr (verbose_) { std::cout << "\n--- Orbit Analysis ---\n"; - for (auto const &[size, count] : orbit_size_counts) { - if (total_work > 0) { + for (auto const &[size, count] : orbitSizeCounts) { + if (totalWork > 0) { std::cout << " - Orbits of size " << size << ": " << count << " groups, weight: " - << 100.0 * static_cast(work_per_orbit_size[size]) / static_cast(total_work) << "%\n"; + << 100.0 * static_cast(workPerOrbitSize[size]) / static_cast(totalWork) << "%\n"; } else { std::cout << " - Orbits of size " << size << ": " << count << " groups, weight: 0.0%\n"; } } std::cout << " Cumulative work distribution by orbit size (largest to smallest):\n"; size_t i = 0; - for (auto it = orbit_size_counts.rbegin(); it != orbit_size_counts.rend() && i < rel_acc_work_per_orbit_size.size(); - ++it, ++i) { + for (auto it = orbitSizeCounts.rbegin(); it != orbitSizeCounts.rend() && i < relAccWorkPerOrbitSize.size(); ++it, ++i) { std::cout << " - Orbits with size >= " << it->first << ": " << std::fixed << std::setprecision(2) - << rel_acc_work_per_orbit_size[i] * 100 << "%\n"; + << relAccWorkPerOrbitSize[i] * 100 << "%\n"; } std::cout << " Work distribution by vertex type:\n"; - for (size_t j = 0; j < work_per_vertex_type.size(); ++j) { - if (total_work > 0) { + for (size_t j = 0; j < workPerVertexType.size(); ++j) { + if (totalWork > 0) { std::cout << " - Vertex type " << j << ": " - << 100.0 * static_cast(work_per_vertex_type[j]) / static_cast(total_work) << "%\n"; + << 100.0 * static_cast(workPerVertexType[j]) / static_cast(totalWork) << "%\n"; } else { std::cout << " - Vertex type " << j << ": 0.0%\n"; } @@ -668,70 +663,69 @@ class OrbitGraphProcessor { std::cout << "--------------------------------\n"; std::cout << " Symmetry levels to test: " << "\n"; - for (const auto level : symmetry_levels_to_test) { + for (const auto level : symmetryLevelsToTest) { std::cout << " - " << level << "\n"; } std::cout << "--------------------------------\n"; } - 
coarser_util::construct_coarse_dag(dag, coarse_graph_, contraction_map_); + coarser_util::ConstructCoarseDag(dag, coarseGraph_, contractionMap_); - if (use_adaptive_symmetry_threshold_) { - perform_coarsening_adaptive_symmetry(dag, coarse_graph_, lock_threshold_per_type, symmetry_levels_to_test); + if (useAdaptiveSymmetryThreshold_) { + PerformCoarseningAdaptiveSymmetry(dag, coarseGraph_, lockThresholdPerType, symmetryLevelsToTest); } else { - size_t total_size_count = 0U; - for (const auto &[size, count] : orbit_size_counts) { - total_size_count += count; + size_t totalSizeCount = 0U; + for (const auto &[size, count] : orbitSizeCounts) { + totalSizeCount += count; } - for (const auto &[size, count] : orbit_size_counts) { - if (size == 1U || size > current_symmetry) { + for (const auto &[size, count] : orbitSizeCounts) { + if (size == 1U || size > currentSymmetry_) { continue; } - if (count > total_size_count / 2) { - if constexpr (verbose) { + if (count > totalSizeCount / 2) { + if constexpr (verbose_) { std::cout << "Setting current_symmetry to " << size << " because " << count << " orbits of size " << size << " are more than half of the total number of orbits.\n"; } - current_symmetry = size; + currentSymmetry_ = size; } } - perform_coarsening(dag, coarse_graph_); + PerformCoarsening(dag, coarseGraph_); } } private: - std::vector compute_symmetry_levels(std::vector &rel_acc_work_per_orbit_size, - const std::map> work_per_orbit_size, - const v_workw_t total_work, - const std::map orbit_size_counts) { - std::vector symmetry_levels_to_test; - min_symmetry_ = 2; - - switch (symmetry_level_heuristic_) { + std::vector ComputeSymmetryLevels(std::vector &relAccWorkPerOrbitSize, + const std::map> workPerOrbitSize, + const VWorkwT totalWork, + const std::map orbitSizeCounts) { + std::vector symmetryLevelsToTest; + minSymmetry_ = 2; + + switch (symmetryLevelHeuristic_) { case SymmetryLevelHeuristic::PERCENTILE_BASED: { - if constexpr (verbose) { + if constexpr (verbose_) { 
std::cout << "Using PERCENTILE_BASED heuristic for symmetry levels.\n"; } - size_t percentile_idx = 0; - v_workw_t cumulative_work = 0; - for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) { - cumulative_work += it->second; - if (total_work == 0) { + size_t percentileIdx = 0; + VWorkwT cumulativeWork = 0; + for (auto it = workPerOrbitSize.rbegin(); it != workPerOrbitSize.rend(); ++it) { + cumulativeWork += it->second; + if (totalWork == 0) { continue; // Avoid division by zero } - double current_work_ratio = static_cast(cumulative_work) / static_cast(total_work); - rel_acc_work_per_orbit_size.push_back(current_work_ratio); // For printing + double currentWorkRatio = static_cast(cumulativeWork) / static_cast(totalWork); + relAccWorkPerOrbitSize.push_back(currentWorkRatio); // For printing - if (percentile_idx < work_percentiles_.size() && current_work_ratio >= work_percentiles_[percentile_idx]) { - if (it->first > min_symmetry_) { - symmetry_levels_to_test.push_back(it->first); + if (percentileIdx < workPercentiles_.size() && currentWorkRatio >= workPercentiles_[percentileIdx]) { + if (it->first > minSymmetry_) { + symmetryLevelsToTest.push_back(it->first); } - while (percentile_idx < work_percentiles_.size() - && current_work_ratio >= work_percentiles_[percentile_idx]) { - percentile_idx++; + while (percentileIdx < workPercentiles_.size() && currentWorkRatio >= workPercentiles_[percentileIdx]) { + percentileIdx++; } } } @@ -739,70 +733,67 @@ class OrbitGraphProcessor { } case SymmetryLevelHeuristic::NATURAL_BREAKS: { - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << "Using NATURAL_BREAKS heuristic for symmetry levels.\n"; } - size_t total_orbit_groups = 0; - for (const auto &[size, count] : orbit_size_counts) { - total_orbit_groups += count; + size_t totalOrbitGroups = 0; + for (const auto &[size, count] : orbitSizeCounts) { + totalOrbitGroups += count; } - size_t count_threshold - = 
static_cast(static_cast(total_orbit_groups) * natural_breaks_count_percentage_); - if (count_threshold == 0 && total_orbit_groups > 0) { - count_threshold = 1; // Ensure threshold is at least 1 if possible + size_t countThreshold = static_cast(static_cast(totalOrbitGroups) * naturalBreaksCountPercentage_); + if (countThreshold == 0 && totalOrbitGroups > 0) { + countThreshold = 1; // Ensure threshold is at least 1 if possible } - if constexpr (verbose) { - std::cout << " - Total orbit groups: " << total_orbit_groups << ", count threshold: " << count_threshold - << "\n"; + if constexpr (verbose_) { + std::cout << " - Total orbit groups: " << totalOrbitGroups << ", count threshold: " << countThreshold << "\n"; } - std::vector sorted_sizes; - sorted_sizes.reserve(orbit_size_counts.size()); - for (const auto &[size, count] : orbit_size_counts) { - sorted_sizes.push_back(size); + std::vector sortedSizes; + sortedSizes.reserve(orbitSizeCounts.size()); + for (const auto &[size, count] : orbitSizeCounts) { + sortedSizes.push_back(size); } - std::sort(sorted_sizes.rbegin(), sorted_sizes.rend()); // Sort descending + std::sort(sortedSizes.rbegin(), sortedSizes.rend()); // Sort descending - if (!sorted_sizes.empty()) { - for (size_t i = 0; i < sorted_sizes.size(); ++i) { - const size_t current_size = sorted_sizes[i]; - if (current_size < min_symmetry_) { + if (!sortedSizes.empty()) { + for (size_t i = 0; i < sortedSizes.size(); ++i) { + const size_t currentSize = sortedSizes[i]; + if (currentSize < minSymmetry_) { continue; } // Add if this size's count is significant - const size_t current_count = orbit_size_counts.at(current_size); - bool count_significant = (current_count >= count_threshold); + const size_t currentCount = orbitSizeCounts.at(currentSize); + bool countSignificant = (currentCount >= countThreshold); - if (count_significant) { - symmetry_levels_to_test.push_back(current_size); + if (countSignificant) { + symmetryLevelsToTest.push_back(currentSize); continue; } 
} } - if (symmetry_levels_to_test.empty()) { - size_t max_count = 0; - size_t size_with_max_count = 0; - for (const auto &[size, count] : orbit_size_counts) { - if (count > max_count) { - max_count = count; - size_with_max_count = size; + if (symmetryLevelsToTest.empty()) { + size_t maxCount = 0; + size_t sizeWithMaxCount = 0; + for (const auto &[size, count] : orbitSizeCounts) { + if (count > maxCount) { + maxCount = count; + sizeWithMaxCount = size; } } - if (size_with_max_count > 0) { - symmetry_levels_to_test.push_back(size_with_max_count); + if (sizeWithMaxCount > 0) { + symmetryLevelsToTest.push_back(sizeWithMaxCount); } } // Verbose print data - v_workw_t cumulative_work = 0; - for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) { - cumulative_work += it->second; - if (total_work > 0) { - rel_acc_work_per_orbit_size.push_back(static_cast(cumulative_work) - / static_cast(total_work)); + VWorkwT cumulativeWork = 0; + for (auto it = workPerOrbitSize.rbegin(); it != workPerOrbitSize.rend(); ++it) { + cumulativeWork += it->second; + if (totalWork > 0) { + relAccWorkPerOrbitSize.push_back(static_cast(cumulativeWork) / static_cast(totalWork)); } } break; @@ -810,188 +801,175 @@ class OrbitGraphProcessor { case SymmetryLevelHeuristic::CURRENT_DEFAULT: default: { - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << "Using CURRENT_DEFAULT heuristic for symmetry levels.\n"; } - double threshold = lock_orbit_ratio; - v_workw_t cumulative_work = 0; - for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) { - cumulative_work += it->second; - const double rel_work - = (total_work == 0) ? 
0 : static_cast(cumulative_work) / static_cast(total_work); - rel_acc_work_per_orbit_size.push_back(rel_work); // For printing - - if (rel_work >= threshold && it->first > min_symmetry_) { - symmetry_levels_to_test.push_back(it->first); - threshold += lock_orbit_ratio * 0.5; + double threshold = lockOrbitRatio_; + VWorkwT cumulativeWork = 0; + for (auto it = workPerOrbitSize.rbegin(); it != workPerOrbitSize.rend(); ++it) { + cumulativeWork += it->second; + const double relWork + = (totalWork == 0) ? 0 : static_cast(cumulativeWork) / static_cast(totalWork); + relAccWorkPerOrbitSize.push_back(relWork); // For printing + + if (relWork >= threshold && it->first > minSymmetry_) { + symmetryLevelsToTest.push_back(it->first); + threshold += lockOrbitRatio_ * 0.5; } } break; } } - if (symmetry_levels_to_test.empty()) { - symmetry_levels_to_test.push_back(2); + if (symmetryLevelsToTest.empty()) { + symmetryLevelsToTest.push_back(2); } - min_symmetry_ = symmetry_levels_to_test.back(); + minSymmetry_ = symmetryLevelsToTest.back(); // De-duplicate and sort descending - std::sort(symmetry_levels_to_test.rbegin(), symmetry_levels_to_test.rend()); - auto last = std::unique(symmetry_levels_to_test.begin(), symmetry_levels_to_test.end()); - symmetry_levels_to_test.erase(last, symmetry_levels_to_test.end()); + std::sort(symmetryLevelsToTest.rbegin(), symmetryLevelsToTest.rend()); + auto last = std::unique(symmetryLevelsToTest.begin(), symmetryLevelsToTest.end()); + symmetryLevelsToTest.erase(last, symmetryLevelsToTest.end()); - return symmetry_levels_to_test; + return symmetryLevelsToTest; } /** * @brief Non-adaptive coarsening (deprecated). 
*/ - void perform_coarsening(const Graph_t &original_dag, const Constr_Graph_t &initial_coarse_graph) { - final_coarse_graph_ = Constr_Graph_t(); - final_contraction_map_.clear(); + void PerformCoarsening(const GraphT &originalDag, const ConstrGraphT &initialCoarseGraph) { + finalCoarseGraph_ = ConstrGraphT(); + finalContractionMap_.clear(); - if (initial_coarse_graph.num_vertices() == 0) { + if (initialCoarseGraph.NumVertices() == 0) { return; } - Constr_Graph_t current_coarse_graph = initial_coarse_graph; - std::vector current_groups(initial_coarse_graph.num_vertices()); - std::vector current_contraction_map = contraction_map_; + ConstrGraphT currentCoarseGraph = initialCoarseGraph; + std::vector currentGroups(initialCoarseGraph.NumVertices()); + std::vector currentContractionMap = contractionMap_; // Initialize groups: each group corresponds to an orbit. - for (VertexType i = 0; i < original_dag.num_vertices(); ++i) { - const VertexType coarse_node = contraction_map_[i]; - current_groups[coarse_node].subgraphs.push_back({i}); + for (VertexType i = 0; i < originalDag.NumVertices(); ++i) { + const VertexType coarseNode = contractionMap_[i]; + currentGroups[coarseNode].subgraphs_.push_back({i}); } - if constexpr (has_typed_vertices_v) { - if constexpr (verbose) { + if constexpr (hasTypedVerticesV) { + if constexpr (verbose_) { std::cout << "Attempting to merge same node types.\n"; } - contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, false, false); - contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, true, false); + ContractEdges(originalDag, currentCoarseGraph, currentGroups, currentContractionMap, false, false); + ContractEdges(originalDag, currentCoarseGraph, currentGroups, currentContractionMap, true, false); } - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << "Attempting to merge different node types.\n"; } - contract_edges( - original_dag, 
current_coarse_graph, current_groups, current_contraction_map, false, merge_different_node_types_); - contract_edges( - original_dag, current_coarse_graph, current_groups, current_contraction_map, true, merge_different_node_types_); + ContractEdges(originalDag, currentCoarseGraph, currentGroups, currentContractionMap, false, mergeDifferentNodeTypes_); + ContractEdges(originalDag, currentCoarseGraph, currentGroups, currentContractionMap, true, mergeDifferentNodeTypes_); - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << "Attempting to merge small orbits.\n"; } - merge_small_orbits(original_dag, current_coarse_graph, current_groups, current_contraction_map, work_threshold_); + MergeSmallOrbits(originalDag, currentCoarseGraph, currentGroups, currentContractionMap, workThreshold_); - non_viable_crit_path_edges_cache_.clear(); - non_viable_edges_cache_.clear(); + nonViableCritPathEdgesCache_.clear(); + nonViableEdgesCache_.clear(); - contract_edges(original_dag, - current_coarse_graph, - current_groups, - current_contraction_map, - true, - merge_different_node_types_, - work_threshold_); + ContractEdges( + originalDag, currentCoarseGraph, currentGroups, currentContractionMap, true, mergeDifferentNodeTypes_, workThreshold_); - final_coarse_graph_ = std::move(current_coarse_graph); - final_contraction_map_ = std::move(current_contraction_map); - final_groups_ = std::move(current_groups); + finalCoarseGraph_ = std::move(currentCoarseGraph); + finalContractionMap_ = std::move(currentContractionMap); + finalGroups_ = std::move(currentGroups); - if constexpr (verbose) { - print_final_groups_summary(); + if constexpr (verbose_) { + PrintFinalGroupsSummary(); } } - void perform_coarsening_adaptive_symmetry(const Graph_t &original_dag, - const Constr_Graph_t &initial_coarse_graph, - const std::vector> &lock_threshold_per_type, - const std::vector &symmetry_levels_to_test) { - final_coarse_graph_ = Constr_Graph_t(); - final_contraction_map_.clear(); + void 
PerformCoarseningAdaptiveSymmetry(const GraphT &originalDag, + const ConstrGraphT &initialCoarseGraph, + const std::vector> &lockThresholdPerType, + const std::vector &symmetryLevelsToTest) { + finalCoarseGraph_ = ConstrGraphT(); + finalContractionMap_.clear(); - if (initial_coarse_graph.num_vertices() == 0) { + if (initialCoarseGraph.NumVertices() == 0) { return; } - Constr_Graph_t current_coarse_graph = initial_coarse_graph; - std::vector current_groups(initial_coarse_graph.num_vertices()); - std::vector current_contraction_map = contraction_map_; + ConstrGraphT currentCoarseGraph = initialCoarseGraph; + std::vector currentGroups(initialCoarseGraph.NumVertices()); + std::vector currentContractionMap = contractionMap_; - for (VertexType i = 0; i < original_dag.num_vertices(); ++i) { - const VertexType coarse_node = contraction_map_[i]; - current_groups[coarse_node].subgraphs.push_back({i}); + for (VertexType i = 0; i < originalDag.NumVertices(); ++i) { + const VertexType coarseNode = contractionMap_[i]; + currentGroups[coarseNode].subgraphs_.push_back({i}); } - if constexpr (verbose) { - std::cout << " Starting adaptive symmetry coarsening with critical_path_threshold: " << critical_path_threshold_ - << "\n"; + if constexpr (verbose_) { + std::cout << " Starting adaptive symmetry coarsening with critical_path_threshold: " << criticalPathThreshold_ << "\n"; } - for (const auto sym : symmetry_levels_to_test) { - current_symmetry = sym; - const bool is_last_loop = (sym == symmetry_levels_to_test.back()); - if constexpr (verbose) { - std::cout << " Current symmetry threshold: " << current_symmetry << "\n"; + for (const auto sym : symmetryLevelsToTest) { + currentSymmetry_ = sym; + const bool isLastLoop = (sym == symmetryLevelsToTest.back()); + if constexpr (verbose_) { + std::cout << " Current symmetry threshold: " << currentSymmetry_ << "\n"; } - non_viable_edges_cache_.clear(); - - contract_edges_adpative_sym(original_dag, - current_coarse_graph, - current_groups, - 
current_contraction_map, - false, - is_last_loop, - lock_threshold_per_type); - - if (merge_different_node_types_) { - contract_edges_adpative_sym(original_dag, - current_coarse_graph, - current_groups, - current_contraction_map, - merge_different_node_types_, - is_last_loop, - lock_threshold_per_type); + nonViableEdgesCache_.clear(); + + ContractEdgesAdpativeSym( + originalDag, currentCoarseGraph, currentGroups, currentContractionMap, false, isLastLoop, lockThresholdPerType); + + if (mergeDifferentNodeTypes_) { + ContractEdgesAdpativeSym(originalDag, + currentCoarseGraph, + currentGroups, + currentContractionMap, + mergeDifferentNodeTypes_, + isLastLoop, + lockThresholdPerType); } - non_viable_crit_path_edges_cache_.clear(); - contract_edges_adpative_sym(original_dag, - current_coarse_graph, - current_groups, - current_contraction_map, - merge_different_node_types_, - is_last_loop, - lock_threshold_per_type, - critical_path_threshold_); + nonViableCritPathEdgesCache_.clear(); + ContractEdgesAdpativeSym(originalDag, + currentCoarseGraph, + currentGroups, + currentContractionMap, + mergeDifferentNodeTypes_, + isLastLoop, + lockThresholdPerType, + criticalPathThreshold_); } - if constexpr (verbose) { - std::cout << " Merging small orbits with work threshold: " << work_threshold_ << "\n"; + if constexpr (verbose_) { + std::cout << " Merging small orbits with work threshold: " << workThreshold_ << "\n"; } - non_viable_edges_cache_.clear(); - merge_small_orbits(original_dag, current_coarse_graph, current_groups, current_contraction_map, work_threshold_); + nonViableEdgesCache_.clear(); + MergeSmallOrbits(originalDag, currentCoarseGraph, currentGroups, currentContractionMap, workThreshold_); - final_coarse_graph_ = std::move(current_coarse_graph); - final_contraction_map_ = std::move(current_contraction_map); - final_groups_ = std::move(current_groups); + finalCoarseGraph_ = std::move(currentCoarseGraph); + finalContractionMap_ = std::move(currentContractionMap); + 
finalGroups_ = std::move(currentGroups); - if constexpr (verbose) { - print_final_groups_summary(); + if constexpr (verbose_) { + PrintFinalGroupsSummary(); } } - void print_final_groups_summary() const { + void PrintFinalGroupsSummary() const { std::cout << "\n--- 📦 Final Groups Summary ---\n"; - std::cout << "Total final groups: " << final_groups_.size() << "\n"; - for (size_t i = 0; i < final_groups_.size(); ++i) { - const auto &group = final_groups_[i]; - std::cout << " - Group " << i << " (Size: " << group.subgraphs.size() << ")\n"; - if (!group.subgraphs.empty() && !group.subgraphs[0].empty()) { - std::cout << " - Rep. Subgraph size: " << group.subgraphs[0].size() << " nodes\n"; + std::cout << "Total final groups: " << finalGroups_.size() << "\n"; + for (size_t i = 0; i < finalGroups_.size(); ++i) { + const auto &group = finalGroups_[i]; + std::cout << " - Group " << i << " (Size: " << group.subgraphs_.size() << ")\n"; + if (!group.subgraphs_.empty() && !group.subgraphs_[0].empty()) { + std::cout << " - Rep. Subgraph size: " << group.subgraphs_[0].size() << " nodes\n"; } } std::cout << "--------------------------------\n"; @@ -1000,56 +978,56 @@ class OrbitGraphProcessor { /** * @brief Checks if merging two groups is structurally viable. */ - bool is_merge_viable(const Graph_t &original_dag, - const Group &group_u, - const Group &group_v, - std::vector> &out_new_subgraphs) const { - std::vector all_nodes; - all_nodes.reserve(group_u.subgraphs.size() * (group_u.subgraphs.empty() ? 0 : group_u.subgraphs[0].size()) - + group_v.subgraphs.size() * (group_v.subgraphs.empty() ? 0 : group_v.subgraphs[0].size())); - for (const auto &sg : group_u.subgraphs) { - all_nodes.insert(all_nodes.end(), sg.begin(), sg.end()); + bool IsMergeViable(const GraphT &originalDag, + const Group &groupU, + const Group &groupV, + std::vector> &outNewSubgraphs) const { + std::vector allNodes; + allNodes.reserve(groupU.subgraphs_.size() * (groupU.subgraphs_.empty() ? 
0 : groupU.subgraphs_[0].size()) + + groupV.subgraphs_.size() * (groupV.subgraphs_.empty() ? 0 : groupV.subgraphs_[0].size())); + for (const auto &sg : groupU.subgraphs_) { + allNodes.insert(allNodes.end(), sg.begin(), sg.end()); } - for (const auto &sg : group_v.subgraphs) { - all_nodes.insert(all_nodes.end(), sg.begin(), sg.end()); + for (const auto &sg : groupV.subgraphs_) { + allNodes.insert(allNodes.end(), sg.begin(), sg.end()); } assert([&]() { - std::vector temp_nodes_for_check = all_nodes; - std::sort(temp_nodes_for_check.begin(), temp_nodes_for_check.end()); - return std::unique(temp_nodes_for_check.begin(), temp_nodes_for_check.end()) == temp_nodes_for_check.end(); + std::vector tempNodesForCheck = allNodes; + std::sort(tempNodesForCheck.begin(), tempNodesForCheck.end()); + return std::unique(tempNodesForCheck.begin(), tempNodesForCheck.end()) == tempNodesForCheck.end(); }() && "Assumption failed: Vertices in groups being merged are not disjoint."); - std::sort(all_nodes.begin(), all_nodes.end()); + std::sort(allNodes.begin(), allNodes.end()); - Constr_Graph_t induced_subgraph; + ConstrGraphT inducedSubgraph; - auto map = create_induced_subgraph_map(original_dag, induced_subgraph, all_nodes); + auto map = CreateInducedSubgraphMap(originalDag, inducedSubgraph, allNodes); std::vector components; // local -> component_id - size_t num_components = compute_weakly_connected_components(induced_subgraph, components); - out_new_subgraphs.assign(num_components, std::vector()); + size_t numComponents = ComputeWeaklyConnectedComponents(inducedSubgraph, components); + outNewSubgraphs.assign(numComponents, std::vector()); - if (all_nodes.empty()) { // Handle empty graph case + if (allNodes.empty()) { // Handle empty graph case return true; } - for (const auto &node : all_nodes) { - out_new_subgraphs[components[map[node]]].push_back(node); + for (const auto &node : allNodes) { + outNewSubgraphs[components[map[node]]].push_back(node); } - if (num_components > 1) { - 
const size_t first_sg_size = out_new_subgraphs[0].size(); - Constr_Graph_t rep_sg; - create_induced_subgraph(original_dag, rep_sg, out_new_subgraphs[0]); + if (numComponents > 1) { + const size_t firstSgSize = outNewSubgraphs[0].size(); + ConstrGraphT repSg; + CreateInducedSubgraph(originalDag, repSg, outNewSubgraphs[0]); - for (size_t i = 1; i < num_components; ++i) { - if (out_new_subgraphs[i].size() != first_sg_size) { + for (size_t i = 1; i < numComponents; ++i) { + if (outNewSubgraphs[i].size() != firstSgSize) { return false; } - Constr_Graph_t current_sg; - create_induced_subgraph(original_dag, current_sg, out_new_subgraphs[i]); - if (!are_isomorphic_by_merkle_hash(rep_sg, current_sg)) { + ConstrGraphT currentSg; + CreateInducedSubgraph(originalDag, currentSg, outNewSubgraphs[i]); + if (!AreIsomorphicByMerkleHash(repSg, currentSg)) { return false; } } @@ -1058,15 +1036,15 @@ class OrbitGraphProcessor { } public: - const Graph_t &get_coarse_graph() const { return coarse_graph_; } + const GraphT &GetCoarseGraph() const { return coarseGraph_; } - const std::vector &get_contraction_map() const { return contraction_map_; } + const std::vector &GetContractionMap() const { return contractionMap_; } - const Graph_t &get_final_coarse_graph() const { return final_coarse_graph_; } + const GraphT &GetFinalCoarseGraph() const { return finalCoarseGraph_; } - const std::vector &get_final_contraction_map() const { return final_contraction_map_; } + const std::vector &GetFinalContractionMap() const { return finalContractionMap_; } - const std::vector &get_final_groups() const { return final_groups_; } + const std::vector &GetFinalGroups() const { return finalGroups_; } }; } // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp b/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp index 391c5819..be77a080 100644 --- a/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp +++ 
b/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp @@ -36,8 +36,8 @@ namespace osp { */ template class PrecomputedHashComputer : public HashComputer { - std::vector vertex_hashes; - std::unordered_map> orbits; + std::vector vertexHashes_; + std::unordered_map> orbits_; public: /** @@ -45,28 +45,28 @@ class PrecomputedHashComputer : public HashComputer { * * @param precomputed_hashes A vector of hash values for objects 0 to n-1. */ - PrecomputedHashComputer(const std::vector &precomputed_hashes) : vertex_hashes(precomputed_hashes) { - for (std::size_t i = 0; i < vertex_hashes.size(); ++i) { - const auto &hash = vertex_hashes[i]; - orbits[hash].push_back(static_cast(i)); + PrecomputedHashComputer(const std::vector &precomputedHashes) : vertexHashes_(precomputedHashes) { + for (std::size_t i = 0; i < vertexHashes_.size(); ++i) { + const auto &hash = vertexHashes_[i]; + orbits_[hash].push_back(static_cast(i)); } } virtual ~PrecomputedHashComputer() override = default; - inline std::size_t get_vertex_hash(const IndexType &v) const override { return vertex_hashes[v]; } + inline std::size_t GetVertexHash(const IndexType &v) const override { return vertexHashes_[v]; } - inline const std::vector &get_vertex_hashes() const override { return vertex_hashes; } + inline const std::vector &GetVertexHashes() const override { return vertexHashes_; } - inline std::size_t num_orbits() const override { return orbits.size(); } + inline std::size_t NumOrbits() const override { return orbits_.size(); } - inline const std::vector &get_orbit(const IndexType &v) const override { - return this->get_orbit_from_hash(this->get_vertex_hash(v)); + inline const std::vector &GetOrbit(const IndexType &v) const override { + return this->GetOrbitFromHash(this->GetVertexHash(v)); } - inline const std::unordered_map> &get_orbits() const override { return orbits; } + inline const std::unordered_map> &GetOrbits() const override { return orbits_; } - inline const std::vector 
&get_orbit_from_hash(const std::size_t &hash) const override { return orbits.at(hash); } + inline const std::vector &GetOrbitFromHash(const std::size_t &hash) const override { return orbits_.at(hash); } }; } // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp index 4b52b935..78c006f6 100644 --- a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp @@ -36,144 +36,141 @@ namespace osp { * potentially disconnected, subgraph that resulted from merging smaller isomorphic subgraphs. It divides * the input graph into its weakly connected components and schedules them on proportionally allocated processors. */ -template -class TrimmedGroupScheduler : public Scheduler { - Scheduler *sub_scheduler; - unsigned min_non_zero_procs_; +template +class TrimmedGroupScheduler : public Scheduler { + Scheduler *subScheduler_; + unsigned minNonZeroProcs_; - static constexpr bool verbose = false; + static constexpr bool verbose_ = false; public: - TrimmedGroupScheduler(Scheduler &scheduler, unsigned min_non_zero_procs) - : sub_scheduler(&scheduler), min_non_zero_procs_(min_non_zero_procs) {} + TrimmedGroupScheduler(Scheduler &scheduler, unsigned minNonZeroProcs) + : subScheduler_(&scheduler), minNonZeroProcs_(minNonZeroProcs) {} - std::string getScheduleName() const override { return "TrimmedGroupScheduler"; } + std::string GetScheduleName() const override { return "TrimmedGroupScheduler"; } - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); - const Constr_Graph_t &dag = instance.getComputationalDag(); - const BspArchitecture &arch = instance.getArchitecture(); + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.GetInstance(); + const ConstrGraphT &dag = 
instance.GetComputationalDag(); + const BspArchitecture &arch = instance.GetArchitecture(); // Find the weakly connected components. These are assumed to be isomorphic subgraphs. - std::vector> component_map(dag.num_vertices()); - size_t num_components = compute_weakly_connected_components(dag, component_map); + std::vector> componentMap(dag.NumVertices()); + size_t numComponents = ComputeWeaklyConnectedComponents(dag, componentMap); - if (num_components == 0) { - schedule.setNumberOfSupersteps(0); - return RETURN_STATUS::OSP_SUCCESS; + if (numComponents == 0) { + schedule.SetNumberOfSupersteps(0); + return ReturnStatus::OSP_SUCCESS; } - if constexpr (verbose) { - std::cout << " [TrimmedGroupScheduler] min_non_zero_procs: " << min_non_zero_procs_ - << ", num_components: " << num_components << std::endl; + if constexpr (verbose_) { + std::cout << " [TrimmedGroupScheduler] min_non_zero_procs: " << minNonZeroProcs_ + << ", num_components: " << numComponents << std::endl; } // Group vertices by component. - std::vector>> components_vertices(num_components); - for (vertex_idx_t v = 0; v < dag.num_vertices(); ++v) { - components_vertices[component_map[v]].push_back(v); + std::vector>> componentsVertices(numComponents); + for (VertexIdxT v = 0; v < dag.NumVertices(); ++v) { + componentsVertices[componentMap[v]].push_back(v); } // Distribute components among processor types. // The goal is to assign `base_count` components to each processor type group, // plus one extra for the first `remainder` groups. - const unsigned base_count = static_cast(num_components) / min_non_zero_procs_; - const unsigned remainder = static_cast(num_components) % min_non_zero_procs_; - - std::vector> component_indices_per_group(min_non_zero_procs_); - unsigned component_cursor = 0; - for (unsigned i = 0; i < min_non_zero_procs_; ++i) { - unsigned num_to_assign = base_count + (i < remainder ? 
1 : 0); - for (unsigned j = 0; j < num_to_assign; ++j) { - if (component_cursor < num_components) { - component_indices_per_group[i].push_back(component_cursor++); + const unsigned baseCount = static_cast(numComponents) / minNonZeroProcs_; + const unsigned remainder = static_cast(numComponents) % minNonZeroProcs_; + + std::vector> componentIndicesPerGroup(minNonZeroProcs_); + unsigned componentCursor = 0; + for (unsigned i = 0; i < minNonZeroProcs_; ++i) { + unsigned numToAssign = baseCount + (i < remainder ? 1 : 0); + for (unsigned j = 0; j < numToAssign; ++j) { + if (componentCursor < numComponents) { + componentIndicesPerGroup[i].push_back(componentCursor++); } } } // Determine the processor allocation for a single sub-problem. // Calculate offsets for processor types within the main 'arch' (passed to TrimmedGroupScheduler) - std::vector arch_proc_type_offsets(arch.getNumberOfProcessorTypes(), 0); - const auto &arch_proc_type_counts = arch.getProcessorTypeCount(); - for (unsigned type_idx = 1; type_idx < arch.getNumberOfProcessorTypes(); ++type_idx) { - arch_proc_type_offsets[type_idx] = arch_proc_type_offsets[type_idx - 1] + arch_proc_type_counts[type_idx - 1]; + std::vector archProcTypeOffsets(arch.GetNumberOfProcessorTypes(), 0); + const auto &archProcTypeCounts = arch.GetProcessorTypeCount(); + for (unsigned typeIdx = 1; typeIdx < arch.GetNumberOfProcessorTypes(); ++typeIdx) { + archProcTypeOffsets[typeIdx] = archProcTypeOffsets[typeIdx - 1] + archProcTypeCounts[typeIdx - 1]; } - std::vector sub_proc_counts(arch.getNumberOfProcessorTypes()); - std::vector> mem_weights(arch.getNumberOfProcessorTypes(), 0); - for (unsigned type_idx = 0; type_idx < arch.getNumberOfProcessorTypes(); ++type_idx) { - sub_proc_counts[type_idx] = arch.getProcessorTypeCount()[type_idx] / min_non_zero_procs_; - mem_weights[type_idx] = static_cast>(arch.maxMemoryBoundProcType(type_idx)); + std::vector subProcCounts(arch.GetNumberOfProcessorTypes()); + std::vector> 
memWeights(arch.GetNumberOfProcessorTypes(), 0); + for (unsigned typeIdx = 0; typeIdx < arch.GetNumberOfProcessorTypes(); ++typeIdx) { + subProcCounts[typeIdx] = arch.GetProcessorTypeCount()[typeIdx] / minNonZeroProcs_; + memWeights[typeIdx] = static_cast>(arch.MaxMemoryBoundProcType(typeIdx)); } - if constexpr (verbose) { + if constexpr (verbose_) { std::cout << " [TrimmedGroupScheduler] Sub-problem processor counts per type: "; - for (size_t type_idx = 0; type_idx < sub_proc_counts.size(); ++type_idx) { - std::cout << "T" << type_idx << ":" << sub_proc_counts[type_idx] << " "; + for (size_t typeIdx = 0; typeIdx < subProcCounts.size(); ++typeIdx) { + std::cout << "T" << typeIdx << ":" << subProcCounts[typeIdx] << " "; } std::cout << std::endl; } // Create the sub-architecture for one sub-problem. - BspArchitecture sub_arch(arch); - sub_arch.SetProcessorsConsequTypes(sub_proc_counts, mem_weights); + BspArchitecture subArch(arch); + subArch.SetProcessorsConsequTypes(subProcCounts, memWeights); // Calculate offsets for processor types within the 'sub_arch' - std::vector sub_arch_proc_type_offsets(sub_arch.getNumberOfProcessorTypes(), 0); - const auto &sub_arch_proc_type_counts = sub_arch.getProcessorTypeCount(); - for (unsigned type_idx = 1; type_idx < sub_arch.getNumberOfProcessorTypes(); ++type_idx) { - sub_arch_proc_type_offsets[type_idx] - = sub_arch_proc_type_offsets[type_idx - 1] + sub_arch_proc_type_counts[type_idx - 1]; + std::vector subArchProcTypeOffsets(subArch.GetNumberOfProcessorTypes(), 0); + const auto &subArchProcTypeCounts = subArch.GetProcessorTypeCount(); + for (unsigned typeIdx = 1; typeIdx < subArch.GetNumberOfProcessorTypes(); ++typeIdx) { + subArchProcTypeOffsets[typeIdx] = subArchProcTypeOffsets[typeIdx - 1] + subArchProcTypeCounts[typeIdx - 1]; } - unsigned max_supersteps = 0; - for (unsigned i = 0; i < min_non_zero_procs_; ++i) { - std::vector> group_vertices; - for (unsigned comp_idx : component_indices_per_group[i]) { - 
group_vertices.insert( - group_vertices.end(), components_vertices[comp_idx].begin(), components_vertices[comp_idx].end()); + unsigned maxSupersteps = 0; + for (unsigned i = 0; i < minNonZeroProcs_; ++i) { + std::vector> groupVertices; + for (unsigned compIdx : componentIndicesPerGroup[i]) { + groupVertices.insert(groupVertices.end(), componentsVertices[compIdx].begin(), componentsVertices[compIdx].end()); } - std::sort(group_vertices.begin(), group_vertices.end()); + std::sort(groupVertices.begin(), groupVertices.end()); - BspInstance sub_instanc; - sub_instanc.getArchitecture() = sub_arch; - sub_instanc.setNodeProcessorCompatibility(instance.getNodeProcessorCompatibilityMatrix()); // Inherit compatibility - auto global_to_local_map = create_induced_subgraph_map( - dag, sub_instanc.getComputationalDag(), group_vertices); // Create induced subgraph + BspInstance subInstance; + subInstance.GetArchitecture() = subArch; + subInstance.SetNodeProcessorCompatibility(instance.GetNodeProcessorCompatibilityMatrix()); // Inherit compatibility + auto globalToLocalMap + = CreateInducedSubgraphMap(dag, subInstance.GetComputationalDag(), groupVertices); // Create induced subgraph // Create a schedule object for the sub-problem - BspSchedule sub_schedule(sub_instanc); + BspSchedule subSchedule(subInstance); // Call the sub-scheduler to compute the schedule for this group of components - auto status = sub_scheduler->computeSchedule(sub_schedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + auto status = subScheduler_->ComputeSchedule(subSchedule); + if (status != ReturnStatus::OSP_SUCCESS && status != ReturnStatus::BEST_FOUND) { return status; } // Map the sub-schedule back to the main schedule. 
- for (const auto &v_global : group_vertices) { - const auto v_local = global_to_local_map.at(v_global); - const unsigned sub_proc = sub_schedule.assignedProcessor(v_local); - const unsigned sub_superstep = sub_schedule.assignedSuperstep(v_local); + for (const auto &vGlobal : groupVertices) { + const auto vLocal = globalToLocalMap.at(vGlobal); + const unsigned subProc = subSchedule.AssignedProcessor(vLocal); + const unsigned subSuperstep = subSchedule.AssignedSuperstep(vLocal); // Determine the processor type and its local index within that type in the sub_arch - const unsigned proc_type = sub_arch.processorType(sub_proc); - const unsigned local_idx_within_type = sub_proc - sub_arch_proc_type_offsets[proc_type]; + const unsigned procType = subArch.ProcessorType(subProc); + const unsigned localIdxWithinType = subProc - subArchProcTypeOffsets[procType]; // Calculate the global processor ID by combining: // The base offset of this processor type in the main 'arch'. // The offset for the current 'i'-th block of processors of this type. // The local index within that type block. 
- const unsigned global_proc - = arch_proc_type_offsets[proc_type] + (i * sub_proc_counts[proc_type]) + local_idx_within_type; - schedule.setAssignedProcessor(v_global, global_proc); - schedule.setAssignedSuperstep(v_global, sub_superstep); + const unsigned globalProc = archProcTypeOffsets[procType] + (i * subProcCounts[procType]) + localIdxWithinType; + schedule.SetAssignedProcessor(vGlobal, globalProc); + schedule.SetAssignedSuperstep(vGlobal, subSuperstep); } - max_supersteps = std::max(max_supersteps, sub_schedule.numberOfSupersteps()); + maxSupersteps = std::max(maxSupersteps, subSchedule.NumberOfSupersteps()); } - schedule.setNumberOfSupersteps(max_supersteps); - return RETURN_STATUS::OSP_SUCCESS; + schedule.SetNumberOfSupersteps(maxSupersteps); + return ReturnStatus::OSP_SUCCESS; } }; diff --git a/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp b/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp index c916b55c..53e6e3fc 100644 --- a/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp +++ b/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp @@ -35,98 +35,98 @@ namespace osp { * @class AbstractWavefrontDivider * @brief Base class for wavefront-based DAG dividers. */ -template -class AbstractWavefrontDivider : public IDagDivider { - static_assert(is_computational_dag_v, "AbstractWavefrontDivider can only be used with computational DAGs."); +template +class AbstractWavefrontDivider : public IDagDivider { + static_assert(isComputationalDagV, "AbstractWavefrontDivider can only be used with computational DAGs."); protected: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - const Graph_t *dag_ptr_ = nullptr; + const GraphT *dagPtr_ = nullptr; /** * @brief Helper to get connected components for a specific range of levels. * This method is now const-correct. 
*/ - std::vector> get_components_for_range(size_t start_level, - size_t end_level, - const std::vector> &level_sets) const { - union_find_universe_t uf; - for (size_t i = start_level; i < end_level; ++i) { - for (const auto vertex : level_sets[i]) { - uf.add_object(vertex, dag_ptr_->vertex_work_weight(vertex), dag_ptr_->vertex_mem_weight(vertex)); + std::vector> GetComponentsForRange(size_t startLevel, + size_t endLevel, + const std::vector> &levelSets) const { + UnionFindUniverseT uf; + for (size_t i = startLevel; i < endLevel; ++i) { + for (const auto vertex : levelSets[i]) { + uf.AddObject(vertex, dagPtr_->VertexWorkWeight(vertex), dagPtr_->VertexMemWeight(vertex)); } - for (const auto &node : level_sets[i]) { - for (const auto &child : dag_ptr_->children(node)) { - if (uf.is_in_universe(child)) { - uf.join_by_name(node, child); + for (const auto &node : levelSets[i]) { + for (const auto &child : dagPtr_->Children(node)) { + if (uf.IsInUniverse(child)) { + uf.JoinByName(node, child); } } - for (const auto &parent : dag_ptr_->parents(node)) { - if (uf.is_in_universe(parent)) { - uf.join_by_name(parent, node); + for (const auto &parent : dagPtr_->Parents(node)) { + if (uf.IsInUniverse(parent)) { + uf.JoinByName(parent, node); } } } } - return uf.get_connected_components(); + return uf.GetConnectedComponents(); } /** * @brief Computes wavefronts for the entire DAG. * This method is now const. */ - std::vector> compute_wavefronts() const { - std::vector all_vertices(dag_ptr_->num_vertices()); - std::iota(all_vertices.begin(), all_vertices.end(), 0); - return compute_wavefronts_for_subgraph(all_vertices); + std::vector> ComputeWavefronts() const { + std::vector allVertices(dagPtr_->NumVertices()); + std::iota(allVertices.begin(), allVertices.end(), 0); + return ComputeWavefrontsForSubgraph(allVertices); } /** * @brief Computes wavefronts for a specific subset of vertices. * This method is now const. 
*/ - std::vector> compute_wavefronts_for_subgraph(const std::vector &vertices) const { + std::vector> ComputeWavefrontsForSubgraph(const std::vector &vertices) const { if (vertices.empty()) { return {}; } - std::vector> level_sets; - std::unordered_set vertex_set(vertices.begin(), vertices.end()); - std::unordered_map in_degree; + std::vector> levelSets; + std::unordered_set vertexSet(vertices.begin(), vertices.end()); + std::unordered_map inDegree; std::queue q; for (const auto &v : vertices) { - in_degree[v] = 0; - for (const auto &p : dag_ptr_->parents(v)) { - if (vertex_set.count(p)) { - in_degree[v]++; + inDegree[v] = 0; + for (const auto &p : dagPtr_->Parents(v)) { + if (vertexSet.count(p)) { + inDegree[v]++; } } - if (in_degree[v] == 0) { + if (inDegree[v] == 0) { q.push(v); } } while (!q.empty()) { - size_t level_size = q.size(); - std::vector current_level; - for (size_t i = 0; i < level_size; ++i) { + size_t levelSize = q.size(); + std::vector currentLevel; + for (size_t i = 0; i < levelSize; ++i) { VertexType u = q.front(); q.pop(); - current_level.push_back(u); - for (const auto &v : dag_ptr_->children(u)) { - if (vertex_set.count(v)) { - in_degree[v]--; - if (in_degree[v] == 0) { + currentLevel.push_back(u); + for (const auto &v : dagPtr_->Children(u)) { + if (vertexSet.count(v)) { + inDegree[v]--; + if (inDegree[v] == 0) { q.push(v); } } } } - level_sets.push_back(current_level); + levelSets.push_back(currentLevel); } - return level_sets; + return levelSets; } }; diff --git a/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp b/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp index c382169b..5714ee25 100644 --- a/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp +++ b/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp @@ -38,141 +38,141 @@ namespace osp { * section, it recursively repeats the process, allowing for a hierarchical * division of the DAG. 
*/ -template -class RecursiveWavefrontDivider : public AbstractWavefrontDivider { +template +class RecursiveWavefrontDivider : public AbstractWavefrontDivider { public: - constexpr static bool enable_debug_print = true; + constexpr static bool enableDebugPrint_ = true; RecursiveWavefrontDivider() { // Set a sensible default splitter on construction. - use_largest_step_splitter(3.0, 4); + UseLargestStepSplitter(3.0, 4); } - std::vector>>> divide(const Graph_t &dag) override { - this->dag_ptr_ = &dag; - if constexpr (enable_debug_print) { + std::vector>>> Divide(const GraphT &dag) override { + this->dagPtr_ = &dag; + if constexpr (enableDebugPrint_) { std::cout << "[DEBUG] Starting recursive-scan division." << std::endl; } - auto global_level_sets = this->compute_wavefronts(); - if (global_level_sets.empty()) { + auto globalLevelSets = this->ComputeWavefronts(); + if (globalLevelSets.empty()) { return {}; } - std::vector>>> all_sections; - divide_recursive(global_level_sets.cbegin(), global_level_sets.cend(), global_level_sets, all_sections, 0); - return all_sections; + std::vector>>> allSections; + DivideRecursive(globalLevelSets.cbegin(), globalLevelSets.cend(), globalLevelSets, allSections, 0); + return allSections; } - RecursiveWavefrontDivider &set_metric(SequenceMetric metric) { - sequence_metric_ = metric; + RecursiveWavefrontDivider &SetMetric(SequenceMetric metric) { + sequenceMetric_ = metric; return *this; } - RecursiveWavefrontDivider &use_variance_splitter(double mult, double threshold, size_t min_len = 1) { - splitter_ = std::make_unique(mult, threshold, min_len); - min_subseq_len_ = min_len; + RecursiveWavefrontDivider &UseVarianceSplitter(double mult, double threshold, size_t minLen = 1) { + splitter_ = std::make_unique(mult, threshold, minLen); + minSubseqLen_ = minLen; return *this; } - RecursiveWavefrontDivider &use_largest_step_splitter(double threshold, size_t min_len) { - splitter_ = std::make_unique(threshold, min_len); - min_subseq_len_ = 
min_len; + RecursiveWavefrontDivider &UseLargestStepSplitter(double threshold, size_t minLen) { + splitter_ = std::make_unique(threshold, minLen); + minSubseqLen_ = minLen; return *this; } - RecursiveWavefrontDivider &use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) { - splitter_ = std::make_unique(diff_threshold, abs_threshold, min_len); - min_subseq_len_ = min_len; + RecursiveWavefrontDivider &UseThresholdScanSplitter(double diffThreshold, double absThreshold, size_t minLen = 1) { + splitter_ = std::make_unique(diffThreshold, absThreshold, minLen); + minSubseqLen_ = minLen; return *this; } - RecursiveWavefrontDivider &set_max_depth(size_t max_depth) { - max_depth_ = max_depth; + RecursiveWavefrontDivider &SetMaxDepth(size_t maxDepth) { + maxDepth_ = maxDepth; return *this; } private: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; using LevelSetConstIterator = typename std::vector>::const_iterator; using DifferenceType = typename std::iterator_traits::difference_type; - SequenceMetric sequence_metric_ = SequenceMetric::COMPONENT_COUNT; + SequenceMetric sequenceMetric_ = SequenceMetric::COMPONENT_COUNT; std::unique_ptr splitter_; - size_t min_subseq_len_ = 4; - size_t max_depth_ = std::numeric_limits::max(); - - void divide_recursive(LevelSetConstIterator level_begin, - LevelSetConstIterator level_end, - const std::vector> &global_level_sets, - std::vector>> &all_sections, - size_t current_depth) const { - const auto current_range_size = static_cast(std::distance(level_begin, level_end)); - size_t start_level_idx = static_cast(std::distance(global_level_sets.cbegin(), level_begin)); - size_t end_level_idx = static_cast(std::distance(global_level_sets.cbegin(), level_end)); + size_t minSubseqLen_ = 4; + size_t maxDepth_ = std::numeric_limits::max(); + + void DivideRecursive(LevelSetConstIterator levelBegin, + LevelSetConstIterator levelEnd, + const std::vector> &globalLevelSets, + std::vector>> 
&allSections, + size_t currentDepth) const { + const auto currentRangeSize = static_cast(std::distance(levelBegin, levelEnd)); + size_t startLevelIdx = static_cast(std::distance(globalLevelSets.cbegin(), levelBegin)); + size_t endLevelIdx = static_cast(std::distance(globalLevelSets.cbegin(), levelEnd)); // --- Base Cases for Recursion --- - if (current_depth >= max_depth_ || current_range_size < min_subseq_len_) { - if constexpr (enable_debug_print) { - std::cout << "[DEBUG depth " << current_depth << "] Base case reached. Creating section from levels " - << start_level_idx << " to " << end_level_idx << "." << std::endl; + if (currentDepth >= maxDepth_ || currentRangeSize < minSubseqLen_) { + if constexpr (enableDebugPrint_) { + std::cout << "[DEBUG depth " << currentDepth << "] Base case reached. Creating section from levels " + << startLevelIdx << " to " << endLevelIdx << "." << std::endl; } // Ensure the section is not empty before adding - if (start_level_idx < end_level_idx) { - all_sections.push_back(this->get_components_for_range(start_level_idx, end_level_idx, global_level_sets)); + if (startLevelIdx < endLevelIdx) { + allSections.push_back(this->GetComponentsForRange(startLevelIdx, endLevelIdx, globalLevelSets)); } return; } // --- Create a view of the levels for the current sub-problem --- - std::vector> sub_level_sets(level_begin, level_end); + std::vector> subLevelSets(levelBegin, levelEnd); - SequenceGenerator generator(*(this->dag_ptr_), sub_level_sets); - std::vector sequence = generator.generate(sequence_metric_); + SequenceGenerator generator(*(this->dagPtr_), subLevelSets); + std::vector sequence = generator.Generate(sequenceMetric_); - if constexpr (enable_debug_print) { - std::cout << "[DEBUG depth " << current_depth << "] Analyzing sequence: "; + if constexpr (enableDebugPrint_) { + std::cout << "[DEBUG depth " << currentDepth << "] Analyzing sequence: "; for (const auto &val : sequence) { std::cout << val << " "; } std::cout << std::endl; } - 
std::vector local_cuts = splitter_->split(sequence); + std::vector localCuts = splitter_->Split(sequence); // --- Base Case: No further cuts found --- - if (local_cuts.empty()) { - if constexpr (enable_debug_print) { - std::cout << "[DEBUG depth " << current_depth << "] No cuts found. Creating section from levels " - << start_level_idx << " to " << end_level_idx << "." << std::endl; + if (localCuts.empty()) { + if constexpr (enableDebugPrint_) { + std::cout << "[DEBUG depth " << currentDepth << "] No cuts found. Creating section from levels " << startLevelIdx + << " to " << endLevelIdx << "." << std::endl; } - all_sections.push_back(this->get_components_for_range(start_level_idx, end_level_idx, global_level_sets)); + allSections.push_back(this->GetComponentsForRange(startLevelIdx, endLevelIdx, globalLevelSets)); return; } - if constexpr (enable_debug_print) { - std::cout << "[DEBUG depth " << current_depth << "] Found " << local_cuts.size() << " cuts: "; - for (const auto c : local_cuts) { + if constexpr (enableDebugPrint_) { + std::cout << "[DEBUG depth " << currentDepth << "] Found " << localCuts.size() << " cuts: "; + for (const auto c : localCuts) { std::cout << c << ", "; } - std::cout << "in level range [" << start_level_idx << ", " << end_level_idx << "). Recursing." << std::endl; + std::cout << "in level range [" << startLevelIdx << ", " << endLevelIdx << "). Recursing." 
<< std::endl; } // --- Recurse on the new, smaller sub-problems --- - std::sort(local_cuts.begin(), local_cuts.end()); - local_cuts.erase(std::unique(local_cuts.begin(), local_cuts.end()), local_cuts.end()); - - auto current_sub_begin = level_begin; - for (const auto &local_cut_idx : local_cuts) { - auto cut_iterator = level_begin + static_cast(local_cut_idx); - if (cut_iterator > current_sub_begin) { - divide_recursive(current_sub_begin, cut_iterator, global_level_sets, all_sections, current_depth + 1); + std::sort(localCuts.begin(), localCuts.end()); + localCuts.erase(std::unique(localCuts.begin(), localCuts.end()), localCuts.end()); + + auto currentSubBegin = levelBegin; + for (const auto &localCutIdx : localCuts) { + auto cutIterator = levelBegin + static_cast(localCutIdx); + if (cutIterator > currentSubBegin) { + DivideRecursive(currentSubBegin, cutIterator, globalLevelSets, allSections, currentDepth + 1); } - current_sub_begin = cut_iterator; + currentSubBegin = cutIterator; } // Recurse on the final segment from the last cut to the end. - if (current_sub_begin < level_end) { - divide_recursive(current_sub_begin, level_end, global_level_sets, all_sections, current_depth + 1); + if (currentSubBegin < levelEnd) { + DivideRecursive(currentSubBegin, levelEnd, globalLevelSets, allSections, currentDepth + 1); } } }; diff --git a/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp b/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp index c815b615..3b2178a5 100644 --- a/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp +++ b/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp @@ -33,29 +33,29 @@ namespace osp { * @brief Divides a DAG by scanning all wavefronts and applying a splitting algorithm. * This revised version uses a fluent API for safer and clearer algorithm configuration. 
*/ -template -class ScanWavefrontDivider : public AbstractWavefrontDivider { +template +class ScanWavefrontDivider : public AbstractWavefrontDivider { public: - constexpr static bool enable_debug_print = true; + constexpr static bool enableDebugPrint_ = true; - ScanWavefrontDivider() { use_largest_step_splitter(3.0, 4); } + ScanWavefrontDivider() { UseLargestStepSplitter(3.0, 4); } - std::vector>>> divide(const Graph_t &dag) override { - this->dag_ptr_ = &dag; - if constexpr (enable_debug_print) { + std::vector>>> Divide(const GraphT &dag) override { + this->dagPtr_ = &dag; + if constexpr (enableDebugPrint_) { std::cout << "[DEBUG] Starting scan-all division." << std::endl; } - std::vector>> level_sets = this->compute_wavefronts(); - if (level_sets.empty()) { + std::vector>> levelSets = this->ComputeWavefronts(); + if (levelSets.empty()) { return {}; } - SequenceGenerator generator(dag, level_sets); - std::vector sequence = generator.generate(sequence_metric_); + SequenceGenerator generator(dag, levelSets); + std::vector sequence = generator.Generate(sequenceMetric_); - if constexpr (enable_debug_print) { - std::cout << "[DEBUG] Metric: " << static_cast(sequence_metric_) << std::endl; + if constexpr (enableDebugPrint_) { + std::cout << "[DEBUG] Metric: " << static_cast(sequenceMetric_) << std::endl; std::cout << "[DEBUG] Generated sequence: "; for (const auto &val : sequence) { std::cout << val << " "; @@ -63,69 +63,69 @@ class ScanWavefrontDivider : public AbstractWavefrontDivider { std::cout << std::endl; } - std::vector cut_levels = splitter_->split(sequence); - std::sort(cut_levels.begin(), cut_levels.end()); - cut_levels.erase(std::unique(cut_levels.begin(), cut_levels.end()), cut_levels.end()); + std::vector cutLevels = splitter_->Split(sequence); + std::sort(cutLevels.begin(), cutLevels.end()); + cutLevels.erase(std::unique(cutLevels.begin(), cutLevels.end()), cutLevels.end()); - if constexpr (enable_debug_print) { + if constexpr (enableDebugPrint_) { 
std::cout << "[DEBUG] Final cut levels: "; - for (const auto &level : cut_levels) { + for (const auto &level : cutLevels) { std::cout << level << " "; } std::cout << std::endl; } - return create_vertex_maps_from_cuts(cut_levels, level_sets); + return CreateVertexMapsFromCuts(cutLevels, levelSets); } - ScanWavefrontDivider &set_metric(SequenceMetric metric) { - sequence_metric_ = metric; + ScanWavefrontDivider &SetMetric(SequenceMetric metric) { + sequenceMetric_ = metric; return *this; } - ScanWavefrontDivider &use_variance_splitter(double mult, double threshold, size_t min_len = 1) { - splitter_ = std::make_unique(mult, threshold, min_len); + ScanWavefrontDivider &UseVarianceSplitter(double mult, double threshold, size_t minLen = 1) { + splitter_ = std::make_unique(mult, threshold, minLen); return *this; } - ScanWavefrontDivider &use_largest_step_splitter(double threshold, size_t min_len) { - splitter_ = std::make_unique(threshold, min_len); + ScanWavefrontDivider &UseLargestStepSplitter(double threshold, size_t minLen) { + splitter_ = std::make_unique(threshold, minLen); return *this; } - ScanWavefrontDivider &use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) { - splitter_ = std::make_unique(diff_threshold, abs_threshold, min_len); + ScanWavefrontDivider &UseThresholdScanSplitter(double diffThreshold, double absThreshold, size_t minLen = 1) { + splitter_ = std::make_unique(diffThreshold, absThreshold, minLen); return *this; } private: - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - SequenceMetric sequence_metric_ = SequenceMetric::COMPONENT_COUNT; + SequenceMetric sequenceMetric_ = SequenceMetric::COMPONENT_COUNT; std::unique_ptr splitter_; - std::vector>> create_vertex_maps_from_cuts( - const std::vector &cut_levels, const std::vector> &level_sets) const { - if (cut_levels.empty()) { + std::vector>> CreateVertexMapsFromCuts( + const std::vector &cutLevels, const std::vector> &levelSets) const { + 
if (cutLevels.empty()) { // If there are no cuts, return a single section with all components. - return {this->get_components_for_range(0, level_sets.size(), level_sets)}; + return {this->GetComponentsForRange(0, levelSets.size(), levelSets)}; } - std::vector>> vertex_maps; - size_t start_level = 0; + std::vector>> vertexMaps; + size_t startLevel = 0; - for (const auto &cut_level : cut_levels) { - if (start_level < cut_level) { // Avoid creating empty sections - vertex_maps.push_back(this->get_components_for_range(start_level, cut_level, level_sets)); + for (const auto &cutLevel : cutLevels) { + if (startLevel < cutLevel) { // Avoid creating empty sections + vertexMaps.push_back(this->GetComponentsForRange(startLevel, cutLevel, levelSets)); } - start_level = cut_level; + startLevel = cutLevel; } // Add the final section from the last cut to the end of the levels - if (start_level < level_sets.size()) { - vertex_maps.push_back(this->get_components_for_range(start_level, level_sets.size(), level_sets)); + if (startLevel < levelSets.size()) { + vertexMaps.push_back(this->GetComponentsForRange(startLevel, levelSets.size(), levelSets)); } - return vertex_maps; + return vertexMaps; } }; diff --git a/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp b/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp index 9dd925ac..98b94c24 100644 --- a/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp +++ b/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp @@ -30,53 +30,53 @@ enum class SequenceMetric { COMPONENT_COUNT, AVAILABLE_PARALLELISM }; * @class SequenceGenerator * @brief Helper to generate a numerical sequence based on a chosen metric. 
*/ -template +template class SequenceGenerator { - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; public: - SequenceGenerator(const Graph_t &dag, const std::vector> &level_sets) - : dag_(dag), level_sets_(level_sets) {} + SequenceGenerator(const GraphT &dag, const std::vector> &levelSets) + : dag_(dag), levelSets_(levelSets) {} - std::vector generate(SequenceMetric metric) const { + std::vector Generate(SequenceMetric metric) const { switch (metric) { case SequenceMetric::AVAILABLE_PARALLELISM: - return generate_available_parallelism(); + return GenerateAvailableParallelism(); case SequenceMetric::COMPONENT_COUNT: default: - return generate_component_count(); + return GenerateComponentCount(); } } private: - std::vector generate_component_count() const { - WavefrontStatisticsCollector collector(dag_, level_sets_); - auto fwd_stats = collector.compute_forward(); + std::vector GenerateComponentCount() const { + WavefrontStatisticsCollector collector(dag_, levelSets_); + auto fwdStats = collector.ComputeForward(); std::vector seq; - seq.reserve(fwd_stats.size()); - for (const auto &stat : fwd_stats) { - seq.push_back(static_cast(stat.connected_components_vertices.size())); + seq.reserve(fwdStats.size()); + for (const auto &stat : fwdStats) { + seq.push_back(static_cast(stat.connectedComponentsVertices_.size())); } return seq; } - std::vector generate_available_parallelism() const { + std::vector GenerateAvailableParallelism() const { std::vector seq; - seq.reserve(level_sets_.size()); - double cumulative_work = 0.0; - for (size_t i = 0; i < level_sets_.size(); ++i) { - double level_work = 0.0; - for (const auto &vertex : level_sets_[i]) { - level_work += dag_.vertex_work_weight(vertex); + seq.reserve(levelSets_.size()); + double cumulativeWork = 0.0; + for (size_t i = 0; i < levelSets_.size(); ++i) { + double levelWork = 0.0; + for (const auto &vertex : levelSets_[i]) { + levelWork += dag_.VertexWorkWeight(vertex); } - cumulative_work += level_work; 
- seq.push_back(cumulative_work / (static_cast(i) + 1.0)); + cumulativeWork += levelWork; + seq.push_back(cumulativeWork / (static_cast(i) + 1.0)); } return seq; } - const Graph_t &dag_; - const std::vector> &level_sets_; + const GraphT &dag_; + const std::vector> &levelSets_; }; } // end namespace osp diff --git a/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp b/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp index 2cde1ad0..1bc3b305 100644 --- a/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp +++ b/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp @@ -40,7 +40,7 @@ class SequenceSplitter { * @param seq The sequence of numbers to split. * @return A vector of indices where the sequence is split. */ - virtual std::vector split(const std::vector &seq) = 0; + virtual std::vector Split(const std::vector &seq) = 0; }; /** @@ -51,100 +51,100 @@ class SequenceSplitter { */ class VarianceSplitter : public SequenceSplitter { public: - VarianceSplitter(double var_mult, - double var_threshold, - size_t min_subseq_len = 1, - size_t max_depth = std::numeric_limits::max()) - : var_mult_(var_mult), var_threshold_(var_threshold), min_subseq_len_(min_subseq_len), max_depth_(max_depth) {} + VarianceSplitter(double varMult, + double varThreshold, + size_t minSubseqLen = 1, + size_t maxDepth = std::numeric_limits::max()) + : varMult_(varMult), varThreshold_(varThreshold), minSubseqLen_(minSubseqLen), maxDepth_(maxDepth) {} - std::vector split(const std::vector &seq) override { + std::vector Split(const std::vector &seq) override { if (seq.empty()) { return {}; } // Precompute prefix sums for the entire sequence - prefix_sum_.assign(seq.size() + 1, 0.0); - prefix_sq_sum_.assign(seq.size() + 1, 0.0); + prefixSum_.assign(seq.size() + 1, 0.0); + prefixSqSum_.assign(seq.size() + 1, 0.0); for (size_t i = 0; i < seq.size(); ++i) { - prefix_sum_[i + 1] = prefix_sum_[i] + seq[i]; - prefix_sq_sum_[i + 1] = prefix_sq_sum_[i] + seq[i] * 
seq[i]; + prefixSum_[i + 1] = prefixSum_[i] + seq[i]; + prefixSqSum_[i + 1] = prefixSqSum_[i] + seq[i] * seq[i]; } std::vector splits; - split_recursive(0, seq.size(), splits, 0); + SplitRecursive(0, seq.size(), splits, 0); std::sort(splits.begin(), splits.end()); return splits; } private: // Compute mean & variance in [l, r) in O(1) - void compute_variance(size_t l, size_t r, double &mean, double &variance) const { + void ComputeVariance(size_t l, size_t r, double &mean, double &variance) const { size_t n = r - l; if (n <= 1) { - mean = (n == 1) ? (prefix_sum_[r] - prefix_sum_[l]) : 0.0; + mean = (n == 1) ? (prefixSum_[r] - prefixSum_[l]) : 0.0; variance = 0.0; return; } - double sum = prefix_sum_[r] - prefix_sum_[l]; - double sq_sum = prefix_sq_sum_[r] - prefix_sq_sum_[l]; + double sum = prefixSum_[r] - prefixSum_[l]; + double sqSum = prefixSqSum_[r] - prefixSqSum_[l]; mean = sum / static_cast(n); - variance = sq_sum / static_cast(n) - mean * mean; + variance = sqSum / static_cast(n) - mean * mean; } - void split_recursive(size_t l, size_t r, std::vector &splits, size_t depth) { - if (depth >= max_depth_ || r - l < 2 * min_subseq_len_) { + void SplitRecursive(size_t l, size_t r, std::vector &splits, size_t depth) { + if (depth >= maxDepth_ || r - l < 2 * minSubseqLen_) { return; } double mean, variance; - compute_variance(l, r, mean, variance); + ComputeVariance(l, r, mean, variance); - if (variance > var_threshold_) { - size_t best_split = 0; - if (compute_best_split(l, r, best_split, variance)) { + if (variance > varThreshold_) { + size_t bestSplit = 0; + if (ComputeBestSplit(l, r, bestSplit, variance)) { // enforce minimum sub-sequence length - if ((best_split - l) >= min_subseq_len_ && (r - best_split) >= min_subseq_len_) { - splits.push_back(best_split); - split_recursive(l, best_split, splits, depth + 1); - split_recursive(best_split, r, splits, depth + 1); + if ((bestSplit - l) >= minSubseqLen_ && (r - bestSplit) >= minSubseqLen_) { + 
splits.push_back(bestSplit); + SplitRecursive(l, bestSplit, splits, depth + 1); + SplitRecursive(bestSplit, r, splits, depth + 1); } } } } - bool compute_best_split(size_t l, size_t r, size_t &best_split, double original_variance) const { + bool ComputeBestSplit(size_t l, size_t r, size_t &bestSplit, double originalVariance) const { size_t n = r - l; if (n < 2) { return false; } - double min_weighted_variance_sum = std::numeric_limits::max(); - best_split = 0; + double minWeightedVarianceSum = std::numeric_limits::max(); + bestSplit = 0; for (size_t i = l + 1; i < r; ++i) { - double left_mean, left_var, right_mean, right_var; - compute_variance(l, i, left_mean, left_var); - compute_variance(i, r, right_mean, right_var); + double leftMean, leftVar, rightMean, rightVar; + ComputeVariance(l, i, leftMean, leftVar); + ComputeVariance(i, r, rightMean, rightVar); - double weighted_sum = static_cast(i - l) * left_var + static_cast(r - i) * right_var; + double weightedSum = static_cast(i - l) * leftVar + static_cast(r - i) * rightVar; - if (weighted_sum < min_weighted_variance_sum) { - min_weighted_variance_sum = weighted_sum; - best_split = i; + if (weightedSum < minWeightedVarianceSum) { + minWeightedVarianceSum = weightedSum; + bestSplit = i; } } - double total_original_variance = original_variance * static_cast(n); - return best_split > l && min_weighted_variance_sum < var_mult_ * total_original_variance; + double totalOriginalVariance = originalVariance * static_cast(n); + return bestSplit > l && minWeightedVarianceSum < varMult_ * totalOriginalVariance; } - double var_mult_; - double var_threshold_; - size_t min_subseq_len_; - size_t max_depth_; - std::vector prefix_sum_; - std::vector prefix_sq_sum_; + double varMult_; + double varThreshold_; + size_t minSubseqLen_; + size_t maxDepth_; + std::vector prefixSum_; + std::vector prefixSqSum_; }; /** @@ -159,57 +159,57 @@ class LargestStepSplitter : public SequenceSplitter { using difference_type = typename 
std::iterator_traits::difference_type; public: - LargestStepSplitter(double diff_threshold, size_t min_subseq_len, size_t max_depth = std::numeric_limits::max()) - : diff_threshold_(diff_threshold), min_subseq_len_(min_subseq_len), max_depth_(max_depth) {} + LargestStepSplitter(double diffThreshold, size_t minSubseqLen, size_t maxDepth = std::numeric_limits::max()) + : diffThreshold_(diffThreshold), minSubseqLen_(minSubseqLen), maxDepth_(maxDepth) {} - std::vector split(const std::vector &seq) override { + std::vector Split(const std::vector &seq) override { std::vector splits; - split_recursive(seq.begin(), seq.end(), splits, 0, 0); + SplitRecursive(seq.begin(), seq.end(), splits, 0, 0); std::sort(splits.begin(), splits.end()); return splits; } private: - void split_recursive(ConstIterator begin, ConstIterator end, std::vector &splits, size_t offset, size_t current_depth) { - if (current_depth >= max_depth_) { + void SplitRecursive(ConstIterator begin, ConstIterator end, std::vector &splits, size_t offset, size_t currentDepth) { + if (currentDepth >= maxDepth_) { return; } const difference_type size = std::distance(begin, end); - if (static_cast(size) < 2 * min_subseq_len_) { + if (static_cast(size) < 2 * minSubseqLen_) { return; } - double max_diff = 0.0; - difference_type split_point_local = 0; + double maxDiff = 0.0; + difference_type splitPointLocal = 0; - difference_type current_local_idx = 0; + difference_type currentLocalIdx = 0; for (auto it = begin; it != end - 1; ++it) { double diff = std::abs(*it - *(it + 1)); - if (diff > max_diff) { - max_diff = diff; - split_point_local = current_local_idx + 1; + if (diff > maxDiff) { + maxDiff = diff; + splitPointLocal = currentLocalIdx + 1; } - current_local_idx++; + currentLocalIdx++; } - if (max_diff > diff_threshold_ && split_point_local > 0) { - size_t split_point_global = static_cast(split_point_local) + offset; + if (maxDiff > diffThreshold_ && splitPointLocal > 0) { + size_t splitPointGlobal = 
static_cast(splitPointLocal) + offset; - if ((split_point_local >= static_cast(min_subseq_len_)) - && ((size - split_point_local) >= static_cast(min_subseq_len_))) { - splits.push_back(split_point_global); + if ((splitPointLocal >= static_cast(minSubseqLen_)) + && ((size - splitPointLocal) >= static_cast(minSubseqLen_))) { + splits.push_back(splitPointGlobal); - ConstIterator split_it = begin + split_point_local; - split_recursive(begin, split_it, splits, offset, current_depth + 1); - split_recursive(split_it, end, splits, split_point_global, current_depth + 1); + ConstIterator splitIt = begin + splitPointLocal; + SplitRecursive(begin, splitIt, splits, offset, currentDepth + 1); + SplitRecursive(splitIt, end, splits, splitPointGlobal, currentDepth + 1); } } } - double diff_threshold_; - size_t min_subseq_len_; - size_t max_depth_; + double diffThreshold_; + size_t minSubseqLen_; + size_t maxDepth_; }; /** @@ -219,36 +219,36 @@ class LargestStepSplitter : public SequenceSplitter { */ class ThresholdScanSplitter : public SequenceSplitter { public: - ThresholdScanSplitter(double diff_threshold, double absolute_threshold, size_t min_subseq_len = 1) - : diff_threshold_(diff_threshold), absolute_threshold_(absolute_threshold), min_subseq_len_(min_subseq_len) {} + ThresholdScanSplitter(double diffThreshold, double absoluteThreshold, size_t minSubseqLen = 1) + : diffThreshold_(diffThreshold), absoluteThreshold_(absoluteThreshold), minSubseqLen_(minSubseqLen) {} - std::vector split(const std::vector &seq) override { + std::vector Split(const std::vector &seq) override { std::vector splits; if (seq.size() < 2) { return splits; } - size_t last_cut = 0; + size_t lastCut = 0; for (size_t i = 0; i < seq.size() - 1; ++i) { - bool should_cut = false; + bool shouldCut = false; double current = seq[i]; double next = seq[i + 1]; // A split is triggered by a significant change OR by crossing the absolute threshold. 
if (current > next) { // Dropping - if ((current - next) > diff_threshold_ || (next < absolute_threshold_ && current >= absolute_threshold_)) { - should_cut = true; + if ((current - next) > diffThreshold_ || (next < absoluteThreshold_ && current >= absoluteThreshold_)) { + shouldCut = true; } } else if (current < next) { // Rising - if ((next - current) > diff_threshold_ || (next > absolute_threshold_ && current <= absolute_threshold_)) { - should_cut = true; + if ((next - current) > diffThreshold_ || (next > absoluteThreshold_ && current <= absoluteThreshold_)) { + shouldCut = true; } } - if (should_cut) { - if ((i + 1 - last_cut) >= min_subseq_len_ && (seq.size() - (i + 1)) >= min_subseq_len_) { + if (shouldCut) { + if ((i + 1 - lastCut) >= minSubseqLen_ && (seq.size() - (i + 1)) >= minSubseqLen_) { splits.push_back(i + 1); - last_cut = i + 1; + lastCut = i + 1; } } } @@ -256,9 +256,9 @@ class ThresholdScanSplitter : public SequenceSplitter { } private: - double diff_threshold_; - double absolute_threshold_; - size_t min_subseq_len_; + double diffThreshold_; + double absoluteThreshold_; + size_t minSubseqLen_; }; } // namespace osp diff --git a/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp b/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp index 65f7d3e5..850b6e5e 100644 --- a/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp +++ b/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp @@ -27,39 +27,39 @@ namespace osp { * @struct WavefrontStatistics * @brief Holds statistical data for a single wavefront. 
*/ -template +template struct WavefrontStatistics { - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::vector> connected_components_weights; - std::vector> connected_components_memories; - std::vector> connected_components_vertices; + std::vector> connectedComponentsWeights_; + std::vector> connectedComponentsMemories_; + std::vector> connectedComponentsVertices_; }; /** * @class WavefrontStatisticsCollector * @brief Computes forward and backward wavefront statistics for a given DAG. */ -template +template class WavefrontStatisticsCollector { - using VertexType = vertex_idx_t; - using UnionFind = union_find_universe_t; + using VertexType = VertexIdxT; + using UnionFind = UnionFindUniverseT; public: - WavefrontStatisticsCollector(const Graph_t &dag, const std::vector> &level_sets) - : dag_(dag), level_sets_(level_sets) {} + WavefrontStatisticsCollector(const GraphT &dag, const std::vector> &levelSets) + : dag_(dag), levelSets_(levelSets) {} /** * @brief Computes wavefront statistics by processing levels from start to end. * @return A vector of statistics, one for each level. */ - std::vector> compute_forward() const { - std::vector> stats(level_sets_.size()); + std::vector> ComputeForward() const { + std::vector> stats(levelSets_.size()); UnionFind uf; - for (size_t i = 0; i < level_sets_.size(); ++i) { - update_union_find(uf, i); - collect_stats_for_level(stats[i], uf); + for (size_t i = 0; i < levelSets_.size(); ++i) { + UpdateUnionFind(uf, i); + CollectStatsForLevel(stats[i], uf); } return stats; } @@ -68,55 +68,55 @@ class WavefrontStatisticsCollector { * @brief Computes wavefront statistics by processing levels from end to start. * @return A vector of statistics, one for each level (in original level order). 
*/ - std::vector> compute_backward() const { - std::vector> stats(level_sets_.size()); + std::vector> ComputeBackward() const { + std::vector> stats(levelSets_.size()); UnionFind uf; - for (size_t i = level_sets_.size(); i > 0; --i) { - size_t level_idx = i - 1; - update_union_find(uf, level_idx); - collect_stats_for_level(stats[level_idx], uf); + for (size_t i = levelSets_.size(); i > 0; --i) { + size_t levelIdx = i - 1; + UpdateUnionFind(uf, levelIdx); + CollectStatsForLevel(stats[levelIdx], uf); } return stats; } private: - void update_union_find(UnionFind &uf, size_t level_idx) const { + void UpdateUnionFind(UnionFind &uf, size_t levelIdx) const { // Add all vertices from the current level to the universe - for (const auto vertex : level_sets_[level_idx]) { - uf.add_object(vertex, dag_.vertex_work_weight(vertex), dag_.vertex_mem_weight(vertex)); + for (const auto vertex : levelSets_[levelIdx]) { + uf.AddObject(vertex, dag_.VertexWorkWeight(vertex), dag_.VertexMemWeight(vertex)); } // Join components based on edges connecting to vertices already in the universe - for (const auto &node : level_sets_[level_idx]) { - for (const auto &child : dag_.children(node)) { - if (uf.is_in_universe(child)) { - uf.join_by_name(node, child); + for (const auto &node : levelSets_[levelIdx]) { + for (const auto &child : dag_.Children(node)) { + if (uf.IsInUniverse(child)) { + uf.JoinByName(node, child); } } - for (const auto &parent : dag_.parents(node)) { - if (uf.is_in_universe(parent)) { - uf.join_by_name(parent, node); + for (const auto &parent : dag_.Parents(node)) { + if (uf.IsInUniverse(parent)) { + uf.JoinByName(parent, node); } } } } - void collect_stats_for_level(WavefrontStatistics &stats, UnionFind &uf) const { - const auto components = uf.get_connected_components_weights_and_memories(); - stats.connected_components_vertices.reserve(components.size()); - stats.connected_components_weights.reserve(components.size()); - 
stats.connected_components_memories.reserve(components.size()); + void CollectStatsForLevel(WavefrontStatistics &stats, UnionFind &uf) const { + const auto components = uf.GetConnectedComponentsWeightsAndMemories(); + stats.connectedComponentsVertices_.reserve(components.size()); + stats.connectedComponentsWeights_.reserve(components.size()); + stats.connectedComponentsMemories_.reserve(components.size()); for (const auto &comp : components) { auto &[vertices, weight, memory] = comp; - stats.connected_components_vertices.emplace_back(vertices); - stats.connected_components_weights.emplace_back(weight); - stats.connected_components_memories.emplace_back(memory); + stats.connectedComponentsVertices_.emplace_back(vertices); + stats.connectedComponentsWeights_.emplace_back(weight); + stats.connectedComponentsMemories_.emplace_back(memory); } } - const Graph_t &dag_; - const std::vector> &level_sets_; + const GraphT &dag_; + const std::vector> &levelSets_; }; } // end namespace osp diff --git a/include/osp/graph_algorithms/computational_dag_construction_util.hpp b/include/osp/graph_algorithms/computational_dag_construction_util.hpp index 597b7dc1..8c9b368b 100644 --- a/include/osp/graph_algorithms/computational_dag_construction_util.hpp +++ b/include/osp/graph_algorithms/computational_dag_construction_util.hpp @@ -31,39 +31,36 @@ namespace osp { * assigned starting from 0. If the target graph is not empty, new vertices will be added to the target graph and their indices * will be sequentially assigned starting from the index N. * - * @tparam Graph_from The type of the source graph. Must satisfy `is_computational_dag`. - * @tparam Graph_to The type of the target graph. Must satisfy `is_constructable_cdag_vertex`. + * @tparam GraphFrom The type of the source graph. Must satisfy `is_computational_dag`. + * @tparam GraphTo The type of the target graph. Must satisfy `is_constructable_cdag_vertex`. * @param from The source graph. * @param to The target graph. 
*/ -template -void constructComputationalDag(const Graph_from &from, Graph_to &to) { - static_assert(is_computational_dag_v, "Graph_from must satisfy the computational_dag concept"); - static_assert(is_constructable_cdag_vertex_v, "Graph_to must satisfy the constructable_cdag_vertex concept"); +template +void ConstructComputationalDag(const GraphFrom &from, GraphTo &to) { + static_assert(isComputationalDagV, "GraphFrom must satisfy the computational_dag concept"); + static_assert(isConstructableCdagVertexV, "GraphTo must satisfy the constructable_cdag_vertex concept"); - std::vector> vertex_map; - vertex_map.reserve(from.num_vertices()); + std::vector> vertexMap; + vertexMap.reserve(from.NumVertices()); - for (const auto &v_idx : from.vertices()) { - if constexpr (has_typed_vertices_v and has_typed_vertices_v) { - vertex_map.push_back(to.add_vertex(from.vertex_work_weight(v_idx), - from.vertex_comm_weight(v_idx), - from.vertex_mem_weight(v_idx), - from.vertex_type(v_idx))); + for (const auto &vIdx : from.Vertices()) { + if constexpr (hasTypedVerticesV and hasTypedVerticesV) { + vertexMap.push_back(to.AddVertex( + from.VertexWorkWeight(vIdx), from.VertexCommWeight(vIdx), from.VertexMemWeight(vIdx), from.VertexType(vIdx))); } else { - vertex_map.push_back( - to.add_vertex(from.vertex_work_weight(v_idx), from.vertex_comm_weight(v_idx), from.vertex_mem_weight(v_idx))); + vertexMap.push_back(to.AddVertex(from.VertexWorkWeight(vIdx), from.VertexCommWeight(vIdx), from.VertexMemWeight(vIdx))); } } - if constexpr (has_edge_weights_v and has_edge_weights_v) { - for (const auto &e : edges(from)) { - to.add_edge(vertex_map[source(e, from)], vertex_map[target(e, from)], from.edge_comm_weight(e)); + if constexpr (hasEdgeWeightsV and hasEdgeWeightsV) { + for (const auto &e : Edges(from)) { + to.AddEdge(vertexMap[Source(e, from)], vertexMap[Target(e, from)], from.EdgeCommWeight(e)); } } else { - for (const auto &v : from.vertices()) { - for (const auto &child : from.children(v)) { 
- to.add_edge(vertex_map[v], vertex_map[child]); + for (const auto &v : from.Vertices()) { + for (const auto &child : from.Children(v)) { + to.AddEdge(vertexMap[v], vertexMap[child]); } } } diff --git a/include/osp/graph_algorithms/computational_dag_util.hpp b/include/osp/graph_algorithms/computational_dag_util.hpp index 3c8a339b..60b9f66d 100644 --- a/include/osp/graph_algorithms/computational_dag_util.hpp +++ b/include/osp/graph_algorithms/computational_dag_util.hpp @@ -25,64 +25,63 @@ limitations under the License. namespace osp { -template -v_memw_t max_memory_weight(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); +template +VMemwT MaxMemoryWeight(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + static_assert(hasVertexWeightsV, "GraphT must have vertex weights"); - v_memw_t max_memory_weight = 0; + VMemwT maxMemoryWeight = 0; - for (const auto &v : graph.vertices()) { - max_memory_weight = std::max(max_memory_weight, graph.vertex_memory_weight(v)); + for (const auto &v : graph.Vertices()) { + maxMemoryWeight = std::max(maxMemoryWeight, graph.VertexMemWeight(v)); } - return max_memory_weight; + return maxMemoryWeight; } -template -v_memw_t max_memory_weight(const v_type_t &nodeType_, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); - static_assert(has_typed_vertices_v, "Graph_t must have typed vertices"); +template +VMemwT MaxMemoryWeight(const VTypeT &nodeType, const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + static_assert(hasVertexWeightsV, "GraphT must have vertex weights"); + static_assert(hasTypedVerticesV, "GraphT must have typed vertices"); - v_memw_t 
max_memory_weight = 0; + VMemwT maxMemoryWeight = 0; - for (const auto &node : graph.vertices()) { - if (graph.node_type(node) == nodeType_) { - max_memory_weight = std::max(max_memory_weight, graph.vertex_memory_weight(node)); + for (const auto &node : graph.Vertices()) { + if (graph.VertexType(node) == nodeType) { + maxMemoryWeight = std::max(maxMemoryWeight, graph.VertexMemWeight(node)); } } - return max_memory_weight; + return maxMemoryWeight; } -template -v_workw_t sumOfVerticesWorkWeights(VertexIterator begin, VertexIterator end, const Graph_t &graph) { - static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); +template +VWorkwT SumOfVerticesWorkWeights(VertexIterator begin, VertexIterator end, const GraphT &graph) { + static_assert(hasVertexWeightsV, "GraphT must have vertex weights"); return std::accumulate( - begin, end, 0, [&](const auto sum, const vertex_idx_t &v) { return sum + graph.vertex_work_weight(v); }); + begin, end, 0, [&](const auto sum, const VertexIdxT &v) { return sum + graph.VertexWorkWeight(v); }); } -template -v_workw_t sumOfVerticesWorkWeights(const Graph_t &graph) { - static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); +template +VWorkwT SumOfVerticesWorkWeights(const GraphT &graph) { + static_assert(hasVertexWeightsV, "GraphT must have vertex weights"); - return std::accumulate( - graph.vertices().begin(), - graph.vertices().end(), - static_cast>(0), - [&](const v_workw_t sum, const vertex_idx_t &v) { return sum + graph.vertex_work_weight(v); }); + return std::accumulate(graph.Vertices().begin(), + graph.Vertices().end(), + static_cast>(0), + [&](const VWorkwT sum, const VertexIdxT &v) { return sum + graph.VertexWorkWeight(v); }); } -template -v_workw_t sumOfVerticesWorkWeights(const std::initializer_list> vertices_, const Graph_t &graph) { - return sumOfVerticesWorkWeights(vertices_.begin(), vertices_.end(), graph); +template +VWorkwT SumOfVerticesWorkWeights(const std::initializer_list> vertices, 
const GraphT &graph) { + return SumOfVerticesWorkWeights(vertices.begin(), vertices.end(), graph); } -template -v_commw_t sumOfVerticesCommunicationWeights(VertexIterator begin, VertexIterator end, const Graph_t &graph) { - static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); +template +VCommwT SumOfVerticesCommunicationWeights(VertexIterator begin, VertexIterator end, const GraphT &graph) { + static_assert(hasVertexWeightsV, "GraphT must have vertex weights"); return std::accumulate( - begin, end, 0, [&](const auto sum, const vertex_idx_t &v) { return sum + graph.vertex_comm_weight(v); }); + begin, end, 0, [&](const auto sum, const VertexIdxT &v) { return sum + graph.VertexCommWeight(v); }); } /** @@ -91,14 +90,14 @@ v_commw_t sumOfVerticesCommunicationWeights(VertexIterator begin, Verte * @tparam Instance_t The type of the instance object (e.g., BspInstance) used for compatibility checks. * @tparam VertexIterator An iterator over vertex indices of the subgraph. */ -template -v_workw_t sumOfCompatibleWorkWeights( - VertexIterator begin, VertexIterator end, const SubGraph_t &graph, const Instance_t &main_instance, unsigned processorType) { - static_assert(has_vertex_weights_v, "SubGraph_t must have vertex weights"); +template +VWorkwT SumOfCompatibleWorkWeights( + VertexIterator begin, VertexIterator end, const SubGraphT &graph, const InstanceT &mainInstance, unsigned processorType) { + static_assert(hasVertexWeightsV, "SubGraph_t must have vertex weights"); return std::accumulate( - begin, end, static_cast>(0), [&](const v_workw_t sum, const vertex_idx_t &v) { - if (main_instance.isCompatibleType(graph.vertex_type(v), processorType)) { - return sum + graph.vertex_work_weight(v); + begin, end, static_cast>(0), [&](const VWorkwT sum, const VertexIdxT &v) { + if (mainInstance.IsCompatibleType(graph.VertexType(v), processorType)) { + return sum + graph.VertexWorkWeight(v); } return sum; }); @@ -107,67 +106,65 @@ v_workw_t 
sumOfCompatibleWorkWeights( /** * @brief Overload to calculate compatible work weight for all vertices in a graph. */ -template -v_workw_t sumOfCompatibleWorkWeights(const SubGraph_t &graph, const Instance_t &main_instance, unsigned processorType) { - return sumOfCompatibleWorkWeights(graph.vertices().begin(), graph.vertices().end(), graph, main_instance, processorType); +template +VWorkwT SumOfCompatibleWorkWeights(const SubGraphT &graph, const InstanceT &mainInstance, unsigned processorType) { + return SumOfCompatibleWorkWeights(graph.Vertices().begin(), graph.Vertices().end(), graph, mainInstance, processorType); } -template -v_commw_t sumOfVerticesCommunicationWeights(const Graph_t &graph) { - static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); +template +VCommwT SumOfVerticesCommunicationWeights(const GraphT &graph) { + static_assert(hasVertexWeightsV, "GraphT must have vertex weights"); - return std::accumulate( - graph.vertices().begin(), - graph.vertices().end(), - static_cast>(0), - [&](const v_commw_t sum, const vertex_idx_t &v) { return sum + graph.vertex_comm_weight(v); }); + return std::accumulate(graph.Vertices().begin(), + graph.Vertices().end(), + static_cast>(0), + [&](const VCommwT sum, const VertexIdxT &v) { return sum + graph.VertexCommWeight(v); }); } -template -v_commw_t sumOfVerticesCommunicationWeights(const std::initializer_list> &vertices_, - const Graph_t &graph) { - return sumOfVerticesCommunicationWeights(vertices_.begin(), vertices_.end(), graph); +template +VCommwT SumOfVerticesCommunicationWeights(const std::initializer_list> &vertices, const GraphT &graph) { + return SumOfVerticesCommunicationWeights(vertices.begin(), vertices.end(), graph); } -template -e_commw_t sumOfEdgesCommunicationWeights(EdgeIterator begin, EdgeIterator end, const Graph_t &graph) { - static_assert(has_edge_weights_v, "Graph_t must have edge weights"); +template +ECommwT SumOfEdgesCommunicationWeights(EdgeIterator begin, EdgeIterator end, 
const GraphT &graph) { + static_assert(hasEdgeWeightsV, "GraphT must have edge weights"); return std::accumulate( - begin, end, 0, [&](const auto sum, const edge_desc_t &e) { return sum + graph.edge_comm_weight(e); }); + begin, end, 0, [&](const auto sum, const EdgeDescT &e) { return sum + graph.EdgeCommWeight(e); }); } -template -e_commw_t sumOfEdgesCommunicationWeights(const std::initializer_list> &edges_, const Graph_t &graph) { - return sumOfEdgesCommunicationWeights(edges_.begin(), edges_.end(), graph); +template +ECommwT SumOfEdgesCommunicationWeights(const std::initializer_list> &edges, const GraphT &graph) { + return SumOfEdgesCommunicationWeights(edges.begin(), edges.end(), graph); } -template -v_workw_t critical_path_weight(const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph concept"); - static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); +template +VWorkwT CriticalPathWeight(const GraphT &graph) { + static_assert(isDirectedGraphEdgeDescV, "GraphT must satisfy the directed_graph concept"); + static_assert(hasVertexWeightsV, "GraphT must have vertex weights"); - if (graph.num_vertices() == 0) { + if (graph.NumVertices() == 0) { return 0; } - std::vector> top_length(graph.num_vertices(), 0); - v_workw_t critical_path_weight = 0; + std::vector> topLength(graph.NumVertices(), 0); + VWorkwT criticalPathWeight = 0; // calculating lenght of longest path for (const auto &node : GetTopOrder(graph)) { - v_workw_t max_temp = 0; - for (const auto &parent : graph.parents(node)) { - max_temp = std::max(max_temp, top_length[parent]); + VWorkwT maxTemp = 0; + for (const auto &parent : graph.Parents(node)) { + maxTemp = std::max(maxTemp, topLength[parent]); } - top_length[node] = max_temp + graph.vertex_work_weight(node); + topLength[node] = maxTemp + graph.VertexWorkWeight(node); - if (top_length[node] > critical_path_weight) { - critical_path_weight = top_length[node]; + if (topLength[node] 
> criticalPathWeight) { + criticalPathWeight = topLength[node]; } } - return critical_path_weight; + return criticalPathWeight; } } // namespace osp diff --git a/include/osp/graph_algorithms/cuthill_mckee.hpp b/include/osp/graph_algorithms/cuthill_mckee.hpp index 848330d4..6470d17d 100644 --- a/include/osp/graph_algorithms/cuthill_mckee.hpp +++ b/include/osp/graph_algorithms/cuthill_mckee.hpp @@ -29,238 +29,238 @@ limitations under the License. namespace osp { -template -struct cm_vertex { - using VertexType = vertex_idx_t; - VertexType vertex; +template +struct CmVertex { + using VertexType = VertexIdxT; + VertexType vertex_; - VertexType parent_position; + VertexType parentPosition_; - VertexType degree; + VertexType degree_; - cm_vertex() : vertex(0), parent_position(0), degree(0) {} + CmVertex() : vertex_(0), parentPosition_(0), degree_(0) {} - cm_vertex(VertexType vertex_, VertexType degree_, VertexType parent_position_) - : vertex(vertex_), parent_position(parent_position_), degree(degree_) {} + CmVertex(VertexType vertex, VertexType degree, VertexType parentPosition) + : vertex_(vertex), parentPosition_(parentPosition), degree_(degree) {} - bool operator<(cm_vertex const &rhs) const { - return (parent_position < rhs.parent_position) || (parent_position == rhs.parent_position and degree < rhs.degree) - || (parent_position == rhs.parent_position and degree == rhs.degree and vertex < rhs.vertex); + bool operator<(CmVertex const &rhs) const { + return (parentPosition_ < rhs.parentPosition_) || (parentPosition_ == rhs.parentPosition_ and degree_ < rhs.degree_) + || (parentPosition_ == rhs.parentPosition_ and degree_ == rhs.degree_ and vertex_ < rhs.vertex_); } }; -template -std::vector> cuthill_mckee_wavefront(const Graph_t &dag, bool permutation = false) { - using VertexType = vertex_idx_t; - using cm_vertex = cm_vertex; +template +std::vector> CuthillMckeeWavefront(const GraphT &dag, bool permutation = false) { + using VertexType = VertexIdxT; + using CmVertex 
= CmVertex; - std::vector result(dag.num_vertices()); - std::vector predecessors_count(dag.num_vertices(), 0); - std::vector predecessors_position(dag.num_vertices(), dag.num_vertices()); + std::vector result(dag.NumVertices()); + std::vector predecessorsCount(dag.NumVertices(), 0); + std::vector predecessorsPosition(dag.NumVertices(), dag.NumVertices()); - std::vector current_wavefront; - for (const auto &source : source_vertices_view(dag)) { - current_wavefront.push_back(cm_vertex(source, dag.out_degree(source), 0)); + std::vector currentWavefront; + for (const auto &source : SourceVerticesView(dag)) { + currentWavefront.push_back(CmVertex(source, dag.OutDegree(source), 0)); } - std::vector new_wavefront; - VertexType node_counter = 0; - while (node_counter < dag.num_vertices()) { - new_wavefront.clear(); - std::sort(current_wavefront.begin(), current_wavefront.end()); + std::vector newWavefront; + VertexType nodeCounter = 0; + while (nodeCounter < dag.NumVertices()) { + newWavefront.clear(); + std::sort(currentWavefront.begin(), currentWavefront.end()); if (permutation) { - for (VertexType i = 0; i < static_cast(current_wavefront.size()); i++) { - result[current_wavefront[i].vertex] = node_counter + i; + for (VertexType i = 0; i < static_cast(currentWavefront.size()); i++) { + result[currentWavefront[i].vertex_] = nodeCounter + i; } } else { - for (size_t i = 0; i < current_wavefront.size(); i++) { - result[node_counter + i] = current_wavefront[i].vertex; + for (size_t i = 0; i < currentWavefront.size(); i++) { + result[nodeCounter + i] = currentWavefront[i].vertex_; } } - if (node_counter + static_cast(current_wavefront.size()) == dag.num_vertices()) { + if (nodeCounter + static_cast(currentWavefront.size()) == dag.NumVertices()) { break; } - for (VertexType i = 0; i < static_cast(current_wavefront.size()); i++) { - for (const auto &child : dag.children(current_wavefront[i].vertex)) { - predecessors_count[child]++; - predecessors_position[child] = 
std::min(predecessors_position[child], node_counter + i); + for (VertexType i = 0; i < static_cast(currentWavefront.size()); i++) { + for (const auto &child : dag.Children(currentWavefront[i].vertex_)) { + predecessorsCount[child]++; + predecessorsPosition[child] = std::min(predecessorsPosition[child], nodeCounter + i); - if (predecessors_count[child] == dag.in_degree(child)) { - new_wavefront.push_back(cm_vertex(child, dag.out_degree(child), predecessors_position[child])); + if (predecessorsCount[child] == dag.InDegree(child)) { + newWavefront.push_back(CmVertex(child, dag.OutDegree(child), predecessorsPosition[child])); } } } - node_counter += static_cast(current_wavefront.size()); + nodeCounter += static_cast(currentWavefront.size()); - std::swap(current_wavefront, new_wavefront); + std::swap(currentWavefront, newWavefront); } return result; } -template -std::vector> cuthill_mckee_undirected(const Graph_t &dag, bool start_at_sink, bool perm = false) { - using VertexType = vertex_idx_t; - using cm_vertex = cm_vertex; +template +std::vector> CuthillMckeeUndirected(const GraphT &dag, bool startAtSink, bool perm = false) { + using VertexType = VertexIdxT; + using CmVertex = CmVertex; - std::vector cm_order(dag.num_vertices()); + std::vector cmOrder(dag.NumVertices()); - std::unordered_map max_node_distances; - VertexType first_node = 0; + std::unordered_map maxNodeDistances; + VertexType firstNode = 0; // compute bottom or top node distances of sink or source nodes, store node with the largest distance in first_node - if (start_at_sink) { - unsigned max_distance = 0; - const std::vector top_node_distance = get_top_node_distance(dag); - for (const auto &i : dag.vertices()) { - if (is_sink(i, dag)) { - max_node_distances[i] = top_node_distance[i]; - - if (top_node_distance[i] > max_distance) { - max_distance = top_node_distance[i]; - first_node = i; + if (startAtSink) { + unsigned maxDistance = 0; + const std::vector topNodeDistance = GetTopNodeDistance(dag); + for 
(const auto &i : dag.Vertices()) { + if (IsSink(i, dag)) { + maxNodeDistances[i] = topNodeDistance[i]; + + if (topNodeDistance[i] > maxDistance) { + maxDistance = topNodeDistance[i]; + firstNode = i; } } } } else { - unsigned max_distance = 0; - const std::vector bottom_node_distance = get_bottom_node_distance(dag); - for (const auto &i : dag.vertices()) { - if (is_source(i, dag)) { - max_node_distances[i] = bottom_node_distance[i]; - - if (bottom_node_distance[i] > max_distance) { - max_distance = bottom_node_distance[i]; - first_node = i; + unsigned maxDistance = 0; + const std::vector bottomNodeDistance = GetBottomNodeDistance(dag); + for (const auto &i : dag.Vertices()) { + if (IsSource(i, dag)) { + maxNodeDistances[i] = bottomNodeDistance[i]; + + if (bottomNodeDistance[i] > maxDistance) { + maxDistance = bottomNodeDistance[i]; + firstNode = i; } } } } if (perm) { - cm_order[first_node] = 0; + cmOrder[firstNode] = 0; } else { - cm_order[0] = first_node; + cmOrder[0] = firstNode; } std::unordered_set visited; - visited.insert(first_node); + visited.insert(firstNode); - std::vector current_level; - current_level.reserve(dag.in_degree(first_node) + dag.out_degree(first_node)); + std::vector currentLevel; + currentLevel.reserve(dag.InDegree(firstNode) + dag.OutDegree(firstNode)); - for (const auto &child : dag.children(first_node)) { - current_level.push_back(cm_vertex(child, dag.in_degree(child) + dag.out_degree(child), 0)); + for (const auto &child : dag.Children(firstNode)) { + currentLevel.push_back(CmVertex(child, dag.InDegree(child) + dag.OutDegree(child), 0)); visited.insert(child); } - for (const auto &parent : dag.parents(first_node)) { - current_level.push_back(cm_vertex(parent, dag.in_degree(parent) + dag.out_degree(parent), 0)); + for (const auto &parent : dag.Parents(firstNode)) { + currentLevel.push_back(CmVertex(parent, dag.InDegree(parent) + dag.OutDegree(parent), 0)); visited.insert(parent); } - VertexType node_counter = 1; - while (node_counter < 
dag.num_vertices()) { - std::sort(current_level.begin(), current_level.end()); + VertexType nodeCounter = 1; + while (nodeCounter < dag.NumVertices()) { + std::sort(currentLevel.begin(), currentLevel.end()); if (perm) { - for (VertexType i = 0; i < current_level.size(); i++) { - cm_order[current_level[i].vertex] = node_counter + i; + for (VertexType i = 0; i < currentLevel.size(); i++) { + cmOrder[currentLevel[i].vertex_] = nodeCounter + i; } } else { - for (VertexType i = 0; i < current_level.size(); i++) { - cm_order[node_counter + i] = current_level[i].vertex; + for (VertexType i = 0; i < currentLevel.size(); i++) { + cmOrder[nodeCounter + i] = currentLevel[i].vertex_; } } - if (node_counter + current_level.size() == dag.num_vertices()) { + if (nodeCounter + currentLevel.size() == dag.NumVertices()) { break; } - std::unordered_map node_priority; + std::unordered_map nodePriority; - for (VertexType i = 0; i < current_level.size(); i++) { - for (const auto &child : dag.children(current_level[i].vertex)) { + for (VertexType i = 0; i < currentLevel.size(); i++) { + for (const auto &child : dag.Children(currentLevel[i].vertex_)) { if (visited.find(child) == visited.end()) { - if (node_priority.find(child) == node_priority.end()) { - node_priority[child] = node_counter + i; + if (nodePriority.find(child) == nodePriority.end()) { + nodePriority[child] = nodeCounter + i; } else { - node_priority[child] = std::min(node_priority[child], node_counter + i); + nodePriority[child] = std::min(nodePriority[child], nodeCounter + i); } } } - for (const auto &parent : dag.parents(current_level[i].vertex)) { + for (const auto &parent : dag.Parents(currentLevel[i].vertex_)) { if (visited.find(parent) == visited.end()) { - if (node_priority.find(parent) == node_priority.end()) { - node_priority[parent] = node_counter + i; + if (nodePriority.find(parent) == nodePriority.end()) { + nodePriority[parent] = nodeCounter + i; } else { - node_priority[parent] = 
std::min(node_priority[parent], node_counter + i); + nodePriority[parent] = std::min(nodePriority[parent], nodeCounter + i); } } } } - node_counter += current_level.size(); + nodeCounter += currentLevel.size(); - if (node_priority.empty()) { // the dag has more than one connected components + if (nodePriority.empty()) { // the dag has more than one connected components - unsigned max_distance = 0; - for (const auto [node, distance] : max_node_distances) { - if (visited.find(node) == visited.end() and distance > max_distance) { - max_distance = distance; - first_node = node; + unsigned maxDistance = 0; + for (const auto [node, distance] : maxNodeDistances) { + if (visited.find(node) == visited.end() and distance > maxDistance) { + maxDistance = distance; + firstNode = node; } } if (perm) { - cm_order[first_node] = node_counter; + cmOrder[firstNode] = nodeCounter; } else { - cm_order[node_counter] = first_node; + cmOrder[nodeCounter] = firstNode; } - visited.insert(first_node); + visited.insert(firstNode); - current_level.clear(); - current_level.reserve(dag.in_degree(first_node) + dag.out_degree(first_node)); + currentLevel.clear(); + currentLevel.reserve(dag.InDegree(firstNode) + dag.OutDegree(firstNode)); - for (const auto &child : dag.children(first_node)) { - current_level.push_back(cm_vertex(child, dag.in_degree(child) + dag.out_degree(child), node_counter)); + for (const auto &child : dag.Children(firstNode)) { + currentLevel.push_back(CmVertex(child, dag.InDegree(child) + dag.OutDegree(child), nodeCounter)); visited.insert(child); } - for (const auto &parent : dag.parents(first_node)) { - current_level.push_back(cm_vertex(parent, dag.in_degree(parent) + dag.out_degree(parent), node_counter)); + for (const auto &parent : dag.Parents(firstNode)) { + currentLevel.push_back(CmVertex(parent, dag.InDegree(parent) + dag.OutDegree(parent), nodeCounter)); visited.insert(parent); } - node_counter++; + nodeCounter++; } else { - current_level.clear(); - 
current_level.reserve(node_priority.size()); + currentLevel.clear(); + currentLevel.reserve(nodePriority.size()); - for (const auto &[node, priority] : node_priority) { - current_level.push_back(cm_vertex(node, dag.in_degree(node) + dag.out_degree(node), priority)); + for (const auto &[node, priority] : nodePriority) { + currentLevel.push_back(CmVertex(node, dag.InDegree(node) + dag.OutDegree(node), priority)); visited.insert(node); } } } - return cm_order; + return cmOrder; } // Cuthill-McKee Wavefront -template -inline std::vector> GetTopOrderCuthillMcKeeWavefront(const Graph_t &dag) { - std::vector> order; - if (dag.num_vertices() > 0) { - std::vector> priority = cuthill_mckee_wavefront(dag); - order.reserve(dag.num_vertices()); - for (const auto &v : priority_vec_top_sort_view(dag, priority)) { +template +inline std::vector> GetTopOrderCuthillMcKeeWavefront(const GraphT &dag) { + std::vector> order; + if (dag.NumVertices() > 0) { + std::vector> priority = CuthillMckeeWavefront(dag); + order.reserve(dag.NumVertices()); + for (const auto &v : PriorityVecTopSortView(dag, priority)) { order.push_back(v); } } @@ -268,13 +268,13 @@ inline std::vector> GetTopOrderCuthillMcKeeWavefront(const } // Cuthill-McKee Undirected -template -inline std::vector> GetTopOrderCuthillMcKeeUndirected(const Graph_t &dag) { - std::vector> order; - if (dag.num_vertices() > 0) { - std::vector> priority = cuthill_mckee_undirected(dag, true, true); - order.reserve(dag.num_vertices()); - for (const auto &v : priority_vec_top_sort_view(dag, priority)) { +template +inline std::vector> GetTopOrderCuthillMcKeeUndirected(const GraphT &dag) { + std::vector> order; + if (dag.NumVertices() > 0) { + std::vector> priority = CuthillMckeeUndirected(dag, true, true); + order.reserve(dag.NumVertices()); + for (const auto &v : PriorityVecTopSortView(dag, priority)) { order.push_back(v); } } diff --git a/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp 
b/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp index 845cc27d..18b0a4f5 100644 --- a/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp +++ b/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp @@ -32,18 +32,17 @@ limitations under the License. namespace osp { -template -std::vector> get_contractable_edges_from_poset_int_map(const std::vector &poset_int_map, - const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph_edge_desc concept"); +template +std::vector> GetContractableEdgesFromPosetIntMap(const std::vector &posetIntMap, const GraphT &graph) { + static_assert(isDirectedGraphEdgeDescV, "GraphT must satisfy the directed_graph_edge_desc concept"); - std::vector> output; + std::vector> output; - for (const auto &edge : edges(graph)) { - vertex_idx_t src = source(edge, graph); - vertex_idx_t tgt = target(edge, graph); + for (const auto &edge : Edges(graph)) { + VertexIdxT src = Source(edge, graph); + VertexIdxT tgt = Target(edge, graph); - if (poset_int_map[tgt] == poset_int_map[src] + 1) { + if (posetIntMap[tgt] == posetIntMap[src] + 1) { output.emplace_back(edge); } } diff --git a/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp b/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp index ff0ff192..5d0de05c 100644 --- a/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp +++ b/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp @@ -26,47 +26,45 @@ limitations under the License. 
namespace osp { -template -std::pair, bool> edge_desc(const vertex_idx_t &src, - const vertex_idx_t &dest, - const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph edge desc concept"); - - for (const auto &edge : out_edges(src, graph)) { - if (target(edge, graph) == dest) { +template +std::pair, bool> EdgeDesc(const VertexIdxT &src, const VertexIdxT &dest, const GraphT &graph) { + static_assert(isDirectedGraphEdgeDescV, "GraphT must satisfy the directed_graph edge desc concept"); + + for (const auto &edge : OutEdges(src, graph)) { + if (Target(edge, graph) == dest) { return {edge, true}; } } - return {edge_desc_t(), false}; + return {EdgeDescT(), false}; } -template -std::unordered_set> long_edges_in_triangles(const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph edge desc concept"); - static_assert(has_hashable_edge_desc_v, "Graph_t must satisfy the has_hashable_edge_desc concept"); +template +std::unordered_set> LongEdgesInTriangles(const GraphT &graph) { + static_assert(isDirectedGraphEdgeDescV, "GraphT must satisfy the directed_graph edge desc concept"); + static_assert(hasHashableEdgeDescV, "GraphT must satisfy the HasHashableEdgeDesc concept"); - std::unordered_set> long_edges; + std::unordered_set> longEdges; - for (const auto &vertex : graph.vertices()) { - std::unordered_set> children_set; + for (const auto &vertex : graph.Vertices()) { + std::unordered_set> childrenSet; - for (const auto &v : graph.children(vertex)) { - children_set.emplace(v); + for (const auto &v : graph.Children(vertex)) { + childrenSet.emplace(v); } - for (const auto &edge : out_edges(vertex, graph)) { - const auto &child = target(edge, graph); + for (const auto &edge : OutEdges(vertex, graph)) { + const auto &child = Target(edge, graph); - for (const auto &parent : graph.parents(child)) { - if (children_set.find(parent) != children_set.cend()) { - 
long_edges.emplace(edge); + for (const auto &parent : graph.Parents(child)) { + if (childrenSet.find(parent) != childrenSet.cend()) { + longEdges.emplace(edge); break; } } } } - return long_edges; + return longEdges; } } // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp b/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp index af73869b..7c7444a0 100644 --- a/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp +++ b/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp @@ -29,48 +29,48 @@ limitations under the License. namespace osp { -template -std::unordered_set> long_edges_in_triangles_parallel(const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph edge desc concept"); - static_assert(has_hashable_edge_desc_v, "Graph_t must satisfy the has_hashable_edge_desc concept"); +template +std::unordered_set> LongEdgesInTrianglesParallel(const GraphT &graph) { + static_assert(isDirectedGraphEdgeDescV, "GraphT must satisfy the directed_graph edge desc concept"); + static_assert(hasHashableEdgeDescV, "GraphT must satisfy the HasHashableEdgeDesc concept"); - if (graph.num_edges() < 1000) { - return long_edges_in_triangles(graph); + if (graph.NumEdges() < 1000) { + return LongEdgesInTriangles(graph); } - std::unordered_set> long_edges; - std::vector>> deleted_edges_thread(static_cast(omp_get_max_threads())); + std::unordered_set> longEdges; + std::vector>> deletedEdgesThread(static_cast(omp_get_max_threads())); #pragma omp parallel for schedule(dynamic, 4) - for (vertex_idx_t vertex = 0; vertex < graph.num_vertices(); ++vertex) { - // for (const auto &vertex : graph.vertices()) { + for (VertexIdxT vertex = 0; vertex < graph.NumVertices(); ++vertex) { + // for (const auto &vertex : graph.Vertices()) { const unsigned int proc = static_cast(omp_get_thread_num()); - std::unordered_set> children_set; - for (const 
auto &v : graph.children(vertex)) { - children_set.emplace(v); + std::unordered_set> childrenSet; + for (const auto &v : graph.Children(vertex)) { + childrenSet.emplace(v); } - for (const auto &edge : out_edges(vertex, graph)) { - const auto &child = target(edge, graph); + for (const auto &edge : OutEdges(vertex, graph)) { + const auto &child = Target(edge, graph); - for (const auto &parent : graph.parents(child)) { - if (children_set.find(parent) != children_set.cend()) { - deleted_edges_thread[proc].emplace_back(edge); + for (const auto &parent : graph.Parents(child)) { + if (childrenSet.find(parent) != childrenSet.cend()) { + deletedEdgesThread[proc].emplace_back(edge); break; } } } } - for (const auto &edges_thread : deleted_edges_thread) { - for (const auto &edge : edges_thread) { - long_edges.emplace(edge); + for (const auto &edgesThread : deletedEdgesThread) { + for (const auto &edge : edgesThread) { + longEdges.emplace(edge); } } - return long_edges; + return longEdges; } } // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_edge_view.hpp b/include/osp/graph_algorithms/directed_graph_edge_view.hpp index ae925384..a9ded209 100644 --- a/include/osp/graph_algorithms/directed_graph_edge_view.hpp +++ b/include/osp/graph_algorithms/directed_graph_edge_view.hpp @@ -30,40 +30,40 @@ namespace osp { * The iteration order is lexicographical with respect to (source, target) pairs, determined by * the order of vertices and their adjacency lists. * - * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph_v` concept. + * @tparam GraphT The type of the graph, which must satisfy the `is_directed_graph_v` concept. 
*/ -template -class edge_view { +template +class EdgeView { private: - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph_; + const GraphT &graph_; - template + template class DirectedEdgeIterator { public: using iterator_category = std::forward_iterator_tag; using difference_type = std::ptrdiff_t; - using value_type = directed_edge; + using value_type = DirectedEdge; using pointer = value_type *; using reference = value_type &; - struct arrow_proxy { - value_type value; + struct ArrowProxy { + value_type value_; - const value_type *operator->() const noexcept { return &value; } + const value_type *operator->() const noexcept { return &value_; } }; private: - const Graph_t *graph_; // Pointer to the graph - vertex_idx_t currentVertex_; // Current source vertex - child_iterator_t currentChild_; // Iterator to the current target vertex in current_vertex's adjacency list - vertex_idx_t currentEdgeIdx_; // Global index of the current edge in the traversal order - - void advanceToValid() { - while (currentVertex_ != graph_->num_vertices()) { - if (graph_->children(currentVertex_).begin() != graph_->children(currentVertex_).end()) { - currentChild_ = graph_->children(currentVertex_).begin(); + const GraphT *graph_; // Pointer to the graph + VertexIdxT currentVertex_; // Current source vertex + ChildIteratorT currentChild_; // Iterator to the current target vertex in current_vertex's adjacency list + VertexIdxT currentEdgeIdx_; // Global index of the current edge in the traversal order + + void AdvanceToValid() { + while (currentVertex_ != graph_->NumVertices()) { + if (graph_->Children(currentVertex_).begin() != graph_->Children(currentVertex_).end()) { + currentChild_ = graph_->Children(currentVertex_).begin(); break; } currentVertex_++; @@ -79,23 +79,23 @@ class edge_view { DirectedEdgeIterator &operator=(const 
DirectedEdgeIterator &other) = default; DirectedEdgeIterator &operator=(DirectedEdgeIterator &&other) noexcept = default; - explicit DirectedEdgeIterator(const Graph_t &graph1) : graph_(&graph1), currentVertex_(0), currentEdgeIdx_(0) { - advanceToValid(); + explicit DirectedEdgeIterator(const GraphT &graph1) : graph_(&graph1), currentVertex_(0), currentEdgeIdx_(0) { + AdvanceToValid(); } - DirectedEdgeIterator(const vertex_idx_t edge_idx, const Graph_t &graph1) - : graph_(&graph1), currentVertex_(0), currentEdgeIdx_(edge_idx) { - if (currentEdgeIdx_ >= graph_->num_edges()) { - currentEdgeIdx_ = graph_->num_edges(); - currentVertex_ = graph_->num_vertices(); + DirectedEdgeIterator(const VertexIdxT edgeIdx, const GraphT &graph1) + : graph_(&graph1), currentVertex_(0), currentEdgeIdx_(edgeIdx) { + if (currentEdgeIdx_ >= graph_->NumEdges()) { + currentEdgeIdx_ = graph_->NumEdges(); + currentVertex_ = graph_->NumVertices(); return; } - vertex_idx_t currentAccumulatedEdges = 0; + VertexIdxT currentAccumulatedEdges = 0; // Optimization: Skip vertices entirely if their degree is small enough - while (currentVertex_ < graph_->num_vertices()) { - const auto degree = graph_->out_degree(currentVertex_); + while (currentVertex_ < graph_->NumVertices()) { + const auto degree = graph_->OutDegree(currentVertex_); if (currentAccumulatedEdges + degree > currentEdgeIdx_) { break; } @@ -104,23 +104,23 @@ class edge_view { } // Initialize child iterator and advance within the specific vertex - if (currentVertex_ < graph_->num_vertices()) { - currentChild_ = graph_->children(currentVertex_).begin(); + if (currentVertex_ < graph_->NumVertices()) { + currentChild_ = graph_->Children(currentVertex_).begin(); std::advance(currentChild_, currentEdgeIdx_ - currentAccumulatedEdges); } } [[nodiscard]] value_type operator*() const { return {currentVertex_, *currentChild_}; } - [[nodiscard]] arrow_proxy operator->() const { return {operator*()}; } + [[nodiscard]] ArrowProxy operator->() const { 
return {operator*()}; } DirectedEdgeIterator &operator++() { currentChild_++; currentEdgeIdx_++; - if (currentChild_ == graph_->children(currentVertex_).end()) { + if (currentChild_ == graph_->Children(currentVertex_).end()) { currentVertex_++; - advanceToValid(); + AdvanceToValid(); } return *this; } @@ -140,23 +140,23 @@ class edge_view { public: using DirEdgeIterator - = DirectedEdgeIterator().children(std::declval>()).begin())>; - using iterator = DirEdgeIterator; - using constIterator = DirEdgeIterator; + = DirectedEdgeIterator().Children(std::declval>()).begin())>; + using Iterator = DirEdgeIterator; + using ConstIterator = DirEdgeIterator; - explicit edge_view(const Graph_t &graph) : graph_(graph) {} + explicit EdgeView(const GraphT &graph) : graph_(graph) {} [[nodiscard]] auto begin() const { return DirEdgeIterator(graph_); } [[nodiscard]] auto cbegin() const { return DirEdgeIterator(graph_); } - [[nodiscard]] auto end() const { return DirEdgeIterator(graph_.num_edges(), graph_); } + [[nodiscard]] auto end() const { return DirEdgeIterator(graph_.NumEdges(), graph_); } - [[nodiscard]] auto cend() const { return DirEdgeIterator(graph_.num_edges(), graph_); } + [[nodiscard]] auto cend() const { return DirEdgeIterator(graph_.NumEdges(), graph_); } - [[nodiscard]] auto size() const { return graph_.num_edges(); } + [[nodiscard]] auto size() const { return graph_.NumEdges(); } - [[nodiscard]] bool empty() const { return graph_.num_edges() == 0; } + [[nodiscard]] bool empty() const { return graph_.NumEdges() == 0; } }; /** @@ -165,50 +165,50 @@ class edge_view { * This class provides an iterator-based view to iterate over either outgoing or incoming edges * of a given vertex. It is a lightweight, non-owning view. * - * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph_v` concept. + * @tparam GraphT The type of the graph, which must satisfy the `is_directed_graph_v` concept. 
* @tparam IsOutgoing If true, iterates over outgoing edges; otherwise, incoming edges. */ -template +template class IncidentEdgeView { private: - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph_; - vertex_idx_t anchorVertex_; + const GraphT &graph_; + VertexIdxT anchorVertex_; - template + template class IncidentEdgeIterator { public: - using iterator_category = typename std::iterator_traits::iterator_category; + using iterator_category = typename std::iterator_traits::iterator_category; using difference_type = std::ptrdiff_t; - using value_type = directed_edge; + using value_type = DirectedEdge; using pointer = value_type *; using reference = value_type &; - struct arrow_proxy { - value_type value; + struct ArrowProxy { + value_type value_; - const value_type *operator->() const noexcept { return &value; } + const value_type *operator->() const noexcept { return &value_; } }; private: - vertex_idx_t anchorVertex_; - child_iterator_t currentIt_; + VertexIdxT anchorVertex_; + ChildIteratorT currentIt_; public: IncidentEdgeIterator() = default; - IncidentEdgeIterator(vertex_idx_t u, child_iterator_t it) : anchorVertex_(u), currentIt_(it) {} + IncidentEdgeIterator(VertexIdxT u, ChildIteratorT it) : anchorVertex_(u), currentIt_(it) {} [[nodiscard]] value_type operator*() const { - if constexpr (IsOutgoing) { + if constexpr (isOutgoing) { return {anchorVertex_, *currentIt_}; } else { return {*currentIt_, anchorVertex_}; } } - [[nodiscard]] arrow_proxy operator->() const { return {operator*()}; } + [[nodiscard]] ArrowProxy operator->() const { return {operator*()}; } IncidentEdgeIterator &operator++() { ++currentIt_; @@ -238,50 +238,50 @@ class IncidentEdgeView { }; // Helper to deduce iterator type based on direction - using base_iterator_type - = std::conditional_t().children(std::declval>()).begin()), - 
decltype(std::declval().parents(std::declval>()).begin())>; + using BaseIteratorType + = std::conditional_t().Children(std::declval>()).begin()), + decltype(std::declval().Parents(std::declval>()).begin())>; public: - using iterator = IncidentEdgeIterator; - using constIterator = iterator; + using Iterator = IncidentEdgeIterator; + using ConstIterator = Iterator; - IncidentEdgeView(const Graph_t &graph, vertex_idx_t u) : graph_(graph), anchorVertex_(u) {} + IncidentEdgeView(const GraphT &graph, VertexIdxT u) : graph_(graph), anchorVertex_(u) {} [[nodiscard]] auto begin() const { - if constexpr (IsOutgoing) { - return iterator(anchorVertex_, graph_.children(anchorVertex_).begin()); + if constexpr (isOutgoing) { + return Iterator(anchorVertex_, graph_.Children(anchorVertex_).begin()); } else { - return iterator(anchorVertex_, graph_.parents(anchorVertex_).begin()); + return Iterator(anchorVertex_, graph_.Parents(anchorVertex_).begin()); } } [[nodiscard]] auto cbegin() const { return begin(); } [[nodiscard]] auto end() const { - if constexpr (IsOutgoing) { - return iterator(anchorVertex_, graph_.children(anchorVertex_).end()); + if constexpr (isOutgoing) { + return Iterator(anchorVertex_, graph_.Children(anchorVertex_).end()); } else { - return iterator(anchorVertex_, graph_.parents(anchorVertex_).end()); + return Iterator(anchorVertex_, graph_.Parents(anchorVertex_).end()); } } [[nodiscard]] auto cend() const { return end(); } [[nodiscard]] auto size() const { - if constexpr (IsOutgoing) { - return graph_.out_degree(anchorVertex_); + if constexpr (isOutgoing) { + return graph_.OutDegree(anchorVertex_); } else { - return graph_.in_degree(anchorVertex_); + return graph_.InDegree(anchorVertex_); } } [[nodiscard]] bool empty() const { - if constexpr (IsOutgoing) { - return graph_.out_degree(anchorVertex_) == 0; + if constexpr (isOutgoing) { + return graph_.OutDegree(anchorVertex_) == 0; } else { - return graph_.in_degree(anchorVertex_) == 0; + return 
graph_.InDegree(anchorVertex_) == 0; } } }; @@ -289,13 +289,13 @@ class IncidentEdgeView { /** * @brief A view over the outgoing edges of a specific vertex in a directed graph. */ -template -using OutEdgeView = IncidentEdgeView; +template +using OutEdgeView = IncidentEdgeView; /** * @brief A view over the incoming edges of a specific vertex in a directed graph. */ -template -using InEdgeView = IncidentEdgeView; +template +using InEdgeView = IncidentEdgeView; } // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_path_util.hpp b/include/osp/graph_algorithms/directed_graph_path_util.hpp index a1675964..37733275 100644 --- a/include/osp/graph_algorithms/directed_graph_path_util.hpp +++ b/include/osp/graph_algorithms/directed_graph_path_util.hpp @@ -39,17 +39,17 @@ namespace osp { * This function performs a Breadth-First Search (BFS) starting from the `src` * vertex to determine if the `dest` vertex is reachable. * - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. * @param src The source vertex. * @param dest The destination vertex. * @param graph The graph to search in. * @return true if a path exists from src to dest, false otherwise. 
*/ -template -bool has_path(const vertex_idx_t src, const vertex_idx_t dest, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +bool HasPath(const VertexIdxT src, const VertexIdxT dest, const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - for (const auto &child : bfs_view(graph, src)) { + for (const auto &child : BfsView(graph, src)) { if (child == dest) { return true; } @@ -58,45 +58,45 @@ bool has_path(const vertex_idx_t src, const vertex_idx_t dest, return false; } -template -std::size_t longestPath(const std::set> &vertices, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +std::size_t LongestPath(const std::set> &vertices, const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::queue bfs_queue; - std::map distances, in_degrees, visit_counter; + std::queue bfsQueue; + std::map distances, inDegrees, visitCounter; // Find source nodes for (const VertexType &node : vertices) { unsigned indeg = 0; - for (const VertexType &parent : graph.parents(node)) { + for (const VertexType &parent : graph.Parents(node)) { if (vertices.count(parent) == 1) { ++indeg; } } if (indeg == 0) { - bfs_queue.push(node); + bfsQueue.push(node); distances[node] = 0; } - in_degrees[node] = indeg; - visit_counter[node] = 0; + inDegrees[node] = indeg; + visitCounter[node] = 0; } // Execute BFS - while (!bfs_queue.empty()) { - const VertexType current = bfs_queue.front(); - bfs_queue.pop(); + while (!bfsQueue.empty()) { + const VertexType current = bfsQueue.front(); + bfsQueue.pop(); - for (const VertexType &child : graph.children(current)) { + for (const VertexType &child : graph.Children(current)) { if (vertices.count(child) == 0) { continue; } - 
++visit_counter[child]; - if (visit_counter[child] == in_degrees[child]) { - bfs_queue.push(child); + ++visitCounter[child]; + if (visitCounter[child] == inDegrees[child]) { + bfsQueue.push(child); distances[child] = distances[current] + 1; } } @@ -107,80 +107,80 @@ std::size_t longestPath(const std::set> &vertices, const G }); } -template -std::size_t longestPath(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +std::size_t LongestPath(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::size_t max_edgecount = 0; - std::queue bfs_queue; - std::vector distances(graph.num_vertices(), 0), visit_counter(graph.num_vertices(), 0); + std::size_t maxEdgecount = 0; + std::queue bfsQueue; + std::vector distances(graph.NumVertices(), 0), visitCounter(graph.NumVertices(), 0); // Find source nodes - for (const auto &node : source_vertices_view(graph)) { - bfs_queue.push(node); + for (const auto &node : SourceVerticesView(graph)) { + bfsQueue.push(node); } // Execute BFS - while (!bfs_queue.empty()) { - const VertexType current = bfs_queue.front(); - bfs_queue.pop(); - - for (const VertexType &child : graph.children(current)) { - ++visit_counter[child]; - if (visit_counter[child] == graph.in_degree(child)) { - bfs_queue.push(child); + while (!bfsQueue.empty()) { + const VertexType current = bfsQueue.front(); + bfsQueue.pop(); + + for (const VertexType &child : graph.Children(current)) { + ++visitCounter[child]; + if (visitCounter[child] == graph.InDegree(child)) { + bfsQueue.push(child); distances[child] = distances[current] + 1; - max_edgecount = std::max(max_edgecount, distances[child]); + maxEdgecount = std::max(maxEdgecount, distances[child]); } } } - return max_edgecount; + return maxEdgecount; } -template -std::vector> longestChain(const Graph_t &graph) { - 
static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +std::vector> LongestChain(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; std::vector chain; - if (graph.num_vertices() == 0) { + if (graph.NumVertices() == 0) { return chain; } - std::vector top_length(graph.num_vertices(), 0); - unsigned running_longest_chain = 0; + std::vector topLength(graph.NumVertices(), 0); + unsigned runningLongestChain = 0; - VertexType end_longest_chain = 0; + VertexType endLongestChain = 0; // calculating lenght of longest path - for (const VertexType &node : top_sort_view(graph)) { - unsigned max_temp = 0; - for (const auto &parent : graph.parents(node)) { - max_temp = std::max(max_temp, top_length[parent]); + for (const VertexType &node : TopSortView(graph)) { + unsigned maxTemp = 0; + for (const auto &parent : graph.Parents(node)) { + maxTemp = std::max(maxTemp, topLength[parent]); } - top_length[node] = max_temp + 1; - if (top_length[node] > running_longest_chain) { - end_longest_chain = node; - running_longest_chain = top_length[node]; + topLength[node] = maxTemp + 1; + if (topLength[node] > runningLongestChain) { + endLongestChain = node; + runningLongestChain = topLength[node]; } } // reconstructing longest path - chain.push_back(end_longest_chain); - while (graph.in_degree(end_longest_chain) != 0) { - for (const VertexType &in_node : graph.parents(end_longest_chain)) { - if (top_length[in_node] != top_length[end_longest_chain] - 1) { + chain.push_back(endLongestChain); + while (graph.InDegree(endLongestChain) != 0) { + for (const VertexType &inNode : graph.Parents(endLongestChain)) { + if (topLength[inNode] != topLength[endLongestChain] - 1) { continue; } - end_longest_chain = in_node; - chain.push_back(end_longest_chain); + endLongestChain = inNode; + chain.push_back(endLongestChain); break; } } @@ -189,169 
+189,169 @@ std::vector> longestChain(const Graph_t &graph) { return chain; } -template -std::vector get_bottom_node_distance(const Graph_t &graph) { +template +std::vector GetBottomNodeDistance(const GraphT &graph) { static_assert(std::is_integral_v, "T must be of integral type"); - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - std::vector bottom_distance(graph.num_vertices(), 0); + std::vector bottomDistance(graph.NumVertices(), 0); - const auto top_order = GetTopOrder(graph); - for (std::size_t i = top_order.size() - 1; i < top_order.size(); i--) { - T max_temp = 0; - for (const auto &j : graph.children(top_order[i])) { - max_temp = std::max(max_temp, bottom_distance[j]); + const auto topOrder = GetTopOrder(graph); + for (std::size_t i = topOrder.size() - 1; i < topOrder.size(); i--) { + T maxTemp = 0; + for (const auto &j : graph.Children(topOrder[i])) { + maxTemp = std::max(maxTemp, bottomDistance[j]); } - bottom_distance[top_order[i]] = ++max_temp; + bottomDistance[topOrder[i]] = ++maxTemp; } - return bottom_distance; + return bottomDistance; } -template -std::vector get_top_node_distance(const Graph_t &graph) { +template +std::vector GetTopNodeDistance(const GraphT &graph) { static_assert(std::is_integral_v, "T must be of integral type"); - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - std::vector top_distance(graph.num_vertices(), 0); + std::vector topDistance(graph.NumVertices(), 0); - for (const auto &vertex : bfs_top_sort_view(graph)) { - T max_temp = 0; - for (const auto &j : graph.parents(vertex)) { - max_temp = std::max(max_temp, top_distance[j]); + for (const auto &vertex : BfsTopSortView(graph)) { + T maxTemp = 0; + for (const auto &j : graph.Parents(vertex)) { + maxTemp = std::max(maxTemp, 
topDistance[j]); } - top_distance[vertex] = ++max_temp; + topDistance[vertex] = ++maxTemp; } - return top_distance; + return topDistance; } -template -std::vector>> compute_wavefronts(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +std::vector>> ComputeWavefronts(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - std::vector>> wavefronts; - std::vector> parents_visited(graph.num_vertices(), 0); + std::vector>> wavefronts; + std::vector> parentsVisited(graph.NumVertices(), 0); - wavefronts.push_back(std::vector>()); - for (const auto &vertex : graph.vertices()) { - if (graph.in_degree(vertex) == 0) { + wavefronts.push_back(std::vector>()); + for (const auto &vertex : graph.Vertices()) { + if (graph.InDegree(vertex) == 0) { wavefronts.back().push_back(vertex); } else { - parents_visited[vertex] = static_cast>(graph.in_degree(vertex)); + parentsVisited[vertex] = static_cast>(graph.InDegree(vertex)); } } - vertex_idx_t counter = static_cast>(wavefronts.back().size()); + VertexIdxT counter = static_cast>(wavefronts.back().size()); - while (counter < graph.num_vertices()) { - std::vector> next_wavefront; - for (const auto &v_prev_wavefront : wavefronts.back()) { - for (const auto &child : graph.children(v_prev_wavefront)) { - parents_visited[child]--; - if (parents_visited[child] == 0) { - next_wavefront.push_back(child); + while (counter < graph.NumVertices()) { + std::vector> nextWavefront; + for (const auto &vPrevWavefront : wavefronts.back()) { + for (const auto &child : graph.Children(vPrevWavefront)) { + parentsVisited[child]--; + if (parentsVisited[child] == 0) { + nextWavefront.push_back(child); counter++; } } } - wavefronts.push_back(next_wavefront); + wavefronts.push_back(nextWavefront); } return wavefronts; } -template -std::vector get_strict_poset_integer_map(unsigned const noise, double const poisson_param, const Graph_t 
&graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph_edge_desc concept"); +template +std::vector GetStrictPosetIntegerMap(unsigned const noise, double const poissonParam, const GraphT &graph) { + static_assert(isDirectedGraphEdgeDescV, "GraphT must satisfy the directed_graph_edge_desc concept"); if (noise > static_cast(std::numeric_limits::max())) { throw std::overflow_error("Overflow in get_strict_poset_integer_map"); } - using VertexType = vertex_idx_t; - using EdgeType = edge_desc_t; + using VertexType = VertexIdxT; + using EdgeType = EdgeDescT; - std::vector top_order = GetTopOrder(graph); + std::vector topOrder = GetTopOrder(graph); - Repeat_Chance repeater_coin; + RepeatChance repeaterCoin; - std::unordered_map up_or_down; + std::unordered_map upOrDown; - for (const auto &edge : edges(graph)) { - up_or_down.emplace(edge, repeater_coin.get_flip()); + for (const auto &edge : Edges(graph)) { + upOrDown.emplace(edge, repeaterCoin.GetFlip()); } std::random_device rd; std::mt19937 gen(rd()); - std::poisson_distribution<> poisson_gen(poisson_param + 1.0e-12); + std::poisson_distribution<> poissonGen(poissonParam + 1.0e-12); - std::vector top_distance = get_top_node_distance(graph); - std::vector bot_distance = get_bottom_node_distance(graph); - std::vector new_top(graph.num_vertices(), 0); - std::vector new_bot(graph.num_vertices(), 0); + std::vector topDistance = GetTopNodeDistance(graph); + std::vector botDistance = GetBottomNodeDistance(graph); + std::vector newTop(graph.NumVertices(), 0); + std::vector newBot(graph.NumVertices(), 0); - unsigned max_path = 0; - for (const auto &vertex : graph.vertices()) { - max_path = std::max(max_path, top_distance[vertex]); + unsigned maxPath = 0; + for (const auto &vertex : graph.Vertices()) { + maxPath = std::max(maxPath, topDistance[vertex]); } - for (const auto &source : source_vertices_view(graph)) { - if (max_path - bot_distance[source] + 1U + 2U * noise > 
static_cast(std::numeric_limits::max())) { + for (const auto &source : SourceVertices(graph)) { + if (maxPath - botDistance[source] + 1U + 2U * noise > static_cast(std::numeric_limits::max())) { throw std::overflow_error("Overflow in get_strict_poset_integer_map"); } - new_top[source] = randInt(static_cast(max_path - bot_distance[source] + 1 + 2 * noise)) - static_cast(noise); + newTop[source] = RandInt(static_cast(maxPath - botDistance[source] + 1 + 2 * noise)) - static_cast(noise); } - for (const auto &sink : sink_vertices_view(graph)) { - if (max_path - top_distance[sink] + 1U + 2U * noise > static_cast(std::numeric_limits::max())) { + for (const auto &sink : SinkVertices(graph)) { + if (maxPath - topDistance[sink] + 1U + 2U * noise > static_cast(std::numeric_limits::max())) { throw std::overflow_error("Overflow in get_strict_poset_integer_map"); } - new_bot[sink] = randInt(static_cast(max_path - top_distance[sink] + 1U + 2U * noise)) - static_cast(noise); + newBot[sink] = RandInt(static_cast(maxPath - topDistance[sink] + 1U + 2U * noise)) - static_cast(noise); } - for (const auto &vertex : top_order) { - if (is_source(vertex, graph)) { + for (const auto &vertex : topOrder) { + if (IsSource(vertex, graph)) { continue; } - int max_temp = std::numeric_limits::min(); + int maxTemp = std::numeric_limits::min(); - for (const auto &edge : in_edges(vertex, graph)) { - int temp = new_top[source(edge, graph)]; - if (up_or_down.at(edge)) { - if (poisson_param <= 0.0) { + for (const auto &edge : InEdges(vertex, graph)) { + int temp = newTop[Source(edge, graph)]; + if (upOrDown.at(edge)) { + if (poissonParam <= 0.0) { temp += 1; } else { - temp += 1 + poisson_gen(gen); + temp += 1 + poissonGen(gen); } } - max_temp = std::max(max_temp, temp); + maxTemp = std::max(maxTemp, temp); } - new_top[vertex] = max_temp; + newTop[vertex] = maxTemp; } - for (std::reverse_iterator iter = top_order.crbegin(); iter != top_order.crend(); ++iter) { - if (is_sink(*iter, graph)) { + for 
(std::reverse_iterator iter = topOrder.crbegin(); iter != topOrder.crend(); ++iter) { + if (IsSink(*iter, graph)) { continue; } - int max_temp = std::numeric_limits::min(); + int maxTemp = std::numeric_limits::min(); - for (const auto &edge : out_edges(*iter, graph)) { - int temp = new_bot[target(edge, graph)]; - if (!up_or_down.at(edge)) { - temp += 1 + poisson_gen(gen); + for (const auto &edge : OutEdges(*iter, graph)) { + int temp = newBot[Target(edge, graph)]; + if (!upOrDown.at(edge)) { + temp += 1 + poissonGen(gen); } - max_temp = std::max(max_temp, temp); + maxTemp = std::max(maxTemp, temp); } - new_bot[*iter] = max_temp; + newBot[*iter] = maxTemp; } - std::vector output(graph.num_vertices()); - for (unsigned i = 0; i < graph.num_vertices(); i++) { - output[i] = new_top[i] - new_bot[i]; + std::vector output(graph.NumVertices()); + for (unsigned i = 0; i < graph.NumVertices(); i++) { + output[i] = newTop[i] - newBot[i]; } return output; } diff --git a/include/osp/graph_algorithms/directed_graph_top_sort.hpp b/include/osp/graph_algorithms/directed_graph_top_sort.hpp index f8d24124..c074597d 100644 --- a/include/osp/graph_algorithms/directed_graph_top_sort.hpp +++ b/include/osp/graph_algorithms/directed_graph_top_sort.hpp @@ -51,16 +51,16 @@ namespace osp { /** * @brief Checks if the natural order of the vertices is a topological order. * - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. * @param graph The graph to check. * @return true if the vertices are in topological order, false otherwise. 
*/ -template -bool checkNodesInTopologicalOrder(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +bool CheckNodesInTopologicalOrder(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - for (const auto &node : graph.vertices()) { - for (const auto &child : graph.children(node)) { + for (const auto &node : graph.Vertices()) { + for (const auto &child : graph.Children(node)) { if (child < node) { return false; } @@ -70,24 +70,24 @@ bool checkNodesInTopologicalOrder(const Graph_t &graph) { return true; } -template -std::vector> GetTopOrder(const Graph_t &graph) { - if constexpr (has_vertices_in_top_order_v) { - std::vector> topOrd(graph.num_vertices()); - std::iota(topOrd.begin(), topOrd.end(), static_cast>(0)); +template +std::vector> GetTopOrder(const GraphT &graph) { + if constexpr (hasVerticesInTopOrderV) { + std::vector> topOrd(graph.NumVertices()); + std::iota(topOrd.begin(), topOrd.end(), static_cast>(0)); return topOrd; } else { - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::vector predecessors_count(graph.num_vertices(), 0); - std::vector TopOrder; - TopOrder.reserve(graph.num_vertices()); + std::vector predecessorsCount(graph.NumVertices(), 0); + std::vector topOrder; + topOrder.reserve(graph.NumVertices()); std::queue next; // Find source nodes - for (const VertexType &v : source_vertices_view(graph)) { + for (const VertexType &v : SourceVertices(graph)) { next.push(v); } @@ -95,106 +95,106 @@ std::vector> GetTopOrder(const Graph_t &graph) { while (!next.empty()) { const VertexType node = next.front(); next.pop(); - TopOrder.push_back(node); + topOrder.push_back(node); - for (const VertexType ¤t : graph.children(node)) { - ++predecessors_count[current]; - if (predecessors_count[current] == graph.in_degree(current)) { + for (const VertexType ¤t : graph.Children(node)) { + ++predecessorsCount[current]; + 
if (predecessorsCount[current] == graph.InDegree(current)) { next.push(current); } } } - if (static_cast(TopOrder.size()) != graph.num_vertices()) { - throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" - + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + if (static_cast(topOrder.size()) != graph.NumVertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.NumVertices() [" + + std::to_string(topOrder.size()) + " != " + std::to_string(graph.NumVertices()) + "]"); } - return TopOrder; + return topOrder; } } -template -std::vector> GetTopOrderReverse(const Graph_t &graph) { - std::vector> TopOrder = GetTopOrder(graph); - std::reverse(TopOrder.begin(), TopOrder.end()); - return TopOrder; +template +std::vector> GetTopOrderReverse(const GraphT &graph) { + std::vector> topOrder = GetTopOrder(graph); + std::reverse(topOrder.begin(), topOrder.end()); + return topOrder; } -template -std::vector> GetTopOrderGorder(const Graph_t &graph) { +template +std::vector> GetTopOrderGorder(const GraphT &graph) { // Generating modified Gorder topological order cf. 
"Speedup Graph Processing by Graph Ordering" by Hao Wei, Jeffrey // Xu Yu, Can Lu, and Xuemin Lin - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::vector predecessors_count(graph.num_vertices(), 0); - std::vector TopOrder; - TopOrder.reserve(graph.num_vertices()); + std::vector predecessorsCount(graph.NumVertices(), 0); + std::vector topOrder; + topOrder.reserve(graph.NumVertices()); const double decay = 8.0; - std::vector priorities(graph.num_vertices(), 0.0); + std::vector priorities(graph.NumVertices(), 0.0); - auto v_cmp = [&priorities, &graph](const VertexType &lhs, const VertexType &rhs) { + auto vCmp = [&priorities, &graph](const VertexType &lhs, const VertexType &rhs) { return (priorities[lhs] < priorities[rhs]) - || ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) < graph.out_degree(rhs))) - || ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) == graph.out_degree(rhs)) && (lhs > rhs)); + || ((priorities[lhs] <= priorities[rhs]) && (graph.OutDegree(lhs) < graph.OutDegree(rhs))) + || ((priorities[lhs] <= priorities[rhs]) && (graph.OutDegree(lhs) == graph.OutDegree(rhs)) && (lhs > rhs)); }; - std::priority_queue, decltype(v_cmp)> ready_q(v_cmp); - for (const VertexType &vert : source_vertices_view(graph)) { - ready_q.push(vert); + std::priority_queue, decltype(vCmp)> readyQ(vCmp); + for (const VertexType &vert : SourceVertices(graph)) { + readyQ.push(vert); } - while (!ready_q.empty()) { - VertexType vert = ready_q.top(); - ready_q.pop(); + while (!readyQ.empty()) { + VertexType vert = readyQ.top(); + readyQ.pop(); - double pos = static_cast(TopOrder.size()); + double pos = static_cast(topOrder.size()); pos /= decay; - TopOrder.push_back(vert); + topOrder.push_back(vert); // update priorities - for (const VertexType &chld : 
graph.children(vert)) { - priorities[chld] = log_sum_exp(priorities[chld], pos); + for (const VertexType &chld : graph.Children(vert)) { + priorities[chld] = LogSumExp(priorities[chld], pos); } - for (const VertexType &par : graph.parents(vert)) { - for (const VertexType &sibling : graph.children(par)) { - priorities[sibling] = log_sum_exp(priorities[sibling], pos); + for (const VertexType &par : graph.Parents(vert)) { + for (const VertexType &sibling : graph.Children(par)) { + priorities[sibling] = LogSumExp(priorities[sibling], pos); } } - for (const VertexType &chld : graph.children(vert)) { - for (const VertexType &couple : graph.parents(chld)) { - priorities[couple] = log_sum_exp(priorities[couple], pos); + for (const VertexType &chld : graph.Children(vert)) { + for (const VertexType &couple : graph.Parents(chld)) { + priorities[couple] = LogSumExp(priorities[couple], pos); } } // update constraints and push to queue - for (const VertexType &chld : graph.children(vert)) { - ++predecessors_count[chld]; - if (predecessors_count[chld] == graph.in_degree(chld)) { - ready_q.push(chld); + for (const VertexType &chld : graph.Children(vert)) { + ++predecessorsCount[chld]; + if (predecessorsCount[chld] == graph.InDegree(chld)) { + readyQ.push(chld); } } } - if (TopOrder.size() != graph.num_vertices()) { - throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" - + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + if (topOrder.size() != graph.NumVertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.NumVertices() [" + + std::to_string(topOrder.size()) + " != " + std::to_string(graph.NumVertices()) + "]"); } - return TopOrder; + return topOrder; } -template -std::vector> GetFilteredTopOrder(const std::vector &valid, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template 
+std::vector> GetFilteredTopOrder(const std::vector &valid, const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - std::vector> filteredOrder; + std::vector> filteredOrder; for (const auto &node : GetTopOrder(graph)) { if (valid[node]) { filteredOrder.push_back(node); @@ -211,448 +211,446 @@ std::vector> GetFilteredTopOrder(const std::vector & * provides the required interface for managing vertices during topological sorting. * * @tparam T The type of the container wrapper. - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. */ -template -struct is_container_wrapper { +template +struct IsContainerWrapper { private: template - static auto test(int) -> decltype(std::declval().push(std::declval>()), - std::declval().pop_next(), + static auto Test(int) -> decltype(std::declval().Push(std::declval>()), + std::declval().PopNext(), std::declval().empty(), std::true_type()); template - static std::false_type test(...); + static std::false_type Test(...); public: - static constexpr bool value = decltype(test(0))::value; + static constexpr bool value_ = decltype(Test(0))::value; }; -template -inline constexpr bool is_container_wrapper_v = is_container_wrapper::value; +template +inline constexpr bool isContainerWrapperV = IsContainerWrapper::value_; -template -struct top_sort_iterator { - static_assert(is_container_wrapper_v, - "container_wrapper must satisfy the container wrapper concept"); +template +struct TopSortIterator { + static_assert(isContainerWrapperV, "container_wrapper must satisfy the container wrapper concept"); - const Graph_t &graph; - container_wrapper &next; + const GraphT &graph_; + ContainerWrapper &next_; - vertex_idx_t current_vertex; + VertexIdxT currentVertex_; - std::vector> predecessors_count; + std::vector> predecessorsCount_; public: using iterator_category = std::input_iterator_tag; - using value_type = vertex_idx_t; + using value_type = VertexIdxT; using 
difference_type = std::ptrdiff_t; using pointer = const value_type *; using reference = const value_type &; - top_sort_iterator(const Graph_t &graph_, container_wrapper &next_, vertex_idx_t start) - : graph(graph_), next(next_), current_vertex(start), predecessors_count(graph_.num_vertices(), 0) { - if (current_vertex == graph.num_vertices()) { + TopSortIterator(const GraphT &graph, ContainerWrapper &next, VertexIdxT start) + : graph_(graph), next_(next), currentVertex_(start), predecessorsCount_(graph.NumVertices(), 0) { + if (currentVertex_ == graph_.NumVertices()) { return; } - for (const auto &v : graph.vertices()) { - if (is_source(v, graph)) { - next.push(v); + for (const auto &v : graph_.Vertices()) { + if (IsSource(v, graph_)) { + next_.Push(v); } else { - predecessors_count[v] = static_cast>(graph.in_degree(v)); + predecessorsCount_[v] = static_cast>(graph_.InDegree(v)); } } - current_vertex = next.pop_next(); + currentVertex_ = next_.PopNext(); - for (const auto &child : graph.children(current_vertex)) { - --predecessors_count[child]; - if (not predecessors_count[child]) { - next.push(child); + for (const auto &child : graph_.Children(currentVertex_)) { + --predecessorsCount_[child]; + if (not predecessorsCount_[child]) { + next_.Push(child); } } } - value_type operator*() const { return current_vertex; } + value_type operator*() const { return currentVertex_; } // Prefix increment - top_sort_iterator &operator++() { - if (next.empty()) { - current_vertex = graph.num_vertices(); + TopSortIterator &operator++() { + if (next_.empty()) { + currentVertex_ = graph_.NumVertices(); return *this; } - current_vertex = next.pop_next(); + currentVertex_ = next_.PopNext(); - for (const auto &child : graph.children(current_vertex)) { - --predecessors_count[child]; - if (not predecessors_count[child]) { - next.push(child); + for (const auto &child : graph_.Children(currentVertex_)) { + --predecessorsCount_[child]; + if (not predecessorsCount_[child]) { + 
next_.Push(child); } } return *this; } // Postfix increment - top_sort_iterator operator++(int) { - top_sort_iterator tmp = *this; + TopSortIterator operator++(int) { + TopSortIterator tmp = *this; ++(*this); return tmp; } - friend bool operator==(const top_sort_iterator &one, const top_sort_iterator &other) { - return one.current_vertex == other.current_vertex; + friend bool operator==(const TopSortIterator &one, const TopSortIterator &other) { + return one.currentVertex_ == other.currentVertex_; }; - friend bool operator!=(const top_sort_iterator &one, const top_sort_iterator &other) { - return one.current_vertex != other.current_vertex; + friend bool operator!=(const TopSortIterator &one, const TopSortIterator &other) { + return one.currentVertex_ != other.currentVertex_; }; }; /** - * @class top_sort_view + * @class TopSortView * @brief Provides a view for iterating over the vertices of a directed graph in topological order. * * This class supports two modes of iteration: - * 1. If the graph type `Graph_t` has a predefined topological order (determined by the - * `has_vertices_in_top_order_v` trait), the iteration will directly use the graph's vertices. + * 1. If the graph type `GraphT` has a predefined topological order (determined by the + * `hasVerticesInTopOrderV<` trait), the iteration will directly use the graph's vertices. * 2. Otherwise, it performs a topological sort using a depth-first search (DFS) stack wrapper. * - * @tparam Graph_t The type of the directed graph. Must satisfy the `is_directed_graph` concept. + * @tparam GraphT The type of the directed graph. Must satisfy the `is_directed_graph` concept. 
* */ -template -class top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class TopSortView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; - dfs_stack_wrapper vertex_container; + const GraphT &graph_; + DfsStackWrapper vertexContainer_; - using ts_iterator = top_sort_iterator>; + using TsIterator = TopSortIterator>; public: - top_sort_view(const Graph_t &graph_) : graph(graph_) {} + TopSortView(const GraphT &graph) : graph_(graph) {} auto begin() { - if constexpr (has_vertices_in_top_order_v) { - return graph.vertices().begin(); + if constexpr (hasVerticesInTopOrderV) { + return graph_.Vertices().begin(); } else { - return ts_iterator(graph, vertex_container, 0); + return TsIterator(graph_, vertexContainer_, 0); } } auto end() { - if constexpr (has_vertices_in_top_order_v) { - return graph.vertices().end(); + if constexpr (hasVerticesInTopOrderV) { + return graph_.Vertices().end(); } else { - return ts_iterator(graph, vertex_container, graph.num_vertices()); + return TsIterator(graph_, vertexContainer_, graph_.NumVertices()); } } }; /** - * @class dfs_top_sort_view + * @class DfsTopSortView * @brief Provides a view for performing a topological sort on a directed graph using depth-first search (DFS). * * This class is designed to work with graphs that satisfy the `directed_graph` concept. It uses a DFS-based * approach to generate a topological ordering of the vertices in the graph. * - * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph` concept. + * @tparam GraphT The type of the graph, which must satisfy the `is_directed_graph` concept. 
* */ -template -class dfs_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class DfsTopSortView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; - dfs_stack_wrapper vertex_container; + const GraphT &graph_; + DfsStackWrapper vertexContainer_; - using ts_iterator = top_sort_iterator>; + using TsIterator = TopSortIterator>; public: - dfs_top_sort_view(const Graph_t &graph_) : graph(graph_) {} + DfsTopSortView(const GraphT &graph) : graph_(graph) {} - auto begin() { return ts_iterator(graph, vertex_container, 0); } + auto begin() { return TsIterator(graph_, vertexContainer_, 0); } - auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } + auto end() { return TsIterator(graph_, vertexContainer_, graph_.NumVertices()); } }; /** - * @class bfs_top_sort_view + * @class BfsTopSortView * @brief Provides a view for performing a topological sort on a directed graph using breadth-first search (BFS). * * This class is designed to work with graphs that satisfy the `directed_graph` concept. It uses a BFS-based * approach to generate a topological ordering of the vertices in the graph. * - * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph` concept. + * @tparam GraphT The type of the graph, which must satisfy the `is_directed_graph` concept. 
* */ -template -class bfs_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class BfsTopSortView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; - bfs_queue_wrapper vertex_container; + const GraphT &graph_; + BfsQueueWrapper vertexContainer_; - using ts_iterator = top_sort_iterator>; + using TsIterator = TopSortIterator>; public: - bfs_top_sort_view(const Graph_t &graph_) : graph(graph_) {} + BfsTopSortView(const GraphT &graph) : graph_(graph) {} - auto begin() { return ts_iterator(graph, vertex_container, 0); } + auto begin() { return TsIterator(graph_, vertexContainer_, 0); } - auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } + auto end() { return TsIterator(graph_, vertexContainer_, graph_.NumVertices()); } }; -template -std::vector> bfs_top_sort(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - std::vector> top_sort; +template +std::vector> BfsTopSort(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + std::vector> topSort; - for (const auto &node : bfs_top_sort_view(graph)) { - top_sort.push_back(node); + for (const auto &node : BfsTopSortView(graph)) { + topSort.push_back(node); } - return top_sort; + return topSort; } -template -std::vector> dfs_top_sort(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - std::vector> top_sort; +template +std::vector> DfsTopSort(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + std::vector> topSort; - for (const auto &node : top_sort_view(graph)) { - top_sort.push_back(node); + for (const auto &node : DfsTopSortView(graph)) { + topSort.push_back(node); } - return top_sort; + return topSort; } -template -struct 
priority_queue_wrapper { - priority_eval_f prio_f; +template +struct PriorityQueueWrapper { + PriorityEvalF prioF_; - struct heap_node { - vertex_idx_t node; + struct HeapNode { + VertexIdxT node_; - T priority; + T priority_; - heap_node() : node(0), priority(0) {} + HeapNode() : node_(0), priority_(0) {} - heap_node(vertex_idx_t n, T p) : node(n), priority(p) {} + HeapNode(VertexIdxT n, T p) : node_(n), priority_(p) {} - bool operator<(heap_node const &rhs) const { - return (priority < rhs.priority) || (priority == rhs.priority and node > rhs.node); + bool operator<(HeapNode const &rhs) const { + return (priority_ < rhs.priority_) || (priority_ == rhs.priority_ and node_ > rhs.node_); } }; - std::vector heap; + std::vector heap_; public: template - priority_queue_wrapper(Args &&...args) : prio_f(std::forward(args)...) {} + PriorityQueueWrapper(Args &&...args) : prioF_(std::forward(args)...) {} - void push(const vertex_idx_t &v) { - heap.emplace_back(v, prio_f(v)); - std::push_heap(heap.begin(), heap.end()); + void Push(const VertexIdxT &v) { + heap_.emplace_back(v, prioF_(v)); + std::push_heap(heap_.begin(), heap_.end()); } - vertex_idx_t pop_next() { - std::pop_heap(heap.begin(), heap.end()); - const auto current_node = heap.back().node; - heap.pop_back(); - return current_node; + VertexIdxT PopNext() { + std::pop_heap(heap_.begin(), heap_.end()); + const auto currentNode = heap_.back().node_; + heap_.pop_back(); + return currentNode; } - bool empty() const { return heap.empty(); } + bool empty() const { return heap_.empty(); } }; -template -class priority_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class PriorityTopSortView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; - using container = priority_queue_wrapper; - container vertex_container; + const GraphT &graph_; + using Container = PriorityQueueWrapper; + Container 
vertexContainer_; - using ts_iterator = top_sort_iterator; + using TsIterator = TopSortIterator; public: template - priority_top_sort_view(const Graph_t &graph_, Args &&...args) - : graph(graph_), vertex_container(std::forward(args)...) {} + PriorityTopSortView(const GraphT &graph, Args &&...args) : graph_(graph), vertexContainer_(std::forward(args)...) {} - auto begin() const { return ts_iterator(graph, vertex_container, 0); } + auto begin() const { return TsIterator(graph_, vertexContainer_, 0); } - auto end() const { return ts_iterator(graph, vertex_container, graph.num_vertices()); } + auto end() const { return TsIterator(graph_, vertexContainer_, graph_.NumVertices()); } }; -template -class locality_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class LocalityTopSortView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; + const GraphT &graph_; - struct loc_eval_f { - auto operator()(vertex_idx_t v) { return std::numeric_limits>::max() - v; } + struct LocEvalF { + auto operator()(VertexIdxT v) { return std::numeric_limits>::max() - v; } }; - priority_queue_wrapper> vertex_container; + PriorityQueueWrapper> vertexContainer_; - using ts_iterator = top_sort_iterator>>; + using TsIterator = TopSortIterator>>; public: - locality_top_sort_view(const Graph_t &graph_) : graph(graph_), vertex_container() {} + LocalityTopSortView(const GraphT &graph) : graph_(graph), vertexContainer_() {} - auto begin() { return ts_iterator(graph, vertex_container, 0); } + auto begin() { return TsIterator(graph_, vertexContainer_, 0); } - auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } + auto end() { return TsIterator(graph_, vertexContainer_, graph_.NumVertices()); } }; -template -std::vector> GetTopOrderMinIndex(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); 
+template +std::vector> GetTopOrderMinIndex(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::vector TopOrder; - TopOrder.reserve(graph.num_vertices()); + std::vector topOrder; + topOrder.reserve(graph.NumVertices()); - for (const auto &vert : locality_top_sort_view(graph)) { - TopOrder.push_back(vert); + for (const auto &vert : LocalityTopSortView(graph)) { + topOrder.push_back(vert); } - if (TopOrder.size() != graph.num_vertices()) { - throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" - + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + if (topOrder.size() != graph.NumVertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.NumVertices() [" + + std::to_string(topOrder.size()) + " != " + std::to_string(graph.NumVertices()) + "]"); } - return TopOrder; + return topOrder; } -template -class max_children_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class MaxChildrenTopSortView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; + const GraphT &graph_; - struct max_children_eval_f { - const Graph_t &graph; + struct MaxChildrenEvalF { + const GraphT &graph_; - max_children_eval_f(const Graph_t &g) : graph(g) {} + MaxChildrenEvalF(const GraphT &g) : graph_(g) {} - auto operator()(vertex_idx_t v) const { return graph.out_degree(v); } + auto operator()(VertexIdxT v) const { return graph_.OutDegree(v); } }; - priority_queue_wrapper> vertex_container; + PriorityQueueWrapper> vertexContainer_; - using ts_iterator = top_sort_iterator>>; + using TsIterator = TopSortIterator>>; public: - max_children_top_sort_view(const Graph_t &graph_) : graph(graph_), vertex_container(graph_) {} + 
MaxChildrenTopSortView(const GraphT &graph) : graph_(graph), vertexContainer_(graph) {} - auto begin() { return ts_iterator(graph, vertex_container, 0); } + auto begin() { return TsIterator(graph_, vertexContainer_, 0); } - auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } + auto end() { return TsIterator(graph_, vertexContainer_, graph_.NumVertices()); } }; -template -std::vector> GetTopOrderMaxChildren(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +std::vector> GetTopOrderMaxChildren(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::vector TopOrder; - TopOrder.reserve(graph.num_vertices()); + std::vector topOrder; + topOrder.reserve(graph.NumVertices()); - for (const auto &vert : max_children_top_sort_view(graph)) { - TopOrder.push_back(vert); + for (const auto &vert : MaxChildrenTopSortView(graph)) { + topOrder.push_back(vert); } - if (TopOrder.size() != graph.num_vertices()) { - throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" - + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + if (topOrder.size() != graph.NumVertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.NumVertices() [" + + std::to_string(topOrder.size()) + " != " + std::to_string(graph.NumVertices()) + "]"); } - return TopOrder; + return topOrder; } -template -class random_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class RandomTopSortView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; + const GraphT &graph_; - struct random_eval_f { - std::vector> priority; + struct RandomEvalF { + 
std::vector> priority_; - random_eval_f(const std::size_t num) : priority(num, 0) { - std::iota(priority.begin(), priority.end(), 0); + RandomEvalF(const std::size_t num) : priority_(num, 0) { + std::iota(priority_.begin(), priority_.end(), 0); std::random_device rd; std::mt19937 g(rd()); - std::shuffle(priority.begin(), priority.end(), g); + std::shuffle(priority_.begin(), priority_.end(), g); } - auto operator()(vertex_idx_t v) const { return priority[v]; } + auto operator()(VertexIdxT v) const { return priority_[v]; } }; - priority_queue_wrapper> vertex_container; + PriorityQueueWrapper> vertexContainer_; - using ts_iterator = top_sort_iterator>>; + using TsIterator = TopSortIterator>>; public: - random_top_sort_view(const Graph_t &graph_) : graph(graph_), vertex_container(graph.num_vertices()) {} + RandomTopSortView(const GraphT &graph) : graph_(graph), vertexContainer_(graph_.NumVertices()) {} - auto begin() { return ts_iterator(graph, vertex_container, 0); } + auto begin() { return TsIterator(graph_, vertexContainer_, 0); } - auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } + auto end() { return TsIterator(graph_, vertexContainer_, graph_.NumVertices()); } }; -template -std::vector> GetTopOrderRandom(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +std::vector> GetTopOrderRandom(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - std::vector TopOrder; - TopOrder.reserve(graph.num_vertices()); + std::vector topOrder; + topOrder.reserve(graph.NumVertices()); - for (const auto &vert : random_top_sort_view(graph)) { - TopOrder.push_back(vert); + for (const auto &vert : RandomTopSortView(graph)) { + topOrder.push_back(vert); } - if (TopOrder.size() != graph.num_vertices()) { - throw std::runtime_error("Error during topological 
ordering: TopOrder.size() != graph.num_vertices() [" - + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + if (topOrder.size() != graph.NumVertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.NumVertices() [" + + std::to_string(topOrder.size()) + " != " + std::to_string(graph.NumVertices()) + "]"); } - return TopOrder; + return topOrder; } -template -class priority_vec_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class PriorityVecTopSortView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; + const GraphT &graph_; - struct priority_eval_f { - const std::vector &priority; + struct PriorityEvalF { + const std::vector &priority_; - priority_eval_f(const std::vector &p) : priority(p) {} + PriorityEvalF(const std::vector &p) : priority_(p) {} - prio_t operator()(vertex_idx_t v) const { return priority[v]; } + PrioT operator()(VertexIdxT v) const { return priority_[v]; } }; - priority_queue_wrapper vertex_container; + PriorityQueueWrapper vertexContainer_; - using ts_iterator = top_sort_iterator>; + using TsIterator = TopSortIterator>; public: - priority_vec_top_sort_view(const Graph_t &graph_, const std::vector &priorities_vec) - : graph(graph_), vertex_container(priorities_vec) {} + PriorityVecTopSortView(const GraphT &graph, const std::vector &prioritiesVec) + : graph_(graph), vertexContainer_(prioritiesVec) {} - auto begin() { return ts_iterator(graph, vertex_container, 0); } + auto begin() { return TsIterator(graph_, vertexContainer_, 0); } - auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } + auto end() { return TsIterator(graph_, vertexContainer_, graph_.NumVertices()); } }; } // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_util.hpp b/include/osp/graph_algorithms/directed_graph_util.hpp 
index 8e373acd..11b980fa 100644 --- a/include/osp/graph_algorithms/directed_graph_util.hpp +++ b/include/osp/graph_algorithms/directed_graph_util.hpp @@ -40,16 +40,16 @@ namespace osp { /** * @brief Checks if there is an edge between two vertices in the graph. * - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. * @param src The source vertex. * @param dest The destination vertex. * @param graph The graph to check. * @return true if there is an edge from src to dest, false otherwise. */ -template -bool edge(const vertex_idx_t &src, const vertex_idx_t &dest, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - for (const auto &child : graph.children(src)) { +template +bool Edge(const VertexIdxT &src, const VertexIdxT &dest, const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + for (const auto &child : graph.Children(src)) { if (child == dest) { return true; } @@ -60,29 +60,29 @@ bool edge(const vertex_idx_t &src, const vertex_idx_t &dest, c /** * @brief Checks if a vertex is a sink (no outgoing edges). * - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. * @param v The vertex to check. * @param graph The graph to check. * @return true if the vertex is a sink, false otherwise. */ -template -bool is_sink(const vertex_idx_t &v, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - return graph.out_degree(v) == 0u; +template +bool IsSink(const VertexIdxT &v, const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + return graph.OutDegree(v) == 0u; } /** * @brief Checks if a vertex is a source (no incoming edges). * - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. * @param v The vertex to check. * @param graph The graph to check. 
* @return true if the vertex is a source, false otherwise. */ -template -bool is_source(const vertex_idx_t &v, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - return graph.in_degree(v) == 0u; +template +bool IsSource(const VertexIdxT &v, const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + return graph.InDegree(v) == 0u; } /** @@ -92,58 +92,58 @@ bool is_source(const vertex_idx_t &v, const Graph_t &graph) { * It is used to create views for source and sink vertices in a directed graph. * */ -template -struct vertex_cond_iterator { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +struct VertexCondIterator { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); // TODO static_assert(is_callabl_v; - const Graph_t &graph; - iterator_t current_vertex; - cond_eval cond; + const GraphT &graph_; + IteratorT currentVertex_; + CondEval cond_; public: using iterator_category = std::input_iterator_tag; - using value_type = vertex_idx_t; + using value_type = VertexIdxT; using difference_type = std::ptrdiff_t; using pointer = const value_type *; using reference = const value_type &; - vertex_cond_iterator(const Graph_t &graph_, const iterator_t &start) : graph(graph_), current_vertex(start) { - while (current_vertex != graph.vertices().end()) { + VertexCondIterator(const GraphT &graph, const IteratorT &start) : graph_(graph), currentVertex_(start) { + while (currentVertex_ != graph_.Vertices().end()) { // if (cond.eval(graph, *current_vertex)) { - if (cond(graph, *current_vertex)) { + if (cond_(graph_, *currentVertex_)) { break; } - current_vertex++; + currentVertex_++; } } - value_type operator*() const { return current_vertex.operator*(); } + value_type operator*() const { return currentVertex_.operator*(); } // Prefix increment - vertex_cond_iterator &operator++() { - 
current_vertex++; + VertexCondIterator &operator++() { + currentVertex_++; - while (current_vertex != graph.vertices().end()) { - if (cond(graph, *current_vertex)) { + while (currentVertex_ != graph_.Vertices().end()) { + if (cond_(graph_, *currentVertex_)) { break; } - current_vertex++; + currentVertex_++; } return *this; } // Postfix increment - vertex_cond_iterator operator++(int) { - vertex_cond_iterator tmp = *this; + VertexCondIterator operator++(int) { + VertexCondIterator tmp = *this; ++(*this); return tmp; } - inline bool operator==(const vertex_cond_iterator &other) { return current_vertex == other.current_vertex; }; + inline bool operator==(const VertexCondIterator &other) { return currentVertex_ == other.currentVertex_; }; - inline bool operator!=(const vertex_cond_iterator &other) { return current_vertex != other.current_vertex; }; + inline bool operator!=(const VertexCondIterator &other) { return currentVertex_ != other.currentVertex_; }; }; /** @@ -152,27 +152,27 @@ struct vertex_cond_iterator { * These classes provide iterators to traverse the source and sink vertices * of a directed graph. 
*/ -template -class source_vertices_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class SourceVerticesView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; + const GraphT &graph_; - struct source_eval { - // static bool eval(const Graph_t &graph, const vertex_idx_t &v) { return graph.in_degree(v) == 0; } - bool operator()(const Graph_t &graph, const vertex_idx_t &v) const { return graph.in_degree(v) == 0; } + struct SourceEval { + // static bool eval(const GraphT &graph, const VertexIdxT &v) { return graph.InDegree(v) == 0; } + bool operator()(const GraphT &graph, const VertexIdxT &v) const { return graph.InDegree(v) == 0; } }; - using source_iterator = vertex_cond_iterator; + using SourceIterator = VertexCondIterator; public: - source_vertices_view(const Graph_t &graph_) : graph(graph_) {} + SourceVerticesView(const GraphT &graph) : graph_(graph) {} - auto begin() const { return source_iterator(graph, graph.vertices().begin()); } + auto begin() const { return SourceIterator(graph_, graph_.Vertices().begin()); } - auto end() const { return source_iterator(graph, graph.vertices().end()); } + auto end() const { return SourceIterator(graph_, graph_.Vertices().end()); } - auto size() const { return graph.num_vertices(); } + auto size() const { return graph_.NumVertices(); } }; /** @@ -181,41 +181,41 @@ class source_vertices_view { * These classes provide iterators to traverse the source and sink vertices * of a directed graph. 
*/ -template -class sink_vertices_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class SinkVerticesView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; + const GraphT &graph_; - struct sink_eval { - // static bool eval(const Graph_t &graph, const vertex_idx_t &v) { return graph.out_degree(v) == 0; } - bool operator()(const Graph_t &graph, const vertex_idx_t &v) { return graph.out_degree(v) == 0; } + struct SinkEval { + // static bool eval(const GraphT &graph, const VertexIdxT &v) { return graph.OutDegree(v) == 0; } + bool operator()(const GraphT &graph, const VertexIdxT &v) { return graph.OutDegree(v) == 0; } }; - using sink_iterator = vertex_cond_iterator; + using SinkIterator = VertexCondIterator; public: - sink_vertices_view(const Graph_t &graph_) : graph(graph_) {} + SinkVerticesView(const GraphT &graph) : graph_(graph) {} - auto begin() const { return sink_iterator(graph, graph.vertices().begin()); } + auto begin() const { return SinkIterator(graph_, graph_.Vertices().begin()); } - auto end() const { return sink_iterator(graph, graph.vertices().end()); } + auto end() const { return SinkIterator(graph_, graph_.Vertices().end()); } - auto size() const { return graph.num_vertices(); } + auto size() const { return graph_.NumVertices(); } }; /** * @brief Returns a collection containing the source vertices of a graph. * - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. * @param graph The graph to check. * @return A vector containing the indices of the source vertices. 
*/ -template -std::vector> source_vertices(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - std::vector> vec; - for (const auto &source : source_vertices_view(graph)) { +template +std::vector> SourceVertices(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + std::vector> vec; + for (const auto &source : SourceVerticesView(graph)) { vec.push_back(source); } return vec; @@ -224,16 +224,16 @@ std::vector> source_vertices(const Graph_t &graph) { /** * @brief Returns a collection containing the sink vertices of a graph. * - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. * @param graph The graph to check. * @return A vector containing the indices of the sink vertices. */ -template -std::vector> sink_vertices(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - std::vector> vec; +template +std::vector> SinkVertices(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + std::vector> vec; - for (const auto &sink : sink_vertices_view(graph)) { + for (const auto &sink : SinkVerticesView(graph)) { vec.push_back(sink); } return vec; @@ -246,55 +246,55 @@ std::vector> sink_vertices(const Graph_t &graph) { * It uses a container wrapper to manage the traversal order. * The adj_iterator can be used to setup the traversal along children or parents. 
*/ -template -struct traversal_iterator { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +struct TraversalIterator { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; + const GraphT &graph_; - adj_iterator adj_iter; + AdjIterator adjIter_; - container_wrapper vertex_container; + ContainerWrapper vertexContainer_; - std::unordered_set> visited; - vertex_idx_t current_vertex; + std::unordered_set> visited_; + VertexIdxT currentVertex_; public: using iterator_category = std::input_iterator_tag; - using value_type = vertex_idx_t; + using value_type = VertexIdxT; using difference_type = std::ptrdiff_t; using pointer = const value_type *; using reference = const value_type &; - traversal_iterator(const Graph_t &graph_, const vertex_idx_t &start) - : graph(graph_), adj_iter(graph_), current_vertex(start) { - if (graph.num_vertices() == start) { + TraversalIterator(const GraphT &graph, const VertexIdxT &start) + : graph_(graph), adjIter_(graph), currentVertex_(start) { + if (graph_.NumVertices() == start) { return; } - visited.insert(start); + visited_.insert(start); - for (const auto &v : adj_iter.iterate(current_vertex)) { - vertex_container.push(v); - visited.insert(v); + for (const auto &v : adjIter_.Iterate(currentVertex_)) { + vertexContainer_.Push(v); + visited_.insert(v); } } - value_type operator*() const { return current_vertex; } + value_type operator*() const { return currentVertex_; } // Prefix increment - traversal_iterator &operator++() { - if (vertex_container.empty()) { - current_vertex = graph.num_vertices(); + TraversalIterator &operator++() { + if (vertexContainer_.empty()) { + currentVertex_ = graph_.NumVertices(); return *this; } - current_vertex = vertex_container.pop_next(); + currentVertex_ = vertexContainer_.PopNext(); - for (const auto &v : adj_iter.iterate(current_vertex)) { - if (visited.find(v) == visited.end()) { - 
vertex_container.push(v); - visited.insert(v); + for (const auto &v : adjIter_.Iterate(currentVertex_)) { + if (visited_.find(v) == visited_.end()) { + vertexContainer_.Push(v); + visited_.insert(v); } } @@ -302,39 +302,39 @@ struct traversal_iterator { } // Postfix increment !! expensive - traversal_iterator operator++(int) { - traversal_iterator tmp = *this; + TraversalIterator operator++(int) { + TraversalIterator tmp = *this; ++(*this); return tmp; } - inline bool operator==(const traversal_iterator &other) { return current_vertex == other.current_vertex; }; + inline bool operator==(const TraversalIterator &other) { return currentVertex_ == other.currentVertex_; }; - inline bool operator!=(const traversal_iterator &other) { return current_vertex != other.current_vertex; }; + inline bool operator!=(const TraversalIterator &other) { return currentVertex_ != other.currentVertex_; }; }; -template -struct child_iterator { - const Graph_t &graph; +template +struct ChildIterator { + const GraphT &graph_; - child_iterator(const Graph_t &graph_) : graph(graph_) {} + ChildIterator(const GraphT &graph) : graph_(graph) {} - inline auto iterate(const vertex_idx_t &v) const { return graph.children(v); } + inline auto Iterate(const VertexIdxT &v) const { return graph_.Children(v); } }; -template -struct bfs_queue_wrapper { - std::queue> queue; +template +struct BfsQueueWrapper { + std::queue> queue_; - void push(const vertex_idx_t &v) { queue.push(v); } + void Push(const VertexIdxT &v) { queue_.push(v); } - vertex_idx_t pop_next() { - auto v = queue.front(); - queue.pop(); + VertexIdxT PopNext() { + auto v = queue_.front(); + queue_.pop(); return v; } - bool empty() const { return queue.empty(); } + bool empty() const { return queue_.empty(); } }; /** @@ -343,38 +343,38 @@ struct bfs_queue_wrapper { * These classes provide iterators to traverse the vertices of a directed graph strating from a given vertex * using breadth-first search (BFS). 
*/ -template -class bfs_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class BfsView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; - vertex_idx_t start_vertex; + const GraphT &graph_; + VertexIdxT startVertex_; - using bfs_iterator = traversal_iterator, child_iterator>; + using BfsIterator = TraversalIterator, ChildIterator>; public: - bfs_view(const Graph_t &graph_, const vertex_idx_t &start) : graph(graph_), start_vertex(start) {} + BfsView(const GraphT &graph, const VertexIdxT &start) : graph_(graph), startVertex_(start) {} - auto begin() const { return bfs_iterator(graph, start_vertex); } + auto begin() const { return BfsIterator(graph_, startVertex_); } - auto end() const { return bfs_iterator(graph, graph.num_vertices()); } + auto end() const { return BfsIterator(graph_, graph_.NumVertices()); } - auto size() const { return graph.num_vertices(); } + auto size() const { return graph_.NumVertices(); } }; -template -struct dfs_stack_wrapper { - std::vector> stack; +template +struct DfsStackWrapper { + std::vector> stack_; - void push(const vertex_idx_t &v) { stack.push_back(v); } + void Push(const VertexIdxT &v) { stack_.push_back(v); } - vertex_idx_t pop_next() { - auto v = stack.back(); - stack.pop_back(); + VertexIdxT PopNext() { + auto v = stack_.back(); + stack_.pop_back(); return v; } - bool empty() const { return stack.empty(); } + bool empty() const { return stack_.empty(); } }; /** @@ -383,32 +383,32 @@ struct dfs_stack_wrapper { * These classes provide iterators to traverse the vertices of a directed graph strating from a given vertex * using depth-first search (DFS). 
*/ -template -class dfs_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class DfsView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; - vertex_idx_t start_vertex; + const GraphT &graph_; + VertexIdxT startVertex_; - using dfs_iterator = traversal_iterator, child_iterator>; + using DfsIterator = TraversalIterator, ChildIterator>; public: - dfs_view(const Graph_t &graph_, const vertex_idx_t &start) : graph(graph_), start_vertex(start) {} + DfsView(const GraphT &graph, const VertexIdxT &start) : graph_(graph), startVertex_(start) {} - auto begin() const { return dfs_iterator(graph, start_vertex); } + auto begin() const { return DfsIterator(graph_, startVertex_); } - auto end() const { return dfs_iterator(graph, graph.num_vertices()); } + auto end() const { return DfsIterator(graph_, graph_.NumVertices()); } - auto size() const { return graph.num_vertices(); } + auto size() const { return graph_.NumVertices(); } }; -template -struct parents_iterator { - const Graph_t &graph; +template +struct ParentsIterator { + const GraphT &graph_; - parents_iterator(const Graph_t &graph_) : graph(graph_) {} + ParentsIterator(const GraphT &graph) : graph_(graph) {} - inline auto iterate(const vertex_idx_t &v) const { return graph.parents(v); } + inline auto Iterate(const VertexIdxT &v) const { return graph_.Parents(v); } }; /** @@ -417,38 +417,38 @@ struct parents_iterator { * These classes provide iterators to traverse the vertices of a directed graph strating from a given vertex * using breadth-first search (BFS) in reverse order. 
*/ -template -class bfs_reverse_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +class BfsReverseView { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - const Graph_t &graph; - vertex_idx_t start_vertex; + const GraphT &graph_; + VertexIdxT startVertex_; - using bfs_iterator = traversal_iterator, parents_iterator>; + using BfsIterator = TraversalIterator, ParentsIterator>; public: - bfs_reverse_view(const Graph_t &graph_, const vertex_idx_t &start) : graph(graph_), start_vertex(start) {} + BfsReverseView(const GraphT &graph, const VertexIdxT &start) : graph_(graph), startVertex_(start) {} - auto begin() const { return bfs_iterator(graph, start_vertex); } + auto begin() const { return BfsIterator(graph_, startVertex_); } - auto end() const { return bfs_iterator(graph, graph.num_vertices()); } + auto end() const { return BfsIterator(graph_, graph_.NumVertices()); } - auto size() const { return graph.num_vertices(); } + auto size() const { return graph_.NumVertices(); } }; /** * @brief Returns a collection containing the successors of a vertex in a directed graph. * - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. * @param v The vertex to check. * @param graph The graph to check. * @return A vector containing the indices of the successors of the vertex. 
*/ -template -std::vector> successors(const vertex_idx_t &v, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - std::vector> vec; - for (const auto &suc : bfs_view(graph, v)) { +template +std::vector> Successors(const VertexIdxT &v, const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + std::vector> vec; + for (const auto &suc : BfsView(graph, v)) { vec.push_back(suc); } return vec; @@ -457,64 +457,64 @@ std::vector> successors(const vertex_idx_t &v, co /** * @brief Returns a collection containing the ancestors of a vertex in a directed graph. * - * @tparam Graph_t The type of the graph. + * @tparam GraphT The type of the graph. * @param v The vertex to check. * @param graph The graph to check. * @return A vector containing the indices of the ancestors of the vertex. */ -template -std::vector> ancestors(const vertex_idx_t &v, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - std::vector> vec; - for (const auto &anc : bfs_reverse_view(graph, v)) { +template +std::vector> Ancestors(const VertexIdxT &v, const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + std::vector> vec; + for (const auto &anc : BfsReverseView(graph, v)) { vec.push_back(anc); } return vec; } -template -bool is_acyclic(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +bool IsAcyclic(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - if (graph.num_vertices() < 2) { + if (graph.NumVertices() < 2) { return true; } - std::vector predecessors_count(graph.num_vertices(), 0); + std::vector predecessorsCount(graph.NumVertices(), 0); std::queue next; // Find source nodes - 
for (const VertexType &v : source_vertices_view(graph)) { + for (const VertexType &v : SourceVerticesView(graph)) { next.push(v); } - VertexType node_count = 0; + VertexType nodeCount = 0; while (!next.empty()) { const VertexType node = next.front(); next.pop(); - ++node_count; + ++nodeCount; - for (const VertexType ¤t : graph.children(node)) { - ++predecessors_count[current]; - if (predecessors_count[current] == graph.in_degree(current)) { + for (const VertexType ¤t : graph.Children(node)) { + ++predecessorsCount[current]; + if (predecessorsCount[current] == graph.InDegree(current)) { next.push(current); } } } - return node_count == graph.num_vertices(); + return nodeCount == graph.NumVertices(); } -template -bool is_connected(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +bool IsConnected(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; - if (graph.num_vertices() < 2) { + if (graph.NumVertices() < 2) { return true; } @@ -524,13 +524,13 @@ bool is_connected(const Graph_t &graph) { next.push(0); visited.insert(0); - VertexType node_count = 0; + VertexType nodeCount = 0; while (!next.empty()) { const VertexType node = next.front(); next.pop(); - ++node_count; + ++nodeCount; - for (const VertexType ¤t : graph.children(node)) { + for (const VertexType ¤t : graph.Children(node)) { if (visited.find(current) == visited.end()) { next.push(current); visited.insert(current); @@ -538,21 +538,21 @@ bool is_connected(const Graph_t &graph) { } } - return node_count == graph.num_vertices(); + return nodeCount == graph.NumVertices(); } -template -std::size_t num_common_parents(const Graph_t &graph, vertex_idx_t v1, vertex_idx_t v2) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +std::size_t NumCommonParents(const GraphT 
&graph, VertexIdxT v1, VertexIdxT v2) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - std::unordered_set> parents; - parents.reserve(graph.in_degree(v1)); - for (const auto &par : graph.parents(v1)) { + std::unordered_set> parents; + parents.reserve(graph.InDegree(v1)); + for (const auto &par : graph.Parents(v1)) { parents.emplace(par); } std::size_t num = 0; - for (const auto &par : graph.parents(v2)) { + for (const auto &par : graph.Parents(v2)) { if (parents.find(par) != parents.end()) { ++num; } @@ -561,18 +561,18 @@ std::size_t num_common_parents(const Graph_t &graph, vertex_idx_t v1, v return num; } -template -std::size_t num_common_children(const Graph_t &graph, vertex_idx_t v1, vertex_idx_t v2) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +std::size_t NumCommonChildren(const GraphT &graph, VertexIdxT v1, VertexIdxT v2) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - std::unordered_set> childrn; - childrn.reserve(graph.out_degree(v1)); - for (const auto &chld : graph.children(v1)) { + std::unordered_set> childrn; + childrn.reserve(graph.OutDegree(v1)); + for (const auto &chld : graph.Children(v1)) { childrn.emplace(chld); } std::size_t num = 0; - for (const auto &chld : graph.children(v2)) { + for (const auto &chld : graph.Children(v2)) { if (childrn.find(chld) != childrn.end()) { ++num; } @@ -588,50 +588,50 @@ std::size_t num_common_children(const Graph_t &graph, vertex_idx_t v1, * u, v in the subgraph, there is a path between u and v in the underlying * undirected graph. * - * @tparam Graph_t The type of the graph, which must satisfy the `directed_graph` concept. + * @tparam GraphT The type of the graph, which must satisfy the `directed_graph` concept. * @param graph The input directed graph. * @param[out] components A vector where `components[i]` will be the component ID for vertex `i`. 
* @return The total number of weakly connected components. */ -template -std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vector> &components) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; +template +std::size_t ComputeWeaklyConnectedComponents(const GraphT &graph, std::vector> &components) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); + using VertexType = VertexIdxT; - if (graph.num_vertices() == 0) { + if (graph.NumVertices() == 0) { components.clear(); return 0; } - components.assign(graph.num_vertices(), std::numeric_limits::max()); - VertexType component_id = 0; + components.assign(graph.NumVertices(), std::numeric_limits::max()); + VertexType componentId = 0; - for (const auto &v : graph.vertices()) { + for (const auto &v : graph.Vertices()) { if (components[v] == std::numeric_limits::max()) { std::vector q; q.push_back(v); - components[v] = component_id; + components[v] = componentId; size_t head = 0; while (head < q.size()) { VertexType u = q[head++]; - for (const auto &neighbor : graph.parents(u)) { + for (const auto &neighbor : graph.Parents(u)) { if (components[neighbor] == std::numeric_limits::max()) { - components[neighbor] = component_id; + components[neighbor] = componentId; q.push_back(neighbor); } } - for (const auto &neighbor : graph.children(u)) { + for (const auto &neighbor : graph.Children(u)) { if (components[neighbor] == std::numeric_limits::max()) { - components[neighbor] = component_id; + components[neighbor] = componentId; q.push_back(neighbor); } } } - component_id++; + componentId++; } } - return component_id; + return componentId; } /** @@ -639,10 +639,10 @@ std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vecto * @param graph The input directed graph. * @return The number of weakly connected components. 
*/ -template -std::size_t count_weakly_connected_components(const Graph_t &graph) { - std::vector> components; - return compute_weakly_connected_components(graph, components); +template +std::size_t CountWeaklyConnectedComponents(const GraphT &graph) { + std::vector> components; + return ComputeWeaklyConnectedComponents(graph, components); } } // namespace osp diff --git a/include/osp/graph_algorithms/specialised_graph_algorithms/subgraph_algorithms.hpp b/include/osp/graph_algorithms/specialised_graph_algorithms/subgraph_algorithms.hpp index dd6e451a..88be7a68 100644 --- a/include/osp/graph_algorithms/specialised_graph_algorithms/subgraph_algorithms.hpp +++ b/include/osp/graph_algorithms/specialised_graph_algorithms/subgraph_algorithms.hpp @@ -25,64 +25,57 @@ limitations under the License. namespace osp { -template -std::unordered_map, vertex_idx_t> create_induced_subgraph_map( - const Graph_t_in &dag, - Compact_Sparse_Graph - &dag_out, - const std::vector> &selected_nodes) { - using Graph_t_out - = Compact_Sparse_Graph; - - static_assert(std::is_same_v, vertex_idx_t>, - "Graph_t_in and out must have the same vertex_idx types"); - - const std::vector> topOrder = GetTopOrder(dag); - std::vector> topOrderPosition(topOrder.size()); - for (vertex_idx_t pos = 0; pos < dag.num_vertices(); ++pos) { +template +std::unordered_map, VertexIdxT> CreateInducedSubgraphMap( + const GraphTIn &dag, + CompactSparseGraph + &dagOut, + const std::vector> &selectedNodes) { + using GraphTOut + = CompactSparseGraph; + + static_assert(std::is_same_v, VertexIdxT>, + "GraphTIn and out must have the same VertexIdx types"); + + const std::vector> topOrder = GetTopOrder(dag); + std::vector> topOrderPosition(topOrder.size()); + for (VertexIdxT pos = 0; pos < dag.NumVertices(); ++pos) { topOrderPosition[topOrder[pos]] = pos; } - auto topCmp = [&topOrderPosition](const vertex_idx_t &lhs, const vertex_idx_t &rhs) { + auto topCmp = [&topOrderPosition](const VertexIdxT &lhs, const VertexIdxT &rhs) { 
return topOrderPosition[lhs] < topOrderPosition[rhs]; }; - std::set, decltype(topCmp)> selectedVerticesOrdered( - selected_nodes.begin(), selected_nodes.end(), topCmp); + std::set, decltype(topCmp)> selectedVerticesOrdered(selectedNodes.begin(), selectedNodes.end(), topCmp); - std::unordered_map, vertex_idx_t> local_idx; - local_idx.reserve(selected_nodes.size()); + std::unordered_map, VertexIdxT> localIdx; + localIdx.reserve(selectedNodes.size()); - vertex_idx_t nodeCntr = 0; + VertexIdxT nodeCntr = 0; for (const auto &node : selectedVerticesOrdered) { - local_idx[node] = nodeCntr++; + localIdx[node] = nodeCntr++; } - std::vector, vertex_idx_t>> edges; + std::vector, VertexIdxT>> edges; for (const auto &node : selectedVerticesOrdered) { - for (const auto &chld : dag.children(node)) { + for (const auto &chld : dag.Children(node)) { if (selectedVerticesOrdered.find(chld) != selectedVerticesOrdered.end()) { - edges.emplace_back(local_idx.at(node), local_idx.at(chld)); + edges.emplace_back(localIdx.at(node), localIdx.at(chld)); } } } - dag_out = Graph_t_out(nodeCntr, edges); + dagOut = GraphTOut(nodeCntr, edges); - for (const auto &[oriVert, outVert] : local_idx) { - dag_out.set_vertex_work_weight(outVert, dag.vertex_work_weight(oriVert)); - dag_out.set_vertex_comm_weight(outVert, dag.vertex_comm_weight(oriVert)); - dag_out.set_vertex_mem_weight(outVert, dag.vertex_mem_weight(oriVert)); - dag_out.set_vertex_type(outVert, dag.vertex_type(oriVert)); + for (const auto &[oriVert, outVert] : localIdx) { + dagOut.SetVertexWorkWeight(outVert, dag.VertexWorkWeight(oriVert)); + dagOut.SetVertexCommWeight(outVert, dag.VertexCommWeight(oriVert)); + dagOut.SetVertexMemWeight(outVert, dag.VertexMemWeight(oriVert)); + dagOut.SetVertexType(outVert, dag.VertexType(oriVert)); } - return local_idx; + return localIdx; } } // end namespace osp diff --git a/include/osp/graph_algorithms/strongly_connected_components.hpp b/include/osp/graph_algorithms/strongly_connected_components.hpp index 
7de49e29..0a647e67 100644 --- a/include/osp/graph_algorithms/strongly_connected_components.hpp +++ b/include/osp/graph_algorithms/strongly_connected_components.hpp @@ -33,55 +33,55 @@ namespace osp { * Tarjan's algorithm performs a single depth-first search to find all strongly connected components. * It has a time complexity of O(V + E), where V is the number of vertices and E is the number of edges. * - * @tparam Graph_t The type of the graph, which must satisfy the `directed_graph` concept. + * @tparam GraphT The type of the graph, which must satisfy the `directed_graph` concept. * @param graph The input directed graph. * @return A vector of vectors, where each inner vector contains the vertices of a strongly connected component. */ -template -std::vector>> strongly_connected_components(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +std::vector>> StronglyConnectedComponents(const GraphT &graph) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - using VertexType = vertex_idx_t; - const auto num_vertices = graph.num_vertices(); - if (num_vertices == 0) { + using VertexType = VertexIdxT; + const auto numVertices = graph.NumVertices(); + if (numVertices == 0) { return {}; } const VertexType unvisited = std::numeric_limits::max(); - std::vector ids(num_vertices, unvisited); - std::vector low(num_vertices, unvisited); - std::vector on_stack(num_vertices, false); + std::vector ids(numVertices, unvisited); + std::vector low(numVertices, unvisited); + std::vector onStack(numVertices, false); std::stack s; - VertexType id_counter = 0; + VertexType idCounter = 0; std::vector> sccs; - using ChildIterator = decltype(graph.children(std::declval()).begin()); + using ChildIterator = decltype(graph.Children(std::declval()).begin()); - for (VertexType i = 0; i < num_vertices; ++i) { + for (VertexType i = 0; i < numVertices; ++i) { if (ids[i] == unvisited) { - 
std::vector>> dfs_stack; + std::vector>> dfsStack; - dfs_stack.emplace_back(i, std::make_pair(graph.children(i).begin(), graph.children(i).end())); + dfsStack.emplace_back(i, std::make_pair(graph.Children(i).begin(), graph.Children(i).end())); s.push(i); - on_stack[i] = true; - ids[i] = low[i] = id_counter++; + onStack[i] = true; + ids[i] = low[i] = idCounter++; - while (!dfs_stack.empty()) { - auto &[at, iter_pair] = dfs_stack.back(); - auto &child_iter = iter_pair.first; - const auto &child_end = iter_pair.second; + while (!dfsStack.empty()) { + auto &[at, iterPair] = dfsStack.back(); + auto &childIter = iterPair.first; + const auto &childEnd = iterPair.second; - if (child_iter != child_end) { - VertexType to = *child_iter; - ++child_iter; + if (childIter != childEnd) { + VertexType to = *childIter; + ++childIter; if (ids[to] == unvisited) { - dfs_stack.emplace_back(to, std::make_pair(graph.children(to).begin(), graph.children(to).end())); + dfsStack.emplace_back(to, std::make_pair(graph.Children(to).begin(), graph.Children(to).end())); s.push(to); - on_stack[to] = true; - ids[to] = low[to] = id_counter++; - } else if (on_stack[to]) { + onStack[to] = true; + ids[to] = low[to] = idCounter++; + } else if (onStack[to]) { low[at] = std::min(low[at], ids[to]); } } else { @@ -90,7 +90,7 @@ std::vector>> strongly_connected_components(co while (true) { VertexType node = s.top(); s.pop(); - on_stack[node] = false; + onStack[node] = false; scc.push_back(node); if (node == at) { break; @@ -99,12 +99,12 @@ std::vector>> strongly_connected_components(co sccs.emplace_back(std::move(scc)); } - if (dfs_stack.size() > 1) { - auto &[parent, _] = dfs_stack[dfs_stack.size() - 2]; + if (dfsStack.size() > 1) { + auto &[parent, _] = dfsStack[dfsStack.size() - 2]; low[parent] = std::min(low[parent], low[at]); } - dfs_stack.pop_back(); + dfsStack.pop_back(); } } } diff --git a/include/osp/graph_algorithms/subgraph_algorithms.hpp b/include/osp/graph_algorithms/subgraph_algorithms.hpp 
index 2088c566..d82a9db5 100644 --- a/include/osp/graph_algorithms/subgraph_algorithms.hpp +++ b/include/osp/graph_algorithms/subgraph_algorithms.hpp @@ -28,132 +28,129 @@ limitations under the License. namespace osp { -template -void create_induced_subgraph(const Graph_t_in &dag, - Graph_t_out &dag_out, - const std::set> &selected_nodes, - const std::set> &extra_sources = {}) { - static_assert(std::is_same_v, vertex_idx_t>, - "Graph_t_in and out must have the same vertex_idx types"); +template +void CreateInducedSubgraph(const GraphTIn &dag, + GraphTOut &dagOut, + const std::set> &selectedNodes, + const std::set> &extraSources = {}) { + static_assert(std::is_same_v, VertexIdxT>, + "GraphTIn and out must have the same VertexIdx types"); - static_assert(is_constructable_cdag_vertex_v, "Graph_t_out must satisfy the constructable_cdag_vertex concept"); + static_assert(isConstructableCdagVertexV, "GraphTOut must satisfy the constructable_cdag_vertex concept"); - static_assert(is_constructable_cdag_edge_v, "Graph_t_out must satisfy the constructable_cdag_edge concept"); + static_assert(isConstructableCdagEdgeV, "GraphTOut must satisfy the constructable_cdag_edge concept"); - assert(dag_out.num_vertices() == 0); + assert(dagOut.NumVertices() == 0); - std::map, vertex_idx_t> local_idx; + std::map, VertexIdxT> localIdx; - for (const auto &node : extra_sources) { - local_idx[node] = dag_out.num_vertices(); - if constexpr (is_constructable_cdag_typed_vertex_v and has_typed_vertices_v) { + for (const auto &node : extraSources) { + localIdx[node] = dagOut.NumVertices(); + if constexpr (isConstructableCdagTypedVertexV and hasTypedVerticesV) { // add extra source with type - dag_out.add_vertex(0, dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), dag.vertex_type(node)); + dagOut.AddVertex(0, dag.VertexCommWeight(node), dag.VertexMemWeight(node), dag.VertexType(node)); } else { // add extra source without type - dag_out.add_vertex(0, dag.vertex_comm_weight(node), 
dag.vertex_mem_weight(node)); + dagOut.AddVertex(0, dag.VertexCommWeight(node), dag.VertexMemWeight(node)); } } - for (const auto &node : selected_nodes) { - local_idx[node] = dag_out.num_vertices(); + for (const auto &node : selectedNodes) { + localIdx[node] = dagOut.NumVertices(); - if constexpr (is_constructable_cdag_typed_vertex_v and has_typed_vertices_v) { + if constexpr (isConstructableCdagTypedVertexV and hasTypedVerticesV) { // add vertex with type - dag_out.add_vertex( - dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), dag.vertex_type(node)); + dagOut.AddVertex( + dag.VertexWorkWeight(node), dag.VertexCommWeight(node), dag.VertexMemWeight(node), dag.VertexType(node)); } else { // add vertex without type - dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node)); + dagOut.AddVertex(dag.VertexWorkWeight(node), dag.VertexCommWeight(node), dag.VertexMemWeight(node)); } } - if constexpr (has_edge_weights_v and has_edge_weights_v) { + if constexpr (hasEdgeWeightsV and hasEdgeWeightsV) { // add edges with edge comm weights - for (const auto &node : selected_nodes) { - for (const auto &in_edge : in_edges(node, dag)) { - const auto &pred = source(in_edge, dag); - if (selected_nodes.find(pred) != selected_nodes.end() || extra_sources.find(pred) != extra_sources.end()) { - dag_out.add_edge(local_idx[pred], local_idx[node], dag.edge_comm_weight(in_edge)); + for (const auto &node : selectedNodes) { + for (const auto &inEdge : InEdges(node, dag)) { + const auto &pred = Source(inEdge, dag); + if (selectedNodes.find(pred) != selectedNodes.end() || extraSources.find(pred) != extraSources.end()) { + dagOut.AddEdge(localIdx[pred], localIdx[node], dag.EdgeCommWeight(inEdge)); } } } } else { // add edges without edge comm weights - for (const auto &node : selected_nodes) { - for (const auto &pred : dag.parents(node)) { - if (selected_nodes.find(pred) != selected_nodes.end() || 
extra_sources.find(pred) != extra_sources.end()) { - dag_out.add_edge(local_idx[pred], local_idx[node]); + for (const auto &node : selectedNodes) { + for (const auto &pred : dag.Parents(node)) { + if (selectedNodes.find(pred) != selectedNodes.end() || extraSources.find(pred) != extraSources.end()) { + dagOut.AddEdge(localIdx[pred], localIdx[node]); } } } } } -template -void create_induced_subgraph(const Graph_t_in &dag, - Graph_t_out &dag_out, - const std::vector> &selected_nodes) { - return create_induced_subgraph(dag, dag_out, std::set>(selected_nodes.begin(), selected_nodes.end())); +template +void CreateInducedSubgraph(const GraphTIn &dag, GraphTOut &dagOut, const std::vector> &selectedNodes) { + return CreateInducedSubgraph(dag, dagOut, std::set>(selectedNodes.begin(), selectedNodes.end())); } -template -bool checkOrderedIsomorphism(const Graph_t &first, const Graph_t &second) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); +template +bool CheckOrderedIsomorphism(const GraphT &first, const GraphT &second) { + static_assert(isDirectedGraphV, "GraphT must satisfy the directed_graph concept"); - if (first.num_vertices() != second.num_vertices() || first.num_edges() != second.num_edges()) { + if (first.NumVertices() != second.NumVertices() || first.NumEdges() != second.NumEdges()) { return false; } - for (const auto &node : first.vertices()) { - if (first.vertex_work_weight(node) != second.vertex_work_weight(node) - || first.vertex_mem_weight(node) != second.vertex_mem_weight(node) - || first.vertex_comm_weight(node) != second.vertex_comm_weight(node) - || first.vertex_type(node) != second.vertex_type(node)) { + for (const auto &node : first.Vertices()) { + if (first.VertexWorkWeight(node) != second.VertexWorkWeight(node) + || first.VertexMemWeight(node) != second.VertexMemWeight(node) + || first.VertexCommWeight(node) != second.VertexCommWeight(node) || first.VertexType(node) != second.VertexType(node)) { return false; } 
- if (first.in_degree(node) != second.in_degree(node) || first.out_degree(node) != second.out_degree(node)) { + if (first.InDegree(node) != second.InDegree(node) || first.OutDegree(node) != second.OutDegree(node)) { return false; } - if constexpr (has_edge_weights_v) { - std::set, e_commw_t>> first_children, second_children; + if constexpr (hasEdgeWeightsV) { + std::set, ECommwT>> firstChildren, secondChildren; - for (const auto &out_edge : out_edges(node, first)) { - first_children.emplace(target(out_edge, first), first.edge_comm_weight(out_edge)); + for (const auto &outEdge : OutEdges(node, first)) { + firstChildren.emplace(Target(outEdge, first), first.EdgeCommWeight(outEdge)); } - for (const auto &out_edge : out_edges(node, second)) { - second_children.emplace(target(out_edge, second), second.edge_comm_weight(out_edge)); + for (const auto &outEdge : OutEdges(node, second)) { + secondChildren.emplace(Target(outEdge, second), second.EdgeCommWeight(outEdge)); } - auto itr = first_children.begin(), second_itr = second_children.begin(); - for (; itr != first_children.end() && second_itr != second_children.end(); ++itr) { - if (*itr != *second_itr) { + auto itr = firstChildren.begin(), secondItr = secondChildren.begin(); + for (; itr != firstChildren.end() && secondItr != secondChildren.end(); ++itr) { + if (*itr != *secondItr) { return false; } - ++second_itr; + ++secondItr; } } else { - std::set> first_children, second_children; + std::set> firstChildren, secondChildren; - for (const auto &child : first.children(node)) { - first_children.emplace(child); + for (const auto &child : first.Children(node)) { + firstChildren.emplace(child); } - for (const auto &child : second.children(node)) { - second_children.emplace(child); + for (const auto &child : second.Children(node)) { + secondChildren.emplace(child); } - auto itr = first_children.begin(), second_itr = second_children.begin(); - for (; itr != first_children.end() && second_itr != second_children.end(); ++itr) { 
- if (*itr != *second_itr) { + auto itr = firstChildren.begin(), secondItr = secondChildren.begin(); + for (; itr != firstChildren.end() && secondItr != secondChildren.end(); ++itr) { + if (*itr != *secondItr) { return false; } - ++second_itr; + ++secondItr; } } } @@ -161,114 +158,112 @@ bool checkOrderedIsomorphism(const Graph_t &first, const Graph_t &second) { return true; } -template -std::vector create_induced_subgraphs(const Graph_t_in &dag_in, const std::vector &partition_IDs) { +template +std::vector CreateInducedSubgraphs(const GraphTIn &dagIn, const std::vector &partitionIDs) { // assumes that input partition IDs are consecutive and starting from 0 - static_assert(std::is_same_v, vertex_idx_t>, - "Graph_t_in and out must have the same vertex_idx types"); + static_assert(std::is_same_v, VertexIdxT>, + "GraphTIn and out must have the same VertexIdx types"); - static_assert(is_constructable_cdag_vertex_v, "Graph_t_out must satisfy the constructable_cdag_vertex concept"); + static_assert(isConstructableCdagVertexV, "GraphTOut must satisfy the constructable_cdag_vertex concept"); - static_assert(is_constructable_cdag_edge_v, "Graph_t_out must satisfy the constructable_cdag_edge concept"); + static_assert(isConstructableCdagEdgeV, "GraphTOut must satisfy the constructable_cdag_edge concept"); - unsigned number_of_parts = 0; - for (const auto id : partition_IDs) { - number_of_parts = std::max(number_of_parts, id + 1); + unsigned numberOfParts = 0; + for (const auto id : partitionIDs) { + numberOfParts = std::max(numberOfParts, id + 1); } - std::vector split_dags(number_of_parts); + std::vector splitDags(numberOfParts); - std::vector> local_idx(dag_in.num_vertices()); + std::vector> localIdx(dagIn.NumVertices()); - for (const auto node : dag_in.vertices()) { - local_idx[node] = split_dags[partition_IDs[node]].num_vertices(); + for (const auto node : dagIn.Vertices()) { + localIdx[node] = splitDags[partitionIDs[node]].NumVertices(); - if constexpr 
(is_constructable_cdag_typed_vertex_v and has_typed_vertices_v) { - split_dags[partition_IDs[node]].add_vertex(dag_in.vertex_work_weight(node), - dag_in.vertex_comm_weight(node), - dag_in.vertex_mem_weight(node), - dag_in.vertex_type(node)); + if constexpr (isConstructableCdagTypedVertexV and hasTypedVerticesV) { + splitDags[partitionIDs[node]].AddVertex( + dagIn.VertexWorkWeight(node), dagIn.VertexCommWeight(node), dagIn.VertexMemWeight(node), dagIn.VertexType(node)); } else { - split_dags[partition_IDs[node]].add_vertex( - dag_in.vertex_work_weight(node), dag_in.vertex_comm_weight(node), dag_in.vertex_mem_weight(node)); + splitDags[partitionIDs[node]].AddVertex( + dagIn.VertexWorkWeight(node), dagIn.VertexCommWeight(node), dagIn.VertexMemWeight(node)); } } - if constexpr (has_edge_weights_v and has_edge_weights_v) { - for (const auto node : dag_in.vertices()) { - for (const auto &out_edge : out_edges(node, dag_in)) { - auto succ = target(out_edge, dag_in); + if constexpr (hasEdgeWeightsV and hasEdgeWeightsV) { + for (const auto node : dagIn.Vertices()) { + for (const auto &outEdge : OutEdges(node, dagIn)) { + auto succ = Target(outEdge, dagIn); - if (partition_IDs[node] == partition_IDs[succ]) { - split_dags[partition_IDs[node]].add_edge(local_idx[node], local_idx[succ], dag_in.edge_comm_weight(out_edge)); + if (partitionIDs[node] == partitionIDs[succ]) { + splitDags[partitionIDs[node]].AddEdge(localIdx[node], localIdx[succ], dagIn.EdgeCommWeight(outEdge)); } } } } else { - for (const auto node : dag_in.vertices()) { - for (const auto &child : dag_in.children(node)) { - if (partition_IDs[node] == partition_IDs[child]) { - split_dags[partition_IDs[node]].add_edge(local_idx[node], local_idx[child]); + for (const auto node : dagIn.Vertices()) { + for (const auto &child : dagIn.Children(node)) { + if (partitionIDs[node] == partitionIDs[child]) { + splitDags[partitionIDs[node]].AddEdge(localIdx[node], localIdx[child]); } } } } - return split_dags; + return splitDags; 
} -template -std::unordered_map, vertex_idx_t> create_induced_subgraph_map( - const Graph_t_in &dag, Graph_t_out &dag_out, const std::vector> &selected_nodes) { - static_assert(std::is_same_v, vertex_idx_t>, - "Graph_t_in and out must have the same vertex_idx types"); +template +std::unordered_map, VertexIdxT> CreateInducedSubgraphMap( + const GraphTIn &dag, GraphTOut &dagOut, const std::vector> &selectedNodes) { + static_assert(std::is_same_v, VertexIdxT>, + "GraphTIn and out must have the same VertexIdx types"); - static_assert(is_constructable_cdag_vertex_v, "Graph_t_out must satisfy the constructable_cdag_vertex concept"); + static_assert(isConstructableCdagVertexV, "GraphTOut must satisfy the constructable_cdag_vertex concept"); - static_assert(is_constructable_cdag_edge_v, "Graph_t_out must satisfy the constructable_cdag_edge concept"); + static_assert(isConstructableCdagEdgeV, "GraphTOut must satisfy the constructable_cdag_edge concept"); - assert(dag_out.num_vertices() == 0); + assert(dagOut.NumVertices() == 0); - std::unordered_map, vertex_idx_t> local_idx; - local_idx.reserve(selected_nodes.size()); + std::unordered_map, VertexIdxT> localIdx; + localIdx.reserve(selectedNodes.size()); - for (const auto &node : selected_nodes) { - local_idx[node] = dag_out.num_vertices(); + for (const auto &node : selectedNodes) { + localIdx[node] = dagOut.NumVertices(); - if constexpr (is_constructable_cdag_typed_vertex_v and has_typed_vertices_v) { + if constexpr (isConstructableCdagTypedVertexV and hasTypedVerticesV) { // add vertex with type - dag_out.add_vertex( - dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), dag.vertex_type(node)); + dagOut.AddVertex( + dag.VertexWorkWeight(node), dag.VertexCommWeight(node), dag.VertexMemWeight(node), dag.VertexType(node)); } else { // add vertex without type - dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node)); + 
dagOut.AddVertex(dag.VertexWorkWeight(node), dag.VertexCommWeight(node), dag.VertexMemWeight(node)); } } - if constexpr (has_edge_weights_v and has_edge_weights_v) { + if constexpr (hasEdgeWeightsV and hasEdgeWeightsV) { // add edges with edge comm weights - for (const auto &node : selected_nodes) { - for (const auto &in_edge : in_edges(node, dag)) { - const auto &pred = source(in_edge, dag); - if (local_idx.count(pred)) { - dag_out.add_edge(local_idx[pred], local_idx[node], dag.edge_comm_weight(in_edge)); + for (const auto &node : selectedNodes) { + for (const auto &inEdge : InEdges(node, dag)) { + const auto &pred = Source(inEdge, dag); + if (localIdx.count(pred)) { + dagOut.AddEdge(localIdx[pred], localIdx[node], dag.EdgeCommWeight(inEdge)); } } } } else { // add edges without edge comm weights - for (const auto &node : selected_nodes) { - for (const auto &pred : dag.parents(node)) { - if (local_idx.count(pred)) { - dag_out.add_edge(local_idx[pred], local_idx[node]); + for (const auto &node : selectedNodes) { + for (const auto &pred : dag.Parents(node)) { + if (localIdx.count(pred)) { + dagOut.AddEdge(localIdx[pred], localIdx[node]); } } } } - return local_idx; + return localIdx; } } // end namespace osp diff --git a/include/osp/graph_algorithms/transitive_reduction.hpp b/include/osp/graph_algorithms/transitive_reduction.hpp index a5ec6772..626dc936 100644 --- a/include/osp/graph_algorithms/transitive_reduction.hpp +++ b/include/osp/graph_algorithms/transitive_reduction.hpp @@ -41,51 +41,50 @@ namespace osp { * * This algorithm is efficient for sparse graphs, with a complexity of roughly O(E * (V+E)). * - * @tparam Graph_t_in The type of the input graph. Must satisfy the `is_directed_graph` concept. - * @tparam Graph_t_out The type of the output graph. Must satisfy the `is_constructable_cdag` concept. + * @tparam GraphTIn The type of the input graph. Must satisfy the `is_directed_graph` concept. + * @tparam GraphTOut The type of the output graph. 
Must satisfy the `is_constructable_cdag` concept. * @param graph_in The input DAG. * @param graph_out The output graph, which will contain the transitive reduction. The graph should be empty. */ -template -void transitive_reduction_sparse(const Graph_t_in &graph_in, Graph_t_out &graph_out) { - static_assert(is_directed_graph_v, "Input graph must be a directed graph."); - static_assert(is_constructable_cdag_v, "Output graph must be a constructable computational DAG."); - assert(graph_out.num_vertices() == 0 && "Output graph must be empty."); +template +void TransitiveReductionSparse(const GraphTIn &graphIn, GraphTOut &graphOut) { + static_assert(isDirectedGraphV, "Input graph must be a directed graph."); + static_assert(isConstructableCdagV, "Output graph must be a constructable computational DAG."); + assert(graphOut.NumVertices() == 0 && "Output graph must be empty."); - if (graph_in.num_vertices() == 0) { + if (graphIn.NumVertices() == 0) { return; } // 1. Copy vertices and their properties from graph_in to graph_out. - for (const auto &v_idx : graph_in.vertices()) { - if constexpr (has_typed_vertices_v && is_constructable_cdag_typed_vertex_v) { - graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), - graph_in.vertex_comm_weight(v_idx), - graph_in.vertex_mem_weight(v_idx), - graph_in.vertex_type(v_idx)); + for (const auto &vIdx : graphIn.Vertices()) { + if constexpr (hasTypedVerticesV && isConstructableCdagTypedVertexV) { + graphOut.AddVertex(graphIn.VertexWorkWeight(vIdx), + graphIn.VertexCommWeight(vIdx), + graphIn.VertexMemWeight(vIdx), + graphIn.VertexType(vIdx)); } else { - graph_out.add_vertex( - graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), graph_in.vertex_mem_weight(v_idx)); + graphOut.AddVertex(graphIn.VertexWorkWeight(vIdx), graphIn.VertexCommWeight(vIdx), graphIn.VertexMemWeight(vIdx)); } } // 2. Add an edge (u, v) to the reduction if it's not transitive. 
// An edge (u, v) is transitive if there exists a child w of u (w != v) that can reach v. - for (const auto &edge : edges(graph_in)) { - const auto u = source(edge, graph_in); - const auto v = target(edge, graph_in); - bool is_transitive = false; - for (const auto &w : graph_in.children(u)) { - if (w != v && has_path(w, v, graph_in)) { - is_transitive = true; + for (const auto &edge : Edges(graphIn)) { + const auto u = Source(edge, graphIn); + const auto v = Target(edge, graphIn); + bool isTransitive = false; + for (const auto &w : graphIn.Children(u)) { + if (w != v && HasPath(w, v, graphIn)) { + isTransitive = true; break; } } - if (!is_transitive) { - if constexpr (has_edge_weights_v && is_constructable_cdag_comm_edge_v) { - graph_out.add_edge(u, v, graph_in.edge_comm_weight(edge)); + if (!isTransitive) { + if constexpr (hasEdgeWeightsV && isConstructableCdagCommEdgeV) { + graphOut.AddEdge(u, v, graphIn.EdgeCommWeight(edge)); } else { - graph_out.add_edge(u, v); + graphOut.AddEdge(u, v); } } } @@ -105,46 +104,45 @@ void transitive_reduction_sparse(const Graph_t_in &graph_in, Graph_t_out &graph_ * * This algorithm is efficient for dense graphs, with a complexity of O(V^3). * - * @tparam Graph_t_in The type of the input graph. Must satisfy the `is_directed_graph_edge_desc` concept. - * @tparam Graph_t_out The type of the output graph. Must satisfy the `is_constructable_cdag` concept. + * @tparam GraphTIn The type of the input graph. Must satisfy the `is_directed_graph_edge_desc` concept. + * @tparam GraphTOut The type of the output graph. Must satisfy the `is_constructable_cdag` concept. * @param graph_in The input DAG. * @param graph_out The output graph, which will contain the transitive reduction. The graph should be empty. 
*/ -template -void transitive_reduction_dense(const Graph_t_in &graph_in, Graph_t_out &graph_out) { - static_assert(is_directed_graph_edge_desc_v, "Input graph must be a directed graph with edge descriptors."); - static_assert(is_constructable_cdag_v, "Output graph must be a constructable computational DAG."); - assert(graph_out.num_vertices() == 0 && "Output graph must be empty."); - - const auto num_v = graph_in.num_vertices(); - if (num_v == 0) { +template +void TransitiveReductionDense(const GraphTIn &graphIn, GraphTOut &graphOut) { + static_assert(isDirectedGraphEdgeDescV, "Input graph must be a directed graph with edge descriptors."); + static_assert(isConstructableCdagV, "Output graph must be a constructable computational DAG."); + assert(graphOut.NumVertices() == 0 && "Output graph must be empty."); + + const auto numV = graphIn.NumVertices(); + if (numV == 0) { return; } // 1. Copy vertices and their properties from graph_in to graph_out. - for (const auto &v_idx : graph_in.vertices()) { - if constexpr (has_typed_vertices_v && is_constructable_cdag_typed_vertex_v) { - graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), - graph_in.vertex_comm_weight(v_idx), - graph_in.vertex_mem_weight(v_idx), - graph_in.vertex_type(v_idx)); + for (const auto &vIdx : graphIn.Vertices()) { + if constexpr (hasTypedVerticesV && isConstructableCdagTypedVertexV) { + graphOut.AddVertex(graphIn.VertexWorkWeight(vIdx), + graphIn.VertexCommWeight(vIdx), + graphIn.VertexMemWeight(vIdx), + graphIn.VertexType(vIdx)); } else { - graph_out.add_vertex( - graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), graph_in.vertex_mem_weight(v_idx)); + graphOut.AddVertex(graphIn.VertexWorkWeight(vIdx), graphIn.VertexCommWeight(vIdx), graphIn.VertexMemWeight(vIdx)); } } // 2. Compute transitive closure (reachability matrix). 
- std::vector> reachable(num_v, std::vector(num_v, false)); - for (const auto &edge : edges(graph_in)) { - reachable[source(edge, graph_in)][target(edge, graph_in)] = true; + std::vector> reachable(numV, std::vector(numV, false)); + for (const auto &edge : Edges(graphIn)) { + reachable[Source(edge, graphIn)][Target(edge, graphIn)] = true; } - const auto top_order = GetTopOrder(graph_in); - for (const auto &k : top_order) { - for (const auto &i : top_order) { + const auto topOrder = GetTopOrder(graphIn); + for (const auto &k : topOrder) { + for (const auto &i : topOrder) { if (reachable[i][k]) { - for (const auto &j : top_order) { + for (const auto &j : topOrder) { if (reachable[k][j]) { reachable[i][j] = true; } @@ -154,21 +152,21 @@ void transitive_reduction_dense(const Graph_t_in &graph_in, Graph_t_out &graph_o } // 3. Add an edge (u, v) to the reduction if it's not transitive. - for (const auto &edge : edges(graph_in)) { - const auto u = source(edge, graph_in); - const auto v = target(edge, graph_in); - bool is_transitive = false; - for (const auto &w : graph_in.children(u)) { + for (const auto &edge : Edges(graphIn)) { + const auto u = Source(edge, graphIn); + const auto v = Target(edge, graphIn); + bool isTransitive = false; + for (const auto &w : graphIn.Children(u)) { if (w != v && reachable[w][v]) { - is_transitive = true; + isTransitive = true; break; } } - if (!is_transitive) { - if constexpr (has_edge_weights_v && is_constructable_cdag_comm_edge_v) { - graph_out.add_edge(u, v, graph_in.edge_comm_weight(edge)); + if (!isTransitive) { + if constexpr (hasEdgeWeightsV && isConstructableCdagCommEdgeV) { + graphOut.AddEdge(u, v, graphIn.EdgeCommWeight(edge)); } else { - graph_out.add_edge(u, v); + graphOut.AddEdge(u, v); } } } diff --git a/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp b/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp index fd950cc3..0b09f59f 100644 --- 
a/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp +++ b/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp @@ -33,40 +33,40 @@ namespace osp { * @tparam memw_t Type for memory weights. * @tparam vertex_type_t Type for vertex types. */ -template -struct cdag_vertex_impl { - using vertex_idx_type = vertex_idx_t; - using work_weight_type = workw_t; - using comm_weight_type = commw_t; - using mem_weight_type = memw_t; - using cdag_vertex_type_type = vertex_type_t; +template +struct CDagVertexImpl { + using VertexIdxType = VertexIdxT; + using WorkWeightType = WorkwT; + using CommWeightType = CommwT; + using MemWeightType = MemwT; + using CDagVertexTypeType = VertexTypeT; - cdag_vertex_impl() = default; + CDagVertexImpl() = default; - cdag_vertex_impl(const cdag_vertex_impl &other) = default; - cdag_vertex_impl(cdag_vertex_impl &&other) noexcept = default; - cdag_vertex_impl &operator=(const cdag_vertex_impl &other) = default; - cdag_vertex_impl &operator=(cdag_vertex_impl &&other) noexcept = default; + CDagVertexImpl(const CDagVertexImpl &other) = default; + CDagVertexImpl(CDagVertexImpl &&other) noexcept = default; + CDagVertexImpl &operator=(const CDagVertexImpl &other) = default; + CDagVertexImpl &operator=(CDagVertexImpl &&other) noexcept = default; /** * @brief Constructs a vertex with specified properties. * - * @param vertex_idx_ The unique identifier for the vertex. - * @param work_w The computational work weight. - * @param comm_w The communication weight. - * @param mem_w The memory weight. - * @param vertex_t The type of the vertex. + * @param vertexIdx The unique identifier for the vertex. + * @param workW The computational work weight. + * @param commW The communication weight. + * @param memW The memory weight. + * @param vertexT The type of the vertex. 
*/ - cdag_vertex_impl(vertex_idx_t vertex_idx_, workw_t work_w, commw_t comm_w, memw_t mem_w, vertex_type_t vertex_t) - : id(vertex_idx_), work_weight(work_w), comm_weight(comm_w), mem_weight(mem_w), vertex_type(vertex_t) {} + CDagVertexImpl(VertexIdxT vertexIdx, WorkwT workW, CommwT commW, MemwT memW, VertexTypeT vertexT) + : id_(vertexIdx), workWeight_(workW), commWeight_(commW), memWeight_(memW), vertexType_(vertexT) {} - vertex_idx_t id = 0; + VertexIdxT id_ = 0; - workw_t work_weight = 0; - commw_t comm_weight = 0; - memw_t mem_weight = 0; + WorkwT workWeight_ = 0; + CommwT commWeight_ = 0; + MemwT memWeight_ = 0; - vertex_type_t vertex_type = 0; + VertexTypeT vertexType_ = 0; }; /** @@ -74,13 +74,13 @@ struct cdag_vertex_impl { * * This struct implements a vertex with integer weights for work, communication, and memory. */ -using cdag_vertex_impl_int = cdag_vertex_impl; +using CDagVertexImplInt = CDagVertexImpl; /** * @brief A vertex implementation with unsigned weights. Indexed by std::size_t. Node types are unsigned. * * This struct implements a vertex with unsigned weights for work, communication, and memory. */ -using cdag_vertex_impl_unsigned = cdag_vertex_impl; +using CDagVertexImplUnsigned = CDagVertexImpl; } // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp index f933a7b6..9d4614fb 100644 --- a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp +++ b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp @@ -36,95 +36,93 @@ limitations under the License. 
namespace osp { -template -class Compact_Sparse_Graph { - static_assert(std::is_integral::value && std::is_integral::value, +template +class CompactSparseGraph { + static_assert(std::is_integral::value && std::is_integral::value, "Vertex and edge type must be of integral nature."); - static_assert(std::is_arithmetic_v && "Work weight must be of arithmetic type."); - static_assert(std::is_arithmetic_v && "Communication weight must be of arithmetic type."); - static_assert(std::is_arithmetic_v && "Memory weight must be of arithmetic type."); - static_assert(std::is_integral_v && "Vertex type type must be of integral type."); + static_assert(std::is_arithmetic_v && "Work weight must be of arithmetic type."); + static_assert(std::is_arithmetic_v && "Communication weight must be of arithmetic type."); + static_assert(std::is_arithmetic_v && "Memory weight must be of arithmetic type."); + static_assert(std::is_integral_v && "Vertex type type must be of integral type."); public: - using vertex_idx = vert_t; + using VertexIdx = VertT; - using vertex_work_weight_type = std::conditional_t; - using vertex_comm_weight_type = comm_weight_type; - using vertex_mem_weight_type = mem_weight_type; - using vertex_type_type = vertex_type_template_type; + using VertexWorkWeightType = std::conditional_t; + using VertexCommWeightType = CommWeightType; + using VertexMemWeightType = MemWeightType; + using VertexTypeType = VertexTypeTemplateType; - static bool constexpr vertices_in_top_order = true; - static bool constexpr children_in_top_order = true; - static bool constexpr children_in_vertex_order = true; - static bool constexpr parents_in_top_order = true; - static bool constexpr parents_in_vertex_order = true; + static bool constexpr verticesInTopOrder_ = true; + static bool constexpr childrenInTopOrder_ = true; + static bool constexpr childrenInVertexOrder_ = true; + static bool constexpr parentsInTopOrder_ = true; + static bool constexpr parentsInVertexOrder_ = true; private: - using 
ThisT = Compact_Sparse_Graph; + using ThisT = CompactSparseGraph; protected: - class Compact_Parent_Edges { + class CompactParentEdges { private: // Compressed Sparse Row (CSR) - std::vector csr_edge_parents; - std::vector csr_target_ptr; + std::vector csrEdgeParents_; + std::vector csrTargetPtr_; public: - Compact_Parent_Edges() = default; - Compact_Parent_Edges(const Compact_Parent_Edges &other) = default; - Compact_Parent_Edges(Compact_Parent_Edges &&other) = default; - Compact_Parent_Edges &operator=(const Compact_Parent_Edges &other) = default; - Compact_Parent_Edges &operator=(Compact_Parent_Edges &&other) = default; - virtual ~Compact_Parent_Edges() = default; + CompactParentEdges() = default; + CompactParentEdges(const CompactParentEdges &other) = default; + CompactParentEdges(CompactParentEdges &&other) = default; + CompactParentEdges &operator=(const CompactParentEdges &other) = default; + CompactParentEdges &operator=(CompactParentEdges &&other) = default; + virtual ~CompactParentEdges() = default; - Compact_Parent_Edges(const std::vector &csr_edge_parents_, const std::vector &csr_target_ptr_) - : csr_edge_parents(csr_edge_parents_), csr_target_ptr(csr_target_ptr_) {}; - Compact_Parent_Edges(std::vector &&csr_edge_parents_, std::vector &&csr_target_ptr_) - : csr_edge_parents(std::move(csr_edge_parents_)), csr_target_ptr(std::move(csr_target_ptr_)) {}; + CompactParentEdges(const std::vector &csrEdgeParents, const std::vector &csrTargetPtr) + : csrEdgeParents_(csrEdgeParents), csrTargetPtr_(csrTargetPtr) {}; + CompactParentEdges(std::vector &&csrEdgeParents, std::vector &&csrTargetPtr) + : csrEdgeParents_(std::move(csrEdgeParents)), csrTargetPtr_(std::move(csrTargetPtr)) {}; - inline edge_t number_of_parents(const vertex_idx v) const { return csr_target_ptr[v + 1] - csr_target_ptr[v]; } + inline EdgeT NumberOfParents(const VertexIdx v) const { return csrTargetPtr_[v + 1] - csrTargetPtr_[v]; } - class Parent_range { + class ParentRange { private: - const 
std::vector &_csr_edge_parents; - const std::vector &_csr_target_ptr; - const vertex_idx _vert; + const std::vector &csrEdgeParents_; + const std::vector &csrTargetPtr_; + const VertexIdx vert_; public: - Parent_range(const std::vector &csr_edge_parents, - const std::vector &csr_target_ptr, - const vertex_idx vert) - : _csr_edge_parents(csr_edge_parents), _csr_target_ptr(csr_target_ptr), _vert(vert) {}; + ParentRange(const std::vector &csrEdgeParents, const std::vector &csrTargetPtr, const VertexIdx vert) + : csrEdgeParents_(csrEdgeParents), csrTargetPtr_(csrTargetPtr), vert_(vert) {}; inline auto cbegin() const { - auto it = _csr_edge_parents.cbegin(); - std::advance(it, _csr_target_ptr[_vert]); + auto it = csrEdgeParents_.cbegin(); + std::advance(it, csrTargetPtr_[vert_]); return it; } inline auto cend() const { - auto it = _csr_edge_parents.cbegin(); - std::advance(it, _csr_target_ptr[_vert + 1]); + auto it = csrEdgeParents_.cbegin(); + std::advance(it, csrTargetPtr_[vert_ + 1]); return it; } @@ -133,14 +131,14 @@ class Compact_Sparse_Graph { inline auto end() const { return cend(); } inline auto crbegin() const { - auto it = _csr_edge_parents.crbegin(); - std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert + 1]); + auto it = csrEdgeParents_.crbegin(); + std::advance(it, csrTargetPtr_[csrTargetPtr_.size() - 1] - csrTargetPtr_[vert_ + 1]); return it; }; inline auto crend() const { - auto it = _csr_edge_parents.crbegin(); - std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert]); + auto it = csrEdgeParents_.crbegin(); + std::advance(it, csrTargetPtr_[csrTargetPtr_.size() - 1] - csrTargetPtr_[vert_]); return it; }; @@ -149,61 +147,61 @@ class Compact_Sparse_Graph { inline auto rend() const { return crend(); }; }; - inline Parent_range parents(const vertex_idx vert) const { return Parent_range(csr_edge_parents, csr_target_ptr, vert); } + inline ParentRange Parents(const VertexIdx vert) const { return 
ParentRange(csrEdgeParents_, csrTargetPtr_, vert); } }; - class Compact_Children_Edges { + class CompactChildrenEdges { private: // Compressed Sparse Column (CSC) - std::vector csc_edge_children; - std::vector csc_source_ptr; + std::vector cscEdgeChildren_; + std::vector cscSourcePtr_; public: - Compact_Children_Edges() = default; - Compact_Children_Edges(const Compact_Children_Edges &other) = default; - Compact_Children_Edges(Compact_Children_Edges &&other) = default; - Compact_Children_Edges &operator=(const Compact_Children_Edges &other) = default; - Compact_Children_Edges &operator=(Compact_Children_Edges &&other) = default; - virtual ~Compact_Children_Edges() = default; - - Compact_Children_Edges(const std::vector &csc_edge_children_, const std::vector &csc_source_ptr_) - : csc_edge_children(csc_edge_children_), csc_source_ptr(csc_source_ptr_) {}; - Compact_Children_Edges(std::vector &&csc_edge_children_, std::vector &&csc_source_ptr_) - : csc_edge_children(std::move(csc_edge_children_)), csc_source_ptr(std::move(csc_source_ptr_)) {}; - - inline edge_t number_of_children(const vertex_idx v) const { return csc_source_ptr[v + 1] - csc_source_ptr[v]; } - - inline vertex_idx source(const edge_t &indx) const { - auto it = std::upper_bound(csc_source_ptr.cbegin(), csc_source_ptr.cend(), indx); - vertex_idx src = static_cast(std::distance(csc_source_ptr.cbegin(), it) - 1); + CompactChildrenEdges() = default; + CompactChildrenEdges(const CompactChildrenEdges &other) = default; + CompactChildrenEdges(CompactChildrenEdges &&other) = default; + CompactChildrenEdges &operator=(const CompactChildrenEdges &other) = default; + CompactChildrenEdges &operator=(CompactChildrenEdges &&other) = default; + virtual ~CompactChildrenEdges() = default; + + CompactChildrenEdges(const std::vector &cscEdgeChildren, const std::vector &cscSourcePtr) + : cscEdgeChildren_(cscEdgeChildren), cscSourcePtr_(cscSourcePtr) {}; + CompactChildrenEdges(std::vector &&cscEdgeChildren, std::vector 
&&cscSourcePtr) + : cscEdgeChildren_(std::move(cscEdgeChildren)), cscSourcePtr_(std::move(cscSourcePtr)) {}; + + inline EdgeT NumberOfChildren(const VertexIdx v) const { return cscSourcePtr_[v + 1] - cscSourcePtr_[v]; } + + inline VertexIdx Source(const EdgeT &indx) const { + auto it = std::upper_bound(cscSourcePtr_.cbegin(), cscSourcePtr_.cend(), indx); + VertexIdx src = static_cast(std::distance(cscSourcePtr_.cbegin(), it) - 1); return src; }; - inline vertex_idx target(const edge_t &indx) const { return csc_edge_children[indx]; }; + inline VertexIdx Target(const EdgeT &indx) const { return cscEdgeChildren_[indx]; }; - inline edge_t children_indx_begin(const vertex_idx &vert) const { return csc_source_ptr[vert]; }; + inline EdgeT ChildrenIndxBegin(const VertexIdx &vert) const { return cscSourcePtr_[vert]; }; - class Children_range { + class ChildrenRange { private: - const std::vector &_csc_edge_children; - const std::vector &_csc_source_ptr; - const vertex_idx _vert; + const std::vector &cscEdgeChildren_; + const std::vector &cscSourcePtr_; + const VertexIdx vert_; public: - Children_range(const std::vector &csc_edge_children, - const std::vector &csc_source_ptr, - const vertex_idx vert) - : _csc_edge_children(csc_edge_children), _csc_source_ptr(csc_source_ptr), _vert(vert) {}; + ChildrenRange(const std::vector &cscEdgeChildren, + const std::vector &cscSourcePtr, + const VertexIdx vert) + : cscEdgeChildren_(cscEdgeChildren), cscSourcePtr_(cscSourcePtr), vert_(vert) {}; inline auto cbegin() const { - auto it = _csc_edge_children.cbegin(); - std::advance(it, _csc_source_ptr[_vert]); + auto it = cscEdgeChildren_.cbegin(); + std::advance(it, cscSourcePtr_[vert_]); return it; }; inline auto cend() const { - auto it = _csc_edge_children.cbegin(); - std::advance(it, _csc_source_ptr[_vert + 1]); + auto it = cscEdgeChildren_.cbegin(); + std::advance(it, cscSourcePtr_[vert_ + 1]); return it; }; @@ -212,14 +210,14 @@ class Compact_Sparse_Graph { inline auto end() const { 
return cend(); }; inline auto crbegin() const { - auto it = _csc_edge_children.crbegin(); - std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert + 1]); + auto it = cscEdgeChildren_.crbegin(); + std::advance(it, cscSourcePtr_[cscSourcePtr_.size() - 1] - cscSourcePtr_[vert_ + 1]); return it; }; inline auto crend() const { - auto it = _csc_edge_children.crbegin(); - std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert]); + auto it = cscEdgeChildren_.crbegin(); + std::advance(it, cscSourcePtr_[cscSourcePtr_.size() - 1] - cscSourcePtr_[vert_]); return it; }; @@ -228,165 +226,163 @@ class Compact_Sparse_Graph { inline auto rend() const { return crend(); }; }; - inline Children_range children(const vertex_idx vert) const { - return Children_range(csc_edge_children, csc_source_ptr, vert); - } + inline ChildrenRange Children(const VertexIdx vert) const { return ChildrenRange(cscEdgeChildren_, cscSourcePtr_, vert); } }; - vertex_idx number_of_vertices = static_cast(0); - edge_t number_of_edges = static_cast(0); + VertexIdx numberOfVertices_ = static_cast(0); + EdgeT numberOfEdges_ = static_cast(0); - Compact_Parent_Edges csr_in_edges; - Compact_Children_Edges csc_out_edges; + CompactParentEdges csrInEdges_; + CompactChildrenEdges cscOutEdges_; - vertex_type_type number_of_vertex_types = static_cast(1); + VertexTypeType numberOfVertexTypes_ = static_cast(1); - std::vector vert_work_weights; - std::vector vert_comm_weights; - std::vector vert_mem_weights; - std::vector vert_types; + std::vector vertWorkWeights_; + std::vector vertCommWeights_; + std::vector vertMemWeights_; + std::vector vertTypes_; - std::vector vertex_permutation_from_internal_to_original; - std::vector vertex_permutation_from_original_to_internal; + std::vector vertexPermutationFromInternalToOriginal_; + std::vector vertexPermutationFromOriginalToInternal_; template - std::enable_if_t _update_num_vertex_types() { - number_of_vertex_types = 
static_cast(1); + std::enable_if_t UpdateNumVertexTypes() { + numberOfVertexTypes_ = static_cast(1); } template - std::enable_if_t _update_num_vertex_types() { - number_of_vertex_types = static_cast(1); - for (const auto vt : vert_types) { - number_of_vertex_types = std::max(number_of_vertex_types, vt); + std::enable_if_t UpdateNumVertexTypes() { + numberOfVertexTypes_ = static_cast(1); + for (const auto vt : vertTypes_) { + numberOfVertexTypes_ = std::max(numberOfVertexTypes_, vt); } } public: - Compact_Sparse_Graph() = default; - Compact_Sparse_Graph(const Compact_Sparse_Graph &other) = default; - Compact_Sparse_Graph(Compact_Sparse_Graph &&other) = default; - Compact_Sparse_Graph &operator=(const Compact_Sparse_Graph &other) = default; - Compact_Sparse_Graph &operator=(Compact_Sparse_Graph &&other) = default; - virtual ~Compact_Sparse_Graph() = default; - - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type &edges) - : number_of_vertices(num_vertices_), number_of_edges(static_cast(edges.size())) { - static_assert(is_container_of>::value - || is_edge_list_type::value); - - assert((0 <= num_vertices_) && "Number of vertices must be non-negative."); - assert((edges.size() < static_cast(std::numeric_limits::max())) + CompactSparseGraph() = default; + CompactSparseGraph(const CompactSparseGraph &other) = default; + CompactSparseGraph(CompactSparseGraph &&other) = default; + CompactSparseGraph &operator=(const CompactSparseGraph &other) = default; + CompactSparseGraph &operator=(CompactSparseGraph &&other) = default; + virtual ~CompactSparseGraph() = default; + + template + CompactSparseGraph(VertexIdx numVertices, const EdgeListType &edges) + : numberOfVertices_(numVertices), numberOfEdges_(static_cast(edges.size())) { + static_assert(IsContainerOf>::value + || IsEdgeListType::value); + + assert((0 <= numVertices) && "Number of vertices must be non-negative."); + assert((edges.size() < static_cast(std::numeric_limits::max())) && "Number of 
edges must be strictly smaller than the maximally representable number."); - if constexpr (is_container_of>::value) { + if constexpr (IsContainerOf>::value) { assert(std::all_of(edges.begin(), edges.end(), - [num_vertices_](const auto &edge) { - return (0 <= edge.first) && (edge.first < num_vertices_) && (0 <= edge.second) - && (edge.second < num_vertices_); + [numVertices](const auto &edge) { + return (0 <= edge.first) && (edge.first < numVertices) && (0 <= edge.second) + && (edge.second < numVertices); }) && "Source and target of edges must be non-negative and less than the number of vertices."); } - if constexpr (is_edge_list_type_v) { + if constexpr (isEdgeListTypeV) { assert(std::all_of(edges.begin(), edges.end(), - [num_vertices_](const auto &edge) { - return (0 <= edge.source) && (edge.source < num_vertices_) && (0 <= edge.target) - && (edge.target < num_vertices_); + [numVertices](const auto &edge) { + return (0 <= edge.source_) && (edge.source_ < numVertices) && (0 <= edge.target_) + && (edge.target_ < numVertices); }) && "Source and target of edges must be non-negative and less than the number of vertices."); } - if constexpr (keep_vertex_order) { - if constexpr (is_container_of>::value) { + if constexpr (keepVertexOrder) { + if constexpr (IsContainerOf>::value) { assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.first < edge.second; }) && "Vertex order must be a topological order."); } - if constexpr (is_edge_list_type_v) { - assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.source < edge.target; }) + if constexpr (isEdgeListTypeV) { + assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.source_ < edge.target_; }) && "Vertex order must be a topological order."); } } - if constexpr (use_work_weights) { - vert_work_weights = std::vector(num_vertices(), 1); + if constexpr (useWorkWeights) { + vertWorkWeights_ = std::vector(NumVertices(), 1); } - if constexpr 
(use_comm_weights) { - vert_comm_weights = std::vector(num_vertices(), 0); + if constexpr (useCommWeights) { + vertCommWeights_ = std::vector(NumVertices(), 0); } - if constexpr (use_mem_weights) { - vert_mem_weights = std::vector(num_vertices(), 0); + if constexpr (useMemWeights) { + vertMemWeights_ = std::vector(NumVertices(), 0); } - if constexpr (use_vert_types) { - number_of_vertex_types = 1; - vert_types = std::vector(num_vertices(), 0); + if constexpr (useVertTypes) { + numberOfVertexTypes_ = 1; + vertTypes_ = std::vector(NumVertices(), 0); } - if constexpr (!keep_vertex_order) { - vertex_permutation_from_internal_to_original.reserve(num_vertices()); - vertex_permutation_from_original_to_internal.reserve(num_vertices()); + if constexpr (!keepVertexOrder) { + vertexPermutationFromInternalToOriginal_.reserve(NumVertices()); + vertexPermutationFromOriginalToInternal_.reserve(NumVertices()); } // Construction - std::vector> children_tmp(num_vertices()); - std::vector num_parents_tmp(num_vertices(), 0); + std::vector> childrenTmp(NumVertices()); + std::vector numParentsTmp(NumVertices(), 0); - if constexpr (is_container_of>::value) { + if constexpr (IsContainerOf>::value) { for (const auto &edge : edges) { - children_tmp[edge.first].push_back(edge.second); - num_parents_tmp[edge.second]++; + childrenTmp[edge.first].push_back(edge.second); + numParentsTmp[edge.second]++; } } - if constexpr (is_edge_list_type_v) { + if constexpr (isEdgeListTypeV) { for (const auto &edge : edges) { - children_tmp[edge.source].push_back(edge.target); - num_parents_tmp[edge.target]++; + childrenTmp[edge.source_].push_back(edge.target_); + numParentsTmp[edge.target_]++; } } - std::vector csc_edge_children; - csc_edge_children.reserve(num_edges()); - std::vector csc_source_ptr(num_vertices() + 1); - std::vector csr_edge_parents(num_edges()); - std::vector csr_target_ptr; - csr_target_ptr.reserve(num_vertices() + 1); + std::vector cscEdgeChildren; + cscEdgeChildren.reserve(NumEdges()); + 
std::vector cscSourcePtr(NumVertices() + 1); + std::vector csrEdgeParents(NumEdges()); + std::vector csrTargetPtr; + csrTargetPtr.reserve(NumVertices() + 1); - if constexpr (keep_vertex_order) { - for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { - csc_source_ptr[vert] = static_cast(csc_edge_children.size()); + if constexpr (keepVertexOrder) { + for (VertexIdx vert = 0; vert < NumVertices(); ++vert) { + cscSourcePtr[vert] = static_cast(cscEdgeChildren.size()); - std::sort(children_tmp[vert].begin(), children_tmp[vert].end()); - for (const auto &chld : children_tmp[vert]) { - csc_edge_children.emplace_back(chld); + std::sort(childrenTmp[vert].begin(), childrenTmp[vert].end()); + for (const auto &chld : childrenTmp[vert]) { + cscEdgeChildren.emplace_back(chld); } } - csc_source_ptr[num_vertices()] = static_cast(csc_edge_children.size()); + cscSourcePtr[NumVertices()] = static_cast(cscEdgeChildren.size()); - csr_target_ptr = std::vector(num_vertices() + 1, 0); - for (std::size_t i = 0U; i < num_parents_tmp.size(); ++i) { - csr_target_ptr[i + 1] = csr_target_ptr[i] + num_parents_tmp[i]; + csrTargetPtr = std::vector(NumVertices() + 1, 0); + for (std::size_t i = 0U; i < numParentsTmp.size(); ++i) { + csrTargetPtr[i + 1] = csrTargetPtr[i] + numParentsTmp[i]; } - std::vector offset = csr_target_ptr; - for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { - for (const auto &chld : children_tmp[vert]) { - csr_edge_parents[offset[chld]++] = vert; + std::vector offset = csrTargetPtr; + for (VertexIdx vert = 0; vert < NumVertices(); ++vert) { + for (const auto &chld : childrenTmp[vert]) { + csrEdgeParents[offset[chld]++] = vert; } } } else { - std::vector> parents_tmp(num_vertices()); + std::vector> parentsTmp(NumVertices()); - if constexpr (is_container_of>::value) { + if constexpr (IsContainerOf>::value) { for (const auto &edge : edges) { - parents_tmp[edge.second].push_back(edge.first); + parentsTmp[edge.second].push_back(edge.first); } } - if constexpr 
(is_edge_list_type_v) { + if constexpr (isEdgeListTypeV) { for (const auto &edge : edges) { - parents_tmp[edge.target].push_back(edge.source); + parentsTmp[edge.target_].push_back(edge.source_); } } @@ -394,682 +390,656 @@ class Compact_Sparse_Graph { // Xu Yu, Can Lu, and Xuemin Lin const double decay = 8.0; - std::vector prec_remaining = num_parents_tmp; - std::vector priorities(num_vertices(), 0.0); + std::vector precRemaining = numParentsTmp; + std::vector priorities(NumVertices(), 0.0); - auto v_cmp = [&priorities, &children_tmp](const vertex_idx &lhs, const vertex_idx &rhs) { + auto vCmp = [&priorities, &childrenTmp](const VertexIdx &lhs, const VertexIdx &rhs) { return (priorities[lhs] < priorities[rhs]) - || ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() < children_tmp[rhs].size())) - || ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() == children_tmp[rhs].size()) + || ((priorities[lhs] <= priorities[rhs]) && (childrenTmp[lhs].size() < childrenTmp[rhs].size())) + || ((priorities[lhs] <= priorities[rhs]) && (childrenTmp[lhs].size() == childrenTmp[rhs].size()) && (lhs > rhs)); }; - std::priority_queue, decltype(v_cmp)> ready_q(v_cmp); - for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { - if (prec_remaining[vert] == 0) { - ready_q.push(vert); + std::priority_queue, decltype(vCmp)> readyQ(vCmp); + for (VertexIdx vert = 0; vert < NumVertices(); ++vert) { + if (precRemaining[vert] == 0) { + readyQ.push(vert); } } - while (!ready_q.empty()) { - vertex_idx vert = ready_q.top(); - ready_q.pop(); + while (!readyQ.empty()) { + VertexIdx vert = readyQ.top(); + readyQ.pop(); - double pos = static_cast(vertex_permutation_from_internal_to_original.size()); + double pos = static_cast(vertexPermutationFromInternalToOriginal_.size()); pos /= decay; - vertex_permutation_from_internal_to_original.push_back(vert); + vertexPermutationFromInternalToOriginal_.push_back(vert); // update priorities - for (vertex_idx chld : 
children_tmp[vert]) { - priorities[chld] = log_sum_exp(priorities[chld], pos); + for (VertexIdx chld : childrenTmp[vert]) { + priorities[chld] = LogSumExp(priorities[chld], pos); } - for (vertex_idx par : parents_tmp[vert]) { - for (vertex_idx sibling : children_tmp[par]) { - priorities[sibling] = log_sum_exp(priorities[sibling], pos); + for (VertexIdx par : parentsTmp[vert]) { + for (VertexIdx sibling : childrenTmp[par]) { + priorities[sibling] = LogSumExp(priorities[sibling], pos); } } - for (vertex_idx chld : children_tmp[vert]) { - for (vertex_idx couple : parents_tmp[chld]) { - priorities[couple] = log_sum_exp(priorities[couple], pos); + for (VertexIdx chld : childrenTmp[vert]) { + for (VertexIdx couple : parentsTmp[chld]) { + priorities[couple] = LogSumExp(priorities[couple], pos); } } // update constraints and push to queue - for (vertex_idx chld : children_tmp[vert]) { - --prec_remaining[chld]; - if (prec_remaining[chld] == 0) { - ready_q.push(chld); + for (VertexIdx chld : childrenTmp[vert]) { + --precRemaining[chld]; + if (precRemaining[chld] == 0) { + readyQ.push(chld); } } } - assert(vertex_permutation_from_internal_to_original.size() == static_cast(num_vertices())); + assert(vertexPermutationFromInternalToOriginal_.size() == static_cast(NumVertices())); // constructing the csr and csc - vertex_permutation_from_original_to_internal = std::vector(num_vertices(), 0); - for (vertex_idx new_pos = 0; new_pos < num_vertices(); ++new_pos) { - vertex_permutation_from_original_to_internal[vertex_permutation_from_internal_to_original[new_pos]] = new_pos; + vertexPermutationFromOriginalToInternal_ = std::vector(NumVertices(), 0); + for (VertexIdx newPos = 0; newPos < NumVertices(); ++newPos) { + vertexPermutationFromOriginalToInternal_[vertexPermutationFromInternalToOriginal_[newPos]] = newPos; } - for (vertex_idx vert_new_pos = 0; vert_new_pos < num_vertices(); ++vert_new_pos) { - csc_source_ptr[vert_new_pos] = static_cast(csc_edge_children.size()); + for 
(VertexIdx vertNewPos = 0; vertNewPos < NumVertices(); ++vertNewPos) { + cscSourcePtr[vertNewPos] = static_cast(cscEdgeChildren.size()); - vertex_idx vert_old_name = vertex_permutation_from_internal_to_original[vert_new_pos]; + VertexIdx vertOldName = vertexPermutationFromInternalToOriginal_[vertNewPos]; - std::vector children_new_name; - children_new_name.reserve(children_tmp[vert_old_name].size()); + std::vector childrenNewName; + childrenNewName.reserve(childrenTmp[vertOldName].size()); - for (vertex_idx chld_old_name : children_tmp[vert_old_name]) { - children_new_name.push_back(vertex_permutation_from_original_to_internal[chld_old_name]); + for (VertexIdx chldOldName : childrenTmp[vertOldName]) { + childrenNewName.push_back(vertexPermutationFromOriginalToInternal_[chldOldName]); } - std::sort(children_new_name.begin(), children_new_name.end()); - for (const auto &chld : children_new_name) { - csc_edge_children.emplace_back(chld); + std::sort(childrenNewName.begin(), childrenNewName.end()); + for (const auto &chld : childrenNewName) { + cscEdgeChildren.emplace_back(chld); } } - csc_source_ptr[num_vertices()] = static_cast(csc_edge_children.size()); + cscSourcePtr[NumVertices()] = static_cast(cscEdgeChildren.size()); - edge_t acc = 0; - for (vertex_idx vert_old_name : vertex_permutation_from_internal_to_original) { - csr_target_ptr.push_back(acc); - acc += num_parents_tmp[vert_old_name]; + EdgeT acc = 0; + for (VertexIdx vertOldName : vertexPermutationFromInternalToOriginal_) { + csrTargetPtr.push_back(acc); + acc += numParentsTmp[vertOldName]; } - csr_target_ptr.push_back(acc); + csrTargetPtr.push_back(acc); - std::vector offset = csr_target_ptr; - for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { - for (edge_t indx = csc_source_ptr[vert]; indx < csc_source_ptr[vert + 1]; ++indx) { - const vertex_idx chld = csc_edge_children[indx]; - csr_edge_parents[offset[chld]++] = vert; + std::vector offset = csrTargetPtr; + for (VertexIdx vert = 0; vert < 
NumVertices(); ++vert) { + for (EdgeT indx = cscSourcePtr[vert]; indx < cscSourcePtr[vert + 1]; ++indx) { + const VertexIdx chld = cscEdgeChildren[indx]; + csrEdgeParents[offset[chld]++] = vert; } } } - csc_out_edges = Compact_Children_Edges(std::move(csc_edge_children), std::move(csc_source_ptr)); - csr_in_edges = Compact_Parent_Edges(std::move(csr_edge_parents), std::move(csr_target_ptr)); + cscOutEdges_ = CompactChildrenEdges(std::move(cscEdgeChildren), std::move(cscSourcePtr)); + csrInEdges_ = CompactParentEdges(std::move(csrEdgeParents), std::move(csrTargetPtr)); } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type &edges, const std::vector &ww) - : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - assert((ww.size() == static_cast(num_vertices())) + template + CompactSparseGraph(VertexIdx numVertices, const EdgeListType &edges, const std::vector &ww) + : CompactSparseGraph(numVertices, edges) { + static_assert(useWorkWeights, "To set work weight, graph type must allow work weights."); + assert((ww.size() == static_cast(NumVertices())) && "Work weights vector must have the same length as the number of vertices."); - if constexpr (keep_vertex_order) { - vert_work_weights = ww; + if constexpr (keepVertexOrder) { + vertWorkWeights_ = ww; } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertWorkWeights_[vert] = ww[vertexPermutationFromInternalToOriginal_[vert]]; } } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, edge_list_type &edges, const std::vector &&ww) - : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - assert((ww.size() == static_cast(num_vertices())) + template + CompactSparseGraph(VertexIdx numVertices, EdgeListType 
&edges, const std::vector &&ww) + : CompactSparseGraph(numVertices, edges) { + static_assert(useWorkWeights, "To set work weight, graph type must allow work weights."); + assert((ww.size() == static_cast(NumVertices())) && "Work weights vector must have the same length as the number of vertices."); - if constexpr (keep_vertex_order) { - vert_work_weights = std::move(ww); + if constexpr (keepVertexOrder) { + vertWorkWeights_ = std::move(ww); } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertWorkWeights_[vert] = ww[vertexPermutationFromInternalToOriginal_[vert]]; } } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, - const edge_list_type &edges, - const std::vector &ww, - const std::vector &cw) - : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - assert((ww.size() == static_cast(num_vertices())) + template + CompactSparseGraph(VertexIdx numVertices, + const EdgeListType &edges, + const std::vector &ww, + const std::vector &cw) + : CompactSparseGraph(numVertices, edges) { + static_assert(useWorkWeights, "To set work weight, graph type must allow work weights."); + static_assert(useCommWeights, "To set communication weight, graph type must allow communication weights."); + assert((ww.size() == static_cast(NumVertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) + assert((cw.size() == static_cast(NumVertices())) && "Communication weights vector must have the same length as the number of vertices."); - if constexpr (keep_vertex_order) { - vert_work_weights = ww; + if constexpr (keepVertexOrder) { + vertWorkWeights_ = ww; } else { - for (auto vert : 
vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertWorkWeights_[vert] = ww[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_comm_weights = cw; + if constexpr (keepVertexOrder) { + vertCommWeights_ = cw; } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertCommWeights_[vert] = cw[vertexPermutationFromInternalToOriginal_[vert]]; } } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, - const edge_list_type &edges, - std::vector &&ww, - std::vector &&cw) - : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - assert((ww.size() == static_cast(num_vertices())) + template + CompactSparseGraph(VertexIdx numVertices, + const EdgeListType &edges, + std::vector &&ww, + std::vector &&cw) + : CompactSparseGraph(numVertices, edges) { + static_assert(useWorkWeights, "To set work weight, graph type must allow work weights."); + static_assert(useCommWeights, "To set communication weight, graph type must allow communication weights."); + assert((ww.size() == static_cast(NumVertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) + assert((cw.size() == static_cast(NumVertices())) && "Communication weights vector must have the same length as the number of vertices."); - if constexpr (keep_vertex_order) { - vert_work_weights = std::move(ww); + if constexpr (keepVertexOrder) { + vertWorkWeights_ = std::move(ww); } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + for (auto 
vert : Vertices()) { + vertWorkWeights_[vert] = ww[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_comm_weights = std::move(cw); + if constexpr (keepVertexOrder) { + vertCommWeights_ = std::move(cw); } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertCommWeights_[vert] = cw[vertexPermutationFromInternalToOriginal_[vert]]; } } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, - const edge_list_type &edges, - const std::vector &ww, - const std::vector &cw, - const std::vector &mw) - : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); - assert((ww.size() == static_cast(num_vertices())) + template + CompactSparseGraph(VertexIdx numVertices, + const EdgeListType &edges, + const std::vector &ww, + const std::vector &cw, + const std::vector &mw) + : CompactSparseGraph(numVertices, edges) { + static_assert(useWorkWeights, "To set work weight, graph type must allow work weights."); + static_assert(useCommWeights, "To set communication weight, graph type must allow communication weights."); + static_assert(useMemWeights, "To set memory weight, graph type must allow memory weights."); + assert((ww.size() == static_cast(NumVertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) + assert((cw.size() == static_cast(NumVertices())) && "Communication weights vector must have the same length as the number of vertices."); - assert((mw.size() == static_cast(num_vertices())) + assert((mw.size() == static_cast(NumVertices())) && 
"Memory weights vector must have the same length as the number of vertices."); - if constexpr (keep_vertex_order) { - vert_work_weights = ww; + if constexpr (keepVertexOrder) { + vertWorkWeights_ = ww; } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertWorkWeights_[vert] = ww[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_comm_weights = cw; + if constexpr (keepVertexOrder) { + vertCommWeights_ = cw; } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertCommWeights_[vert] = cw[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_mem_weights = mw; + if constexpr (keepVertexOrder) { + vertMemWeights_ = mw; } else { - for (auto vert : vertices()) { - vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertMemWeights_[vert] = mw[vertexPermutationFromInternalToOriginal_[vert]]; } } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, - const edge_list_type &edges, - std::vector &&ww, - std::vector &&cw, - std::vector &&mw) - : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); - assert((ww.size() == static_cast(num_vertices())) + template + CompactSparseGraph(VertexIdx numVertices, + const EdgeListType &edges, + std::vector &&ww, + std::vector &&cw, + std::vector &&mw) + : CompactSparseGraph(numVertices, edges) { + static_assert(useWorkWeights, "To set work weight, graph type must allow work 
weights."); + static_assert(useCommWeights, "To set communication weight, graph type must allow communication weights."); + static_assert(useMemWeights, "To set memory weight, graph type must allow memory weights."); + assert((ww.size() == static_cast(NumVertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) + assert((cw.size() == static_cast(NumVertices())) && "Communication weights vector must have the same length as the number of vertices."); - assert((mw.size() == static_cast(num_vertices())) + assert((mw.size() == static_cast(NumVertices())) && "Memory weights vector must have the same length as the number of vertices."); - if constexpr (keep_vertex_order) { - vert_work_weights = std::move(ww); + if constexpr (keepVertexOrder) { + vertWorkWeights_ = std::move(ww); } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertWorkWeights_[vert] = ww[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_comm_weights = std::move(cw); + if constexpr (keepVertexOrder) { + vertCommWeights_ = std::move(cw); } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertCommWeights_[vert] = cw[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_mem_weights = std::move(mw); + if constexpr (keepVertexOrder) { + vertMemWeights_ = std::move(mw); } else { - for (auto vert : vertices()) { - vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertMemWeights_[vert] = mw[vertexPermutationFromInternalToOriginal_[vert]]; } } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, - const edge_list_type &edges, - const std::vector &ww, 
- const std::vector &cw, - const std::vector &mw, - const std::vector &vt) - : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); - static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types."); - assert((ww.size() == static_cast(num_vertices())) + template + CompactSparseGraph(VertexIdx numVertices, + const EdgeListType &edges, + const std::vector &ww, + const std::vector &cw, + const std::vector &mw, + const std::vector &vt) + : CompactSparseGraph(numVertices, edges) { + static_assert(useWorkWeights, "To set work weight, graph type must allow work weights."); + static_assert(useCommWeights, "To set communication weight, graph type must allow communication weights."); + static_assert(useMemWeights, "To set memory weight, graph type must allow memory weights."); + static_assert(useVertTypes, "To set vertex types, graph type must allow vertex types."); + assert((ww.size() == static_cast(NumVertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) + assert((cw.size() == static_cast(NumVertices())) && "Communication weights vector must have the same length as the number of vertices."); - assert((mw.size() == static_cast(num_vertices())) + assert((mw.size() == static_cast(NumVertices())) && "Memory weights vector must have the same length as the number of vertices."); - assert((vt.size() == static_cast(num_vertices())) + assert((vt.size() == static_cast(NumVertices())) && "Vertex type vector must have the same length as the number of vertices."); - if constexpr (keep_vertex_order) { - vert_work_weights = ww; + if constexpr (keepVertexOrder) { + vertWorkWeights_ = 
ww; } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertWorkWeights_[vert] = ww[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_comm_weights = cw; + if constexpr (keepVertexOrder) { + vertCommWeights_ = cw; } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertCommWeights_[vert] = cw[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_mem_weights = mw; + if constexpr (keepVertexOrder) { + vertMemWeights_ = mw; } else { - for (auto vert : vertices()) { - vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertMemWeights_[vert] = mw[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_types = vt; + if constexpr (keepVertexOrder) { + vertTypes_ = vt; } else { - for (auto vert : vertices()) { - vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertTypes_[vert] = vt[vertexPermutationFromInternalToOriginal_[vert]]; } } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, - const edge_list_type &edges, - std::vector &&ww, - std::vector &&cw, - std::vector &&mw, - std::vector &&vt) - : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); - static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types."); - assert((ww.size() == static_cast(num_vertices())) + template + 
CompactSparseGraph(VertexIdx numVertices, + const EdgeListType &edges, + std::vector &&ww, + std::vector &&cw, + std::vector &&mw, + std::vector &&vt) + : CompactSparseGraph(numVertices, edges) { + static_assert(useWorkWeights, "To set work weight, graph type must allow work weights."); + static_assert(useCommWeights, "To set communication weight, graph type must allow communication weights."); + static_assert(useMemWeights, "To set memory weight, graph type must allow memory weights."); + static_assert(useVertTypes, "To set vertex types, graph type must allow vertex types."); + assert((ww.size() == static_cast(NumVertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) + assert((cw.size() == static_cast(NumVertices())) && "Communication weights vector must have the same length as the number of vertices."); - assert((mw.size() == static_cast(num_vertices())) + assert((mw.size() == static_cast(NumVertices())) && "Memory weights vector must have the same length as the number of vertices."); - assert((vt.size() == static_cast(num_vertices())) + assert((vt.size() == static_cast(NumVertices())) && "Vertex type vector must have the same length as the number of vertices."); - if constexpr (keep_vertex_order) { - vert_work_weights = std::move(ww); + if constexpr (keepVertexOrder) { + vertWorkWeights_ = std::move(ww); } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertWorkWeights_[vert] = ww[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_comm_weights = std::move(cw); + if constexpr (keepVertexOrder) { + vertCommWeights_ = std::move(cw); } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertCommWeights_[vert] = 
cw[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_mem_weights = std::move(mw); + if constexpr (keepVertexOrder) { + vertMemWeights_ = std::move(mw); } else { - for (auto vert : vertices()) { - vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertMemWeights_[vert] = mw[vertexPermutationFromInternalToOriginal_[vert]]; } } - if constexpr (keep_vertex_order) { - vert_types = std::move(vt); + if constexpr (keepVertexOrder) { + vertTypes_ = std::move(vt); } else { - for (auto vert : vertices()) { - vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]]; + for (auto vert : Vertices()) { + vertTypes_[vert] = vt[vertexPermutationFromInternalToOriginal_[vert]]; } } } - template - Compact_Sparse_Graph(const Graph_type &graph) : Compact_Sparse_Graph(graph.num_vertices(), edge_view(graph)) { - static_assert(is_directed_graph_v); + template + CompactSparseGraph(const GraphType &graph) : CompactSparseGraph(graph.NumVertices(), EdgeView(graph)) { + static_assert(isDirectedGraphV); - if constexpr (is_computational_dag_v && use_work_weights) { - for (const auto &vert : graph.vertices()) { - set_vertex_work_weight(vert, graph.vertex_work_weight(vert)); + if constexpr (isComputationalDagV && useWorkWeights) { + for (const auto &vert : graph.Vertices()) { + SetVertexWorkWeight(vert, graph.VertexWorkWeight(vert)); } } - if constexpr (is_computational_dag_v && use_comm_weights) { - for (const auto &vert : graph.vertices()) { - set_vertex_comm_weight(vert, graph.vertex_comm_weight(vert)); + if constexpr (isComputationalDagV && useCommWeights) { + for (const auto &vert : graph.Vertices()) { + SetVertexCommWeight(vert, graph.VertexCommWeight(vert)); } } - if constexpr (is_computational_dag_v && use_mem_weights) { - for (const auto &vert : graph.vertices()) { - set_vertex_mem_weight(vert, graph.vertex_mem_weight(vert)); + if constexpr (isComputationalDagV && 
useMemWeights) { + for (const auto &vert : graph.Vertices()) { + SetVertexMemWeight(vert, graph.VertexMemWeight(vert)); } } - if constexpr (is_computational_dag_typed_vertices_v && use_vert_types) { - for (const auto &vert : graph.vertices()) { - set_vertex_type(vert, graph.vertex_type(vert)); + if constexpr (isComputationalDagTypedVerticesV && useVertTypes) { + for (const auto &vert : graph.Vertices()) { + SetVertexType(vert, graph.VertexType(vert)); } } } - inline auto vertices() const { return integral_range(number_of_vertices); }; + inline auto Vertices() const { return IntegralRange(numberOfVertices_); }; - inline vert_t num_vertices() const { return number_of_vertices; }; + inline VertT NumVertices() const { return numberOfVertices_; }; - inline edge_t num_edges() const { return number_of_edges; } + inline EdgeT NumEdges() const { return numberOfEdges_; } - inline auto parents(const vertex_idx &v) const { return csr_in_edges.parents(v); }; + inline auto Parents(const VertexIdx &v) const { return csrInEdges_.Parents(v); }; - inline auto children(const vertex_idx &v) const { return csc_out_edges.children(v); }; + inline auto Children(const VertexIdx &v) const { return cscOutEdges_.Children(v); }; - inline edge_t in_degree(const vertex_idx &v) const { return csr_in_edges.number_of_parents(v); }; + inline EdgeT InDegree(const VertexIdx &v) const { return csrInEdges_.NumberOfParents(v); }; - inline edge_t out_degree(const vertex_idx &v) const { return csc_out_edges.number_of_children(v); }; + inline EdgeT OutDegree(const VertexIdx &v) const { return cscOutEdges_.NumberOfChildren(v); }; - template - inline std::enable_if_t vertex_work_weight(const vertex_idx &v) const { - return vert_work_weights[v]; + template + inline std::enable_if_t VertexWorkWeight(const VertexIdx &v) const { + return vertWorkWeights_[v]; } - template - inline std::enable_if_t vertex_work_weight(const vertex_idx &v) const { - return static_cast(1) + in_degree(v); + template + inline 
std::enable_if_t VertexWorkWeight(const VertexIdx &v) const { + return static_cast(1) + InDegree(v); } - template - inline std::enable_if_t vertex_comm_weight(const vertex_idx &v) const { - return vert_comm_weights[v]; + template + inline std::enable_if_t VertexCommWeight(const VertexIdx &v) const { + return vertCommWeights_[v]; } - template - inline std::enable_if_t vertex_comm_weight(const vertex_idx) const { + template + inline std::enable_if_t VertexCommWeight(const VertexIdx) const { return static_cast(0); } - template - inline std::enable_if_t vertex_mem_weight(const vertex_idx &v) const { - return vert_mem_weights[v]; + template + inline std::enable_if_t VertexMemWeight(const VertexIdx &v) const { + return vertMemWeights_[v]; } - template - inline std::enable_if_t vertex_mem_weight(const vertex_idx) const { + template + inline std::enable_if_t VertexMemWeight(const VertexIdx) const { return static_cast(0); } - template - inline std::enable_if_t vertex_type(const vertex_idx &v) const { - return vert_types[v]; + template + inline std::enable_if_t VertexType(const VertexIdx &v) const { + return vertTypes_[v]; } - template - inline std::enable_if_t vertex_type(const vertex_idx) const { + template + inline std::enable_if_t VertexType(const VertexIdx) const { return static_cast(0); } - inline vertex_type_type num_vertex_types() const { return number_of_vertex_types; }; + inline VertexTypeType NumVertexTypes() const { return numberOfVertexTypes_; }; template - inline std::enable_if_t set_vertex_work_weight(const vertex_idx &v, - const vertex_work_weight_type work_weight) { - if constexpr (keep_vertex_order) { - vert_work_weights[v] = work_weight; + inline std::enable_if_t SetVertexWorkWeight(const VertexIdx &v, const VertexWorkWeightType workWeight) { + if constexpr (keepVertexOrder) { + vertWorkWeights_[v] = workWeight; } else { - vert_work_weights[vertex_permutation_from_original_to_internal[v]] = work_weight; + 
vertWorkWeights_[vertexPermutationFromOriginalToInternal_[v]] = workWeight; } } template - inline std::enable_if_t set_vertex_work_weight(const vertex_idx &v, - const vertex_work_weight_type work_weight) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + inline std::enable_if_t SetVertexWorkWeight(const VertexIdx &v, + const VertexWorkWeightType workWeight) { + static_assert(useWorkWeights, "To set work weight, graph type must allow work weights."); } template - inline std::enable_if_t set_vertex_comm_weight(const vertex_idx &v, - const vertex_comm_weight_type comm_weight) { - if constexpr (keep_vertex_order) { - vert_comm_weights[v] = comm_weight; + inline std::enable_if_t SetVertexCommWeight(const VertexIdx &v, const VertexCommWeightType commWeight) { + if constexpr (keepVertexOrder) { + vertCommWeights_[v] = commWeight; } else { - vert_comm_weights[vertex_permutation_from_original_to_internal[v]] = comm_weight; + vertCommWeights_[vertexPermutationFromOriginalToInternal_[v]] = commWeight; } } template - inline std::enable_if_t set_vertex_comm_weight(const vertex_idx &v, - const vertex_comm_weight_type comm_weight) { - static_assert(use_comm_weights, "To set comm weight, graph type must allow comm weights."); + inline std::enable_if_t SetVertexCommWeight(const VertexIdx &v, + const VertexCommWeightType commWeight) { + static_assert(useCommWeights, "To set comm weight, graph type must allow comm weights."); } template - inline std::enable_if_t set_vertex_mem_weight(const vertex_idx &v, - const vertex_mem_weight_type mem_weight) { - if constexpr (keep_vertex_order) { - vert_mem_weights[v] = mem_weight; + inline std::enable_if_t SetVertexMemWeight(const VertexIdx &v, const VertexMemWeightType memWeight) { + if constexpr (keepVertexOrder) { + vertMemWeights_[v] = memWeight; } else { - vert_mem_weights[vertex_permutation_from_original_to_internal[v]] = mem_weight; + 
vertMemWeights_[vertexPermutationFromOriginalToInternal_[v]] = memWeight; } } template - inline std::enable_if_t set_vertex_mem_weight(const vertex_idx &v, - const vertex_mem_weight_type mem_weight) { - static_assert(use_mem_weights, "To set mem weight, graph type must allow mem weights."); + inline std::enable_if_t SetVertexMemWeight(const VertexIdx &v, const VertexMemWeightType memWeight) { + static_assert(useMemWeights, "To set mem weight, graph type must allow mem weights."); } template - inline std::enable_if_t set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) { - if constexpr (keep_vertex_order) { - vert_types[v] = vertex_type_; + inline std::enable_if_t SetVertexType(const VertexIdx &v, const VertexTypeType vertexType) { + if constexpr (keepVertexOrder) { + vertTypes_[v] = vertexType; } else { - vert_types[vertex_permutation_from_original_to_internal[v]] = vertex_type_; + vertTypes_[vertexPermutationFromOriginalToInternal_[v]] = vertexType; } - number_of_vertex_types = std::max(number_of_vertex_types, vertex_type_); + numberOfVertexTypes_ = std::max(numberOfVertexTypes_, vertexType); } template - inline std::enable_if_t set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) { - static_assert(use_vert_types, "To set vert type, graph type must allow vertex types."); + inline std::enable_if_t SetVertexType(const VertexIdx &v, const VertexTypeType vertexType) { + static_assert(useVertTypes, "To set vert type, graph type must allow vertex types."); } - template &> - inline std::enable_if_t get_pullback_permutation() const { - static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function."); + template &> + inline std::enable_if_t GetPullbackPermutation() const { + static_assert(!keepVertexOrder, "No permutation was applied. 
This is a deleted function."); return {}; } - template &> - inline std::enable_if_t get_pullback_permutation() const { - return vertex_permutation_from_internal_to_original; + template &> + inline std::enable_if_t GetPullbackPermutation() const { + return vertexPermutationFromInternalToOriginal_; } - template &> - inline std::enable_if_t get_pushforward_permutation() const { - static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function."); + template &> + inline std::enable_if_t GetPushforwardPermutation() const { + static_assert(!keepVertexOrder, "No permutation was applied. This is a deleted function."); return {}; } - template &> - inline std::enable_if_t get_pushforward_permutation() const { - return vertex_permutation_from_original_to_internal; + template &> + inline std::enable_if_t GetPushforwardPermutation() const { + return vertexPermutationFromOriginalToInternal_; } }; -template -struct is_Compact_Sparse_Graph, - void> : std::true_type {}; - -template -struct is_Compact_Sparse_Graph_reorder, - void> : std::true_type {}; - -static_assert(is_Compact_Sparse_Graph_v>); -static_assert(is_Compact_Sparse_Graph_v>); -static_assert(!is_Compact_Sparse_Graph_reorder_v>); -static_assert(is_Compact_Sparse_Graph_reorder_v>); - -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); - -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); - -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph must satisfy the directed_graph concept"); - -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph must satisfy the directed_graph concept"); - -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph must satisfy the directed_graph concept"); - -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph must satisfy the directed_graph concept"); - -static_assert(is_computational_dag_v>, - "Compact_Sparse_Graph must satisfy the 
is_computation_dag concept"); - -static_assert(is_computational_dag_v>, - "Compact_Sparse_Graph must satisfy the is_computation_dag concept"); - -static_assert(is_computational_dag_typed_vertices_v>, - "Compact_Sparse_Graph must satisfy the is_computation_dag with types concept"); - -static_assert(is_computational_dag_typed_vertices_v>, - "Compact_Sparse_Graph must satisfy the is_computation_dag with types concept"); - -static_assert(is_direct_constructable_cdag_v>, - "Compact_Sparse_Graph must be directly constructable"); - -static_assert(is_direct_constructable_cdag_v>, - "Compact_Sparse_Graph must be directly constructable"); - -using CSG = Compact_Sparse_Graph; - -static_assert(is_directed_graph_edge_desc_v, "CSG must satisfy the directed_graph_edge_desc concept"); +template +struct IsCompactSparseGraph< + CompactSparseGraph, + void> : std::true_type {}; + +template +struct IsCompactSparseGraphReorder< + CompactSparseGraph, + void> : std::true_type {}; + +static_assert(isCompactSparseGraphV>); +static_assert(isCompactSparseGraphV>); +static_assert(!isCompactSparseGraphReorderV>); +static_assert(isCompactSparseGraphReorderV>); + +static_assert(hasVertexWeightsV>, "CompactSparseGraph must satisfy the has_vertex_weights concept"); + +static_assert(hasVertexWeightsV>, "CompactSparseGraph must satisfy the has_vertex_weights concept"); + +static_assert(isDirectedGraphV>, + "CompactSparseGraph must satisfy the directed_graph concept"); + +static_assert(isDirectedGraphV>, + "CompactSparseGraph must satisfy the directed_graph concept"); + +static_assert(isDirectedGraphV>, + "CompactSparseGraph must satisfy the directed_graph concept"); + +static_assert(isDirectedGraphV>, + "CompactSparseGraph must satisfy the directed_graph concept"); + +static_assert(isComputationalDagV>, + "CompactSparseGraph must satisfy the is_computation_dag concept"); + +static_assert(isComputationalDagV>, + "CompactSparseGraph must satisfy the is_computation_dag concept"); + 
+static_assert(isComputationalDagTypedVerticesV>, + "CompactSparseGraph must satisfy the is_computation_dag with types concept"); + +static_assert(isComputationalDagTypedVerticesV>, + "CompactSparseGraph must satisfy the is_computation_dag with types concept"); + +static_assert(isDirectConstructableCdagV>, "CompactSparseGraph must be directly constructable"); + +static_assert(isDirectConstructableCdagV>, "CompactSparseGraph must be directly constructable"); + +using CSG = CompactSparseGraph; + +static_assert(isDirectedGraphEdgeDescV, "CSG must satisfy the directed_graph_edge_desc concept"); // // Graph specific implementations -// template -// bool coarser_util::construct_coarse_dag( -// const Graph_t_in &dag_in, -// Compact_Sparse_Graph &coarsened_dag, -// std::vector>> +// bool useWorkWeights, bool useCommWeights, bool useMemWeights, bool useVertTypes, typename VertT, typename +// EdgeT, typename WorkWeightType, typename CommWeightType, typename MemWeightType, typename +// VertexTypeTemplateType> +// bool coarser_util::ConstructCoarseDag( +// const GraphTIn &dag_in, +// CompactSparseGraph &coarsened_dag, +// std::vector>> // &vertex_contraction_map) { -// using Graph_out_type = Compact_Sparse_Graph; +// using Graph_out_type = CompactSparseGraph; -// static_assert(is_directed_graph_v && is_directed_graph_v, "Graph types need to satisfy the -// is_directed_graph concept."); static_assert(is_computational_dag_v, "Graph_t_in must be a computational DAG"); -// static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, "Graph_out_type +// static_assert(isDirectedGraphV && isDirectedGraphV, "Graph types need to satisfy the +// is_directed_graph concept."); static_assert(isComputationalDagV, "GraphTIn must be a computational DAG"); +// static_assert(isConstructableCdagV || isDirectConstructableCdagV, "Graph_out_type // must be a (direct) constructable computational DAG"); -// assert(check_valid_contraction_map(vertex_contraction_map)); +// 
assert(CheckValidContractionMap(vertex_contraction_map)); -// const vertex_idx_t num_vert_quotient = +// const VertexIdxT num_vert_quotient = // (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; -// std::set, vertex_idx_t>> quotient_edges; +// std::set, VertexIdxT>> quotient_edges; -// for (const vertex_idx_t &vert : dag_in.vertices()) { -// for (const vertex_idx_t &chld : dag_in.children(vert)) { +// for (const VertexIdxT &vert : dag_in.Vertices()) { +// for (const VertexIdxT &chld : dag_in.Children(vert)) { // if (vertex_contraction_map[vert] == vertex_contraction_map[chld]) { // continue; // } @@ -1079,52 +1049,52 @@ static_assert(is_directed_graph_edge_desc_v, "CSG must satisfy the directed // coarsened_dag = Graph_out_type(num_vert_quotient, quotient_edges); -// const auto& pushforward_map = coarsened_dag.get_pushforward_permutation(); -// std::vector> combined_expansion_map(dag_in.num_vertices()); -// for (const auto &vert : dag_in.vertices()) { +// const auto& pushforward_map = coarsened_dag.GetPushforwardPermutation(); +// std::vector> combined_expansion_map(dag_in.NumVertices()); +// for (const auto &vert : dag_in.Vertices()) { // combined_expansion_map[vert] = pushforward_map[vertex_contraction_map[vert]]; // } -// if constexpr (has_vertex_weights_v && is_modifiable_cdag_vertex_v) { -// static_assert(std::is_same_v, v_workw_t>, "Work weight types of in-graph and -// out-graph must be the same."); static_assert(std::is_same_v, v_commw_t>, "Vertex -// communication types of in-graph and out-graph must be the same."); static_assert(std::is_same_v, -// v_memw_t>, "Memory weight types of in-graph and out-graph must be the same."); +// if constexpr (hasVertexWeightsV && isModifiableCdagVertexV) { +// static_assert(std::is_same_v, VWorkwT>, "Work weight types of in-graph and +// out-graph must be the same."); static_assert(std::is_same_v, VCommwT>, "Vertex +// communication types of in-graph and out-graph must be the 
same."); static_assert(std::is_same_v, +// VMemwT>, "Memory weight types of in-graph and out-graph must be the same."); -// for (const vertex_idx_t &vert : coarsened_dag.vertices()) { -// coarsened_dag.set_vertex_work_weight(vert, 0); -// coarsened_dag.set_vertex_comm_weight(vert, 0); -// coarsened_dag.set_vertex_mem_weight(vert, 0); +// for (const VertexIdxT &vert : coarsened_dag.Vertices()) { +// coarsened_dag.SetVertexWorkWeight(vert, 0); +// coarsened_dag.SetVertexCommWeight(vert, 0); +// coarsened_dag.SetVertexMemWeight(vert, 0); // } -// for (const vertex_idx_t &vert : dag_in.vertices()) { -// coarsened_dag.set_vertex_work_weight( +// for (const VertexIdxT &vert : dag_in.Vertices()) { +// coarsened_dag.SetVertexWorkWeight( // vertex_contraction_map[vert], -// v_work_acc_method()(coarsened_dag.vertex_work_weight(combined_expansion_map[vert]), -// dag_in.vertex_work_weight(vert))); +// v_work_acc_method()(coarsened_dag.VertexWorkWeight(combined_expansion_map[vert]), +// dag_in.VertexWorkWeight(vert))); -// coarsened_dag.set_vertex_comm_weight( +// coarsened_dag.SetVertexCommWeight( // vertex_contraction_map[vert], -// v_comm_acc_method()(coarsened_dag.vertex_comm_weight(combined_expansion_map[vert]), -// dag_in.vertex_comm_weight(vert))); +// v_comm_acc_method()(coarsened_dag.VertexCommWeight(combined_expansion_map[vert]), +// dag_in.VertexCommWeight(vert))); -// coarsened_dag.set_vertex_mem_weight( +// coarsened_dag.SetVertexMemWeight( // vertex_contraction_map[vert], -// v_mem_acc_method()(coarsened_dag.vertex_mem_weight(combined_expansion_map[vert]), -// dag_in.vertex_mem_weight(vert))); +// v_mem_acc_method()(coarsened_dag.VertexMemWeight(combined_expansion_map[vert]), +// dag_in.VertexMemWeight(vert))); // } // } -// if constexpr (has_typed_vertices_v && is_modifiable_cdag_typed_vertex_v) { -// static_assert(std::is_same_v, v_type_t>, +// if constexpr (hasTypedVerticesV && is_modifiable_cdag_typed_vertex_v) { +// static_assert(std::is_same_v, VTypeT>, // 
"Vertex type types of in graph and out graph must be the same!"); -// for (const vertex_idx_t &vert : dag_in.vertices()) { -// coarsened_dag.set_vertex_type(vertex_contraction_map[vert], dag_in.vertex_type(vert)); +// for (const VertexIdxT &vert : dag_in.Vertices()) { +// coarsened_dag.SetVertexType(vertex_contraction_map[vert], dag_in.VertexType(vert)); // } -// // assert(std::all_of(dag_in.vertices().begin(), dag_in.vertices().end(), +// // assert(std::all_of(dag_in.Vertices().begin(), dag_in.Vertices().end(), // // [&dag_in, &vertex_contraction_map, &coarsened_dag](const auto &vert){ return -// // dag_in.vertex_type(vert) == coarsened_dag.vertex_type(vertex_contraction_map[vert]); }) +// // dag_in.VertexType(vert) == coarsened_dag.VertexType(vertex_contraction_map[vert]); }) // // && "Contracted vertices must be of the same type"); // } diff --git a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp index 68b8b967..7e9a8b3c 100644 --- a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp +++ b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp @@ -22,169 +22,169 @@ limitations under the License. 
namespace osp { -template -class Compact_Sparse_Graph_EdgeDesc : public Compact_Sparse_Graph { +template +class CompactSparseGraphEdgeDesc : public CompactSparseGraph { private: - using ThisT = Compact_Sparse_Graph_EdgeDesc; - using BaseT = Compact_Sparse_Graph; + using ThisT = CompactSparseGraphEdgeDesc; + using BaseT = CompactSparseGraph; public: - using vertex_idx = typename BaseT::vertex_idx; + using VertexIdx = typename BaseT::VertexIdx; - using vertex_work_weight_type = typename BaseT::vertex_work_weight_type; - using vertex_comm_weight_type = typename BaseT::vertex_comm_weight_type; - using vertex_mem_weight_type = typename BaseT::vertex_mem_weight_type; - using vertex_type_type = typename BaseT::vertex_type_type; + using VertexWorkWeightType = typename BaseT::VertexWorkWeightType; + using VertexCommWeightType = typename BaseT::VertexCommWeightType; + using VertexMemWeightType = typename BaseT::VertexMemWeightType; + using VertexTypeType = typename BaseT::VertexTypeType; - using directed_edge_descriptor = edge_t; - using edge_comm_weight_type = e_comm_weight_type; + using DirectedEdgeDescriptor = EdgeT; + using EdgeCommWeightType = ECommWeightType; protected: - std::vector edge_comm_weights; + std::vector edgeCommWeights_; - class In_Edges_range { + class InEdgesRange { private: - const vertex_idx tgt_vert; - const typename BaseT::Compact_Parent_Edges::Parent_range par_range; - const typename BaseT::Compact_Children_Edges &csc_out_edges; + const VertexIdx tgtVert_; + const typename BaseT::CompactParentEdges::ParentRange parRange_; + const typename BaseT::CompactChildrenEdges &cscOutEdges_; - class In_Edges_iterator { + class InEdgesIterator { public: using iterator_category = std::bidirectional_iterator_tag; using difference_type = std::ptrdiff_t; - using value_type = edge_t; - using pointer = vertex_idx *; - using reference = edge_t &; + using value_type = EdgeT; + using pointer = VertexIdx *; + using reference = EdgeT &; private: - const vertex_idx 
target_vert; - const typename BaseT::Compact_Children_Edges &csc_out_edges; + const VertexIdx targetVert_; + const typename BaseT::CompactChildrenEdges &cscOutEdges_; - typename std::vector::const_iterator current; + typename std::vector::const_iterator current_; public: - In_Edges_iterator(const vertex_idx &target_vert_, - const typename BaseT::Compact_Children_Edges &csc_out_edges_, - const typename std::vector::const_iterator start_) - : target_vert(target_vert_), csc_out_edges(csc_out_edges_), current(start_) {}; - In_Edges_iterator(const In_Edges_iterator &other) - : target_vert(other.target_vert), csc_out_edges(other.csc_out_edges), current(other.current) {}; - - In_Edges_iterator &operator=(const In_Edges_iterator &other) { + InEdgesIterator(const VertexIdx &targetVert, + const typename BaseT::CompactChildrenEdges &cscOutEdges, + const typename std::vector::const_iterator start) + : targetVert_(targetVert), cscOutEdges_(cscOutEdges), current_(start) {}; + InEdgesIterator(const InEdgesIterator &other) + : targetVert_(other.targetVert_), cscOutEdges_(other.cscOutEdges_), current_(other.current_) {}; + + InEdgesIterator &operator=(const InEdgesIterator &other) { if (this != &other) { - target_vert = other.target_vert; - csc_out_edges = other.csc_out_edges; - current = other.current; + targetVert_ = other.targetVert_; + cscOutEdges_ = other.cscOutEdges_; + current_ = other.current_; } return *this; }; inline value_type operator*() const { - const vertex_idx src_vert = *current; - typename BaseT::Compact_Children_Edges::Children_range range = csc_out_edges.children(src_vert); + const VertexIdx srcVert = *current_; + typename BaseT::CompactChildrenEdges::ChildrenRange range = cscOutEdges_.Children(srcVert); - assert(std::binary_search(range.cbegin(), range.cend(), target_vert)); - auto it = std::lower_bound(range.cbegin(), range.cend(), target_vert); + assert(std::binary_search(range.cbegin(), range.cend(), targetVert_)); + auto it = 
std::lower_bound(range.cbegin(), range.cend(), targetVert_); - edge_t diff = static_cast(std::distance(range.cbegin(), it)); - edge_t edge_desc_val = csc_out_edges.children_indx_begin(src_vert) + diff; + EdgeT diff = static_cast(std::distance(range.cbegin(), it)); + EdgeT edgeDescVal = cscOutEdges_.ChildrenIndxBegin(srcVert) + diff; - return edge_desc_val; + return edgeDescVal; }; - inline In_Edges_iterator &operator++() { - ++current; + inline InEdgesIterator &operator++() { + ++current_; return *this; }; - inline In_Edges_iterator operator++(int) { - In_Edges_iterator temp = *this; + inline InEdgesIterator operator++(int) { + InEdgesIterator temp = *this; ++(*this); return temp; }; - inline In_Edges_iterator &operator--() { - --current; + inline InEdgesIterator &operator--() { + --current_; return *this; }; - inline In_Edges_iterator operator--(int) { - In_Edges_iterator temp = *this; + inline InEdgesIterator operator--(int) { + InEdgesIterator temp = *this; --(*this); return temp; }; - inline bool operator==(const In_Edges_iterator &other) const { return current == other.current; }; + inline bool operator==(const InEdgesIterator &other) const { return current_ == other.current_; }; - inline bool operator!=(const In_Edges_iterator &other) const { return !(*this == other); }; + inline bool operator!=(const InEdgesIterator &other) const { return !(*this == other); }; - inline bool operator<=(const In_Edges_iterator &other) const { return current <= other.current; }; + inline bool operator<=(const InEdgesIterator &other) const { return current_ <= other.current_; }; - inline bool operator<(const In_Edges_iterator &other) const { return (*this <= other) && (*this != other); }; + inline bool operator<(const InEdgesIterator &other) const { return (*this <= other) && (*this != other); }; - inline bool operator>=(const In_Edges_iterator &other) const { return (!(*this <= other)) || (*this == other); }; + inline bool operator>=(const InEdgesIterator &other) const { return 
(!(*this <= other)) || (*this == other); }; - inline bool operator>(const In_Edges_iterator &other) const { return !(*this <= other); }; + inline bool operator>(const InEdgesIterator &other) const { return !(*this <= other); }; }; public: - In_Edges_range() = default; - In_Edges_range(const In_Edges_range &other) = default; - In_Edges_range(In_Edges_range &&other) = default; - In_Edges_range &operator=(const In_Edges_range &other) = default; - In_Edges_range &operator=(In_Edges_range &&other) = default; - virtual ~In_Edges_range() = default; + InEdgesRange() = default; + InEdgesRange(const InEdgesRange &other) = default; + InEdgesRange(InEdgesRange &&other) = default; + InEdgesRange &operator=(const InEdgesRange &other) = default; + InEdgesRange &operator=(InEdgesRange &&other) = default; + virtual ~InEdgesRange() = default; - In_Edges_range(const vertex_idx &tgt_vert_, const ThisT &graph, const typename BaseT::Compact_Children_Edges &csc_out_edges_) - : tgt_vert(tgt_vert_), par_range(graph.parents(tgt_vert_)), csc_out_edges(csc_out_edges_) {}; + InEdgesRange(const VertexIdx &tgtVert, const ThisT &graph, const typename BaseT::CompactChildrenEdges &cscOutEdges) + : tgtVert_(tgtVert), parRange_(graph.Parents(tgtVert)), cscOutEdges_(cscOutEdges) {}; - inline auto cbegin() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cbegin()); }; + inline auto cbegin() const { return InEdgesIterator(tgtVert_, cscOutEdges_, parRange_.cbegin()); }; - inline auto cend() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cend()); }; + inline auto cend() const { return InEdgesIterator(tgtVert_, cscOutEdges_, parRange_.cend()); }; inline auto begin() const { return cbegin(); }; @@ -192,523 +192,517 @@ class Compact_Sparse_Graph_EdgeDesc : public Compact_Sparse_Graph - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type &edges) : BaseT(num_vertices_, edges) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = 
std::vector(BaseT::num_edges(), static_cast(0)); + CompactSparseGraphEdgeDesc() = default; + CompactSparseGraphEdgeDesc(const CompactSparseGraphEdgeDesc &other) = default; + CompactSparseGraphEdgeDesc(CompactSparseGraphEdgeDesc &&other) = default; + CompactSparseGraphEdgeDesc &operator=(const CompactSparseGraphEdgeDesc &other) = default; + CompactSparseGraphEdgeDesc &operator=(CompactSparseGraphEdgeDesc &&other) = default; + virtual ~CompactSparseGraphEdgeDesc() = default; + + template + CompactSparseGraphEdgeDesc(VertexIdx numVertices, const EdgeListType &edges) : BaseT(numVertices, edges) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } } - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, - const edge_list_type &edges, - const std::vector &ww) - : BaseT(num_vertices_, edges, ww) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + template + CompactSparseGraphEdgeDesc(VertexIdx numVertices, const EdgeListType &edges, const std::vector &ww) + : BaseT(numVertices, edges, ww) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } } - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, - const edge_list_type &edges, - const std::vector &&ww) - : BaseT(num_vertices_, edges, std::move(ww)) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + template + CompactSparseGraphEdgeDesc(VertexIdx numVertices, const EdgeListType &edges, const std::vector &&ww) + : BaseT(numVertices, edges, std::move(ww)) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } } - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, - const edge_list_type &edges, - std::vector &ww, - std::vector &cw) - : BaseT(num_vertices_, edges, ww, cw) { - if constexpr 
(use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + template + CompactSparseGraphEdgeDesc(VertexIdx numVertices, + const EdgeListType &edges, + std::vector &ww, + std::vector &cw) + : BaseT(numVertices, edges, ww, cw) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } } - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, - const edge_list_type &edges, - std::vector &&ww, - std::vector &&cw) - : BaseT(num_vertices_, edges, std::move(ww), std::move(cw)) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + template + CompactSparseGraphEdgeDesc(VertexIdx numVertices, + const EdgeListType &edges, + std::vector &&ww, + std::vector &&cw) + : BaseT(numVertices, edges, std::move(ww), std::move(cw)) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } } - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, - const edge_list_type &edges, - const std::vector &ww, - const std::vector &cw, - const std::vector &mw) - : BaseT(num_vertices_, edges, ww, cw, mw) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + template + CompactSparseGraphEdgeDesc(VertexIdx numVertices, + const EdgeListType &edges, + const std::vector &ww, + const std::vector &cw, + const std::vector &mw) + : BaseT(numVertices, edges, ww, cw, mw) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } } - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, - const edge_list_type &edges, - const std::vector &&ww, - const std::vector &&cw, - const std::vector &&mw) - : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw)) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = 
std::vector(BaseT::num_edges(), static_cast(0)); + template + CompactSparseGraphEdgeDesc(VertexIdx numVertices, + const EdgeListType &edges, + const std::vector &&ww, + const std::vector &&cw, + const std::vector &&mw) + : BaseT(numVertices, edges, std::move(ww), std::move(cw), std::move(mw)) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } } - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, - const edge_list_type &edges, - const std::vector &ww, - const std::vector &cw, - const std::vector &mw, - const std::vector &vt) - : BaseT(num_vertices_, edges, ww, cw, mw, vt) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + template + CompactSparseGraphEdgeDesc(VertexIdx numVertices, + const EdgeListType &edges, + const std::vector &ww, + const std::vector &cw, + const std::vector &mw, + const std::vector &vt) + : BaseT(numVertices, edges, ww, cw, mw, vt) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } } - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, - const edge_list_type &edges, - const std::vector &&ww, - const std::vector &&cw, - const std::vector &&mw, - const std::vector &&vt) - : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw), std::move(vt)) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + template + CompactSparseGraphEdgeDesc(VertexIdx numVertices, + const EdgeListType &edges, + const std::vector &&ww, + const std::vector &&cw, + const std::vector &&mw, + const std::vector &&vt) + : BaseT(numVertices, edges, std::move(ww), std::move(cw), std::move(mw), std::move(vt)) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } } - template - Compact_Sparse_Graph_EdgeDesc(const Graph_type &graph) : 
BaseT(graph) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + template + CompactSparseGraphEdgeDesc(const GraphType &graph) : BaseT(graph) { + if constexpr (useEdgeCommWeights) { + edgeCommWeights_ = std::vector(BaseT::NumEdges(), static_cast(0)); } - if constexpr (has_edge_weights_v && use_edge_comm_weights) { - for (const auto &edge : edges(graph)) { - const auto src = source(edge, graph); - const auto tgt = target(edge, graph); - set_edge_comm_weight(src, tgt, graph.edge_comm_weight(edge)); + if constexpr (hasEdgeWeightsV && useEdgeCommWeights) { + for (const auto &edge : Edges(graph)) { + const auto src = Source(edge, graph); + const auto tgt = Target(edge, graph); + SetEdgeCommWeight(src, tgt, graph.EdgeCommWeight(edge)); } } } - inline auto edges() const { return integral_range(BaseT::number_of_edges); }; + inline auto Edges() const { return IntegralRange(BaseT::numberOfEdges_); }; - inline directed_edge_descriptor edge(const vertex_idx &src, const vertex_idx &tgt) const { - typename BaseT::Compact_Children_Edges::Children_range range = BaseT::csc_out_edges.children(src); + inline DirectedEdgeDescriptor Edge(const VertexIdx &src, const VertexIdx &tgt) const { + typename BaseT::CompactChildrenEdges::ChildrenRange range = BaseT::cscOutEdges_.Children(src); assert(std::binary_search(range.cbegin(), range.cend(), tgt)); auto it = std::lower_bound(range.cbegin(), range.cend(), tgt); - directed_edge_descriptor diff = static_cast(std::distance(range.cbegin(), it)); - directed_edge_descriptor edge_desc_val = BaseT::csc_out_edges.children_indx_begin(src) + diff; + DirectedEdgeDescriptor diff = static_cast(std::distance(range.cbegin(), it)); + DirectedEdgeDescriptor edgeDescVal = BaseT::cscOutEdges_.ChildrenIndxBegin(src) + diff; - return edge_desc_val; + return edgeDescVal; }; - inline vertex_idx source(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.source(edge); }; + inline 
VertexIdx Source(const DirectedEdgeDescriptor &edge) const { return BaseT::cscOutEdges_.Source(edge); }; - inline vertex_idx target(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.target(edge); }; + inline VertexIdx Target(const DirectedEdgeDescriptor &edge) const { return BaseT::cscOutEdges_.Target(edge); }; - inline auto out_edges(const vertex_idx &vert) const { - return integral_range(BaseT::csc_out_edges.children_indx_begin(vert), - BaseT::csc_out_edges.children_indx_begin(vert + 1)); + inline auto OutEdges(const VertexIdx &vert) const { + return IntegralRange(BaseT::cscOutEdges_.ChildrenIndxBegin(vert), + BaseT::cscOutEdges_.ChildrenIndxBegin(vert + 1)); }; - inline auto in_edges(const vertex_idx &vert) const { return In_Edges_range(vert, *this, BaseT::csc_out_edges); }; + inline auto InEdges(const VertexIdx &vert) const { return InEdgesRange(vert, *this, BaseT::cscOutEdges_); }; - template - inline std::enable_if_t edge_comm_weight(const directed_edge_descriptor &edge) const { - return edge_comm_weights[edge]; + template + inline std::enable_if_t EdgeCommWeight(const DirectedEdgeDescriptor &edge) const { + return edgeCommWeights_[edge]; } - template - inline std::enable_if_t edge_comm_weight(const directed_edge_descriptor &edge) const { + template + inline std::enable_if_t EdgeCommWeight(const DirectedEdgeDescriptor &edge) const { return static_cast(1); } template - inline std::enable_if_t set_edge_comm_weight(const vertex_idx &src, - const vertex_idx &tgt, - const edge_comm_weight_type e_comm_weight) { - if constexpr (keep_vertex_order) { - edge_comm_weights[edge(src, tgt)] = e_comm_weight; + inline std::enable_if_t SetEdgeCommWeight(const VertexIdx &src, + const VertexIdx &tgt, + const EdgeCommWeightType eCommWeight) { + if constexpr (keepVertexOrder) { + edgeCommWeights_[Edge(src, tgt)] = eCommWeight; } else { - const vertex_idx internal_src = BaseT::vertex_permutation_from_original_to_internal[src]; - const vertex_idx 
internal_tgt = BaseT::vertex_permutation_from_original_to_internal[tgt]; - edge_comm_weights[edge(internal_src, internal_tgt)] = e_comm_weight; + const VertexIdx internalSrc = BaseT::vertexPermutationFromOriginalToInternal_[src]; + const VertexIdx internalTgt = BaseT::vertexPermutationFromOriginalToInternal_[tgt]; + edgeCommWeights_[Edge(internalSrc, internalTgt)] = eCommWeight; } } template - inline std::enable_if_t set_edge_comm_weight(const vertex_idx &src, - const vertex_idx &tgt, - const edge_comm_weight_type e_comm_weight) { - static_assert(use_edge_comm_weights, "To set edge communication weight, graph type must allow edge communication weights."); + inline std::enable_if_t SetEdgeCommWeight(const VertexIdx &src, + const VertexIdx &tgt, + const EdgeCommWeightType eCommWeight) { + static_assert(useEdgeCommWeights, "To set edge communication weight, graph type must allow edge communication weights."); } }; -template -inline auto edges(const Compact_Sparse_Graph_EdgeDesc &graph) { - return graph.edges(); +template +inline auto Edges(const CompactSparseGraphEdgeDesc &graph) { + return graph.Edges(); } -template -inline auto out_edges(vertex_idx_t> v, - const Compact_Sparse_Graph_EdgeDesc &graph) { - return graph.out_edges(v); +template +inline auto OutEdges(VertexIdxT> v, + const CompactSparseGraphEdgeDesc &graph) { + return graph.OutEdges(v); } -template -inline auto in_edges(vertex_idx_t> v, - const Compact_Sparse_Graph_EdgeDesc &graph) { - return graph.in_edges(v); +template +inline auto InEdges(VertexIdxT> v, + const CompactSparseGraphEdgeDesc &graph) { + return graph.InEdges(v); } -template -inline vertex_idx_t> -source(const edge_desc_t> &edge, - const Compact_Sparse_Graph_EdgeDesc &graph) { - return graph.source(edge); +template +inline VertexIdxT> +Source(const EdgeDescT> &edge, + const CompactSparseGraphEdgeDesc &graph) { + return graph.Source(edge); } -template -inline vertex_idx_t> -target(const edge_desc_t> &edge, - const 
Compact_Sparse_Graph_EdgeDesc &graph) { - return graph.target(edge); +template +inline VertexIdxT> +Target(const EdgeDescT> &edge, + const CompactSparseGraphEdgeDesc &graph) { + return graph.Target(edge); } -template -struct is_Compact_Sparse_Graph, - void> : std::true_type {}; - -template -struct is_Compact_Sparse_Graph_reorder, - void> : std::true_type {}; - -static_assert(is_Compact_Sparse_Graph_v>); -static_assert(is_Compact_Sparse_Graph_v>); -static_assert(!is_Compact_Sparse_Graph_reorder_v>); -static_assert(is_Compact_Sparse_Graph_reorder_v>); - -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept"); - -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept"); - -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); - -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); - -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); - -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); - -static_assert(is_computational_dag_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept"); - -static_assert(is_computational_dag_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept"); - -static_assert(is_computational_dag_typed_vertices_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept"); - -static_assert(is_computational_dag_typed_vertices_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept"); - -static_assert(is_directed_graph_edge_desc_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept."); - -static_assert(is_directed_graph_edge_desc_v>, - 
"Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept."); - -static_assert( - is_computational_dag_typed_vertices_edge_desc_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computational_dag_typed_vertices_edge_desc_v with types concept"); - -static_assert( - is_computational_dag_typed_vertices_edge_desc_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computational_dag_typed_vertices_edge_desc_v with types concept"); - -static_assert(has_edge_weights_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept"); - -static_assert(has_edge_weights_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept"); - -static_assert(has_hashable_edge_desc_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept"); - -static_assert(has_hashable_edge_desc_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept"); +template +struct IsCompactSparseGraph, + void> : std::true_type {}; + +template +struct IsCompactSparseGraphReorder, + void> : std::true_type {}; + +static_assert(isCompactSparseGraphV>); +static_assert(isCompactSparseGraphV>); +static_assert(!isCompactSparseGraphReorderV>); +static_assert(isCompactSparseGraphReorderV>); + +static_assert(hasVertexWeightsV>, + "CompactSparseGraphEdgeDesc must satisfy the has_vertex_weights concept"); + +static_assert(hasVertexWeightsV>, + "CompactSparseGraphEdgeDesc must satisfy the has_vertex_weights concept"); + +static_assert(isDirectedGraphV>, + "CompactSparseGraphEdgeDesc must satisfy the directed_graph concept"); + +static_assert(isDirectedGraphV>, + "CompactSparseGraphEdgeDesc must satisfy the directed_graph concept"); + +static_assert(isDirectedGraphV>, + "CompactSparseGraphEdgeDesc must satisfy the directed_graph concept"); + +static_assert(isDirectedGraphV>, + "CompactSparseGraphEdgeDesc must satisfy the directed_graph concept"); + +static_assert(isComputationalDagV>, + 
"CompactSparseGraphEdgeDesc must satisfy the is_computation_dag concept"); + +static_assert(isComputationalDagV>, + "CompactSparseGraphEdgeDesc must satisfy the is_computation_dag concept"); + +static_assert(isComputationalDagTypedVerticesV>, + "CompactSparseGraphEdgeDesc must satisfy the is_computation_dag with types concept"); + +static_assert(isComputationalDagTypedVerticesV>, + "CompactSparseGraphEdgeDesc must satisfy the is_computation_dag with types concept"); + +static_assert(isDirectedGraphEdgeDescV>, + "CompactSparseGraphEdgeDesc must satisfy the directed graph edge descriptor concept."); + +static_assert(isDirectedGraphEdgeDescV>, + "CompactSparseGraphEdgeDesc must satisfy the directed graph edge descriptor concept."); + +static_assert(isComputationalDagTypedVerticesEdgeDescV>, + "CompactSparseGraphEdgeDesc must satisfy the isComputationalDagTypedVerticesEdgeDescV with types concept"); + +static_assert(isComputationalDagTypedVerticesEdgeDescV>, + "CompactSparseGraphEdgeDesc must satisfy the isComputationalDagTypedVerticesEdgeDescV with types concept"); + +static_assert(hasEdgeWeightsV>, + "CompactSparseGraphEdgeDesc must satisfy the has_edge_weights concept"); + +static_assert(hasEdgeWeightsV>, + "CompactSparseGraphEdgeDesc must satisfy the has_edge_weights concept"); + +static_assert(hasHashableEdgeDescV>, + "CompactSparseGraphEdgeDesc must satisfy the HasHashableEdgeDesc concept"); + +static_assert(hasHashableEdgeDescV>, + "CompactSparseGraphEdgeDesc must satisfy the HasHashableEdgeDesc concept"); using CSGE - = Compact_Sparse_Graph_EdgeDesc; + = CompactSparseGraphEdgeDesc; } // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp b/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp index f4445622..1cd7944a 100644 --- a/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp +++ 
b/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp @@ -27,285 +27,264 @@ limitations under the License. namespace osp { -template -struct directed_edge_descriptor_impl { - using vertex_idx = typename v_impl::vertex_idx_type; +template +struct DirectedEdgeDescriptorImpl { + using VertexIdx = typename VImpl::VertexIdxType; - vertex_idx idx; + VertexIdx idx_; - vertex_idx source; - vertex_idx target; + VertexIdx source_; + VertexIdx target_; - directed_edge_descriptor_impl() : idx(0), source(0), target(0) {} + DirectedEdgeDescriptorImpl() : idx_(0), source_(0), target_(0) {} - directed_edge_descriptor_impl(const directed_edge_descriptor_impl &other) = default; - directed_edge_descriptor_impl(directed_edge_descriptor_impl &&other) = default; - directed_edge_descriptor_impl &operator=(const directed_edge_descriptor_impl &other) = default; - directed_edge_descriptor_impl &operator=(directed_edge_descriptor_impl &&other) = default; + DirectedEdgeDescriptorImpl(const DirectedEdgeDescriptorImpl &other) = default; + DirectedEdgeDescriptorImpl(DirectedEdgeDescriptorImpl &&other) = default; + DirectedEdgeDescriptorImpl &operator=(const DirectedEdgeDescriptorImpl &other) = default; + DirectedEdgeDescriptorImpl &operator=(DirectedEdgeDescriptorImpl &&other) = default; - directed_edge_descriptor_impl(vertex_idx source_arg, vertex_idx target_arg, vertex_idx idx_arg) - : idx(idx_arg), source(source_arg), target(target_arg) {} + DirectedEdgeDescriptorImpl(VertexIdx sourceArg, VertexIdx targetArg, VertexIdx idxArg) + : idx_(idxArg), source_(sourceArg), target_(targetArg) {} - ~directed_edge_descriptor_impl() = default; + ~DirectedEdgeDescriptorImpl() = default; - bool operator==(const directed_edge_descriptor_impl &other) const { - return idx == other.idx && source == other.source && target == other.target; + bool operator==(const DirectedEdgeDescriptorImpl &other) const { + return idx_ == other.idx_ && source_ == other.source_ && target_ == 
other.target_; } - bool operator!=(const directed_edge_descriptor_impl &other) const { return !(*this == other); } + bool operator!=(const DirectedEdgeDescriptorImpl &other) const { return !(*this == other); } }; -template -struct cdag_edge_impl { - using cdag_edge_comm_weight_type = edge_comm_weight_t; +template +struct CDagEdgeImpl { + using CDagEdgeCommWeightType = EdgeCommWeightT; - cdag_edge_impl(edge_comm_weight_t comm_weight_arg = 1) : comm_weight(comm_weight_arg) {} + CDagEdgeImpl(EdgeCommWeightT commWeightArg = 1) : commWeight_(commWeightArg) {} - edge_comm_weight_t comm_weight; + EdgeCommWeightT commWeight_; }; -using cdag_edge_impl_int = cdag_edge_impl; -using cdag_edge_impl_unsigned = cdag_edge_impl; +using CDagEdgeImplInt = CDagEdgeImpl; +using CDagEdgeImplUnsigned = CDagEdgeImpl; -template -class computational_dag_edge_idx_vector_impl { +template +class ComputationalDagEdgeIdxVectorImpl { public: // graph_traits specialization - using vertex_idx = typename v_impl::vertex_idx_type; - using directed_edge_descriptor = directed_edge_descriptor_impl; + using VertexIdx = typename VImpl::VertexIdxType; + using DirectedEdgeDescriptor = DirectedEdgeDescriptorImpl; - using out_edges_iterator_t = typename std::vector::const_iterator; - using in_edges_iterator_t = typename std::vector::const_iterator; + using OutEdgesIteratorT = typename std::vector::const_iterator; + using InEdgesIteratorT = typename std::vector::const_iterator; // cdag_traits specialization - using vertex_work_weight_type = typename v_impl::work_weight_type; - using vertex_comm_weight_type = typename v_impl::comm_weight_type; - using vertex_mem_weight_type = typename v_impl::mem_weight_type; - using vertex_type_type = typename v_impl::cdag_vertex_type_type; - using edge_comm_weight_type = typename e_impl::cdag_edge_comm_weight_type; + using VertexWorkWeightType = typename VImpl::WorkWeightType; + using VertexCommWeightType = typename VImpl::CommWeightType; + using VertexMemWeightType = typename 
VImpl::MemWeightType; + using VertexTypeType = typename VImpl::CDagVertexTypeType; + using EdgeCommWeightType = typename EImpl::CDagEdgeCommWeightType; private: - using ThisT = computational_dag_edge_idx_vector_impl; + using ThisT = ComputationalDagEdgeIdxVectorImpl; - std::vector vertices_; - std::vector edges_; + std::vector vertices_; + std::vector edges_; - unsigned num_vertex_types_ = 0; + unsigned numVertexTypes_ = 0; - std::vector> out_edges_; - std::vector> in_edges_; - - // struct cdag_edge_source_view { - // using value_type = vertex_idx; - - // vertex_idx operator()(directed_edge_descriptor &p) const { return p.source; } - // }; - - // struct cdag_edge_target_view { - // vertex_idx &operator()(directed_edge_descriptor &p) const { return p.target; } - // const vertex_idx &operator()(directed_edge_descriptor const &p) const { return p.target; } - // }; - - // using edge_adapter_source_t = ContainerAdaptor>; using edge_adapter_target_t = ContainerAdaptor>; + std::vector> outEdges_; + std::vector> inEdges_; public: - computational_dag_edge_idx_vector_impl() = default; + ComputationalDagEdgeIdxVectorImpl() = default; - computational_dag_edge_idx_vector_impl(vertex_idx num_vertices) - : vertices_(num_vertices), out_edges_(num_vertices), in_edges_(num_vertices) { - for (vertex_idx i = 0; i < num_vertices; ++i) { - vertices_[i].id = i; + ComputationalDagEdgeIdxVectorImpl(VertexIdx numVertices) + : vertices_(numVertices), outEdges_(numVertices), inEdges_(numVertices) { + for (VertexIdx i = 0; i < numVertices; ++i) { + vertices_[i].id_ = i; } } - computational_dag_edge_idx_vector_impl(const computational_dag_edge_idx_vector_impl &other) = default; + ComputationalDagEdgeIdxVectorImpl(const ComputationalDagEdgeIdxVectorImpl &other) = default; - template - computational_dag_edge_idx_vector_impl(const Graph_t &other) { - static_assert(is_computational_dag_v, "Graph_t must satisfy the is_computation_dag concept"); + template + ComputationalDagEdgeIdxVectorImpl(const 
GraphT &other) { + static_assert(isComputationalDagV, "GraphT must satisfy the is_computation_dag concept"); - constructComputationalDag(other, *this); + ConstructComputationalDag(other, *this); } - computational_dag_edge_idx_vector_impl &operator=(const computational_dag_edge_idx_vector_impl &other) = default; + ComputationalDagEdgeIdxVectorImpl &operator=(const ComputationalDagEdgeIdxVectorImpl &other) = default; - computational_dag_edge_idx_vector_impl(computational_dag_edge_idx_vector_impl &&other) + ComputationalDagEdgeIdxVectorImpl(ComputationalDagEdgeIdxVectorImpl &&other) : vertices_(std::move(other.vertices_)), edges_(std::move(other.edges_)), - num_vertex_types_(other.num_vertex_types_), - out_edges_(std::move(other.out_edges_)), - in_edges_(std::move(other.in_edges_)) { - other.num_vertex_types_ = 0; + numVertexTypes_(other.numVertexTypes_), + outEdges_(std::move(other.outEdges_)), + inEdges_(std::move(other.inEdges_)) { + other.numVertexTypes_ = 0; } - computational_dag_edge_idx_vector_impl &operator=(computational_dag_edge_idx_vector_impl &&other) { + ComputationalDagEdgeIdxVectorImpl &operator=(ComputationalDagEdgeIdxVectorImpl &&other) { if (this != &other) { vertices_ = std::move(other.vertices_); edges_ = std::move(other.edges_); - out_edges_ = std::move(other.out_edges_); - in_edges_ = std::move(other.in_edges_); - num_vertex_types_ = other.num_vertex_types_; - other.num_vertex_types_ = 0; + outEdges_ = std::move(other.outEdges_); + inEdges_ = std::move(other.inEdges_); + numVertexTypes_ = other.numVertexTypes_; + other.numVertexTypes_ = 0; } return *this; } - virtual ~computational_dag_edge_idx_vector_impl() = default; + virtual ~ComputationalDagEdgeIdxVectorImpl() = default; - inline vertex_idx num_edges() const { return static_cast(edges_.size()); } + inline VertexIdx NumEdges() const { return static_cast(edges_.size()); } - inline vertex_idx num_vertices() const { return static_cast(vertices_.size()); } + inline VertexIdx NumVertices() const { 
return static_cast(vertices_.size()); } - inline auto edges() const { return edge_range_vector_impl(*this); } + inline auto Edges() const { return EdgeRangeVectorImpl(*this); } - inline auto parents(vertex_idx v) const { return edge_source_range(in_edges_[v], *this); } + inline auto Parents(VertexIdx v) const { return EdgeSourceRange(inEdges_[v], *this); } - inline auto children(vertex_idx v) const { return edge_target_range(out_edges_[v], *this); } + inline auto Children(VertexIdx v) const { return EdgeTargetRange(outEdges_[v], *this); } - inline auto vertices() const { return integral_range(static_cast(vertices_.size())); } + inline auto Vertices() const { return IntegralRange(static_cast(vertices_.size())); } - inline const std::vector &in_edges(vertex_idx v) const { return in_edges_[v]; } + inline const std::vector &InEdges(VertexIdx v) const { return inEdges_[v]; } - inline const std::vector &out_edges(vertex_idx v) const { return out_edges_[v]; } + inline const std::vector &OutEdges(VertexIdx v) const { return outEdges_[v]; } - inline vertex_idx in_degree(vertex_idx v) const { return static_cast(in_edges_[v].size()); } + inline VertexIdx InDegree(VertexIdx v) const { return static_cast(inEdges_[v].size()); } - inline vertex_idx out_degree(vertex_idx v) const { return static_cast(out_edges_[v].size()); } + inline VertexIdx OutDegree(VertexIdx v) const { return static_cast(outEdges_[v].size()); } - inline edge_comm_weight_type edge_comm_weight(directed_edge_descriptor e) const { return edges_[e.idx].comm_weight; } + inline EdgeCommWeightType EdgeCommWeight(DirectedEdgeDescriptor e) const { return edges_[e.idx_].commWeight_; } - inline vertex_work_weight_type vertex_work_weight(vertex_idx v) const { return vertices_[v].work_weight; } + inline VertexWorkWeightType VertexWorkWeight(VertexIdx v) const { return vertices_[v].workWeight_; } - inline vertex_comm_weight_type vertex_comm_weight(vertex_idx v) const { return vertices_[v].comm_weight; } + inline 
VertexCommWeightType VertexCommWeight(VertexIdx v) const { return vertices_[v].commWeight_; } - inline vertex_mem_weight_type vertex_mem_weight(vertex_idx v) const { return vertices_[v].mem_weight; } + inline VertexMemWeightType VertexMemWeight(VertexIdx v) const { return vertices_[v].memWeight_; } - inline unsigned num_vertex_types() const { return num_vertex_types_; } + inline unsigned NumVertexTypes() const { return numVertexTypes_; } - inline vertex_type_type vertex_type(vertex_idx v) const { return vertices_[v].vertex_type; } + inline VertexTypeType VertexType(VertexIdx v) const { return vertices_[v].vertexType_; } - inline vertex_idx source(const directed_edge_descriptor &e) const { return e.source; } + inline VertexIdx Source(const DirectedEdgeDescriptor &e) const { return e.source_; } - inline vertex_idx target(const directed_edge_descriptor &e) const { return e.target; } + inline VertexIdx Target(const DirectedEdgeDescriptor &e) const { return e.target_; } - vertex_idx add_vertex(vertex_work_weight_type work_weight, - vertex_comm_weight_type comm_weight, - vertex_mem_weight_type mem_weight, - vertex_type_type vertex_type = 0) { - vertices_.emplace_back(vertices_.size(), work_weight, comm_weight, mem_weight, vertex_type); + VertexIdx AddVertex(VertexWorkWeightType workWeight, + VertexCommWeightType commWeight, + VertexMemWeightType memWeight, + VertexTypeType vertexType = 0) { + vertices_.emplace_back(vertices_.size(), workWeight, commWeight, memWeight, vertexType); - out_edges_.push_back({}); - in_edges_.push_back({}); + outEdges_.push_back({}); + inEdges_.push_back({}); - num_vertex_types_ = std::max(num_vertex_types_, vertex_type + 1); + numVertexTypes_ = std::max(numVertexTypes_, vertexType + 1); - return vertices_.back().id; + return vertices_.back().id_; } - std::pair add_edge(vertex_idx source, vertex_idx target, edge_comm_weight_type comm_weight = 1) { + std::pair AddEdge(VertexIdx source, VertexIdx target, EdgeCommWeightType commWeight = 1) { if 
(source == target) { - return {directed_edge_descriptor{}, false}; + return {DirectedEdgeDescriptor{}, false}; } if (source >= vertices_.size() || target >= vertices_.size()) { - return {directed_edge_descriptor{}, false}; + return {DirectedEdgeDescriptor{}, false}; } - for (const auto edge : out_edges_[source]) { - if (edge.target == target) { - return {directed_edge_descriptor{}, false}; + for (const auto edge : outEdges_[source]) { + if (edge.target_ == target) { + return {DirectedEdgeDescriptor{}, false}; } } - out_edges_[source].emplace_back(source, target, edges_.size()); - in_edges_[target].emplace_back(source, target, edges_.size()); + outEdges_[source].emplace_back(source, target, edges_.size()); + inEdges_[target].emplace_back(source, target, edges_.size()); - edges_.emplace_back(comm_weight); + edges_.emplace_back(commWeight); - return {out_edges_[source].back(), true}; + return {outEdges_[source].back(), true}; } - inline void set_vertex_work_weight(vertex_idx v, vertex_work_weight_type work_weight) { - vertices_[v].work_weight = work_weight; - } + inline void SetVertexWorkWeight(VertexIdx v, VertexWorkWeightType workWeight) { vertices_[v].workWeight_ = workWeight; } - inline void set_vertex_comm_weight(vertex_idx v, vertex_comm_weight_type comm_weight) { - vertices_[v].comm_weight = comm_weight; - } + inline void SetVertexCommWeight(VertexIdx v, VertexCommWeightType commWeight) { vertices_[v].commWeight_ = commWeight; } - inline void set_vertex_mem_weight(vertex_idx v, vertex_mem_weight_type mem_weight) { vertices_[v].mem_weight = mem_weight; } + inline void SetVertexMemWeight(VertexIdx v, VertexMemWeightType memWeight) { vertices_[v].memWeight_ = memWeight; } - inline void set_vertex_type(vertex_idx v, vertex_type_type vertex_type) { - vertices_[v].vertex_type = vertex_type; - num_vertex_types_ = std::max(num_vertex_types_, vertex_type + 1); + inline void SetVertexType(VertexIdx v, VertexTypeType vertexType) { + vertices_[v].vertexType_ = vertexType; 
+ numVertexTypes_ = std::max(numVertexTypes_, vertexType + 1); } - inline void set_edge_comm_weight(directed_edge_descriptor e, edge_comm_weight_type comm_weight) { - edges_[e.idx].comm_weight = comm_weight; + inline void SetEdgeCommWeight(DirectedEdgeDescriptor e, EdgeCommWeightType commWeight) { + edges_[e.idx_].commWeight_ = commWeight; } - inline const v_impl &get_vertex_impl(vertex_idx v) const { return vertices_[v]; } + inline const VImpl &GetVertexImpl(VertexIdx v) const { return vertices_[v]; } - inline const e_impl &get_edge_impl(directed_edge_descriptor e) const { return edges_[e.idx]; } + inline const EImpl &GetEdgeImpl(DirectedEdgeDescriptor e) const { return edges_[e.idx_]; } }; -template -inline auto edges(const computational_dag_edge_idx_vector_impl &graph) { - return graph.edges(); +template +inline auto Edges(const ComputationalDagEdgeIdxVectorImpl &graph) { + return graph.Edges(); } -template -inline auto out_edges(vertex_idx_t> v, - const computational_dag_edge_idx_vector_impl &graph) { - return graph.out_edges(v); +template +inline auto OutEdges(VertexIdxT> v, + const ComputationalDagEdgeIdxVectorImpl &graph) { + return graph.OutEdges(v); } -template -inline auto in_edges(vertex_idx_t> v, - const computational_dag_edge_idx_vector_impl &graph) { - return graph.in_edges(v); +template +inline auto InEdges(VertexIdxT> v, + const ComputationalDagEdgeIdxVectorImpl &graph) { + return graph.InEdges(v); } // default implementation to get the source of an edge -template -inline vertex_idx_t> source( - const edge_desc_t> &edge, - const computational_dag_edge_idx_vector_impl &graph) { - return graph.source(edge); +template +inline VertexIdxT> Source( + const EdgeDescT> &edge, + const ComputationalDagEdgeIdxVectorImpl &graph) { + return graph.Source(edge); } // default implementation to get the target of an edge -template -inline vertex_idx_t> target( - const edge_desc_t> &edge, - const computational_dag_edge_idx_vector_impl &graph) { - return 
graph.target(edge); +template +inline VertexIdxT> Target( + const EdgeDescT> &edge, + const ComputationalDagEdgeIdxVectorImpl &graph) { + return graph.Target(edge); } // default template specialization -using computational_dag_edge_idx_vector_impl_def_t - = computational_dag_edge_idx_vector_impl; +using ComputationalDagEdgeIdxVectorImplDefT = ComputationalDagEdgeIdxVectorImpl; -using computational_dag_edge_idx_vector_impl_def_int_t - = computational_dag_edge_idx_vector_impl; +using ComputationalDagEdgeIdxVectorImplDefIntT = ComputationalDagEdgeIdxVectorImpl; -static_assert(is_directed_graph_edge_desc_v, +static_assert(isDirectedGraphEdgeDescV, "computational_dag_edge_idx_vector_impl must satisfy the directed_graph_edge_desc concept"); -static_assert(is_computational_dag_typed_vertices_edge_desc_v, +static_assert(isComputationalDagTypedVerticesEdgeDescV, "computational_dag_edge_idx_vector_impl must satisfy the computation_dag_typed_vertices_edge_desc concept"); } // namespace osp -template -struct std::hash> { - using vertex_idx = typename v_impl::vertex_idx_type; +template +struct std::hash> { + using VertexIdx = typename VImpl::VertexIdxType; - std::size_t operator()(const osp::directed_edge_descriptor_impl &p) const noexcept { - auto h1 = std::hash{}(p.source); - osp::hash_combine(h1, p.target); + std::size_t operator()(const osp::DirectedEdgeDescriptorImpl &p) const noexcept { + auto h1 = std::hash{}(p.source_); + osp::HashCombine(h1, p.target_); return h1; } diff --git a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp index b6621760..ed692500 100644 --- a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp +++ b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp @@ -46,96 +46,96 @@ namespace osp { * - `work_weight_type`: The type used for computational work weights. 
* - `comm_weight_type`: The type used for communication weights. * - `mem_weight_type`: The type used for memory weights. - * - `cdag_vertex_type_type`: The type used for vertex types. + * - `cdag_VertexTypeType`: The type used for vertex types. * - It must have the following public data members: * - `id`: Of type `vertex_idx_type`. * - `work_weight`: Of type `work_weight_type`. * - `comm_weight`: Of type `comm_weight_type`. * - `mem_weight`: Of type `mem_weight_type`. - * - `vertex_type`: Of type `cdag_vertex_type_type`. + * - `vertex_type`: Of type `cdag_VertexTypeType`. * - It must be constructible with the signature: * `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight, - * cdag_vertex_type_type vertex_type)` + * cdag_VertexTypeType vertex_type)` * - * @see cdag_vertex_impl for a reference implementation of the vertex type. + * @see CDagVertexImpl for a reference implementation of the vertex type. */ -template -class computational_dag_vector_impl { +template +class ComputationalDagVectorImpl { public: - using vertex_idx = typename v_impl::vertex_idx_type; + using VertexIdx = typename VImpl::VertexIdxType; - using vertex_work_weight_type = typename v_impl::work_weight_type; - using vertex_comm_weight_type = typename v_impl::comm_weight_type; - using vertex_mem_weight_type = typename v_impl::mem_weight_type; - using vertex_type_type = typename v_impl::cdag_vertex_type_type; + using VertexWorkWeightType = typename VImpl::WorkWeightType; + using VertexCommWeightType = typename VImpl::CommWeightType; + using VertexMemWeightType = typename VImpl::MemWeightType; + using VertexTypeType = typename VImpl::CDagVertexTypeType; - computational_dag_vector_impl() = default; + ComputationalDagVectorImpl() = default; /** * @brief Constructs a graph with a specified number of vertices. * - * @param num_vertices The number of vertices to initialize. + * @param NumVertices The number of vertices to initialize. 
*/ - explicit computational_dag_vector_impl(const vertex_idx num_vertices) - : vertices_(num_vertices), out_neigbors(num_vertices), in_neigbors(num_vertices), num_edges_(0), num_vertex_types_(0) { - for (vertex_idx i = 0; i < num_vertices; ++i) { - vertices_[i].id = i; + explicit ComputationalDagVectorImpl(const VertexIdx numVertices) + : vertices_(numVertices), outNeigbors_(numVertices), inNeigbors_(numVertices), numEdges_(0), numVertexTypes_(0) { + for (VertexIdx i = 0; i < numVertices; ++i) { + vertices_[i].id_ = i; } } - computational_dag_vector_impl(const computational_dag_vector_impl &other) = default; - computational_dag_vector_impl &operator=(const computational_dag_vector_impl &other) = default; + ComputationalDagVectorImpl(const ComputationalDagVectorImpl &other) = default; + ComputationalDagVectorImpl &operator=(const ComputationalDagVectorImpl &other) = default; /** * @brief Constructs a graph from another graph type. * * This constructor initializes the graph by copying the structure and properties from another graph `other`. - * The source graph `Graph_t` must satisfy the `is_computational_dag` concept. + * The source graph `GraphT` must satisfy the `is_computational_dag` concept. * - * @tparam Graph_t The type of the source graph. Must satisfy `is_computational_dag_v`. + * @tparam GraphT The type of the source graph. Must satisfy `is_computational_dag_v`. * @param other The source graph to copy from. 
*/ - template - explicit computational_dag_vector_impl(const Graph_t &other) { - static_assert(is_computational_dag_v, "Graph_t must satisfy the is_computation_dag concept"); - constructComputationalDag(other, *this); + template + explicit ComputationalDagVectorImpl(const GraphT &other) { + static_assert(isComputationalDagV, "GraphT must satisfy the is_computation_dag concept"); + ConstructComputationalDag(other, *this); } - computational_dag_vector_impl(computational_dag_vector_impl &&other) noexcept + ComputationalDagVectorImpl(ComputationalDagVectorImpl &&other) noexcept : vertices_(std::move(other.vertices_)), - out_neigbors(std::move(other.out_neigbors)), - in_neigbors(std::move(other.in_neigbors)), - num_edges_(other.num_edges_), - num_vertex_types_(other.num_vertex_types_) { - other.num_edges_ = 0; - other.num_vertex_types_ = 0; + outNeigbors_(std::move(other.outNeigbors_)), + inNeigbors_(std::move(other.inNeigbors_)), + numEdges_(other.numEdges_), + numVertexTypes_(other.numVertexTypes_) { + other.numEdges_ = 0; + other.numVertexTypes_ = 0; }; - computational_dag_vector_impl &operator=(computational_dag_vector_impl &&other) noexcept { + ComputationalDagVectorImpl &operator=(ComputationalDagVectorImpl &&other) noexcept { if (this != &other) { vertices_ = std::move(other.vertices_); - out_neigbors = std::move(other.out_neigbors); - in_neigbors = std::move(other.in_neigbors); - num_edges_ = other.num_edges_; - num_vertex_types_ = other.num_vertex_types_; + outNeigbors_ = std::move(other.outNeigbors_); + inNeigbors_ = std::move(other.inNeigbors_); + numEdges_ = other.numEdges_; + numVertexTypes_ = other.numVertexTypes_; - other.num_edges_ = 0; - other.num_vertex_types_ = 0; + other.numEdges_ = 0; + other.numVertexTypes_ = 0; } return *this; } - virtual ~computational_dag_vector_impl() = default; + virtual ~ComputationalDagVectorImpl() = default; /** * @brief Returns a range of all vertex indices. 
*/ - [[nodiscard]] auto vertices() const { return integral_range(static_cast(vertices_.size())); } + [[nodiscard]] auto Vertices() const { return IntegralRange(static_cast(vertices_.size())); } /** * @brief Returns the total number of vertices. */ - [[nodiscard]] vertex_idx num_vertices() const { return static_cast(vertices_.size()); } + [[nodiscard]] VertexIdx NumVertices() const { return static_cast(vertices_.size()); } /** * @brief Checks if the graph is empty (no vertices). @@ -145,43 +145,43 @@ class computational_dag_vector_impl { /** * @brief Returns the total number of edges. */ - [[nodiscard]] vertex_idx num_edges() const { return num_edges_; } + [[nodiscard]] VertexIdx NumEdges() const { return numEdges_; } /** * @brief Returns the parents (in-neighbors) of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] const std::vector &parents(const vertex_idx v) const { return in_neigbors[v]; } + [[nodiscard]] const std::vector &Parents(const VertexIdx v) const { return inNeigbors_[v]; } /** * @brief Returns the children (out-neighbors) of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] const std::vector &children(const vertex_idx v) const { return out_neigbors[v]; } + [[nodiscard]] const std::vector &Children(const VertexIdx v) const { return outNeigbors_[v]; } /** * @brief Returns the in-degree of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] vertex_idx in_degree(const vertex_idx v) const { return static_cast(in_neigbors[v].size()); } + [[nodiscard]] VertexIdx InDegree(const VertexIdx v) const { return static_cast(inNeigbors_[v].size()); } /** * @brief Returns the out-degree of a vertex. Does not perform bounds checking. * @param v The vertex index. 
*/ - [[nodiscard]] vertex_idx out_degree(const vertex_idx v) const { return static_cast(out_neigbors[v].size()); } + [[nodiscard]] VertexIdx OutDegree(const VertexIdx v) const { return static_cast(outNeigbors_[v].size()); } - [[nodiscard]] vertex_work_weight_type vertex_work_weight(const vertex_idx v) const { return vertices_[v].work_weight; } + [[nodiscard]] VertexWorkWeightType VertexWorkWeight(const VertexIdx v) const { return vertices_[v].workWeight_; } - [[nodiscard]] vertex_comm_weight_type vertex_comm_weight(const vertex_idx v) const { return vertices_[v].comm_weight; } + [[nodiscard]] VertexCommWeightType VertexCommWeight(const VertexIdx v) const { return vertices_[v].commWeight_; } - [[nodiscard]] vertex_mem_weight_type vertex_mem_weight(const vertex_idx v) const { return vertices_[v].mem_weight; } + [[nodiscard]] VertexMemWeightType VertexMemWeight(const VertexIdx v) const { return vertices_[v].memWeight_; } - [[nodiscard]] vertex_type_type vertex_type(const vertex_idx v) const { return vertices_[v].vertex_type; } + [[nodiscard]] VertexTypeType VertexType(const VertexIdx v) const { return vertices_[v].vertexType_; } - [[nodiscard]] vertex_type_type num_vertex_types() const { return num_vertex_types_; } + [[nodiscard]] VertexTypeType NumVertexTypes() const { return numVertexTypes_; } - [[nodiscard]] const v_impl &get_vertex_impl(const vertex_idx v) const { return vertices_[v]; } + [[nodiscard]] const VImpl &GetVertexImpl(const VertexIdx v) const { return vertices_[v]; } /** * @brief Adds a new isolated vertex to the graph. @@ -192,34 +192,32 @@ class computational_dag_vector_impl { * @param vertex_type Type of the vertex. * @return The index of the newly added vertex. 
*/ - vertex_idx add_vertex(const vertex_work_weight_type work_weight, - const vertex_comm_weight_type comm_weight, - const vertex_mem_weight_type mem_weight, - const vertex_type_type vertex_type = 0) { - vertices_.emplace_back(vertices_.size(), work_weight, comm_weight, mem_weight, vertex_type); - out_neigbors.push_back({}); - in_neigbors.push_back({}); + VertexIdx AddVertex(const VertexWorkWeightType workWeight, + const VertexCommWeightType commWeight, + const VertexMemWeightType memWeight, + const VertexTypeType vertexType = 0) { + vertices_.emplace_back(vertices_.size(), workWeight, commWeight, memWeight, vertexType); + outNeigbors_.push_back({}); + inNeigbors_.push_back({}); - num_vertex_types_ = std::max(num_vertex_types_, vertex_type + 1); + numVertexTypes_ = std::max(numVertexTypes_, vertexType + 1); - return vertices_.back().id; + return vertices_.back().id_; } - void set_vertex_work_weight(const vertex_idx v, const vertex_work_weight_type work_weight) { - vertices_.at(v).work_weight = work_weight; + void SetVertexWorkWeight(const VertexIdx v, const VertexWorkWeightType workWeight) { + vertices_.at(v).workWeight_ = workWeight; } - void set_vertex_comm_weight(const vertex_idx v, const vertex_comm_weight_type comm_weight) { - vertices_.at(v).comm_weight = comm_weight; + void SetVertexCommWeight(const VertexIdx v, const VertexCommWeightType commWeight) { + vertices_.at(v).commWeight_ = commWeight; } - void set_vertex_mem_weight(const vertex_idx v, const vertex_mem_weight_type mem_weight) { - vertices_.at(v).mem_weight = mem_weight; - } + void SetVertexMemWeight(const VertexIdx v, const VertexMemWeightType memWeight) { vertices_.at(v).memWeight_ = memWeight; } - void set_vertex_type(const vertex_idx v, const vertex_type_type vertex_type) { - vertices_.at(v).vertex_type = vertex_type; - num_vertex_types_ = std::max(num_vertex_types_, vertex_type + 1); + void SetVertexType(const VertexIdx v, const VertexTypeType vertexType) { + vertices_.at(v).vertexType_ = 
vertexType; + numVertexTypes_ = std::max(numVertexTypes_, vertexType + 1); } /** @@ -229,54 +227,54 @@ class computational_dag_vector_impl { * @param target The target vertex index. * @return True if the edge was added, false if it already exists or vertices are invalid. */ - bool add_edge(const vertex_idx source, const vertex_idx target) { - if (source >= static_cast(vertices_.size()) || target >= static_cast(vertices_.size()) + bool AddEdge(const VertexIdx source, const VertexIdx target) { + if (source >= static_cast(vertices_.size()) || target >= static_cast(vertices_.size()) || source == target) { return false; } - const auto &out = out_neigbors.at(source); + const auto &out = outNeigbors_.at(source); if (std::find(out.begin(), out.end(), target) != out.end()) { return false; } - out_neigbors[source].push_back(target); - in_neigbors.at(target).push_back(source); - num_edges_++; + outNeigbors_[source].push_back(target); + inNeigbors_.at(target).push_back(source); + numEdges_++; return true; } private: - std::vector vertices_; + std::vector vertices_; - std::vector> out_neigbors; - std::vector> in_neigbors; + std::vector> outNeigbors_; + std::vector> inNeigbors_; - vertex_idx num_edges_ = 0; - unsigned num_vertex_types_ = 0; + VertexIdx numEdges_ = 0; + unsigned numVertexTypes_ = 0; }; /** * @brief Default implementation of a computational DAG using unsigned integer weights. */ -using computational_dag_vector_impl_def_t = computational_dag_vector_impl; +using ComputationalDagVectorImplDefUnsignedT = ComputationalDagVectorImpl; /** * @brief Default implementation of a computational DAG using signed integer weights. 
*/ -using computational_dag_vector_impl_def_int_t = computational_dag_vector_impl; +using ComputationalDagVectorImplDefIntT = ComputationalDagVectorImpl; -static_assert(is_directed_graph_edge_desc_v>, - "computational_dag_vector_impl must satisfy the directed_graph_edge_desc concept"); +static_assert(isDirectedGraphEdgeDescV, + "ComputationalDagVectorImpl must satisfy the directed_graph_edge_desc concept"); -static_assert(has_vertex_weights_v>, - "computational_dag_vector_impl must satisfy the has_vertex_weights concept"); +static_assert(hasVertexWeightsV, + "ComputationalDagVectorImpl must satisfy the has_vertex_weights concept"); -static_assert(is_directed_graph_v>, - "computational_dag_vector_impl must satisfy the directed_graph concept"); +static_assert(isDirectedGraphV, + "ComputationalDagVectorImpl must satisfy the directed_graph concept"); -static_assert(is_computational_dag_typed_vertices_v>, - "computational_dag_vector_impl must satisfy the is_computation_dag concept"); +static_assert(isComputationalDagTypedVerticesV, + "ComputationalDagVectorImpl must satisfy the is_computation_dag concept"); } // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp b/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp index 56b9ac38..1817962b 100644 --- a/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp +++ b/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp @@ -45,30 +45,30 @@ namespace osp { * - `work_weight_type`: The type used for computational work weights. * - `comm_weight_type`: The type used for communication weights. * - `mem_weight_type`: The type used for memory weights. - * - `cdag_vertex_type_type`: The type used for vertex types. + * - `cdag_VertexTypeType`: The type used for vertex types. * - It must have the following public data members: * - `id`: Of type `vertex_idx_type`. * - `work_weight`: Of type `work_weight_type`. 
* - `comm_weight`: Of type `comm_weight_type`. * - `mem_weight`: Of type `mem_weight_type`. - * - `vertex_type`: Of type `cdag_vertex_type_type`. + * - `vertex_type`: Of type `cdag_VertexTypeType`. * - It must be constructible with the signature: * `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight, - * cdag_vertex_type_type vertex_type)` + * cdag_VertexTypeType vertex_type)` * * @tparam index_t The type used for vertex indices in the adjacency lists. */ -template -class dag_vector_adapter { +template +class DagVectorAdapter { public: - using vertex_idx = typename v_impl::vertex_idx_type; + using VertexIdx = typename VImpl::VertexIdxType; - using vertex_work_weight_type = typename v_impl::work_weight_type; - using vertex_comm_weight_type = typename v_impl::comm_weight_type; - using vertex_mem_weight_type = typename v_impl::mem_weight_type; - using vertex_type_type = typename v_impl::cdag_vertex_type_type; + using VertexWorkWeightType = typename VImpl::WorkWeightType; + using VertexCommWeightType = typename VImpl::CommWeightType; + using VertexMemWeightType = typename VImpl::MemWeightType; + using VertexTypeType = typename VImpl::CDagVertexTypeType; - dag_vector_adapter() = default; + DagVectorAdapter() = default; /** * @brief Constructs a dag_vector_adapter from adjacency lists. @@ -78,26 +78,21 @@ class dag_vector_adapter { * * @warning The adapter stores pointers to these vectors. They must remain valid for the lifetime of the adapter. 
*/ - dag_vector_adapter(const std::vector> &out_neigbors_, - const std::vector> &in_neigbors_) - : vertices_(out_neigbors_.size()), - out_neigbors(&out_neigbors_), - in_neigbors(&in_neigbors_), - num_edges_(0), - num_vertex_types_(1) { - for (vertex_idx i = 0; i < static_cast(out_neigbors_.size()); ++i) { - vertices_[i].id = i; - num_edges_ += out_neigbors_[i].size(); + DagVectorAdapter(const std::vector> &outNeigbors, const std::vector> &inNeigbors) + : vertices_(outNeigbors.size()), outNeigbors_(&outNeigbors), inNeigbors_(&inNeigbors), numEdges_(0), numVertexTypes_(1) { + for (VertexIdx i = 0; i < static_cast(outNeigbors.size()); ++i) { + vertices_[i].id_ = i; + numEdges_ += outNeigbors[i].size(); } } - dag_vector_adapter(const dag_vector_adapter &other) = default; - dag_vector_adapter &operator=(const dag_vector_adapter &other) = default; + DagVectorAdapter(const DagVectorAdapter &other) = default; + DagVectorAdapter &operator=(const DagVectorAdapter &other) = default; - dag_vector_adapter(dag_vector_adapter &&other) noexcept = default; - dag_vector_adapter &operator=(dag_vector_adapter &&other) noexcept = default; + DagVectorAdapter(DagVectorAdapter &&other) noexcept = default; + DagVectorAdapter &operator=(DagVectorAdapter &&other) noexcept = default; - virtual ~dag_vector_adapter() = default; + virtual ~DagVectorAdapter() = default; /** * @brief Re-initializes the adapter with new adjacency lists. @@ -105,110 +100,108 @@ class dag_vector_adapter { * @param in_neigbors_ New in-neighbors adjacency list. * @param out_neigbors_ New out-neighbors adjacency list. 
*/ - void set_in_out_neighbors(const std::vector> &in_neigbors_, - const std::vector> &out_neigbors_) { - out_neigbors = &out_neigbors_; - in_neigbors = &in_neigbors_; + void SetInOutNeighbors(const std::vector> &inNeigbors, + const std::vector> &outNeigbors) { + outNeigbors_ = &outNeigbors; + inNeigbors_ = &inNeigbors; - vertices_.resize(out_neigbors->size()); + vertices_.resize(outNeigbors_->size()); - num_edges_ = 0; - for (vertex_idx i = 0; i < static_cast(out_neigbors->size()); ++i) { - vertices_[i].id = i; - num_edges_ += out_neigbors_[i].size(); + numEdges_ = 0; + for (VertexIdx i = 0; i < static_cast(outNeigbors_->size()); ++i) { + vertices_[i].id_ = i; + numEdges_ += outNeigbors[i].size(); } - num_vertex_types_ = 1; + numVertexTypes_ = 1; } /** * @brief Returns a range of all vertex indices. */ - [[nodiscard]] auto vertices() const { return integral_range(static_cast(vertices_.size())); } + [[nodiscard]] auto Vertices() const { return IntegralRange(static_cast(vertices_.size())); } /** * @brief Returns the total number of vertices. */ - [[nodiscard]] vertex_idx num_vertices() const { return static_cast(vertices_.size()); } + [[nodiscard]] VertexIdx NumVertices() const { return static_cast(vertices_.size()); } /** * @brief Returns the total number of edges. */ - [[nodiscard]] vertex_idx num_edges() const { return static_cast(num_edges_); } + [[nodiscard]] VertexIdx NumEdges() const { return static_cast(numEdges_); } /** * @brief Returns a view of the parents (in-neighbors) of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] auto parents(const vertex_idx v) const { return vector_cast_view((*in_neigbors)[v]); } + [[nodiscard]] auto Parents(const VertexIdx v) const { return VectorCastView((*inNeigbors_)[v]); } /** * @brief Returns a view of the children (out-neighbors) of a vertex. Does not perform bounds checking. * @param v The vertex index. 
*/ - [[nodiscard]] auto children(const vertex_idx v) const { return vector_cast_view((*out_neigbors)[v]); } + [[nodiscard]] auto Children(const VertexIdx v) const { return VectorCastView((*outNeigbors_)[v]); } /** * @brief Returns the in-degree of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] vertex_idx in_degree(const vertex_idx v) const { return static_cast((*in_neigbors)[v].size()); } + [[nodiscard]] VertexIdx InDegree(const VertexIdx v) const { return static_cast((*inNeigbors_)[v].size()); } /** * @brief Returns the out-degree of a vertex. Does not perform bounds checking. * @param v The vertex index. */ - [[nodiscard]] vertex_idx out_degree(const vertex_idx v) const { return static_cast((*out_neigbors)[v].size()); } + [[nodiscard]] VertexIdx OutDegree(const VertexIdx v) const { return static_cast((*outNeigbors_)[v].size()); } - [[nodiscard]] vertex_work_weight_type vertex_work_weight(const vertex_idx v) const { return vertices_[v].work_weight; } + [[nodiscard]] VertexWorkWeightType VertexWorkWeight(const VertexIdx v) const { return vertices_[v].workWeight_; } - [[nodiscard]] vertex_comm_weight_type vertex_comm_weight(const vertex_idx v) const { return vertices_[v].comm_weight; } + [[nodiscard]] VertexCommWeightType VertexCommWeight(const VertexIdx v) const { return vertices_[v].commWeight_; } - [[nodiscard]] vertex_mem_weight_type vertex_mem_weight(const vertex_idx v) const { return vertices_[v].mem_weight; } + [[nodiscard]] VertexMemWeightType VertexMemWeight(const VertexIdx v) const { return vertices_[v].memWeight_; } - [[nodiscard]] vertex_type_type vertex_type(const vertex_idx v) const { return vertices_[v].vertex_type; } + [[nodiscard]] VertexTypeType VertexType(const VertexIdx v) const { return vertices_[v].vertexType_; } - [[nodiscard]] vertex_type_type num_vertex_types() const { return num_vertex_types_; } + [[nodiscard]] VertexTypeType NumVertexTypes() const { return numVertexTypes_; } - [[nodiscard]] 
const v_impl &get_vertex_impl(const vertex_idx v) const { return vertices_[v]; } + [[nodiscard]] const VImpl &GetVertexImpl(const VertexIdx v) const { return vertices_[v]; } - void set_vertex_work_weight(const vertex_idx v, const vertex_work_weight_type work_weight) { - vertices_.at(v).work_weight = work_weight; + void SetVertexWorkWeight(const VertexIdx v, const VertexWorkWeightType workWeight) { + vertices_.at(v).workWeight_ = workWeight; } - void set_vertex_comm_weight(const vertex_idx v, const vertex_comm_weight_type comm_weight) { - vertices_.at(v).comm_weight = comm_weight; + void SetVertexCommWeight(const VertexIdx v, const VertexCommWeightType commWeight) { + vertices_.at(v).commWeight_ = commWeight; } - void set_vertex_mem_weight(const vertex_idx v, const vertex_mem_weight_type mem_weight) { - vertices_.at(v).mem_weight = mem_weight; - } + void SetVertexMemWeight(const VertexIdx v, const VertexMemWeightType memWeight) { vertices_.at(v).memWeight_ = memWeight; } - void set_vertex_type(const vertex_idx v, const vertex_type_type vertex_type) { - vertices_.at(v).vertex_type = vertex_type; - num_vertex_types_ = std::max(num_vertex_types_, vertex_type + 1); + void SetVertexType(const VertexIdx v, const VertexTypeType vertexType) { + vertices_.at(v).vertexType_ = vertexType; + numVertexTypes_ = std::max(numVertexTypes_, vertexType + 1); } private: - std::vector vertices_; + std::vector vertices_; - const std::vector> *out_neigbors; - const std::vector> *in_neigbors; + const std::vector> *outNeigbors_; + const std::vector> *inNeigbors_; - std::size_t num_edges_ = 0; - unsigned num_vertex_types_ = 0; + std::size_t numEdges_ = 0; + unsigned numVertexTypes_ = 0; }; -static_assert(is_directed_graph_edge_desc_v>, +static_assert(isDirectedGraphEdgeDescV>, "dag_vector_adapter must satisfy the directed_graph_edge_desc concept"); -static_assert(has_vertex_weights_v>, +static_assert(hasVertexWeightsV>, "dag_vector_adapter must satisfy the has_vertex_weights concept"); 
-static_assert(is_directed_graph_v>, +static_assert(isDirectedGraphV>, "dag_vector_adapter must satisfy the directed_graph concept"); -static_assert(is_computational_dag_typed_vertices_v>, +static_assert(isComputationalDagTypedVerticesV>, "dag_vector_adapter must satisfy the is_computation_dag concept"); } // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp b/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp index 56cce059..c22e7552 100644 --- a/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp +++ b/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp @@ -24,98 +24,98 @@ limitations under the License. namespace osp { -template -class edge_range_vector_impl { - using directed_edge_descriptor = typename directed_graph_edge_desc_traits::directed_edge_descriptor; - using vertex_idx = typename directed_graph_traits::vertex_idx; - using iter = typename Graph_t::out_edges_iterator_t; - const Graph_t &graph; +template +class EdgeRangeVectorImpl { + using DirectedEdgeDescriptor = typename DirectedGraphEdgeDescTraits::DirectedEdgeDescriptor; + using VertexIdx = typename DirectedGraphTraits::VertexIdx; + using Iter = typename GraphT::OutEdgesIteratorT; + const GraphT &graph_; - struct edge_iterator { - vertex_idx current_vertex; - std::size_t current_edge_idx; - iter current_edge; + struct EdgeIterator { + VertexIdx currentVertex_; + std::size_t currentEdgeIdx_; + Iter currentEdge_; - const Graph_t *graph; + const GraphT *graph_; public: using iterator_category = std::forward_iterator_tag; - using value_type = directed_edge_descriptor; + using value_type = DirectedEdgeDescriptor; using difference_type = std::ptrdiff_t; using pointer = const value_type *; using reference = const value_type &; - edge_iterator() : current_vertex(0u), current_edge_idx(0u), graph(nullptr) {} + EdgeIterator() : currentVertex_(0u), currentEdgeIdx_(0u), graph_(nullptr) {} - edge_iterator(const edge_iterator 
&other) - : current_vertex(other.current_vertex), current_edge_idx(other.current_edge_idx), graph(other.graph) {} + EdgeIterator(const EdgeIterator &other) + : currentVertex_(other.currentVertex_), currentEdgeIdx_(other.currentEdgeIdx_), graph_(other.graph_) {} - edge_iterator &operator=(const edge_iterator &other) { + EdgeIterator &operator=(const EdgeIterator &other) { if (this != &other) { - current_vertex = other.current_vertex; - current_edge_idx = other.current_edge_idx; - graph = other.graph; + currentVertex_ = other.currentVertex_; + currentEdgeIdx_ = other.currentEdgeIdx_; + graph_ = other.graph_; } return *this; } - edge_iterator(const Graph_t &graph_) : current_vertex(0u), current_edge_idx(0u), graph(&graph_) { - while (current_vertex != graph->num_vertices()) { - if (graph->out_edges(current_vertex).begin() != graph->out_edges(current_vertex).end()) { - current_edge = graph->out_edges(current_vertex).begin(); + EdgeIterator(const GraphT &graph) : currentVertex_(0u), currentEdgeIdx_(0u), graph_(&graph) { + while (currentVertex_ != graph_->NumVertices()) { + if (graph_->OutEdges(currentVertex_).begin() != graph_->OutEdges(currentVertex_).end()) { + currentEdge_ = graph_->OutEdges(currentVertex_).begin(); break; } - current_vertex++; + currentVertex_++; } } - edge_iterator(std::size_t current_edge_idx_, const Graph_t &graph_) - : current_vertex(0u), current_edge_idx(current_edge_idx_), graph(&graph_) { - if (current_edge_idx < graph->num_edges()) { + EdgeIterator(std::size_t currentEdgeIdx, const GraphT &graph) + : currentVertex_(0u), currentEdgeIdx_(currentEdgeIdx), graph_(&graph) { + if (currentEdgeIdx_ < graph_->NumEdges()) { std::size_t tmp = 0u; - if (tmp < current_edge_idx) { - while (current_vertex != graph->num_vertices()) { - current_edge = graph->out_edges(current_vertex).begin(); + if (tmp < currentEdgeIdx_) { + while (currentVertex_ != graph_->NumVertices()) { + currentEdge_ = graph_->OutEdges(currentVertex_).begin(); - while (current_edge != 
graph->out_edges(current_vertex).end()) { - if (tmp == current_edge_idx) { + while (currentEdge_ != graph_->OutEdges(currentVertex_).end()) { + if (tmp == currentEdgeIdx_) { break; } - current_edge++; + currentEdge_++; tmp++; } - current_vertex++; + currentVertex_++; } } } else { - current_edge_idx = graph->num_edges(); - current_vertex = graph->num_vertices(); + currentEdgeIdx_ = graph_->NumEdges(); + currentVertex_ = graph_->NumVertices(); } } - const value_type &operator*() const { return *current_edge; } + const value_type &operator*() const { return *currentEdge_; } - const value_type *operator->() const { return &(*current_edge); } + const value_type *operator->() const { return &(*currentEdge_); } // Prefix increment - edge_iterator &operator++() { - current_edge++; - current_edge_idx++; + EdgeIterator &operator++() { + currentEdge_++; + currentEdgeIdx_++; - if (current_edge == graph->out_edges(current_vertex).end()) { - current_vertex++; + if (currentEdge_ == graph_->OutEdges(currentVertex_).end()) { + currentVertex_++; - while (current_vertex != graph->num_vertices()) { - if (graph->out_edges(current_vertex).begin() != graph->out_edges(current_vertex).end()) { - current_edge = graph->out_edges(current_vertex).begin(); + while (currentVertex_ != graph_->NumVertices()) { + if (graph_->OutEdges(currentVertex_).begin() != graph_->OutEdges(currentVertex_).end()) { + currentEdge_ = graph_->OutEdges(currentVertex_).begin(); break; } - current_vertex++; + currentVertex_++; } } @@ -123,154 +123,152 @@ class edge_range_vector_impl { } // Postfix increment - edge_iterator operator++(int) { - edge_iterator tmp = *this; + EdgeIterator operator++(int) { + EdgeIterator tmp = *this; ++(*this); return tmp; } - inline bool operator==(const edge_iterator &other) const { return current_edge_idx == other.current_edge_idx; } + inline bool operator==(const EdgeIterator &other) const { return currentEdgeIdx_ == other.currentEdgeIdx_; } - inline bool operator!=(const edge_iterator 
&other) const { return current_edge_idx != other.current_edge_idx; } + inline bool operator!=(const EdgeIterator &other) const { return currentEdgeIdx_ != other.currentEdgeIdx_; } }; public: - edge_range_vector_impl(const Graph_t &graph_) : graph(graph_) {} + EdgeRangeVectorImpl(const GraphT &graph) : graph_(graph) {} - auto begin() const { return edge_iterator(graph); } + auto begin() const { return EdgeIterator(graph_); } - auto end() const { return edge_iterator(graph.num_edges(), graph); } + auto end() const { return EdgeIterator(graph_.NumEdges(), graph_); } - auto size() const { return graph.num_edges(); } + auto size() const { return graph_.NumEdges(); } }; -template -class edge_source_range { - using directed_edge_descriptor = typename directed_graph_edge_desc_traits::directed_edge_descriptor; - using vertex_idx = typename directed_graph_traits::vertex_idx; - using iter = typename Graph_t::in_edges_iterator_t; +template +class EdgeSourceRange { + using DirectedEdgeDescriptor = typename DirectedGraphEdgeDescTraits::DirectedEdgeDescriptor; + using VertexIdx = typename DirectedGraphTraits::VertexIdx; + using Iter = typename GraphT::InEdgesIteratorT; - const Graph_t &graph; - const std::vector &edges; + const GraphT &graph_; + const std::vector &edges_; - struct source_iterator { - const Graph_t *graph; - iter current_edge; + struct SourceIterator { + const GraphT *graph_; + Iter currentEdge_; public: using iterator_category = std::forward_iterator_tag; - using value_type = vertex_idx; + using value_type = VertexIdx; using difference_type = std::ptrdiff_t; using pointer = const value_type *; using reference = const value_type &; - source_iterator() : graph(nullptr) {} + SourceIterator() : graph_(nullptr) {} - source_iterator(const source_iterator &other) : graph(other.graph), current_edge(other.current_edge) {} + SourceIterator(const SourceIterator &other) : graph_(other.graph_), currentEdge_(other.currentEdge_) {} - source_iterator &operator=(const 
source_iterator &other) { + SourceIterator &operator=(const SourceIterator &other) { if (this != &other) { - graph = other.graph; - current_edge = other.current_edge; + graph_ = other.graph_; + currentEdge_ = other.currentEdge_; } return *this; } - source_iterator(iter current_edge_, const Graph_t &graph_) : graph(&graph_), current_edge(current_edge_) {} + SourceIterator(Iter currentEdge, const GraphT &graph) : graph_(&graph), currentEdge_(currentEdge) {} - value_type operator*() const { return source(*current_edge, *graph); } + value_type operator*() const { return Source(*currentEdge_, *graph_); } // Prefix increment - source_iterator &operator++() { - current_edge++; + SourceIterator &operator++() { + currentEdge_++; return *this; } // Postfix increment - source_iterator operator++(int) { - source_iterator tmp = *this; + SourceIterator operator++(int) { + SourceIterator tmp = *this; ++(*this); return tmp; } - inline bool operator==(const source_iterator &other) const { return current_edge == other.current_edge; } + inline bool operator==(const SourceIterator &other) const { return currentEdge_ == other.currentEdge_; } - inline bool operator!=(const source_iterator &other) const { return current_edge != other.current_edge; } + inline bool operator!=(const SourceIterator &other) const { return currentEdge_ != other.currentEdge_; } }; public: - edge_source_range(const std::vector &edges_, const Graph_t &graph_) - : graph(graph_), edges(edges_) {} + EdgeSourceRange(const std::vector &edges, const GraphT &graph) : graph_(graph), edges_(edges) {} - auto begin() const { return source_iterator(edges.begin(), graph); } + auto begin() const { return SourceIterator(edges_.begin(), graph_); } - auto end() const { return source_iterator(edges.end(), graph); } + auto end() const { return SourceIterator(edges_.end(), graph_); } - auto size() const { return edges.size(); } + auto size() const { return edges_.size(); } }; -template -class edge_target_range { - using 
directed_edge_descriptor = typename directed_graph_edge_desc_traits::directed_edge_descriptor; - using vertex_idx = typename directed_graph_traits::vertex_idx; - using iter = typename Graph_t::out_edges_iterator_t; - const Graph_t &graph; - const std::vector &edges; +template +class EdgeTargetRange { + using DirectedEdgeDescriptor = typename DirectedGraphEdgeDescTraits::DirectedEdgeDescriptor; + using VertexIdx = typename DirectedGraphTraits::VertexIdx; + using Iter = typename GraphT::OutEdgesIteratorT; + const GraphT &graph_; + const std::vector &edges_; - struct target_iterator { - const Graph_t *graph; - iter current_edge; + struct TargetIterator { + const GraphT *graph_; + Iter currentEdge_; public: using iterator_category = std::forward_iterator_tag; - using value_type = vertex_idx; + using value_type = VertexIdx; using difference_type = std::ptrdiff_t; using pointer = const value_type *; using reference = const value_type &; - target_iterator() : graph(nullptr) {} + TargetIterator() : graph_(nullptr) {} - target_iterator(const target_iterator &other) : graph(other.graph), current_edge(other.current_edge) {} + TargetIterator(const TargetIterator &other) : graph_(other.graph_), currentEdge_(other.currentEdge_) {} - target_iterator &operator=(const target_iterator &other) { + TargetIterator &operator=(const TargetIterator &other) { if (this != &other) { - graph = other.graph; - current_edge = other.current_edge; + graph_ = other.graph_; + currentEdge_ = other.currentEdge_; } return *this; } - target_iterator(iter current_edge_, const Graph_t &graph_) : graph(&graph_), current_edge(current_edge_) {} + TargetIterator(Iter currentEdge, const GraphT &graph) : graph_(&graph), currentEdge_(currentEdge) {} - value_type operator*() const { return target(*current_edge, *graph); } + value_type operator*() const { return Target(*currentEdge_, *graph_); } // Prefix increment - target_iterator &operator++() { - current_edge++; + TargetIterator &operator++() { + 
currentEdge_++; return *this; } // Postfix increment - target_iterator operator++(int) { - target_iterator tmp = *this; + TargetIterator operator++(int) { + TargetIterator tmp = *this; ++(*this); return tmp; } - inline bool operator==(const target_iterator &other) const { return current_edge == other.current_edge; } + inline bool operator==(const TargetIterator &other) const { return currentEdge_ == other.currentEdge_; } - inline bool operator!=(const target_iterator &other) const { return current_edge != other.current_edge; } + inline bool operator!=(const TargetIterator &other) const { return currentEdge_ != other.currentEdge_; } }; public: - edge_target_range(const std::vector &edges_, const Graph_t &graph_) - : graph(graph_), edges(edges_) {} + EdgeTargetRange(const std::vector &edges, const GraphT &graph) : graph_(graph), edges_(edges) {} - auto begin() const { return target_iterator(edges.begin(), graph); } + auto begin() const { return TargetIterator(edges_.begin(), graph_); } - auto end() const { return target_iterator(edges.end(), graph); } + auto end() const { return TargetIterator(edges_.end(), graph_); } - auto size() const { return edges.size(); } + auto size() const { return edges_.size(); } }; } // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp b/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp index 4de995e9..e0836e2c 100644 --- a/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp +++ b/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp @@ -31,10 +31,10 @@ namespace osp { * @tparam from_t The original type of elements in the vector. * @tparam to_t The target type to cast elements to. 
*/ -template -class vector_cast_view { - using iter = typename std::vector::const_iterator; - const std::vector &vec; +template +class VectorCastView { + using Iter = typename std::vector::const_iterator; + const std::vector &vec_; /** * @brief Iterator for vector_cast_view. @@ -42,70 +42,70 @@ class vector_cast_view { * This iterator wraps the underlying vector iterator and performs a static_cast * on dereference. It satisfies the RandomAccessIterator concept. */ - struct cast_iterator { + struct CastIterator { using iterator_category = std::random_access_iterator_tag; - using value_type = to_t; + using value_type = ToT; using difference_type = std::ptrdiff_t; using pointer = const value_type *; using reference = const value_type &; - iter current_edge; + Iter currentEdge_; - cast_iterator() = default; + CastIterator() = default; - explicit cast_iterator(iter current_edge_) : current_edge(current_edge_) {} + explicit CastIterator(Iter currentEdge) : currentEdge_(currentEdge) {} - value_type operator*() const { return static_cast(*current_edge); } + value_type operator*() const { return static_cast(*currentEdge_); } - cast_iterator &operator++() { - ++current_edge; + CastIterator &operator++() { + ++currentEdge_; return *this; } - cast_iterator operator++(int) { - cast_iterator tmp = *this; + CastIterator operator++(int) { + CastIterator tmp = *this; ++(*this); return tmp; } - cast_iterator &operator--() { - --current_edge; + CastIterator &operator--() { + --currentEdge_; return *this; } - cast_iterator operator--(int) { - cast_iterator tmp = *this; + CastIterator operator--(int) { + CastIterator tmp = *this; --(*this); return tmp; } - cast_iterator &operator+=(difference_type n) { - current_edge += n; + CastIterator &operator+=(difference_type n) { + currentEdge_ += n; return *this; } - cast_iterator &operator-=(difference_type n) { - current_edge -= n; + CastIterator &operator-=(difference_type n) { + currentEdge_ -= n; return *this; } - cast_iterator 
operator+(difference_type n) const { return cast_iterator(current_edge + n); } + CastIterator operator+(difference_type n) const { return CastIterator(currentEdge_ + n); } - cast_iterator operator-(difference_type n) const { return cast_iterator(current_edge - n); } + CastIterator operator-(difference_type n) const { return CastIterator(currentEdge_ - n); } - difference_type operator-(const cast_iterator &other) const { return current_edge - other.current_edge; } + difference_type operator-(const CastIterator &other) const { return currentEdge_ - other.currentEdge_; } - bool operator==(const cast_iterator &other) const { return current_edge == other.current_edge; } + bool operator==(const CastIterator &other) const { return currentEdge_ == other.currentEdge_; } - bool operator!=(const cast_iterator &other) const { return current_edge != other.current_edge; } + bool operator!=(const CastIterator &other) const { return currentEdge_ != other.currentEdge_; } - bool operator<(const cast_iterator &other) const { return current_edge < other.current_edge; } + bool operator<(const CastIterator &other) const { return currentEdge_ < other.currentEdge_; } - bool operator>(const cast_iterator &other) const { return current_edge > other.current_edge; } + bool operator>(const CastIterator &other) const { return currentEdge_ > other.currentEdge_; } - bool operator<=(const cast_iterator &other) const { return current_edge <= other.current_edge; } + bool operator<=(const CastIterator &other) const { return currentEdge_ <= other.currentEdge_; } - bool operator>=(const cast_iterator &other) const { return current_edge >= other.current_edge; } + bool operator>=(const CastIterator &other) const { return currentEdge_ >= other.currentEdge_; } }; public: @@ -115,38 +115,38 @@ class vector_cast_view { * @param vec_ The vector to view. The view holds a reference to this vector, * so the vector must outlive the view. 
*/ - explicit vector_cast_view(const std::vector &vec_) : vec(vec_) {} + explicit VectorCastView(const std::vector &vec) : vec_(vec) {} /** * @brief Returns an iterator to the beginning of the view. * @return An iterator to the first element. */ - [[nodiscard]] auto begin() const { return cast_iterator(vec.begin()); } + [[nodiscard]] auto begin() const { return CastIterator(vec_.begin()); } /** * @brief Returns an iterator to the end of the view. * @return An iterator to the element following the last element. */ - [[nodiscard]] auto end() const { return cast_iterator(vec.end()); } + [[nodiscard]] auto end() const { return CastIterator(vec_.end()); } /** * @brief Returns the number of elements in the view. * @return The number of elements. */ - [[nodiscard]] auto size() const { return vec.size(); } + [[nodiscard]] auto size() const { return vec_.size(); } /** * @brief Checks if the view is empty. * @return True if the view is empty, false otherwise. */ - [[nodiscard]] bool empty() const { return vec.empty(); } + [[nodiscard]] bool empty() const { return vec_.empty(); } /** * @brief Accesses the element at the specified index. * @param i The index of the element to access. * @return The element at index i, cast to to_t. */ - [[nodiscard]] auto operator[](std::size_t i) const { return static_cast(vec[i]); } + [[nodiscard]] auto operator[](std::size_t i) const { return static_cast(vec_[i]); } }; } // namespace osp diff --git a/include/osp/graph_implementations/boost_graphs/boost_graph.hpp b/include/osp/graph_implementations/boost_graphs/boost_graph.hpp index 8dcb069d..0a644e97 100644 --- a/include/osp/graph_implementations/boost_graphs/boost_graph.hpp +++ b/include/osp/graph_implementations/boost_graphs/boost_graph.hpp @@ -29,52 +29,49 @@ limitations under the License. 
#include "osp/graph_algorithms/computational_dag_construction_util.hpp" #include "source_iterator_range.hpp" -template -struct boost_vertex { - boost_vertex() : workWeight(0), communicationWeight(0), memoryWeight(0), nodeType(0) {} - - boost_vertex(vertex_workw_t workWeight_, - vertex_commw_t communicationWeight_, - vertex_memw_t memoryWeight_, - vertex_type_t nodeType_ = 0) - : workWeight(workWeight_), communicationWeight(communicationWeight_), memoryWeight(memoryWeight_), nodeType(nodeType_) {} - - vertex_workw_t workWeight; - vertex_commw_t communicationWeight; - vertex_memw_t memoryWeight; - vertex_type_t nodeType; +template +struct BoostVertex { + BoostVertex() : workWeight_(0), communicationWeight_(0), memoryWeight_(0), nodeType_(0) {} + + BoostVertex(VertexWorkwT workWeight, VertexCommwT communicationWeight, VertexMemwT memoryWeight, VertexTypeT nodeType = 0) + : workWeight_(workWeight), communicationWeight_(communicationWeight), memoryWeight_(memoryWeight), nodeType_(nodeType) {} + + VertexWorkwT workWeight_; + VertexCommwT communicationWeight_; + VertexMemwT memoryWeight_; + VertexTypeT nodeType_; }; -using boost_vertex_def_int = boost_vertex; -using boost_vertex_def_uint = boost_vertex; +using BoostVertexDefInt = BoostVertex; +using BoostVertexDefUint = BoostVertex; -template -struct boost_edge { - boost_edge() : communicationWeight(0) {} +template +struct BoostEdge { + BoostEdge() : communicationWeight_(0) {} - boost_edge(edge_commw_t communicationWeight_) : communicationWeight(communicationWeight_) {} + BoostEdge(EdgeCommwT communicationWeight) : communicationWeight_(communicationWeight) {} - edge_commw_t communicationWeight; + EdgeCommwT communicationWeight_; }; -using boost_edge_def_int = boost_edge; -using boost_edge_def_uint = boost_edge; +using BoostEdgeDefInt = BoostEdge; +using BoostEdgeDefUint = BoostEdge; -template -using boost_graph_impl = boost::adjacency_list, - boost_edge>; +template +using BoostGraphImpl = boost::adjacency_list, + 
BoostEdge>; -using boost_edge_desc = +using BoostEdgeDesc = typename boost::graph_traits>::edge_descriptor; template <> -struct std::hash { - std::size_t operator()(const boost_edge_desc &p) const noexcept { +struct std::hash { + std::size_t operator()(const BoostEdgeDesc &p) const noexcept { auto h1 = std::hash{}(p.m_source); - osp::hash_combine(h1, p.m_target); + osp::HashCombine(h1, p.m_target); return h1; } @@ -90,308 +87,294 @@ struct std::hash { * calculating the longest path, and retrieving topological order of vertices. */ -template -class boost_graph { - using boost_graph_impl_t = boost_graph_impl; +template +class BoostGraph { + using BoostGraphImplT = BoostGraphImpl; public: // graph_traits specialization - using directed_edge_descriptor = typename boost::graph_traits::edge_descriptor; - using vertex_idx = typename boost::graph_traits::vertex_descriptor; + using DirectedEdgeDescriptor = typename boost::graph_traits::edge_descriptor; + using VertexIdx = typename boost::graph_traits::vertex_descriptor; // cdag_traits specialization - using vertex_work_weight_type = vertex_workw_t; - using vertex_comm_weight_type = vertex_commw_t; - using vertex_mem_weight_type = vertex_memw_t; - using vertex_type_type = vertex_type_t; - using edge_comm_weight_type = edge_commw_t; - - boost_graph(const std::vector> &out_, - const std::vector &workW_, - const std::vector &commW_, - const std::unordered_map, edge_comm_weight_type, osp::pair_hash> &comm_edge_W) - : number_of_vertex_types(0) { - graph.m_vertices.reserve(out_.size()); - - assert(out_.size() == workW_.size()); - assert(out_.size() == commW_.size()); - - for (size_t i = 0; i < out_.size(); ++i) { - add_vertex(workW_[i], commW_[i]); + using VertexWorkWeightType = VertexWorkwT; + using VertexCommWeightType = VertexCommwT; + using VertexMemWeightType = VertexMemwT; + using VertexTypeType = VertexTypeT; + using EdgeCommWeightType = EdgeCommwT; + + BoostGraph(const std::vector> &out, + const std::vector &workW, + const 
std::vector &commW, + const std::unordered_map, EdgeCommWeightType, osp::PairHash> &commEdgeW) + : numberOfVertexTypes_(0) { + graph_.m_vertices.reserve(out.size()); + + assert(out.size() == workW.size()); + assert(out.size() == commW.size()); + + for (size_t i = 0; i < out.size(); ++i) { + AddVertex(workW[i], commW[i]); } - for (size_t i = 0; i < out_.size(); ++i) { - for (const auto &j : out_[i]) { - assert(comm_edge_W.find(std::make_pair(i, j)) != comm_edge_W.cend()); - add_edge(i, j, comm_edge_W.at(std::make_pair(i, j))); + for (size_t i = 0; i < out.size(); ++i) { + for (const auto &j : out[i]) { + assert(commEdgeW.find(std::make_pair(i, j)) != commEdgeW.cend()); + AddEdge(i, j, commEdgeW.at(std::make_pair(i, j))); } } - updateNumberOfVertexTypes(); + UpdateNumberOfVertexTypes(); } - boost_graph(const std::vector> &out_, - const std::vector &workW_, - const std::vector &commW_) - : number_of_vertex_types(0) { - graph.m_vertices.reserve(out_.size()); + BoostGraph(const std::vector> &out, + const std::vector &workW, + const std::vector &commW) + : numberOfVertexTypes_(0) { + graph_.m_vertices.reserve(out.size()); - assert(out_.size() == workW_.size()); - assert(out_.size() == commW_.size()); + assert(out.size() == workW.size()); + assert(out.size() == commW.size()); - for (size_t i = 0; i < out_.size(); ++i) { - add_vertex(workW_[i], commW_[i]); + for (size_t i = 0; i < out.size(); ++i) { + AddVertex(workW[i], commW[i]); } - for (size_t i = 0; i < out_.size(); ++i) { - for (const auto &j : out_[i]) { - add_edge(i, j); + for (size_t i = 0; i < out.size(); ++i) { + for (const auto &j : out[i]) { + AddEdge(i, j); } } - updateNumberOfVertexTypes(); + UpdateNumberOfVertexTypes(); } - boost_graph(const std::vector> &out_, - const std::vector &workW_, - const std::vector &commW_, - const std::vector &nodeType_) - : number_of_vertex_types(0) { - graph.m_vertices.reserve(out_.size()); + BoostGraph(const std::vector> &out, + const std::vector &workW, + const std::vector 
&commW, + const std::vector &nodeType) + : numberOfVertexTypes_(0) { + graph_.m_vertices.reserve(out.size()); - assert(out_.size() == workW_.size()); - assert(out_.size() == commW_.size()); - assert(out_.size() == nodeType_.size()); + assert(out.size() == workW.size()); + assert(out.size() == commW.size()); + assert(out.size() == nodeType.size()); - for (size_t i = 0; i < out_.size(); ++i) { - add_vertex(workW_[i], commW_[i], 0, nodeType_[i]); + for (size_t i = 0; i < out.size(); ++i) { + AddVertex(workW[i], commW[i], 0, nodeType[i]); } - for (size_t i = 0; i < out_.size(); ++i) { - for (const auto &j : out_[i]) { - add_edge(i, j); + for (size_t i = 0; i < out.size(); ++i) { + for (const auto &j : out[i]) { + AddEdge(i, j); } } - updateNumberOfVertexTypes(); + UpdateNumberOfVertexTypes(); } /** * @brief Default constructor for the ComputationalDag class. */ - explicit boost_graph() : graph(0), number_of_vertex_types(0) {} + explicit BoostGraph() : graph_(0), numberOfVertexTypes_(0) {} - boost_graph(vertex_idx number_of_nodes) : graph(number_of_nodes), number_of_vertex_types(0) {} + BoostGraph(VertexIdx numberOfNodes) : graph_(numberOfNodes), numberOfVertexTypes_(0) {} - boost_graph(unsigned number_of_nodes) : graph(static_cast(number_of_nodes)), number_of_vertex_types(0) {} + BoostGraph(unsigned numberOfNodes) : graph_(static_cast(numberOfNodes)), numberOfVertexTypes_(0) {} - boost_graph(const boost_graph &other) = default; + BoostGraph(const BoostGraph &other) = default; - boost_graph &operator=(const boost_graph &other) = default; + BoostGraph &operator=(const BoostGraph &other) = default; - boost_graph(boost_graph &&other) : number_of_vertex_types(other.number_of_vertex_types) { - std::swap(this->graph, other.graph); - other.number_of_vertex_types = 0; + BoostGraph(BoostGraph &&other) : numberOfVertexTypes_(other.numberOfVertexTypes_) { + std::swap(this->graph_, other.graph_); + other.numberOfVertexTypes_ = 0; } - boost_graph &operator=(boost_graph &&other) { + 
BoostGraph &operator=(BoostGraph &&other) { if (this != &other) { - std::swap(graph, other.graph); - number_of_vertex_types = other.number_of_vertex_types; - other.number_of_vertex_types = 0; - other.graph.clear(); + std::swap(graph_, other.graph_); + numberOfVertexTypes_ = other.numberOfVertexTypes_; + other.numberOfVertexTypes_ = 0; + other.graph_.clear(); } return *this; } - virtual ~boost_graph() = default; + virtual ~BoostGraph() = default; - template - boost_graph(const Graph_t &other) : number_of_vertex_types(0) { - static_assert(osp::is_computational_dag_v, "Graph_t must satisfy the is_computation_dag concept"); + template + BoostGraph(const GraphT &other) : numberOfVertexTypes_(0) { + static_assert(osp::isComputationalDagV, "GraphT must satisfy the is_computation_dag concept"); - graph.m_vertices.reserve(other.num_vertices()); + graph_.m_vertices.reserve(other.NumVertices()); - osp::constructComputationalDag(other, *this); + osp::ConstructComputationalDag(other, *this); } - inline const boost_graph_impl_t &get_boost_graph() const { return graph; } + inline const BoostGraphImplT &GetBoostGraph() const { return graph_; } - inline boost_graph_impl_t &get_boost_graph() { return graph; } + inline BoostGraphImplT &GetBoostGraph() { return graph_; } - inline size_t num_vertices() const { return boost::num_vertices(graph); } + inline size_t NumVertices() const { return boost::num_vertices(graph_); } - inline size_t num_edges() const { return boost::num_edges(graph); } + inline size_t NumEdges() const { return boost::num_edges(graph_); } - void updateNumberOfVertexTypes() { - number_of_vertex_types = 0; - for (const auto &v : vertices()) { - if (vertex_type(v) >= number_of_vertex_types) { - number_of_vertex_types = vertex_type(v) + 1; + void UpdateNumberOfVertexTypes() { + numberOfVertexTypes_ = 0; + for (const auto &v : Vertices()) { + if (VertexType(v) >= numberOfVertexTypes_) { + numberOfVertexTypes_ = VertexType(v) + 1; } } } - inline unsigned 
num_vertex_types() const { return number_of_vertex_types; }; - - auto vertices() const { return boost::make_iterator_range(boost::vertices(graph)); } - - auto vertices() { return boost::make_iterator_range(boost::vertices(graph)); } + inline unsigned NumVertexTypes() const { return numberOfVertexTypes_; }; - // template - // void debug() const { - // static_assert(sizeof(T *) == 0); - // } + auto Vertices() const { return boost::make_iterator_range(boost::vertices(graph_)); } - auto parents(const vertex_idx &v) const { - // auto ciao = boost::extensions::make_source_iterator_range(boost::inv_adjacent_vertices(v, graph)); + auto Vertices() { return boost::make_iterator_range(boost::vertices(graph_)); } - // debug::value_type>(); - - // debug(); - return boost::extensions::make_source_iterator_range(boost::inv_adjacent_vertices(v, graph)); + auto Parents(const VertexIdx &v) const { + return boost::extensions::MakeSourceIteratorRange(boost::inv_adjacent_vertices(v, graph_)); } - auto parents(const vertex_idx &v) { - return boost::extensions::make_source_iterator_range(boost::inv_adjacent_vertices(v, graph)); + auto Parents(const VertexIdx &v) { + return boost::extensions::MakeSourceIteratorRange(boost::inv_adjacent_vertices(v, graph_)); } - auto children(const vertex_idx &v) const { - return boost::extensions::make_source_iterator_range(boost::adjacent_vertices(v, graph)); + auto Children(const VertexIdx &v) const { + return boost::extensions::MakeSourceIteratorRange(boost::adjacent_vertices(v, graph_)); } - auto children(const vertex_idx &v) { - return boost::extensions::make_source_iterator_range(boost::adjacent_vertices(v, graph)); - } + auto Children(const VertexIdx &v) { return boost::extensions::MakeSourceIteratorRange(boost::adjacent_vertices(v, graph_)); } - auto edges() const { return boost::extensions::make_source_iterator_range(boost::edges(graph)); } + auto Edges() const { return boost::extensions::MakeSourceIteratorRange(boost::edges(graph_)); } - auto 
edges() { return boost::extensions::make_source_iterator_range(boost::edges(graph)); } + auto Edges() { return boost::extensions::MakeSourceIteratorRange(boost::edges(graph_)); } - auto in_edges(const vertex_idx &v) const { return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph)); } + auto InEdges(const VertexIdx &v) const { return boost::extensions::MakeSourceIteratorRange(boost::in_edges(v, graph_)); } - auto in_edges(const vertex_idx &v) { return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph)); } + auto InEdges(const VertexIdx &v) { return boost::extensions::MakeSourceIteratorRange(boost::in_edges(v, graph_)); } - auto out_edges(const vertex_idx &v) const { - return boost::extensions::make_source_iterator_range(boost::out_edges(v, graph)); - } + auto OutEdges(const VertexIdx &v) const { return boost::extensions::MakeSourceIteratorRange(boost::out_edges(v, graph_)); } - auto out_edges(const vertex_idx &v) { return boost::extensions::make_source_iterator_range(boost::out_edges(v, graph)); } + auto OutEdges(const VertexIdx &v) { return boost::extensions::MakeSourceIteratorRange(boost::out_edges(v, graph_)); } - vertex_idx source(const directed_edge_descriptor &e) const { return boost::source(e, graph); } + VertexIdx Source(const DirectedEdgeDescriptor &e) const { return boost::source(e, graph_); } - vertex_idx target(const directed_edge_descriptor &e) const { return boost::target(e, graph); } + VertexIdx Target(const DirectedEdgeDescriptor &e) const { return boost::target(e, graph_); } - inline size_t out_degree(const vertex_idx &v) const { return boost::out_degree(v, graph); } + inline size_t OutDegree(const VertexIdx &v) const { return boost::out_degree(v, graph_); } - inline size_t in_degree(const vertex_idx &v) const { return boost::in_degree(v, graph); } + inline size_t InDegree(const VertexIdx &v) const { return boost::in_degree(v, graph_); } - vertex_work_weight_type vertex_work_weight(const vertex_idx &v) 
const { return graph[v].workWeight; } + VertexWorkWeightType VertexWorkWeight(const VertexIdx &v) const { return graph_[v].workWeight_; } - vertex_comm_weight_type vertex_comm_weight(const vertex_idx &v) const { return graph[v].communicationWeight; } + VertexCommWeightType VertexCommWeight(const VertexIdx &v) const { return graph_[v].communicationWeight_; } - vertex_mem_weight_type vertex_mem_weight(const vertex_idx &v) const { return graph[v].memoryWeight; } + VertexMemWeightType VertexMemWeight(const VertexIdx &v) const { return graph_[v].memoryWeight_; } - vertex_type_type vertex_type(const vertex_idx &v) const { return graph[v].nodeType; } + VertexTypeType VertexType(const VertexIdx &v) const { return graph_[v].nodeType_; } - edge_comm_weight_type edge_comm_weight(const directed_edge_descriptor &e) const { return graph[e].communicationWeight; } + EdgeCommWeightType EdgeCommWeight(const DirectedEdgeDescriptor &e) const { return graph_[e].communicationWeight_; } - void set_vertex_mem_weight(const vertex_idx &v, const vertex_mem_weight_type memory_weight) { - graph[v].memoryWeight = memory_weight; + void SetVertexMemWeight(const VertexIdx &v, const VertexMemWeightType memoryWeight) { + graph_[v].memoryWeight_ = memoryWeight; } - void set_vertex_work_weight(const vertex_idx &v, const vertex_work_weight_type work_weight) { - graph[v].workWeight = work_weight; - } + void SetVertexWorkWeight(const VertexIdx &v, const VertexWorkWeightType workWeight) { graph_[v].workWeight_ = workWeight; } - void set_vertex_type(const vertex_idx &v, const vertex_type_type node_type) { - graph[v].nodeType = node_type; - number_of_vertex_types = std::max(number_of_vertex_types, node_type + 1); + void SetVertexType(const VertexIdx &v, const VertexTypeType nodeType) { + graph_[v].nodeType_ = nodeType; + numberOfVertexTypes_ = std::max(numberOfVertexTypes_, nodeType + 1); } - void set_vertex_comm_weight(const vertex_idx &v, const vertex_comm_weight_type comm_weight) { - 
graph[v].communicationWeight = comm_weight; + void SetVertexCommWeight(const VertexIdx &v, const VertexCommWeightType commWeight) { + graph_[v].communicationWeight_ = commWeight; } - void set_edge_comm_weight(const directed_edge_descriptor &e, const edge_comm_weight_type comm_weight) { - graph[e].communicationWeight = comm_weight; + void SetEdgeCommWeight(const DirectedEdgeDescriptor &e, const EdgeCommWeightType commWeight) { + graph_[e].communicationWeight_ = commWeight; } - vertex_idx add_vertex(const vertex_work_weight_type work_weight, - const vertex_comm_weight_type comm_weight, - const vertex_mem_weight_type memory_weight = 0, - const vertex_type_type node_type = 0) { - number_of_vertex_types = std::max(number_of_vertex_types, node_type + 1); - return boost::add_vertex(boost_vertex{work_weight, comm_weight, memory_weight, node_type}, graph); + VertexIdx AddVertex(const VertexWorkWeightType workWeight, + const VertexCommWeightType commWeight, + const VertexMemWeightType memoryWeight = 0, + const VertexTypeType nodeType = 0) { + numberOfVertexTypes_ = std::max(numberOfVertexTypes_, nodeType + 1); + return boost::add_vertex( + BoostVertex{ + workWeight, commWeight, memoryWeight, nodeType}, + graph_); } - std::pair, bool> add_edge( - const vertex_idx &src, const vertex_idx &tar, edge_commw_t comm_weight = DEFAULT_EDGE_COMM_WEIGHT) { - const auto pair = boost::add_edge(src, tar, {comm_weight}, graph); + std::pair, bool> AddEdge( + const VertexIdx &src, const VertexIdx &tar, EdgeCommwT commWeight = defaultEdgeCommWeight_) { + const auto pair = boost::add_edge(src, tar, {commWeight}, graph_); - number_of_vertex_types = std::max(number_of_vertex_types, 1u); // in case adding edges adds vertices + numberOfVertexTypes_ = std::max(numberOfVertexTypes_, 1u); // in case adding edges adds vertices return pair; } - void remove_edge(const directed_edge_descriptor &e) { boost::remove_edge(e, graph); } + void RemoveEdge(const DirectedEdgeDescriptor &e) { boost::remove_edge(e, 
graph_); } - void remove_vertex(const vertex_idx &v) { - boost::remove_vertex(v, graph); - updateNumberOfVertexTypes(); + void RemoveVertex(const VertexIdx &v) { + boost::remove_vertex(v, graph_); + UpdateNumberOfVertexTypes(); } - void clear_vertex(const vertex_idx &v) { boost::clear_vertex(v, graph); } + void ClearVertex(const VertexIdx &v) { boost::clear_vertex(v, graph_); } private: - boost_graph_impl_t graph; + BoostGraphImplT graph_; - vertex_type_type number_of_vertex_types; + VertexTypeType numberOfVertexTypes_; - static constexpr edge_comm_weight_type DEFAULT_EDGE_COMM_WEIGHT = 1; + static constexpr EdgeCommWeightType defaultEdgeCommWeight_ = 1; }; -template -inline auto edges(const boost_graph &graph) { - return graph.edges(); +template +inline auto Edges(const BoostGraph &graph) { + return graph.Edges(); } -template -inline auto out_edges(osp::vertex_idx_t> v, - const boost_graph &graph) { - return graph.out_edges(v); +template +inline auto OutEdges(osp::VertexIdxT> v, + const BoostGraph &graph) { + return graph.OutEdges(v); } -template -inline auto in_edges(osp::vertex_idx_t> v, - const boost_graph &graph) { - return graph.in_edges(v); +template +inline auto InEdges(osp::VertexIdxT> v, + const BoostGraph &graph) { + return graph.InEdges(v); } -template -inline osp::vertex_idx_t> source( - const osp::edge_desc_t> &edge, - const boost_graph &graph) { - return graph.source(edge); +template +inline osp::VertexIdxT> Source( + const osp::EdgeDescT> &edge, + const BoostGraph &graph) { + return graph.Source(edge); } -template -inline osp::vertex_idx_t> target( - const osp::edge_desc_t> &edge, - const boost_graph &graph) { - return graph.target(edge); +template +inline osp::VertexIdxT> Target( + const osp::EdgeDescT> &edge, + const BoostGraph &graph) { + return graph.Target(edge); } -using boost_graph_int_t = boost_graph; -using boost_graph_uint_t = boost_graph; +using BoostGraphIntT = BoostGraph; +using BoostGraphUintT = BoostGraph; 
-static_assert(osp::is_directed_graph_edge_desc_v, - "boost_graph_adapter does not satisfy the directed_graph_edge_desc concept"); +static_assert(osp::isDirectedGraphEdgeDescV, + "BoostGraphIntT does not satisfy the directed_graph_edge_desc concept"); -static_assert(osp::is_computational_dag_typed_vertices_edge_desc_v, - "boost_graph_adapter must satisfy the computational_dag_typed_vertices_edge_desc concept"); +static_assert(osp::isComputationalDagTypedVerticesEdgeDescV, + "BoostGraphIntT must satisfy the computational_dag_typed_vertices_edge_desc concept"); -static_assert(osp::is_constructable_cdag_vertex_v, - "boost_graph_adapter must satisfy the is_constructable_cdag_vertex concept"); +static_assert(osp::isConstructableCdagVertexV, + "BoostGraphIntT must satisfy the is_constructable_cdag_vertex concept"); -static_assert(osp::is_constructable_cdag_typed_vertex_v, - "boost_graph_adapter must satisfy the is_constructable_cdag_typed_vertex concept"); +static_assert(osp::isConstructableCdagTypedVertexV, + "BoostGraphIntT must satisfy the is_constructable_cdag_typed_vertex concept"); -static_assert(osp::is_constructable_cdag_edge_v, - "boost_graph_adapter must satisfy the is_constructable_cdag_edge concept"); +static_assert(osp::isConstructableCdagEdgeV, "BoostGraphIntT must satisfy the is_constructable_cdag_edge concept"); -static_assert(osp::is_constructable_cdag_comm_edge_v, - "boost_graph_adapter must satisfy the is_constructable_cdag_comm_edge concept"); +static_assert(osp::isConstructableCdagCommEdgeV, + "BoostGraphIntT must satisfy the is_constructable_cdag_comm_edge concept"); diff --git a/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp b/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp index 17fcf796..2bfb7255 100644 --- a/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp +++ b/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp @@ -52,7 +52,7 @@ void 
inv_breadth_first_visit( Q.pop(); vis.examine_vertex(u, g); for (boost::tie(ei, ei_end) = in_edges(u, g); ei != ei_end; ++ei) { - Vertex v = target(*ei, g); + Vertex v = Target(*ei, g); vis.examine_edge(*ei, g); ColorValue v_color = get(color, v); if (v_color == Color::white()) { diff --git a/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp b/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp index 868a34f4..5cc4c1c4 100644 --- a/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp +++ b/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp @@ -23,37 +23,37 @@ limitations under the License. namespace boost::extensions { template -struct source_iterator_range { +struct SourceIteratorRange { public: - IteratorType begin() { return _begin; } + IteratorType begin() { return begin_; } - IteratorType begin() const { return _begin; } + IteratorType begin() const { return begin_; } - IteratorType cbegin() const { return _begin; } + IteratorType cbegin() const { return begin_; } - IteratorType end() { return _end; } + IteratorType end() { return end_; } - IteratorType end() const { return _end; } + IteratorType end() const { return end_; } - IteratorType cend() const { return _end; } + IteratorType cend() const { return end_; } std::size_t size() const { return std::distance(cbegin(), cend()); } template - explicit source_iterator_range(RangeType &r) : _begin(boost::begin(r)), _end(boost::end(r)) {} + explicit SourceIteratorRange(RangeType &r) : begin_(boost::begin(r)), end_(boost::end(r)) {} private: - IteratorType _begin, _end; + IteratorType begin_, end_; }; template -source_iterator_range()))> make_source_iterator_range(const ForwardRange &r) { - return source_iterator_range()))>(r); +SourceIteratorRange()))> MakeSourceIteratorRange(const ForwardRange &r) { + return SourceIteratorRange()))>(r); } template -source_iterator_range()))> make_source_iterator_range(ForwardRange &r) { - return 
source_iterator_range()))>(r); +SourceIteratorRange()))> MakeSourceIteratorRange(ForwardRange &r) { + return SourceIteratorRange()))>(r); } } // namespace boost::extensions diff --git a/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp b/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp index 616b750c..96c23983 100644 --- a/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp +++ b/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp @@ -26,22 +26,22 @@ limitations under the License. namespace osp { -template +template class EigenCSRRange { const Graph &graph_; - eigen_idx_type index_; + EigenIdxType index_; public: - using CSRMatrix = Eigen::SparseMatrix; + using CSRMatrix = Eigen::SparseMatrix; using Inner = typename CSRMatrix::InnerIterator; - class iterator { + class Iterator { Inner it_; - eigen_idx_type skip_; - bool at_end_; + EigenIdxType skip_; + bool atEnd_; - void skip_diagonal() { - while (((!at_end_) && (it_.row() == skip_)) & (it_.col() == skip_)) { + void SkipDiagonal() { + while (((!atEnd_) && (it_.row() == skip_)) & (it_.col() == skip_)) { ++(*this); } } @@ -53,68 +53,68 @@ class EigenCSRRange { using difference_type = std::ptrdiff_t; using iterator_category = std::input_iterator_tag; - iterator() = default; + Iterator() = default; - iterator(const iterator &other) : it_(other.it_), skip_(other.skip_), at_end_(other.at_end_) {} + Iterator(const Iterator &other) : it_(other.it_), skip_(other.skip_), atEnd_(other.atEnd_) {} - iterator &operator=(const iterator &other) { + Iterator &operator=(const Iterator &other) { it_ = other.it_; skip_ = other.skip_; - at_end_ = other.at_end_; + atEnd_ = other.atEnd_; return *this; } - iterator(const CSRMatrix &mat, eigen_idx_type idx, bool end = false) : skip_(idx), at_end_(end) { + Iterator(const CSRMatrix &mat, EigenIdxType idx, bool end = false) : skip_(idx), atEnd_(end) { if (!end) { it_ = 
Inner(mat, idx); - at_end_ = !it_; - skip_diagonal(); + atEnd_ = !it_; + SkipDiagonal(); } } reference operator*() const { return static_cast(it_.col()); } - iterator &operator++() { + Iterator &operator++() { ++it_; - at_end_ = !it_; - skip_diagonal(); + atEnd_ = !it_; + SkipDiagonal(); return *this; } - iterator operator++(int) { - iterator temp = *this; + Iterator operator++(int) { + Iterator temp = *this; ++(*this); return temp; } - bool operator==(const iterator &) const { return at_end_; } + bool operator==(const Iterator &) const { return atEnd_; } - bool operator!=(const iterator &) const { return !at_end_; } + bool operator!=(const Iterator &) const { return !atEnd_; } }; - EigenCSRRange(const Graph &graph, eigen_idx_type idx) : graph_(graph), index_(idx) {} + EigenCSRRange(const Graph &graph, EigenIdxType idx) : graph_(graph), index_(idx) {} - iterator begin() const { return iterator(*graph_.getCSR(), index_); } + Iterator begin() const { return Iterator(*graph_.GetCSR(), index_); } - iterator end() const { return iterator(*graph_.getCSR(), index_, true); } + Iterator end() const { return Iterator(*graph_.GetCSR(), index_, true); } }; -template +template class EigenCSCRange { const Graph &graph_; - eigen_idx_type index_; + EigenIdxType index_; public: - using CSCMatrix = Eigen::SparseMatrix; + using CSCMatrix = Eigen::SparseMatrix; using Inner = typename CSCMatrix::InnerIterator; - class iterator { + class Iterator { Inner it_; - eigen_idx_type skip_; - bool at_end_; + EigenIdxType skip_; + bool atEnd_; - void skip_diagonal() { - while ((!at_end_) & (it_.row() == skip_) & (it_.col() == skip_)) { + void SkipDiagonal() { + while ((!atEnd_) & (it_.row() == skip_) & (it_.col() == skip_)) { ++(*this); } } @@ -126,50 +126,50 @@ class EigenCSCRange { using difference_type = std::ptrdiff_t; using iterator_category = std::input_iterator_tag; - iterator() = default; + Iterator() = default; - iterator(const iterator &other) : it_(other.it_), skip_(other.skip_), 
at_end_(other.at_end_) {} + Iterator(const Iterator &other) : it_(other.it_), skip_(other.skip_), atEnd_(other.atEnd_) {} - iterator &operator=(const iterator &other) { + Iterator &operator=(const Iterator &other) { it_ = other.it_; skip_ = other.skip_; - at_end_ = other.at_end_; + atEnd_ = other.atEnd_; return *this; } - iterator(const CSCMatrix &mat, eigen_idx_type idx, bool end = false) : skip_(idx), at_end_(end) { + Iterator(const CSCMatrix &mat, EigenIdxType idx, bool end = false) : skip_(idx), atEnd_(end) { if (!end) { it_ = Inner(mat, idx); - at_end_ = !it_; - skip_diagonal(); + atEnd_ = !it_; + SkipDiagonal(); } } reference operator*() const { return static_cast(it_.row()); } - iterator &operator++() { + Iterator &operator++() { ++it_; - at_end_ = !it_; - skip_diagonal(); + atEnd_ = !it_; + SkipDiagonal(); return *this; } - iterator operator++(int) { - iterator temp = *this; + Iterator operator++(int) { + Iterator temp = *this; ++(*this); return temp; } - bool operator==(const iterator &) const { return at_end_; } + bool operator==(const Iterator &) const { return atEnd_; } - bool operator!=(const iterator &) const { return !at_end_; } + bool operator!=(const Iterator &) const { return !atEnd_; } }; - EigenCSCRange(const Graph &graph, eigen_idx_type idx) : graph_(graph), index_(idx) {} + EigenCSCRange(const Graph &graph, EigenIdxType idx) : graph_(graph), index_(idx) {} - iterator begin() const { return iterator(*graph_.getCSC(), index_); } + Iterator begin() const { return Iterator(*graph_.GetCSC(), index_); } - iterator end() const { return iterator(*graph_.getCSC(), index_, true); } + Iterator end() const { return Iterator(*graph_.GetCSC(), index_, true); } }; } // namespace osp diff --git a/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp b/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp index 7d989f69..96bdad19 100644 --- a/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp +++ 
b/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp @@ -32,104 +32,103 @@ namespace osp { /// @brief Implementation of a lower‐triangular sparse matrix as a directed acyclic graph. /// Wraps Eigen's sparse matrix and exposes graph-like methods for scheduling and analysis. -template +template class SparseMatrixImp { - static_assert(std::is_integral_v, "Eigen index type must be integral"); + static_assert(std::is_integral_v, "Eigen index type must be integral"); private: // Define Eigen-compatible matrix types using eigen_idx_type as the index type - using MatrixCSR = Eigen::SparseMatrix; // For parents - using MatrixCSC = Eigen::SparseMatrix; // For children + using MatrixCSR = Eigen::SparseMatrix; // For parents + using MatrixCSC = Eigen::SparseMatrix; // For children // Internal pointers to the sparse matrices (not owning) - MatrixCSR *L_csr_p = nullptr; - MatrixCSC *L_csc_p = nullptr; + MatrixCSR *lCsrP_ = nullptr; + MatrixCSC *lCscP_ = nullptr; public: // Vertex index type must match Eigen's StorageIndex (signed 32-bit) - using vertex_idx = size_t; + using VertexIdx = size_t; // Required graph trait aliases (used in concept checks) - using vertex_work_weight_type = eigen_idx_type; - using vertex_comm_weight_type = eigen_idx_type; - using vertex_mem_weight_type = int; - using vertex_type_type = unsigned; + using VertexWorkWeightType = EigenIdxType; + using VertexCommWeightType = EigenIdxType; + using VertexMemWeightType = int; + using VertexTypeType = unsigned; - using eigen_idx_t = eigen_idx_type; + using EigenIdxT = EigenIdxType; SparseMatrixImp() = default; // Setters for the internal CSR and CSC matrix pointers - void setCSR(MatrixCSR *mat) { L_csr_p = mat; } + void SetCsr(MatrixCSR *mat) { lCsrP_ = mat; } - void setCSC(MatrixCSC *mat) { L_csc_p = mat; } + void SetCsc(MatrixCSC *mat) { lCscP_ = mat; } // Getters for internal matrices (used by EigenSparseRange) - const MatrixCSR *getCSR() const { return L_csr_p; } + const MatrixCSR 
*GetCSR() const { return lCsrP_; } - const MatrixCSC *getCSC() const { return L_csc_p; } + const MatrixCSC *GetCSC() const { return lCscP_; } /// @brief Number of vertices = number of rows in the matrix - size_t num_vertices() const noexcept { return static_cast(L_csr_p->rows()); } + size_t NumVertices() const noexcept { return static_cast(lCsrP_->rows()); } - /// @brief Return a range over all vertices [0, num_vertices) - auto vertices() const { return osp::integral_range(num_vertices()); } + /// @brief Return a range over all vertices [0, NumVertices) + auto Vertices() const { return osp::IntegralRange(NumVertices()); } /// @brief Number of edges = total non-zeros minus diagonal elements - vertex_idx num_edges() const noexcept { return static_cast(L_csr_p->nonZeros() - L_csr_p->rows()); } + VertexIdx NumEdges() const noexcept { return static_cast(lCsrP_->nonZeros() - lCsrP_->rows()); } /// @brief In-degree = non-zero off-diagonal entries in row v (CSR) - vertex_idx in_degree(vertex_idx v) const noexcept { - return static_cast(L_csr_p->outerIndexPtr()[v + 1] - L_csr_p->outerIndexPtr()[v] - 1); + VertexIdx InDegree(VertexIdx v) const noexcept { + return static_cast(lCsrP_->outerIndexPtr()[v + 1] - lCsrP_->outerIndexPtr()[v] - 1); } /// @brief Out-degree = non-zero off-diagonal entries in column v (CSC) - vertex_idx out_degree(vertex_idx v) const noexcept { - return static_cast(L_csc_p->outerIndexPtr()[v + 1] - L_csc_p->outerIndexPtr()[v] - 1); + VertexIdx OutDegree(VertexIdx v) const noexcept { + return static_cast(lCscP_->outerIndexPtr()[v + 1] - lCscP_->outerIndexPtr()[v] - 1); } /// @brief Get the children (dependents) of vertex v using CSC layout - auto children(vertex_idx v) const { - return osp::EigenCSCRange(*this, static_cast(v)); + auto Children(VertexIdx v) const { + return osp::EigenCSCRange(*this, static_cast(v)); } /// @brief Get the parents (dependencies) of vertex v using CSR layout - auto parents(vertex_idx v) const { - return 
osp::EigenCSRRange(*this, static_cast(v)); + auto Parents(VertexIdx v) const { + return osp::EigenCSRRange(*this, static_cast(v)); } /// @brief Work weight of a vertex (e.g., row size) - vertex_work_weight_type vertex_work_weight(vertex_idx v) const noexcept { - return L_csr_p->outerIndexPtr()[v + 1] - L_csr_p->outerIndexPtr()[v]; + VertexWorkWeightType VertexWorkWeight(VertexIdx v) const noexcept { + return lCsrP_->outerIndexPtr()[v + 1] - lCsrP_->outerIndexPtr()[v]; } // Default zero weights (placeholders, extend as needed) - vertex_comm_weight_type vertex_comm_weight(vertex_idx) const noexcept { return 0; } + VertexCommWeightType VertexCommWeight(VertexIdx) const noexcept { return 0; } - vertex_mem_weight_type vertex_mem_weight(vertex_idx) const noexcept { return 0; } + VertexMemWeightType VertexMemWeight(VertexIdx) const noexcept { return 0; } - inline unsigned num_vertex_types() const { return 1; }; + inline unsigned NumVertexTypes() const { return 1; }; - inline vertex_type_type vertex_type(const vertex_idx) const { return 0; } + inline VertexTypeType VertexType(const VertexIdx) const { return 0; } }; -using sparse_matrix_graph_int32_t = SparseMatrixImp; -using sparse_matrix_graph_int64_t = SparseMatrixImp; +using SparseMatrixGraphInt32T = SparseMatrixImp; +using SparseMatrixGraphInt64T = SparseMatrixImp; -static_assert(is_directed_graph_edge_desc_v>, - "SparseMatrix must satisfy the directed_graph_edge_desc concept"); +static_assert(isDirectedGraphEdgeDescV>, "SparseMatrix must satisfy the directed_graph_edge_desc concept"); // Verify that SparseMatrixImp satisfies the directed graph concept -static_assert(is_directed_graph_v>, "SparseMatrix must satisfy directed_graph_concept"); +static_assert(isDirectedGraphV>, "SparseMatrix must satisfy directed_graph_concept"); -static_assert(is_directed_graph_v>, "SparseMatrix must satisfy directed_graph_concept"); +static_assert(isDirectedGraphV>, "SparseMatrix must satisfy directed_graph_concept"); 
-static_assert(has_vertex_weights_v>, "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); +static_assert(hasVertexWeightsV>, "CompactSparseGraph must satisfy the has_vertex_weights concept"); -static_assert(has_vertex_weights_v>, "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); +static_assert(hasVertexWeightsV>, "CompactSparseGraph must satisfy the has_vertex_weights concept"); -static_assert(is_computational_dag_typed_vertices_v>, - "Compact_Sparse_Graph must satisfy the is_computation_dag concept"); +static_assert(isComputationalDagTypedVerticesV>, + "CompactSparseGraph must satisfy the is_computation_dag concept"); } // namespace osp diff --git a/include/osp/graph_implementations/integral_range.hpp b/include/osp/graph_implementations/integral_range.hpp index 0217c64f..40bdeab7 100644 --- a/include/osp/graph_implementations/integral_range.hpp +++ b/include/osp/graph_implementations/integral_range.hpp @@ -32,11 +32,11 @@ namespace osp { * @tparam T The integral type of the values (e.g., int, unsigned, size_t). */ template -class integral_range { +class IntegralRange { static_assert(std::is_integral::value, "integral_range requires an integral type"); - T start; - T finish; + T start_; + T finish_; public: /** @@ -44,7 +44,7 @@ class integral_range { * * This iterator satisfies the RandomAccessIterator concept. */ - class integral_iterator { // public for std::reverse_iterator + class IntegralIterator { // public for std::reverse_iterator public: using iterator_category = std::random_access_iterator_tag; using difference_type = std::ptrdiff_t; @@ -55,157 +55,157 @@ class integral_range { /** * @brief Proxy object to support operator-> for integral types. 
*/ - struct arrow_proxy { - T value; + struct ArrowProxy { + T value_; - constexpr const T *operator->() const noexcept { return &value; } + constexpr const T *operator->() const noexcept { return &value_; } }; private: - value_type current; + value_type current_; public: /** * @brief Default constructor. Initializes iterator to 0. */ - constexpr integral_iterator() noexcept : current(0) {} + constexpr IntegralIterator() noexcept : current_(0) {} /** * @brief Constructs an iterator pointing to the given value. * @param start The starting value. */ - explicit constexpr integral_iterator(value_type start) noexcept : current(start) {} + explicit constexpr IntegralIterator(value_type start) noexcept : current_(start) {} - constexpr integral_iterator(const integral_iterator &) noexcept = default; - constexpr integral_iterator &operator=(const integral_iterator &) noexcept = default; - ~integral_iterator() = default; + constexpr IntegralIterator(const IntegralIterator &) noexcept = default; + constexpr IntegralIterator &operator=(const IntegralIterator &) noexcept = default; + ~IntegralIterator() = default; /** * @brief Dereference operator. * @return The current integral value. */ - [[nodiscard]] constexpr value_type operator*() const noexcept { return current; } + [[nodiscard]] constexpr value_type operator*() const noexcept { return current_; } /** * @brief Arrow operator. * @return A proxy object that allows access to the address of the value. 
*/ - [[nodiscard]] constexpr arrow_proxy operator->() const noexcept { return arrow_proxy{current}; } + [[nodiscard]] constexpr ArrowProxy operator->() const noexcept { return ArrowProxy{current_}; } - constexpr integral_iterator &operator++() noexcept { - ++current; + constexpr IntegralIterator &operator++() noexcept { + ++current_; return *this; } - constexpr integral_iterator operator++(int) noexcept { - integral_iterator temp = *this; + constexpr IntegralIterator operator++(int) noexcept { + IntegralIterator temp = *this; ++(*this); return temp; } - constexpr integral_iterator &operator--() noexcept { - --current; + constexpr IntegralIterator &operator--() noexcept { + --current_; return *this; } - constexpr integral_iterator operator--(int) noexcept { - integral_iterator temp = *this; + constexpr IntegralIterator operator--(int) noexcept { + IntegralIterator temp = *this; --(*this); return temp; } - [[nodiscard]] constexpr bool operator==(const integral_iterator &other) const noexcept { - return current == other.current; + [[nodiscard]] constexpr bool operator==(const IntegralIterator &other) const noexcept { + return current_ == other.current_; } - [[nodiscard]] constexpr bool operator!=(const integral_iterator &other) const noexcept { return !(*this == other); } + [[nodiscard]] constexpr bool operator!=(const IntegralIterator &other) const noexcept { return !(*this == other); } - constexpr integral_iterator &operator+=(difference_type n) noexcept { - current = static_cast(current + n); + constexpr IntegralIterator &operator+=(difference_type n) noexcept { + current_ = static_cast(current_ + n); return *this; } - [[nodiscard]] constexpr integral_iterator operator+(difference_type n) const noexcept { - integral_iterator temp = *this; + [[nodiscard]] constexpr IntegralIterator operator+(difference_type n) const noexcept { + IntegralIterator temp = *this; return temp += n; } - [[nodiscard]] friend constexpr integral_iterator operator+(difference_type n, const 
integral_iterator &it) noexcept { + [[nodiscard]] friend constexpr IntegralIterator operator+(difference_type n, const IntegralIterator &it) noexcept { return it + n; } - constexpr integral_iterator &operator-=(difference_type n) noexcept { - current = static_cast(current - n); + constexpr IntegralIterator &operator-=(difference_type n) noexcept { + current_ = static_cast(current_ - n); return *this; } - [[nodiscard]] constexpr integral_iterator operator-(difference_type n) const noexcept { - integral_iterator temp = *this; + [[nodiscard]] constexpr IntegralIterator operator-(difference_type n) const noexcept { + IntegralIterator temp = *this; return temp -= n; } - [[nodiscard]] constexpr difference_type operator-(const integral_iterator &other) const noexcept { - return static_cast(current) - static_cast(other.current); + [[nodiscard]] constexpr difference_type operator-(const IntegralIterator &other) const noexcept { + return static_cast(current_) - static_cast(other.current_); } [[nodiscard]] constexpr value_type operator[](difference_type n) const noexcept { return *(*this + n); } - [[nodiscard]] constexpr bool operator<(const integral_iterator &other) const noexcept { return current < other.current; } + [[nodiscard]] constexpr bool operator<(const IntegralIterator &other) const noexcept { return current_ < other.current_; } - [[nodiscard]] constexpr bool operator>(const integral_iterator &other) const noexcept { return current > other.current; } + [[nodiscard]] constexpr bool operator>(const IntegralIterator &other) const noexcept { return current_ > other.current_; } - [[nodiscard]] constexpr bool operator<=(const integral_iterator &other) const noexcept { - return current <= other.current; + [[nodiscard]] constexpr bool operator<=(const IntegralIterator &other) const noexcept { + return current_ <= other.current_; } - [[nodiscard]] constexpr bool operator>=(const integral_iterator &other) const noexcept { - return current >= other.current; + [[nodiscard]] 
constexpr bool operator>=(const IntegralIterator &other) const noexcept { + return current_ >= other.current_; } }; - using reverse_integral_iterator = std::reverse_iterator; + using ReverseIntegralIterator = std::reverse_iterator; public: /** * @brief Constructs a range [0, end). * @param end_ The exclusive upper bound. */ - constexpr integral_range(T end_) noexcept : start(static_cast(0)), finish(end_) {} + constexpr IntegralRange(T end) noexcept : start_(static_cast(0)), finish_(end) {} /** * @brief Constructs a range [start, end). * @param start_ The inclusive lower bound. * @param end_ The exclusive upper bound. */ - constexpr integral_range(T start_, T end_) noexcept : start(start_), finish(end_) {} + constexpr IntegralRange(T start, T end) noexcept : start_(start), finish_(end) {} - [[nodiscard]] constexpr integral_iterator begin() const noexcept { return integral_iterator(start); } + [[nodiscard]] constexpr IntegralIterator begin() const noexcept { return IntegralIterator(start_); } - [[nodiscard]] constexpr integral_iterator cbegin() const noexcept { return integral_iterator(start); } + [[nodiscard]] constexpr IntegralIterator cbegin() const noexcept { return IntegralIterator(start_); } - [[nodiscard]] constexpr integral_iterator end() const noexcept { return integral_iterator(finish); } + [[nodiscard]] constexpr IntegralIterator end() const noexcept { return IntegralIterator(finish_); } - [[nodiscard]] constexpr integral_iterator cend() const noexcept { return integral_iterator(finish); } + [[nodiscard]] constexpr IntegralIterator cend() const noexcept { return IntegralIterator(finish_); } - [[nodiscard]] constexpr reverse_integral_iterator rbegin() const noexcept { return reverse_integral_iterator(end()); } + [[nodiscard]] constexpr ReverseIntegralIterator rbegin() const noexcept { return ReverseIntegralIterator(end()); } - [[nodiscard]] constexpr reverse_integral_iterator crbegin() const noexcept { return reverse_integral_iterator(cend()); } + 
[[nodiscard]] constexpr ReverseIntegralIterator crbegin() const noexcept { return ReverseIntegralIterator(cend()); } - [[nodiscard]] constexpr reverse_integral_iterator rend() const noexcept { return reverse_integral_iterator(begin()); } + [[nodiscard]] constexpr ReverseIntegralIterator rend() const noexcept { return ReverseIntegralIterator(begin()); } - [[nodiscard]] constexpr reverse_integral_iterator crend() const noexcept { return reverse_integral_iterator(cbegin()); } + [[nodiscard]] constexpr ReverseIntegralIterator crend() const noexcept { return ReverseIntegralIterator(cbegin()); } /** * @brief Returns the number of elements in the range. * @return The size of the range. */ - [[nodiscard]] constexpr auto size() const noexcept { return finish - start; } + [[nodiscard]] constexpr auto size() const noexcept { return finish_ - start_; } /** * @brief Checks if the range is empty. * @return True if the range is empty, false otherwise. */ - [[nodiscard]] constexpr bool empty() const noexcept { return start == finish; } + [[nodiscard]] constexpr bool empty() const noexcept { return start_ == finish_; } }; } // namespace osp diff --git a/include/osp/partitioning/model/hypergraph.hpp b/include/osp/partitioning/model/hypergraph.hpp index 9c36beb6..b8a0bb71 100644 --- a/include/osp/partitioning/model/hypergraph.hpp +++ b/include/osp/partitioning/model/hypergraph.hpp @@ -25,168 +25,167 @@ limitations under the License. 
namespace osp { -template +template class Hypergraph { - using this_t = Hypergraph; + using ThisT = Hypergraph; public: - using vertex_idx = index_type; - using vertex_work_weight_type = workw_type; - using vertex_mem_weight_type = memw_type; - using vertex_comm_weight_type = commw_type; + using VertexIdx = IndexType; + using VertexWorkWeightType = WorkwType; + using VertexMemWeightType = MemwType; + using VertexCommWeightType = CommwType; Hypergraph() = default; - Hypergraph(index_type num_vertices_, index_type num_hyperedges_) - : Num_vertices(num_vertices_), - Num_hyperedges(num_hyperedges_), - vertex_work_weights(num_vertices_, 1), - vertex_memory_weights(num_vertices_, 1), - hyperedge_weights(num_hyperedges_, 1), - incident_hyperedges_to_vertex(num_vertices_), - vertices_in_hyperedge(num_hyperedges_) {} + Hypergraph(IndexType numVertices, IndexType numHyperedges) + : numVertices_(numVertices), + numHyperedges_(numHyperedges), + vertexWorkWeights_(numVertices, 1), + vertexMemoryWeights_(numVertices, 1), + hyperedgeWeights_(numHyperedges, 1), + incidentHyperedgesToVertex_(numVertices), + verticesInHyperedge_(numHyperedges) {} - Hypergraph(const this_t &other) = default; - Hypergraph &operator=(const this_t &other) = default; + Hypergraph(const ThisT &other) = default; + Hypergraph &operator=(const ThisT &other) = default; virtual ~Hypergraph() = default; - inline index_type num_vertices() const { return Num_vertices; } + inline IndexType NumVertices() const { return numVertices_; } - inline index_type num_hyperedges() const { return Num_hyperedges; } + inline IndexType NumHyperedges() const { return numHyperedges_; } - inline index_type num_pins() const { return Num_pins; } + inline IndexType NumPins() const { return numPins_; } - inline workw_type get_vertex_work_weight(index_type node) const { return vertex_work_weights[node]; } + inline WorkwType GetVertexWorkWeight(IndexType node) const { return vertexWorkWeights_[node]; } - inline memw_type 
get_vertex_memory_weight(index_type node) const { return vertex_memory_weights[node]; } + inline MemwType GetVertexMemoryWeight(IndexType node) const { return vertexMemoryWeights_[node]; } - inline commw_type get_hyperedge_weight(index_type hyperedge) const { return hyperedge_weights[hyperedge]; } + inline CommwType GetHyperedgeWeight(IndexType hyperedge) const { return hyperedgeWeights_[hyperedge]; } - void add_pin(index_type vertex_idx, index_type hyperedge_idx); - void add_vertex(workw_type work_weight = 1, memw_type memory_weight = 1); - void add_empty_hyperedge(commw_type weight = 1); - void add_hyperedge(const std::vector &pins, commw_type weight = 1); - void set_vertex_work_weight(index_type vertex_idx, workw_type weight); - void set_vertex_memory_weight(index_type vertex_idx, memw_type weight); - void set_hyperedge_weight(index_type hyperedge_idx, commw_type weight); + void AddPin(IndexType vertexIdx, IndexType hyperedgeIdx); + void AddVertex(WorkwType workWeight = 1, MemwType memoryWeight = 1); + void AddEmptyHyperedge(CommwType weight = 1); + void AddHyperedge(const std::vector &pins, CommwType weight = 1); + void SetVertexWorkWeight(IndexType vertexIdx, WorkwType weight); + void SetVertexMemoryWeight(IndexType vertexIdx, MemwType weight); + void SetHyperedgeWeight(IndexType hyperedgeIdx, CommwType weight); - void clear(); - void reset(index_type num_vertices_, index_type num_hyperedges_); + void Clear(); + void Reset(IndexType numVertices, IndexType numHyperedges); - inline const std::vector &get_incident_hyperedges(index_type vertex) const { - return incident_hyperedges_to_vertex[vertex]; + inline const std::vector &GetIncidentHyperedges(IndexType vertex) const { + return incidentHyperedgesToVertex_[vertex]; } - inline const std::vector &get_vertices_in_hyperedge(index_type hyperedge) const { - return vertices_in_hyperedge[hyperedge]; + inline const std::vector &GetVerticesInHyperedge(IndexType hyperedge) const { + return 
verticesInHyperedge_[hyperedge]; } private: - index_type Num_vertices = 0, Num_hyperedges = 0, Num_pins = 0; + IndexType numVertices_ = 0, numHyperedges_ = 0, numPins_ = 0; - std::vector vertex_work_weights; - std::vector vertex_memory_weights; - std::vector hyperedge_weights; + std::vector vertexWorkWeights_; + std::vector vertexMemoryWeights_; + std::vector hyperedgeWeights_; - std::vector> incident_hyperedges_to_vertex; - std::vector> vertices_in_hyperedge; + std::vector> incidentHyperedgesToVertex_; + std::vector> verticesInHyperedge_; }; -using Hypergraph_def_t = Hypergraph; +using HypergraphDefT = Hypergraph; -template -void Hypergraph::add_pin(index_type vertex, index_type hyperedge_idx) { - if (vertex >= Num_vertices) { +template +void Hypergraph::AddPin(IndexType vertex, IndexType hyperedgeIdx) { + if (vertex >= numVertices_) { throw std::invalid_argument("Invalid Argument while adding pin: vertex index out of range."); - } else if (hyperedge_idx >= Num_hyperedges) { + } else if (hyperedgeIdx >= numHyperedges_) { throw std::invalid_argument("Invalid Argument while adding pin: hyperedge index out of range."); } else { - incident_hyperedges_to_vertex[vertex].push_back(hyperedge_idx); - vertices_in_hyperedge[hyperedge_idx].push_back(vertex); - ++Num_pins; + incidentHyperedgesToVertex_[vertex].push_back(hyperedgeIdx); + verticesInHyperedge_[hyperedgeIdx].push_back(vertex); + ++numPins_; } } -template -void Hypergraph::add_vertex(workw_type work_weight, memw_type memory_weight) { - vertex_work_weights.push_back(work_weight); - vertex_memory_weights.push_back(memory_weight); - incident_hyperedges_to_vertex.emplace_back(); - ++Num_vertices; +template +void Hypergraph::AddVertex(WorkwType workWeight, MemwType memoryWeight) { + vertexWorkWeights_.push_back(workWeight); + vertexMemoryWeights_.push_back(memoryWeight); + incidentHyperedgesToVertex_.emplace_back(); + ++numVertices_; } -template -void Hypergraph::add_empty_hyperedge(commw_type weight) { - 
vertices_in_hyperedge.emplace_back(); - hyperedge_weights.push_back(weight); - ++Num_hyperedges; +template +void Hypergraph::AddEmptyHyperedge(CommwType weight) { + verticesInHyperedge_.emplace_back(); + hyperedgeWeights_.push_back(weight); + ++numHyperedges_; } -template -void Hypergraph::add_hyperedge(const std::vector &pins, - commw_type weight) { - vertices_in_hyperedge.emplace_back(pins); - hyperedge_weights.push_back(weight); - for (index_type vertex : pins) { - incident_hyperedges_to_vertex[vertex].push_back(Num_hyperedges); +template +void Hypergraph::AddHyperedge(const std::vector &pins, CommwType weight) { + verticesInHyperedge_.emplace_back(pins); + hyperedgeWeights_.push_back(weight); + for (IndexType vertex : pins) { + incidentHyperedgesToVertex_[vertex].push_back(numHyperedges_); } - ++Num_hyperedges; - Num_pins += static_cast(pins.size()); + ++numHyperedges_; + numPins_ += static_cast(pins.size()); } -template -void Hypergraph::set_vertex_work_weight(index_type vertex, workw_type weight) { - if (vertex >= Num_vertices) { +template +void Hypergraph::SetVertexWorkWeight(IndexType vertex, WorkwType weight) { + if (vertex >= numVertices_) { throw std::invalid_argument("Invalid Argument while setting vertex weight: vertex index out of range."); } else { - vertex_work_weights[vertex] = weight; + vertexWorkWeights_[vertex] = weight; } } -template -void Hypergraph::set_vertex_memory_weight(index_type vertex, memw_type weight) { - if (vertex >= Num_vertices) { +template +void Hypergraph::SetVertexMemoryWeight(IndexType vertex, MemwType weight) { + if (vertex >= numVertices_) { throw std::invalid_argument("Invalid Argument while setting vertex weight: vertex index out of range."); } else { - vertex_memory_weights[vertex] = weight; + vertexMemoryWeights_[vertex] = weight; } } -template -void Hypergraph::set_hyperedge_weight(index_type hyperedge_idx, commw_type weight) { - if (hyperedge_idx >= Num_hyperedges) { +template +void 
Hypergraph::SetHyperedgeWeight(IndexType hyperedgeIdx, CommwType weight) { + if (hyperedgeIdx >= numHyperedges_) { throw std::invalid_argument("Invalid Argument while setting hyperedge weight: hyepredge index out of range."); } else { - hyperedge_weights[hyperedge_idx] = weight; + hyperedgeWeights_[hyperedgeIdx] = weight; } } -template -void Hypergraph::clear() { - Num_vertices = 0; - Num_hyperedges = 0; - Num_pins = 0; - - vertex_work_weights.clear(); - vertex_memory_weights.clear(); - hyperedge_weights.clear(); - incident_hyperedges_to_vertex.clear(); - vertices_in_hyperedge.clear(); +template +void Hypergraph::Clear() { + numVertices_ = 0; + numHyperedges_ = 0; + numPins_ = 0; + + vertexWorkWeights_.clear(); + vertexMemoryWeights_.clear(); + hyperedgeWeights_.clear(); + incidentHyperedgesToVertex_.clear(); + verticesInHyperedge_.clear(); } -template -void Hypergraph::reset(index_type num_vertices_, index_type num_hyperedges_) { - clear(); +template +void Hypergraph::Reset(IndexType numVertices, IndexType numHyperedges) { + Clear(); - Num_vertices = num_vertices_; - Num_hyperedges = num_hyperedges_; + numVertices_ = numVertices; + numHyperedges_ = numHyperedges; - vertex_work_weights.resize(num_vertices_, 1); - vertex_memory_weights.resize(num_vertices_, 1); - hyperedge_weights.resize(num_hyperedges_, 1); - incident_hyperedges_to_vertex.resize(num_vertices_); - vertices_in_hyperedge.resize(num_hyperedges_); + vertexWorkWeights_.resize(numVertices, 1); + vertexMemoryWeights_.resize(numVertices, 1); + hyperedgeWeights_.resize(numHyperedges, 1); + incidentHyperedgesToVertex_.resize(numVertices); + verticesInHyperedge_.resize(numHyperedges); } } // namespace osp diff --git a/include/osp/partitioning/model/hypergraph_utility.hpp b/include/osp/partitioning/model/hypergraph_utility.hpp index a641519f..636d033d 100644 --- a/include/osp/partitioning/model/hypergraph_utility.hpp +++ b/include/osp/partitioning/model/hypergraph_utility.hpp @@ -35,128 +35,128 @@ namespace osp 
{ // summing up weights -template -typename hypergraph_t::vertex_work_weight_type compute_total_vertex_work_weight(const hypergraph_t &hgraph) { - using index_type = typename hypergraph_t::vertex_idx; - using workw_type = typename hypergraph_t::vertex_work_weight_type; - - workw_type total = 0; - for (index_type node = 0; node < hgraph.num_vertices(); ++node) { - total += hgraph.get_vertex_work_weight(node); +template +typename HypergraphT::VertexWorkWeightType ComputeTotalVertexWorkWeight(const HypergraphT &hgraph) { + using IndexType = typename HypergraphT::VertexIdx; + using WorkwType = typename HypergraphT::VertexWorkWeightType; + + WorkwType total = 0; + for (IndexType node = 0; node < hgraph.NumVertices(); ++node) { + total += hgraph.GetVertexWorkWeight(node); } return total; } -template -typename hypergraph_t::vertex_mem_weight_type compute_total_vertex_memory_weight(const hypergraph_t &hgraph) { - using index_type = typename hypergraph_t::vertex_idx; - using memw_type = typename hypergraph_t::vertex_mem_weight_type; +template +typename HypergraphT::VertexMemWeightType ComputeTotalVertexMemoryWeight(const HypergraphT &hgraph) { + using IndexType = typename HypergraphT::VertexIdx; + using MemwType = typename HypergraphT::VertexMemWeightType; - memw_type total = 0; - for (index_type node = 0; node < hgraph.num_vertices(); ++node) { - total += hgraph.get_vertex_memory_weight(node); + MemwType total = 0; + for (IndexType node = 0; node < hgraph.NumVertices(); ++node) { + total += hgraph.GetVertexMemoryWeight(node); } return total; } // get induced subhypergraph -template -hypergraph_t create_induced_hypergraph(const hypergraph_t &hgraph, const std::vector &include) { - if (include.size() != hgraph.num_vertices()) { +template +HypergraphT CreateInducedHypergraph(const HypergraphT &hgraph, const std::vector &include) { + if (include.size() != hgraph.NumVertices()) { throw std::invalid_argument("Invalid Argument while extracting induced hypergraph: input bool array 
has incorrect size."); } - using index_type = typename hypergraph_t::vertex_idx; + using IndexType = typename HypergraphT::VertexIdx; - std::vector new_index(hgraph.num_vertices()); - unsigned current_index = 0; - for (index_type node = 0; node < hgraph.num_vertices(); ++node) { + std::vector newIndex(hgraph.NumVertices()); + unsigned currentIndex = 0; + for (IndexType node = 0; node < hgraph.NumVertices(); ++node) { if (include[node]) { - new_index[node] = current_index++; + newIndex[node] = currentIndex++; } } - hypergraph_t new_hgraph(current_index, 0); - for (index_type node = 0; node < hgraph.num_vertices(); ++node) { + HypergraphT newHgraph(currentIndex, 0); + for (IndexType node = 0; node < hgraph.NumVertices(); ++node) { if (include[node]) { - new_hgraph.set_vertex_work_weight(new_index[node], hgraph.get_vertex_work_weight(node)); - new_hgraph.set_vertex_memory_weight(new_index[node], hgraph.get_vertex_memory_weight(node)); + newHgraph.SetVertexWorkWeight(newIndex[node], hgraph.GetVertexWorkWeight(node)); + newHgraph.SetVertexMemoryWeight(newIndex[node], hgraph.GetVertexMemoryWeight(node)); } } - for (index_type hyperedge = 0; hyperedge < hgraph.num_hyperedges(); ++hyperedge) { - unsigned nr_induced_pins = 0; - std::vector induced_hyperedge; - for (index_type node : hgraph.get_vertices_in_hyperedge(hyperedge)) { + for (IndexType hyperedge = 0; hyperedge < hgraph.NumHyperedges(); ++hyperedge) { + unsigned nrInducedPins = 0; + std::vector inducedHyperedge; + for (IndexType node : hgraph.GetVerticesInHyperedge(hyperedge)) { if (include[node]) { - induced_hyperedge.push_back(new_index[node]); - ++nr_induced_pins; + inducedHyperedge.push_back(newIndex[node]); + ++nrInducedPins; } } - if (nr_induced_pins >= 2) { - new_hgraph.add_hyperedge(induced_hyperedge, hgraph.get_hyperedge_weight(hyperedge)); + if (nrInducedPins >= 2) { + newHgraph.AddHyperedge(inducedHyperedge, hgraph.GetHyperedgeWeight(hyperedge)); } } - return new_hgraph; + return newHgraph; } // 
conversion -template -hypergraph_t convert_from_cdag_as_dag(const Graph_t &dag) { - using index_type = typename hypergraph_t::vertex_idx; - using workw_type = typename hypergraph_t::vertex_work_weight_type; - using memw_type = typename hypergraph_t::vertex_mem_weight_type; - using commw_type = typename hypergraph_t::vertex_comm_weight_type; - - static_assert(std::is_same_v, index_type>, "Index type mismatch, cannot convert DAG to hypergraph."); - static_assert(std::is_same_v, workw_type>, "Work weight type mismatch, cannot convert DAG to hypergraph."); - static_assert(std::is_same_v, memw_type>, "Memory weight type mismatch, cannot convert DAG to hypergraph."); - static_assert(!has_edge_weights_v || std::is_same_v, commw_type>, +template +HypergraphT ConvertFromCdagAsDag(const GraphT &dag) { + using IndexType = typename HypergraphT::VertexIdx; + using WorkwType = typename HypergraphT::VertexWorkWeightType; + using MemwType = typename HypergraphT::VertexMemWeightType; + using CommwType = typename HypergraphT::VertexCommWeightType; + + static_assert(std::is_same_v, IndexType>, "Index type mismatch, cannot convert DAG to hypergraph."); + static_assert(std::is_same_v, WorkwType>, "Work weight type mismatch, cannot convert DAG to hypergraph."); + static_assert(std::is_same_v, MemwType>, "Memory weight type mismatch, cannot convert DAG to hypergraph."); + static_assert(!hasEdgeWeightsV || std::is_same_v, CommwType>, "Communication weight type mismatch, cannot convert DAG to hypergraph."); - hypergraph_t hgraph(dag.num_vertices(), 0); - for (const auto &node : dag.vertices()) { - hgraph.set_vertex_work_weight(node, dag.vertex_work_weight(node)); - hgraph.set_vertex_memory_weight(node, dag.vertex_mem_weight(node)); - for (const auto &child : dag.children(node)) { - if constexpr (has_edge_weights_v) { - hgraph.add_hyperedge({node, child}, dag.edge_comm_weight(edge_desc(node, child, dag).first)); + HypergraphT hgraph(dag.NumVertices(), 0); + for (const auto &node : 
dag.Vertices()) { + hgraph.SetVertexWorkWeight(node, dag.VertexWorkWeight(node)); + hgraph.SetVertexMemoryWeight(node, dag.VertexMemWeight(node)); + for (const auto &child : dag.Children(node)) { + if constexpr (hasEdgeWeightsV) { + hgraph.AddHyperedge({node, child}, dag.EdgeCommWeight(EdgeDesc(node, child, dag).first)); } else { - hgraph.add_hyperedge({node, child}); + hgraph.AddHyperedge({node, child}); } } } return hgraph; } -template -hypergraph_t convert_from_cdag_as_hyperdag(const Graph_t &dag) { - using index_type = typename hypergraph_t::vertex_idx; - using workw_type = typename hypergraph_t::vertex_work_weight_type; - using memw_type = typename hypergraph_t::vertex_mem_weight_type; - using commw_type = typename hypergraph_t::vertex_comm_weight_type; - - static_assert(std::is_same_v, index_type>, "Index type mismatch, cannot convert DAG to hypergraph."); - static_assert(std::is_same_v, workw_type>, "Work weight type mismatch, cannot convert DAG to hypergraph."); - static_assert(std::is_same_v, memw_type>, "Memory weight type mismatch, cannot convert DAG to hypergraph."); - static_assert(std::is_same_v, commw_type>, +template +HypergraphT ConvertFromCdagAsHyperdag(const GraphT &dag) { + using IndexType = typename HypergraphT::VertexIdx; + using WorkwType = typename HypergraphT::VertexWorkWeightType; + using MemwType = typename HypergraphT::VertexMemWeightType; + using CommwType = typename HypergraphT::VertexCommWeightType; + + static_assert(std::is_same_v, IndexType>, "Index type mismatch, cannot convert DAG to hypergraph."); + static_assert(std::is_same_v, WorkwType>, "Work weight type mismatch, cannot convert DAG to hypergraph."); + static_assert(std::is_same_v, MemwType>, "Memory weight type mismatch, cannot convert DAG to hypergraph."); + static_assert(std::is_same_v, CommwType>, "Communication weight type mismatch, cannot convert DAG to hypergraph."); - hypergraph_t hgraph(dag.num_vertices(), 0); - for (const auto &node : dag.vertices()) { - 
hgraph.set_vertex_work_weight(node, dag.vertex_work_weight(node)); - hgraph.set_vertex_memory_weight(node, dag.vertex_mem_weight(node)); - if (dag.out_degree(node) == 0) { + HypergraphT hgraph(dag.NumVertices(), 0); + for (const auto &node : dag.Vertices()) { + hgraph.SetVertexWorkWeight(node, dag.VertexWorkWeight(node)); + hgraph.SetVertexMemoryWeight(node, dag.VertexMemWeight(node)); + if (dag.OutDegree(node) == 0) { continue; } - std::vector new_hyperedge({node}); - for (const auto &child : dag.children(node)) { - new_hyperedge.push_back(child); + std::vector newHyperedge({node}); + for (const auto &child : dag.Children(node)) { + newHyperedge.push_back(child); } - hgraph.add_hyperedge(new_hyperedge, dag.vertex_comm_weight(node)); + hgraph.AddHyperedge(newHyperedge, dag.VertexCommWeight(node)); } return hgraph; } diff --git a/include/osp/partitioning/model/partitioning.hpp b/include/osp/partitioning/model/partitioning.hpp index ae4ee277..2fd2bd96 100644 --- a/include/osp/partitioning/model/partitioning.hpp +++ b/include/osp/partitioning/model/partitioning.hpp @@ -26,66 +26,66 @@ namespace osp { // Represents a partitioning where each vertex of a hypergraph is assigned to a specifc partition -template +template class Partitioning { private: - using index_type = typename hypergraph_t::vertex_idx; - using workw_type = typename hypergraph_t::vertex_work_weight_type; - using memw_type = typename hypergraph_t::vertex_mem_weight_type; - using commw_type = typename hypergraph_t::vertex_comm_weight_type; + using IndexType = typename HypergraphT::VertexIdx; + using WorkwType = typename HypergraphT::VertexWorkWeightType; + using MemwType = typename HypergraphT::VertexMemWeightType; + using CommwType = typename HypergraphT::VertexCommWeightType; - const PartitioningProblem *instance; + const PartitioningProblem *instance_; - std::vector node_to_partition_assignment; + std::vector nodeToPartitionAssignment_; public: Partitioning() = delete; - Partitioning(const 
PartitioningProblem &inst) - : instance(&inst), node_to_partition_assignment(std::vector(inst.getHypergraph().num_vertices(), 0)) {} + Partitioning(const PartitioningProblem &inst) + : instance_(&inst), nodeToPartitionAssignment_(std::vector(inst.GetHypergraph().NumVertices(), 0)) {} - Partitioning(const PartitioningProblem &inst, const std::vector &partition_assignment_) - : instance(&inst), node_to_partition_assignment(partition_assignment_) {} + Partitioning(const PartitioningProblem &inst, const std::vector &partitionAssignment) + : instance_(&inst), nodeToPartitionAssignment_(partitionAssignment) {} - Partitioning(const Partitioning &partitioning_) = default; - Partitioning(Partitioning &&partitioning_) = default; + Partitioning(const Partitioning &partitioning) = default; + Partitioning(Partitioning &&partitioning) = default; - Partitioning &operator=(const Partitioning &partitioning_) = default; + Partitioning &operator=(const Partitioning &partitioning) = default; virtual ~Partitioning() = default; // getters and setters - inline const PartitioningProblem &getInstance() const { return *instance; } + inline const PartitioningProblem &GetInstance() const { return *instance_; } - inline unsigned assignedPartition(index_type node) const { return node_to_partition_assignment[node]; } + inline unsigned AssignedPartition(IndexType node) const { return nodeToPartitionAssignment_[node]; } - inline const std::vector &assignedPartitions() const { return node_to_partition_assignment; } + inline const std::vector &AssignedPartitions() const { return nodeToPartitionAssignment_; } - inline std::vector &assignedPartitions() { return node_to_partition_assignment; } + inline std::vector &AssignedPartitions() { return nodeToPartitionAssignment_; } - inline void setAssignedPartition(index_type node, unsigned part) { node_to_partition_assignment.at(node) = part; } + inline void SetAssignedPartition(IndexType node, unsigned part) { nodeToPartitionAssignment_.at(node) = part; } - 
void setAssignedPartitions(const std::vector &vec) { - if (vec.size() == static_cast(instance->getHypergraph().num_vertices())) { - node_to_partition_assignment = vec; + void SetAssignedPartitions(const std::vector &vec) { + if (vec.size() == static_cast(instance_->GetHypergraph().NumVertices())) { + nodeToPartitionAssignment_ = vec; } else { throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } - void setAssignedPartitions(std::vector &&vec) { - if (vec.size() == static_cast(instance->getHypergraph().num_vertices())) { - node_to_partition_assignment = vec; + void SetAssignedPartitions(std::vector &&vec) { + if (vec.size() == static_cast(instance_->GetHypergraph().NumVertices())) { + nodeToPartitionAssignment_ = vec; } else { throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } - std::vector getPartitionContent(unsigned part) const { - std::vector content; - for (index_type node = 0; node < node_to_partition_assignment.size(); ++node) { - if (node_to_partition_assignment[node] == part) { + std::vector GetPartitionContent(unsigned part) const { + std::vector content; + for (IndexType node = 0; node < nodeToPartitionAssignment_.size(); ++node) { + if (nodeToPartitionAssignment_[node] == part) { content.push_back(node); } } @@ -93,86 +93,86 @@ class Partitioning { return content; } - void resetPartition() { - node_to_partition_assignment.clear(); - node_to_partition_assignment.resize(instance->getHypergraph().num_vertices(), 0); + void ResetPartition() { + nodeToPartitionAssignment_.clear(); + nodeToPartitionAssignment_.resize(instance_->GetHypergraph().NumVertices(), 0); } // costs and validity - std::vector computeLambdaForHyperedges() const; - commw_type computeConnectivityCost() const; - commw_type computeCutNetCost() const; + std::vector ComputeLambdaForHyperedges() const; + CommwType ComputeConnectivityCost() const; + CommwType 
ComputeCutNetCost() const; - bool satisfiesBalanceConstraint() const; + bool SatisfiesBalanceConstraint() const; }; -template -std::vector Partitioning::computeLambdaForHyperedges() const { - std::vector lambda(instance->getHypergraph().num_hyperedges(), 0); - for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { - const std::vector &hyperedge = instance->getHypergraph().get_vertices_in_hyperedge(edge_idx); +template +std::vector Partitioning::ComputeLambdaForHyperedges() const { + std::vector lambda(instance_->GetHypergraph().NumHyperedges(), 0); + for (IndexType edgeIdx = 0; edgeIdx < instance_->GetHypergraph().NumHyperedges(); ++edgeIdx) { + const std::vector &hyperedge = instance_->GetHypergraph().GetVerticesInHyperedge(edgeIdx); if (hyperedge.empty()) { continue; } - std::vector intersects_part(instance->getNumberOfPartitions(), false); - for (const index_type &node : hyperedge) { - intersects_part[node_to_partition_assignment[node]] = true; + std::vector intersectsPart(instance_->GetNumberOfPartitions(), false); + for (const IndexType &node : hyperedge) { + intersectsPart[nodeToPartitionAssignment_[node]] = true; } - for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { - if (intersects_part[part]) { - ++lambda[edge_idx]; + for (unsigned part = 0; part < instance_->GetNumberOfPartitions(); ++part) { + if (intersectsPart[part]) { + ++lambda[edgeIdx]; } } } return lambda; } -template -typename hypergraph_t::vertex_comm_weight_type Partitioning::computeConnectivityCost() const { - commw_type total = 0; - std::vector lambda = computeLambdaForHyperedges(); +template +typename HypergraphT::VertexCommWeightType Partitioning::ComputeConnectivityCost() const { + CommwType total = 0; + std::vector lambda = ComputeLambdaForHyperedges(); - for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { - if (lambda[edge_idx] >= 1) { - total += 
(static_cast(lambda[edge_idx]) - 1) * instance->getHypergraph().get_hyperedge_weight(edge_idx); + for (IndexType edgeIdx = 0; edgeIdx < instance_->GetHypergraph().NumHyperedges(); ++edgeIdx) { + if (lambda[edgeIdx] >= 1) { + total += (static_cast(lambda[edgeIdx]) - 1) * instance_->GetHypergraph().GetHyperedgeWeight(edgeIdx); } } return total; } -template -typename hypergraph_t::vertex_comm_weight_type Partitioning::computeCutNetCost() const { - commw_type total = 0; - std::vector lambda = computeLambdaForHyperedges(); - for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { - if (lambda[edge_idx] > 1) { - total += instance->getHypergraph().get_hyperedge_weight(edge_idx); +template +typename HypergraphT::VertexCommWeightType Partitioning::ComputeCutNetCost() const { + CommwType total = 0; + std::vector lambda = ComputeLambdaForHyperedges(); + for (IndexType edgeIdx = 0; edgeIdx < instance_->GetHypergraph().NumHyperedges(); ++edgeIdx) { + if (lambda[edgeIdx] > 1) { + total += instance_->GetHypergraph().GetHyperedgeWeight(edgeIdx); } } return total; } -template -bool Partitioning::satisfiesBalanceConstraint() const { - std::vector work_weight(instance->getNumberOfPartitions(), 0); - std::vector memory_weight(instance->getNumberOfPartitions(), 0); - for (index_type node = 0; node < node_to_partition_assignment.size(); ++node) { - if (node_to_partition_assignment[node] > instance->getNumberOfPartitions()) { +template +bool Partitioning::SatisfiesBalanceConstraint() const { + std::vector workWeight(instance_->GetNumberOfPartitions(), 0); + std::vector memoryWeight(instance_->GetNumberOfPartitions(), 0); + for (IndexType node = 0; node < nodeToPartitionAssignment_.size(); ++node) { + if (nodeToPartitionAssignment_[node] > instance_->GetNumberOfPartitions()) { throw std::invalid_argument("Invalid Argument while checking balance constraint: partition ID out of range."); } else { - work_weight[node_to_partition_assignment[node]] += 
instance->getHypergraph().get_vertex_work_weight(node); - memory_weight[node_to_partition_assignment[node]] += instance->getHypergraph().get_vertex_memory_weight(node); + workWeight[nodeToPartitionAssignment_[node]] += instance_->GetHypergraph().GetVertexWorkWeight(node); + memoryWeight[nodeToPartitionAssignment_[node]] += instance_->GetHypergraph().GetVertexMemoryWeight(node); } } - for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { - if (work_weight[part] > instance->getMaxWorkWeightPerPartition()) { + for (unsigned part = 0; part < instance_->GetNumberOfPartitions(); ++part) { + if (workWeight[part] > instance_->GetMaxWorkWeightPerPartition()) { return false; } - if (memory_weight[part] > instance->getMaxMemoryWeightPerPartition()) { + if (memoryWeight[part] > instance_->GetMaxMemoryWeightPerPartition()) { return false; } } diff --git a/include/osp/partitioning/model/partitioning_problem.hpp b/include/osp/partitioning/model/partitioning_problem.hpp index 90ae57c6..5e3b90cd 100644 --- a/include/osp/partitioning/model/partitioning_problem.hpp +++ b/include/osp/partitioning/model/partitioning_problem.hpp @@ -26,90 +26,90 @@ limitations under the License. 
namespace osp { // represents a hypergraph partitioning problem into a fixed number of parts with a balance constraint -template +template class PartitioningProblem { private: - using this_t = PartitioningProblem; + using ThisT = PartitioningProblem; - using index_type = typename hypergraph_t::vertex_idx; - using workw_type = typename hypergraph_t::vertex_work_weight_type; - using memw_type = typename hypergraph_t::vertex_mem_weight_type; - using commw_type = typename hypergraph_t::vertex_comm_weight_type; + using IndexType = typename HypergraphT::VertexIdx; + using WorkwType = typename HypergraphT::VertexWorkWeightType; + using MemwType = typename HypergraphT::VertexMemWeightType; + using CommwType = typename HypergraphT::VertexCommWeightType; - hypergraph_t hgraph; + HypergraphT hgraph_; - unsigned nr_of_partitions; - workw_type max_work_weight_per_partition; - memw_type max_memory_weight_per_partition; + unsigned nrOfPartitions_; + WorkwType maxWorkWeightPerPartition_; + MemwType maxMemoryWeightPerPartition_; - bool allows_replication = false; + bool allowsReplication_ = false; public: PartitioningProblem() = default; - PartitioningProblem(const hypergraph_t &hgraph_, - unsigned nr_parts_ = 2, - workw_type max_work_weight_ = std::numeric_limits::max(), - memw_type max_memory_weight_ = std::numeric_limits::max()) - : hgraph(hgraph_), - nr_of_partitions(nr_parts_), - max_work_weight_per_partition(max_work_weight_), - max_memory_weight_per_partition(max_memory_weight_) {} - - PartitioningProblem(const hypergraph_t &&hgraph_, - unsigned nr_parts_ = 2, - workw_type max_work_weight_ = std::numeric_limits::max(), - memw_type max_memory_weight_ = std::numeric_limits::max()) - : hgraph(hgraph_), - nr_of_partitions(nr_parts_), - max_work_weight_per_partition(max_work_weight_), - max_memory_weight_per_partition(max_memory_weight_) {} - - PartitioningProblem(const this_t &other) = default; - PartitioningProblem(this_t &&other) = default; - - PartitioningProblem 
&operator=(const this_t &other) = default; - PartitioningProblem &operator=(this_t &&other) = default; + PartitioningProblem(const HypergraphT &hgraph, + unsigned nrParts = 2, + WorkwType maxWorkWeight = std::numeric_limits::max(), + MemwType maxMemoryWeight = std::numeric_limits::max()) + : hgraph_(hgraph), + nrOfPartitions_(nrParts), + maxWorkWeightPerPartition_(maxWorkWeight), + maxMemoryWeightPerPartition_(maxMemoryWeight) {} + + PartitioningProblem(const HypergraphT &&hgraph, + unsigned nrParts = 2, + WorkwType maxWorkWeight = std::numeric_limits::max(), + MemwType maxMemoryWeight = std::numeric_limits::max()) + : hgraph_(hgraph), + nrOfPartitions_(nrParts), + maxWorkWeightPerPartition_(maxWorkWeight), + maxMemoryWeightPerPartition_(maxMemoryWeight) {} + + PartitioningProblem(const ThisT &other) = default; + PartitioningProblem(ThisT &&other) = default; + + PartitioningProblem &operator=(const ThisT &other) = default; + PartitioningProblem &operator=(ThisT &&other) = default; // getters - inline const hypergraph_t &getHypergraph() const { return hgraph; } + inline const HypergraphT &GetHypergraph() const { return hgraph_; } - inline hypergraph_t &getHypergraph() { return hgraph; } + inline HypergraphT &GetHypergraph() { return hgraph_; } - inline unsigned getNumberOfPartitions() const { return nr_of_partitions; } + inline unsigned GetNumberOfPartitions() const { return nrOfPartitions_; } - inline workw_type getMaxWorkWeightPerPartition() const { return max_work_weight_per_partition; } + inline WorkwType GetMaxWorkWeightPerPartition() const { return maxWorkWeightPerPartition_; } - inline memw_type getMaxMemoryWeightPerPartition() const { return max_memory_weight_per_partition; } + inline MemwType GetMaxMemoryWeightPerPartition() const { return maxMemoryWeightPerPartition_; } - inline bool getAllowsReplication() const { return allows_replication; } + inline bool GetAllowsReplication() const { return allowsReplication_; } // setters - inline void 
setHypergraph(const hypergraph_t &hgraph_) { hgraph = hgraph_; } + inline void SetHypergraph(const HypergraphT &hgraph) { hgraph_ = hgraph; } - inline void setNumberOfPartitions(unsigned nr_parts_) { nr_of_partitions = nr_parts_; } + inline void SetNumberOfPartitions(unsigned nrParts) { nrOfPartitions_ = nrParts; } - inline void setAllowsReplication(bool allowed_) { allows_replication = allowed_; } + inline void SetAllowsReplication(bool allowed) { allowsReplication_ = allowed; } - inline void setMaxWorkWeightExplicitly(workw_type max_weight_) { max_work_weight_per_partition = max_weight_; } + inline void SetMaxWorkWeightExplicitly(WorkwType maxWeight) { maxWorkWeightPerPartition_ = maxWeight; } - void setMaxWorkWeightViaImbalanceFactor(double imbalance) { + void SetMaxWorkWeightViaImbalanceFactor(double imbalance) { if (imbalance < 0) { throw std::invalid_argument("Invalid Argument while setting imbalance parameter: parameter is negative."); } else { - max_work_weight_per_partition = static_cast( - ceil(compute_total_vertex_work_weight(hgraph) / static_cast(nr_of_partitions) * (1.0 + imbalance))); + maxWorkWeightPerPartition_ = static_cast( + ceil(ComputeTotalVertexWorkWeight(hgraph_) / static_cast(nrOfPartitions_) * (1.0 + imbalance))); } } - inline void setMaxMemoryWeightExplicitly(memw_type max_weight_) { max_memory_weight_per_partition = max_weight_; } + inline void SetMaxMemoryWeightExplicitly(MemwType maxWeight) { maxMemoryWeightPerPartition_ = maxWeight; } - void setMaxMemoryWeightViaImbalanceFactor(double imbalance) { + void SetMaxMemoryWeightViaImbalanceFactor(double imbalance) { if (imbalance < 0) { throw std::invalid_argument("Invalid Argument while setting imbalance parameter: parameter is negative."); } else { - max_memory_weight_per_partition = static_cast( - ceil(compute_total_vertex_memory_weight(hgraph) / static_cast(nr_of_partitions) * (1.0 + imbalance))); + maxMemoryWeightPerPartition_ = static_cast( + 
ceil(ComputeTotalVertexMemoryWeight(hgraph_) / static_cast(nrOfPartitions_) * (1.0 + imbalance))); } } }; diff --git a/include/osp/partitioning/model/partitioning_replication.hpp b/include/osp/partitioning/model/partitioning_replication.hpp index 698db721..c07c27ed 100644 --- a/include/osp/partitioning/model/partitioning_replication.hpp +++ b/include/osp/partitioning/model/partitioning_replication.hpp @@ -26,70 +26,70 @@ namespace osp { // Represents a partitioning where each vertex of a hypergraph can be assinged to one or more partitions -template +template class PartitioningWithReplication { private: - using index_type = typename hypergraph_t::vertex_idx; - using workw_type = typename hypergraph_t::vertex_work_weight_type; - using memw_type = typename hypergraph_t::vertex_mem_weight_type; - using commw_type = typename hypergraph_t::vertex_comm_weight_type; + using IndexType = typename HypergraphT::VertexIdx; + using WorkwType = typename HypergraphT::VertexWorkWeightType; + using MemwType = typename HypergraphT::VertexMemWeightType; + using CommwType = typename HypergraphT::VertexCommWeightType; - const PartitioningProblem *instance; + const PartitioningProblem *instance_; - std::vector> node_to_partitions_assignment; + std::vector> nodeToPartitionsAssignment_; public: PartitioningWithReplication() = delete; - PartitioningWithReplication(const PartitioningProblem &inst) - : instance(&inst), - node_to_partitions_assignment(std::vector>(inst.getHypergraph().num_vertices(), {0})) {} + PartitioningWithReplication(const PartitioningProblem &inst) + : instance_(&inst), + nodeToPartitionsAssignment_(std::vector>(inst.GetHypergraph().NumVertices(), {0})) {} - PartitioningWithReplication(const PartitioningProblem &inst, - const std::vector> &partition_assignment_) - : instance(&inst), node_to_partitions_assignment(partition_assignment_) {} + PartitioningWithReplication(const PartitioningProblem &inst, + const std::vector> &partitionAssignment) + : instance_(&inst), 
nodeToPartitionsAssignment_(partitionAssignment) {} - PartitioningWithReplication(const PartitioningWithReplication &partitioning_) = default; - PartitioningWithReplication(PartitioningWithReplication &&partitioning_) = default; + PartitioningWithReplication(const PartitioningWithReplication &partitioning) = default; + PartitioningWithReplication(PartitioningWithReplication &&partitioning) = default; - PartitioningWithReplication &operator=(const PartitioningWithReplication &partitioning_) = default; + PartitioningWithReplication &operator=(const PartitioningWithReplication &partitioning) = default; virtual ~PartitioningWithReplication() = default; // getters and setters - inline const PartitioningProblem &getInstance() const { return *instance; } + inline const PartitioningProblem &GetInstance() const { return *instance_; } - inline std::vector assignedPartitions(index_type node) const { return node_to_partitions_assignment[node]; } + inline std::vector AssignedPartitions(IndexType node) const { return nodeToPartitionsAssignment_[node]; } - inline const std::vector> &assignedPartitions() const { return node_to_partitions_assignment; } + inline const std::vector> &AssignedPartitions() const { return nodeToPartitionsAssignment_; } - inline std::vector> &assignedPartitions() { return node_to_partitions_assignment; } + inline std::vector> &AssignedPartitions() { return nodeToPartitionsAssignment_; } - inline void setAssignedPartitions(index_type node, const std::vector &parts) { - node_to_partitions_assignment.at(node) = parts; + inline void SetAssignedPartitions(IndexType node, const std::vector &parts) { + nodeToPartitionsAssignment_.at(node) = parts; } - void setAssignedPartitionVectors(const std::vector> &vec) { - if (vec.size() == static_cast(instance->getHypergraph().num_vertices())) { - node_to_partitions_assignment = vec; + void SetAssignedPartitionVectors(const std::vector> &vec) { + if (vec.size() == static_cast(instance_->GetHypergraph().NumVertices())) { + 
nodeToPartitionsAssignment_ = vec; } else { throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } - void setAssignedPartitionVectors(std::vector> &&vec) { - if (vec.size() == static_cast(instance->getHypergraph().num_vertices())) { - node_to_partitions_assignment = vec; + void SetAssignedPartitionVectors(std::vector> &&vec) { + if (vec.size() == static_cast(instance_->GetHypergraph().NumVertices())) { + nodeToPartitionsAssignment_ = vec; } else { throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } - std::vector> getPartitionContents() const { - std::vector> content(instance->getNumberOfPartitions()); - for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node) { - for (unsigned part : node_to_partitions_assignment[node]) { + std::vector> GetPartitionContents() const { + std::vector> content(instance_->GetNumberOfPartitions()); + for (IndexType node = 0; node < nodeToPartitionsAssignment_.size(); ++node) { + for (unsigned part : nodeToPartitionsAssignment_[node]) { content[part].push_back(node); } } @@ -97,131 +97,131 @@ class PartitioningWithReplication { return content; } - void resetPartition() { - node_to_partitions_assignment.clear(); - node_to_partitions_assignment.resize(instance->getHypergraph().num_vertices(), {0}); + void ResetPartition() { + nodeToPartitionsAssignment_.clear(); + nodeToPartitionsAssignment_.resize(instance_->GetHypergraph().NumVertices(), {0}); } // costs and validity - commw_type computeConnectivityCost() const; - commw_type computeCutNetCost() const; + CommwType ComputeConnectivityCost() const; + CommwType ComputeCutNetCost() const; - bool satisfiesBalanceConstraint() const; + bool SatisfiesBalanceConstraint() const; }; -template -typename hypergraph_t::vertex_comm_weight_type PartitioningWithReplication::computeConnectivityCost() const { +template +typename 
HypergraphT::VertexCommWeightType PartitioningWithReplication::ComputeConnectivityCost() const { // naive implementation. in the worst-case this is exponential in the number of parts - if (instance->getNumberOfPartitions() > 16) { + if (instance_->GetNumberOfPartitions() > 16) { throw std::invalid_argument("Computing connectivity cost is not supported for more than 16 partitions."); } - commw_type total = 0; - std::vector part_used(instance->getNumberOfPartitions(), false); - for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { - const std::vector &hyperedge = instance->getHypergraph().get_vertices_in_hyperedge(edge_idx); + CommwType total = 0; + std::vector partUsed(instance_->GetNumberOfPartitions(), false); + for (IndexType edgeIdx = 0; edgeIdx < instance_->GetHypergraph().NumHyperedges(); ++edgeIdx) { + const std::vector &hyperedge = instance_->GetHypergraph().GetVerticesInHyperedge(edgeIdx); if (hyperedge.empty()) { continue; } unsigned long mask = 0UL; - std::vector nr_nodes_covered_by_part(instance->getNumberOfPartitions(), 0); - for (const index_type &node : hyperedge) { - if (node_to_partitions_assignment[node].size() == 1) { - mask = mask | (1UL << node_to_partitions_assignment[node].front()); + std::vector nrNodesCoveredByPart(instance_->GetNumberOfPartitions(), 0); + for (const IndexType &node : hyperedge) { + if (nodeToPartitionsAssignment_[node].size() == 1) { + mask = mask | (1UL << nodeToPartitionsAssignment_[node].front()); } } - unsigned min_parts_to_cover = instance->getNumberOfPartitions(); - unsigned long mask_limit = 1UL << instance->getNumberOfPartitions(); - for (unsigned long subset_mask = 1UL; subset_mask < mask_limit; ++subset_mask) { - if ((subset_mask & mask) != mask) { + unsigned minPartsToCover = instance_->GetNumberOfPartitions(); + unsigned long maskLimit = 1UL << instance_->GetNumberOfPartitions(); + for (unsigned long subsetMask = 1UL; subsetMask < maskLimit; ++subsetMask) { + if 
((subsetMask & mask) != mask) { continue; } - unsigned nr_parts_used = 0; - for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { - part_used[part] = (((1UL << part) & subset_mask) > 0); - nr_parts_used += static_cast(part_used[part]); + unsigned nrPartsUsed = 0; + for (unsigned part = 0; part < instance_->GetNumberOfPartitions(); ++part) { + partUsed[part] = (((1UL << part) & subsetMask) > 0); + nrPartsUsed += static_cast(partUsed[part]); } - bool all_nodes_covered = true; - for (const index_type &node : hyperedge) { - bool node_covered = false; - for (unsigned part : node_to_partitions_assignment[node]) { - if (part_used[part]) { - node_covered = true; + bool allNodesCovered = true; + for (const IndexType &node : hyperedge) { + bool nodeCovered = false; + for (unsigned part : nodeToPartitionsAssignment_[node]) { + if (partUsed[part]) { + nodeCovered = true; break; } } - if (!node_covered) { - all_nodes_covered = false; + if (!nodeCovered) { + allNodesCovered = false; break; } } - if (all_nodes_covered) { - min_parts_to_cover = std::min(min_parts_to_cover, nr_parts_used); + if (allNodesCovered) { + minPartsToCover = std::min(minPartsToCover, nrPartsUsed); } } - total += static_cast(min_parts_to_cover - 1) * instance->getHypergraph().get_hyperedge_weight(edge_idx); + total += static_cast(minPartsToCover - 1) * instance_->GetHypergraph().GetHyperedgeWeight(edgeIdx); } return total; } -template -typename hypergraph_t::vertex_comm_weight_type PartitioningWithReplication::computeCutNetCost() const { - commw_type total = 0; - for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { - const std::vector &hyperedge = instance->getHypergraph().get_vertices_in_hyperedge(edge_idx); +template +typename HypergraphT::VertexCommWeightType PartitioningWithReplication::ComputeCutNetCost() const { + CommwType total = 0; + for (IndexType edgeIdx = 0; edgeIdx < instance_->GetHypergraph().NumHyperedges(); ++edgeIdx) { + 
const std::vector &hyperedge = instance_->GetHypergraph().GetVerticesInHyperedge(edgeIdx); if (hyperedge.empty()) { continue; } - std::vector nr_nodes_covered_by_part(instance->getNumberOfPartitions(), 0); - for (const index_type &node : hyperedge) { - for (unsigned part : node_to_partitions_assignment[node]) { - ++nr_nodes_covered_by_part[part]; + std::vector nrNodesCoveredByPart(instance_->GetNumberOfPartitions(), 0); + for (const IndexType &node : hyperedge) { + for (unsigned part : nodeToPartitionsAssignment_[node]) { + ++nrNodesCoveredByPart[part]; } } - bool covers_all = false; - for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { - if (nr_nodes_covered_by_part[part] == hyperedge.size()) { - covers_all = true; + bool coversAll = false; + for (unsigned part = 0; part < instance_->GetNumberOfPartitions(); ++part) { + if (nrNodesCoveredByPart[part] == hyperedge.size()) { + coversAll = true; } } - if (!covers_all) { - total += instance->getHypergraph().get_hyperedge_weight(edge_idx); + if (!coversAll) { + total += instance_->GetHypergraph().GetHyperedgeWeight(edgeIdx); } } return total; } -template -bool PartitioningWithReplication::satisfiesBalanceConstraint() const { - std::vector work_weight(instance->getNumberOfPartitions(), 0); - std::vector memory_weight(instance->getNumberOfPartitions(), 0); - for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node) { - for (unsigned part : node_to_partitions_assignment[node]) { - if (part > instance->getNumberOfPartitions()) { +template +bool PartitioningWithReplication::SatisfiesBalanceConstraint() const { + std::vector workWeight(instance_->GetNumberOfPartitions(), 0); + std::vector memoryWeight(instance_->GetNumberOfPartitions(), 0); + for (IndexType node = 0; node < nodeToPartitionsAssignment_.size(); ++node) { + for (unsigned part : nodeToPartitionsAssignment_[node]) { + if (part > instance_->GetNumberOfPartitions()) { throw std::invalid_argument("Invalid Argument while 
checking balance constraint: partition ID out of range."); } else { - work_weight[part] += instance->getHypergraph().get_vertex_work_weight(node); - memory_weight[part] += instance->getHypergraph().get_vertex_memory_weight(node); + workWeight[part] += instance_->GetHypergraph().GetVertexWorkWeight(node); + memoryWeight[part] += instance_->GetHypergraph().GetVertexMemoryWeight(node); } } } - for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { - if (work_weight[part] > instance->getMaxWorkWeightPerPartition()) { + for (unsigned part = 0; part < instance_->GetNumberOfPartitions(); ++part) { + if (workWeight[part] > instance_->GetMaxWorkWeightPerPartition()) { return false; } - if (memory_weight[part] > instance->getMaxMemoryWeightPerPartition()) { + if (memoryWeight[part] > instance_->GetMaxMemoryWeightPerPartition()) { return false; } } diff --git a/include/osp/partitioning/partitioners/generic_FM.hpp b/include/osp/partitioning/partitioners/generic_FM.hpp index c17c90f4..b1219265 100644 --- a/include/osp/partitioning/partitioners/generic_FM.hpp +++ b/include/osp/partitioning/partitioners/generic_FM.hpp @@ -25,78 +25,77 @@ limitations under the License. 
namespace osp { -template +template class GenericFM { - using index_type = typename hypergraph_t::vertex_idx; - using workw_type = typename hypergraph_t::vertex_work_weight_type; - using memw_type = typename hypergraph_t::vertex_mem_weight_type; - using commw_type = typename hypergraph_t::vertex_comm_weight_type; + using IndexType = typename HypergraphT::VertexIdx; + using WorkwType = typename HypergraphT::VertexWorkWeightType; + using MemwType = typename HypergraphT::VertexMemWeightType; + using CommwType = typename HypergraphT::VertexCommWeightType; protected: - unsigned max_number_of_passes = 10; - index_type max_nodes_in_part = 0; + unsigned maxNumberOfPasses_ = 10; + IndexType maxNodesInPart_ = 0; // auxiliary for RecursiveFM - std::vector getMaxNodesOnLevel(index_type nr_nodes, unsigned nr_parts) const; + std::vector GetMaxNodesOnLevel(IndexType nrNodes, unsigned nrParts) const; public: - void ImprovePartitioning(Partitioning &partition); + void ImprovePartitioning(Partitioning &partition); - void RecursiveFM(Partitioning &partition); + void RecursiveFM(Partitioning &partition); - inline unsigned getMaxNumberOfPasses() const { return max_number_of_passes; } + inline unsigned GetMaxNumberOfPasses() const { return maxNumberOfPasses_; } - inline void setMaxNumberOfPasses(unsigned passes_) { max_number_of_passes = passes_; } + inline void SetMaxNumberOfPasses(unsigned passes) { maxNumberOfPasses_ = passes; } - inline index_type getMaxNodesInPart() const { return max_nodes_in_part; } + inline IndexType GetMaxNodesInPart() const { return maxNodesInPart_; } - inline void setMaxNodesInPart(index_type max_nodes_) { max_nodes_in_part = max_nodes_; } + inline void SetMaxNodesInPart(IndexType maxNodes) { maxNodesInPart_ = maxNodes; } }; -template -void GenericFM::ImprovePartitioning(Partitioning &partition) { +template +void GenericFM::ImprovePartitioning(Partitioning &partition) { // Note: this algorithm disregards hyperedge weights, in order to keep the size of the 
gain bucket array bounded! - if (partition.getInstance().getNumberOfPartitions() != 2) { + if (partition.GetInstance().GetNumberOfPartitions() != 2) { std::cout << "Error: FM can only be used for 2 partitions." << std::endl; return; } - if (!partition.satisfiesBalanceConstraint()) { + if (!partition.SatisfiesBalanceConstraint()) { std::cout << "Error: initial partition to FM does not satisfy balance constraint." << std::endl; return; } - const Hypergraph &Hgraph = partition.getInstance().getHypergraph(); + const Hypergraph &hgraph = partition.GetInstance().GetHypergraph(); - index_type max_degree = 0; - for (index_type node = 0; node < Hgraph.num_vertices(); ++node) { - max_degree = std::max(max_degree, static_cast(Hgraph.get_incident_hyperedges(node).size())); + IndexType maxDegree = 0; + for (IndexType node = 0; node < hgraph.NumVertices(); ++node) { + maxDegree = std::max(maxDegree, static_cast(hgraph.GetIncidentHyperedges(node).size())); } - if (max_nodes_in_part == 0) { // if not initialized - max_nodes_in_part - = static_cast(ceil(static_cast(Hgraph.num_vertices()) - * static_cast(partition.getInstance().getMaxWorkWeightPerPartition()) - / static_cast(compute_total_vertex_work_weight(Hgraph)))); + if (maxNodesInPart_ == 0) { // if not initialized + maxNodesInPart_ = static_cast(ceil(static_cast(hgraph.NumVertices()) + * static_cast(partition.GetInstance().GetMaxWorkWeightPerPartition()) + / static_cast(ComputeTotalVertexWorkWeight(hgraph)))); } - for (unsigned pass_idx = 0; pass_idx < max_number_of_passes; ++pass_idx) { - std::vector node_to_new_part = partition.assignedPartitions(); - std::vector locked(Hgraph.num_vertices(), false); - std::vector gain(Hgraph.num_vertices(), 0); - std::vector > nr_nodes_in_hyperedge_on_side(Hgraph.num_hyperedges(), std::vector(2, 0)); + for (unsigned passIdx = 0; passIdx < maxNumberOfPasses_; ++passIdx) { + std::vector nodeToNewPart = partition.AssignedPartitions(); + std::vector locked(hgraph.NumVertices(), false); + 
std::vector gain(hgraph.NumVertices(), 0); + std::vector > nrNodesInHyperedgeOnSide(hgraph.NumHyperedges(), std::vector(2, 0)); int cost = 0; - index_type left_side = 0; - for (index_type node = 0; node < Hgraph.num_vertices(); ++node) { - if (partition.assignedPartition(node) == 0) { - ++left_side; + IndexType leftSide = 0; + for (IndexType node = 0; node < hgraph.NumVertices(); ++node) { + if (partition.AssignedPartition(node) == 0) { + ++leftSide; } } - if (left_side > max_nodes_in_part || Hgraph.num_vertices() - left_side > max_nodes_in_part) { - if (pass_idx == 0) { + if (leftSide > maxNodesInPart_ || hgraph.NumVertices() - leftSide > maxNodesInPart_) { + if (passIdx == 0) { std::cout << "Error: initial partitioning of FM is not balanced." << std::endl; return; } else { @@ -106,27 +105,27 @@ void GenericFM::ImprovePartitioning(Partitioning &pa } // Initialize gain values - for (index_type hyperedge = 0; hyperedge < Hgraph.num_hyperedges(); ++hyperedge) { - for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { - ++nr_nodes_in_hyperedge_on_side[hyperedge][partition.assignedPartition(node)]; + for (IndexType hyperedge = 0; hyperedge < hgraph.NumHyperedges(); ++hyperedge) { + for (IndexType node : hgraph.GetVerticesInHyperedge(hyperedge)) { + ++nrNodesInHyperedgeOnSide[hyperedge][partition.AssignedPartition(node)]; } - if (Hgraph.get_vertices_in_hyperedge(hyperedge).size() < 2) { + if (hgraph.GetVerticesInHyperedge(hyperedge).size() < 2) { continue; } for (unsigned part = 0; part < 2; ++part) { - if (nr_nodes_in_hyperedge_on_side[hyperedge][part] == 1) { - for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { - if (partition.assignedPartition(node) == part) { + if (nrNodesInHyperedgeOnSide[hyperedge][part] == 1) { + for (IndexType node : hgraph.GetVerticesInHyperedge(hyperedge)) { + if (partition.AssignedPartition(node) == part) { ++gain[node]; } } } - if (nr_nodes_in_hyperedge_on_side[hyperedge][part] == 0) { - for (index_type node 
: Hgraph.get_vertices_in_hyperedge(hyperedge)) { - if (partition.assignedPartition(node) != part) { + if (nrNodesInHyperedgeOnSide[hyperedge][part] == 0) { + for (IndexType node : hgraph.GetVerticesInHyperedge(hyperedge)) { + if (partition.AssignedPartition(node) != part) { --gain[node]; } } @@ -135,244 +134,242 @@ void GenericFM::ImprovePartitioning(Partitioning &pa } // build gain bucket array - std::vector max_gain(2, -static_cast(max_degree) - 1); - std::vector > > gain_bucket_array( - 2, std::vector >(2 * max_degree + 1)); - for (index_type node = 0; node < Hgraph.num_vertices(); ++node) { - const unsigned &part = partition.assignedPartition(node); - gain_bucket_array[part][static_cast(gain[node] + static_cast(max_degree))].push_back(node); - max_gain[part] = std::max(max_gain[part], gain[node]); + std::vector maxGain(2, -static_cast(maxDegree) - 1); + std::vector > > gainBucketArray( + 2, std::vector >(2 * maxDegree + 1)); + for (IndexType node = 0; node < hgraph.NumVertices(); ++node) { + const unsigned &part = partition.AssignedPartition(node); + gainBucketArray[part][static_cast(gain[node] + static_cast(maxDegree))].push_back(node); + maxGain[part] = std::max(maxGain[part], gain[node]); } - index_type best_index = 0; - int best_cost = 0; - std::vector moved_nodes; + IndexType bestIndex = 0; + int bestCost = 0; + std::vector movedNodes; // the pass itself: make moves - while (moved_nodes.size() < Hgraph.num_vertices()) { + while (movedNodes.size() < hgraph.NumVertices()) { // select move - index_type to_move = std::numeric_limits::max(); - unsigned chosen_part = std::numeric_limits::max(); - - unsigned gain_index = static_cast(std::max(max_gain[0], max_gain[1]) + static_cast(max_degree)); - while (gain_index < std::numeric_limits::max()) { - bool can_choose_left = (Hgraph.num_vertices() - left_side < max_nodes_in_part) - && !gain_bucket_array[0][gain_index].empty(); - bool can_choose_right = (left_side < max_nodes_in_part) && 
!gain_bucket_array[1][gain_index].empty(); - - if (can_choose_left && can_choose_right) { - chosen_part = (left_side >= Hgraph.num_vertices() / 2) ? 1 : 0; - } else if (can_choose_left) { - chosen_part = 0; - } else if (can_choose_right) { - chosen_part = 1; + IndexType toMove = std::numeric_limits::max(); + unsigned chosenPart = std::numeric_limits::max(); + + unsigned gainIndex = static_cast(std::max(maxGain[0], maxGain[1]) + static_cast(maxDegree)); + while (gainIndex < std::numeric_limits::max()) { + bool canChooseLeft = (hgraph.NumVertices() - leftSide < maxNodesInPart_) && !gainBucketArray[0][gainIndex].empty(); + bool canChooseRight = (leftSide < maxNodesInPart_) && !gainBucketArray[1][gainIndex].empty(); + + if (canChooseLeft && canChooseRight) { + chosenPart = (leftSide >= hgraph.NumVertices() / 2) ? 1 : 0; + } else if (canChooseLeft) { + chosenPart = 0; + } else if (canChooseRight) { + chosenPart = 1; } - if (chosen_part < 2) { - to_move = gain_bucket_array[chosen_part][gain_index].back(); - gain_bucket_array[chosen_part][gain_index].pop_back(); + if (chosenPart < 2) { + toMove = gainBucketArray[chosenPart][gainIndex].back(); + gainBucketArray[chosenPart][gainIndex].pop_back(); break; } - --gain_index; + --gainIndex; } - if (to_move == std::numeric_limits::max()) { + if (toMove == std::numeric_limits::max()) { break; } // make move - moved_nodes.push_back(to_move); - cost -= gain[to_move]; - if (cost < best_cost) { - best_cost = cost; - best_index = static_cast(moved_nodes.size()) + 1; + movedNodes.push_back(toMove); + cost -= gain[toMove]; + if (cost < bestCost) { + bestCost = cost; + bestIndex = static_cast(movedNodes.size()) + 1; } - locked[to_move] = true; - node_to_new_part[to_move] = 1 - node_to_new_part[to_move]; + locked[toMove] = true; + nodeToNewPart[toMove] = 1 - nodeToNewPart[toMove]; - if (chosen_part == 0) { - --left_side; + if (chosenPart == 0) { + --leftSide; } else { - ++left_side; + ++leftSide; } - unsigned other_part = 1 - chosen_part; 
+ unsigned otherPart = 1 - chosenPart; // update gain values - for (index_type hyperedge : Hgraph.get_incident_hyperedges(to_move)) { - if (nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 1) { - for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { + for (IndexType hyperedge : hgraph.GetIncidentHyperedges(toMove)) { + if (nrNodesInHyperedgeOnSide[hyperedge][chosenPart] == 1) { + for (IndexType node : hgraph.GetVerticesInHyperedge(hyperedge)) { if (locked[node]) { continue; } - std::vector &vec - = gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))]; + std::vector &vec + = gainBucketArray[otherPart][static_cast(gain[node] + static_cast(maxDegree))]; vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end()); --gain[node]; - gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))].push_back( - node); + gainBucketArray[otherPart][static_cast(gain[node] + static_cast(maxDegree))].push_back(node); } - } else if (nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 2) { - for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { - if (node_to_new_part[node] == chosen_part && !locked[node]) { - std::vector &vec - = gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))]; + } else if (nrNodesInHyperedgeOnSide[hyperedge][chosenPart] == 2) { + for (IndexType node : hgraph.GetVerticesInHyperedge(hyperedge)) { + if (nodeToNewPart[node] == chosenPart && !locked[node]) { + std::vector &vec + = gainBucketArray[chosenPart][static_cast(gain[node] + static_cast(maxDegree))]; vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end()); ++gain[node]; - gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))] - .push_back(node); - max_gain[chosen_part] = std::max(max_gain[chosen_part], gain[node]); + gainBucketArray[chosenPart][static_cast(gain[node] + static_cast(maxDegree))].push_back( + node); + maxGain[chosenPart] = 
std::max(maxGain[chosenPart], gain[node]); break; } } } - if (nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 1) { - for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { - if (node_to_new_part[node] == other_part && !locked[node]) { - std::vector &vec - = gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))]; + if (nrNodesInHyperedgeOnSide[hyperedge][otherPart] == 1) { + for (IndexType node : hgraph.GetVerticesInHyperedge(hyperedge)) { + if (nodeToNewPart[node] == otherPart && !locked[node]) { + std::vector &vec + = gainBucketArray[otherPart][static_cast(gain[node] + static_cast(maxDegree))]; vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end()); --gain[node]; - gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))].push_back( + gainBucketArray[otherPart][static_cast(gain[node] + static_cast(maxDegree))].push_back( node); break; } } - } else if (nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 0) { - for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { + } else if (nrNodesInHyperedgeOnSide[hyperedge][otherPart] == 0) { + for (IndexType node : hgraph.GetVerticesInHyperedge(hyperedge)) { if (locked[node]) { continue; } - std::vector &vec - = gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))]; + std::vector &vec + = gainBucketArray[chosenPart][static_cast(gain[node] + static_cast(maxDegree))]; vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end()); ++gain[node]; - gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))].push_back( + gainBucketArray[chosenPart][static_cast(gain[node] + static_cast(maxDegree))].push_back( node); - max_gain[chosen_part] = std::max(max_gain[chosen_part], gain[node]); + maxGain[chosenPart] = std::max(maxGain[chosenPart], gain[node]); } } - --nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part]; - ++nr_nodes_in_hyperedge_on_side[hyperedge][other_part]; + 
--nrNodesInHyperedgeOnSide[hyperedge][chosenPart]; + ++nrNodesInHyperedgeOnSide[hyperedge][otherPart]; } } // apply best configuration seen - if (best_index == 0) { + if (bestIndex == 0) { break; } - for (index_type node_idx = 0; node_idx < best_index && node_idx < static_cast(moved_nodes.size()); ++node_idx) { - partition.setAssignedPartition(moved_nodes[node_idx], 1U - partition.assignedPartition(moved_nodes[node_idx])); + for (IndexType nodeIdx = 0; nodeIdx < bestIndex && nodeIdx < static_cast(movedNodes.size()); ++nodeIdx) { + partition.SetAssignedPartition(movedNodes[nodeIdx], 1U - partition.AssignedPartition(movedNodes[nodeIdx])); } } } -template -void GenericFM::RecursiveFM(Partitioning &partition) { - const unsigned &nr_parts = partition.getInstance().getNumberOfPartitions(); - const index_type &nr_nodes = partition.getInstance().getHypergraph().num_vertices(); +template +void GenericFM::RecursiveFM(Partitioning &partition) { + const unsigned &nrParts = partition.GetInstance().GetNumberOfPartitions(); + const IndexType &nrNodes = partition.GetInstance().GetHypergraph().NumVertices(); - using Hgraph = Hypergraph; + using Hgraph = Hypergraph; // Note: this is just a simple recursive heuristic for the case when the partitions are a small power of 2 - if (nr_parts != 4 && nr_parts != 8 && nr_parts != 16 && nr_parts != 32) { + if (nrParts != 4 && nrParts != 8 && nrParts != 16 && nrParts != 32) { std::cout << "Error: Recursive FM can only be used for 4, 8, 16 or 32 partitions currently." 
<< std::endl; return; } - for (index_type node = 0; node < nr_nodes; ++node) { - partition.setAssignedPartition(node, static_cast(node % 2)); + for (IndexType node = 0; node < nrNodes; ++node) { + partition.SetAssignedPartition(node, static_cast(node % 2)); } - if (max_nodes_in_part == 0) { // if not initialized - max_nodes_in_part = static_cast( - ceil(static_cast(nr_nodes) * static_cast(partition.getInstance().getMaxWorkWeightPerPartition()) - / static_cast(compute_total_vertex_work_weight(partition.getInstance().getHypergraph())))); + if (maxNodesInPart_ == 0) { // if not initialized + maxNodesInPart_ = static_cast( + ceil(static_cast(nrNodes) * static_cast(partition.GetInstance().GetMaxWorkWeightPerPartition()) + / static_cast(ComputeTotalVertexWorkWeight(partition.GetInstance().GetHypergraph())))); } - const std::vector max_nodes_on_level = getMaxNodesOnLevel(nr_nodes, nr_parts); + const std::vector maxNodesOnLevel = GetMaxNodesOnLevel(nrNodes, nrParts); unsigned parts = 1; unsigned level = 0; - std::vector sub_hgraphs({partition.getInstance().getHypergraph()}); - unsigned start_index = 0; - - std::map > node_to_new_hgraph_and_id; - std::map, index_type> hgraph_and_id_to_old_idx; - for (index_type node = 0; node < nr_nodes; ++node) { - node_to_new_hgraph_and_id[node] = std::make_pair(0, node); - hgraph_and_id_to_old_idx[std::make_pair(0, node)] = node; + std::vector subHgraphs({partition.GetInstance().GetHypergraph()}); + unsigned startIndex = 0; + + std::map > nodeToNewHgraphAndId; + std::map, IndexType> hgraphAndIdToOldIdx; + for (IndexType node = 0; node < nrNodes; ++node) { + nodeToNewHgraphAndId[node] = std::make_pair(0, node); + hgraphAndIdToOldIdx[std::make_pair(0, node)] = node; } - while (parts < nr_parts) { - unsigned end_idx = static_cast(sub_hgraphs.size()); - for (unsigned sub_hgraph_index = start_index; sub_hgraph_index < end_idx; ++sub_hgraph_index) { - const Hgraph &hgraph = sub_hgraphs[sub_hgraph_index]; + while (parts < nrParts) { + unsigned 
endIdx = static_cast(subHgraphs.size()); + for (unsigned subHgraphIndex = startIndex; subHgraphIndex < endIdx; ++subHgraphIndex) { + const Hgraph &hgraph = subHgraphs[subHgraphIndex]; PartitioningProblem instance(hgraph, 2); - Partitioning sub_partition(instance); - for (index_type node = 0; node < hgraph.num_vertices(); ++node) { - sub_partition.setAssignedPartition(node, node % 2); + Partitioning subPartition(instance); + for (IndexType node = 0; node < hgraph.NumVertices(); ++node) { + subPartition.SetAssignedPartition(node, node % 2); } - GenericFM sub_fm; - sub_fm.setMaxNodesInPart(max_nodes_on_level[level]); - // std::cout<<"Hgraph of size "< current_idx(2, 0); - std::vector > part_indicator(2, std::vector(hgraph.num_vertices(), false)); - for (index_type node = 0; node < hgraph.num_vertices(); ++node) { - const unsigned part_id = sub_partition.assignedPartition(node); - const index_type original_id = hgraph_and_id_to_old_idx[std::make_pair(sub_hgraph_index, node)]; - node_to_new_hgraph_and_id[original_id] = std::make_pair(sub_hgraphs.size() + part_id, current_idx[part_id]); - hgraph_and_id_to_old_idx[std::make_pair(sub_hgraphs.size() + part_id, current_idx[part_id])] = original_id; - ++current_idx[part_id]; - part_indicator[part_id][node] = true; + GenericFM subFm; + subFm.SetMaxNodesInPart(maxNodesOnLevel[level]); + // std::cout<<"Hgraph of size "< currentIdx(2, 0); + std::vector > partIndicator(2, std::vector(hgraph.NumVertices(), false)); + for (IndexType node = 0; node < hgraph.NumVertices(); ++node) { + const unsigned partId = subPartition.AssignedPartition(node); + const IndexType originalId = hgraphAndIdToOldIdx[std::make_pair(subHgraphIndex, node)]; + nodeToNewHgraphAndId[originalId] = std::make_pair(subHgraphs.size() + partId, currentIdx[partId]); + hgraphAndIdToOldIdx[std::make_pair(subHgraphs.size() + partId, currentIdx[partId])] = originalId; + ++currentIdx[partId]; + partIndicator[partId][node] = true; } for (unsigned part = 0; part < 2; ++part) 
{ - sub_hgraphs.push_back(create_induced_hypergraph(sub_hgraphs[sub_hgraph_index], part_indicator[part])); + subHgraphs.push_back(CreateInducedHypergraph(subHgraphs[subHgraphIndex], partIndicator[part])); } - ++start_index; + ++startIndex; } parts *= 2; ++level; } - for (index_type node = 0; node < nr_nodes; ++node) { - partition.setAssignedPartition( - node, node_to_new_hgraph_and_id[node].first - (static_cast(sub_hgraphs.size()) - nr_parts)); + for (IndexType node = 0; node < nrNodes; ++node) { + partition.SetAssignedPartition(node, + nodeToNewHgraphAndId[node].first - (static_cast(subHgraphs.size()) - nrParts)); } } -template -std::vector GenericFM::getMaxNodesOnLevel( - typename hypergraph_t::vertex_idx nr_nodes, unsigned nr_parts) const { - std::vector max_nodes_on_level; - std::vector limit_per_level({static_cast(ceil(static_cast(nr_nodes) / 2.0))}); - for (unsigned parts = nr_parts / 4; parts > 0; parts /= 2) { - limit_per_level.push_back(static_cast(ceil(static_cast(limit_per_level.back()) / 2.0))); +template +std::vector GenericFM::GetMaxNodesOnLevel(typename HypergraphT::VertexIdx nrNodes, + unsigned nrParts) const { + std::vector maxNodesOnLevel; + std::vector limitPerLevel({static_cast(ceil(static_cast(nrNodes) / 2.0))}); + for (unsigned parts = nrParts / 4; parts > 0; parts /= 2) { + limitPerLevel.push_back(static_cast(ceil(static_cast(limitPerLevel.back()) / 2.0))); } - max_nodes_on_level.push_back(max_nodes_in_part); - for (unsigned parts = 2; parts < nr_parts; parts *= 2) { - index_type next_limit = max_nodes_on_level.back() * 2; - if (next_limit > limit_per_level.back()) { - --next_limit; + maxNodesOnLevel.push_back(maxNodesInPart_); + for (unsigned parts = 2; parts < nrParts; parts *= 2) { + IndexType nextLimit = maxNodesOnLevel.back() * 2; + if (nextLimit > limitPerLevel.back()) { + --nextLimit; } - limit_per_level.pop_back(); - max_nodes_on_level.push_back(next_limit); + limitPerLevel.pop_back(); + maxNodesOnLevel.push_back(nextLimit); } - 
std::reverse(max_nodes_on_level.begin(), max_nodes_on_level.end()); - return max_nodes_on_level; + std::reverse(maxNodesOnLevel.begin(), maxNodesOnLevel.end()); + return maxNodesOnLevel; } } // namespace osp diff --git a/include/osp/partitioning/partitioners/partitioning_ILP.hpp b/include/osp/partitioning/partitioners/partitioning_ILP.hpp index 5623d581..a1ea92e7 100644 --- a/include/osp/partitioning/partitioners/partitioning_ILP.hpp +++ b/include/osp/partitioning/partitioners/partitioning_ILP.hpp @@ -27,70 +27,70 @@ limitations under the License. namespace osp { -template -class HypergraphPartitioningILP : public HypergraphPartitioningILPBase { +template +class HypergraphPartitioningILP : public HypergraphPartitioningILPBase { protected: - std::vector readCoptAssignment(const PartitioningProblem &instance, Model &model); + std::vector ReadCoptAssignment(const PartitioningProblem &instance, Model &model); - void setupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model); + void SetupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model); - void setInitialSolution(const Partitioning &partition, Model &model); + void SetInitialSolution(const Partitioning &partition, Model &model); public: virtual ~HypergraphPartitioningILP() override = default; - RETURN_STATUS computePartitioning(Partitioning &result); + ReturnStatus ComputePartitioning(Partitioning &result); - virtual std::string getAlgorithmName() const override { return "HypergraphPartitioningILP"; } + virtual std::string GetAlgorithmName() const override { return "HypergraphPartitioningILP"; } }; -template -RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Partitioning &result) { +template +ReturnStatus HypergraphPartitioningILP::ComputePartitioning(Partitioning &result) { Envr env; Model model = env.CreateModel("HypergraphPart"); - this->setupFundamentalVariablesConstraintsObjective(result.getInstance(), model); - 
setupExtraVariablesConstraints(result.getInstance(), model); + this->SetupFundamentalVariablesConstraintsObjective(result.GetInstance(), model); + SetupExtraVariablesConstraints(result.GetInstance(), model); - if (this->use_initial_solution) { - setInitialSolution(result, model); + if (this->useInitialSolution_) { + SetInitialSolution(result, model); } - this->solveILP(model); + this->SolveIlp(model); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - result.setAssignedPartitions(readCoptAssignment(result.getInstance(), model)); - return RETURN_STATUS::OSP_SUCCESS; + result.SetAssignedPartitions(ReadCoptAssignment(result.GetInstance(), model)); + return ReturnStatus::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } else { if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - result.setAssignedPartitions(readCoptAssignment(result.getInstance(), model)); - return RETURN_STATUS::OSP_SUCCESS; + result.SetAssignedPartitions(ReadCoptAssignment(result.GetInstance(), model)); + return ReturnStatus::OSP_SUCCESS; } else { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } } } -template -void HypergraphPartitioningILP::setupExtraVariablesConstraints(const PartitioningProblem &instance, - Model &model) { - using index_type = typename hypergraph_t::vertex_idx; +template +void HypergraphPartitioningILP::SetupExtraVariablesConstraints(const PartitioningProblem &instance, + Model &model) { + using IndexType = typename HypergraphT::VertexIdx; - const index_type numberOfParts = instance.getNumberOfPartitions(); - const index_type numberOfVertices = instance.getHypergraph().num_vertices(); + const IndexType numberOfParts = instance.GetNumberOfPartitions(); + const IndexType numberOfVertices = instance.GetHypergraph().NumVertices(); // Constraints // each node assigned to exactly one partition - for (index_type node = 0; node < 
numberOfVertices; node++) { + for (IndexType node = 0; node < numberOfVertices; node++) { Expr expr; for (unsigned part = 0; part < numberOfParts; part++) { - expr += this->node_in_partition[node][static_cast(part)]; + expr += this->nodeInPartition_[node][static_cast(part)]; } model.AddConstr(expr == 1); @@ -98,48 +98,48 @@ void HypergraphPartitioningILP::setupExtraVariablesConstraints(con // hyperedge indicators match node variables for (unsigned part = 0; part < numberOfParts; part++) { - for (index_type node = 0; node < numberOfVertices; node++) { - for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { - model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part)] - >= this->node_in_partition[node][static_cast(part)]); + for (IndexType node = 0; node < numberOfVertices; node++) { + for (const IndexType &hyperedge : instance.GetHypergraph().GetIncidentHyperedges(node)) { + model.AddConstr(this->hyperedgeUsesPartition_[hyperedge][static_cast(part)] + >= this->nodeInPartition_[node][static_cast(part)]); } } } } // convert generic one-to-many assingment (of base class function) to one-to-one -template -std::vector HypergraphPartitioningILP::readCoptAssignment(const PartitioningProblem &instance, - Model &model) { - using index_type = typename hypergraph_t::vertex_idx; +template +std::vector HypergraphPartitioningILP::ReadCoptAssignment(const PartitioningProblem &instance, + Model &model) { + using IndexType = typename HypergraphT::VertexIdx; - std::vector node_to_partition(instance.getHypergraph().num_vertices(), std::numeric_limits::max()); - std::vector> assignmentsGenericForm = this->readAllCoptAssignments(instance, model); + std::vector nodeToPartition(instance.GetHypergraph().NumVertices(), std::numeric_limits::max()); + std::vector> assignmentsGenericForm = this->ReadAllCoptAssignments(instance, model); - for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) { - 
node_to_partition[node] = assignmentsGenericForm[node].front(); + for (IndexType node = 0; node < instance.GetHypergraph().NumVertices(); node++) { + nodeToPartition[node] = assignmentsGenericForm[node].front(); } - return node_to_partition; + return nodeToPartition; } -template -void HypergraphPartitioningILP::setInitialSolution(const Partitioning &partition, Model &model) { - using index_type = typename hypergraph_t::vertex_idx; +template +void HypergraphPartitioningILP::SetInitialSolution(const Partitioning &partition, Model &model) { + using IndexType = typename HypergraphT::VertexIdx; - const std::vector &assignment = partition.assignedPartitions(); - const unsigned &numPartitions = partition.getInstance().getNumberOfPartitions(); - if (assignment.size() != partition.getInstance().getHypergraph().num_vertices()) { + const std::vector &assignment = partition.AssignedPartitions(); + const unsigned &numPartitions = partition.GetInstance().GetNumberOfPartitions(); + if (assignment.size() != partition.GetInstance().GetHypergraph().NumVertices()) { return; } - for (index_type node = 0; node < assignment.size(); ++node) { + for (IndexType node = 0; node < assignment.size(); ++node) { if (assignment[node] >= numPartitions) { continue; } for (unsigned part = 0; part < numPartitions; ++part) { - model.SetMipStart(this->node_in_partition[node][static_cast(part)], static_cast(assignment[node] == part)); + model.SetMipStart(this->nodeInPartition_[node][static_cast(part)], static_cast(assignment[node] == part)); } } model.LoadMipStart(); diff --git a/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp b/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp index 23e12d5b..afc25af4 100644 --- a/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp +++ b/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp @@ -26,38 +26,38 @@ limitations under the License. 
namespace osp { -template +template class HypergraphPartitioningILPBase { protected: - std::vector node_in_partition; - std::vector hyperedge_uses_partition; + std::vector nodeInPartition_; + std::vector hyperedgeUsesPartition_; - unsigned time_limit_seconds = 3600; - bool use_initial_solution = false; + unsigned timeLimitSeconds_ = 3600; + bool useInitialSolution_ = false; - std::vector > readAllCoptAssignments(const PartitioningProblem &instance, Model &model); + std::vector > ReadAllCoptAssignments(const PartitioningProblem &instance, Model &model); - void setupFundamentalVariablesConstraintsObjective(const PartitioningProblem &instance, Model &model); + void SetupFundamentalVariablesConstraintsObjective(const PartitioningProblem &instance, Model &model); - void solveILP(Model &model); + void SolveIlp(Model &model); public: - virtual std::string getAlgorithmName() const = 0; + virtual std::string GetAlgorithmName() const = 0; - inline unsigned getTimeLimitSeconds() const { return time_limit_seconds; } + inline unsigned GetTimeLimitSeconds() const { return timeLimitSeconds_; } - inline void setTimeLimitSeconds(unsigned limit_) { time_limit_seconds = limit_; } + inline void SetTimeLimitSeconds(unsigned limit) { timeLimitSeconds_ = limit; } - inline void setUseInitialSolution(bool use_) { use_initial_solution = use_; } + inline void SetUseInitialSolution(bool use) { useInitialSolution_ = use; } virtual ~HypergraphPartitioningILPBase() = default; }; -template -void HypergraphPartitioningILPBase::solveILP(Model &model) { +template +void HypergraphPartitioningILPBase::SolveIlp(Model &model) { model.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0); - model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, time_limit_seconds); + model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds_); model.SetIntParam(COPT_INTPARAM_THREADS, 128); model.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); @@ -73,108 +73,108 @@ void HypergraphPartitioningILPBase::solveILP(Model &model) { model.Solve(); } 
-template -void HypergraphPartitioningILPBase::setupFundamentalVariablesConstraintsObjective( - const PartitioningProblem &instance, Model &model) { - using index_type = typename hypergraph_t::vertex_idx; - using workw_type = typename hypergraph_t::vertex_work_weight_type; - using memw_type = typename hypergraph_t::vertex_mem_weight_type; +template +void HypergraphPartitioningILPBase::SetupFundamentalVariablesConstraintsObjective( + const PartitioningProblem &instance, Model &model) { + using IndexType = typename HypergraphT::VertexIdx; + using WorkwType = typename HypergraphT::VertexWorkWeightType; + using MemwType = typename HypergraphT::VertexMemWeightType; - const index_type numberOfParts = instance.getNumberOfPartitions(); - const index_type numberOfVertices = instance.getHypergraph().num_vertices(); - const index_type numberOfHyperedges = instance.getHypergraph().num_hyperedges(); + const IndexType numberOfParts = instance.GetNumberOfPartitions(); + const IndexType numberOfVertices = instance.GetHypergraph().NumVertices(); + const IndexType numberOfHyperedges = instance.GetHypergraph().NumHyperedges(); // Variables - node_in_partition = std::vector(numberOfVertices); + nodeInPartition_ = std::vector(numberOfVertices); - for (index_type node = 0; node < numberOfVertices; node++) { - node_in_partition[node] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "node_in_partition"); + for (IndexType node = 0; node < numberOfVertices; node++) { + nodeInPartition_[node] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "node_in_partition"); } - hyperedge_uses_partition = std::vector(numberOfHyperedges); + hyperedgeUsesPartition_ = std::vector(numberOfHyperedges); - for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) { - hyperedge_uses_partition[hyperedge] + for (IndexType hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) { + hyperedgeUsesPartition_[hyperedge] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, 
"hyperedge_uses_partition"); } // partition size constraints - if (instance.getMaxWorkWeightPerPartition() < std::numeric_limits::max()) { + if (instance.GetMaxWorkWeightPerPartition() < std::numeric_limits::max()) { for (unsigned part = 0; part < numberOfParts; part++) { Expr expr; - for (index_type node = 0; node < numberOfVertices; node++) { - expr += instance.getHypergraph().get_vertex_work_weight(node) * node_in_partition[node][static_cast(part)]; + for (IndexType node = 0; node < numberOfVertices; node++) { + expr += instance.GetHypergraph().GetVertexWorkWeight(node) * nodeInPartition_[node][static_cast(part)]; } - model.AddConstr(expr <= instance.getMaxWorkWeightPerPartition()); + model.AddConstr(expr <= instance.GetMaxWorkWeightPerPartition()); } } - if (instance.getMaxMemoryWeightPerPartition() < std::numeric_limits::max()) { + if (instance.GetMaxMemoryWeightPerPartition() < std::numeric_limits::max()) { for (unsigned part = 0; part < numberOfParts; part++) { Expr expr; - for (index_type node = 0; node < numberOfVertices; node++) { - expr += instance.getHypergraph().get_vertex_memory_weight(node) * node_in_partition[node][static_cast(part)]; + for (IndexType node = 0; node < numberOfVertices; node++) { + expr += instance.GetHypergraph().GetVertexMemoryWeight(node) * nodeInPartition_[node][static_cast(part)]; } - model.AddConstr(expr <= instance.getMaxMemoryWeightPerPartition()); + model.AddConstr(expr <= instance.GetMaxMemoryWeightPerPartition()); } } // set objective Expr expr; - for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) { - expr -= instance.getHypergraph().get_hyperedge_weight(hyperedge); + for (IndexType hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) { + expr -= instance.GetHypergraph().GetHyperedgeWeight(hyperedge); for (unsigned part = 0; part < numberOfParts; part++) { - expr += instance.getHypergraph().get_hyperedge_weight(hyperedge) - * hyperedge_uses_partition[hyperedge][static_cast(part)]; + expr += 
instance.GetHypergraph().GetHyperedgeWeight(hyperedge) + * hyperedgeUsesPartition_[hyperedge][static_cast(part)]; } } model.SetObjective(expr, COPT_MINIMIZE); } -template -std::vector > HypergraphPartitioningILPBase::readAllCoptAssignments( - const PartitioningProblem &instance, Model &model) { - using index_type = typename hypergraph_t::vertex_idx; +template +std::vector > HypergraphPartitioningILPBase::ReadAllCoptAssignments( + const PartitioningProblem &instance, Model &model) { + using IndexType = typename HypergraphT::VertexIdx; - std::vector > node_to_partitions(instance.getHypergraph().num_vertices()); + std::vector > nodeToPartitions(instance.GetHypergraph().NumVertices()); - std::set nonempty_partition_ids; - for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) { - for (unsigned part = 0; part < instance.getNumberOfPartitions(); part++) { - if (node_in_partition[node][static_cast(part)].Get(COPT_DBLINFO_VALUE) >= .99) { - node_to_partitions[node].push_back(part); - nonempty_partition_ids.insert(part); + std::set nonemptyPartitionIds; + for (IndexType node = 0; node < instance.GetHypergraph().NumVertices(); node++) { + for (unsigned part = 0; part < instance.GetNumberOfPartitions(); part++) { + if (nodeInPartition_[node][static_cast(part)].Get(COPT_DBLINFO_VALUE) >= .99) { + nodeToPartitions[node].push_back(part); + nonemptyPartitionIds.insert(part); } } } - for (std::vector &chosen_partitions : node_to_partitions) { - if (chosen_partitions.empty()) { + for (std::vector &chosenPartitions : nodeToPartitions) { + if (chosenPartitions.empty()) { std::cout << "Error: partitioning returned by ILP seems incomplete!" 
<< std::endl; - chosen_partitions.push_back(std::numeric_limits::max()); + chosenPartitions.push_back(std::numeric_limits::max()); } } - unsigned current_index = 0; - std::map new_part_index; - for (unsigned part_index : nonempty_partition_ids) { - new_part_index[part_index] = current_index; - ++current_index; + unsigned currentIndex = 0; + std::map newPartIndex; + for (unsigned partIndex : nonemptyPartitionIds) { + newPartIndex[partIndex] = currentIndex; + ++currentIndex; } - for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) { - for (unsigned entry_idx = 0; entry_idx < node_to_partitions[node].size(); ++entry_idx) { - node_to_partitions[node][entry_idx] = new_part_index[node_to_partitions[node][entry_idx]]; + for (IndexType node = 0; node < instance.GetHypergraph().NumVertices(); node++) { + for (unsigned entryIdx = 0; entryIdx < nodeToPartitions[node].size(); ++entryIdx) { + nodeToPartitions[node][entryIdx] = newPartIndex[nodeToPartitions[node][entryIdx]]; } } std::cout << "Hypergraph partitioning ILP best solution value: " << model.GetDblAttr(COPT_DBLATTR_BESTOBJ) << ", best lower bound: " << model.GetDblAttr(COPT_DBLATTR_BESTBND) << std::endl; - return node_to_partitions; + return nodeToPartitions; } } // namespace osp diff --git a/include/osp/partitioning/partitioners/partitioning_ILP_replication.hpp b/include/osp/partitioning/partitioners/partitioning_ILP_replication.hpp index 5378ff1a..ac9d04da 100644 --- a/include/osp/partitioning/partitioners/partitioning_ILP_replication.hpp +++ b/include/osp/partitioning/partitioners/partitioning_ILP_replication.hpp @@ -26,90 +26,90 @@ limitations under the License. 
namespace osp { -template -class HypergraphPartitioningILPWithReplication : public HypergraphPartitioningILPBase { +template +class HypergraphPartitioningILPWithReplication : public HypergraphPartitioningILPBase { public: - enum class REPLICATION_MODEL_IN_ILP { ONLY_TWICE, GENERAL }; + enum class ReplicationModelInIlp { ONLY_TWICE, GENERAL }; protected: - void setupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model); + void SetupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model); - void setInitialSolution(const PartitioningWithReplication &partition, Model &model); + void SetInitialSolution(const PartitioningWithReplication &partition, Model &model); - REPLICATION_MODEL_IN_ILP replication_model = REPLICATION_MODEL_IN_ILP::ONLY_TWICE; + ReplicationModelInIlp replicationModel_ = ReplicationModelInIlp::ONLY_TWICE; public: virtual ~HypergraphPartitioningILPWithReplication() override = default; - RETURN_STATUS computePartitioning(PartitioningWithReplication &result); + ReturnStatus ComputePartitioning(PartitioningWithReplication &result); - virtual std::string getAlgorithmName() const override { return "HypergraphPartitioningILPWithReplication"; } + virtual std::string GetAlgorithmName() const override { return "HypergraphPartitioningILPWithReplication"; } - void setReplicationModel(REPLICATION_MODEL_IN_ILP replication_model_) { replication_model = replication_model_; } + void SetReplicationModel(ReplicationModelInIlp replicationModel) { replicationModel_ = replicationModel; } }; -template -RETURN_STATUS HypergraphPartitioningILPWithReplication::computePartitioning( - PartitioningWithReplication &result) { +template +ReturnStatus HypergraphPartitioningILPWithReplication::ComputePartitioning( + PartitioningWithReplication &result) { Envr env; Model model = env.CreateModel("HypergraphPartRepl"); - this->setupFundamentalVariablesConstraintsObjective(result.getInstance(), model); - 
setupExtraVariablesConstraints(result.getInstance(), model); + this->SetupFundamentalVariablesConstraintsObjective(result.GetInstance(), model); + SetupExtraVariablesConstraints(result.GetInstance(), model); - if (this->use_initial_solution) { - setInitialSolution(result, model); + if (this->useInitialSolution_) { + SetInitialSolution(result, model); } - this->solveILP(model); + this->SolveIlp(model); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - result.setAssignedPartitionVectors(this->readAllCoptAssignments(result.getInstance(), model)); - return RETURN_STATUS::OSP_SUCCESS; + result.SetAssignedPartitionVectors(this->ReadAllCoptAssignments(result.GetInstance(), model)); + return ReturnStatus::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } else { if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - result.setAssignedPartitionVectors(this->readAllCoptAssignments(result.getInstance(), model)); - return RETURN_STATUS::OSP_SUCCESS; + result.SetAssignedPartitionVectors(this->ReadAllCoptAssignments(result.GetInstance(), model)); + return ReturnStatus::OSP_SUCCESS; } else { - return RETURN_STATUS::ERROR; + return ReturnStatus::ERROR; } } } -template -void HypergraphPartitioningILPWithReplication::setupExtraVariablesConstraints( - const PartitioningProblem &instance, Model &model) { - using index_type = typename hypergraph_t::vertex_idx; +template +void HypergraphPartitioningILPWithReplication::SetupExtraVariablesConstraints( + const PartitioningProblem &instance, Model &model) { + using IndexType = typename HypergraphT::VertexIdx; - const index_type numberOfParts = instance.getNumberOfPartitions(); - const index_type numberOfVertices = instance.getHypergraph().num_vertices(); + const IndexType numberOfParts = instance.GetNumberOfPartitions(); + const IndexType numberOfVertices = instance.GetHypergraph().NumVertices(); - if 
(replication_model == REPLICATION_MODEL_IN_ILP::GENERAL) { + if (replicationModel_ == ReplicationModelInIlp::GENERAL) { // create variables for each pin+partition combination - std::map, index_type> pin_ID_map; - index_type nr_of_pins = 0; - for (index_type node = 0; node < numberOfVertices; node++) { - for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { - pin_ID_map[std::make_pair(node, hyperedge)] = nr_of_pins++; + std::map, IndexType> pinIdMap; + IndexType nrOfPins = 0; + for (IndexType node = 0; node < numberOfVertices; node++) { + for (const IndexType &hyperedge : instance.GetHypergraph().GetIncidentHyperedges(node)) { + pinIdMap[std::make_pair(node, hyperedge)] = nrOfPins++; } } - std::vector pin_covered_by_partition = std::vector(nr_of_pins); + std::vector pinCoveredByPartition = std::vector(nrOfPins); - for (index_type pin = 0; pin < nr_of_pins; pin++) { - pin_covered_by_partition[pin] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "pin_covered_by_partition"); + for (IndexType pin = 0; pin < nrOfPins; pin++) { + pinCoveredByPartition[pin] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "pin_covered_by_partition"); } // each pin covered exactly once - for (index_type pin = 0; pin < nr_of_pins; pin++) { + for (IndexType pin = 0; pin < nrOfPins; pin++) { Expr expr; for (unsigned part = 0; part < numberOfParts; part++) { - expr += pin_covered_by_partition[pin][static_cast(part)]; + expr += pinCoveredByPartition[pin][static_cast(part)]; } model.AddConstr(expr == 1); @@ -117,59 +117,57 @@ void HypergraphPartitioningILPWithReplication::setupExtraVariables // pin covering requires node assignment for (unsigned part = 0; part < numberOfParts; part++) { - for (index_type node = 0; node < numberOfVertices; node++) { - for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { - model.AddConstr( - this->node_in_partition[node][static_cast(part)] - >= 
pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast(part)]); + for (IndexType node = 0; node < numberOfVertices; node++) { + for (const IndexType &hyperedge : instance.GetHypergraph().GetIncidentHyperedges(node)) { + model.AddConstr(this->nodeInPartition_[node][static_cast(part)] + >= pinCoveredByPartition[pinIdMap[std::make_pair(node, hyperedge)]][static_cast(part)]); } } } // pin covering requires hyperedge use for (unsigned part = 0; part < numberOfParts; part++) { - for (index_type node = 0; node < numberOfVertices; node++) { - for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { - model.AddConstr( - this->hyperedge_uses_partition[hyperedge][static_cast(part)] - >= pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast(part)]); + for (IndexType node = 0; node < numberOfVertices; node++) { + for (const IndexType &hyperedge : instance.GetHypergraph().GetIncidentHyperedges(node)) { + model.AddConstr(this->hyperedgeUsesPartition_[hyperedge][static_cast(part)] + >= pinCoveredByPartition[pinIdMap[std::make_pair(node, hyperedge)]][static_cast(part)]); } } } - } else if (replication_model == REPLICATION_MODEL_IN_ILP::ONLY_TWICE) { + } else if (replicationModel_ == ReplicationModelInIlp::ONLY_TWICE) { // each node has one or two copies - VarArray node_replicated = model.AddVars(static_cast(numberOfVertices), COPT_BINARY, "node_replicated"); + VarArray nodeReplicated = model.AddVars(static_cast(numberOfVertices), COPT_BINARY, "node_replicated"); - for (index_type node = 0; node < numberOfVertices; node++) { + for (IndexType node = 0; node < numberOfVertices; node++) { Expr expr = -1; for (unsigned part = 0; part < numberOfParts; part++) { - expr += this->node_in_partition[node][static_cast(part)]; + expr += this->nodeInPartition_[node][static_cast(part)]; } - model.AddConstr(expr == node_replicated[static_cast(node)]); + model.AddConstr(expr == 
nodeReplicated[static_cast(node)]); } // hyperedge indicators if node is not replicated for (unsigned part = 0; part < numberOfParts; part++) { - for (index_type node = 0; node < numberOfVertices; node++) { - for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { - model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part)] - >= this->node_in_partition[node][static_cast(part)] - - node_replicated[static_cast(node)]); + for (IndexType node = 0; node < numberOfVertices; node++) { + for (const IndexType &hyperedge : instance.GetHypergraph().GetIncidentHyperedges(node)) { + model.AddConstr(this->hyperedgeUsesPartition_[hyperedge][static_cast(part)] + >= this->nodeInPartition_[node][static_cast(part)] + - nodeReplicated[static_cast(node)]); } } } // hyperedge indicators if node is replicated - for (index_type node = 0; node < numberOfVertices; node++) { - for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { + for (IndexType node = 0; node < numberOfVertices; node++) { + for (const IndexType &hyperedge : instance.GetHypergraph().GetIncidentHyperedges(node)) { for (unsigned part1 = 0; part1 < numberOfParts; part1++) { for (unsigned part2 = part1 + 1; part2 < numberOfParts; part2++) { - model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part1)] - + this->hyperedge_uses_partition[hyperedge][static_cast(part2)] - >= this->node_in_partition[node][static_cast(part1)] - + this->node_in_partition[node][static_cast(part2)] - 1); + model.AddConstr(this->hyperedgeUsesPartition_[hyperedge][static_cast(part1)] + + this->hyperedgeUsesPartition_[hyperedge][static_cast(part2)] + >= this->nodeInPartition_[node][static_cast(part1)] + + this->nodeInPartition_[node][static_cast(part2)] - 1); } } } @@ -177,18 +175,18 @@ void HypergraphPartitioningILPWithReplication::setupExtraVariables } } -template -void HypergraphPartitioningILPWithReplication::setInitialSolution( - const 
PartitioningWithReplication &partition, Model &model) { - using index_type = typename hypergraph_t::vertex_idx; +template +void HypergraphPartitioningILPWithReplication::SetInitialSolution( + const PartitioningWithReplication &partition, Model &model) { + using IndexType = typename HypergraphT::VertexIdx; - const std::vector > &assignments = partition.assignedPartitions(); - const unsigned &numPartitions = partition.getInstance().getNumberOfPartitions(); - if (assignments.size() != partition.getInstance().getHypergraph().num_vertices()) { + const std::vector > &assignments = partition.AssignedPartitions(); + const unsigned &numPartitions = partition.GetInstance().GetNumberOfPartitions(); + if (assignments.size() != partition.GetInstance().GetHypergraph().NumVertices()) { return; } - for (index_type node = 0; node < assignments.size(); ++node) { + for (IndexType node = 0; node < assignments.size(); ++node) { std::vector assingedToPart(numPartitions, false); for (unsigned part : assignments[node]) { if (part < numPartitions) { @@ -197,7 +195,7 @@ void HypergraphPartitioningILPWithReplication::setInitialSolution( } for (unsigned part = 0; part < numPartitions; ++part) { - model.SetMipStart(this->node_in_partition[node][static_cast(part)], static_cast(assingedToPart[part])); + model.SetMipStart(this->nodeInPartition_[node][static_cast(part)], static_cast(assingedToPart[part])); } } model.LoadMipStart(); diff --git a/include/osp/pebbling/PebblingSchedule.hpp b/include/osp/pebbling/PebblingSchedule.hpp index 440d6254..474161c0 100644 --- a/include/osp/pebbling/PebblingSchedule.hpp +++ b/include/osp/pebbling/PebblingSchedule.hpp @@ -47,576 +47,574 @@ typedef std::tuple KeyTriple; * * @see BspInstance */ -template +template class PebblingSchedule { - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); + static_assert(isComputationalDagV, "PebblingSchedule can only be used with computational DAGs."); private: - using 
vertex_idx = vertex_idx_t; - using cost_type = v_workw_t; - using memweight_type = v_memw_t; + using VertexIdx = VertexIdxT; + using CostType = VWorkwT; + using MemWeightType = VMemwT; - static_assert(std::is_same_v, v_commw_t>, + static_assert(std::is_same_v, VCommwT>, "PebblingSchedule requires work and comm. weights to have the same type."); - const BspInstance *instance; + const BspInstance *instance_; - unsigned int number_of_supersteps; + unsigned int numberOfSupersteps_; - bool need_to_load_inputs = true; + bool needToLoadInputs_ = true; - struct compute_step { - vertex_idx node; - std::vector nodes_evicted_after; + struct ComputeStep { + VertexIdx node_; + std::vector nodesEvictedAfter_; - compute_step() {} + ComputeStep() {} - compute_step(vertex_idx node_) : node(node_) {} + ComputeStep(VertexIdx node) : node_(node) {} - compute_step(vertex_idx node_, const std::vector &evicted_) : node(node_), nodes_evicted_after(evicted_) {} + ComputeStep(VertexIdx node, const std::vector &evicted) : node_(node), nodesEvictedAfter_(evicted) {} }; // executed nodes in order in a computation phase, for processor p and superstep s - std::vector>> compute_steps_for_proc_superstep; + std::vector>> computeStepsForProcSuperstep_; // nodes evicted from cache in a given superstep's comm phase - std::vector>> nodes_evicted_in_comm; + std::vector>> nodesEvictedInComm_; // nodes sent down to processor p in superstep s - std::vector>> nodes_sent_down; + std::vector>> nodesSentDown_; // nodes sent up from processor p in superstep s - std::vector>> nodes_sent_up; + std::vector>> nodesSentUp_; // set of nodes that need to have blue pebble at end, sinks by default, and // set of nodes on each processor that begin with red pebble, nothing by default // (TODO: maybe move to problem definition classes instead?) 
- std::set needs_blue_at_end; - std::vector> has_red_in_beginning; + std::set needsBlueAtEnd_; + std::vector> hasRedInBeginning_; // nodes that are from a previous part of a larger DAG, handled differently in conversion - std::set external_sources; + std::set externalSources_; public: - enum CACHE_EVICTION_STRATEGY { FORESIGHT, LEAST_RECENTLY_USED, LARGEST_ID }; + enum CacheEvictionStrategy { FORESIGHT, LEAST_RECENTLY_USED, LARGEST_ID }; /** * @brief Default constructor for the PebblingSchedule class. */ - PebblingSchedule() : instance(nullptr), number_of_supersteps(0) {} + PebblingSchedule() : instance_(nullptr), numberOfSupersteps_(0) {} - PebblingSchedule(const BspInstance &inst) : instance(&inst) { - BspSchedule schedule( - inst, std::vector(inst.numberOfVertices(), 0), std::vector(inst.numberOfVertices(), 0)); + PebblingSchedule(const BspInstance &inst) : instance_(&inst) { + BspSchedule schedule( + inst, std::vector(inst.NumberOfVertices(), 0), std::vector(inst.NumberOfVertices(), 0)); ConvertFromBsp(schedule); } - PebblingSchedule(const BspInstance &inst, - const std::vector &processor_assignment_, - const std::vector &superstep_assignment_) - : instance(&inst) { - BspSchedule schedule(inst, processor_assignment_, superstep_assignment_); + PebblingSchedule(const BspInstance &inst, + const std::vector &processorAssignment, + const std::vector &superstepAssignment) + : instance_(&inst) { + BspSchedule schedule(inst, processorAssignment, superstepAssignment); ConvertFromBsp(schedule); } - PebblingSchedule(const BspInstance &inst, - const std::vector>> &compute_steps, - const std::vector>>> &nodes_evicted_after_compute, - const std::vector>> &nodes_sent_up_, - const std::vector>> &nodes_sent_down_, - const std::vector>> &nodes_evicted_in_comm_, - const std::set &needs_blue_at_end_ = std::set(), - const std::vector> &has_red_in_beginning_ = std::vector>(), - const bool need_to_load_inputs_ = false) - : instance(&inst), - number_of_supersteps(0), - 
need_to_load_inputs(need_to_load_inputs_), - nodes_evicted_in_comm(nodes_evicted_in_comm_), - nodes_sent_down(nodes_sent_down_), - nodes_sent_up(nodes_sent_up_), - needs_blue_at_end(needs_blue_at_end_), - has_red_in_beginning(has_red_in_beginning_) { - compute_steps_for_proc_superstep.resize(compute_steps.size(), - std::vector>(compute_steps[0].size())); - for (unsigned proc = 0; proc < compute_steps.size(); ++proc) { - number_of_supersteps = std::max(number_of_supersteps, static_cast(compute_steps[proc].size())); - for (unsigned supstep = 0; supstep < static_cast(compute_steps[proc].size()); ++supstep) { - for (unsigned step_index = 0; step_index < static_cast(compute_steps[proc][supstep].size()); - ++step_index) { - compute_steps_for_proc_superstep[proc][supstep].emplace_back( - compute_steps[proc][supstep][step_index], nodes_evicted_after_compute[proc][supstep][step_index]); + PebblingSchedule(const BspInstance &inst, + const std::vector>> &computeSteps, + const std::vector>>> &nodesEvictedAfterCompute, + const std::vector>> &nodesSentUp, + const std::vector>> &nodesSentDown, + const std::vector>> &nodesEvictedInComm, + const std::set &needsBlueAtEnd = std::set(), + const std::vector> &hasRedInBeginning = std::vector>(), + const bool needToLoadInputs = false) + : instance_(&inst), + numberOfSupersteps_(0), + needToLoadInputs_(needToLoadInputs), + nodesEvictedInComm_(nodesEvictedInComm), + nodesSentDown_(nodesSentDown), + nodesSentUp_(nodesSentUp), + needsBlueAtEnd_(needsBlueAtEnd), + hasRedInBeginning_(hasRedInBeginning) { + computeStepsForProcSuperstep_.resize(computeSteps.size(), std::vector>(computeSteps[0].size())); + for (unsigned proc = 0; proc < computeSteps.size(); ++proc) { + numberOfSupersteps_ = std::max(numberOfSupersteps_, static_cast(computeSteps[proc].size())); + for (unsigned supstep = 0; supstep < static_cast(computeSteps[proc].size()); ++supstep) { + for (unsigned stepIndex = 0; stepIndex < static_cast(computeSteps[proc][supstep].size()); 
++stepIndex) { + computeStepsForProcSuperstep_[proc][supstep].emplace_back(computeSteps[proc][supstep][stepIndex], + nodesEvictedAfterCompute[proc][supstep][stepIndex]); } } } } - PebblingSchedule(const BspSchedule &schedule, CACHE_EVICTION_STRATEGY evict_rule = LARGEST_ID) - : instance(&schedule.getInstance()) { - ConvertFromBsp(schedule, evict_rule); + PebblingSchedule(const BspSchedule &schedule, CacheEvictionStrategy evictRule = LARGEST_ID) + : instance_(&schedule.GetInstance()) { + ConvertFromBsp(schedule, evictRule); } virtual ~PebblingSchedule() = default; // cost computation - cost_type computeCost() const; - cost_type computeAsynchronousCost() const; + CostType ComputeCost() const; + CostType ComputeAsynchronousCost() const; // remove unnecessary steps (e.g. from ILP solution) - void cleanSchedule(); + void CleanSchedule(); // convert from unconstrained schedule - void ConvertFromBsp(const BspSchedule &schedule, CACHE_EVICTION_STRATEGY evict_rule = LARGEST_ID); + void ConvertFromBsp(const BspSchedule &schedule, CacheEvictionStrategy evictRule = LARGEST_ID); // auxiliary for conversion - std::vector>> computeTopOrdersDFS(const BspSchedule &schedule) const; - static bool hasValidSolution(const BspInstance &instance, - const std::set &external_sources = std::set()); - void SplitSupersteps(const BspSchedule &schedule); - void SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_rule = LARGEST_ID); + std::vector>> ComputeTopOrdersDfs(const BspSchedule &schedule) const; + static bool HasValidSolution(const BspInstance &instance, + const std::set &externalSources = std::set()); + void SplitSupersteps(const BspSchedule &schedule); + void SetMemoryMovement(CacheEvictionStrategy evictRule = LARGEST_ID); // delete current communication schedule, and switch to foresight policy instead void ResetToForesight(); // other basic operations - bool isValid() const; - static std::vector minimumMemoryRequiredPerNodeType(const BspInstance &instance, - const std::set 
&external_sources - = std::set()); + bool IsValid() const; + static std::vector MinimumMemoryRequiredPerNodeType(const BspInstance &instance, + const std::set &externalSources + = std::set()); // expand a MemSchedule from a coarsened DAG to the original DAG - PebblingSchedule ExpandMemSchedule(const BspInstance &original_instance, - const std::vector mapping_to_coarse) const; + PebblingSchedule ExpandMemSchedule(const BspInstance &originalInstance, + const std::vector mappingToCoarse) const; // convert to BSP (ignores vertical I/O and recomputation) - BspSchedule ConvertToBsp() const; + BspSchedule ConvertToBsp() const; /** * @brief Returns a reference to the BspInstance for the schedule. * * @return A reference to the BspInstance for the schedule. */ - const BspInstance &getInstance() const { return *instance; } + const BspInstance &GetInstance() const { return *instance_; } /** * @brief Returns the number of supersteps in the schedule. * * @return The number of supersteps in the schedule. 
*/ - unsigned numberOfSupersteps() const { return number_of_supersteps; } + unsigned NumberOfSupersteps() const { return numberOfSupersteps_; } - void updateNumberOfSupersteps(unsigned new_number_of_supersteps); + void UpdateNumberOfSupersteps(unsigned newNumberOfSupersteps); - inline bool needsToLoadInputs() const { return need_to_load_inputs; } + inline bool NeedsToLoadInputs() const { return needToLoadInputs_; } - inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_; } + inline void SetNeedToLoadInputs(const bool loadInputs) { needToLoadInputs_ = loadInputs; } - void getDataForMultiprocessorPebbling(std::vector>> &computeSteps, - std::vector>> &sendUpSteps, - std::vector>> &sendDownSteps, - std::vector>> &nodesEvictedAfterStep) const; + void GetDataForMultiprocessorPebbling(std::vector>> &computeSteps, + std::vector>> &sendUpSteps, + std::vector>> &sendDownSteps, + std::vector>> &nodesEvictedAfterStep) const; // utility for partial ILPs - std::vector> getMemContentAtEnd() const; - void removeEvictStepsFromEnd(); + std::vector> GetMemContentAtEnd() const; + void RemoveEvictStepsFromEnd(); - void CreateFromPartialPebblings(const BspInstance &bsp_instance, - const std::vector> &pebblings, - const std::vector> &processors_to_parts, - const std::vector> &original_node_id, - const std::vector> &original_proc_id, - const std::vector>> &has_reds_in_beginning); + void CreateFromPartialPebblings(const BspInstance &bspInstance, + const std::vector> &pebblings, + const std::vector> &processorsToParts, + const std::vector> &originalNodeId, + const std::vector> &originalProcId, + const std::vector>> &hasRedsInBeginning); // auxiliary function to remove some unnecessary communications after assembling from partial pebblings - void FixForceEvicts(const std::vector> force_evict_node_proc_step); + void FixForceEvicts(const std::vector> &forceEvictNodeProcStep); // auxiliary after partial pebblings: try to merge supersteps void 
TryToMergeSupersteps(); - const std::vector &GetComputeStepsForProcSuperstep(unsigned proc, unsigned supstep) const { - return compute_steps_for_proc_superstep[proc][supstep]; + const std::vector &GetComputeStepsForProcSuperstep(unsigned proc, unsigned supstep) const { + return computeStepsForProcSuperstep_[proc][supstep]; } - const std::vector &GetNodesEvictedInComm(unsigned proc, unsigned supstep) const { - return nodes_evicted_in_comm[proc][supstep]; + const std::vector &GetNodesEvictedInComm(unsigned proc, unsigned supstep) const { + return nodesEvictedInComm_[proc][supstep]; } - const std::vector &GetNodesSentDown(unsigned proc, unsigned supstep) const { - return nodes_sent_down[proc][supstep]; + const std::vector &GetNodesSentDown(unsigned proc, unsigned supstep) const { + return nodesSentDown_[proc][supstep]; } - const std::vector &GetNodesSentUp(unsigned proc, unsigned supstep) const { return nodes_sent_up[proc][supstep]; } + const std::vector &GetNodesSentUp(unsigned proc, unsigned supstep) const { return nodesSentUp_[proc][supstep]; } - void SetNeedsBlueAtEnd(const std::set &nodes_) { needs_blue_at_end = nodes_; } + void SetNeedsBlueAtEnd(const std::set &nodes) { needsBlueAtEnd_ = nodes; } - void SetExternalSources(const std::set &nodes_) { external_sources = nodes_; } + void SetExternalSources(const std::set &nodes) { externalSources_ = nodes; } - void SetHasRedInBeginning(const std::vector> &nodes_) { has_red_in_beginning = nodes_; } + void SetHasRedInBeginning(const std::vector> &nodes) { hasRedInBeginning_ = nodes; } }; -template -void PebblingSchedule::updateNumberOfSupersteps(unsigned new_number_of_supersteps) { - number_of_supersteps = new_number_of_supersteps; +template +void PebblingSchedule::UpdateNumberOfSupersteps(unsigned newNumberOfSupersteps) { + numberOfSupersteps_ = newNumberOfSupersteps; - compute_steps_for_proc_superstep.clear(); - compute_steps_for_proc_superstep.resize(instance->numberOfProcessors(), - 
std::vector>(number_of_supersteps)); + computeStepsForProcSuperstep_.clear(); + computeStepsForProcSuperstep_.resize(instance_->NumberOfProcessors(), + std::vector>(numberOfSupersteps_)); - nodes_evicted_in_comm.clear(); - nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + nodesEvictedInComm_.clear(); + nodesEvictedInComm_.resize(instance_->NumberOfProcessors(), std::vector>(numberOfSupersteps_)); - nodes_sent_down.clear(); - nodes_sent_down.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + nodesSentDown_.clear(); + nodesSentDown_.resize(instance_->NumberOfProcessors(), std::vector>(numberOfSupersteps_)); - nodes_sent_up.clear(); - nodes_sent_up.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + nodesSentUp_.clear(); + nodesSentUp_.resize(instance_->NumberOfProcessors(), std::vector>(numberOfSupersteps_)); } -template -v_workw_t PebblingSchedule::computeCost() const { - cost_type total_costs = 0; - for (unsigned step = 0; step < number_of_supersteps; ++step) { +template +VWorkwT PebblingSchedule::ComputeCost() const { + CostType totalCosts = 0; + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { // compute phase - cost_type max_work = std::numeric_limits::min(); - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - cost_type work = 0; - for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { - work += instance->getComputationalDag().vertex_work_weight(computeStep.node); + CostType maxWork = std::numeric_limits::min(); + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + CostType work = 0; + for (const auto &computeStep : computeStepsForProcSuperstep_[proc][step]) { + work += instance_->GetComputationalDag().VertexWorkWeight(computeStep.node_); } - if (work > max_work) { - max_work = work; + if (work > maxWork) { + maxWork = work; } } - 
total_costs += max_work; + totalCosts += maxWork; // communication phase - cost_type max_send_up = std::numeric_limits::min(); - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - cost_type send_up = 0; - for (vertex_idx node : nodes_sent_up[proc][step]) { - send_up += instance->getComputationalDag().vertex_comm_weight(node) - * instance->getArchitecture().communicationCosts(); + CostType maxSendUp = std::numeric_limits::min(); + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + CostType sendUp = 0; + for (VertexIdx node : nodesSentUp_[proc][step]) { + sendUp += instance_->GetComputationalDag().VertexCommWeight(node) + * instance_->GetArchitecture().CommunicationCosts(); } - if (send_up > max_send_up) { - max_send_up = send_up; + if (sendUp > maxSendUp) { + maxSendUp = sendUp; } } - total_costs += max_send_up; + totalCosts += maxSendUp; - total_costs += static_cast(instance->getArchitecture().synchronisationCosts()); + totalCosts += static_cast(instance_->GetArchitecture().SynchronisationCosts()); - cost_type max_send_down = std::numeric_limits::min(); - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - cost_type send_down = 0; - for (vertex_idx node : nodes_sent_down[proc][step]) { - send_down += instance->getComputationalDag().vertex_comm_weight(node) - * instance->getArchitecture().communicationCosts(); + CostType maxSendDown = std::numeric_limits::min(); + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + CostType sendDown = 0; + for (VertexIdx node : nodesSentDown_[proc][step]) { + sendDown += instance_->GetComputationalDag().VertexCommWeight(node) + * instance_->GetArchitecture().CommunicationCosts(); } - if (send_down > max_send_down) { - max_send_down = send_down; + if (sendDown > maxSendDown) { + maxSendDown = sendDown; } } - total_costs += max_send_down; + totalCosts += maxSendDown; } 
- return total_costs; + return totalCosts; } -template -v_workw_t PebblingSchedule::computeAsynchronousCost() const { - std::vector current_time_at_processor(instance->getArchitecture().numberOfProcessors(), 0); - std::vector time_when_node_gets_blue(instance->getComputationalDag().num_vertices(), - std::numeric_limits::max()); - if (need_to_load_inputs) { - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - if (instance->getComputationalDag().in_degree(node) == 0) { - time_when_node_gets_blue[node] = 0; +template +VWorkwT PebblingSchedule::ComputeAsynchronousCost() const { + std::vector currentTimeAtProcessor(instance_->GetArchitecture().NumberOfProcessors(), 0); + std::vector timeWhenNodeGetsBlue(instance_->GetComputationalDag().NumVertices(), + std::numeric_limits::max()); + if (needToLoadInputs_) { + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + if (instance_->GetComputationalDag().InDegree(node) == 0) { + timeWhenNodeGetsBlue[node] = 0; } } } - for (unsigned step = 0; step < number_of_supersteps; ++step) { + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { // compute phase - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { - current_time_at_processor[proc] += instance->getComputationalDag().vertex_work_weight(computeStep.node); + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + for (const auto &computeStep : computeStepsForProcSuperstep_[proc][step]) { + currentTimeAtProcessor[proc] += instance_->GetComputationalDag().VertexWorkWeight(computeStep.node_); } } // communication phase - send up - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - for (vertex_idx node : nodes_sent_up[proc][step]) { - current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) - * 
instance->getArchitecture().communicationCosts(); - if (time_when_node_gets_blue[node] > current_time_at_processor[proc]) { - time_when_node_gets_blue[node] = current_time_at_processor[proc]; + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + for (VertexIdx node : nodesSentUp_[proc][step]) { + currentTimeAtProcessor[proc] += instance_->GetComputationalDag().VertexCommWeight(node) + * instance_->GetArchitecture().CommunicationCosts(); + if (timeWhenNodeGetsBlue[node] > currentTimeAtProcessor[proc]) { + timeWhenNodeGetsBlue[node] = currentTimeAtProcessor[proc]; } } } // communication phase - send down - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - for (vertex_idx node : nodes_sent_down[proc][step]) { - if (current_time_at_processor[proc] < time_when_node_gets_blue[node]) { - current_time_at_processor[proc] = time_when_node_gets_blue[node]; + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + for (VertexIdx node : nodesSentDown_[proc][step]) { + if (currentTimeAtProcessor[proc] < timeWhenNodeGetsBlue[node]) { + currentTimeAtProcessor[proc] = timeWhenNodeGetsBlue[node]; } - current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) - * instance->getArchitecture().communicationCosts(); + currentTimeAtProcessor[proc] += instance_->GetComputationalDag().VertexCommWeight(node) + * instance_->GetArchitecture().CommunicationCosts(); } } } - cost_type makespan = 0; - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - if (current_time_at_processor[proc] > makespan) { - makespan = current_time_at_processor[proc]; + CostType makespan = 0; + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + if (currentTimeAtProcessor[proc] > makespan) { + makespan = currentTimeAtProcessor[proc]; } } return makespan; } -template -void 
PebblingSchedule::cleanSchedule() { - if (!isValid()) { +template +void PebblingSchedule::CleanSchedule() { + if (!IsValid()) { return; } // NOTE - this function removes unnecessary steps in most cases, but not all (some require e.g. multiple iterations) - std::vector>> needed(instance->numberOfVertices(), - std::vector>(instance->numberOfProcessors())); - std::vector> keep_false(instance->numberOfVertices(), - std::vector(instance->numberOfProcessors(), false)); - std::vector> has_red_after_cleaning(instance->numberOfVertices(), - std::vector(instance->numberOfProcessors(), false)); + std::vector>> needed(instance_->NumberOfVertices(), + std::vector>(instance_->NumberOfProcessors())); + std::vector> keepFalse(instance_->NumberOfVertices(), + std::vector(instance_->NumberOfProcessors(), false)); + std::vector> hasRedAfterCleaning(instance_->NumberOfVertices(), + std::vector(instance_->NumberOfProcessors(), false)); - std::vector ever_needed_as_blue(instance->numberOfVertices(), false); - if (needs_blue_at_end.empty()) { - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - if (instance->getComputationalDag().out_degree(node) == 0) { - ever_needed_as_blue[node] = true; + std::vector everNeededAsBlue(instance_->NumberOfVertices(), false); + if (needsBlueAtEnd_.empty()) { + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + if (instance_->GetComputationalDag().OutDegree(node) == 0) { + everNeededAsBlue[node] = true; } } } else { - for (vertex_idx node : needs_blue_at_end) { - ever_needed_as_blue[node] = true; + for (VertexIdx node : needsBlueAtEnd_) { + everNeededAsBlue[node] = true; } } - for (unsigned step = 0; step < number_of_supersteps; ++step) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (vertex_idx node : nodes_sent_down[proc][step]) { - ever_needed_as_blue[node] = true; + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { + for (unsigned proc = 0; proc < 
instance_->NumberOfProcessors(); ++proc) { + for (VertexIdx node : nodesSentDown_[proc][step]) { + everNeededAsBlue[node] = true; } } } - if (!has_red_in_beginning.empty()) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (vertex_idx node : has_red_in_beginning[proc]) { - has_red_after_cleaning[node][proc] = true; + if (!hasRedInBeginning_.empty()) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (VertexIdx node : hasRedInBeginning_[proc]) { + hasRedAfterCleaning[node][proc] = true; } } } - for (unsigned step = 0; step < number_of_supersteps; ++step) { + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { // compute phase - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { - vertex_idx node = computeStep.node; + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + for (const auto &computeStep : computeStepsForProcSuperstep_[proc][step]) { + VertexIdx node = computeStep.node_; needed[node][proc].emplace_back(false); - keep_false[node][proc] = has_red_after_cleaning[node][proc]; - for (vertex_idx pred : instance->getComputationalDag().parents(node)) { - has_red_after_cleaning[pred][proc] = true; - if (!keep_false[pred][proc]) { + keepFalse[node][proc] = hasRedAfterCleaning[node][proc]; + for (VertexIdx pred : instance_->GetComputationalDag().Parents(node)) { + hasRedAfterCleaning[pred][proc] = true; + if (!keepFalse[pred][proc]) { needed[pred][proc].back() = true; } } - for (vertex_idx to_evict : computeStep.nodes_evicted_after) { - has_red_after_cleaning[to_evict][proc] = false; + for (VertexIdx toEvict : computeStep.nodesEvictedAfter_) { + hasRedAfterCleaning[toEvict][proc] = false; } } } // send up phase - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - for (vertex_idx node : 
nodes_sent_up[proc][step]) { - if (ever_needed_as_blue[node]) { - has_red_after_cleaning[node][proc] = true; - if (!keep_false[node][proc]) { + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + for (VertexIdx node : nodesSentUp_[proc][step]) { + if (everNeededAsBlue[node]) { + hasRedAfterCleaning[node][proc] = true; + if (!keepFalse[node][proc]) { needed[node][proc].back() = true; } } } } - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { - has_red_after_cleaning[node][proc] = false; + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + for (VertexIdx node : nodesEvictedInComm_[proc][step]) { + hasRedAfterCleaning[node][proc] = false; } } // send down phase - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - for (vertex_idx node : nodes_sent_down[proc][step]) { + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + for (VertexIdx node : nodesSentDown_[proc][step]) { needed[node][proc].emplace_back(false); - keep_false[node][proc] = has_red_after_cleaning[node][proc]; + keepFalse[node][proc] = hasRedAfterCleaning[node][proc]; } } } - std::vector>> new_compute_steps_for_proc_superstep( - instance->numberOfProcessors(), std::vector>(number_of_supersteps)); - std::vector>> new_nodes_evicted_in_comm( - instance->numberOfProcessors(), std::vector>(number_of_supersteps)); - std::vector>> new_nodes_sent_down( - instance->numberOfProcessors(), std::vector>(number_of_supersteps)); - std::vector>> new_nodes_sent_up( - instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + std::vector>> newComputeStepsForProcSuperstep( + instance_->NumberOfProcessors(), std::vector>(numberOfSupersteps_)); + std::vector>> newNodesEvictedInComm( + instance_->NumberOfProcessors(), 
std::vector>(numberOfSupersteps_)); + std::vector>> newNodesSentDown(instance_->NumberOfProcessors(), + std::vector>(numberOfSupersteps_)); + std::vector>> newNodesSentUp(instance_->NumberOfProcessors(), + std::vector>(numberOfSupersteps_)); - std::vector> has_red(instance->numberOfVertices(), std::vector(instance->numberOfProcessors(), false)); - if (!has_red_in_beginning.empty()) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (vertex_idx node : has_red_in_beginning[proc]) { - has_red[node][proc] = true; + std::vector> hasRed(instance_->NumberOfVertices(), std::vector(instance_->NumberOfProcessors(), false)); + if (!hasRedInBeginning_.empty()) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (VertexIdx node : hasRedInBeginning_[proc]) { + hasRed[node][proc] = true; } } } - std::vector has_blue(instance->numberOfVertices()); - std::vector time_when_node_gets_blue(instance->getComputationalDag().num_vertices(), - std::numeric_limits::max()); - if (need_to_load_inputs) { - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - if (instance->getComputationalDag().in_degree(node) == 0) { - has_blue[node] = true; - time_when_node_gets_blue[node] = 0; + std::vector hasBlue(instance_->NumberOfVertices()); + std::vector timeWhenNodeGetsBlue(instance_->GetComputationalDag().NumVertices(), + std::numeric_limits::max()); + if (needToLoadInputs_) { + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + if (instance_->GetComputationalDag().InDegree(node) == 0) { + hasBlue[node] = true; + timeWhenNodeGetsBlue[node] = 0; } } } - std::vector current_time_at_processor(instance->getArchitecture().numberOfProcessors(), 0); + std::vector currentTimeAtProcessor(instance_->GetArchitecture().NumberOfProcessors(), 0); - for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { + for (unsigned superstep = 0; superstep < numberOfSupersteps_; ++superstep) { // 
compute phase - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - std::vector step_remains(compute_steps_for_proc_superstep[proc][superstep].size(), false); - std::vector> new_evict_after(compute_steps_for_proc_superstep[proc][superstep].size()); + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + std::vector stepRemains(computeStepsForProcSuperstep_[proc][superstep].size(), false); + std::vector> newEvictAfter(computeStepsForProcSuperstep_[proc][superstep].size()); - unsigned new_stepIndex = 0; - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { - vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; + unsigned newStepIndex = 0; + for (unsigned stepIndex = 0; stepIndex < computeStepsForProcSuperstep_[proc][superstep].size(); ++stepIndex) { + VertexIdx node = computeStepsForProcSuperstep_[proc][superstep][stepIndex].node_; if (needed[node][proc].front()) { - new_compute_steps_for_proc_superstep[proc][superstep].emplace_back(node, std::vector()); - step_remains[stepIndex] = true; - has_red[node][proc] = true; - ++new_stepIndex; - current_time_at_processor[proc] += instance->getComputationalDag().vertex_work_weight(node); + newComputeStepsForProcSuperstep[proc][superstep].emplace_back(node, std::vector()); + stepRemains[stepIndex] = true; + hasRed[node][proc] = true; + ++newStepIndex; + currentTimeAtProcessor[proc] += instance_->GetComputationalDag().VertexWorkWeight(node); } needed[node][proc].pop_front(); - for (vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after) { - if (has_red[to_evict][proc]) { - new_evict_after[stepIndex].push_back(to_evict); + for (VertexIdx toEvict : computeStepsForProcSuperstep_[proc][superstep][stepIndex].nodesEvictedAfter_) { + if (hasRed[toEvict][proc]) { + newEvictAfter[stepIndex].push_back(toEvict); } - 
has_red[to_evict][proc] = false; + hasRed[toEvict][proc] = false; } } // go backwards to fix cache eviction steps - std::vector to_evict; - for (size_t stepIndex = compute_steps_for_proc_superstep[proc][superstep].size() - 1; - stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); + std::vector toEvict; + for (size_t stepIndex = computeStepsForProcSuperstep_[proc][superstep].size() - 1; + stepIndex < computeStepsForProcSuperstep_[proc][superstep].size(); --stepIndex) { - for (vertex_idx node : new_evict_after[stepIndex]) { - to_evict.push_back(node); + for (VertexIdx node : newEvictAfter[stepIndex]) { + toEvict.push_back(node); } - if (step_remains[stepIndex]) { - new_compute_steps_for_proc_superstep[proc][superstep][new_stepIndex - 1].nodes_evicted_after = to_evict; - to_evict.clear(); - --new_stepIndex; + if (stepRemains[stepIndex]) { + newComputeStepsForProcSuperstep[proc][superstep][newStepIndex - 1].nodesEvictedAfter_ = toEvict; + toEvict.clear(); + --newStepIndex; } } - if (!to_evict.empty() && superstep >= 1) { - for (vertex_idx node : to_evict) { + if (!toEvict.empty() && superstep >= 1) { + for (VertexIdx node : toEvict) { auto itr = std::find( - new_nodes_sent_down[proc][superstep - 1].begin(), new_nodes_sent_down[proc][superstep - 1].end(), node); - if (itr == new_nodes_sent_down[proc][superstep - 1].end()) { - new_nodes_evicted_in_comm[proc][superstep - 1].push_back(node); + newNodesSentDown[proc][superstep - 1].begin(), newNodesSentDown[proc][superstep - 1].end(), node); + if (itr == newNodesSentDown[proc][superstep - 1].end()) { + newNodesEvictedInComm[proc][superstep - 1].push_back(node); } else { - new_nodes_sent_down[proc][superstep - 1].erase(itr); + newNodesSentDown[proc][superstep - 1].erase(itr); } } } } - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { // send up phase - for (vertex_idx node : 
nodes_sent_up[proc][superstep]) { - if (!ever_needed_as_blue[node]) { + for (VertexIdx node : nodesSentUp_[proc][superstep]) { + if (!everNeededAsBlue[node]) { continue; } - cost_type new_time_at_processor = current_time_at_processor[proc] - + instance->getComputationalDag().vertex_comm_weight(node) - * instance->getArchitecture().communicationCosts(); + CostType newTimeAtProcessor = currentTimeAtProcessor[proc] + + instance_->GetComputationalDag().VertexCommWeight(node) + * instance_->GetArchitecture().CommunicationCosts(); // only copy send up step if it is not obsolete in at least one of the two cases (sync or async schedule) - if (!has_blue[node] || new_time_at_processor < time_when_node_gets_blue[node]) { - new_nodes_sent_up[proc][superstep].push_back(node); - has_blue[node] = true; - current_time_at_processor[proc] = new_time_at_processor; - if (time_when_node_gets_blue[node] > new_time_at_processor) { - time_when_node_gets_blue[node] = new_time_at_processor; + if (!hasBlue[node] || newTimeAtProcessor < timeWhenNodeGetsBlue[node]) { + newNodesSentUp[proc][superstep].push_back(node); + hasBlue[node] = true; + currentTimeAtProcessor[proc] = newTimeAtProcessor; + if (timeWhenNodeGetsBlue[node] > newTimeAtProcessor) { + timeWhenNodeGetsBlue[node] = newTimeAtProcessor; } } } } // comm phase evict - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { - for (vertex_idx node : nodes_evicted_in_comm[proc][superstep]) { - if (has_red[node][proc]) { - new_nodes_evicted_in_comm[proc][superstep].push_back(node); - has_red[node][proc] = false; + for (unsigned proc = 0; proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { + for (VertexIdx node : nodesEvictedInComm_[proc][superstep]) { + if (hasRed[node][proc]) { + newNodesEvictedInComm[proc][superstep].push_back(node); + hasRed[node][proc] = false; } } } - for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (unsigned proc = 0; 
proc < instance_->GetArchitecture().NumberOfProcessors(); ++proc) { // send down phase - for (vertex_idx node : nodes_sent_down[proc][superstep]) { + for (VertexIdx node : nodesSentDown_[proc][superstep]) { if (needed[node][proc].front()) { - new_nodes_sent_down[proc][superstep].push_back(node); - has_red[node][proc] = true; - if (current_time_at_processor[proc] < time_when_node_gets_blue[node]) { - current_time_at_processor[proc] = time_when_node_gets_blue[node]; + newNodesSentDown[proc][superstep].push_back(node); + hasRed[node][proc] = true; + if (currentTimeAtProcessor[proc] < timeWhenNodeGetsBlue[node]) { + currentTimeAtProcessor[proc] = timeWhenNodeGetsBlue[node]; } - current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) - * instance->getArchitecture().communicationCosts(); + currentTimeAtProcessor[proc] += instance_->GetComputationalDag().VertexCommWeight(node) + * instance_->GetArchitecture().CommunicationCosts(); } needed[node][proc].pop_front(); } } } - compute_steps_for_proc_superstep = new_compute_steps_for_proc_superstep; - nodes_evicted_in_comm = new_nodes_evicted_in_comm; - nodes_sent_down = new_nodes_sent_down; - nodes_sent_up = new_nodes_sent_up; + computeStepsForProcSuperstep_ = newComputeStepsForProcSuperstep; + nodesEvictedInComm_ = newNodesEvictedInComm; + nodesSentDown_ = newNodesSentDown; + nodesSentUp_ = newNodesSentUp; } -template -void PebblingSchedule::ConvertFromBsp(const BspSchedule &schedule, CACHE_EVICTION_STRATEGY evict_rule) { - instance = &schedule.getInstance(); +template +void PebblingSchedule::ConvertFromBsp(const BspSchedule &schedule, CacheEvictionStrategy evictRule) { + instance_ = &schedule.GetInstance(); // check if conversion possible at all - if (!hasValidSolution(schedule.getInstance(), external_sources)) { + if (!HasValidSolution(schedule.GetInstance(), externalSources_)) { std::cout << "Conversion failed." 
<< std::endl; return; } @@ -625,636 +623,635 @@ void PebblingSchedule::ConvertFromBsp(const BspSchedule &sched SplitSupersteps(schedule); // track memory - SetMemoryMovement(evict_rule); + SetMemoryMovement(evictRule); } -template -bool PebblingSchedule::hasValidSolution(const BspInstance &instance, - const std::set &external_sources) { - std::vector memory_required = minimumMemoryRequiredPerNodeType(instance); - std::vector has_enough_memory(instance.getComputationalDag().num_vertex_types(), true); - for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) { - if (external_sources.find(node) == external_sources.end()) { - has_enough_memory[instance.getComputationalDag().vertex_type(node)] = false; +template +bool PebblingSchedule::HasValidSolution(const BspInstance &instance, const std::set &externalSources) { + std::vector memoryRequired = MinimumMemoryRequiredPerNodeType(instance); + std::vector hasEnoughMemory(instance.GetComputationalDag().NumVertexTypes(), true); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); ++node) { + if (externalSources.find(node) == externalSources.end()) { + hasEnoughMemory[instance.GetComputationalDag().VertexType(node)] = false; } } - for (v_type_t node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - if (instance.isCompatibleType(node_type, instance.getArchitecture().processorType(proc)) - && instance.getArchitecture().memoryBound(proc) >= memory_required[node_type]) { - has_enough_memory[node_type] = true; + for (VTypeT nodeType = 0; nodeType < instance.GetComputationalDag().NumVertexTypes(); ++nodeType) { + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + if (instance.IsCompatibleType(nodeType, instance.GetArchitecture().ProcessorType(proc)) + && instance.GetArchitecture().MemoryBound(proc) >= memoryRequired[nodeType]) { + hasEnoughMemory[nodeType] = true; break; } 
} } - for (v_type_t node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { - if (!has_enough_memory[node_type]) { - std::cout << "No valid solution exists. Minimum memory required for node type " << node_type << " is " - << memory_required[node_type] << std::endl; + for (VTypeT nodeType = 0; nodeType < instance.GetComputationalDag().NumVertexTypes(); ++nodeType) { + if (!hasEnoughMemory[nodeType]) { + std::cout << "No valid solution exists. Minimum memory required for node type " << nodeType << " is " + << memoryRequired[nodeType] << std::endl; return false; } } return true; } -template -void PebblingSchedule::SplitSupersteps(const BspSchedule &schedule) { +template +void PebblingSchedule::SplitSupersteps(const BspSchedule &schedule) { // get DFS topological order in each superstep - std::vector>> top_orders = computeTopOrdersDFS(schedule); + std::vector>> topOrders = ComputeTopOrdersDfs(schedule); - std::vector top_order_idx(instance->getComputationalDag().num_vertices(), 0); - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) { - for (unsigned idx = 0; idx < top_orders[proc][step].size(); ++idx) { - top_order_idx[top_orders[proc][step][idx]] = idx; + std::vector topOrderIdx(instance_->GetComputationalDag().NumVertices(), 0); + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (unsigned step = 0; step < schedule.NumberOfSupersteps(); ++step) { + for (unsigned idx = 0; idx < topOrders[proc][step].size(); ++idx) { + topOrderIdx[topOrders[proc][step][idx]] = idx; } } } // split supersteps as needed - std::vector new_superstep_ID(instance->getComputationalDag().num_vertices()); - unsigned superstep_index = 0; - for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) { - unsigned max_segments_in_superstep = 0; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if 
(top_orders[proc][step].empty()) { + std::vector newSuperstepId(instance_->GetComputationalDag().NumVertices()); + unsigned superstepIndex = 0; + for (unsigned step = 0; step < schedule.NumberOfSupersteps(); ++step) { + unsigned maxSegmentsInSuperstep = 0; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + if (topOrders[proc][step].empty()) { continue; } // the superstep will be split into smaller segments std::vector> segments; - unsigned start_idx = 0; - while (start_idx < top_orders[proc][step].size()) { + unsigned startIdx = 0; + while (startIdx < topOrders[proc][step].size()) { // binary search for largest segment that still statisfies mem constraint - bool doubling_phase = true; - unsigned end_lower_bound = start_idx, end_upper_bound = static_cast(top_orders[proc][step].size() - 1); - while (end_lower_bound < end_upper_bound) { - unsigned end_current; - - if (doubling_phase) { - if (end_lower_bound == start_idx) { - end_current = start_idx + 1; + bool doublingPhase = true; + unsigned endLowerBound = startIdx, endUpperBound = static_cast(topOrders[proc][step].size() - 1); + while (endLowerBound < endUpperBound) { + unsigned endCurrent; + + if (doublingPhase) { + if (endLowerBound == startIdx) { + endCurrent = startIdx + 1; } else { - end_current = std::min(start_idx + 2 * (end_lower_bound - start_idx), - static_cast(top_orders[proc][step].size()) - 1); + endCurrent = std::min(startIdx + 2 * (endLowerBound - startIdx), + static_cast(topOrders[proc][step].size()) - 1); } } else { - end_current = end_lower_bound + (end_upper_bound - end_lower_bound + 1) / 2; + endCurrent = endLowerBound + (endUpperBound - endLowerBound + 1) / 2; } // check if this segment is valid bool valid = true; - std::map neededAfter; - for (unsigned idx = start_idx; idx <= end_current; ++idx) { - vertex_idx node = top_orders[proc][step][idx]; + std::map neededAfter; + for (unsigned idx = startIdx; idx <= endCurrent; ++idx) { + VertexIdx node = 
topOrders[proc][step][idx]; neededAfter[node] = false; - if (needs_blue_at_end.empty()) { - neededAfter[node] = (instance->getComputationalDag().out_degree(node) == 0); + if (needsBlueAtEnd_.empty()) { + neededAfter[node] = (instance_->GetComputationalDag().OutDegree(node) == 0); } else { - neededAfter[node] = (needs_blue_at_end.find(node) != needs_blue_at_end.end()); + neededAfter[node] = (needsBlueAtEnd_.find(node) != needsBlueAtEnd_.end()); } - for (vertex_idx succ : instance->getComputationalDag().children(node)) { - if (schedule.assignedSuperstep(succ) > step) { + for (VertexIdx succ : instance_->GetComputationalDag().Children(node)) { + if (schedule.AssignedSuperstep(succ) > step) { neededAfter[node] = true; } - if (schedule.assignedSuperstep(succ) == step && top_order_idx[succ] <= end_current) { + if (schedule.AssignedSuperstep(succ) == step && topOrderIdx[succ] <= endCurrent) { neededAfter[node] = true; } } } - std::map lastUsedBy; - std::set values_needed; - for (unsigned idx = start_idx; idx <= end_current; ++idx) { - vertex_idx node = top_orders[proc][step][idx]; - for (vertex_idx pred : instance->getComputationalDag().parents(node)) { - if (schedule.assignedSuperstep(pred) < step - || (schedule.assignedSuperstep(pred) == step && !neededAfter[pred])) { + std::map lastUsedBy; + std::set valuesNeeded; + for (unsigned idx = startIdx; idx <= endCurrent; ++idx) { + VertexIdx node = topOrders[proc][step][idx]; + for (VertexIdx pred : instance_->GetComputationalDag().Parents(node)) { + if (schedule.AssignedSuperstep(pred) < step + || (schedule.AssignedSuperstep(pred) == step && !neededAfter[pred])) { lastUsedBy[pred] = node; } - if (schedule.assignedSuperstep(pred) < step - || (schedule.assignedSuperstep(pred) == step && top_order_idx[pred] < start_idx) - || (need_to_load_inputs && instance->getComputationalDag().in_degree(pred) == 0) - || external_sources.find(pred) != external_sources.end()) { - values_needed.insert(pred); + if 
(schedule.AssignedSuperstep(pred) < step + || (schedule.AssignedSuperstep(pred) == step && topOrderIdx[pred] < startIdx) + || (needToLoadInputs_ && instance_->GetComputationalDag().InDegree(pred) == 0) + || externalSources_.find(pred) != externalSources_.end()) { + valuesNeeded.insert(pred); } } } - memweight_type mem_needed = 0; - for (vertex_idx node : values_needed) { - mem_needed += instance->getComputationalDag().vertex_mem_weight(node); + MemWeightType memNeeded = 0; + for (VertexIdx node : valuesNeeded) { + memNeeded += instance_->GetComputationalDag().VertexMemWeight(node); } - for (unsigned idx = start_idx; idx <= end_current; ++idx) { - vertex_idx node = top_orders[proc][step][idx]; + for (unsigned idx = startIdx; idx <= endCurrent; ++idx) { + VertexIdx node = topOrders[proc][step][idx]; - if (need_to_load_inputs && instance->getComputationalDag().in_degree(node) == 0) { + if (needToLoadInputs_ && instance_->GetComputationalDag().InDegree(node) == 0) { continue; } - mem_needed += instance->getComputationalDag().vertex_mem_weight(node); - if (mem_needed > instance->getArchitecture().memoryBound(proc)) { + memNeeded += instance_->GetComputationalDag().VertexMemWeight(node); + if (memNeeded > instance_->GetArchitecture().MemoryBound(proc)) { valid = false; break; } - for (vertex_idx pred : instance->getComputationalDag().parents(node)) { + for (VertexIdx pred : instance_->GetComputationalDag().Parents(node)) { if (lastUsedBy[pred] == node) { - mem_needed -= instance->getComputationalDag().vertex_mem_weight(pred); + memNeeded -= instance_->GetComputationalDag().VertexMemWeight(pred); } } } if (valid) { - end_lower_bound = end_current; - if (end_current == top_orders[proc][step].size() - 1) { - doubling_phase = false; - end_upper_bound = end_current; + endLowerBound = endCurrent; + if (endCurrent == topOrders[proc][step].size() - 1) { + doublingPhase = false; + endUpperBound = endCurrent; } } else { - doubling_phase = false; - end_upper_bound = end_current - 
1; + doublingPhase = false; + endUpperBound = endCurrent - 1; } } - segments.emplace_back(start_idx, end_lower_bound); - start_idx = end_lower_bound + 1; + segments.emplace_back(startIdx, endLowerBound); + startIdx = endLowerBound + 1; } - unsigned step_idx = 0; + unsigned stepIdx = 0; for (auto segment : segments) { for (unsigned idx = segment.first; idx <= segment.second; ++idx) { - new_superstep_ID[top_orders[proc][step][idx]] = superstep_index + step_idx; + newSuperstepId[topOrders[proc][step][idx]] = superstepIndex + stepIdx; } - ++step_idx; + ++stepIdx; } - if (step_idx > max_segments_in_superstep) { - max_segments_in_superstep = step_idx; + if (stepIdx > maxSegmentsInSuperstep) { + maxSegmentsInSuperstep = stepIdx; } } - superstep_index += max_segments_in_superstep; + superstepIndex += maxSegmentsInSuperstep; } - std::vector reindex_to_shrink(superstep_index); - std::vector has_compute(superstep_index, false); - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - if (!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0) { - has_compute[new_superstep_ID[node]] = true; + std::vector reindexToShrink(superstepIndex); + std::vector hasCompute(superstepIndex, false); + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + if (!needToLoadInputs_ || instance_->GetComputationalDag().InDegree(node) > 0) { + hasCompute[newSuperstepId[node]] = true; } } - unsigned current_index = 0; - for (unsigned superstep = 0; superstep < superstep_index; ++superstep) { - if (has_compute[superstep]) { - reindex_to_shrink[superstep] = current_index; - ++current_index; + unsigned currentIndex = 0; + for (unsigned superstep = 0; superstep < superstepIndex; ++superstep) { + if (hasCompute[superstep]) { + reindexToShrink[superstep] = currentIndex; + ++currentIndex; } } - unsigned offset = need_to_load_inputs ? 
1 : 0; - updateNumberOfSupersteps(current_index + offset); - std::cout << schedule.numberOfSupersteps() << " -> " << number_of_supersteps << std::endl; + unsigned offset = needToLoadInputs_ ? 1 : 0; + UpdateNumberOfSupersteps(currentIndex + offset); + std::cout << schedule.NumberOfSupersteps() << " -> " << numberOfSupersteps_ << std::endl; // TODO: might not need offset for first step when beginning with red pebbles - for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (vertex_idx node : top_orders[proc][step]) { - if (!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0) { - compute_steps_for_proc_superstep[proc][reindex_to_shrink[new_superstep_ID[node]] + offset].emplace_back(node); + for (unsigned step = 0; step < schedule.NumberOfSupersteps(); ++step) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (VertexIdx node : topOrders[proc][step]) { + if (!needToLoadInputs_ || instance_->GetComputationalDag().InDegree(node) > 0) { + computeStepsForProcSuperstep_[proc][reindexToShrink[newSuperstepId[node]] + offset].emplace_back(node); } } } } } -template -void PebblingSchedule::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_rule) { - const size_t N = instance->getComputationalDag().num_vertices(); +template +void PebblingSchedule::SetMemoryMovement(CacheEvictionStrategy evictRule) { + const size_t n = instance_->GetComputationalDag().NumVertices(); - std::vector mem_used(instance->numberOfProcessors(), 0); - std::vector> in_mem(instance->numberOfProcessors()); + std::vector memUsed(instance_->NumberOfProcessors(), 0); + std::vector> inMem(instance_->NumberOfProcessors()); - std::vector in_slow_mem(N, false); - if (need_to_load_inputs) { - for (vertex_idx node = 0; node < N; ++node) { - if (instance->getComputationalDag().in_degree(node) == 0) { - in_slow_mem[node] = true; + std::vector inSlowMem(n, false); + if 
(needToLoadInputs_) { + for (VertexIdx node = 0; node < n; ++node) { + if (instance_->GetComputationalDag().InDegree(node) == 0) { + inSlowMem[node] = true; } } } - std::vector, vertex_idx>>> evictable(instance->numberOfProcessors()); - std::vector> non_evictable(instance->numberOfProcessors()); + std::vector, VertexIdx>>> evictable(instance_->NumberOfProcessors()); + std::vector> nonEvictable(instance_->NumberOfProcessors()); // iterator to its position in "evictable" - for efficient delete - std::vector> place_in_evictable( - N, std::vector(instance->numberOfProcessors())); - for (vertex_idx node = 0; node < N; ++node) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - place_in_evictable[node][proc] = evictable[proc].end(); + std::vector> placeInEvictable( + n, std::vector(instance_->NumberOfProcessors())); + for (VertexIdx node = 0; node < n; ++node) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + placeInEvictable[node][proc] = evictable[proc].end(); } } // utility for LRU eviction strategy - std::vector> node_last_used_on_proc; - if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) { - node_last_used_on_proc.resize(N, std::vector(instance->numberOfProcessors(), 0)); + std::vector> nodeLastUsedOnProc; + if (evictRule == CacheEvictionStrategy::LEAST_RECENTLY_USED) { + nodeLastUsedOnProc.resize(n, std::vector(instance_->NumberOfProcessors(), 0)); } - std::vector total_step_count_on_proc(instance->numberOfProcessors(), 0); + std::vector totalStepCountOnProc(instance_->NumberOfProcessors(), 0); // select a representative compute step for each node, in case of being computed multiple times // (NOTE - the conversion assumes that there is enough fast memory to keep each value until the end of // its representative step, if the value in question is ever needed on another processor/superster // without being recomputed there - otherwise, it would be even hard to decide whether a solution exists) - 
std::vector selected_processor(N); - std::vector> selected_step(N, std::make_pair(number_of_supersteps, 0)); - for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { - vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - if (selected_step[node].first > superstep - || (selected_step[node].first == superstep && selected_step[node].second < stepIndex)) { - selected_processor[node] = proc; - selected_step[node] = std::make_pair(superstep, stepIndex); + std::vector selectedProcessor(n); + std::vector> selectedStep(n, std::make_pair(numberOfSupersteps_, 0)); + for (unsigned superstep = 0; superstep < numberOfSupersteps_; ++superstep) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (unsigned stepIndex = 0; stepIndex < computeStepsForProcSuperstep_[proc][superstep].size(); ++stepIndex) { + VertexIdx node = computeStepsForProcSuperstep_[proc][superstep][stepIndex].node_; + if (selectedStep[node].first > superstep + || (selectedStep[node].first == superstep && selectedStep[node].second < stepIndex)) { + selectedProcessor[node] = proc; + selectedStep[node] = std::make_pair(superstep, stepIndex); } } } } // check if the node needs to be kept until the end of its representative superstep - std::vector must_be_preserved(N, false); - std::vector computed_in_current_superstep(N, false); - for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { - vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - computed_in_current_superstep[node] = true; - for (vertex_idx 
pred : instance->getComputationalDag().parents(node)) { - if (!computed_in_current_superstep[pred]) { - must_be_preserved[pred] = true; + std::vector mustBePreserved(n, false); + std::vector computedInCurrentSuperstep(n, false); + for (unsigned superstep = 0; superstep < numberOfSupersteps_; ++superstep) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (unsigned stepIndex = 0; stepIndex < computeStepsForProcSuperstep_[proc][superstep].size(); ++stepIndex) { + VertexIdx node = computeStepsForProcSuperstep_[proc][superstep][stepIndex].node_; + computedInCurrentSuperstep[node] = true; + for (VertexIdx pred : instance_->GetComputationalDag().Parents(node)) { + if (!computedInCurrentSuperstep[pred]) { + mustBePreserved[pred] = true; } } } - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { - computed_in_current_superstep[compute_steps_for_proc_superstep[proc][superstep][stepIndex].node] = false; + for (unsigned stepIndex = 0; stepIndex < computeStepsForProcSuperstep_[proc][superstep].size(); ++stepIndex) { + computedInCurrentSuperstep[computeStepsForProcSuperstep_[proc][superstep][stepIndex].node_] = false; } } } - if (needs_blue_at_end.empty()) { - for (vertex_idx node = 0; node < N; ++node) { - if (instance->getComputationalDag().out_degree(node) == 0) { - must_be_preserved[node] = true; + if (needsBlueAtEnd_.empty()) { + for (VertexIdx node = 0; node < n; ++node) { + if (instance_->GetComputationalDag().OutDegree(node) == 0) { + mustBePreserved[node] = true; } } } else { - for (vertex_idx node : needs_blue_at_end) { - must_be_preserved[node] = true; + for (VertexIdx node : needsBlueAtEnd_) { + mustBePreserved[node] = true; } } // superstep-step pairs where a node is required (on a given proc) - opening a separate queue after each time it's recomputed - std::vector>>>> node_used_at_proc_lists( - N, + std::vector>>>> nodeUsedAtProcLists( + n, std::vector>>>( - 
instance->numberOfProcessors(), std::deque>>(1))); - for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { - vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - for (vertex_idx pred : instance->getComputationalDag().parents(node)) { - node_used_at_proc_lists[pred][proc].back().emplace_back(superstep, stepIndex); + instance_->NumberOfProcessors(), std::deque>>(1))); + for (unsigned superstep = 0; superstep < numberOfSupersteps_; ++superstep) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (unsigned stepIndex = 0; stepIndex < computeStepsForProcSuperstep_[proc][superstep].size(); ++stepIndex) { + VertexIdx node = computeStepsForProcSuperstep_[proc][superstep][stepIndex].node_; + for (VertexIdx pred : instance_->GetComputationalDag().Parents(node)) { + nodeUsedAtProcLists[pred][proc].back().emplace_back(superstep, stepIndex); } - node_used_at_proc_lists[node][proc].emplace_back(); + nodeUsedAtProcLists[node][proc].emplace_back(); } } } // set up initial content of fast memories - if (!has_red_in_beginning.empty()) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - in_mem = has_red_in_beginning; - for (vertex_idx node : in_mem[proc]) { - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); + if (!hasRedInBeginning_.empty()) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + inMem = hasRedInBeginning_; + for (VertexIdx node : inMem[proc]) { + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(node); std::pair prio; - if (evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) { - prio = node_used_at_proc_lists[node][proc].front().front(); - } else if (evict_rule == 
CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) { - prio = std::make_pair(UINT_MAX - node_last_used_on_proc[node][proc], static_cast(node)); - } else if (evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) { + if (evictRule == CacheEvictionStrategy::FORESIGHT) { + prio = nodeUsedAtProcLists[node][proc].front().front(); + } else if (evictRule == CacheEvictionStrategy::LEAST_RECENTLY_USED) { + prio = std::make_pair(UINT_MAX - nodeLastUsedOnProc[node][proc], static_cast(node)); + } else if (evictRule == CacheEvictionStrategy::LARGEST_ID) { prio = std::make_pair(static_cast(node), 0); } - place_in_evictable[node][proc] = evictable[proc].emplace(prio, node).first; + placeInEvictable[node][proc] = evictable[proc].emplace(prio, node).first; } } } // iterate through schedule - for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if (compute_steps_for_proc_superstep[proc][superstep].empty()) { + for (unsigned superstep = 0; superstep < numberOfSupersteps_; ++superstep) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + if (computeStepsForProcSuperstep_[proc][superstep].empty()) { continue; } // before compute phase, evict data in comm phase of previous superstep - std::set new_values_needed; - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { - vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - computed_in_current_superstep[node] = true; - for (vertex_idx pred : instance->getComputationalDag().parents(node)) { - if (!computed_in_current_superstep[pred]) { - non_evictable[proc].insert(pred); - - if (place_in_evictable[pred][proc] != evictable[proc].end()) { - evictable[proc].erase(place_in_evictable[pred][proc]); - place_in_evictable[pred][proc] = evictable[proc].end(); + std::set newValuesNeeded; + for (unsigned stepIndex = 0; stepIndex < 
computeStepsForProcSuperstep_[proc][superstep].size(); ++stepIndex) { + VertexIdx node = computeStepsForProcSuperstep_[proc][superstep][stepIndex].node_; + computedInCurrentSuperstep[node] = true; + for (VertexIdx pred : instance_->GetComputationalDag().Parents(node)) { + if (!computedInCurrentSuperstep[pred]) { + nonEvictable[proc].insert(pred); + + if (placeInEvictable[pred][proc] != evictable[proc].end()) { + evictable[proc].erase(placeInEvictable[pred][proc]); + placeInEvictable[pred][proc] = evictable[proc].end(); } - if (in_mem[proc].find(pred) == in_mem[proc].end()) { - new_values_needed.insert(pred); + if (inMem[proc].find(pred) == inMem[proc].end()) { + newValuesNeeded.insert(pred); } } } } - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { - computed_in_current_superstep[compute_steps_for_proc_superstep[proc][superstep][stepIndex].node] = false; + for (unsigned stepIndex = 0; stepIndex < computeStepsForProcSuperstep_[proc][superstep].size(); ++stepIndex) { + computedInCurrentSuperstep[computeStepsForProcSuperstep_[proc][superstep][stepIndex].node_] = false; } - for (vertex_idx node : new_values_needed) { - in_mem[proc].insert(node); - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); - nodes_sent_down[proc][superstep - 1].push_back(node); - if (!in_slow_mem[node]) { - in_slow_mem[node] = true; - nodes_sent_up[selected_processor[node]][selected_step[node].first].push_back(node); + for (VertexIdx node : newValuesNeeded) { + inMem[proc].insert(node); + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(node); + nodesSentDown_[proc][superstep - 1].push_back(node); + if (!inSlowMem[node]) { + inSlowMem[node] = true; + nodesSentUp_[selectedProcessor[node]][selectedStep[node].first].push_back(node); } } - memweight_type first_node_weight - = instance->getComputationalDag().vertex_mem_weight(compute_steps_for_proc_superstep[proc][superstep][0].node); + 
MemWeightType firstNodeWeight + = instance_->GetComputationalDag().VertexMemWeight(computeStepsForProcSuperstep_[proc][superstep][0].node_); - while (mem_used[proc] + first_node_weight - > instance->getArchitecture().memoryBound(proc)) // no sliding pebbles for now + while (memUsed[proc] + firstNodeWeight + > instance_->GetArchitecture().MemoryBound(proc)) // no sliding pebbles for now { if (evictable[proc].empty()) { std::cout << "ERROR: Cannot create valid memory movement for these superstep lists." << std::endl; return; } - vertex_idx evicted = (--evictable[proc].end())->second; + VertexIdx evicted = (--evictable[proc].end())->second; evictable[proc].erase(--evictable[proc].end()); - place_in_evictable[evicted][proc] = evictable[proc].end(); + placeInEvictable[evicted][proc] = evictable[proc].end(); - mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(evicted); - in_mem[proc].erase(evicted); + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(evicted); + inMem[proc].erase(evicted); - nodes_evicted_in_comm[proc][superstep - 1].push_back(evicted); + nodesEvictedInComm_[proc][superstep - 1].push_back(evicted); } // indicates if the node will be needed after (and thus cannot be deleted during) this compute phase - std::map needed_after; + std::map neededAfter; // during compute phase - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { - vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - memweight_type node_weight = instance->getComputationalDag().vertex_mem_weight(node); + for (unsigned stepIndex = 0; stepIndex < computeStepsForProcSuperstep_[proc][superstep].size(); ++stepIndex) { + VertexIdx node = computeStepsForProcSuperstep_[proc][superstep][stepIndex].node_; + MemWeightType nodeWeight = instance_->GetComputationalDag().VertexMemWeight(node); if (stepIndex > 0) { // evict nodes to make space - while (mem_used[proc] + node_weight > 
instance->getArchitecture().memoryBound(proc)) { + while (memUsed[proc] + nodeWeight > instance_->GetArchitecture().MemoryBound(proc)) { if (evictable[proc].empty()) { std::cout << "ERROR: Cannot create valid memory movement for these superstep lists." << std::endl; return; } - vertex_idx evicted = (--evictable[proc].end())->second; + VertexIdx evicted = (--evictable[proc].end())->second; evictable[proc].erase(--evictable[proc].end()); - place_in_evictable[evicted][proc] = evictable[proc].end(); + placeInEvictable[evicted][proc] = evictable[proc].end(); - mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(evicted); - in_mem[proc].erase(evicted); + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(evicted); + inMem[proc].erase(evicted); - compute_steps_for_proc_superstep[proc][superstep][stepIndex - 1].nodes_evicted_after.push_back(evicted); + computeStepsForProcSuperstep_[proc][superstep][stepIndex - 1].nodesEvictedAfter_.push_back(evicted); } } - in_mem[proc].insert(node); - mem_used[proc] += node_weight; + inMem[proc].insert(node); + memUsed[proc] += nodeWeight; - non_evictable[proc].insert(node); + nonEvictable[proc].insert(node); - if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) // update usage times for LRU strategy + if (evictRule == CacheEvictionStrategy::LEAST_RECENTLY_USED) // update usage times for LRU strategy { - ++total_step_count_on_proc[proc]; - node_last_used_on_proc[node][proc] = total_step_count_on_proc[proc]; - for (vertex_idx pred : instance->getComputationalDag().parents(node)) { - node_last_used_on_proc[pred][proc] = total_step_count_on_proc[proc]; + ++totalStepCountOnProc[proc]; + nodeLastUsedOnProc[node][proc] = totalStepCountOnProc[proc]; + for (VertexIdx pred : instance_->GetComputationalDag().Parents(node)) { + nodeLastUsedOnProc[pred][proc] = totalStepCountOnProc[proc]; } } - if (selected_processor[node] == proc && selected_step[node] == std::make_pair(superstep, stepIndex) - && 
must_be_preserved[node]) { - needed_after[node] = true; + if (selectedProcessor[node] == proc && selectedStep[node] == std::make_pair(superstep, stepIndex) + && mustBePreserved[node]) { + neededAfter[node] = true; } else { - needed_after[node] = false; + neededAfter[node] = false; } - node_used_at_proc_lists[node][proc].pop_front(); + nodeUsedAtProcLists[node][proc].pop_front(); - for (vertex_idx pred : instance->getComputationalDag().parents(node)) { - node_used_at_proc_lists[pred][proc].front().pop_front(); + for (VertexIdx pred : instance_->GetComputationalDag().Parents(node)) { + nodeUsedAtProcLists[pred][proc].front().pop_front(); - if (needed_after[pred]) { + if (neededAfter[pred]) { continue; } // autoevict - if (node_used_at_proc_lists[pred][proc].front().empty()) { - in_mem[proc].erase(pred); - non_evictable[proc].erase(pred); - mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(pred); - compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after.push_back(pred); - } else if (node_used_at_proc_lists[pred][proc].front().front().first > superstep) { - non_evictable[proc].erase(pred); + if (nodeUsedAtProcLists[pred][proc].front().empty()) { + inMem[proc].erase(pred); + nonEvictable[proc].erase(pred); + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(pred); + computeStepsForProcSuperstep_[proc][superstep][stepIndex].nodesEvictedAfter_.push_back(pred); + } else if (nodeUsedAtProcLists[pred][proc].front().front().first > superstep) { + nonEvictable[proc].erase(pred); std::pair prio; - if (evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) { - prio = node_used_at_proc_lists[pred][proc].front().front(); - } else if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) { - prio = std::make_pair(UINT_MAX - node_last_used_on_proc[pred][proc], static_cast(pred)); - } else if (evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) { + if (evictRule == CacheEvictionStrategy::FORESIGHT) { + prio = 
nodeUsedAtProcLists[pred][proc].front().front(); + } else if (evictRule == CacheEvictionStrategy::LEAST_RECENTLY_USED) { + prio = std::make_pair(UINT_MAX - nodeLastUsedOnProc[pred][proc], static_cast(pred)); + } else if (evictRule == CacheEvictionStrategy::LARGEST_ID) { prio = std::make_pair(static_cast(pred), 0); } - place_in_evictable[pred][proc] = evictable[proc].emplace(prio, pred).first; + placeInEvictable[pred][proc] = evictable[proc].emplace(prio, pred).first; } } } // after compute phase - for (vertex_idx node : non_evictable[proc]) { - if (node_used_at_proc_lists[node][proc].front().empty()) { - mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(node); - in_mem[proc].erase(node); - nodes_evicted_in_comm[proc][superstep].push_back(node); - if ((instance->getComputationalDag().out_degree(node) == 0 - || needs_blue_at_end.find(node) != needs_blue_at_end.end()) - && !in_slow_mem[node]) { - in_slow_mem[node] = true; - nodes_sent_up[proc][superstep].push_back(node); + for (VertexIdx node : nonEvictable[proc]) { + if (nodeUsedAtProcLists[node][proc].front().empty()) { + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(node); + inMem[proc].erase(node); + nodesEvictedInComm_[proc][superstep].push_back(node); + if ((instance_->GetComputationalDag().OutDegree(node) == 0 + || needsBlueAtEnd_.find(node) != needsBlueAtEnd_.end()) + && !inSlowMem[node]) { + inSlowMem[node] = true; + nodesSentUp_[proc][superstep].push_back(node); } } else { std::pair prio; - if (evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) { - prio = node_used_at_proc_lists[node][proc].front().front(); - } else if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) { - prio = std::make_pair(UINT_MAX - node_last_used_on_proc[node][proc], static_cast(node)); - } else if (evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) { + if (evictRule == CacheEvictionStrategy::FORESIGHT) { + prio = nodeUsedAtProcLists[node][proc].front().front(); + } else if (evictRule == 
CacheEvictionStrategy::LEAST_RECENTLY_USED) { + prio = std::make_pair(UINT_MAX - nodeLastUsedOnProc[node][proc], static_cast(node)); + } else if (evictRule == CacheEvictionStrategy::LARGEST_ID) { prio = std::make_pair(static_cast(node), 0); } - place_in_evictable[node][proc] = evictable[proc].emplace(prio, node).first; + placeInEvictable[node][proc] = evictable[proc].emplace(prio, node).first; - if (needs_blue_at_end.find(node) != needs_blue_at_end.end() && !in_slow_mem[node]) { - in_slow_mem[node] = true; - nodes_sent_up[proc][superstep].push_back(node); + if (needsBlueAtEnd_.find(node) != needsBlueAtEnd_.end() && !inSlowMem[node]) { + inSlowMem[node] = true; + nodesSentUp_[proc][superstep].push_back(node); } } } - non_evictable[proc].clear(); + nonEvictable[proc].clear(); } } } -template -void PebblingSchedule::ResetToForesight() { - nodes_evicted_in_comm.clear(); - nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); +template +void PebblingSchedule::ResetToForesight() { + nodesEvictedInComm_.clear(); + nodesEvictedInComm_.resize(instance_->NumberOfProcessors(), std::vector>(numberOfSupersteps_)); - nodes_sent_down.clear(); - nodes_sent_down.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + nodesSentDown_.clear(); + nodesSentDown_.resize(instance_->NumberOfProcessors(), std::vector>(numberOfSupersteps_)); - nodes_sent_up.clear(); - nodes_sent_up.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + nodesSentUp_.clear(); + nodesSentUp_.resize(instance_->NumberOfProcessors(), std::vector>(numberOfSupersteps_)); - SetMemoryMovement(CACHE_EVICTION_STRATEGY::FORESIGHT); + SetMemoryMovement(CacheEvictionStrategy::FORESIGHT); } -template -bool PebblingSchedule::isValid() const { - std::vector mem_used(instance->numberOfProcessors(), 0); - std::vector> in_fast_mem(instance->getComputationalDag().num_vertices(), - std::vector(instance->numberOfProcessors(), false)); - 
std::vector in_slow_mem(instance->getComputationalDag().num_vertices(), false); +template +bool PebblingSchedule::IsValid() const { + std::vector memUsed(instance_->NumberOfProcessors(), 0); + std::vector> inFastMem(instance_->GetComputationalDag().NumVertices(), + std::vector(instance_->NumberOfProcessors(), false)); + std::vector inSlowMem(instance_->GetComputationalDag().NumVertices(), false); - if (need_to_load_inputs) { - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - if (instance->getComputationalDag().in_degree(node) == 0) { - in_slow_mem[node] = true; + if (needToLoadInputs_) { + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + if (instance_->GetComputationalDag().InDegree(node) == 0) { + inSlowMem[node] = true; } } } - if (!has_red_in_beginning.empty()) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (vertex_idx node : has_red_in_beginning[proc]) { - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); - in_fast_mem[node][proc] = true; + if (!hasRedInBeginning_.empty()) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (VertexIdx node : hasRedInBeginning_[proc]) { + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(node); + inFastMem[node][proc] = true; } } } - for (unsigned step = 0; step < number_of_supersteps; ++step) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { // computation phase - for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { - if (!instance->isCompatible(computeStep.node, proc)) { + for (const auto &computeStep : computeStepsForProcSuperstep_[proc][step]) { + if (!instance_->IsCompatible(computeStep.node_, proc)) { return false; } - for (vertex_idx pred : 
instance->getComputationalDag().parents(computeStep.node)) { - if (!in_fast_mem[pred][proc]) { + for (VertexIdx pred : instance_->GetComputationalDag().Parents(computeStep.node_)) { + if (!inFastMem[pred][proc]) { return false; } } - if (need_to_load_inputs && instance->getComputationalDag().in_degree(computeStep.node) == 0) { + if (needToLoadInputs_ && instance_->GetComputationalDag().InDegree(computeStep.node_) == 0) { return false; } - if (!in_fast_mem[computeStep.node][proc]) { - in_fast_mem[computeStep.node][proc] = true; - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(computeStep.node); + if (!inFastMem[computeStep.node_][proc]) { + inFastMem[computeStep.node_][proc] = true; + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(computeStep.node_); } - if (mem_used[proc] > instance->getArchitecture().memoryBound(proc)) { + if (memUsed[proc] > instance_->GetArchitecture().MemoryBound(proc)) { return false; } - for (vertex_idx to_remove : computeStep.nodes_evicted_after) { - if (!in_fast_mem[to_remove][proc]) { + for (VertexIdx toRemove : computeStep.nodesEvictedAfter_) { + if (!inFastMem[toRemove][proc]) { return false; } - in_fast_mem[to_remove][proc] = false; - mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_remove); + inFastMem[toRemove][proc] = false; + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(toRemove); } } // communication phase - sendup and eviction - for (vertex_idx node : nodes_sent_up[proc][step]) { - if (!in_fast_mem[node][proc]) { + for (VertexIdx node : nodesSentUp_[proc][step]) { + if (!inFastMem[node][proc]) { return false; } - in_slow_mem[node] = true; + inSlowMem[node] = true; } - for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { - if (!in_fast_mem[node][proc]) { + for (VertexIdx node : nodesEvictedInComm_[proc][step]) { + if (!inFastMem[node][proc]) { return false; } - in_fast_mem[node][proc] = false; - mem_used[proc] -= 
instance->getComputationalDag().vertex_mem_weight(node); + inFastMem[node][proc] = false; + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(node); } } // communication phase - senddown - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (vertex_idx node : nodes_sent_down[proc][step]) { - if (!in_slow_mem[node]) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (VertexIdx node : nodesSentDown_[proc][step]) { + if (!inSlowMem[node]) { return false; } - if (!in_fast_mem[node][proc]) { - in_fast_mem[node][proc] = true; - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); + if (!inFastMem[node][proc]) { + inFastMem[node][proc] = true; + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(node); } } } - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if (mem_used[proc] > instance->getArchitecture().memoryBound(proc)) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + if (memUsed[proc] > instance_->GetArchitecture().MemoryBound(proc)) { return false; } } } - if (needs_blue_at_end.empty()) { - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - if (instance->getComputationalDag().out_degree(node) == 0 && !in_slow_mem[node]) { + if (needsBlueAtEnd_.empty()) { + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + if (instance_->GetComputationalDag().OutDegree(node) == 0 && !inSlowMem[node]) { return false; } } } else { - for (vertex_idx node : needs_blue_at_end) { - if (!in_slow_mem[node]) { + for (VertexIdx node : needsBlueAtEnd_) { + if (!inSlowMem[node]) { return false; } } @@ -1263,67 +1260,67 @@ bool PebblingSchedule::isValid() const { return true; } -template -std::vector> PebblingSchedule::minimumMemoryRequiredPerNodeType( - const BspInstance &instance, const std::set &external_sources) { - std::vector> 
max_needed(instance.getComputationalDag().num_vertex_types(), 0); - for (vertex_idx_t node = 0; node < instance.getComputationalDag().num_vertices(); ++node) { - if (external_sources.find(node) != external_sources.end()) { +template +std::vector> PebblingSchedule::MinimumMemoryRequiredPerNodeType(const BspInstance &instance, + const std::set &externalSources) { + std::vector> maxNeeded(instance.GetComputationalDag().NumVertexTypes(), 0); + for (VertexIdx node = 0; node < instance.GetComputationalDag().NumVertices(); ++node) { + if (externalSources.find(node) != externalSources.end()) { continue; } - v_memw_t needed = instance.getComputationalDag().vertex_mem_weight(node); - const v_type_t type = instance.getComputationalDag().vertex_type(node); - for (vertex_idx_t pred : instance.getComputationalDag().parents(node)) { - needed += instance.getComputationalDag().vertex_mem_weight(pred); + VMemwT needed = instance.GetComputationalDag().VertexMemWeight(node); + const VTypeT type = instance.GetComputationalDag().VertexType(node); + for (VertexIdx pred : instance.GetComputationalDag().Parents(node)) { + needed += instance.GetComputationalDag().VertexMemWeight(pred); } - if (needed > max_needed[type]) { - max_needed[type] = needed; + if (needed > maxNeeded[type]) { + maxNeeded[type] = needed; } } - return max_needed; + return maxNeeded; } -template -std::vector>>> PebblingSchedule::computeTopOrdersDFS( - const BspSchedule &schedule) const { - size_t n = schedule.getInstance().getComputationalDag().num_vertices(); - unsigned num_procs = schedule.getInstance().numberOfProcessors(); - unsigned num_supsteps = schedule.numberOfSupersteps(); +template +std::vector>>> PebblingSchedule::ComputeTopOrdersDfs( + const BspSchedule &schedule) const { + size_t n = schedule.GetInstance().GetComputationalDag().NumVertices(); + unsigned numProcs = schedule.GetInstance().NumberOfProcessors(); + unsigned numSupsteps = schedule.NumberOfSupersteps(); - std::vector>> top_orders(num_procs, 
std::vector>(num_supsteps)); + std::vector>> topOrders(numProcs, std::vector>(numSupsteps)); - std::vector>> Q(num_procs, std::vector>(num_supsteps)); - std::vector>> nodesUpdated(num_procs, std::vector>(num_supsteps)); - std::vector nr_pred(n); - std::vector pred_done(n, 0); - for (vertex_idx node = 0; node < n; ++node) { + std::vector>> queue(numProcs, std::vector>(numSupsteps)); + std::vector>> nodesUpdated(numProcs, std::vector>(numSupsteps)); + std::vector nrPred(n); + std::vector predDone(n, 0); + for (VertexIdx node = 0; node < n; ++node) { unsigned predecessors = 0; - for (vertex_idx pred : schedule.getInstance().getComputationalDag().parents(node)) { - if (external_sources.find(pred) == external_sources.end() - && schedule.assignedProcessor(node) == schedule.assignedProcessor(pred) - && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(pred)) { + for (VertexIdx pred : schedule.GetInstance().GetComputationalDag().Parents(node)) { + if (externalSources_.find(pred) == externalSources_.end() + && schedule.AssignedProcessor(node) == schedule.AssignedProcessor(pred) + && schedule.AssignedSuperstep(node) == schedule.AssignedSuperstep(pred)) { ++predecessors; } } - nr_pred[node] = predecessors; - if (predecessors == 0 && external_sources.find(node) == external_sources.end()) { - Q[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)].push_back(node); + nrPred[node] = predecessors; + if (predecessors == 0 && externalSources_.find(node) == externalSources_.end()) { + queue[schedule.AssignedProcessor(node)][schedule.AssignedSuperstep(node)].push_back(node); } } - for (unsigned proc = 0; proc < num_procs; ++proc) { - for (unsigned step = 0; step < num_supsteps; ++step) { - while (!Q[proc][step].empty()) { - vertex_idx node = Q[proc][step].front(); - Q[proc][step].pop_front(); - top_orders[proc][step].push_back(node); - for (vertex_idx succ : schedule.getInstance().getComputationalDag().children(node)) { - if (schedule.assignedProcessor(node) 
== schedule.assignedProcessor(succ) - && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(succ)) { - ++pred_done[succ]; - if (pred_done[succ] == nr_pred[succ]) { - Q[proc][step].push_front(succ); + for (unsigned proc = 0; proc < numProcs; ++proc) { + for (unsigned step = 0; step < numSupsteps; ++step) { + while (!queue[proc][step].empty()) { + VertexIdx node = queue[proc][step].front(); + queue[proc][step].pop_front(); + topOrders[proc][step].push_back(node); + for (VertexIdx succ : schedule.GetInstance().GetComputationalDag().Children(node)) { + if (schedule.AssignedProcessor(node) == schedule.AssignedProcessor(succ) + && schedule.AssignedSuperstep(node) == schedule.AssignedSuperstep(succ)) { + ++predDone[succ]; + if (predDone[succ] == nrPred[succ]) { + queue[proc][step].push_front(succ); } } } @@ -1331,48 +1328,48 @@ std::vector>>> PebblingSchedule -void PebblingSchedule::getDataForMultiprocessorPebbling( - std::vector>> &computeSteps, - std::vector>> &sendUpSteps, - std::vector>> &sendDownSteps, - std::vector>> &nodesEvictedAfterStep) const { +template +void PebblingSchedule::GetDataForMultiprocessorPebbling( + std::vector>> &computeSteps, + std::vector>> &sendUpSteps, + std::vector>> &sendDownSteps, + std::vector>> &nodesEvictedAfterStep) const { computeSteps.clear(); - computeSteps.resize(instance->numberOfProcessors()); + computeSteps.resize(instance_->NumberOfProcessors()); sendUpSteps.clear(); - sendUpSteps.resize(instance->numberOfProcessors()); + sendUpSteps.resize(instance_->NumberOfProcessors()); sendDownSteps.clear(); - sendDownSteps.resize(instance->numberOfProcessors()); + sendDownSteps.resize(instance_->NumberOfProcessors()); nodesEvictedAfterStep.clear(); - nodesEvictedAfterStep.resize(instance->numberOfProcessors()); + nodesEvictedAfterStep.resize(instance_->NumberOfProcessors()); - std::vector mem_used(instance->numberOfProcessors(), 0); - std::vector> in_mem(instance->numberOfProcessors()); - if (!has_red_in_beginning.empty()) { - 
for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (vertex_idx node : has_red_in_beginning[proc]) { - in_mem[proc].insert(node); - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); + std::vector memUsed(instance_->NumberOfProcessors(), 0); + std::vector> inMem(instance_->NumberOfProcessors()); + if (!hasRedInBeginning_.empty()) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (VertexIdx node : hasRedInBeginning_[proc]) { + inMem[proc].insert(node); + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(node); } } } unsigned step = 0; - for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { - std::vector step_on_proc(instance->numberOfProcessors(), step); - bool any_compute = false; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if (!compute_steps_for_proc_superstep[proc][superstep].empty()) { - any_compute = true; + for (unsigned superstep = 0; superstep < numberOfSupersteps_; ++superstep) { + std::vector stepOnProc(instance_->NumberOfProcessors(), step); + bool anyCompute = false; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + if (!computeStepsForProcSuperstep_[proc][superstep].empty()) { + anyCompute = true; } } - if (any_compute) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (anyCompute) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { computeSteps[proc].emplace_back(); sendUpSteps[proc].emplace_back(); sendDownSteps[proc].emplace_back(); @@ -1380,51 +1377,51 @@ void PebblingSchedule::getDataForMultiprocessorPebbling( } } - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - std::vector evict_list; - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { - vertex_idx node = 
compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - if (mem_used[proc] + instance->getComputationalDag().vertex_mem_weight(node) - > instance->getArchitecture().memoryBound(proc)) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + std::vector evictList; + for (unsigned stepIndex = 0; stepIndex < computeStepsForProcSuperstep_[proc][superstep].size(); ++stepIndex) { + VertexIdx node = computeStepsForProcSuperstep_[proc][superstep][stepIndex].node_; + if (memUsed[proc] + instance_->GetComputationalDag().VertexMemWeight(node) + > instance_->GetArchitecture().MemoryBound(proc)) { // open new step - nodesEvictedAfterStep[proc][step_on_proc[proc]] = evict_list; - ++step_on_proc[proc]; - for (vertex_idx to_evict : evict_list) { - mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_evict); + nodesEvictedAfterStep[proc][stepOnProc[proc]] = evictList; + ++stepOnProc[proc]; + for (VertexIdx toEvict : evictList) { + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(toEvict); } - evict_list.clear(); + evictList.clear(); computeSteps[proc].emplace_back(); sendUpSteps[proc].emplace_back(); sendDownSteps[proc].emplace_back(); nodesEvictedAfterStep[proc].emplace_back(); } - computeSteps[proc][step_on_proc[proc]].emplace_back(node); - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); - for (vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after) { - evict_list.emplace_back(to_evict); + computeSteps[proc][stepOnProc[proc]].emplace_back(node); + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(node); + for (VertexIdx toEvict : computeStepsForProcSuperstep_[proc][superstep][stepIndex].nodesEvictedAfter_) { + evictList.emplace_back(toEvict); } } - if (!evict_list.empty()) { - nodesEvictedAfterStep[proc][step_on_proc[proc]] = evict_list; - for (vertex_idx to_evict : evict_list) { - mem_used[proc] -= 
instance->getComputationalDag().vertex_mem_weight(to_evict); + if (!evictList.empty()) { + nodesEvictedAfterStep[proc][stepOnProc[proc]] = evictList; + for (VertexIdx toEvict : evictList) { + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(toEvict); } } } - if (any_compute) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - ++step_on_proc[proc]; + if (anyCompute) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + ++stepOnProc[proc]; } } - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - step = std::max(step, step_on_proc[proc]); + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + step = std::max(step, stepOnProc[proc]); } - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (; step_on_proc[proc] < step; ++step_on_proc[proc]) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (; stepOnProc[proc] < step; ++stepOnProc[proc]) { computeSteps[proc].emplace_back(); sendUpSteps[proc].emplace_back(); sendDownSteps[proc].emplace_back(); @@ -1432,455 +1429,451 @@ void PebblingSchedule::getDataForMultiprocessorPebbling( } } - bool any_send_up = false; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if (!nodes_sent_up[proc][superstep].empty() || !nodes_evicted_in_comm[proc][superstep].empty()) { - any_send_up = true; + bool anySendUp = false; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + if (!nodesSentUp_[proc][superstep].empty() || !nodesEvictedInComm_[proc][superstep].empty()) { + anySendUp = true; } } - if (any_send_up) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (anySendUp) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { computeSteps[proc].emplace_back(); - sendUpSteps[proc].emplace_back(nodes_sent_up[proc][superstep]); + 
sendUpSteps[proc].emplace_back(nodesSentUp_[proc][superstep]); sendDownSteps[proc].emplace_back(); - nodesEvictedAfterStep[proc].emplace_back(nodes_evicted_in_comm[proc][superstep]); - for (vertex_idx to_evict : nodes_evicted_in_comm[proc][superstep]) { - mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_evict); + nodesEvictedAfterStep[proc].emplace_back(nodesEvictedInComm_[proc][superstep]); + for (VertexIdx toEvict : nodesEvictedInComm_[proc][superstep]) { + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(toEvict); } - ++step_on_proc[proc]; + ++stepOnProc[proc]; } ++step; } - bool any_send_down = false; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if (!nodes_sent_down[proc][superstep].empty()) { - any_send_down = true; + bool anySendDown = false; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + if (!nodesSentDown_[proc][superstep].empty()) { + anySendDown = true; } } - if (any_send_down) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (anySendDown) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { computeSteps[proc].emplace_back(); sendUpSteps[proc].emplace_back(); - sendDownSteps[proc].emplace_back(nodes_sent_down[proc][superstep]); - for (vertex_idx send_down : nodes_sent_down[proc][superstep]) { - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(send_down); + sendDownSteps[proc].emplace_back(nodesSentDown_[proc][superstep]); + for (VertexIdx sendDown : nodesSentDown_[proc][superstep]) { + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(sendDown); } nodesEvictedAfterStep[proc].emplace_back(); - ++step_on_proc[proc]; + ++stepOnProc[proc]; } ++step; } } } -template -std::vector>> PebblingSchedule::getMemContentAtEnd() const { - std::vector> mem_content(instance->numberOfProcessors()); - if (!has_red_in_beginning.empty()) { - mem_content = has_red_in_beginning; 
+template +std::vector>> PebblingSchedule::GetMemContentAtEnd() const { + std::vector>> memContent(instance_->NumberOfProcessors()); + if (!hasRedInBeginning_.empty()) { + memContent = hasRedInBeginning_; } - for (unsigned step = 0; step < number_of_supersteps; ++step) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { // computation phase - for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { - mem_content[proc].insert(computeStep.node); - for (vertex_idx to_remove : computeStep.nodes_evicted_after) { - mem_content[proc].erase(to_remove); + for (const auto &computeStep : computeStepsForProcSuperstep_[proc][step]) { + memContent[proc].insert(computeStep.node_); + for (VertexIdx toRemove : computeStep.nodesEvictedAfter_) { + memContent[proc].erase(toRemove); } } // communication phase - eviction - for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { - mem_content[proc].erase(node); + for (VertexIdx node : nodesEvictedInComm_[proc][step]) { + memContent[proc].erase(node); } // communication phase - senddown - for (vertex_idx node : nodes_sent_down[proc][step]) { - mem_content[proc].insert(node); + for (VertexIdx node : nodesSentDown_[proc][step]) { + memContent[proc].insert(node); } } } - return mem_content; + return memContent; } -template -void PebblingSchedule::removeEvictStepsFromEnd() { - std::vector mem_used(instance->numberOfProcessors(), 0); - std::vector bottleneck(instance->numberOfProcessors(), 0); - std::vector> fast_mem_end = getMemContentAtEnd(); - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (vertex_idx node : fast_mem_end[proc]) { - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); +template +void PebblingSchedule::RemoveEvictStepsFromEnd() { + std::vector memUsed(instance_->NumberOfProcessors(), 
0); + std::vector bottleneck(instance_->NumberOfProcessors(), 0); + std::vector>> fastMemEnd = GetMemContentAtEnd(); + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (VertexIdx node : fastMemEnd[proc]) { + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(node); } - bottleneck[proc] = instance->getArchitecture().memoryBound(proc) - mem_used[proc]; + bottleneck[proc] = instance_->GetArchitecture().MemoryBound(proc) - memUsed[proc]; } - for (unsigned step = number_of_supersteps; step > 0;) { + for (unsigned step = numberOfSupersteps_; step > 0;) { --step; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { // communication phase - senddown - for (vertex_idx node : nodes_sent_down[proc][step]) { - mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(node); + for (VertexIdx node : nodesSentDown_[proc][step]) { + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(node); } // communication phase - eviction - std::vector remaining; - for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); - if (instance->getComputationalDag().vertex_mem_weight(node) <= bottleneck[proc] - && fast_mem_end[proc].find(node) == fast_mem_end[proc].end()) { - fast_mem_end[proc].insert(node); - bottleneck[proc] -= instance->getComputationalDag().vertex_mem_weight(node); + std::vector remaining; + for (VertexIdx node : nodesEvictedInComm_[proc][step]) { + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(node); + if (instance_->GetComputationalDag().VertexMemWeight(node) <= bottleneck[proc] + && fastMemEnd[proc].find(node) == fastMemEnd[proc].end()) { + fastMemEnd[proc].insert(node); + bottleneck[proc] -= instance_->GetComputationalDag().VertexMemWeight(node); } else { remaining.push_back(node); } } - 
nodes_evicted_in_comm[proc][step] = remaining; - bottleneck[proc] = std::min(bottleneck[proc], instance->getArchitecture().memoryBound(proc) - mem_used[proc]); + nodesEvictedInComm_[proc][step] = remaining; + bottleneck[proc] = std::min(bottleneck[proc], instance_->GetArchitecture().MemoryBound(proc) - memUsed[proc]); // computation phase - for (unsigned stepIndex = static_cast(compute_steps_for_proc_superstep[proc][step].size()); stepIndex > 0;) { + for (unsigned stepIndex = static_cast(computeStepsForProcSuperstep_[proc][step].size()); stepIndex > 0;) { --stepIndex; - auto &computeStep = compute_steps_for_proc_superstep[proc][step][stepIndex]; - - std::vector remaining_2; - for (vertex_idx to_remove : computeStep.nodes_evicted_after) { - mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(to_remove); - if (instance->getComputationalDag().vertex_mem_weight(to_remove) <= bottleneck[proc] - && fast_mem_end[proc].find(to_remove) == fast_mem_end[proc].end()) { - fast_mem_end[proc].insert(to_remove); - bottleneck[proc] -= instance->getComputationalDag().vertex_mem_weight(to_remove); + auto &computeStep = computeStepsForProcSuperstep_[proc][step][stepIndex]; + + std::vector remaining2; + for (VertexIdx toRemove : computeStep.nodesEvictedAfter_) { + memUsed[proc] += instance_->GetComputationalDag().VertexMemWeight(toRemove); + if (instance_->GetComputationalDag().VertexMemWeight(toRemove) <= bottleneck[proc] + && fastMemEnd[proc].find(toRemove) == fastMemEnd[proc].end()) { + fastMemEnd[proc].insert(toRemove); + bottleneck[proc] -= instance_->GetComputationalDag().VertexMemWeight(toRemove); } else { - remaining_2.push_back(to_remove); + remaining2.push_back(toRemove); } } - computeStep.nodes_evicted_after = remaining_2; - bottleneck[proc] = std::min(bottleneck[proc], instance->getArchitecture().memoryBound(proc) - mem_used[proc]); + computeStep.nodesEvictedAfter_ = remaining2; + bottleneck[proc] = std::min(bottleneck[proc], 
instance_->GetArchitecture().MemoryBound(proc) - memUsed[proc]); - mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(computeStep.node); + memUsed[proc] -= instance_->GetComputationalDag().VertexMemWeight(computeStep.node_); } } } - if (!isValid()) { + if (!IsValid()) { std::cout << "ERROR: eviction removal process created an invalid schedule." << std::endl; } } -template -void PebblingSchedule::CreateFromPartialPebblings( - const BspInstance &bsp_instance, - const std::vector> &pebblings, - const std::vector> &processors_to_parts, - const std::vector> &original_node_id, - const std::vector> &original_proc_id, - const std::vector>> &has_reds_in_beginning) { - instance = &bsp_instance; +template +void PebblingSchedule::CreateFromPartialPebblings(const BspInstance &bspInstance, + const std::vector> &pebblings, + const std::vector> &processorsToParts, + const std::vector> &originalNodeId, + const std::vector> &originalProcId, + const std::vector>> &hasRedsInBeginning) { + instance_ = &bspInstance; - unsigned nr_parts = static_cast(processors_to_parts.size()); + unsigned nrParts = static_cast(processorsToParts.size()); - std::vector> in_mem(instance->numberOfProcessors()); - std::vector> force_evicts; + std::vector> inMem(instance_->NumberOfProcessors()); + std::vector> forceEvicts; - compute_steps_for_proc_superstep.clear(); - nodes_sent_up.clear(); - nodes_sent_down.clear(); - nodes_evicted_in_comm.clear(); - compute_steps_for_proc_superstep.resize(instance->numberOfProcessors()); - nodes_sent_up.resize(instance->numberOfProcessors()); - nodes_sent_down.resize(instance->numberOfProcessors()); - nodes_evicted_in_comm.resize(instance->numberOfProcessors()); + computeStepsForProcSuperstep_.clear(); + nodesSentUp_.clear(); + nodesSentDown_.clear(); + nodesEvictedInComm_.clear(); + computeStepsForProcSuperstep_.resize(instance_->NumberOfProcessors()); + nodesSentUp_.resize(instance_->NumberOfProcessors()); + 
nodesSentDown_.resize(instance_->NumberOfProcessors()); + nodesEvictedInComm_.resize(instance_->NumberOfProcessors()); - std::vector supstep_idx(instance->numberOfProcessors(), 0); + std::vector supstepIdx(instance_->NumberOfProcessors(), 0); - std::vector gets_blue_in_superstep(instance->numberOfVertices(), UINT_MAX); - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - if (instance->getComputationalDag().in_degree(node) == 0) { - gets_blue_in_superstep[node] = 0; + std::vector getsBlueInSuperstep(instance_->NumberOfVertices(), UINT_MAX); + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + if (instance_->GetComputationalDag().InDegree(node) == 0) { + getsBlueInSuperstep[node] = 0; } } - for (unsigned part = 0; part < nr_parts; ++part) { - unsigned starting_step_index = 0; + for (unsigned part = 0; part < nrParts; ++part) { + unsigned startingStepIndex = 0; // find dependencies on previous subschedules - for (vertex_idx node = 0; node < pebblings[part].instance->numberOfVertices(); ++node) { - if (pebblings[part].instance->getComputationalDag().in_degree(node) == 0) { - starting_step_index = std::max(starting_step_index, gets_blue_in_superstep[original_node_id[part].at(node)]); + for (VertexIdx node = 0; node < pebblings[part].instance_->NumberOfVertices(); ++node) { + if (pebblings[part].instance_->GetComputationalDag().InDegree(node) == 0) { + startingStepIndex = std::max(startingStepIndex, getsBlueInSuperstep[originalNodeId[part].at(node)]); } } // sync starting points for the subset of processors - for (unsigned proc : processors_to_parts[part]) { - starting_step_index = std::max(starting_step_index, supstep_idx[proc]); + for (unsigned proc : processorsToParts[part]) { + startingStepIndex = std::max(startingStepIndex, supstepIdx[proc]); } - for (unsigned proc : processors_to_parts[part]) { - while (supstep_idx[proc] < starting_step_index) { - compute_steps_for_proc_superstep[proc].emplace_back(); - 
nodes_sent_up[proc].emplace_back(); - nodes_sent_down[proc].emplace_back(); - nodes_evicted_in_comm[proc].emplace_back(); - ++supstep_idx[proc]; + for (unsigned proc : processorsToParts[part]) { + while (supstepIdx[proc] < startingStepIndex) { + computeStepsForProcSuperstep_[proc].emplace_back(); + nodesSentUp_[proc].emplace_back(); + nodesSentDown_[proc].emplace_back(); + nodesEvictedInComm_[proc].emplace_back(); + ++supstepIdx[proc]; } } // check and update according to initial states of red pebbles - for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) { - unsigned proc_id = original_proc_id[part].at(proc); - std::set needed_in_red, add_before, remove_before; - for (vertex_idx node : has_reds_in_beginning[part][proc]) { - vertex_idx node_id = original_node_id[part].at(node); - needed_in_red.insert(node_id); - if (in_mem[proc_id].find(node_id) == in_mem[proc_id].end()) { - add_before.insert(node_id); + for (unsigned proc = 0; proc < processorsToParts[part].size(); ++proc) { + unsigned procId = originalProcId[part].at(proc); + std::set neededInRed, addBefore, removeBefore; + for (VertexIdx node : hasRedsInBeginning[part][proc]) { + VertexIdx nodeId = originalNodeId[part].at(node); + neededInRed.insert(nodeId); + if (inMem[procId].find(nodeId) == inMem[procId].end()) { + addBefore.insert(nodeId); } } - for (vertex_idx node : in_mem[proc_id]) { - if (needed_in_red.find(node) == needed_in_red.end()) { - remove_before.insert(node); + for (VertexIdx node : inMem[procId]) { + if (neededInRed.find(node) == neededInRed.end()) { + removeBefore.insert(node); } } - if ((!add_before.empty() || !remove_before.empty()) && supstep_idx[proc_id] == 0) { + if ((!addBefore.empty() || !removeBefore.empty()) && supstepIdx[procId] == 0) { // this code is added just in case - this shouldn't happen in normal schedules - compute_steps_for_proc_superstep[proc_id].emplace_back(); - nodes_sent_up[proc_id].emplace_back(); - nodes_sent_down[proc_id].emplace_back(); - 
nodes_evicted_in_comm[proc_id].emplace_back(); - ++supstep_idx[proc_id]; + computeStepsForProcSuperstep_[procId].emplace_back(); + nodesSentUp_[procId].emplace_back(); + nodesSentDown_[procId].emplace_back(); + nodesEvictedInComm_[procId].emplace_back(); + ++supstepIdx[procId]; } - for (vertex_idx node : add_before) { - in_mem[proc_id].insert(node); - nodes_sent_down[proc_id].back().push_back(node); + for (VertexIdx node : addBefore) { + inMem[procId].insert(node); + nodesSentDown_[procId].back().push_back(node); } - for (vertex_idx node : remove_before) { - in_mem[proc_id].erase(node); - nodes_evicted_in_comm[proc_id].back().push_back(node); - force_evicts.push_back(std::make_tuple(node, proc_id, nodes_evicted_in_comm[proc_id].size() - 1)); + for (VertexIdx node : removeBefore) { + inMem[procId].erase(node); + nodesEvictedInComm_[procId].back().push_back(node); + forceEvicts.push_back(std::make_tuple(node, procId, nodesEvictedInComm_[procId].size() - 1)); } } - for (unsigned supstep = 0; supstep < pebblings[part].numberOfSupersteps(); ++supstep) { - for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) { - unsigned proc_id = original_proc_id[part].at(proc); - compute_steps_for_proc_superstep[proc_id].emplace_back(); - nodes_sent_up[proc_id].emplace_back(); - nodes_sent_down[proc_id].emplace_back(); - nodes_evicted_in_comm[proc_id].emplace_back(); + for (unsigned supstep = 0; supstep < pebblings[part].NumberOfSupersteps(); ++supstep) { + for (unsigned proc = 0; proc < processorsToParts[part].size(); ++proc) { + unsigned procId = originalProcId[part].at(proc); + computeStepsForProcSuperstep_[procId].emplace_back(); + nodesSentUp_[procId].emplace_back(); + nodesSentDown_[procId].emplace_back(); + nodesEvictedInComm_[procId].emplace_back(); // copy schedule with translated indeces - for (const compute_step &computeStep : pebblings[part].GetComputeStepsForProcSuperstep(proc, supstep)) { - 
compute_steps_for_proc_superstep[proc_id].back().emplace_back(); - compute_steps_for_proc_superstep[proc_id].back().back().node = original_node_id[part].at(computeStep.node); - in_mem[proc_id].insert(original_node_id[part].at(computeStep.node)); - - for (vertex_idx local_id : computeStep.nodes_evicted_after) { - compute_steps_for_proc_superstep[proc_id].back().back().nodes_evicted_after.push_back( - original_node_id[part].at(local_id)); - in_mem[proc_id].erase(original_node_id[part].at(local_id)); + for (const ComputeStep &computeStep : pebblings[part].GetComputeStepsForProcSuperstep(proc, supstep)) { + computeStepsForProcSuperstep_[procId].back().emplace_back(); + computeStepsForProcSuperstep_[procId].back().back().node_ = originalNodeId[part].at(computeStep.node_); + inMem[procId].insert(originalNodeId[part].at(computeStep.node_)); + + for (VertexIdx localId : computeStep.nodesEvictedAfter_) { + computeStepsForProcSuperstep_[procId].back().back().nodesEvictedAfter_.push_back( + originalNodeId[part].at(localId)); + inMem[procId].erase(originalNodeId[part].at(localId)); } } - for (vertex_idx node : pebblings[part].GetNodesSentUp(proc, supstep)) { - vertex_idx node_id = original_node_id[part].at(node); - nodes_sent_up[proc_id].back().push_back(node_id); - gets_blue_in_superstep[node_id] = std::min(gets_blue_in_superstep[node_id], supstep_idx[proc_id]); + for (VertexIdx node : pebblings[part].GetNodesSentUp(proc, supstep)) { + VertexIdx nodeId = originalNodeId[part].at(node); + nodesSentUp_[procId].back().push_back(nodeId); + getsBlueInSuperstep[nodeId] = std::min(getsBlueInSuperstep[nodeId], supstepIdx[procId]); } - for (vertex_idx node : pebblings[part].GetNodesEvictedInComm(proc, supstep)) { - nodes_evicted_in_comm[proc_id].back().push_back(original_node_id[part].at(node)); - in_mem[proc_id].erase(original_node_id[part].at(node)); + for (VertexIdx node : pebblings[part].GetNodesEvictedInComm(proc, supstep)) { + 
nodesEvictedInComm_[procId].back().push_back(originalNodeId[part].at(node)); + inMem[procId].erase(originalNodeId[part].at(node)); } - for (vertex_idx node : pebblings[part].GetNodesSentDown(proc, supstep)) { - nodes_sent_down[proc_id].back().push_back(original_node_id[part].at(node)); - in_mem[proc_id].insert(original_node_id[part].at(node)); + for (VertexIdx node : pebblings[part].GetNodesSentDown(proc, supstep)) { + nodesSentDown_[procId].back().push_back(originalNodeId[part].at(node)); + inMem[procId].insert(originalNodeId[part].at(node)); } - ++supstep_idx[proc_id]; + ++supstepIdx[procId]; } } } // padding supersteps in the end - unsigned max_step_index = 0; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - max_step_index = std::max(max_step_index, supstep_idx[proc]); + unsigned maxStepIndex = 0; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + maxStepIndex = std::max(maxStepIndex, supstepIdx[proc]); } - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - while (supstep_idx[proc] < max_step_index) { - compute_steps_for_proc_superstep[proc].emplace_back(); - nodes_sent_up[proc].emplace_back(); - nodes_sent_down[proc].emplace_back(); - nodes_evicted_in_comm[proc].emplace_back(); - ++supstep_idx[proc]; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + while (supstepIdx[proc] < maxStepIndex) { + computeStepsForProcSuperstep_[proc].emplace_back(); + nodesSentUp_[proc].emplace_back(); + nodesSentDown_[proc].emplace_back(); + nodesEvictedInComm_[proc].emplace_back(); + ++supstepIdx[proc]; } } - number_of_supersteps = max_step_index; - need_to_load_inputs = true; + numberOfSupersteps_ = maxStepIndex; + needToLoadInputs_ = true; - FixForceEvicts(force_evicts); + FixForceEvicts(forceEvicts); TryToMergeSupersteps(); } -template -void PebblingSchedule::FixForceEvicts( - const std::vector> force_evict_node_proc_step) { +template +void 
PebblingSchedule::FixForceEvicts(const std::vector> &forceEvictNodeProcStep) { // Some values were evicted only because they weren't present in the next part - see if we can undo those evictions - for (auto force_evict : force_evict_node_proc_step) { - vertex_idx node = std::get<0>(force_evict); - unsigned proc = std::get<1>(force_evict); - unsigned superstep = std::get<2>(force_evict); + for (auto forceEvict : forceEvictNodeProcStep) { + VertexIdx node = std::get<0>(forceEvict); + unsigned proc = std::get<1>(forceEvict); + unsigned superstep = std::get<2>(forceEvict); - bool next_in_comp = false; - bool next_in_comm = false; + bool nextInComp = false; + bool nextInComm = false; std::pair where; - for (unsigned find_supstep = superstep + 1; find_supstep < numberOfSupersteps(); ++find_supstep) { - for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][find_supstep].size(); ++stepIndex) { - if (compute_steps_for_proc_superstep[proc][find_supstep][stepIndex].node == node) { - next_in_comp = true; - where = std::make_pair(find_supstep, stepIndex); + for (unsigned findSupstep = superstep + 1; findSupstep < NumberOfSupersteps(); ++findSupstep) { + for (unsigned stepIndex = 0; stepIndex < computeStepsForProcSuperstep_[proc][findSupstep].size(); ++stepIndex) { + if (computeStepsForProcSuperstep_[proc][findSupstep][stepIndex].node_ == node) { + nextInComp = true; + where = std::make_pair(findSupstep, stepIndex); break; } } - if (next_in_comp) { + if (nextInComp) { break; } - for (vertex_idx send_down : nodes_sent_down[proc][find_supstep]) { - if (send_down == node) { - next_in_comm = true; - where = std::make_pair(find_supstep, 0); + for (VertexIdx sendDown : nodesSentDown_[proc][findSupstep]) { + if (sendDown == node) { + nextInComm = true; + where = std::make_pair(findSupstep, 0); break; } } - if (next_in_comm) { + if (nextInComm) { break; } } // check new schedule for validity - if (!next_in_comp && !next_in_comm) { + if (!nextInComp && 
!nextInComm) { continue; } - PebblingSchedule test_schedule = *this; - for (auto itr = test_schedule.nodes_evicted_in_comm[proc][superstep].begin(); - itr != test_schedule.nodes_evicted_in_comm[proc][superstep].end(); + PebblingSchedule testSchedule = *this; + for (auto itr = testSchedule.nodesEvictedInComm_[proc][superstep].begin(); + itr != testSchedule.nodesEvictedInComm_[proc][superstep].end(); ++itr) { if (*itr == node) { - test_schedule.nodes_evicted_in_comm[proc][superstep].erase(itr); + testSchedule.nodesEvictedInComm_[proc][superstep].erase(itr); break; } } - if (next_in_comp) { - for (auto itr = test_schedule.compute_steps_for_proc_superstep[proc][where.first].begin(); - itr != test_schedule.compute_steps_for_proc_superstep[proc][where.first].end(); + if (nextInComp) { + for (auto itr = testSchedule.computeStepsForProcSuperstep_[proc][where.first].begin(); + itr != testSchedule.computeStepsForProcSuperstep_[proc][where.first].end(); ++itr) { - if (itr->node == node) { + if (itr->node_ == node) { if (where.second > 0) { - auto previous_step = itr; - --previous_step; - for (vertex_idx to_evict : itr->nodes_evicted_after) { - previous_step->nodes_evicted_after.push_back(to_evict); + auto previousStep = itr; + --previousStep; + for (VertexIdx toEvict : itr->nodesEvictedAfter_) { + previousStep->nodesEvictedAfter_.push_back(toEvict); } } else { - for (vertex_idx to_evict : itr->nodes_evicted_after) { - test_schedule.nodes_evicted_in_comm[proc][where.first - 1].push_back(to_evict); + for (VertexIdx toEvict : itr->nodesEvictedAfter_) { + testSchedule.nodesEvictedInComm_[proc][where.first - 1].push_back(toEvict); } } - test_schedule.compute_steps_for_proc_superstep[proc][where.first].erase(itr); + testSchedule.computeStepsForProcSuperstep_[proc][where.first].erase(itr); break; } } - if (test_schedule.isValid()) { - nodes_evicted_in_comm[proc][superstep] = test_schedule.nodes_evicted_in_comm[proc][superstep]; - compute_steps_for_proc_superstep[proc][where.first] - 
= test_schedule.compute_steps_for_proc_superstep[proc][where.first]; - nodes_evicted_in_comm[proc][where.first - 1] = test_schedule.nodes_evicted_in_comm[proc][where.first - 1]; + if (testSchedule.IsValid()) { + nodesEvictedInComm_[proc][superstep] = testSchedule.nodesEvictedInComm_[proc][superstep]; + computeStepsForProcSuperstep_[proc][where.first] = testSchedule.computeStepsForProcSuperstep_[proc][where.first]; + nodesEvictedInComm_[proc][where.first - 1] = testSchedule.nodesEvictedInComm_[proc][where.first - 1]; } - } else if (next_in_comm) { - for (auto itr = test_schedule.nodes_sent_down[proc][where.first].begin(); - itr != test_schedule.nodes_sent_down[proc][where.first].end(); + } else if (nextInComm) { + for (auto itr = testSchedule.nodesSentDown_[proc][where.first].begin(); + itr != testSchedule.nodesSentDown_[proc][where.first].end(); ++itr) { if (*itr == node) { - test_schedule.nodes_sent_down[proc][where.first].erase(itr); + testSchedule.nodesSentDown_[proc][where.first].erase(itr); break; } } - if (test_schedule.isValid()) { - nodes_evicted_in_comm[proc][superstep] = test_schedule.nodes_evicted_in_comm[proc][superstep]; - nodes_sent_down[proc][where.first] = test_schedule.nodes_sent_down[proc][where.first]; + if (testSchedule.IsValid()) { + nodesEvictedInComm_[proc][superstep] = testSchedule.nodesEvictedInComm_[proc][superstep]; + nodesSentDown_[proc][where.first] = testSchedule.nodesSentDown_[proc][where.first]; } } } } -template -void PebblingSchedule::TryToMergeSupersteps() { - std::vector is_removed(number_of_supersteps, false); +template +void PebblingSchedule::TryToMergeSupersteps() { + std::vector isRemoved(numberOfSupersteps_, false); - for (unsigned step = 1; step < number_of_supersteps; ++step) { - if (is_removed[step]) { + for (unsigned step = 1; step < numberOfSupersteps_; ++step) { + if (isRemoved[step]) { continue; } - unsigned prev_step = step - 1; - while (is_removed[prev_step]) { - --prev_step; + unsigned prevStep = step - 1; + while 
(isRemoved[prevStep]) { + --prevStep; } - for (unsigned next_step = step + 1; next_step < number_of_supersteps; ++next_step) { + for (unsigned nextStep = step + 1; nextStep < numberOfSupersteps_; ++nextStep) { // Try to merge step and next_step - PebblingSchedule test_schedule = *this; + PebblingSchedule testSchedule = *this; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - test_schedule.compute_steps_for_proc_superstep[proc][step].insert( - test_schedule.compute_steps_for_proc_superstep[proc][step].end(), - test_schedule.compute_steps_for_proc_superstep[proc][next_step].begin(), - test_schedule.compute_steps_for_proc_superstep[proc][next_step].end()); - test_schedule.compute_steps_for_proc_superstep[proc][next_step].clear(); + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + testSchedule.computeStepsForProcSuperstep_[proc][step].insert( + testSchedule.computeStepsForProcSuperstep_[proc][step].end(), + testSchedule.computeStepsForProcSuperstep_[proc][nextStep].begin(), + testSchedule.computeStepsForProcSuperstep_[proc][nextStep].end()); + testSchedule.computeStepsForProcSuperstep_[proc][nextStep].clear(); - test_schedule.nodes_sent_up[proc][step].insert(test_schedule.nodes_sent_up[proc][step].end(), - test_schedule.nodes_sent_up[proc][next_step].begin(), - test_schedule.nodes_sent_up[proc][next_step].end()); - test_schedule.nodes_sent_up[proc][next_step].clear(); + testSchedule.nodesSentUp_[proc][step].insert(testSchedule.nodesSentUp_[proc][step].end(), + testSchedule.nodesSentUp_[proc][nextStep].begin(), + testSchedule.nodesSentUp_[proc][nextStep].end()); + testSchedule.nodesSentUp_[proc][nextStep].clear(); - test_schedule.nodes_sent_down[proc][prev_step].insert(test_schedule.nodes_sent_down[proc][prev_step].end(), - test_schedule.nodes_sent_down[proc][step].begin(), - test_schedule.nodes_sent_down[proc][step].end()); - test_schedule.nodes_sent_down[proc][step].clear(); + 
testSchedule.nodesSentDown_[proc][prevStep].insert(testSchedule.nodesSentDown_[proc][prevStep].end(), + testSchedule.nodesSentDown_[proc][step].begin(), + testSchedule.nodesSentDown_[proc][step].end()); + testSchedule.nodesSentDown_[proc][step].clear(); - test_schedule.nodes_evicted_in_comm[proc][step].insert( - test_schedule.nodes_evicted_in_comm[proc][step].end(), - test_schedule.nodes_evicted_in_comm[proc][next_step].begin(), - test_schedule.nodes_evicted_in_comm[proc][next_step].end()); - test_schedule.nodes_evicted_in_comm[proc][next_step].clear(); + testSchedule.nodesEvictedInComm_[proc][step].insert(testSchedule.nodesEvictedInComm_[proc][step].end(), + testSchedule.nodesEvictedInComm_[proc][nextStep].begin(), + testSchedule.nodesEvictedInComm_[proc][nextStep].end()); + testSchedule.nodesEvictedInComm_[proc][nextStep].clear(); } - if (test_schedule.isValid()) { - is_removed[next_step] = true; - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - compute_steps_for_proc_superstep[proc][step] = test_schedule.compute_steps_for_proc_superstep[proc][step]; - compute_steps_for_proc_superstep[proc][next_step].clear(); + if (testSchedule.IsValid()) { + isRemoved[nextStep] = true; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + computeStepsForProcSuperstep_[proc][step] = testSchedule.computeStepsForProcSuperstep_[proc][step]; + computeStepsForProcSuperstep_[proc][nextStep].clear(); - nodes_sent_up[proc][step] = test_schedule.nodes_sent_up[proc][step]; - nodes_sent_up[proc][next_step].clear(); + nodesSentUp_[proc][step] = testSchedule.nodesSentUp_[proc][step]; + nodesSentUp_[proc][nextStep].clear(); - nodes_sent_down[proc][prev_step] = test_schedule.nodes_sent_down[proc][prev_step]; - nodes_sent_down[proc][step] = nodes_sent_down[proc][next_step]; - nodes_sent_down[proc][next_step].clear(); + nodesSentDown_[proc][prevStep] = testSchedule.nodesSentDown_[proc][prevStep]; + nodesSentDown_[proc][step] = 
nodesSentDown_[proc][nextStep]; + nodesSentDown_[proc][nextStep].clear(); - nodes_evicted_in_comm[proc][step] = test_schedule.nodes_evicted_in_comm[proc][step]; - nodes_evicted_in_comm[proc][next_step].clear(); + nodesEvictedInComm_[proc][step] = testSchedule.nodesEvictedInComm_[proc][step]; + nodesEvictedInComm_[proc][nextStep].clear(); } } else { break; @@ -1888,136 +1881,135 @@ void PebblingSchedule::TryToMergeSupersteps() { } } - unsigned new_nr_supersteps = 0; - for (unsigned step = 0; step < number_of_supersteps; ++step) { - if (!is_removed[step]) { - ++new_nr_supersteps; + unsigned newNrSupersteps = 0; + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { + if (!isRemoved[step]) { + ++newNrSupersteps; } } - if (new_nr_supersteps == number_of_supersteps) { + if (newNrSupersteps == numberOfSupersteps_) { return; } - PebblingSchedule shortened_schedule = *this; - shortened_schedule.updateNumberOfSupersteps(new_nr_supersteps); + PebblingSchedule shortenedSchedule = *this; + shortenedSchedule.UpdateNumberOfSupersteps(newNrSupersteps); - unsigned new_index = 0; - for (unsigned step = 0; step < number_of_supersteps; ++step) { - if (is_removed[step]) { + unsigned newIndex = 0; + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { + if (isRemoved[step]) { continue; } - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - shortened_schedule.compute_steps_for_proc_superstep[proc][new_index] = compute_steps_for_proc_superstep[proc][step]; - shortened_schedule.nodes_sent_up[proc][new_index] = nodes_sent_up[proc][step]; - shortened_schedule.nodes_sent_down[proc][new_index] = nodes_sent_down[proc][step]; - shortened_schedule.nodes_evicted_in_comm[proc][new_index] = nodes_evicted_in_comm[proc][step]; + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + shortenedSchedule.computeStepsForProcSuperstep_[proc][newIndex] = computeStepsForProcSuperstep_[proc][step]; + shortenedSchedule.nodesSentUp_[proc][newIndex] 
= nodesSentUp_[proc][step]; + shortenedSchedule.nodesSentDown_[proc][newIndex] = nodesSentDown_[proc][step]; + shortenedSchedule.nodesEvictedInComm_[proc][newIndex] = nodesEvictedInComm_[proc][step]; } - ++new_index; + ++newIndex; } - *this = shortened_schedule; + *this = shortenedSchedule; - if (!isValid()) { + if (!IsValid()) { std::cout << "ERROR: schedule is not valid after superstep merging." << std::endl; } } -template -PebblingSchedule PebblingSchedule::ExpandMemSchedule(const BspInstance &original_instance, - const std::vector mapping_to_coarse) const { - std::map> original_vertices_for_coarse_ID; - for (vertex_idx node = 0; node < original_instance.numberOfVertices(); ++node) { - original_vertices_for_coarse_ID[mapping_to_coarse[node]].insert(node); +template +PebblingSchedule PebblingSchedule::ExpandMemSchedule(const BspInstance &originalInstance, + const std::vector mappingToCoarse) const { + std::map> originalVerticesForCoarseID; + for (VertexIdx node = 0; node < originalInstance.NumberOfVertices(); ++node) { + originalVerticesForCoarseID[mappingToCoarse[node]].insert(node); } - PebblingSchedule fine_schedule; - fine_schedule.instance = &original_instance; - fine_schedule.updateNumberOfSupersteps(number_of_supersteps); + PebblingSchedule fineSchedule; + fineSchedule.instance_ = &originalInstance; + fineSchedule.UpdateNumberOfSupersteps(numberOfSupersteps_); - for (unsigned step = 0; step < number_of_supersteps; ++step) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { // computation phase - for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { - vertex_idx node = computeStep.node; - for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) { - fine_schedule.compute_steps_for_proc_superstep[proc][step].emplace_back(original_node); + for (const auto 
&computeStep : computeStepsForProcSuperstep_[proc][step]) { + VertexIdx node = computeStep.node_; + for (VertexIdx originalNode : originalVerticesForCoarseID[node]) { + fineSchedule.computeStepsForProcSuperstep_[proc][step].emplace_back(originalNode); } - for (vertex_idx to_remove : computeStep.nodes_evicted_after) { - for (vertex_idx original_node : original_vertices_for_coarse_ID[to_remove]) { - fine_schedule.compute_steps_for_proc_superstep[proc][step].back().nodes_evicted_after.push_back( - original_node); + for (VertexIdx toRemove : computeStep.nodeEvictedAfter_) { + for (VertexIdx originalNode : originalVerticesForCoarseID[toRemove]) { + fineSchedule.computeStepsForProcSuperstep_[proc][step].back().nodeEvictedAfter_.push_back(originalNode); } } } // communication phase - for (vertex_idx node : nodes_sent_up[proc][step]) { - for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) { - fine_schedule.nodes_sent_up[proc][step].push_back(original_node); + for (VertexIdx node : nodesSentUp_[proc][step]) { + for (VertexIdx originalNode : originalVerticesForCoarseID[node]) { + fineSchedule.nodesSentUp_[proc][step].push_back(originalNode); } } - for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { - for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) { - fine_schedule.nodes_evicted_in_comm[proc][step].push_back(original_node); + for (VertexIdx node : nodesEvictedInComm_[proc][step]) { + for (VertexIdx originalNode : originalVerticesForCoarseID[node]) { + fineSchedule.nodesEvictedInComm_[proc][step].push_back(originalNode); } } - for (vertex_idx node : nodes_sent_down[proc][step]) { - for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) { - fine_schedule.nodes_sent_down[proc][step].push_back(original_node); + for (VertexIdx node : nodesSentDown_[proc][step]) { + for (VertexIdx originalNode : originalVerticesForCoarseID[node]) { + fineSchedule.nodesSentDown_[proc][step].push_back(originalNode); } } } } - 
fine_schedule.cleanSchedule(); - return fine_schedule; + fineSchedule.CleanSchedule(); + return fineSchedule; } -template -BspSchedule PebblingSchedule::ConvertToBsp() const { - std::vector node_to_proc(instance->numberOfVertices(), UINT_MAX), - node_to_supstep(instance->numberOfVertices(), UINT_MAX); +template +BspSchedule PebblingSchedule::ConvertToBsp() const { + std::vector nodeToProc(instance_->NumberOfVertices(), UINT_MAX), + nodeToSupstep(instance_->NumberOfVertices(), UINT_MAX); - for (unsigned step = 0; step < number_of_supersteps; ++step) { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { - for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { - const vertex_idx &node = computeStep.node; - if (node_to_proc[node] == UINT_MAX) { - node_to_proc[node] = proc; - node_to_supstep[node] = step; + for (unsigned step = 0; step < numberOfSupersteps_; ++step) { + for (unsigned proc = 0; proc < instance_->NumberOfProcessors(); ++proc) { + for (const auto &computeStep : computeStepsForProcSuperstep_[proc][step]) { + const VertexIdx &node = computeStep.node_; + if (nodeToProc[node] == UINT_MAX) { + nodeToProc[node] = proc; + nodeToSupstep[node] = step; } } } } - if (need_to_load_inputs) { - for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { - if (instance->getComputationalDag().in_degree(node) == 0) { - unsigned min_superstep = UINT_MAX, proc_chosen = 0; - for (vertex_idx succ : instance->getComputationalDag().children(node)) { - if (node_to_supstep[succ] < min_superstep) { - min_superstep = node_to_supstep[succ]; - proc_chosen = node_to_proc[succ]; + if (needToLoadInputs_) { + for (VertexIdx node = 0; node < instance_->NumberOfVertices(); ++node) { + if (instance_->GetComputationalDag().InDegree(node) == 0) { + unsigned minSuperstep = UINT_MAX, procChosen = 0; + for (VertexIdx succ : instance_->GetComputationalDag().Children(node)) { + if (nodeToSupstep[succ] < minSuperstep) { + minSuperstep = 
nodeToSupstep[succ]; + procChosen = nodeToProc[succ]; } } - node_to_supstep[node] = min_superstep; - node_to_proc[node] = proc_chosen; + nodeToSupstep[node] = minSuperstep; + nodeToProc[node] = procChosen; } } } - BspSchedule schedule(*instance, node_to_proc, node_to_supstep); - if (schedule.satisfiesPrecedenceConstraints() && schedule.satisfiesNodeTypeConstraints()) { - schedule.setAutoCommunicationSchedule(); + BspSchedule schedule(*instance_, nodeToProc, nodeToSupstep); + if (schedule.SatisfiesPrecedenceConstraints() && schedule.satisfiesNodeTypeConstraints()) { + schedule.SetAutoCommunicationSchedule(); return schedule; } else { std::cout << "ERROR: no direct conversion to Bsp schedule exists, using dummy schedule instead." << std::endl; - return BspSchedule(*instance); + return BspSchedule(*instance_); } } diff --git a/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp b/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp index 71044414..797e2cb6 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp @@ -22,7 +22,7 @@ limitations under the License. #include struct COPTEnv { - static Envr getInstance() { + static Envr GetInstance() { static Envr env; return env; } diff --git a/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp b/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp index dce14592..b6b7948b 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp @@ -28,116 +28,109 @@ limitations under the License. 
namespace osp { -template -class MultiProcessorPebbling : public Scheduler { - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); +template +class MultiProcessorPebbling : public Scheduler { + static_assert(isComputationalDagV, "PebblingSchedule can only be used with computational DAGs."); private: - using vertex_idx = vertex_idx_t; - using workweight_type = v_workw_t; - using commweight_type = v_commw_t; - using memweight_type = v_memw_t; + using VertexIdx = VertexIdxT; - Model model; + Model model_; - bool write_solutions_found; + bool writeSolutionsFound_; class WriteSolutionCallback : public CallbackBase { private: - unsigned counter; - unsigned max_number_solution; + unsigned counter_; + unsigned maxNumberSolution_; - double best_obj; + double bestObj_; public: WriteSolutionCallback() - : counter(0), - max_number_solution(500), - best_obj(COPT_INFINITY), - write_solutions_path_cb(""), - solution_file_prefix_cb("") {} + : counter_(0), maxNumberSolution_(500), bestObj_(COPT_INFINITY), writeSolutionsPathCb_(""), solutionFilePrefixCb_("") {} - std::string write_solutions_path_cb; - std::string solution_file_prefix_cb; + std::string writeSolutionsPathCb_; + std::string solutionFilePrefixCb_; void callback() override; }; - WriteSolutionCallback solution_callback; + WriteSolutionCallback solutionCallback_; protected: - std::vector> compute; - std::vector> send_up; - std::vector> send_down; - std::vector> has_red; - std::vector has_blue; + std::vector> compute_; + std::vector> sendUp_; + std::vector> sendDown_; + std::vector> hasRed_; + std::vector hasBlue_; - std::vector>> compute_exists; - std::vector>> send_up_exists; - std::vector>> send_down_exists; - std::vector> has_blue_exists; + std::vector>> computeExists_; + std::vector>> sendUpExists_; + std::vector>> sendDownExists_; + std::vector> hasBlueExists_; - VarArray comp_phase; - VarArray comm_phase; - VarArray send_up_phase; - VarArray send_down_phase; + VarArray 
compPhase_; + VarArray commPhase_; + VarArray sendUpPhase_; + VarArray sendDownPhase_; - VarArray comm_phase_ends; - VarArray comp_phase_ends; + VarArray commPhaseEnds_; + VarArray compPhaseEnds_; - unsigned max_time = 0; - unsigned time_limit_seconds; + unsigned maxTime_ = 0; + unsigned timeLimitSeconds_; // problem settings - bool slidingPebbles = false; - bool mergeSteps = true; - bool synchronous = true; - bool up_and_down_cost_summed = true; - bool allows_recomputation = true; - bool restrict_step_types = false; - unsigned compute_steps_per_cycle = 3; - bool need_to_load_inputs = true; - std::set needs_blue_at_end; - std::vector> has_red_in_beginning; - bool verbose = false; + bool slidingPebbles_ = false; + bool mergeSteps_ = true; + bool synchronous_ = true; + bool upAndDownCostSummed_ = true; + bool allowsRecomputation_ = true; + bool restrictStepTypes_ = false; + unsigned computeStepsPerCycle_ = 3; + bool needToLoadInputs_ = true; + std::set needsBlueAtEnd_; + std::vector> hasRedInBeginning_; + bool verbose_ = false; - void constructPebblingScheduleFromSolution(PebblingSchedule &schedule); + void ConstructPebblingScheduleFromSolution(PebblingSchedule &schedule); - void setInitialSolution(const BspInstance &instance, - const std::vector>> &computeSteps, - const std::vector>> &sendUpSteps, - const std::vector>> &sendDownSteps, - const std::vector>> &nodesEvictedAfterStep); + void SetInitialSolution(const BspInstance &instance, + const std::vector>> &computeSteps, + const std::vector>> &sendUpSteps, + const std::vector>> &sendDownSteps, + const std::vector>> &nodesEvictedAfterStep); - unsigned computeMaxTimeForInitialSolution(const BspInstance &instance, - const std::vector>> &computeSteps, - const std::vector>> &sendUpSteps, - const std::vector>> &sendDownSteps) const; + unsigned ComputeMaxTimeForInitialSolution(const BspInstance &instance, + const std::vector>> &computeSteps, + const std::vector>> &sendUpSteps, + const std::vector>> &sendDownSteps) const; - 
void setupBaseVariablesConstraints(const BspInstance &instance); + void SetupBaseVariablesConstraints(const BspInstance &instance); - void setupSyncPhaseVariablesConstraints(const BspInstance &instance); - void setupSyncObjective(const BspInstance &instance); + void SetupSyncPhaseVariablesConstraints(const BspInstance &instance); + void SetupSyncObjective(const BspInstance &instance); - void setupAsyncVariablesConstraintsObjective(const BspInstance &instance); - void setupBspVariablesConstraintsObjective(const BspInstance &instance); + void SetupAsyncVariablesConstraintsObjective(const BspInstance &instance); + void SetupBspVariablesConstraintsObjective(const BspInstance &instance); - void solveILP(); + void SolveIlp(); public: MultiProcessorPebbling() - : Scheduler(), model(COPTEnv::getInstance().CreateModel("MPP")), write_solutions_found(false), max_time(0) {} + : Scheduler(), model_(COPTEnv::GetInstance().CreateModel("MPP")), writeSolutionsFound_(false), maxTime_(0) {} virtual ~MultiProcessorPebbling() = default; - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override; - virtual RETURN_STATUS computeSynchPebbling(PebblingSchedule &schedule); + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override; + virtual ReturnStatus ComputeSynchPebbling(PebblingSchedule &schedule); - virtual RETURN_STATUS computePebbling(PebblingSchedule &schedule, bool use_async = false); + virtual ReturnStatus ComputePebbling(PebblingSchedule &schedule, bool useAsync = false); - virtual RETURN_STATUS computePebblingWithInitialSolution(const PebblingSchedule &initial_solution, - PebblingSchedule &out_schedule, - bool use_async = false); + virtual ReturnStatus ComputePebblingWithInitialSolution(const PebblingSchedule &initialSolution, + PebblingSchedule &outSchedule, + bool useAsync = false); /** * @brief Enables writing intermediate solutions. 
@@ -150,10 +143,10 @@ class MultiProcessorPebbling : public Scheduler { * @param path The path where the solutions will be written. * @param file_prefix The prefix that will be used for the solution files. */ - inline void enableWriteIntermediateSol(std::string path, std::string file_prefix) { - write_solutions_found = true; - solution_callback.write_solutions_path_cb = path; - solution_callback.solution_file_prefix_cb = file_prefix; + inline void EnableWriteIntermediateSol(std::string path, std::string filePrefix) { + writeSolutionsFound_ = true; + solutionCallback_.writeSolutionsPathCb_ = path; + solutionCallback_.solutionFilePrefixCb_ = filePrefix; } /** @@ -163,316 +156,317 @@ class MultiProcessorPebbling : public Scheduler { * calling this function, the `enableWriteIntermediateSol` function needs * to be called again in order to enable writing of intermediate solutions. */ - inline void disableWriteIntermediateSol() { write_solutions_found = false; } + inline void DisableWriteIntermediateSol() { writeSolutionsFound_ = false; } /** * @brief Get the best gap found by the solver. * * @return The best gap found by the solver. */ - inline double bestGap() { return model.GetDblAttr(COPT_DBLATTR_BESTGAP); } + inline double BestGap() { return model_.GetDblAttr(COPT_DBLATTR_BESTGAP); } /** * @brief Get the best objective value found by the solver. * * @return The best objective value found by the solver. */ - inline double bestObjective() { return model.GetDblAttr(COPT_DBLATTR_BESTOBJ); } + inline double BestObjective() { return model_.GetDblAttr(COPT_DBLATTR_BESTOBJ); } /** * @brief Get the best bound found by the solver. * * @return The best bound found by the solver. */ - inline double bestBound() { return model.GetDblAttr(COPT_DBLATTR_BESTBND); } + inline double BestBound() { return model_.GetDblAttr(COPT_DBLATTR_BESTBND); } /** * @brief Get the name of the schedule. * * @return The name of the schedule. 
*/ - virtual std::string getScheduleName() const override { return "MultiProcessorPebbling"; } + virtual std::string GetScheduleName() const override { return "MultiProcessorPebbling"; } // getters and setters for problem parameters - inline bool allowsSlidingPebbles() const { return slidingPebbles; } + inline bool AllowsSlidingPebbles() const { return slidingPebbles_; } - inline bool allowsMergingSteps() const { return mergeSteps; } + inline bool AllowsMergingSteps() const { return mergeSteps_; } - inline bool isUpAndDownCostSummed() const { return up_and_down_cost_summed; } + inline bool IsUpAndDownCostSummed() const { return upAndDownCostSummed_; } - inline bool allowsRecomputation() const { return allows_recomputation; } + inline bool AllowsRecomputation() const { return allowsRecomputation_; } - inline bool hasRestrictedStepTypes() const { return restrict_step_types; } + inline bool HasRestrictedStepTypes() const { return restrictStepTypes_; } - inline bool needsToLoadInputs() const { return need_to_load_inputs; } + inline bool NeedsToLoadInputs() const { return needToLoadInputs_; } - inline unsigned getComputeStepsPerCycle() const { return compute_steps_per_cycle; } + inline unsigned GetComputeStepsPerCycle() const { return computeStepsPerCycle_; } - inline unsigned getMaxTime() const { return max_time; } + inline unsigned GetMaxTime() const { return maxTime_; } - inline void setSlidingPebbles(const bool slidingPebbles_) { slidingPebbles = slidingPebbles_; } + inline void SetSlidingPebbles(const bool slidingPebbles) { slidingPebbles_ = slidingPebbles; } - inline void setMergingSteps(const bool mergeSteps_) { mergeSteps = mergeSteps_; } + inline void SetMergingSteps(const bool mergeSteps) { mergeSteps_ = mergeSteps; } - inline void setUpAndDownCostSummed(const bool is_summed_) { up_and_down_cost_summed = is_summed_; } + inline void SetUpAndDownCostSummed(const bool isSummed) { upAndDownCostSummed_ = isSummed; } - inline void setRecomputation(const bool 
allow_recompute_) { allows_recomputation = allow_recompute_; } + inline void SetRecomputation(const bool allowRecompute) { allowsRecomputation_ = allowRecompute; } - inline void setRestrictStepTypes(const bool restrict_) { - restrict_step_types = restrict_; - if (restrict_) { - mergeSteps = true; + inline void SetRestrictStepTypes(const bool restrict) { + restrictStepTypes_ = restrict; + if (restrict) { + mergeSteps_ = true; } } - inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_; } + inline void SetNeedToLoadInputs(const bool loadInputs) { needToLoadInputs_ = loadInputs; } - inline void setComputeStepsPerCycle(const unsigned steps_per_cycle_) { compute_steps_per_cycle = steps_per_cycle_; } + inline void SetComputeStepsPerCycle(const unsigned stepsPerCycle) { computeStepsPerCycle_ = stepsPerCycle; } - inline void setMaxTime(const unsigned max_time_) { max_time = max_time_; } + inline void SetMaxTime(const unsigned maxTime) { maxTime_ = maxTime; } - inline void setNeedsBlueAtEnd(const std::set &needs_blue_) { needs_blue_at_end = needs_blue_; } + inline void SetNeedsBlueAtEnd(const std::set &needsBlue) { needsBlueAtEnd_ = needsBlue; } - inline void setHasRedInBeginning(const std::vector> &has_red_) { has_red_in_beginning = has_red_; } + inline void SetHasRedInBeginning(const std::vector> &hasRed) { hasRedInBeginning_ = hasRed; } - inline void setVerbose(const bool verbose_) { verbose = verbose_; } + inline void SetVerbose(const bool verbose) { verbose_ = verbose; } - inline void setTimeLimitSeconds(unsigned time_limit_seconds_) { time_limit_seconds = time_limit_seconds_; } + inline void SetTimeLimitSeconds(unsigned timeLimitSeconds) { timeLimitSeconds_ = timeLimitSeconds; } - bool hasEmptyStep(const BspInstance &instance); + bool HasEmptyStep(const BspInstance &instance); }; // implementation -template -void MultiProcessorPebbling::solveILP() { - if (!verbose) { - model.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0); +template 
+void MultiProcessorPebbling::SolveIlp() { + if (!verbose_) { + model_.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0); } - model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, time_limit_seconds); - model.SetIntParam(COPT_INTPARAM_THREADS, 128); + model_.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds_); + model_.SetIntParam(COPT_INTPARAM_THREADS, 128); - model.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); - model.SetIntParam(COPT_INTPARAM_LPMETHOD, 1); - model.SetIntParam(COPT_INTPARAM_ROUNDINGHEURLEVEL, 1); + model_.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); + model_.SetIntParam(COPT_INTPARAM_LPMETHOD, 1); + model_.SetIntParam(COPT_INTPARAM_ROUNDINGHEURLEVEL, 1); - model.SetIntParam(COPT_INTPARAM_SUBMIPHEURLEVEL, 1); + model_.SetIntParam(COPT_INTPARAM_SUBMIPHEURLEVEL, 1); // model.SetIntParam(COPT_INTPARAM_PRESOLVE, 1); // model.SetIntParam(COPT_INTPARAM_CUTLEVEL, 0); - model.SetIntParam(COPT_INTPARAM_TREECUTLEVEL, 2); + model_.SetIntParam(COPT_INTPARAM_TREECUTLEVEL, 2); // model.SetIntParam(COPT_INTPARAM_DIVINGHEURLEVEL, 2); - model.Solve(); + model_.Solve(); } -template -RETURN_STATUS MultiProcessorPebbling::computeSchedule(BspSchedule &schedule) { - if (max_time == 0) { - max_time = 2 * static_cast(schedule.getInstance().numberOfVertices()); +template +ReturnStatus MultiProcessorPebbling::ComputeSchedule(BspSchedule &schedule) { + if (maxTime_ == 0) { + maxTime_ = 2 * static_cast(schedule.GetInstance().NumberOfVertices()); } - setupBaseVariablesConstraints(schedule.getInstance()); - setupSyncPhaseVariablesConstraints(schedule.getInstance()); - setupBspVariablesConstraintsObjective(schedule.getInstance()); + SetupBaseVariablesConstraints(schedule.GetInstance()); + SetupSyncPhaseVariablesConstraints(schedule.GetInstance()); + SetupBspVariablesConstraintsObjective(schedule.GetInstance()); - solveILP(); + SolveIlp(); - if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - return RETURN_STATUS::OSP_SUCCESS; + if 
(model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { + return ReturnStatus::OSP_SUCCESS; - } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + } else if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { + return ReturnStatus::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - return RETURN_STATUS::BEST_FOUND; + if (model_.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { + return ReturnStatus::BEST_FOUND; } else { - return RETURN_STATUS::TIMEOUT; + return ReturnStatus::TIMEOUT; } } -}; +} -template -RETURN_STATUS MultiProcessorPebbling::computeSynchPebbling(PebblingSchedule &schedule) { - const BspInstance &instance = schedule.getInstance(); +template +ReturnStatus MultiProcessorPebbling::ComputeSynchPebbling(PebblingSchedule &schedule) { + const BspInstance &instance = schedule.GetInstance(); - if (max_time == 0) { - max_time = 2 * static_cast(instance.numberOfVertices()); + if (maxTime_ == 0) { + maxTime_ = 2 * static_cast(instance.NumberOfVertices()); } - mergeSteps = false; + mergeSteps_ = false; - setupBaseVariablesConstraints(instance); - setupSyncPhaseVariablesConstraints(instance); - setupSyncObjective(instance); + SetupBaseVariablesConstraints(instance); + SetupSyncPhaseVariablesConstraints(instance); + SetupSyncObjective(instance); - solveILP(); + SolveIlp(); - if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - constructPebblingScheduleFromSolution(schedule); - return RETURN_STATUS::OSP_SUCCESS; + if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { + ConstructPebblingScheduleFromSolution(schedule); + return ReturnStatus::OSP_SUCCESS; - } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + } else if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { + return ReturnStatus::ERROR; } else { - if 
(model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - constructPebblingScheduleFromSolution(schedule); - return RETURN_STATUS::OSP_SUCCESS; + if (model_.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { + ConstructPebblingScheduleFromSolution(schedule); + return ReturnStatus::OSP_SUCCESS; } else { - return RETURN_STATUS::TIMEOUT; + return ReturnStatus::TIMEOUT; } } } -template -RETURN_STATUS MultiProcessorPebbling::computePebbling(PebblingSchedule &schedule, bool use_async) { - const BspInstance &instance = schedule.getInstance(); +template +ReturnStatus MultiProcessorPebbling::ComputePebbling(PebblingSchedule &schedule, bool useAsync) { + const BspInstance &instance = schedule.GetInstance(); - if (max_time == 0) { - max_time = 2 * static_cast(instance.numberOfVertices()); + if (maxTime_ == 0) { + maxTime_ = 2 * static_cast(instance.NumberOfVertices()); } - synchronous = !use_async; + synchronous_ = !useAsync; - setupBaseVariablesConstraints(instance); - if (synchronous) { - setupSyncPhaseVariablesConstraints(instance); - setupBspVariablesConstraintsObjective(instance); + SetupBaseVariablesConstraints(instance); + if (synchronous_) { + SetupSyncPhaseVariablesConstraints(instance); + SetupBspVariablesConstraintsObjective(instance); } else { - setupAsyncVariablesConstraintsObjective(instance); + SetupAsyncVariablesConstraintsObjective(instance); } - solveILP(); + SolveIlp(); - if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - constructPebblingScheduleFromSolution(schedule); - return schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR; + if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { + ConstructPebblingScheduleFromSolution(schedule); + return schedule.IsValid() ? 
ReturnStatus::OSP_SUCCESS : ReturnStatus::ERROR; - } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + } else if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { + return ReturnStatus::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - constructPebblingScheduleFromSolution(schedule); - return schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR; + if (model_.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { + ConstructPebblingScheduleFromSolution(schedule); + return schedule.IsValid() ? ReturnStatus::OSP_SUCCESS : ReturnStatus::ERROR; } else { - return RETURN_STATUS::TIMEOUT; + return ReturnStatus::TIMEOUT; } } } -template -RETURN_STATUS MultiProcessorPebbling::computePebblingWithInitialSolution(const PebblingSchedule &initial_solution, - PebblingSchedule &out_schedule, - bool use_async) { - const BspInstance &instance = initial_solution.getInstance(); +template +ReturnStatus MultiProcessorPebbling::ComputePebblingWithInitialSolution(const PebblingSchedule &initialSolution, + PebblingSchedule &outSchedule, + bool useAsync) { + const BspInstance &instance = initialSolution.GetInstance(); - std::vector>> computeSteps; - std::vector>> sendUpSteps; - std::vector>> sendDownSteps; - std::vector>> nodesEvictedAfterStep; + std::vector>> computeSteps; + std::vector>> sendUpSteps; + std::vector>> sendDownSteps; + std::vector>> nodesEvictedAfterStep; - synchronous = !use_async; + synchronous_ = !useAsync; - initial_solution.getDataForMultiprocessorPebbling(computeSteps, sendUpSteps, sendDownSteps, nodesEvictedAfterStep); + initialSolution.GetDataForMultiprocessorPebbling(computeSteps, sendUpSteps, sendDownSteps, nodesEvictedAfterStep); - max_time = computeMaxTimeForInitialSolution(instance, computeSteps, sendUpSteps, sendDownSteps); + maxTime_ = ComputeMaxTimeForInitialSolution(instance, computeSteps, sendUpSteps, sendDownSteps); - if (verbose) { - std::cout << 
"Max time set at " << max_time << std::endl; + if (verbose_) { + std::cout << "Max time set at " << maxTime_ << std::endl; } - setupBaseVariablesConstraints(instance); - if (synchronous) { - setupSyncPhaseVariablesConstraints(instance); - setupBspVariablesConstraintsObjective(instance); + SetupBaseVariablesConstraints(instance); + if (synchronous_) { + SetupSyncPhaseVariablesConstraints(instance); + SetupBspVariablesConstraintsObjective(instance); } else { - setupAsyncVariablesConstraintsObjective(instance); + SetupAsyncVariablesConstraintsObjective(instance); } - setInitialSolution(instance, computeSteps, sendUpSteps, sendDownSteps, nodesEvictedAfterStep); + SetInitialSolution(instance, computeSteps, sendUpSteps, sendDownSteps, nodesEvictedAfterStep); - if (verbose) { + if (verbose_) { std::cout << "Initial solution set." << std::endl; } - solveILP(); + SolveIlp(); - if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - constructPebblingScheduleFromSolution(out_schedule); - return out_schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR; + if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { + ConstructPebblingScheduleFromSolution(outSchedule); + return outSchedule.IsValid() ? ReturnStatus::OSP_SUCCESS : ReturnStatus::ERROR; - } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; + } else if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { + return ReturnStatus::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - constructPebblingScheduleFromSolution(out_schedule); - return out_schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR; + if (model_.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { + ConstructPebblingScheduleFromSolution(outSchedule); + return outSchedule.IsValid() ? 
ReturnStatus::OSP_SUCCESS : ReturnStatus::ERROR; } else { - return RETURN_STATUS::TIMEOUT; + return ReturnStatus::TIMEOUT; } } } -template -void MultiProcessorPebbling::setupBaseVariablesConstraints(const BspInstance &instance) { +template +void MultiProcessorPebbling::SetupBaseVariablesConstraints(const BspInstance &instance) { /* Variables */ - compute = std::vector>(instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); + compute_ + = std::vector>(instance.NumberOfVertices(), std::vector(instance.NumberOfProcessors())); - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - compute[node][processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "node_processor_time"); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + compute_[node][processor] = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "node_processor_time"); } } - compute_exists.resize(instance.numberOfVertices(), - std::vector>(instance.numberOfProcessors(), std::vector(max_time, true))); + computeExists_.resize(instance.NumberOfVertices(), + std::vector>(instance.NumberOfProcessors(), std::vector(maxTime_, true))); - send_up = std::vector>(instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); + sendUp_ = std::vector>(instance.NumberOfVertices(), std::vector(instance.NumberOfProcessors())); - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - send_up[node][processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "send_up"); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + sendUp_[node][processor] = 
model_.AddVars(static_cast(maxTime_), COPT_BINARY, "sendUp"); } } - send_up_exists.resize(instance.numberOfVertices(), - std::vector>(instance.numberOfProcessors(), std::vector(max_time, true))); + sendUpExists_.resize(instance.NumberOfVertices(), + std::vector>(instance.NumberOfProcessors(), std::vector(maxTime_, true))); - send_down - = std::vector>(instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); + sendDown_ + = std::vector>(instance.NumberOfVertices(), std::vector(instance.NumberOfProcessors())); - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - send_down[node][processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "send_down"); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + sendDown_[node][processor] = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "sendDown"); } } - send_down_exists.resize(instance.numberOfVertices(), - std::vector>(instance.numberOfProcessors(), std::vector(max_time, true))); + sendDownExists_.resize(instance.NumberOfVertices(), + std::vector>(instance.NumberOfProcessors(), std::vector(maxTime_, true))); - has_blue = std::vector(instance.numberOfVertices()); + hasBlue_ = std::vector(instance.NumberOfVertices()); - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - has_blue[node] = model.AddVars(static_cast(max_time), COPT_BINARY, "blue_pebble"); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + hasBlue_[node] = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "blue_pebble"); } - has_blue_exists.resize(instance.numberOfVertices(), std::vector(max_time, true)); + hasBlueExists_.resize(instance.NumberOfVertices(), std::vector(maxTime_, true)); - has_red = std::vector>(instance.numberOfVertices(), 
std::vector(instance.numberOfProcessors())); + hasRed_ = std::vector>(instance.NumberOfVertices(), std::vector(instance.NumberOfProcessors())); - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - has_red[node][processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "red_pebble"); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + hasRed_[node][processor] = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "red_pebble"); } } @@ -480,53 +474,53 @@ void MultiProcessorPebbling::setupBaseVariablesConstraints(const BspIns Invalidate variables based on various factors (node types, input loading, step type restriction) */ - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - if (!instance.isCompatible(node, processor)) { - for (unsigned t = 0; t < max_time; t++) { - compute_exists[node][processor][t] = false; - send_up_exists[node][processor][t] = false; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + if (!instance.IsCompatible(node, processor)) { + for (unsigned t = 0; t < maxTime_; t++) { + computeExists_[node][processor][t] = false; + sendUpExists_[node][processor][t] = false; } } } } // restrict source nodes if they need to be loaded - if (need_to_load_inputs) { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (instance.getComputationalDag().in_degree(node) == 0) { - for (unsigned t = 0; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - compute_exists[node][processor][t] = false; - send_up_exists[node][processor][t] = false; + 
if (needToLoadInputs_) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (instance.GetComputationalDag().InDegree(node) == 0) { + for (unsigned t = 0; t < maxTime_; t++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + computeExists_[node][processor][t] = false; + sendUpExists_[node][processor][t] = false; } - has_blue_exists[node][t] = false; + hasBlueExists_[node][t] = false; } } } } // restrict step types for simpler ILP - if (restrict_step_types) { - for (unsigned t = 0; t < max_time; t++) { - bool this_is_a_comm_step = (t % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1); - if (!need_to_load_inputs && t % (compute_steps_per_cycle + 2) == compute_steps_per_cycle) { - this_is_a_comm_step = true; - } - if (need_to_load_inputs && t % (compute_steps_per_cycle + 2) == 0) { - this_is_a_comm_step = true; - } - if (this_is_a_comm_step) { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - compute_exists[node][processor][t] = false; + if (restrictStepTypes_) { + for (unsigned t = 0; t < maxTime_; t++) { + bool thisIsACommStep = (t % (computeStepsPerCycle_ + 2) == computeStepsPerCycle_ + 1); + if (!needToLoadInputs_ && t % (computeStepsPerCycle_ + 2) == computeStepsPerCycle_) { + thisIsACommStep = true; + } + if (needToLoadInputs_ && t % (computeStepsPerCycle_ + 2) == 0) { + thisIsACommStep = true; + } + if (thisIsACommStep) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + computeExists_[node][processor][t] = false; } } } else { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - send_up_exists[node][processor][t] = false; - 
send_down_exists[node][processor][t] = false; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + sendUpExists_[node][processor][t] = false; + sendDownExists_[node][processor][t] = false; } } } @@ -537,393 +531,393 @@ void MultiProcessorPebbling::setupBaseVariablesConstraints(const BspIns Constraints */ - if (!mergeSteps) { - for (unsigned t = 0; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { + if (!mergeSteps_) { + for (unsigned t = 0; t < maxTime_; t++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { Expr expr; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (compute_exists[node][processor][t]) { - expr += compute[node][processor][static_cast(t)]; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (computeExists_[node][processor][t]) { + expr += compute_[node][processor][static_cast(t)]; } - if (send_up_exists[node][processor][t]) { - expr += send_up[node][processor][static_cast(t)]; + if (sendUpExists_[node][processor][t]) { + expr += sendUp_[node][processor][static_cast(t)]; } - if (send_down_exists[node][processor][t]) { - expr += send_down[node][processor][static_cast(t)]; + if (sendDownExists_[node][processor][t]) { + expr += sendDown_[node][processor][static_cast(t)]; } } - model.AddConstr(expr <= 1); + model_.AddConstr(expr <= 1); } } } else { // extra variables to indicate step types in step merging - std::vector comp_step_on_proc = std::vector(instance.numberOfProcessors()); - std::vector comm_step_on_proc = std::vector(instance.numberOfProcessors()); + std::vector compStepOnProc = std::vector(instance.NumberOfProcessors()); + std::vector commStepOnProc = std::vector(instance.NumberOfProcessors()); - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) 
{ - comp_step_on_proc[processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "comp_step_on_proc"); - comm_step_on_proc[processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "comm_step_on_proc"); + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + compStepOnProc[processor] = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "comp_step_on_proc"); + commStepOnProc[processor] = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "comm_step_on_proc"); } - const unsigned M = static_cast(instance.numberOfVertices()); + const unsigned m = static_cast(instance.NumberOfVertices()); - for (unsigned t = 0; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr_comp, expr_comm; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (compute_exists[node][processor][t]) { - expr_comp += compute[node][processor][static_cast(t)]; + for (unsigned t = 0; t < maxTime_; t++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + Expr exprComp, exprComm; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (computeExists_[node][processor][t]) { + exprComp += compute_[node][processor][static_cast(t)]; } - if (send_up_exists[node][processor][t]) { - expr_comm += send_up[node][processor][static_cast(t)]; + if (sendUpExists_[node][processor][t]) { + exprComm += sendUp_[node][processor][static_cast(t)]; } - if (send_down_exists[node][processor][t]) { - expr_comm += send_down[node][processor][static_cast(t)]; + if (sendDownExists_[node][processor][t]) { + exprComm += sendDown_[node][processor][static_cast(t)]; } } - model.AddConstr(M * comp_step_on_proc[processor][static_cast(t)] >= expr_comp); - model.AddConstr(2 * M * comm_step_on_proc[processor][static_cast(t)] >= expr_comm); + model_.AddConstr(m * compStepOnProc[processor][static_cast(t)] >= exprComp); + model_.AddConstr(2 * m * 
commStepOnProc[processor][static_cast(t)] >= exprComm); - model.AddConstr( - comp_step_on_proc[processor][static_cast(t)] + comm_step_on_proc[processor][static_cast(t)] <= 1); + model_.AddConstr(compStepOnProc[processor][static_cast(t)] + commStepOnProc[processor][static_cast(t)] + <= 1); } } } - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned t = 1; t < max_time; t++) { - if (!has_blue_exists[node][t]) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned t = 1; t < maxTime_; t++) { + if (!hasBlueExists_[node][t]) { continue; } Expr expr; - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - if (send_up_exists[node][processor][t - 1]) { - expr += send_up[node][processor][static_cast(t) - 1]; + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + if (sendUpExists_[node][processor][t - 1]) { + expr += sendUp_[node][processor][static_cast(t) - 1]; } } - model.AddConstr(has_blue[node][static_cast(t)] <= has_blue[node][static_cast(t) - 1] + expr); + model_.AddConstr(hasBlue_[node][static_cast(t)] <= hasBlue_[node][static_cast(t) - 1] + expr); } } - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 1; t < max_time; t++) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned t = 1; t < maxTime_; t++) { Expr expr; - if (compute_exists[node][processor][t - 1]) { - expr += compute[node][processor][static_cast(t) - 1]; + if (computeExists_[node][processor][t - 1]) { + expr += compute_[node][processor][static_cast(t) - 1]; } - if (send_down_exists[node][processor][t - 1]) { - expr += send_down[node][processor][static_cast(t) - 1]; + if (sendDownExists_[node][processor][t - 1]) 
{ + expr += sendDown_[node][processor][static_cast(t) - 1]; } - model.AddConstr(has_red[node][processor][static_cast(t)] - <= has_red[node][processor][static_cast(t) - 1] + expr); + model_.AddConstr(hasRed_[node][processor][static_cast(t)] + <= hasRed_[node][processor][static_cast(t) - 1] + expr); } } } - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 0; t < max_time; t++) { - if (!compute_exists[node][processor][t]) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned t = 0; t < maxTime_; t++) { + if (!computeExists_[node][processor][t]) { continue; } - for (const auto &source : instance.getComputationalDag().parents(node)) { - if (!mergeSteps || !compute_exists[source][processor][t]) { - model.AddConstr(compute[node][processor][static_cast(t)] - <= has_red[source][processor][static_cast(t)]); + for (const auto &source : instance.GetComputationalDag().Parents(node)) { + if (!mergeSteps_ || !computeExists_[source][processor][t]) { + model_.AddConstr(compute_[node][processor][static_cast(t)] + <= hasRed_[source][processor][static_cast(t)]); } else { - model.AddConstr(compute[node][processor][static_cast(t)] - <= has_red[source][processor][static_cast(t)] - + compute[source][processor][static_cast(t)]); + model_.AddConstr(compute_[node][processor][static_cast(t)] + <= hasRed_[source][processor][static_cast(t)] + + compute_[source][processor][static_cast(t)]); } } } } } - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 0; t < max_time; t++) { - if (send_up_exists[node][processor][t]) { - model.AddConstr(send_up[node][processor][static_cast(t)] <= 
has_red[node][processor][static_cast(t)]); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned t = 0; t < maxTime_; t++) { + if (sendUpExists_[node][processor][t]) { + model_.AddConstr(sendUp_[node][processor][static_cast(t)] + <= hasRed_[node][processor][static_cast(t)]); } } } } - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 0; t < max_time; t++) { - if (send_down_exists[node][processor][t] && has_blue_exists[node][t]) { - model.AddConstr(send_down[node][processor][static_cast(t)] <= has_blue[node][static_cast(t)]); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned t = 0; t < maxTime_; t++) { + if (sendDownExists_[node][processor][t] && hasBlueExists_[node][t]) { + model_.AddConstr(sendDown_[node][processor][static_cast(t)] <= hasBlue_[node][static_cast(t)]); } } } } - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 0; t < max_time; t++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned t = 0; t < maxTime_; t++) { Expr expr; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - expr += has_red[node][processor][static_cast(t)] * instance.getComputationalDag().vertex_mem_weight(node); - if (!slidingPebbles && compute_exists[node][processor][t]) { - expr += compute[node][processor][static_cast(t)] * instance.getComputationalDag().vertex_mem_weight(node); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + expr += hasRed_[node][processor][static_cast(t)] * instance.GetComputationalDag().VertexMemWeight(node); + if 
(!slidingPebbles_ && computeExists_[node][processor][t]) { + expr += compute_[node][processor][static_cast(t)] * instance.GetComputationalDag().VertexMemWeight(node); } } - model.AddConstr(expr <= instance.getArchitecture().memoryBound(processor)); + model_.AddConstr(expr <= instance.GetArchitecture().MemoryBound(processor)); } } - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - if (has_red_in_beginning.empty() - || has_red_in_beginning[processor].find(node) == has_red_in_beginning[processor].end()) { - model.AddConstr(has_red[node][processor][0] == 0); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + if (hasRedInBeginning_.empty() || hasRedInBeginning_[processor].find(node) == hasRedInBeginning_[processor].end()) { + model_.AddConstr(hasRed_[node][processor][0] == 0); } else { - model.AddConstr(has_red[node][processor][0] == 1); + model_.AddConstr(hasRed_[node][processor][0] == 1); } } } - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (!need_to_load_inputs || instance.getComputationalDag().in_degree(node) > 0) { - model.AddConstr(has_blue[node][0] == 0); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (!needToLoadInputs_ || instance.GetComputationalDag().InDegree(node) > 0) { + model_.AddConstr(hasBlue_[node][0] == 0); } } - if (needs_blue_at_end.empty()) // default case: blue pebbles required on sinks at the end + if (needsBlueAtEnd_.empty()) // default case: blue pebbles required on sinks at the end { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (instance.getComputationalDag().out_degree(node) == 0 && has_blue_exists[node][max_time - 1]) { - model.AddConstr(has_blue[node][static_cast(max_time) - 1] == 1); + for (VertexIdx node = 0; node < 
instance.NumberOfVertices(); node++) { + if (instance.GetComputationalDag().OutDegree(node) == 0 && hasBlueExists_[node][maxTime_ - 1]) { + model_.AddConstr(hasBlue_[node][static_cast(maxTime_) - 1] == 1); } } } else // otherwise: specified set of nodes that need blue at the end { - for (vertex_idx node : needs_blue_at_end) { - if (has_blue_exists[node][max_time - 1]) { - model.AddConstr(has_blue[node][static_cast(max_time) - 1] == 1); + for (VertexIdx node : needsBlueAtEnd_) { + if (hasBlueExists_[node][maxTime_ - 1]) { + model_.AddConstr(hasBlue_[node][static_cast(maxTime_) - 1] == 1); } } } // disable recomputation if needed - if (!allows_recomputation) { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (!allowsRecomputation_) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { Expr expr; - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 0; t < max_time; t++) { - if (compute_exists[node][processor][t]) { - expr += compute[node][processor][static_cast(t)]; + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (unsigned t = 0; t < maxTime_; t++) { + if (computeExists_[node][processor][t]) { + expr += compute_[node][processor][static_cast(t)]; } } } - model.AddConstr(expr <= 1); + model_.AddConstr(expr <= 1); } } -}; +} -template -void MultiProcessorPebbling::setupSyncPhaseVariablesConstraints(const BspInstance &instance) { - comp_phase = model.AddVars(static_cast(max_time), COPT_BINARY, "comp_phase"); +template +void MultiProcessorPebbling::SetupSyncPhaseVariablesConstraints(const BspInstance &instance) { + compPhase_ = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "comp_phase"); - if (mergeSteps) { - comm_phase = model.AddVars(static_cast(max_time), COPT_BINARY, "comm_phase"); + if (mergeSteps_) { + commPhase_ = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "comm_phase"); } else { - send_up_phase = 
model.AddVars(static_cast(max_time), COPT_BINARY, "send_up_phase"); - send_down_phase = model.AddVars(static_cast(max_time), COPT_BINARY, "send_down_phase"); + sendUpPhase_ = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "sendUp_phase"); + sendDownPhase_ = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "sendDown_phase"); } - const unsigned M = static_cast(instance.numberOfProcessors() * instance.numberOfVertices()); + const unsigned m = static_cast(instance.NumberOfProcessors() * instance.NumberOfVertices()); - for (unsigned t = 0; t < max_time; t++) { - Expr expr_comp, expr_comm, expr_send_up, expr_send_down; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - if (compute_exists[node][processor][t]) { - expr_comp += compute[node][processor][static_cast(t)]; + for (unsigned t = 0; t < maxTime_; t++) { + Expr exprComp, exprComm, exprSendUp, exprSendDown; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + if (computeExists_[node][processor][t]) { + exprComp += compute_[node][processor][static_cast(t)]; } - if (mergeSteps) { - if (send_up_exists[node][processor][t]) { - expr_comm += send_up[node][processor][static_cast(t)]; + if (mergeSteps_) { + if (sendUpExists_[node][processor][t]) { + exprComm += sendUp_[node][processor][static_cast(t)]; } - if (send_down_exists[node][processor][t]) { - expr_comm += send_down[node][processor][static_cast(t)]; + if (sendDownExists_[node][processor][t]) { + exprComm += sendDown_[node][processor][static_cast(t)]; } } else { - if (send_up_exists[node][processor][t]) { - expr_send_up += send_up[node][processor][static_cast(t)]; + if (sendUpExists_[node][processor][t]) { + exprSendUp += sendUp_[node][processor][static_cast(t)]; } - if (send_down_exists[node][processor][t]) { - expr_send_down += 
send_down[node][processor][static_cast(t)]; + if (sendDownExists_[node][processor][t]) { + exprSendDown += sendDown_[node][processor][static_cast(t)]; } } } } - model.AddConstr(M * comp_phase[static_cast(t)] >= expr_comp); - if (mergeSteps) { - model.AddConstr(2 * M * comm_phase[static_cast(t)] >= expr_comm); - model.AddConstr(comp_phase[static_cast(t)] + comm_phase[static_cast(t)] <= 1); + model_.AddConstr(m * compPhase_[static_cast(t)] >= exprComp); + if (mergeSteps_) { + model_.AddConstr(2 * m * commPhase_[static_cast(t)] >= exprComm); + model_.AddConstr(compPhase_[static_cast(t)] + commPhase_[static_cast(t)] <= 1); } else { - model.AddConstr(M * send_up_phase[static_cast(t)] >= expr_send_up); - model.AddConstr(M * send_down_phase[static_cast(t)] >= expr_send_down); - model.AddConstr( - comp_phase[static_cast(t)] + send_up_phase[static_cast(t)] + send_down_phase[static_cast(t)] <= 1); + model_.AddConstr(m * sendUpPhase_[static_cast(t)] >= exprSendUp); + model_.AddConstr(m * sendDownPhase_[static_cast(t)] >= exprSendDown); + model_.AddConstr( + compPhase_[static_cast(t)] + sendUpPhase_[static_cast(t)] + sendDownPhase_[static_cast(t)] <= 1); } } -}; +} -template -void MultiProcessorPebbling::setupBspVariablesConstraintsObjective(const BspInstance &instance) { - comp_phase_ends = model.AddVars(static_cast(max_time), COPT_BINARY, "comp_phase_ends"); +template +void MultiProcessorPebbling::SetupBspVariablesConstraintsObjective(const BspInstance &instance) { + compPhaseEnds_ = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "comp_phase_ends"); - comm_phase_ends = model.AddVars(static_cast(max_time), COPT_BINARY, "comm_phase_ends"); + commPhaseEnds_ = model_.AddVars(static_cast(maxTime_), COPT_BINARY, "comm_phase_ends"); - VarArray work_induced = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "work_induced"); - VarArray comm_induced = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "comm_induced"); + VarArray workInduced_ = 
model_.AddVars(static_cast(maxTime_), COPT_CONTINUOUS, "work_induced"); + VarArray commInduced_ = model_.AddVars(static_cast(maxTime_), COPT_CONTINUOUS, "comm_induced"); - std::vector work_step_until(instance.numberOfProcessors()); - std::vector comm_step_until(instance.numberOfProcessors()); - std::vector send_up_step_until(instance.numberOfProcessors()); - std::vector send_down_step_until(instance.numberOfProcessors()); + std::vector workStepUntil(instance.NumberOfProcessors()); + std::vector commStepUntil(instance.NumberOfProcessors()); + std::vector sendUpStepUntil(instance.NumberOfProcessors()); + std::vector sendDownStepUntil(instance.NumberOfProcessors()); - VarArray send_up_induced; - VarArray send_down_induced; - if (up_and_down_cost_summed) { - send_up_induced = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "send_up_induced"); - send_down_induced = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "send_down_induced"); + VarArray sendUpInduced; + VarArray sendDownInduced; + if (upAndDownCostSummed_) { + sendUpInduced = model_.AddVars(static_cast(maxTime_), COPT_CONTINUOUS, "sendUp_induced"); + sendDownInduced = model_.AddVars(static_cast(maxTime_), COPT_CONTINUOUS, "sendDown_induced"); } - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - work_step_until[processor] = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "work_step_until"); - send_up_step_until[processor] = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "send_up_step_until"); - send_down_step_until[processor] = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "send_up_step_until"); + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + workStepUntil[processor] = model_.AddVars(static_cast(maxTime_), COPT_CONTINUOUS, "work_step_until"); + sendUpStepUntil[processor] = model_.AddVars(static_cast(maxTime_), COPT_CONTINUOUS, "sendUp_step_until"); + sendDownStepUntil[processor] = 
model_.AddVars(static_cast(maxTime_), COPT_CONTINUOUS, "sendUp_step_until"); } - for (unsigned t = 0; t < max_time; t++) { - model.AddConstr(comp_phase[static_cast(t)] >= comp_phase_ends[static_cast(t)]); - if (mergeSteps) { - model.AddConstr(comm_phase[static_cast(t)] >= comm_phase_ends[static_cast(t)]); + for (unsigned t = 0; t < maxTime_; t++) { + model_.AddConstr(compPhase_[static_cast(t)] >= compPhaseEnds_[static_cast(t)]); + if (mergeSteps_) { + model_.AddConstr(commPhase_[static_cast(t)] >= commPhaseEnds_[static_cast(t)]); } else { - model.AddConstr(send_down_phase[static_cast(t)] + send_up_phase[static_cast(t)] - >= comm_phase_ends[static_cast(t)]); + model_.AddConstr(sendDownPhase_[static_cast(t)] + sendUpPhase_[static_cast(t)] + >= commPhaseEnds_[static_cast(t)]); } } - for (unsigned t = 0; t < max_time - 1; t++) { - model.AddConstr(comp_phase_ends[static_cast(t)] - >= comp_phase[static_cast(t)] - comp_phase[static_cast(t) + 1]); - if (mergeSteps) { - model.AddConstr(comm_phase_ends[static_cast(t)] - >= comm_phase[static_cast(t)] - comm_phase[static_cast(t) + 1]); + for (unsigned t = 0; t < maxTime_ - 1; t++) { + model_.AddConstr(compPhaseEnds_[static_cast(t)] + >= compPhase_[static_cast(t)] - compPhase_[static_cast(t) + 1]); + if (mergeSteps_) { + model_.AddConstr(commPhaseEnds_[static_cast(t)] + >= commPhase_[static_cast(t)] - commPhase_[static_cast(t) + 1]); } else { - model.AddConstr(comm_phase_ends[static_cast(t)] - >= send_down_phase[static_cast(t)] + send_up_phase[static_cast(t)] - - send_down_phase[static_cast(t) + 1] - send_up_phase[static_cast(t) + 1]); + model_.AddConstr(commPhaseEnds_[static_cast(t)] + >= sendDownPhase_[static_cast(t)] + sendUpPhase_[static_cast(t)] + - sendDownPhase_[static_cast(t) + 1] - sendUpPhase_[static_cast(t) + 1]); } } - model.AddConstr(comp_phase_ends[static_cast(max_time) - 1] >= comp_phase[static_cast(max_time) - 1]); - if (mergeSteps) { - model.AddConstr(comm_phase_ends[static_cast(max_time) - 1] >= 
comm_phase[static_cast(max_time) - 1]); + model_.AddConstr(compPhaseEnds_[static_cast(maxTime_) - 1] >= compPhase_[static_cast(maxTime_) - 1]); + if (mergeSteps_) { + model_.AddConstr(commPhaseEnds_[static_cast(maxTime_) - 1] >= commPhase_[static_cast(maxTime_) - 1]); } else { - model.AddConstr(comm_phase_ends[static_cast(max_time) - 1] - >= send_down_phase[static_cast(max_time) - 1] + send_up_phase[static_cast(max_time) - 1]); + model_.AddConstr(commPhaseEnds_[static_cast(maxTime_) - 1] + >= sendDownPhase_[static_cast(maxTime_) - 1] + sendUpPhase_[static_cast(maxTime_) - 1]); } - const unsigned M = static_cast(instance.numberOfProcessors() - * (sumOfVerticesWorkWeights(instance.getComputationalDag()) - + sumOfVerticesCommunicationWeights(instance.getComputationalDag()))); - - for (unsigned t = 1; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr_work; - Expr expr_send_up; - Expr expr_send_down; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (compute_exists[node][processor][t]) { - expr_work += instance.getComputationalDag().vertex_work_weight(node) - * compute[node][processor][static_cast(t)]; + const unsigned m = static_cast(instance.NumberOfProcessors() + * (SumOfVerticesWorkWeights(instance.GetComputationalDag()) + + SumOfVerticesCommunicationWeights(instance.GetComputationalDag()))); + + for (unsigned t = 1; t < maxTime_; t++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + Expr exprWork; + Expr exprSendUp; + Expr exprSendDown; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (computeExists_[node][processor][t]) { + exprWork + += instance.GetComputationalDag().VertexWorkWeight(node) * compute_[node][processor][static_cast(t)]; } - if (send_up_exists[node][processor][t]) { - expr_send_up += instance.getComputationalDag().vertex_comm_weight(node) - * 
send_up[node][processor][static_cast(t)]; + if (sendUpExists_[node][processor][t]) { + exprSendUp + += instance.GetComputationalDag().VertexCommWeight(node) * sendUp_[node][processor][static_cast(t)]; } - if (send_down_exists[node][processor][t]) { - expr_send_down += instance.getComputationalDag().vertex_comm_weight(node) - * send_down[node][processor][static_cast(t)]; + if (sendDownExists_[node][processor][t]) { + exprSendDown += instance.GetComputationalDag().VertexCommWeight(node) + * sendDown_[node][processor][static_cast(t)]; } } - model.AddConstr(M * comm_phase_ends[static_cast(t)] + work_step_until[processor][static_cast(t)] - >= work_step_until[processor][static_cast(t) - 1] + expr_work); + model_.AddConstr(m * commPhaseEnds_[static_cast(t)] + workStepUntil[processor][static_cast(t)] + >= workStepUntil[processor][static_cast(t) - 1] + exprWork); - model.AddConstr(M * comp_phase_ends[static_cast(t)] + send_up_step_until[processor][static_cast(t)] - >= send_up_step_until[processor][static_cast(t) - 1] + expr_send_up); + model_.AddConstr(m * compPhaseEnds_[static_cast(t)] + sendUpStepUntil[processor][static_cast(t)] + >= sendUpStepUntil[processor][static_cast(t) - 1] + exprSendUp); - model.AddConstr(M * comp_phase_ends[static_cast(t)] + send_down_step_until[processor][static_cast(t)] - >= send_down_step_until[processor][static_cast(t) - 1] + expr_send_down); + model_.AddConstr(m * compPhaseEnds_[static_cast(t)] + sendDownStepUntil[processor][static_cast(t)] + >= sendDownStepUntil[processor][static_cast(t) - 1] + exprSendDown); - model.AddConstr(work_induced[static_cast(t)] - >= work_step_until[processor][static_cast(t)] - M * (1 - comp_phase_ends[static_cast(t)])); - if (up_and_down_cost_summed) { - model.AddConstr(send_up_induced[static_cast(t)] >= send_up_step_until[processor][static_cast(t)] - - M * (1 - comm_phase_ends[static_cast(t)])); - model.AddConstr(send_down_induced[static_cast(t)] >= send_down_step_until[processor][static_cast(t)] - - M * (1 - 
comm_phase_ends[static_cast(t)])); - model.AddConstr(comm_induced[static_cast(t)] - >= send_up_induced[static_cast(t)] + send_down_induced[static_cast(t)]); + model_.AddConstr(workInduced_[static_cast(t)] + >= workStepUntil[processor][static_cast(t)] - m * (1 - compPhaseEnds_[static_cast(t)])); + if (upAndDownCostSummed_) { + model_.AddConstr(sendUpInduced[static_cast(t)] >= sendUpStepUntil[processor][static_cast(t)] + - m * (1 - commPhaseEnds_[static_cast(t)])); + model_.AddConstr(sendDownInduced[static_cast(t)] >= sendDownStepUntil[processor][static_cast(t)] + - m * (1 - commPhaseEnds_[static_cast(t)])); + model_.AddConstr(commInduced_[static_cast(t)] + >= sendUpInduced[static_cast(t)] + sendDownInduced[static_cast(t)]); } else { - model.AddConstr(comm_induced[static_cast(t)] >= send_down_step_until[processor][static_cast(t)] - - M * (1 - comm_phase_ends[static_cast(t)])); - model.AddConstr(comm_induced[static_cast(t)] >= send_up_step_until[processor][static_cast(t)] - - M * (1 - comm_phase_ends[static_cast(t)])); + model_.AddConstr(commInduced_[static_cast(t)] >= sendDownStepUntil[processor][static_cast(t)] + - m * (1 - commPhaseEnds_[static_cast(t)])); + model_.AddConstr(commInduced_[static_cast(t)] >= sendUpStepUntil[processor][static_cast(t)] + - m * (1 - commPhaseEnds_[static_cast(t)])); } } } // t = 0 - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr_work; - Expr expr_send_up; - Expr expr_send_down; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (compute_exists[node][processor][0]) { - expr_work += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][0]; + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + Expr exprWork; + Expr exprSendUp; + Expr exprSendDown; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (computeExists_[node][processor][0]) { + exprWork += 
instance.GetComputationalDag().VertexWorkWeight(node) * compute_[node][processor][0]; } - if (send_up_exists[node][processor][0]) { - expr_send_up += instance.getComputationalDag().vertex_comm_weight(node) * send_up[node][processor][0]; + if (sendUpExists_[node][processor][0]) { + exprSendUp += instance.GetComputationalDag().VertexCommWeight(node) * sendUp_[node][processor][0]; } - if (send_down_exists[node][processor][0]) { - expr_send_down += instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][0]; + if (sendDownExists_[node][processor][0]) { + exprSendDown += instance.GetComputationalDag().VertexCommWeight(node) * sendDown_[node][processor][0]; } } - model.AddConstr(M * comm_phase_ends[0] + work_step_until[processor][0] >= expr_work); + model_.AddConstr(m * commPhaseEnds_[0] + workStepUntil[processor][0] >= exprWork); - model.AddConstr(M * comp_phase_ends[0] + send_up_step_until[processor][0] >= expr_send_up); + model_.AddConstr(m * compPhaseEnds_[0] + sendUpStepUntil[processor][0] >= exprSendUp); - model.AddConstr(M * comp_phase_ends[0] + send_down_step_until[processor][0] >= expr_send_down); + model_.AddConstr(m * compPhaseEnds_[0] + sendDownStepUntil[processor][0] >= exprSendDown); - model.AddConstr(work_induced[0] >= work_step_until[processor][0] - M * (1 - comp_phase_ends[0])); - if (up_and_down_cost_summed) { - model.AddConstr(send_up_induced[0] >= send_up_step_until[processor][0] - M * (1 - comm_phase_ends[0])); - model.AddConstr(send_down_induced[0] >= send_down_step_until[processor][0] - M * (1 - comm_phase_ends[0])); - model.AddConstr(comm_induced[0] >= send_up_induced[0] + send_down_induced[0]); + model_.AddConstr(workInduced_[0] >= workStepUntil[processor][0] - m * (1 - compPhaseEnds_[0])); + if (upAndDownCostSummed_) { + model_.AddConstr(sendUpInduced[0] >= sendUpStepUntil[processor][0] - m * (1 - commPhaseEnds_[0])); + model_.AddConstr(sendDownInduced[0] >= sendDownStepUntil[processor][0] - m * (1 - 
commPhaseEnds_[0])); + model_.AddConstr(commInduced_[0] >= sendUpInduced[0] + sendDownInduced[0]); } else { - model.AddConstr(comm_induced[0] >= send_down_step_until[processor][0] - M * (1 - comm_phase_ends[0])); - model.AddConstr(comm_induced[0] >= send_up_step_until[processor][0] - M * (1 - comm_phase_ends[0])); + model_.AddConstr(commInduced_[0] >= sendDownStepUntil[processor][0] - m * (1 - commPhaseEnds_[0])); + model_.AddConstr(commInduced_[0] >= sendUpStepUntil[processor][0] - m * (1 - commPhaseEnds_[0])); } } @@ -932,122 +926,120 @@ void MultiProcessorPebbling::setupBspVariablesConstraintsObjective(cons */ Expr expr; - for (unsigned t = 0; t < max_time; t++) { - expr += work_induced[static_cast(t)] + instance.synchronisationCosts() * comm_phase_ends[static_cast(t)] - + instance.communicationCosts() * comm_induced[static_cast(t)]; + for (unsigned t = 0; t < maxTime_; t++) { + expr += workInduced_[static_cast(t)] + instance.SynchronisationCosts() * commPhaseEnds_[static_cast(t)] + + instance.CommunicationCosts() * commInduced_[static_cast(t)]; } - model.SetObjective(expr, COPT_MINIMIZE); -}; + model_.SetObjective(expr, COPT_MINIMIZE); +} -template -void MultiProcessorPebbling::setupSyncObjective(const BspInstance &instance) { +template +void MultiProcessorPebbling::SetupSyncObjective(const BspInstance &instance) { Expr expr; - for (unsigned t = 0; t < max_time; t++) { - if (!mergeSteps) { - expr += comp_phase[static_cast(t)] + instance.communicationCosts() * send_up_phase[static_cast(t)] - + instance.communicationCosts() * send_down_phase[static_cast(t)]; + for (unsigned t = 0; t < maxTime_; t++) { + if (!mergeSteps_) { + expr += compPhase_[static_cast(t)] + instance.CommunicationCosts() * sendUpPhase_[static_cast(t)] + + instance.CommunicationCosts() * sendDownPhase_[static_cast(t)]; } else { // this objective+parameter combination is not very meaningful, but still defined here to avoid a segfault otherwise - expr += comp_phase[static_cast(t)] + 
instance.communicationCosts() * comm_phase[static_cast(t)]; + expr += compPhase_[static_cast(t)] + instance.CommunicationCosts() * commPhase_[static_cast(t)]; } } - model.SetObjective(expr, COPT_MINIMIZE); + model_.SetObjective(expr, COPT_MINIMIZE); } -template -void MultiProcessorPebbling::setupAsyncVariablesConstraintsObjective(const BspInstance &instance) { - std::vector finish_times(instance.numberOfProcessors()); +template +void MultiProcessorPebbling::SetupAsyncVariablesConstraintsObjective(const BspInstance &instance) { + std::vector finishTimes(instance.NumberOfProcessors()); - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - finish_times[processor] = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "finish_times"); + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + finishTimes[processor] = model_.AddVars(static_cast(maxTime_), COPT_CONTINUOUS, "finish_times"); } - Var makespan = model.AddVar(0, COPT_INFINITY, 1, COPT_CONTINUOUS, "makespan"); + Var makespan = model_.AddVar(0, COPT_INFINITY, 1, COPT_CONTINUOUS, "makespan"); - VarArray gets_blue = model.AddVars(static_cast(instance.numberOfVertices()), COPT_CONTINUOUS, "gets_blue"); + VarArray getsBlue = model_.AddVars(static_cast(instance.NumberOfVertices()), COPT_CONTINUOUS, "gets_blue"); - const unsigned M = static_cast(instance.numberOfProcessors() - * (sumOfVerticesWorkWeights(instance.getComputationalDag()) - + sumOfVerticesCommunicationWeights(instance.getComputationalDag()))); + const unsigned m = static_cast(instance.NumberOfProcessors() + * (SumOfVerticesWorkWeights(instance.GetComputationalDag()) + + SumOfVerticesCommunicationWeights(instance.GetComputationalDag()))); - for (unsigned t = 0; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr send_down_step_length; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if 
(send_down_exists[node][processor][t]) { - send_down_step_length += instance.communicationCosts() - * instance.getComputationalDag().vertex_comm_weight(node) - * send_down[node][processor][static_cast(t)]; + for (unsigned t = 0; t < maxTime_; t++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + Expr sendDownStepLength; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (sendDownExists_[node][processor][t]) { + sendDownStepLength += instance.CommunicationCosts() * instance.GetComputationalDag().VertexCommWeight(node) + * sendDown_[node][processor][static_cast(t)]; } } - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (send_up_exists[node][processor][t]) { - model.AddConstr(gets_blue[static_cast(node)] - >= finish_times[processor][static_cast(t)] - - (1 - send_up[node][processor][static_cast(t)]) * M); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (sendUpExists_[node][processor][t]) { + model_.AddConstr(getsBlue[static_cast(node)] + >= finishTimes[processor][static_cast(t)] + - (1 - sendUp_[node][processor][static_cast(t)]) * m); } - if (send_down_exists[node][processor][t]) { - model.AddConstr(gets_blue[static_cast(node)] - <= finish_times[processor][static_cast(t)] - + (1 - send_down[node][processor][static_cast(t)]) * M - send_down_step_length); + if (sendDownExists_[node][processor][t]) { + model_.AddConstr(getsBlue[static_cast(node)] + <= finishTimes[processor][static_cast(t)] + + (1 - sendDown_[node][processor][static_cast(t)]) * m - sendDownStepLength); } } } } // makespan constraint - for (unsigned t = 0; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - model.AddConstr(makespan >= finish_times[processor][static_cast(t)]); + for (unsigned t = 0; t < maxTime_; t++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + 
model_.AddConstr(makespan >= finishTimes[processor][static_cast(t)]); } } // t = 0 - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { Expr expr; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (compute_exists[node][processor][0]) { - expr += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][0]; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (computeExists_[node][processor][0]) { + expr += instance.GetComputationalDag().VertexWorkWeight(node) * compute_[node][processor][0]; } - if (send_up_exists[node][processor][0]) { - expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) - * send_up[node][processor][0]; + if (sendUpExists_[node][processor][0]) { + expr += instance.CommunicationCosts() * instance.GetComputationalDag().VertexCommWeight(node) + * sendUp_[node][processor][0]; } - if (send_down_exists[node][processor][0]) { - expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) - * send_down[node][processor][0]; + if (sendDownExists_[node][processor][0]) { + expr += instance.CommunicationCosts() * instance.GetComputationalDag().VertexCommWeight(node) + * sendDown_[node][processor][0]; } } - model.AddConstr(finish_times[processor][0] >= expr); + model_.AddConstr(finishTimes[processor][0] >= expr); } - for (unsigned t = 1; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { + for (unsigned t = 1; t < maxTime_; t++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { Expr expr; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (compute_exists[node][processor][t]) { - expr += instance.getComputationalDag().vertex_work_weight(node) * 
compute[node][processor][static_cast(t)]; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (computeExists_[node][processor][t]) { + expr += instance.GetComputationalDag().VertexWorkWeight(node) * compute_[node][processor][static_cast(t)]; } - if (send_up_exists[node][processor][t]) { - expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) - * send_up[node][processor][static_cast(t)]; + if (sendUpExists_[node][processor][t]) { + expr += instance.CommunicationCosts() * instance.GetComputationalDag().VertexCommWeight(node) + * sendUp_[node][processor][static_cast(t)]; } - if (send_down_exists[node][processor][t]) { - expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) - * send_down[node][processor][static_cast(t)]; + if (sendDownExists_[node][processor][t]) { + expr += instance.CommunicationCosts() * instance.GetComputationalDag().VertexCommWeight(node) + * sendDown_[node][processor][static_cast(t)]; } } - model.AddConstr(finish_times[processor][static_cast(t)] - >= finish_times[processor][static_cast(t) - 1] + expr); + model_.AddConstr(finishTimes[processor][static_cast(t)] >= finishTimes[processor][static_cast(t) - 1] + expr); } } @@ -1055,146 +1047,145 @@ void MultiProcessorPebbling::setupAsyncVariablesConstraintsObjective(co Objective */ - model.SetObjective(makespan, COPT_MINIMIZE); + model_.SetObjective(makespan, COPT_MINIMIZE); } -template -void MultiProcessorPebbling::WriteSolutionCallback::callback() { - if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { +template +void MultiProcessorPebbling::WriteSolutionCallback::callback() { + if (Where() == COPT_CBCONTEXT_MIPSOL && counter_ < maxNumberSolution_ && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { try { - if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { - best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); + 
if (GetDblInfo(COPT_CBINFO_BESTOBJ) < bestObj_ && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { + bestObj_ = GetDblInfo(COPT_CBINFO_BESTOBJ); // auto sched = constructBspScheduleFromCallback(); // BspScheduleWriter sched_writer(sched); // sched_writer.write_dot(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" // + // std::to_string(counter) + "_schedule.dot"); - counter++; + counter_++; } } catch (const std::exception &e) {} } -}; +} -template -void MultiProcessorPebbling::constructPebblingScheduleFromSolution(PebblingSchedule &schedule) { - const BspInstance &instance = schedule.getInstance(); +template +void MultiProcessorPebbling::ConstructPebblingScheduleFromSolution(PebblingSchedule &schedule) { + const BspInstance &instance = schedule.GetInstance(); - std::vector>>> nodes_computed( - instance.numberOfProcessors(), std::vector>>(max_time)); - std::vector>> nodes_sent_up(instance.numberOfProcessors(), - std::vector>(max_time)); - std::vector>> nodes_sent_down(instance.numberOfProcessors(), - std::vector>(max_time)); - std::vector>> evicted_after(instance.numberOfProcessors(), - std::vector>(max_time)); + std::vector>>> nodesComputed( + instance.NumberOfProcessors(), std::vector>>(maxTime_)); + std::vector>> nodesSentUp(instance.NumberOfProcessors(), + std::vector>(maxTime_)); + std::vector>> nodesSentDown(instance.NumberOfProcessors(), + std::vector>(maxTime_)); + std::vector>> evictedAfter(instance.NumberOfProcessors(), + std::vector>(maxTime_)); // used to remove unneeded steps when a node is sent down and then up (which becomes invalid after reordering the comm phases) - std::vector> sent_down_already(instance.numberOfVertices(), - std::vector(instance.numberOfProcessors(), false)); - std::vector> ignore_red(instance.numberOfVertices(), std::vector(instance.numberOfProcessors(), false)); + std::vector> sentDownAlready(instance.NumberOfVertices(), + std::vector(instance.NumberOfProcessors(), false)); + std::vector> 
ignoreRed(instance.NumberOfVertices(), std::vector(instance.NumberOfProcessors(), false)); - std::vector topOrder = GetTopOrder(instance.getComputationalDag()); - std::vector topOrderPosition(instance.numberOfVertices()); - for (unsigned index = 0; index < instance.numberOfVertices(); ++index) { + std::vector topOrder = GetTopOrder(instance.GetComputationalDag()); + std::vector topOrderPosition(instance.NumberOfVertices()); + for (unsigned index = 0; index < instance.NumberOfVertices(); ++index) { topOrderPosition[topOrder[index]] = index; } - std::vector empty_step(max_time, true); - std::vector> step_type_on_proc(instance.numberOfProcessors(), std::vector(max_time, 0)); + std::vector emptyStep(maxTime_, true); + std::vector> stepTypeOnProc(instance.NumberOfProcessors(), std::vector(maxTime_, 0)); - for (unsigned step = 0; step < max_time; step++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (compute_exists[node][processor][step] - && compute[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { - step_type_on_proc[processor][step] = 1; + for (unsigned step = 0; step < maxTime_; step++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (computeExists_[node][processor][step] + && compute_[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { + stepTypeOnProc[processor][step] = 1; } } } } - for (unsigned step = 0; step < max_time; step++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (step > 0 && has_red[node][processor][static_cast(step) - 1].Get(COPT_DBLINFO_VALUE) >= .99 - && has_red[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) <= .01 - && 
!ignore_red[node][processor]) { - for (size_t previous_step = step - 1; previous_step < step; --previous_step) { - if (!nodes_computed[processor][previous_step].empty() || !nodes_sent_up[processor][previous_step].empty() - || !nodes_sent_down[processor][previous_step].empty() || previous_step == 0) { - evicted_after[processor][previous_step].insert(node); - empty_step[previous_step] = false; + for (unsigned step = 0; step < maxTime_; step++) { + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (step > 0 && hasRed_[node][processor][static_cast(step) - 1].Get(COPT_DBLINFO_VALUE) >= .99 + && hasRed_[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) <= .01 + && !ignoreRed[node][processor]) { + for (size_t previousStep = step - 1; previousStep < step; --previousStep) { + if (!nodesComputed[processor][previousStep].empty() || !nodesSentUp[processor][previousStep].empty() + || !nodesSentDown[processor][previousStep].empty() || previousStep == 0) { + evictedAfter[processor][previousStep].insert(node); + emptyStep[previousStep] = false; break; } } } - if (compute_exists[node][processor][step] - && compute[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { - nodes_computed[processor][step].emplace(topOrderPosition[node], node); - empty_step[step] = false; - ignore_red[node][processor] = false; + if (computeExists_[node][processor][step] + && compute_[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { + nodesComputed[processor][step].emplace(topOrderPosition[node], node); + emptyStep[step] = false; + ignoreRed[node][processor] = false; - // implicit eviction in case of mergesteps - never having "has_red=1" - if (step + 1 < max_time && has_red[node][processor][static_cast(step) + 1].Get(COPT_DBLINFO_VALUE) <= .01) { - evicted_after[processor][step].insert(node); + // implicit eviction in case of mergesteps - never 
having "hasRed=1" + if (step + 1 < maxTime_ && hasRed_[node][processor][static_cast(step) + 1].Get(COPT_DBLINFO_VALUE) <= .01) { + evictedAfter[processor][step].insert(node); } } - if (send_down_exists[node][processor][step] - && send_down[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { - bool keep_step = false; + if (sendDownExists_[node][processor][step] + && sendDown_[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { + bool keepStep = false; - for (unsigned next_step = step + 1; - next_step < max_time - && has_red[node][processor][static_cast(next_step)].Get(COPT_DBLINFO_VALUE) >= .99; - ++next_step) { - if (step_type_on_proc[processor][next_step] == 1) { - keep_step = true; + for (unsigned nextStep = step + 1; + nextStep < maxTime_ && hasRed_[node][processor][static_cast(nextStep)].Get(COPT_DBLINFO_VALUE) >= .99; + ++nextStep) { + if (stepTypeOnProc[processor][nextStep] == 1) { + keepStep = true; break; } } - if (keep_step) { - nodes_sent_down[processor][step].push_back(node); - empty_step[step] = false; - step_type_on_proc[processor][step] = 3; - ignore_red[node][processor] = false; + if (keepStep) { + nodesSentDown[processor][step].push_back(node); + emptyStep[step] = false; + stepTypeOnProc[processor][step] = 3; + ignoreRed[node][processor] = false; } else { - ignore_red[node][processor] = true; + ignoreRed[node][processor] = true; } - sent_down_already[node][processor] = true; + sentDownAlready[node][processor] = true; } - if (send_up_exists[node][processor][step] - && send_up[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99 - && !sent_down_already[node][processor]) { - nodes_sent_up[processor][step].push_back(node); - empty_step[step] = false; - step_type_on_proc[processor][step] = 2; + if (sendUpExists_[node][processor][step] + && sendUp_[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99 + && !sentDownAlready[node][processor]) { + nodesSentUp[processor][step].push_back(node); + 
emptyStep[step] = false; + stepTypeOnProc[processor][step] = 2; } } } } // components of the final PebblingSchedule - the first two dimensions are always processor and superstep - std::vector>> compute_steps_per_supstep(instance.numberOfProcessors()); - std::vector>>> nodes_evicted_after_compute(instance.numberOfProcessors()); - std::vector>> nodes_sent_up_in_supstep(instance.numberOfProcessors()); - std::vector>> nodes_sent_down_in_supstep(instance.numberOfProcessors()); - std::vector>> nodes_evicted_in_comm_phase(instance.numberOfProcessors()); + std::vector>> computeStepsPerSupstep(instance.NumberOfProcessors()); + std::vector>>> nodesEvictedAfterCompute(instance.NumberOfProcessors()); + std::vector>> nodesSentUpInSupstep(instance.NumberOfProcessors()); + std::vector>> nodesSentDownInSupstep(instance.NumberOfProcessors()); + std::vector>> nodesEvictedInCommPhase(instance.NumberOfProcessors()); // edge case: check if an extra superstep must be added in the beginning to evict values that are initially in cache - bool needs_evict_step_in_beginning = false; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - for (unsigned step = 0; step < max_time; step++) { - if (step_type_on_proc[proc][step] == 0 && !evicted_after[proc][step].empty()) { - needs_evict_step_in_beginning = true; + bool needsEvictStepInBeginning = false; + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + for (unsigned step = 0; step < maxTime_; step++) { + if (stepTypeOnProc[proc][step] == 0 && !evictedAfter[proc][step].empty()) { + needsEvictStepInBeginning = true; break; - } else if (step_type_on_proc[proc][step] > 0) { + } else if (stepTypeOnProc[proc][step] > 0) { break; } } @@ -1202,463 +1193,461 @@ void MultiProcessorPebbling::constructPebblingScheduleFromSolution(Pebb // create the actual PebblingSchedule - iterating over the steps unsigned superstepIndex = 0; - if (synchronous) { - bool in_comm = true; + if (synchronous_) { + bool inComm = true; 
superstepIndex = UINT_MAX; - if (needs_evict_step_in_beginning) { + if (needsEvictStepInBeginning) { // artificially insert comm step in beginning, if it would start with compute otherwise - bool begins_with_compute = false; - for (unsigned step = 0; step < max_time; step++) { - bool is_comp = false, is_comm = false; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - if (step_type_on_proc[proc][step] == 1) { - is_comp = true; + bool beginsWithCompute = false; + for (unsigned step = 0; step < maxTime_; step++) { + bool isComp = false, isComm = false; + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + if (stepTypeOnProc[proc][step] == 1) { + isComp = true; } - if (step_type_on_proc[proc][step] > 1) { - is_comm = true; + if (stepTypeOnProc[proc][step] > 1) { + isComm = true; } } - if (is_comp) { - begins_with_compute = true; + if (isComp) { + beginsWithCompute = true; } - if (is_comp || is_comm) { + if (isComp || isComm) { break; } } - if (begins_with_compute) { + if (beginsWithCompute) { superstepIndex = 0; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - compute_steps_per_supstep[proc].push_back(std::vector()); - nodes_evicted_after_compute[proc].push_back(std::vector>()); - nodes_sent_up_in_supstep[proc].push_back(std::vector()); - nodes_sent_down_in_supstep[proc].push_back(std::vector()); - nodes_evicted_in_comm_phase[proc].push_back(std::vector()); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + computeStepsPerSupstep[proc].push_back(std::vector()); + nodesEvictedAfterCompute[proc].push_back(std::vector>()); + nodesSentUpInSupstep[proc].push_back(std::vector()); + nodesSentDownInSupstep[proc].push_back(std::vector()); + nodesEvictedInCommPhase[proc].push_back(std::vector()); } } } // process steps - for (unsigned step = 0; step < max_time; step++) { - if (empty_step[step]) { + for (unsigned step = 0; step < maxTime_; step++) { + if (emptyStep[step]) { 
continue; } - unsigned step_type = 0; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - step_type = std::max(step_type, step_type_on_proc[proc][step]); + unsigned stepType = 0; + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + stepType = std::max(stepType, stepTypeOnProc[proc][step]); } - if (step_type == 1) { - if (in_comm) { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - compute_steps_per_supstep[proc].push_back(std::vector()); - nodes_evicted_after_compute[proc].push_back(std::vector>()); - nodes_sent_up_in_supstep[proc].push_back(std::vector()); - nodes_sent_down_in_supstep[proc].push_back(std::vector()); - nodes_evicted_in_comm_phase[proc].push_back(std::vector()); + if (stepType == 1) { + if (inComm) { + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + computeStepsPerSupstep[proc].push_back(std::vector()); + nodesEvictedAfterCompute[proc].push_back(std::vector>()); + nodesSentUpInSupstep[proc].push_back(std::vector()); + nodesSentDownInSupstep[proc].push_back(std::vector()); + nodesEvictedInCommPhase[proc].push_back(std::vector()); } ++superstepIndex; - in_comm = false; + inComm = false; } - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - for (auto index_and_node : nodes_computed[proc][step]) { - compute_steps_per_supstep[proc][superstepIndex].push_back(index_and_node.second); - nodes_evicted_after_compute[proc][superstepIndex].push_back(std::vector()); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + for (auto indexAndNode : nodesComputed[proc][step]) { + computeStepsPerSupstep[proc][superstepIndex].push_back(indexAndNode.second); + nodesEvictedAfterCompute[proc][superstepIndex].push_back(std::vector()); } - for (vertex_idx node : evicted_after[proc][step]) { - if (!nodes_evicted_after_compute[proc][superstepIndex].empty()) { - nodes_evicted_after_compute[proc][superstepIndex].back().push_back(node); + 
for (VertexIdx node : evictedAfter[proc][step]) { + if (!nodesEvictedAfterCompute[proc][superstepIndex].empty()) { + nodesEvictedAfterCompute[proc][superstepIndex].back().push_back(node); } else { // can only happen in special case: eviction in the very beginning - nodes_evicted_in_comm_phase[proc][0].push_back(node); + nodesEvictedInCommPhase[proc][0].push_back(node); } } } } - if (step_type == 2 || step_type == 3) { + if (stepType == 2 || stepType == 3) { if (superstepIndex == UINT_MAX) { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - compute_steps_per_supstep[proc].push_back(std::vector()); - nodes_evicted_after_compute[proc].push_back(std::vector>()); - nodes_sent_up_in_supstep[proc].push_back(std::vector()); - nodes_sent_down_in_supstep[proc].push_back(std::vector()); - nodes_evicted_in_comm_phase[proc].push_back(std::vector()); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + computeStepsPerSupstep[proc].push_back(std::vector()); + nodesEvictedAfterCompute[proc].push_back(std::vector>()); + nodesSentUpInSupstep[proc].push_back(std::vector()); + nodesSentDownInSupstep[proc].push_back(std::vector()); + nodesEvictedInCommPhase[proc].push_back(std::vector()); } ++superstepIndex; } - in_comm = true; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - for (vertex_idx node : nodes_sent_up[proc][step]) { - nodes_sent_up_in_supstep[proc][superstepIndex].push_back(node); + inComm = true; + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + for (VertexIdx node : nodesSentUp[proc][step]) { + nodesSentUpInSupstep[proc][superstepIndex].push_back(node); } - for (vertex_idx node : evicted_after[proc][step]) { - nodes_evicted_in_comm_phase[proc][superstepIndex].push_back(node); + for (VertexIdx node : evictedAfter[proc][step]) { + nodesEvictedInCommPhase[proc][superstepIndex].push_back(node); } - for (vertex_idx node : nodes_sent_down[proc][step]) { - 
nodes_sent_down_in_supstep[proc][superstepIndex].push_back(node); + for (VertexIdx node : nodesSentDown[proc][step]) { + nodesSentDownInSupstep[proc][superstepIndex].push_back(node); } } } } } else { - std::vector step_idx_on_proc(instance.numberOfProcessors(), 0); + std::vector stepIdxOnProc(instance.NumberOfProcessors(), 0); - std::vector already_has_blue(instance.numberOfVertices(), false); - if (need_to_load_inputs) { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (instance.getComputationalDag().in_degree(node) == 0) { - already_has_blue[node] = true; + std::vector alreadyHasBlue(instance.NumberOfVertices(), false); + if (needToLoadInputs_) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (instance.GetComputationalDag().InDegree(node) == 0) { + alreadyHasBlue[node] = true; } } } - std::vector proc_finished(instance.numberOfProcessors(), false); - unsigned nr_proc_finished = 0; - while (nr_proc_finished < instance.numberOfProcessors()) { + std::vector procFinished(instance.NumberOfProcessors(), false); + unsigned nrProcFinished = 0; + while (nrProcFinished < instance.NumberOfProcessors()) { // preliminary sweep of superstep, to see if we need to wait for other processors - std::vector idx_limit_on_proc = step_idx_on_proc; + std::vector idxLimitOnProc = stepIdxOnProc; // first add compute steps - if (!needs_evict_step_in_beginning || superstepIndex > 0) { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - while (idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] <= 1) { - ++idx_limit_on_proc[proc]; + if (!needsEvictStepInBeginning || superstepIndex > 0) { + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + while (idxLimitOnProc[proc] < maxTime_ && stepTypeOnProc[proc][idxLimitOnProc[proc]] <= 1) { + ++idxLimitOnProc[proc]; } } } // then add communications step until possible (note - they might not be valid if all put 
into a single superstep!) - std::set new_blues; - bool still_making_progress = true; - while (still_making_progress) { - still_making_progress = false; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - while (idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] != 1) { - bool accept_step = true; - for (vertex_idx node : nodes_sent_down[proc][idx_limit_on_proc[proc]]) { - if (!already_has_blue[node] && new_blues.find(node) == new_blues.end()) { - accept_step = false; + std::set newBlues; + bool stillMakingProgress = true; + while (stillMakingProgress) { + stillMakingProgress = false; + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + while (idxLimitOnProc[proc] < maxTime_ && stepTypeOnProc[proc][idxLimitOnProc[proc]] != 1) { + bool acceptStep = true; + for (VertexIdx node : nodesSentDown[proc][idxLimitOnProc[proc]]) { + if (!alreadyHasBlue[node] && newBlues.find(node) == newBlues.end()) { + acceptStep = false; } } - if (!accept_step) { + if (!acceptStep) { break; } - for (vertex_idx node : nodes_sent_up[proc][idx_limit_on_proc[proc]]) { - if (!already_has_blue[node]) { - new_blues.insert(node); + for (VertexIdx node : nodesSentUp[proc][idxLimitOnProc[proc]]) { + if (!alreadyHasBlue[node]) { + newBlues.insert(node); } } - still_making_progress = true; - ++idx_limit_on_proc[proc]; + stillMakingProgress = true; + ++idxLimitOnProc[proc]; } } } // actually process the superstep - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - compute_steps_per_supstep[proc].push_back(std::vector()); - nodes_evicted_after_compute[proc].push_back(std::vector>()); - nodes_sent_up_in_supstep[proc].push_back(std::vector()); - nodes_sent_down_in_supstep[proc].push_back(std::vector()); - nodes_evicted_in_comm_phase[proc].push_back(std::vector()); - - while (step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] <= 1) { - for (auto 
index_and_node : nodes_computed[proc][step_idx_on_proc[proc]]) { - compute_steps_per_supstep[proc][superstepIndex].push_back(index_and_node.second); - nodes_evicted_after_compute[proc][superstepIndex].push_back(std::vector()); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); proc++) { + computeStepsPerSupstep[proc].push_back(std::vector()); + nodesEvictedAfterCompute[proc].push_back(std::vector>()); + nodesSentUpInSupstep[proc].push_back(std::vector()); + nodesSentDownInSupstep[proc].push_back(std::vector()); + nodesEvictedInCommPhase[proc].push_back(std::vector()); + + while (stepIdxOnProc[proc] < idxLimitOnProc[proc] && stepTypeOnProc[proc][stepIdxOnProc[proc]] <= 1) { + for (auto indexAndNode : nodesComputed[proc][stepIdxOnProc[proc]]) { + computeStepsPerSupstep[proc][superstepIndex].push_back(indexAndNode.second); + nodesEvictedAfterCompute[proc][superstepIndex].push_back(std::vector()); } - for (vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]]) { - if (!nodes_evicted_after_compute[proc][superstepIndex].empty()) { - nodes_evicted_after_compute[proc][superstepIndex].back().push_back(node); + for (VertexIdx node : evictedAfter[proc][stepIdxOnProc[proc]]) { + if (!nodesEvictedAfterCompute[proc][superstepIndex].empty()) { + nodesEvictedAfterCompute[proc][superstepIndex].back().push_back(node); } else { // can only happen in special case: eviction in the very beginning - nodes_evicted_in_comm_phase[proc][superstepIndex].push_back(node); + nodesEvictedInCommPhase[proc][superstepIndex].push_back(node); } } - ++step_idx_on_proc[proc]; + ++stepIdxOnProc[proc]; } - while (step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] != 1) { - for (vertex_idx node : nodes_sent_up[proc][step_idx_on_proc[proc]]) { - nodes_sent_up_in_supstep[proc][superstepIndex].push_back(node); - already_has_blue[node] = true; + while (stepIdxOnProc[proc] < idxLimitOnProc[proc] && stepTypeOnProc[proc][stepIdxOnProc[proc]] != 
1) { + for (VertexIdx node : nodesSentUp[proc][stepIdxOnProc[proc]]) { + nodesSentUpInSupstep[proc][superstepIndex].push_back(node); + alreadyHasBlue[node] = true; } - for (vertex_idx node : nodes_sent_down[proc][step_idx_on_proc[proc]]) { - nodes_sent_down_in_supstep[proc][superstepIndex].push_back(node); + for (VertexIdx node : nodesSentDown[proc][stepIdxOnProc[proc]]) { + nodesSentDownInSupstep[proc][superstepIndex].push_back(node); } - for (vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]]) { - nodes_evicted_in_comm_phase[proc][superstepIndex].push_back(node); + for (VertexIdx node : evictedAfter[proc][stepIdxOnProc[proc]]) { + nodesEvictedInCommPhase[proc][superstepIndex].push_back(node); } - ++step_idx_on_proc[proc]; + ++stepIdxOnProc[proc]; } - if (step_idx_on_proc[proc] == max_time && !proc_finished[proc]) { - proc_finished[proc] = true; - ++nr_proc_finished; + if (stepIdxOnProc[proc] == maxTime_ && !procFinished[proc]) { + procFinished[proc] = true; + ++nrProcFinished; } } ++superstepIndex; } } - std::cout << "MPP ILP best solution value: " << model.GetDblAttr(COPT_DBLATTR_BESTOBJ) - << ", best lower bound: " << model.GetDblAttr(COPT_DBLATTR_BESTBND) << std::endl; - - schedule = PebblingSchedule(instance, - compute_steps_per_supstep, - nodes_evicted_after_compute, - nodes_sent_up_in_supstep, - nodes_sent_down_in_supstep, - nodes_evicted_in_comm_phase, - needs_blue_at_end, - has_red_in_beginning, - need_to_load_inputs); + std::cout << "MPP ILP best solution value: " << model_.GetDblAttr(COPT_DBLATTR_BESTOBJ) + << ", best lower bound: " << model_.GetDblAttr(COPT_DBLATTR_BESTBND) << std::endl; + + schedule = PebblingSchedule(instance, + computeStepsPerSupstep, + nodesEvictedAfterCompute, + nodesSentUpInSupstep, + nodesSentDownInSupstep, + nodesEvictedInCommPhase, + needsBlueAtEnd_, + hasRedInBeginning_, + needToLoadInputs_); } -template -void MultiProcessorPebbling::setInitialSolution( - const BspInstance &instance, - const std::vector>> 
&computeSteps, - const std::vector>> &sendUpSteps, - const std::vector>> &sendDownSteps, - const std::vector>> &nodesEvictedAfterStep) { - const unsigned N = static_cast(instance.numberOfVertices()); - - std::vector in_slow_mem(N, false); - if (need_to_load_inputs) { - for (vertex_idx node = 0; node < N; ++node) { - if (instance.getComputationalDag().in_degree(node) == 0) { - in_slow_mem[node] = true; +template +void MultiProcessorPebbling::SetInitialSolution( + const BspInstance &instance, + const std::vector>> &computeSteps, + const std::vector>> &sendUpSteps, + const std::vector>> &sendDownSteps, + const std::vector>> &nodesEvictedAfterStep) { + const unsigned n = static_cast(instance.NumberOfVertices()); + + std::vector inSlowMem(n, false); + if (needToLoadInputs_) { + for (VertexIdx node = 0; node < n; ++node) { + if (instance.GetComputationalDag().InDegree(node) == 0) { + inSlowMem[node] = true; } } } - std::vector> in_fast_mem(N, std::vector(instance.numberOfProcessors(), false)); - if (!has_red_in_beginning.empty()) { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - for (vertex_idx node : has_red_in_beginning[proc]) { - in_fast_mem[node][proc] = true; + std::vector> inFastMem(n, std::vector(instance.NumberOfProcessors(), false)); + if (!hasRedInBeginning_.empty()) { + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + for (VertexIdx node : hasRedInBeginning_[proc]) { + inFastMem[node][proc] = true; } } } - unsigned step = 0, new_step_idx = 0; + unsigned step = 0, newStepIdx = 0; for (; step < computeSteps[0].size(); ++step) { - for (vertex_idx node = 0; node < N; ++node) { - if (has_blue_exists[node][new_step_idx]) { - model.SetMipStart(has_blue[node][static_cast(new_step_idx)], static_cast(in_slow_mem[node])); + for (VertexIdx node = 0; node < n; ++node) { + if (hasBlueExists_[node][newStepIdx]) { + model_.SetMipStart(hasBlue_[node][static_cast(newStepIdx)], static_cast(inSlowMem[node])); } - for (unsigned 
proc = 0; proc < instance.numberOfProcessors(); ++proc) { - model.SetMipStart(has_red[node][proc][static_cast(new_step_idx)], static_cast(in_fast_mem[node][proc])); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + model_.SetMipStart(hasRed_[node][proc][static_cast(newStepIdx)], static_cast(inFastMem[node][proc])); } } - if (restrict_step_types) { + if (restrictStepTypes_) { // align step number with step type cycle's phase, if needed - bool skip_step = true; - while (skip_step) { - skip_step = false; - bool is_compute = false, is_send_up = false, is_send_down = false; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + bool skipStep = true; + while (skipStep) { + skipStep = false; + bool isCompute = false, isSendUp = false, isSendDown = false; + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { if (!computeSteps[proc][step].empty()) { - is_compute = true; + isCompute = true; } if (!sendUpSteps[proc][step].empty()) { - is_send_up = true; + isSendUp = true; } if (!sendDownSteps[proc][step].empty()) { - is_send_down = true; + isSendDown = true; } } - bool send_up_step_idx - = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1)) - || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle)); - bool send_down_step_idx - = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == 0)) - || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1)); + bool sendUpStepIdx + = (needToLoadInputs_ && (newStepIdx % (computeStepsPerCycle_ + 2) == computeStepsPerCycle_ + 1)) + || (!needToLoadInputs_ && (newStepIdx % (computeStepsPerCycle_ + 2) == computeStepsPerCycle_)); + bool sendDownStepIdx + = (needToLoadInputs_ && (newStepIdx % (computeStepsPerCycle_ + 2) == 0)) + || (!needToLoadInputs_ && (newStepIdx % (computeStepsPerCycle_ + 2) == computeStepsPerCycle_ 
+ 1)); - if (is_compute && (send_up_step_idx || send_down_step_idx)) { - skip_step = true; + if (isCompute && (sendUpStepIdx || sendDownStepIdx)) { + skipStep = true; } - if (is_send_up && !send_up_step_idx) { - skip_step = true; + if (isSendUp && !sendUpStepIdx) { + skipStep = true; } - if (is_send_down && !send_down_step_idx) { - skip_step = true; + if (isSendDown && !sendDownStepIdx) { + skipStep = true; } - if (skip_step) { - ++new_step_idx; - for (vertex_idx node = 0; node < N; ++node) { - if (has_blue_exists[node][new_step_idx]) { - model.SetMipStart(has_blue[node][static_cast(new_step_idx)], static_cast(in_slow_mem[node])); + if (skipStep) { + ++newStepIdx; + for (VertexIdx node = 0; node < n; ++node) { + if (hasBlueExists_[node][newStepIdx]) { + model_.SetMipStart(hasBlue_[node][static_cast(newStepIdx)], static_cast(inSlowMem[node])); } - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - model.SetMipStart(has_red[node][proc][static_cast(new_step_idx)], - static_cast(in_fast_mem[node][proc])); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + model_.SetMipStart(hasRed_[node][proc][static_cast(newStepIdx)], + static_cast(inFastMem[node][proc])); } } } } } - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - std::vector value_of_node(N, false); - for (vertex_idx node : computeSteps[proc][step]) { - value_of_node[node] = true; - if (compute_exists[node][proc][new_step_idx]) { - model.SetMipStart(compute[node][proc][static_cast(new_step_idx)], 1); + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + std::vector valueOfNode(n, false); + for (VertexIdx node : computeSteps[proc][step]) { + valueOfNode[node] = true; + if (computeExists_[node][proc][newStepIdx]) { + model_.SetMipStart(compute_[node][proc][static_cast(newStepIdx)], 1); } - in_fast_mem[node][proc] = true; + inFastMem[node][proc] = true; } - for (vertex_idx node : computeSteps[proc][step]) { - if 
(!value_of_node[node]) { - if (compute_exists[node][proc][new_step_idx]) { - model.SetMipStart(compute[node][proc][static_cast(new_step_idx)], 0); + for (VertexIdx node : computeSteps[proc][step]) { + if (!valueOfNode[node]) { + if (computeExists_[node][proc][newStepIdx]) { + model_.SetMipStart(compute_[node][proc][static_cast(newStepIdx)], 0); } } else { - value_of_node[node] = false; + valueOfNode[node] = false; } } - for (vertex_idx node : sendUpSteps[proc][step]) { - value_of_node[node] = true; - if (send_up_exists[node][proc][new_step_idx]) { - model.SetMipStart(send_up[node][proc][static_cast(new_step_idx)], 1); + for (VertexIdx node : sendUpSteps[proc][step]) { + valueOfNode[node] = true; + if (sendUpExists_[node][proc][newStepIdx]) { + model_.SetMipStart(sendUp_[node][proc][static_cast(newStepIdx)], 1); } - in_slow_mem[node] = true; + inSlowMem[node] = true; } - for (vertex_idx node : sendUpSteps[proc][step]) { - if (!value_of_node[node]) { - if (send_up_exists[node][proc][new_step_idx]) { - model.SetMipStart(send_up[node][proc][static_cast(new_step_idx)], 0); + for (VertexIdx node : sendUpSteps[proc][step]) { + if (!valueOfNode[node]) { + if (sendUpExists_[node][proc][newStepIdx]) { + model_.SetMipStart(sendUp_[node][proc][static_cast(newStepIdx)], 0); } } else { - value_of_node[node] = false; + valueOfNode[node] = false; } } - for (vertex_idx node : sendDownSteps[proc][step]) { - value_of_node[node] = true; - if (send_down_exists[node][proc][new_step_idx]) { - model.SetMipStart(send_down[node][proc][static_cast(new_step_idx)], 1); + for (VertexIdx node : sendDownSteps[proc][step]) { + valueOfNode[node] = true; + if (sendDownExists_[node][proc][newStepIdx]) { + model_.SetMipStart(sendDown_[node][proc][static_cast(newStepIdx)], 1); } - in_fast_mem[node][proc] = true; + inFastMem[node][proc] = true; } - for (vertex_idx node : sendDownSteps[proc][step]) { - if (!value_of_node[node]) { - if (send_down_exists[node][proc][new_step_idx]) { - 
model.SetMipStart(send_down[node][proc][static_cast(new_step_idx)], 0); + for (VertexIdx node : sendDownSteps[proc][step]) { + if (!valueOfNode[node]) { + if (sendDownExists_[node][proc][newStepIdx]) { + model_.SetMipStart(sendDown_[node][proc][static_cast(newStepIdx)], 0); } } else { - value_of_node[node] = false; + valueOfNode[node] = false; } } - for (vertex_idx node : nodesEvictedAfterStep[proc][step]) { - in_fast_mem[node][proc] = false; + for (VertexIdx node : nodesEvictedAfterStep[proc][step]) { + inFastMem[node][proc] = false; } } - ++new_step_idx; + ++newStepIdx; } - for (; new_step_idx < max_time; ++new_step_idx) { - for (vertex_idx node = 0; node < N; ++node) { - if (has_blue_exists[node][new_step_idx]) { - model.SetMipStart(has_blue[node][static_cast(new_step_idx)], static_cast(in_slow_mem[node])); - } - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - model.SetMipStart(has_red[node][proc][static_cast(new_step_idx)], 0); - if (compute_exists[node][proc][new_step_idx]) { - model.SetMipStart(compute[node][proc][static_cast(new_step_idx)], 0); + for (; newStepIdx < maxTime_; ++newStepIdx) { + for (VertexIdx node = 0; node < n; ++node) { + if (hasBlueExists_[node][newStepIdx]) { + model_.SetMipStart(hasBlue_[node][static_cast(newStepIdx)], static_cast(inSlowMem[node])); + } + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { + model_.SetMipStart(hasRed_[node][proc][static_cast(newStepIdx)], 0); + if (computeExists_[node][proc][newStepIdx]) { + model_.SetMipStart(compute_[node][proc][static_cast(newStepIdx)], 0); } - if (send_up_exists[node][proc][new_step_idx]) { - model.SetMipStart(send_up[node][proc][static_cast(new_step_idx)], 0); + if (sendUpExists_[node][proc][newStepIdx]) { + model_.SetMipStart(sendUp_[node][proc][static_cast(newStepIdx)], 0); } - if (send_down_exists[node][proc][new_step_idx]) { - model.SetMipStart(send_down[node][proc][static_cast(new_step_idx)], 0); + if 
(sendDownExists_[node][proc][newStepIdx]) { + model_.SetMipStart(sendDown_[node][proc][static_cast(newStepIdx)], 0); } } } } - model.LoadMipStart(); + model_.LoadMipStart(); } -template -unsigned MultiProcessorPebbling::computeMaxTimeForInitialSolution( - const BspInstance &instance, - const std::vector>> &computeSteps, - const std::vector>> &sendUpSteps, - const std::vector>> &sendDownSteps) const { - if (!restrict_step_types) { +template +unsigned MultiProcessorPebbling::ComputeMaxTimeForInitialSolution( + const BspInstance &instance, + const std::vector>> &computeSteps, + const std::vector>> &sendUpSteps, + const std::vector>> &sendDownSteps) const { + if (!restrictStepTypes_) { return static_cast(computeSteps[0].size()) + 3; } - unsigned step = 0, new_step_idx = 0; + unsigned step = 0, newStepIdx = 0; for (; step < computeSteps[0].size(); ++step) { // align step number with step type cycle's phase, if needed - bool skip_step = true; - while (skip_step) { - skip_step = false; - bool is_compute = false, is_send_up = false, is_send_down = false; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + bool skipStep = true; + while (skipStep) { + skipStep = false; + bool isCompute = false, isSendUp = false, isSendDown = false; + for (unsigned proc = 0; proc < instance.NumberOfProcessors(); ++proc) { if (!computeSteps[proc][step].empty()) { - is_compute = true; + isCompute = true; } if (!sendUpSteps[proc][step].empty()) { - is_send_up = true; + isSendUp = true; } if (!sendDownSteps[proc][step].empty()) { - is_send_down = true; + isSendDown = true; } } - bool send_up_step_idx - = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1)) - || (!need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == compute_steps_per_cycle)); - bool send_down_step_idx - = (need_to_load_inputs && (new_step_idx % (compute_steps_per_cycle + 2) == 0)) - || (!need_to_load_inputs && (new_step_idx % 
(compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1)); + bool sendUpStepIdx = (needToLoadInputs_ && (newStepIdx % (computeStepsPerCycle_ + 2) == computeStepsPerCycle_ + 1)) + || (!needToLoadInputs_ && (newStepIdx % (computeStepsPerCycle_ + 2) == computeStepsPerCycle_)); + bool sendDownStepIdx + = (needToLoadInputs_ && (newStepIdx % (computeStepsPerCycle_ + 2) == 0)) + || (!needToLoadInputs_ && (newStepIdx % (computeStepsPerCycle_ + 2) == computeStepsPerCycle_ + 1)); - if (is_compute && (send_up_step_idx || send_down_step_idx)) { - skip_step = true; + if (isCompute && (sendUpStepIdx || sendDownStepIdx)) { + skipStep = true; } - if (is_send_up && !send_up_step_idx) { - skip_step = true; + if (isSendUp && !sendUpStepIdx) { + skipStep = true; } - if (is_send_down && !send_down_step_idx) { - skip_step = true; + if (isSendDown && !sendDownStepIdx) { + skipStep = true; } - if (skip_step) { - ++new_step_idx; + if (skipStep) { + ++newStepIdx; } } - ++new_step_idx; + ++newStepIdx; } - new_step_idx += compute_steps_per_cycle + 2; - return new_step_idx; + newStepIdx += computeStepsPerCycle_ + 2; + return newStepIdx; } -template -bool MultiProcessorPebbling::hasEmptyStep(const BspInstance &instance) { - for (unsigned step = 0; step < max_time; ++step) { +template +bool MultiProcessorPebbling::HasEmptyStep(const BspInstance &instance) { + for (unsigned step = 0; step < maxTime_; ++step) { bool empty = true; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - if ((compute_exists[node][processor][step] && compute[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) - || (send_up_exists[node][processor][step] && send_up[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) - || (send_down_exists[node][processor][step] - && send_down[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99)) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) 
{ + for (unsigned processor = 0; processor < instance.NumberOfProcessors(); processor++) { + if ((computeExists_[node][processor][step] && compute_[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) + || (sendUpExists_[node][processor][step] && sendUp_[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) + || (sendDownExists_[node][processor][step] && sendDown_[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99)) { empty = false; } } diff --git a/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp b/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp index 0544eaab..db44d0dd 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp @@ -29,96 +29,96 @@ limitations under the License. namespace osp { -template -class PebblingPartialILP : public Scheduler { - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, +template +class PebblingPartialILP : public Scheduler { + static_assert(isComputationalDagV, "PebblingSchedule can only be used with computational DAGs."); + static_assert(std::is_same_v, VCommwT>, "PebblingSchedule requires work and comm. 
weights to have the same type."); - using vertex_idx = vertex_idx_t; - using cost_type = v_workw_t; + using VertexIdx = VertexIdxT; + using CostType = VWorkwT; - unsigned minPartitionSize = 50, maxPartitionSize = 100; - unsigned time_seconds_for_subILPs = 600; + unsigned minPartitionSize_ = 50, maxPartitionSize_ = 100; + unsigned timeSecondsForSubIlPs_ = 600; - bool asynchronous = false; - bool verbose = false; + bool asynchronous_ = false; + bool verbose_ = false; - std::map, unsigned> part_and_nodetype_to_new_index; + std::map, unsigned> partAndNodeTypeToNewIndex_; public: PebblingPartialILP() {} virtual ~PebblingPartialILP() = default; - RETURN_STATUS computePebbling(PebblingSchedule &schedule); + ReturnStatus ComputePebbling(PebblingSchedule &schedule); // not used, only here for using scheduler class base functionality (status enums, timelimits, etc) - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override; + virtual ReturnStatus ComputeSchedule(BspSchedule &schedule) override; - Graph_t contractByPartition(const BspInstance &instance, const std::vector &node_to_part_assignment); + GraphT ContractByPartition(const BspInstance &instance, const std::vector &nodeToPartAssignment); /** * @brief Get the name of the schedule. * * @return The name of the schedule. 
*/ - virtual std::string getScheduleName() const override { return "PebblingPartialILP"; } + virtual std::string GetScheduleName() const override { return "PebblingPartialILP"; } // getters and setters for problem parameters - inline std::pair getMinAndMaxSize() const { return std::make_pair(minPartitionSize, maxPartitionSize); } + inline std::pair GetMinAndMaxSize() const { return std::make_pair(minPartitionSize_, maxPartitionSize_); } - inline void setMinSize(const unsigned min_size) { - minPartitionSize = min_size; - maxPartitionSize = 2 * min_size; + inline void SetMinSize(const unsigned minSize) { + minPartitionSize_ = minSize; + maxPartitionSize_ = 2 * minSize; } - inline void setMinAndMaxSize(const std::pair min_and_max) { - minPartitionSize = min_and_max.first; - maxPartitionSize = min_and_max.second; + inline void SetMinAndMaxSize(const std::pair minAndMax) { + minPartitionSize_ = minAndMax.first; + maxPartitionSize_ = minAndMax.second; } - inline void setAsync(const bool async_) { asynchronous = async_; } + inline void SetAsync(const bool async) { asynchronous_ = async; } - inline void setSecondsForSubILP(const unsigned seconds_) { time_seconds_for_subILPs = seconds_; } + inline void SetSecondsForSubIlp(const unsigned seconds) { timeSecondsForSubIlPs_ = seconds; } - inline void setVerbose(const bool verbose_) { verbose = verbose_; } + inline void SetVerbose(const bool verbose) { verbose_ = verbose; } }; -template -RETURN_STATUS PebblingPartialILP::computePebbling(PebblingSchedule &schedule) { - const BspInstance &instance = schedule.getInstance(); +template +ReturnStatus PebblingPartialILP::ComputePebbling(PebblingSchedule &schedule) { + const BspInstance &instance = schedule.GetInstance(); - if (!PebblingSchedule::hasValidSolution(instance)) { - return RETURN_STATUS::ERROR; + if (!PebblingSchedule::HasValidSolution(instance)) { + return ReturnStatus::ERROR; } // STEP 1: divide DAG acyclicly with partitioning ILP - AcyclicDagDivider dag_divider; - 
dag_divider.setMinAndMaxSize({minPartitionSize, maxPartitionSize}); - std::vector assignment_to_parts = dag_divider.computePartitioning(instance); - unsigned nr_parts = *std::max_element(assignment_to_parts.begin(), assignment_to_parts.end()) + 1; + AcyclicDagDivider dagDivider; + dagDivider.SetMinAndMaxSize({minPartitionSize_, maxPartitionSize_}); + std::vector assignmentToParts = dagDivider.ComputePartitioning(instance); + unsigned nrParts = *std::max_element(assignmentToParts.begin(), assignmentToParts.end()) + 1; // TODO remove source nodes before this? - Graph_t contracted_dag = contractByPartition(instance, assignment_to_parts); + GraphT contractedDag = ContractByPartition(instance, assignmentToParts); // STEP 2: develop high-level multischedule on parts - BspInstance contracted_instance( - contracted_dag, instance.getArchitecture(), instance.getNodeProcessorCompatibilityMatrix()); + BspInstance contractedInstance( + contractedDag, instance.GetArchitecture(), instance.GetNodeProcessorCompatibilityMatrix()); - SubproblemMultiScheduling multi_scheduler; - std::vector> processors_to_parts_and_types; - multi_scheduler.computeMultiSchedule(contracted_instance, processors_to_parts_and_types); + SubproblemMultiScheduling multiScheduler; + std::vector> processorsToPartsAndTypes; + multiScheduler.ComputeMultiSchedule(contractedInstance, processorsToPartsAndTypes); - std::vector> processors_to_parts(nr_parts); - for (unsigned part = 0; part < nr_parts; ++part) { - for (unsigned type = 0; type < instance.getComputationalDag().num_vertex_types(); ++type) { - if (part_and_nodetype_to_new_index.find({part, type}) != part_and_nodetype_to_new_index.end()) { - unsigned new_index = part_and_nodetype_to_new_index[{part, type}]; - for (unsigned proc : processors_to_parts_and_types[new_index]) { - processors_to_parts[part].insert(proc); + std::vector> processorsToParts(nrParts); + for (unsigned part = 0; part < nrParts; ++part) { + for (unsigned type = 0; type < 
instance.GetComputationalDag().NumVertexTypes(); ++type) { + if (partAndNodeTypeToNewIndex_.find({part, type}) != partAndNodeTypeToNewIndex_.end()) { + unsigned newIndex = partAndNodeTypeToNewIndex_[{part, type}]; + for (unsigned proc : processorsToPartsAndTypes[newIndex]) { + processorsToParts[part].insert(proc); } } } @@ -127,61 +127,61 @@ RETURN_STATUS PebblingPartialILP::computePebbling(PebblingSchedule> nodes_in_part(nr_parts), extra_sources(nr_parts); - std::vector> original_node_id(nr_parts); - std::vector> original_proc_id(nr_parts); - for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) { - if (instance.getComputationalDag().in_degree(node) > 0) { - nodes_in_part[assignment_to_parts[node]].insert(node); + std::vector> nodesInPart(nrParts), extraSources(nrParts); + std::vector> originalNodeId(nrParts); + std::vector> originalProcId(nrParts); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); ++node) { + if (instance.GetComputationalDag().InDegree(node) > 0) { + nodesInPart[assignmentToParts[node]].insert(node); } else { - extra_sources[assignment_to_parts[node]].insert(node); + extraSources[assignmentToParts[node]].insert(node); } - for (const vertex_idx &pred : instance.getComputationalDag().parents(node)) { - if (assignment_to_parts[node] != assignment_to_parts[pred]) { - extra_sources[assignment_to_parts[node]].insert(pred); + for (const VertexIdx &pred : instance.GetComputationalDag().Parents(node)) { + if (assignmentToParts[node] != assignmentToParts[pred]) { + extraSources[assignmentToParts[node]].insert(pred); } } } - std::vector subDags; - for (unsigned part = 0; part < nr_parts; ++part) { - Graph_t dag; - create_induced_subgraph(instance.getComputationalDag(), dag, nodes_in_part[part], extra_sources[part]); + std::vector subDags; + for (unsigned part = 0; part < nrParts; ++part) { + GraphT dag; + CreateInducedSubgraph(instance.GetComputationalDag(), dag, nodesInPart[part], extraSources[part]); subDags.push_back(dag); // 
set source nodes to a new type, so that they are compatible with any processor - unsigned artificial_type_for_sources = subDags.back().num_vertex_types(); - for (vertex_idx node_idx = 0; node_idx < extra_sources[part].size(); ++node_idx) { - subDags.back().set_vertex_type(node_idx, artificial_type_for_sources); + unsigned artificialTypeForSources = subDags.back().NumVertexTypes(); + for (VertexIdx nodeIdx = 0; nodeIdx < extraSources[part].size(); ++nodeIdx) { + subDags.back().SetVertexType(nodeIdx, artificialTypeForSources); } } - std::vector isomorphicTo(nr_parts, UINT_MAX); + std::vector isomorphicTo(nrParts, UINT_MAX); - std::cout << "Number of parts: " << nr_parts << std::endl; + std::cout << "Number of parts: " << nrParts << std::endl; - for (unsigned part = 0; part < nr_parts; ++part) { - for (unsigned other_part = part + 1; other_part < nr_parts; ++other_part) { - if (isomorphicTo[other_part] < UINT_MAX) { + for (unsigned part = 0; part < nrParts; ++part) { + for (unsigned otherPart = part + 1; otherPart < nrParts; ++otherPart) { + if (isomorphicTo[otherPart] < UINT_MAX) { continue; } bool isomorphic = true; - if (!checkOrderedIsomorphism(subDags[part], subDags[other_part])) { + if (!CheckOrderedIsomorphism(subDags[part], subDags[otherPart])) { continue; } - std::vector proc_assigned_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - std::vector other_proc_assigned_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for (unsigned proc : processors_to_parts[part]) { - ++proc_assigned_per_type[instance.getArchitecture().processorType(proc)]; + std::vector procAssignedPerType(instance.GetArchitecture().GetNumberOfProcessorTypes(), 0); + std::vector otherProcAssignedPerType(instance.GetArchitecture().GetNumberOfProcessorTypes(), 0); + for (unsigned proc : processorsToParts[part]) { + ++procAssignedPerType[instance.GetArchitecture().ProcessorType(proc)]; } - for (unsigned proc : processors_to_parts[other_part]) { - 
++other_proc_assigned_per_type[instance.getArchitecture().processorType(proc)]; + for (unsigned proc : processorsToParts[otherPart]) { + ++otherProcAssignedPerType[instance.GetArchitecture().ProcessorType(proc)]; } - for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) { - if (proc_assigned_per_type[proc_type] != other_proc_assigned_per_type[proc_type]) { + for (unsigned procType = 0; procType < instance.GetArchitecture().GetNumberOfProcessorTypes(); ++procType) { + if (procAssignedPerType[procType] != otherProcAssignedPerType[procType]) { isomorphic = false; } } @@ -190,150 +190,150 @@ RETURN_STATUS PebblingPartialILP::computePebbling(PebblingSchedule> in_fast_mem(instance.numberOfProcessors()); - std::vector> pebbling(nr_parts); - std::vector> subArch(nr_parts); - std::vector> subInstance(nr_parts); + std::vector> inFastMem(instance.NumberOfProcessors()); + std::vector> pebbling(nrParts); + std::vector> subArch(nrParts); + std::vector> subInstance(nrParts); // to handle the initial memory content for isomorphic parts - std::vector>> has_reds_in_beginning( - nr_parts, std::vector>(instance.numberOfProcessors())); + std::vector>> hasRedsInBeginning( + nrParts, std::vector>(instance.NumberOfProcessors())); - for (unsigned part = 0; part < nr_parts; ++part) { + for (unsigned part = 0; part < nrParts; ++part) { std::cout << "part " << part << std::endl; // set up sub-DAG - Graph_t &subDag = subDags[part]; - std::map local_id; - vertex_idx node_idx = 0; - for (vertex_idx node : extra_sources[part]) { - local_id[node] = node_idx; - original_node_id[part][node_idx] = node; - ++node_idx; + GraphT &subDag = subDags[part]; + std::map localId; + VertexIdx nodeIdx = 0; + for (VertexIdx node : extraSources[part]) { + localId[node] = nodeIdx; + originalNodeId[part][nodeIdx] = node; + ++nodeIdx; } - for (vertex_idx node : nodes_in_part[part]) { - local_id[node] = node_idx; - original_node_id[part][node_idx] = node; - 
++node_idx; + for (VertexIdx node : nodesInPart[part]) { + localId[node] = nodeIdx; + originalNodeId[part][nodeIdx] = node; + ++nodeIdx; } - std::set needs_blue_at_end; - for (vertex_idx node : nodes_in_part[part]) { - for (const vertex_idx &succ : instance.getComputationalDag().children(node)) { - if (assignment_to_parts[node] != assignment_to_parts[succ]) { - needs_blue_at_end.insert(local_id[node]); + std::set needsBlueAtEnd; + for (VertexIdx node : nodesInPart[part]) { + for (const VertexIdx &succ : instance.GetComputationalDag().Children(node)) { + if (assignmentToParts[node] != assignmentToParts[succ]) { + needsBlueAtEnd.insert(localId[node]); } } - if (instance.getComputationalDag().out_degree(node) == 0) { - needs_blue_at_end.insert(local_id[node]); + if (instance.GetComputationalDag().OutDegree(node) == 0) { + needsBlueAtEnd.insert(localId[node]); } } // set up sub-architecture - subArch[part].setNumberOfProcessors(static_cast(processors_to_parts[part].size())); - unsigned proc_index = 0; - for (unsigned proc : processors_to_parts[part]) { - subArch[part].setProcessorType(proc_index, instance.getArchitecture().processorType(proc)); - subArch[part].setMemoryBound(instance.getArchitecture().memoryBound(proc), proc_index); - original_proc_id[part][proc_index] = proc; - ++proc_index; + subArch[part].SetNumberOfProcessors(static_cast(processorsToParts[part].size())); + unsigned procIndex = 0; + for (unsigned proc : processorsToParts[part]) { + subArch[part].SetProcessorType(procIndex, instance.GetArchitecture().ProcessorType(proc)); + subArch[part].SetMemoryBound(instance.GetArchitecture().MemoryBound(proc), procIndex); + originalProcId[part][procIndex] = proc; + ++procIndex; } - subArch[part].setCommunicationCosts(instance.getArchitecture().communicationCosts()); - subArch[part].setSynchronisationCosts(instance.getArchitecture().synchronisationCosts()); + subArch[part].SetCommunicationCosts(instance.GetArchitecture().CommunicationCosts()); + 
subArch[part].SetSynchronisationCosts(instance.GetArchitecture().SynchronisationCosts()); // no NUMA parameters for now // skip if isomorphic to previous part if (isomorphicTo[part] < UINT_MAX) { pebbling[part] = pebbling[isomorphicTo[part]]; - has_reds_in_beginning[part] = has_reds_in_beginning[isomorphicTo[part]]; + hasRedsInBeginning[part] = hasRedsInBeginning[isomorphicTo[part]]; continue; } // set node-processor compatibility matrix - std::vector> comp_matrix = instance.getNodeProcessorCompatibilityMatrix(); - comp_matrix.emplace_back(instance.getArchitecture().getNumberOfProcessorTypes(), true); - subInstance[part] = BspInstance(subDag, subArch[part], comp_matrix); + std::vector> compMatrix = instance.GetNodeProcessorCompatibilityMatrix(); + compMatrix.emplace_back(instance.GetArchitecture().GetNumberOfProcessorTypes(), true); + subInstance[part] = BspInstance(subDag, subArch[part], compMatrix); // currently we only allow the input laoding scenario - the case where this is false is unmaintained/untested - bool need_to_load_inputs = true; + bool needToLoadInputs = true; // keep in fast memory what's relevant, remove the rest - for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) { - has_reds_in_beginning[part][proc].clear(); - std::set new_content_fast_mem; - for (vertex_idx node : in_fast_mem[original_proc_id[part][proc]]) { - if (local_id.find(node) != local_id.end()) { - has_reds_in_beginning[part][proc].insert(local_id[node]); - new_content_fast_mem.insert(node); + for (unsigned proc = 0; proc < processorsToParts[part].size(); ++proc) { + hasRedsInBeginning[part][proc].clear(); + std::set newContentFastMem; + for (VertexIdx node : inFastMem[originalProcId[part][proc]]) { + if (localId.find(node) != localId.end()) { + hasRedsInBeginning[part][proc].insert(localId[node]); + newContentFastMem.insert(node); } } - in_fast_mem[original_proc_id[part][proc]] = new_content_fast_mem; + inFastMem[originalProcId[part][proc]] = newContentFastMem; } 
// heuristic solution for baseline - PebblingSchedule heuristic_pebbling; - GreedyBspScheduler greedy_scheduler; - BspSchedule bsp_heuristic(subInstance[part]); - greedy_scheduler.computeSchedule(bsp_heuristic); - - std::set extra_source_ids; - for (vertex_idx idx = 0; idx < extra_sources[part].size(); ++idx) { - extra_source_ids.insert(idx); + PebblingSchedule heuristicPebbling; + GreedyBspScheduler greedyScheduler; + BspSchedule bspHeuristic(subInstance[part]); + greedyScheduler.ComputeSchedule(bspHeuristic); + + std::set extraSourceIds; + for (VertexIdx idx = 0; idx < extraSources[part].size(); ++idx) { + extraSourceIds.insert(idx); } - heuristic_pebbling.setNeedToLoadInputs(true); - heuristic_pebbling.SetExternalSources(extra_source_ids); - heuristic_pebbling.SetNeedsBlueAtEnd(needs_blue_at_end); - heuristic_pebbling.SetHasRedInBeginning(has_reds_in_beginning[part]); - heuristic_pebbling.ConvertFromBsp(bsp_heuristic, PebblingSchedule::CACHE_EVICTION_STRATEGY::FORESIGHT); + heuristicPebbling.SetNeedToLoadInputs(true); + heuristicPebbling.SetExternalSources(extraSourceIds); + heuristicPebbling.SetNeedsBlueAtEnd(needsBlueAtEnd); + heuristicPebbling.SetHasRedInBeginning(hasRedsInBeginning[part]); + heuristicPebbling.ConvertFromBsp(bspHeuristic, PebblingSchedule::CacheEvictionStrategy::FORESIGHT); - heuristic_pebbling.removeEvictStepsFromEnd(); - pebbling[part] = heuristic_pebbling; - cost_type heuristicCost = asynchronous ? heuristic_pebbling.computeAsynchronousCost() : heuristic_pebbling.computeCost(); + heuristicPebbling.RemoveEvictStepsFromEnd(); + pebbling[part] = heuristicPebbling; + CostType heuristicCost = asynchronous_ ? heuristicPebbling.ComputeAsynchronousCost() : heuristicPebbling.ComputeCost(); - if (!heuristic_pebbling.isValid()) { + if (!heuristicPebbling.IsValid()) { std::cout << "ERROR: Pebbling heuristic INVALID!" 
<< std::endl; } // solution with subILP - MultiProcessorPebbling mpp; - mpp.setVerbose(verbose); - mpp.setTimeLimitSeconds(time_seconds_for_subILPs); - mpp.setMaxTime(2 * maxPartitionSize); // just a heuristic choice, does not guarantee feasibility! - mpp.setNeedsBlueAtEnd(needs_blue_at_end); - mpp.setNeedToLoadInputs(need_to_load_inputs); - mpp.setHasRedInBeginning(has_reds_in_beginning[part]); - - PebblingSchedule pebblingILP(subInstance[part]); - RETURN_STATUS status = mpp.computePebblingWithInitialSolution(heuristic_pebbling, pebblingILP, asynchronous); - if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) { - if (!pebblingILP.isValid()) { + MultiProcessorPebbling mpp; + mpp.SetVerbose(verbose_); + mpp.SetTimeLimitSeconds(timeSecondsForSubIlPs_); + mpp.SetMaxTime(2 * maxPartitionSize_); // just a heuristic choice, does not guarantee feasibility! + mpp.SetNeedsBlueAtEnd(needsBlueAtEnd); + mpp.SetNeedToLoadInputs(needToLoadInputs); + mpp.SetHasRedInBeginning(hasRedsInBeginning[part]); + + PebblingSchedule pebblingILP(subInstance[part]); + ReturnStatus status = mpp.ComputePebblingWithInitialSolution(heuristicPebbling, pebblingILP, asynchronous_); + if (status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND) { + if (!pebblingILP.IsValid()) { std::cout << "ERROR: Pebbling ILP INVALID!" << std::endl; } - pebblingILP.removeEvictStepsFromEnd(); - cost_type ILP_cost = asynchronous ? pebblingILP.computeAsynchronousCost() : pebblingILP.computeCost(); - if (ILP_cost < heuristicCost) { + pebblingILP.RemoveEvictStepsFromEnd(); + CostType ilpCost = asynchronous_ ? pebblingILP.ComputeAsynchronousCost() : pebblingILP.ComputeCost(); + if (ilpCost < heuristicCost) { pebbling[part] = pebblingILP; - std::cout << "ILP chosen instead of greedy. (" << ILP_cost << " < " << heuristicCost << ")" << std::endl; + std::cout << "ILP chosen instead of greedy. 
(" << ilpCost << " < " << heuristicCost << ")" << std::endl; } else { - std::cout << "Greedy chosen instead of ILP. (" << heuristicCost << " < " << ILP_cost << ")" << std::endl; + std::cout << "Greedy chosen instead of ILP. (" << heuristicCost << " < " << ilpCost << ")" << std::endl; } // save fast memory content for next subproblem - std::vector> fast_mem_content_at_end = pebbling[part].getMemContentAtEnd(); - for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) { - in_fast_mem[original_proc_id[part][proc]].clear(); - for (vertex_idx node : fast_mem_content_at_end[proc]) { - in_fast_mem[original_proc_id[part][proc]].insert(original_node_id[part][node]); + std::vector> fastMemContentAtEnd = pebbling[part].GetMemContentAtEnd(); + for (unsigned proc = 0; proc < processorsToParts[part].size(); ++proc) { + inFastMem[originalProcId[part][proc]].clear(); + for (VertexIdx node : fastMemContentAtEnd[proc]) { + inFastMem[originalProcId[part][proc]].insert(originalNodeId[part][node]); } } } else { @@ -342,65 +342,61 @@ RETURN_STATUS PebblingPartialILP::computePebbling(PebblingSchedule -Graph_t PebblingPartialILP::contractByPartition(const BspInstance &instance, - const std::vector &node_to_part_assignment) { - const auto &G = instance.getComputationalDag(); +template +GraphT PebblingPartialILP::ContractByPartition(const BspInstance &instance, + const std::vector &nodeToPartAssignment) { + const auto &g = instance.GetComputationalDag(); - part_and_nodetype_to_new_index.clear(); + partAndNodeTypeToNewIndex_.clear(); - unsigned nr_new_nodes = 0; - for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) { - if (part_and_nodetype_to_new_index.find({node_to_part_assignment[node], G.vertex_type(node)}) - == part_and_nodetype_to_new_index.end()) { - part_and_nodetype_to_new_index[{node_to_part_assignment[node], G.vertex_type(node)}] = nr_new_nodes; - ++nr_new_nodes; + unsigned nrNewNodes = 0; + for (VertexIdx node = 0; node < 
instance.NumberOfVertices(); ++node) { + if (partAndNodeTypeToNewIndex_.find({nodeToPartAssignment[node], g.VertexType(node)}) == partAndNodeTypeToNewIndex_.end()) { + partAndNodeTypeToNewIndex_[{nodeToPartAssignment[node], g.VertexType(node)}] = nrNewNodes; + ++nrNewNodes; } } - Graph_t contracted; - for (vertex_idx node = 0; node < nr_new_nodes; ++node) { - contracted.add_vertex(0, 0, 0); + GraphT contracted; + for (VertexIdx node = 0; node < nrNewNodes; ++node) { + contracted.AddVertex(0, 0, 0); } - std::set> edges; + std::set> edges; - for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) { - vertex_idx node_new_index = part_and_nodetype_to_new_index[{node_to_part_assignment[node], G.vertex_type(node)}]; - for (const vertex_idx &succ : instance.getComputationalDag().children(node)) { - if (node_to_part_assignment[node] != node_to_part_assignment[succ]) { - edges.emplace(node_new_index, part_and_nodetype_to_new_index[{node_to_part_assignment[succ], G.vertex_type(succ)}]); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); ++node) { + VertexIdx nodeNewIndex = partAndNodeTypeToNewIndex_[{nodeToPartAssignment[node], g.VertexType(node)}]; + for (const VertexIdx &succ : instance.GetComputationalDag().Children(node)) { + if (nodeToPartAssignment[node] != nodeToPartAssignment[succ]) { + edges.emplace(nodeNewIndex, partAndNodeTypeToNewIndex_[{nodeToPartAssignment[succ], g.VertexType(succ)}]); } } - contracted.set_vertex_work_weight(node_new_index, - contracted.vertex_work_weight(node_new_index) + G.vertex_work_weight(node)); - contracted.set_vertex_comm_weight(node_new_index, - contracted.vertex_comm_weight(node_new_index) + G.vertex_comm_weight(node)); - contracted.set_vertex_mem_weight(node_new_index, contracted.vertex_mem_weight(node_new_index) + G.vertex_mem_weight(node)); - contracted.set_vertex_type(node_new_index, G.vertex_type(node)); + contracted.SetVertexWorkWeight(nodeNewIndex, contracted.VertexWorkWeight(nodeNewIndex) + 
g.VertexWorkWeight(node)); + contracted.SetVertexCommWeight(nodeNewIndex, contracted.VertexCommWeight(nodeNewIndex) + g.VertexCommWeight(node)); + contracted.SetVertexMemWeight(nodeNewIndex, contracted.VertexMemWeight(nodeNewIndex) + g.VertexMemWeight(node)); + contracted.SetVertexType(nodeNewIndex, g.VertexType(node)); } for (auto edge : edges) { - if constexpr (has_edge_weights_v) { - contracted.add_edge(edge.first, edge.second, 1); + if constexpr (hasEdgeWeightsV) { + contracted.AddEdge(edge.first, edge.second, 1); } else { - contracted.add_edge(edge.first, edge.second); + contracted.AddEdge(edge.first, edge.second); } } return contracted; } -template -RETURN_STATUS PebblingPartialILP::computeSchedule(BspSchedule &) { - return RETURN_STATUS::ERROR; +template +ReturnStatus PebblingPartialILP::ComputeSchedule(BspSchedule &) { + return ReturnStatus::ERROR; } } // namespace osp diff --git a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp index f969be72..6ab5534b 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp @@ -26,281 +26,281 @@ limitations under the License. 
namespace osp { -template +template class AcyclicDagDivider { - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); + static_assert(isComputationalDagV, "PebblingSchedule can only be used with computational DAGs."); protected: - using vertex_idx = vertex_idx_t; + using VertexIdx = VertexIdxT; - unsigned minPartitionSize = 40, maxPartitionSize = 80; - bool ignore_sources_in_size = true; + unsigned minPartitionSize_ = 40, maxPartitionSize_ = 80; + bool ignoreSourcesInSize_ = true; - std::vector getTopologicalSplit(const Graph_t &G, - std::pair min_and_max, - const std::vector &is_original_source) const; + std::vector GetTopologicalSplit(const GraphT &g, + std::pair minAndMax, + const std::vector &isOriginalSource) const; - v_commw_t static getSplitCost(const Graph_t &G, const std::vector &node_to_part); + VCommwT static GetSplitCost(const GraphT &g, const std::vector &nodeToPart); public: AcyclicDagDivider() {} virtual ~AcyclicDagDivider() = default; - std::vector computePartitioning(const BspInstance &instance); + std::vector ComputePartitioning(const BspInstance &instance); // getters and setters for problem parameters - inline std::pair getMinAndMaxSize() const { return std::make_pair(minPartitionSize, maxPartitionSize); } + inline std::pair GetMinAndMaxSize() const { return std::make_pair(minPartitionSize_, maxPartitionSize_); } - inline void setMinAndMaxSize(const std::pair min_and_max) { - minPartitionSize = min_and_max.first; - maxPartitionSize = min_and_max.second; + inline void SetMinAndMaxSize(const std::pair minAndMax) { + minPartitionSize_ = minAndMax.first; + maxPartitionSize_ = minAndMax.second; } - inline void setIgnoreSources(const bool ignore_) { ignore_sources_in_size = ignore_; } + inline void SetIgnoreSources(const bool ignore) { ignoreSourcesInSize_ = ignore; } }; -template -std::vector AcyclicDagDivider::computePartitioning(const BspInstance &instance) { - const unsigned N = 
static_cast(instance.numberOfVertices()); +template +std::vector AcyclicDagDivider::ComputePartitioning(const BspInstance &instance) { + const unsigned n = static_cast(instance.NumberOfVertices()); // split to connected components first - ConnectedComponentDivider connected_comp; - connected_comp.divide(instance.getComputationalDag()); - - std::vector subDags = connected_comp.get_sub_dags(); - std::vector> node_to_subdag_and_index(N); - std::vector> original_id(subDags.size()); - for (vertex_idx node = 0; node < N; ++node) { - node_to_subdag_and_index[node] = {connected_comp.get_component()[node], connected_comp.get_vertex_map()[node]}; - original_id[connected_comp.get_component()[node]].push_back(node); + ConnectedComponentDivider connectedComp; + connectedComp.Divide(instance.GetComputationalDag()); + + std::vector subDags = connectedComp.GetSubDags(); + std::vector> nodeToSubdagAndIndex(n); + std::vector> originalId(subDags.size()); + for (VertexIdx node = 0; node < n; ++node) { + nodeToSubdagAndIndex[node] = {connectedComp.GetComponent()[node], connectedComp.GetVertexMap()[node]}; + originalId[connectedComp.GetComponent()[node]].push_back(node); } // TODO extend with splits at directed articulation points in future? 
// split components further with ILPs or heuristics while (true) { - bool exists_too_large = false; - std::vector dag_is_too_large(subDags.size(), false); - std::vector dag_real_size(subDags.size(), 0); + bool existsTooLarge = false; + std::vector dagIsTooLarge(subDags.size(), false); + std::vector dagRealSize(subDags.size(), 0); for (unsigned idx = 0; idx < subDags.size(); ++idx) { - const Graph_t &dag = subDags[idx]; - if (!ignore_sources_in_size) { - dag_real_size[idx] = static_cast(dag.num_vertices()); - if (dag.num_vertices() > maxPartitionSize) { - dag_is_too_large[idx] = true; - exists_too_large = true; + const GraphT &dag = subDags[idx]; + if (!ignoreSourcesInSize_) { + dagRealSize[idx] = static_cast(dag.NumVertices()); + if (dag.NumVertices() > maxPartitionSize_) { + dagIsTooLarge[idx] = true; + existsTooLarge = true; } } else { - for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { - if (instance.getComputationalDag().in_degree(original_id[idx][local_ID]) > 0) { - ++dag_real_size[idx]; + for (VertexIdx localId = 0; localId < dag.NumVertices(); ++localId) { + if (instance.GetComputationalDag().InDegree(originalId[idx][localId]) > 0) { + ++dagRealSize[idx]; } } } - if (dag_real_size[idx] > maxPartitionSize) { - dag_is_too_large[idx] = true; - exists_too_large = true; + if (dagRealSize[idx] > maxPartitionSize_) { + dagIsTooLarge[idx] = true; + existsTooLarge = true; } } - if (!exists_too_large) { + if (!existsTooLarge) { break; } - std::vector newDagList; - std::vector> original_id_updated; + std::vector newDagList; + std::vector> originalIdUpdated; for (unsigned idx = 0; idx < subDags.size(); ++idx) { - const Graph_t &dag = subDags[idx]; - if (!dag_is_too_large[idx]) { - for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { - node_to_subdag_and_index[original_id[idx][local_ID]].first = static_cast(newDagList.size()); + const GraphT &dag = subDags[idx]; + if (!dagIsTooLarge[idx]) { + for (VertexIdx localId = 0; 
localId < dag.NumVertices(); ++localId) { + nodeToSubdagAndIndex[originalId[idx][localId]].first = static_cast(newDagList.size()); } - original_id_updated.push_back(original_id[idx]); + originalIdUpdated.push_back(originalId[idx]); newDagList.push_back(dag); } else { - std::vector ILP_assignment; + std::vector ilpAssignment; // unsigned newMin = dag_real_size[idx]/3, minPartitionSize); minimum condition removed - it can cause very strict bisections - unsigned newMin = dag_real_size[idx] / 3; - unsigned newMax = dag_real_size[idx] - newMin; + unsigned newMin = dagRealSize[idx] / 3; + unsigned newMax = dagRealSize[idx] - newMin; // mark the source nodes of the original DAG - std::vector is_original_source(dag.num_vertices()); - for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { - is_original_source[local_ID] = (instance.getComputationalDag().in_degree(original_id[idx][local_ID]) == 0); + std::vector isOriginalSource(dag.NumVertices()); + for (VertexIdx localId = 0; localId < dag.NumVertices(); ++localId) { + isOriginalSource[localId] = (instance.GetComputationalDag().InDegree(originalId[idx][localId]) == 0); } // heuristic splitting - std::vector heuristic_assignment = getTopologicalSplit(dag, {newMin, newMax}, is_original_source); - unsigned heuristicCost = getSplitCost(dag, heuristic_assignment); - unsigned ILPCost = UINT_MAX; + std::vector heuristicAssignment = GetTopologicalSplit(dag, {newMin, newMax}, isOriginalSource); + unsigned heuristicCost = GetSplitCost(dag, heuristicAssignment); + unsigned ilpCost = UINT_MAX; // ILP-based splitting - AcyclicPartitioningILP partitioner; - partitioner.setTimeLimitSeconds(120); - partitioner.setMinAndMaxSize({newMin, newMax}); - partitioner.setIsOriginalSource(is_original_source); - partitioner.setNumberOfParts(2); // note - if set to more than 2, ILP is MUCH more inefficient - BspInstance partial_instance(dag, instance.getArchitecture(), instance.getNodeProcessorCompatibilityMatrix()); - 
RETURN_STATUS status = partitioner.computePartitioning(partial_instance, ILP_assignment); - if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) { - ILPCost = getSplitCost(dag, ILP_assignment); + AcyclicPartitioningILP partitioner; + partitioner.SetTimeLimitSeconds(120); + partitioner.SetMinAndMaxSize({newMin, newMax}); + partitioner.SetIsOriginalSource(isOriginalSource); + partitioner.SetNumberOfParts(2); // note - if set to more than 2, ILP is MUCH more inefficient + BspInstance partialInstance(dag, instance.GetArchitecture(), instance.GetNodeProcessorCompatibilityMatrix()); + ReturnStatus status = partitioner.ComputePartitioning(partialInstance, ilpAssignment); + if (status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND) { + ilpCost = GetSplitCost(dag, ilpAssignment); } - std::vector assignment = ILPCost < heuristicCost ? ILP_assignment : heuristic_assignment; + std::vector assignment = ilpCost < heuristicCost ? ilpAssignment : heuristicAssignment; // split DAG according to labels - std::vector splitDags = create_induced_subgraphs(dag, assignment); - /*std::cout<<"SPLIT DONE: "< splitDags = CreateInducedSubgraphs(dag, assignment); + /*std::cout<<"SPLIT DONE: "< node_idx_in_new_subDag(dag.num_vertices()); - std::vector nr_nodes_in_new_subDag(splitDags.size(), 0); - for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { - node_idx_in_new_subDag[local_ID] = nr_nodes_in_new_subDag[assignment[local_ID]]; - ++nr_nodes_in_new_subDag[assignment[local_ID]]; + std::vector nodeIdxInNewSubDag(dag.NumVertices()); + std::vector nrNodesInNewSubDag(splitDags.size(), 0); + for (VertexIdx localId = 0; localId < dag.NumVertices(); ++localId) { + nodeIdxInNewSubDag[localId] = nrNodesInNewSubDag[assignment[localId]]; + ++nrNodesInNewSubDag[assignment[localId]]; } - for (auto next_dag : splitDags) { - original_id_updated.emplace_back(next_dag.num_vertices()); + for (auto nextDag : splitDags) { + 
originalIdUpdated.emplace_back(nextDag.NumVertices()); } - for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { - node_to_subdag_and_index[original_id[idx][local_ID]] - = {newDagList.size() + assignment[local_ID], node_idx_in_new_subDag[local_ID]}; - original_id_updated[newDagList.size() + assignment[local_ID]][node_idx_in_new_subDag[local_ID]] - = original_id[idx][local_ID]; + for (VertexIdx localId = 0; localId < dag.NumVertices(); ++localId) { + nodeToSubdagAndIndex[originalId[idx][localId]] + = {newDagList.size() + assignment[localId], nodeIdxInNewSubDag[localId]}; + originalIdUpdated[newDagList.size() + assignment[localId]][nodeIdxInNewSubDag[localId]] + = originalId[idx][localId]; } - for (auto next_dag : splitDags) { - newDagList.push_back(next_dag); + for (auto nextDag : splitDags) { + newDagList.push_back(nextDag); } } } subDags = newDagList; - original_id = original_id_updated; + originalId = originalIdUpdated; } // output final cost - std::vector final_assignment(N); - for (vertex_idx node = 0; node < N; ++node) { - final_assignment[node] = node_to_subdag_and_index[node].first; + std::vector finalAssignment(n); + for (VertexIdx node = 0; node < n; ++node) { + finalAssignment[node] = nodeToSubdagAndIndex[node].first; } - std::cout << "Final cut cost of acyclic DAG divider is " << getSplitCost(instance.getComputationalDag(), final_assignment) + std::cout << "Final cut cost of acyclic DAG divider is " << GetSplitCost(instance.GetComputationalDag(), finalAssignment) << std::endl; - return final_assignment; + return finalAssignment; } -template -std::vector AcyclicDagDivider::getTopologicalSplit(const Graph_t &G, - std::pair min_and_max, - const std::vector &is_original_source) const { - std::vector node_to_part(G.num_vertices()); +template +std::vector AcyclicDagDivider::GetTopologicalSplit(const GraphT &g, + std::pair minAndMax, + const std::vector &isOriginalSource) const { + std::vector nodeToPart(g.NumVertices()); - std::vector 
top_order = GetTopOrder(G); - std::vector top_order_idx(G.num_vertices()); - for (unsigned idx = 0; idx < G.num_vertices(); ++idx) { - top_order_idx[top_order[idx]] = idx; + std::vector topOrder = GetTopOrder(g); + std::vector topOrderIdx(g.NumVertices()); + for (unsigned idx = 0; idx < g.NumVertices(); ++idx) { + topOrderIdx[topOrder[idx]] = idx; } - std::vector last_node_idx_in_hyperedge(G.num_vertices()); - for (unsigned node = 0; node < G.num_vertices(); ++node) { - last_node_idx_in_hyperedge[node] = top_order_idx[node]; - for (const auto &succ : G.children(node)) { - last_node_idx_in_hyperedge[node] = std::max(last_node_idx_in_hyperedge[node], top_order_idx[succ]); + std::vector lastNodeIdxInHyperedge(g.NumVertices()); + for (unsigned node = 0; node < g.NumVertices(); ++node) { + lastNodeIdxInHyperedge[node] = topOrderIdx[node]; + for (const auto &succ : g.Children(node)) { + lastNodeIdxInHyperedge[node] = std::max(lastNodeIdxInHyperedge[node], topOrderIdx[succ]); } } unsigned index = 0; - unsigned current_part_id = 0; - - unsigned nodes_remaining = static_cast(G.num_vertices()); - if (ignore_sources_in_size) { - nodes_remaining = 0; - for (unsigned node = 0; node < G.num_vertices(); ++node) { - if (!is_original_source[node]) { - ++nodes_remaining; + unsigned currentPartId = 0; + + unsigned nodesRemaining = static_cast(g.NumVertices()); + if (ignoreSourcesInSize_) { + nodesRemaining = 0; + for (unsigned node = 0; node < g.NumVertices(); ++node) { + if (!isOriginalSource[node]) { + ++nodesRemaining; } } } - while (nodes_remaining > min_and_max.second) { - unsigned best_cost = UINT_MAX; - unsigned best_end = index; + while (nodesRemaining > minAndMax.second) { + unsigned bestCost = UINT_MAX; + unsigned bestEnd = index; unsigned end; - unsigned newly_added_nodes = 0; - for (end = index + 1; index < G.num_vertices() && newly_added_nodes < min_and_max.first; ++end) { - if (!ignore_sources_in_size || !is_original_source[end]) { - ++newly_added_nodes; + unsigned 
newlyAddedNodes = 0; + for (end = index + 1; index < g.NumVertices() && newlyAddedNodes < minAndMax.first; ++end) { + if (!ignoreSourcesInSize_ || !isOriginalSource[end]) { + ++newlyAddedNodes; } } - while (end < G.num_vertices() && newly_added_nodes < min_and_max.second) { - unsigned extra_cost = 0; + while (end < g.NumVertices() && newlyAddedNodes < minAndMax.second) { + unsigned extraCost = 0; // check the extra cut cost of the potential endpoint - for (unsigned top_order_pos = index; top_order_pos <= end; ++top_order_pos) { - vertex_idx node = top_order[top_order_pos]; - if (last_node_idx_in_hyperedge[node] > end) { - extra_cost += G.vertex_comm_weight(node); + for (unsigned topOrderPos = index; topOrderPos <= end; ++topOrderPos) { + VertexIdx node = topOrder[topOrderPos]; + if (lastNodeIdxInHyperedge[node] > end) { + extraCost += g.VertexCommWeight(node); } - for (const auto &pred : G.parents(node)) { - if (last_node_idx_in_hyperedge[pred] > end) { - extra_cost += G.vertex_comm_weight(pred); + for (const auto &pred : g.Parents(node)) { + if (lastNodeIdxInHyperedge[pred] > end) { + extraCost += g.VertexCommWeight(pred); } } } - if (extra_cost < best_cost) { - best_cost = extra_cost; - best_end = end; + if (extraCost < bestCost) { + bestCost = extraCost; + bestEnd = end; } ++end; - if (!ignore_sources_in_size || !is_original_source[end]) { - ++newly_added_nodes; + if (!ignoreSourcesInSize_ || !isOriginalSource[end]) { + ++newlyAddedNodes; } } - for (vertex_idx idx = index; idx <= best_end; ++idx) { - node_to_part[top_order[idx]] = current_part_id; - if (!ignore_sources_in_size || !is_original_source[idx]) { - --nodes_remaining; + for (VertexIdx idx = index; idx <= bestEnd; ++idx) { + nodeToPart[topOrder[idx]] = currentPartId; + if (!ignoreSourcesInSize_ || !isOriginalSource[idx]) { + --nodesRemaining; } } - index = best_end + 1; - ++current_part_id; + index = bestEnd + 1; + ++currentPartId; } // remaining nodes go into last part - for (vertex_idx idx = index; 
idx < G.num_vertices(); ++idx) { - node_to_part[top_order[idx]] = current_part_id; + for (VertexIdx idx = index; idx < g.NumVertices(); ++idx) { + nodeToPart[topOrder[idx]] = currentPartId; } - return node_to_part; + return nodeToPart; } -template -v_commw_t AcyclicDagDivider::getSplitCost(const Graph_t &G, const std::vector &node_to_part) { - v_commw_t cost = 0; +template +VCommwT AcyclicDagDivider::GetSplitCost(const GraphT &g, const std::vector &nodeToPart) { + VCommwT cost = 0; - for (vertex_idx node = 0; node < G.num_vertices(); ++node) { - std::set parts_included; - parts_included.insert(node_to_part[node]); - for (const auto &succ : G.children(node)) { - parts_included.insert(node_to_part[succ]); + for (VertexIdx node = 0; node < g.NumVertices(); ++node) { + std::set partsIncluded; + partsIncluded.insert(nodeToPart[node]); + for (const auto &succ : g.Children(node)) { + partsIncluded.insert(nodeToPart[succ]); } - cost += static_cast>(parts_included.size() - 1) * G.vertex_comm_weight(node); + cost += static_cast>(partsIncluded.size() - 1) * g.VertexCommWeight(node); } return cost; diff --git a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp index 2faaeb81..e2bd1268 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp @@ -24,66 +24,62 @@ limitations under the License. 
namespace osp { -template +template class AcyclicPartitioningILP { - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); + static_assert(isComputationalDagV, "PebblingSchedule can only be used with computational DAGs."); private: - using vertex_idx = vertex_idx_t; - using commweight_type = v_commw_t; + using VertexIdx = VertexIdxT; + using commweight_type = VCommwT; - Model model; + Model model_; - bool write_solutions_found; - bool ignore_sources_for_constraint = true; + bool writeSolutionsFound_; + bool ignoreSourcesForConstraint_ = true; class WriteSolutionCallback : public CallbackBase { private: - unsigned counter; - unsigned max_number_solution; + unsigned counter_; + unsigned maxNumberSolution_; - double best_obj; + double bestObj_; public: WriteSolutionCallback() - : counter(0), - max_number_solution(500), - best_obj(COPT_INFINITY), - write_solutions_path_cb(""), - solution_file_prefix_cb("") {} + : counter_(0), maxNumberSolution_(500), bestObj_(COPT_INFINITY), writeSolutionsPathCb_(""), solutionFilePrefixCb_("") {} - std::string write_solutions_path_cb; - std::string solution_file_prefix_cb; + std::string writeSolutionsPathCb_; + std::string solutionFilePrefixCb_; void callback() override; }; - WriteSolutionCallback solution_callback; + WriteSolutionCallback solutionCallback_; - unsigned numberOfParts = 0; + unsigned numberOfParts_ = 0; - std::vector is_original_source; + std::vector isOriginalSource_; - unsigned time_limit_seconds; + unsigned timeLimitSeconds_; protected: - std::vector node_in_partition; - std::vector hyperedge_intersects_partition; + std::vector nodeInPartition_; + std::vector hyperedgeIntersectsPartition_; - unsigned minPartitionSize = 500, maxPartitionSize = 1400; + unsigned minPartitionSize_ = 500, maxPartitionSize_ = 1400; - std::vector returnAssignment(const BspInstance &instance); + std::vector ReturnAssignment(const BspInstance &instance); - void 
setupVariablesConstraintsObjective(const BspInstance &instance); + void SetupVariablesConstraintsObjective(const BspInstance &instance); - void solveILP(); + void SolveIlp(); public: - AcyclicPartitioningILP() : model(COPTEnv::getInstance().CreateModel("AsyncPart")), write_solutions_found(false) {} + AcyclicPartitioningILP() : model_(COPTEnv::GetInstance().CreateModel("AsyncPart")), writeSolutionsFound_(false) {} virtual ~AcyclicPartitioningILP() = default; - RETURN_STATUS computePartitioning(const BspInstance &instance, std::vector &partitioning); + ReturnStatus ComputePartitioning(const BspInstance &instance, std::vector &partitioning); /** * @brief Enables writing intermediate solutions. @@ -96,10 +92,10 @@ class AcyclicPartitioningILP { * @param path The path where the solutions will be written. * @param file_prefix The prefix that will be used for the solution files. */ - inline void enableWriteIntermediateSol(std::string path, std::string file_prefix) { - write_solutions_found = true; - solution_callback.write_solutions_path_cb = path; - solution_callback.solution_file_prefix_cb = file_prefix; + inline void EnableWriteIntermediateSol(std::string path, std::string filePrefix) { + writeSolutionsFound_ = true; + solutionCallback_.writeSolutionsPathCb_ = path; + solutionCallback_.solutionFilePrefixCb_ = filePrefix; } /** @@ -109,184 +105,183 @@ class AcyclicPartitioningILP { * calling this function, the `enableWriteIntermediateSol` function needs * to be called again in order to enable writing of intermediate solutions. */ - inline void disableWriteIntermediateSol() { write_solutions_found = false; } + inline void DisableWriteIntermediateSol() { writeSolutionsFound_ = false; } /** * @brief Get the best gap found by the solver. * * @return The best gap found by the solver. 
*/ - inline double bestGap() { return model.GetDblAttr(COPT_DBLATTR_BESTGAP); } + inline double BestGap() { return model_.GetDblAttr(COPT_DBLATTR_BESTGAP); } /** * @brief Get the best objective value found by the solver. * * @return The best objective value found by the solver. */ - inline double bestObjective() { return model.GetDblAttr(COPT_DBLATTR_BESTOBJ); } + inline double BestObjective() { return model_.GetDblAttr(COPT_DBLATTR_BESTOBJ); } /** * @brief Get the best bound found by the solver. * * @return The best bound found by the solver. */ - inline double bestBound() { return model.GetDblAttr(COPT_DBLATTR_BESTBND); } + inline double BestBound() { return model_.GetDblAttr(COPT_DBLATTR_BESTBND); } /** * @brief Get the name of the schedule. * * @return The name of the schedule. */ - virtual std::string getScheduleName() const { return "AcyclicPartitioningILP"; } + virtual std::string GetScheduleName() const { return "AcyclicPartitioningILP"; } // getters and setters for problem parameters - inline std::pair getMinAndMaxSize() const { return std::make_pair(minPartitionSize, maxPartitionSize); } + inline std::pair GetMinAndMaxSize() const { return std::make_pair(minPartitionSize_, maxPartitionSize_); } - inline void setMinAndMaxSize(const std::pair min_and_max) { - minPartitionSize = min_and_max.first; - maxPartitionSize = min_and_max.second; + inline void SetMinAndMaxSize(const std::pair minAndMax) { + minPartitionSize_ = minAndMax.first; + maxPartitionSize_ = minAndMax.second; } - inline unsigned getNumberOfParts() const { return numberOfParts; } + inline unsigned GetNumberOfParts() const { return numberOfParts_; } - inline void setNumberOfParts(const unsigned number_of_parts) { numberOfParts = number_of_parts; } + inline void SetNumberOfParts(const unsigned numberOfParts) { numberOfParts_ = numberOfParts; } - inline void setIgnoreSourceForConstraint(const bool ignore_) { ignore_sources_for_constraint = ignore_; } + inline void 
SetIgnoreSourceForConstraint(const bool ignore) { ignoreSourcesForConstraint_ = ignore; } - inline void setIsOriginalSource(const std::vector &is_original_source_) { is_original_source = is_original_source_; } + inline void SetIsOriginalSource(const std::vector &isOriginalSource) { isOriginalSource_ = isOriginalSource; } - void setTimeLimitSeconds(unsigned time_limit_seconds_) { time_limit_seconds = time_limit_seconds_; } + void SetTimeLimitSeconds(unsigned timeLimitSeconds) { timeLimitSeconds_ = timeLimitSeconds; } }; -template -void AcyclicPartitioningILP::solveILP() { - model.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0); +template +void AcyclicPartitioningILP::SolveIlp() { + model_.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0); - model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, time_limit_seconds); - model.SetIntParam(COPT_INTPARAM_THREADS, 128); + model_.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds_); + model_.SetIntParam(COPT_INTPARAM_THREADS, 128); - model.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); - model.SetIntParam(COPT_INTPARAM_LPMETHOD, 1); - model.SetIntParam(COPT_INTPARAM_ROUNDINGHEURLEVEL, 1); + model_.SetIntParam(COPT_INTPARAM_STRONGBRANCHING, 1); + model_.SetIntParam(COPT_INTPARAM_LPMETHOD, 1); + model_.SetIntParam(COPT_INTPARAM_ROUNDINGHEURLEVEL, 1); - model.SetIntParam(COPT_INTPARAM_SUBMIPHEURLEVEL, 1); + model_.SetIntParam(COPT_INTPARAM_SUBMIPHEURLEVEL, 1); // model.SetIntParam(COPT_INTPARAM_PRESOLVE, 1); // model.SetIntParam(COPT_INTPARAM_CUTLEVEL, 0); - model.SetIntParam(COPT_INTPARAM_TREECUTLEVEL, 2); + model_.SetIntParam(COPT_INTPARAM_TREECUTLEVEL, 2); // model.SetIntParam(COPT_INTPARAM_DIVINGHEURLEVEL, 2); - model.Solve(); + model_.Solve(); } -template -RETURN_STATUS AcyclicPartitioningILP::computePartitioning(const BspInstance &instance, - std::vector &partitioning) { +template +ReturnStatus AcyclicPartitioningILP::ComputePartitioning(const BspInstance &instance, + std::vector &partitioning) { partitioning.clear(); - if (numberOfParts == 
0) { - numberOfParts = static_cast( - std::floor(static_cast(instance.numberOfVertices()) / static_cast(minPartitionSize))); - std::cout << "ILP nr parts: " << numberOfParts << std::endl; + if (numberOfParts_ == 0) { + numberOfParts_ = static_cast( + std::floor(static_cast(instance.NumberOfVertices()) / static_cast(minPartitionSize_))); + std::cout << "ILP nr parts: " << numberOfParts_ << std::endl; } - setupVariablesConstraintsObjective(instance); + SetupVariablesConstraintsObjective(instance); - solveILP(); + SolveIlp(); - if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - partitioning = returnAssignment(instance); - return RETURN_STATUS::OSP_SUCCESS; + if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { + partitioning = ReturnAssignment(instance); + return ReturnStatus::OSP_SUCCESS; - } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - partitioning.resize(instance.numberOfVertices(), UINT_MAX); - return RETURN_STATUS::ERROR; + } else if (model_.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { + partitioning.resize(instance.NumberOfVertices(), UINT_MAX); + return ReturnStatus::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - partitioning = returnAssignment(instance); - return RETURN_STATUS::OSP_SUCCESS; + if (model_.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { + partitioning = ReturnAssignment(instance); + return ReturnStatus::OSP_SUCCESS; } else { - partitioning.resize(instance.numberOfVertices(), UINT_MAX); - return RETURN_STATUS::ERROR; + partitioning.resize(instance.NumberOfVertices(), UINT_MAX); + return ReturnStatus::ERROR; } } } -template -void AcyclicPartitioningILP::setupVariablesConstraintsObjective(const BspInstance &instance) { +template +void AcyclicPartitioningILP::SetupVariablesConstraintsObjective(const BspInstance &instance) { // Variables - node_in_partition = std::vector(instance.numberOfVertices()); + nodeInPartition_ = 
std::vector(instance.NumberOfVertices()); - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - node_in_partition[node] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "node_in_partition"); + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + nodeInPartition_[node] = model_.AddVars(static_cast(numberOfParts_), COPT_BINARY, "node_in_partition"); } - std::map node_to_hyperedge_index; + std::map nodeToHyperedgeIndex; unsigned numberOfHyperedges = 0; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (instance.getComputationalDag().out_degree(node) > 0) { - node_to_hyperedge_index[node] = numberOfHyperedges; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (instance.GetComputationalDag().OutDegree(node) > 0) { + nodeToHyperedgeIndex[node] = numberOfHyperedges; ++numberOfHyperedges; } } - hyperedge_intersects_partition = std::vector(numberOfHyperedges); + hyperedgeIntersectsPartition_ = std::vector(numberOfHyperedges); for (unsigned hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) { - hyperedge_intersects_partition[hyperedge] - = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "hyperedge_intersects_partition"); + hyperedgeIntersectsPartition_[hyperedge] + = model_.AddVars(static_cast(numberOfParts_), COPT_BINARY, "hyperedge_intersects_partition"); } // Constraints // each node assigned to exactly one partition - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { Expr expr; - for (unsigned part = 0; part < numberOfParts; part++) { - expr += node_in_partition[node][static_cast(part)]; + for (unsigned part = 0; part < numberOfParts_; part++) { + expr += nodeInPartition_[node][static_cast(part)]; } - model.AddConstr(expr == 1); + model_.AddConstr(expr == 1); } // hyperedge indicators match node variables - for (unsigned part = 0; part < numberOfParts; 
part++) { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (instance.getComputationalDag().out_degree(node) == 0) { + for (unsigned part = 0; part < numberOfParts_; part++) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (instance.GetComputationalDag().OutDegree(node) == 0) { continue; } - model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast(part)] - >= node_in_partition[node][static_cast(part)]); - for (const auto &succ : instance.getComputationalDag().children(node)) { - model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast(part)] - >= node_in_partition[succ][static_cast(part)]); + model_.AddConstr(hyperedgeIntersectsPartition_[nodeToHyperedgeIndex[node]][static_cast(part)] + >= nodeInPartition_[node][static_cast(part)]); + for (const auto &succ : instance.GetComputationalDag().Children(node)) { + model_.AddConstr(hyperedgeIntersectsPartition_[nodeToHyperedgeIndex[node]][static_cast(part)] + >= nodeInPartition_[succ][static_cast(part)]); } } } // partition size constraints - for (unsigned part = 0; part < numberOfParts; part++) { + for (unsigned part = 0; part < numberOfParts_; part++) { Expr expr; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (!ignore_sources_for_constraint || is_original_source.empty() || !is_original_source[node]) { - expr += node_in_partition[node][static_cast(part)]; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (!ignoreSourcesForConstraint_ || isOriginalSource_.empty() || !isOriginalSource_[node]) { + expr += nodeInPartition_[node][static_cast(part)]; } } - model.AddConstr(expr <= maxPartitionSize); - model.AddConstr(expr >= minPartitionSize); + model_.AddConstr(expr <= maxPartitionSize_); + model_.AddConstr(expr >= minPartitionSize_); } // acyclicity constraints - for (unsigned from_part = 0; from_part < numberOfParts; from_part++) { - for 
(unsigned to_part = 0; to_part < from_part; to_part++) { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (const auto &succ : instance.getComputationalDag().children(node)) { - model.AddConstr(node_in_partition[node][static_cast(from_part)] - + node_in_partition[succ][static_cast(to_part)] - <= 1); + for (unsigned fromPart = 0; fromPart < numberOfParts_; fromPart++) { + for (unsigned toPart = 0; toPart < fromPart; toPart++) { + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + for (const auto &succ : instance.GetComputationalDag().Children(node)) { + model_.AddConstr( + nodeInPartition_[node][static_cast(fromPart)] + nodeInPartition_[succ][static_cast(toPart)] <= 1); } } } @@ -294,67 +289,67 @@ void AcyclicPartitioningILP::setupVariablesConstraintsObjective(const B // set objective Expr expr; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if (instance.getComputationalDag().out_degree(node) > 0) { - expr -= instance.getComputationalDag().vertex_comm_weight(node); - for (unsigned part = 0; part < numberOfParts; part++) { - expr += instance.getComputationalDag().vertex_comm_weight(node) - * hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast(part)]; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + if (instance.GetComputationalDag().OutDegree(node) > 0) { + expr -= instance.GetComputationalDag().VertexCommWeight(node); + for (unsigned part = 0; part < numberOfParts_; part++) { + expr += instance.GetComputationalDag().VertexCommWeight(node) + * hyperedgeIntersectsPartition_[nodeToHyperedgeIndex[node]][static_cast(part)]; } } } - model.SetObjective(expr, COPT_MINIMIZE); -}; + model_.SetObjective(expr, COPT_MINIMIZE); +} -template -void AcyclicPartitioningILP::WriteSolutionCallback::callback() { - if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { +template +void 
AcyclicPartitioningILP::WriteSolutionCallback::callback() { + if (Where() == COPT_CBCONTEXT_MIPSOL && counter_ < maxNumberSolution_ && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { try { - if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { - best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); - counter++; + if (GetDblInfo(COPT_CBINFO_BESTOBJ) < bestObj_ && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { + bestObj_ = GetDblInfo(COPT_CBINFO_BESTOBJ); + counter_++; } } catch (const std::exception &e) {} } -}; +} -template -std::vector AcyclicPartitioningILP::returnAssignment(const BspInstance &instance) { - std::vector node_to_partition(instance.numberOfVertices(), UINT_MAX); +template +std::vector AcyclicPartitioningILP::ReturnAssignment(const BspInstance &instance) { + std::vector nodeToPartition(instance.NumberOfVertices(), UINT_MAX); - std::set nonempty_partition_ids; - for (unsigned node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned part = 0; part < numberOfParts; part++) { - if (node_in_partition[node][static_cast(part)].Get(COPT_DBLINFO_VALUE) >= .99) { - node_to_partition[node] = part; - nonempty_partition_ids.insert(part); + std::set nonemptyPartitionIds; + for (unsigned node = 0; node < instance.NumberOfVertices(); node++) { + for (unsigned part = 0; part < numberOfParts_; part++) { + if (nodeInPartition_[node][static_cast(part)].Get(COPT_DBLINFO_VALUE) >= .99) { + nodeToPartition[node] = part; + nonemptyPartitionIds.insert(part); } } } - for (unsigned chosen_partition : node_to_partition) { - if (chosen_partition == UINT_MAX) { + for (unsigned chosenPartition : nodeToPartition) { + if (chosenPartition == UINT_MAX) { std::cout << "Error: partitioning returned by ILP seems incomplete!" 
<< std::endl; } } - unsigned current_index = 0; - std::map new_index; - for (unsigned part_index : nonempty_partition_ids) { - new_index[part_index] = current_index; - ++current_index; + unsigned currentIndex = 0; + std::map newIndex; + for (unsigned partIndex : nonemptyPartitionIds) { + newIndex[partIndex] = currentIndex; + ++currentIndex; } - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - node_to_partition[node] = new_index[node_to_partition[node]]; + for (VertexIdx node = 0; node < instance.NumberOfVertices(); node++) { + nodeToPartition[node] = newIndex[nodeToPartition[node]]; } - std::cout << "Acyclic partitioning ILP best solution value: " << model.GetDblAttr(COPT_DBLATTR_BESTOBJ) - << ", best lower bound: " << model.GetDblAttr(COPT_DBLATTR_BESTBND) << std::endl; + std::cout << "Acyclic partitioning ILP best solution value: " << model_.GetDblAttr(COPT_DBLATTR_BESTOBJ) + << ", best lower bound: " << model_.GetDblAttr(COPT_DBLATTR_BESTBND) << std::endl; - return node_to_partition; + return nodeToPartition; } } // namespace osp diff --git a/include/osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp b/include/osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp index e5678cee..717aa6df 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp @@ -24,104 +24,104 @@ limitations under the License. 
namespace osp { -template -class SubproblemMultiScheduling : public Scheduler { - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); +template +class SubproblemMultiScheduling : public Scheduler { + static_assert(isComputationalDagV, "PebblingSchedule can only be used with computational DAGs."); private: - using vertex_idx = vertex_idx_t; - using commweight_type = v_commw_t; - using workweight_type = v_workw_t; + using VertexIdx = VertexIdxT; + using commweight_type = VCommwT; + using workweight_type = VWorkwT; - std::vector last_node_on_proc; - std::vector> proc_task_lists; - std::vector longest_outgoing_path; + std::vector lastNodeOnProc_; + std::vector> procTaskLists_; + std::vector longestOutgoingPath_; public: SubproblemMultiScheduling() {} virtual ~SubproblemMultiScheduling() = default; - RETURN_STATUS computeMultiSchedule(const BspInstance &instance, std::vector> &processors_to_node); + ReturnStatus ComputeMultiSchedule(const BspInstance &instance, std::vector> &processorsToNode); - std::vector> makeAssignment(const BspInstance &instance, - const std::set> &nodes_available, - const std::set &procs_available) const; + std::vector> MakeAssignment(const BspInstance &instance, + const std::set> &nodesAvailable, + const std::set &procsAvailable) const; - std::vector static get_longest_path(const Graph_t &graph); + std::vector static GetLongestPath(const GraphT &graph); // not used, only here for using scheduler class base functionality (status enums, timelimits, etc) - RETURN_STATUS computeSchedule(BspSchedule &schedule) override; + ReturnStatus ComputeSchedule(BspSchedule &schedule) override; /** * @brief Get the name of the schedule. * * @return The name of the schedule. 
*/ - virtual std::string getScheduleName() const override { return "SubproblemMultiScheduling"; } + virtual std::string GetScheduleName() const override { return "SubproblemMultiScheduling"; } - inline const std::vector> &getProcTaskLists() const { return proc_task_lists; } + inline const std::vector> &GetProcTaskLists() const { return procTaskLists_; } }; // currently duplicated from BSP locking scheduler's code -template -std::vector> SubproblemMultiScheduling::get_longest_path(const Graph_t &graph) { - std::vector longest_path(graph.num_vertices(), 0); +template +std::vector> SubproblemMultiScheduling::GetLongestPath(const GraphT &graph) { + std::vector longestPath(graph.NumVertices(), 0); - std::vector top_order = GetTopOrder(graph); + std::vector topOrder = GetTopOrder(graph); - for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) { - longest_path[*r_iter] = graph.vertex_work_weight(*r_iter); - if (graph.out_degree(*r_iter) > 0) { + for (auto rIter = topOrder.rbegin(); rIter != topOrder.crend(); rIter++) { + longestPath[*rIter] = graph.VertexWorkWeight(*rIter); + if (graph.OutDegree(*rIter) > 0) { workweight_type max = 0; - for (const auto &child : graph.children(*r_iter)) { - if (max <= longest_path[child]) { - max = longest_path[child]; + for (const auto &child : graph.Children(*rIter)) { + if (max <= longestPath[child]) { + max = longestPath[child]; } } - longest_path[*r_iter] += max; + longestPath[*rIter] += max; } } - return longest_path; + return longestPath; } -template -RETURN_STATUS SubproblemMultiScheduling::computeMultiSchedule(const BspInstance &instance, - std::vector> &processors_to_node) { - const unsigned &N = static_cast(instance.numberOfVertices()); - const unsigned &P = instance.numberOfProcessors(); - const auto &G = instance.getComputationalDag(); +template +ReturnStatus SubproblemMultiScheduling::ComputeMultiSchedule(const BspInstance &instance, + std::vector> &processorsToNode) { + const unsigned &n = 
static_cast(instance.NumberOfVertices()); + const unsigned &p = instance.NumberOfProcessors(); + const auto &g = instance.GetComputationalDag(); - processors_to_node.clear(); - processors_to_node.resize(N); + processorsToNode.clear(); + processorsToNode.resize(n); - proc_task_lists.clear(); - proc_task_lists.resize(P); + procTaskLists_.clear(); + procTaskLists_.resize(p); - last_node_on_proc.clear(); - last_node_on_proc.resize(P, UINT_MAX); + lastNodeOnProc_.clear(); + lastNodeOnProc_.resize(p, UINT_MAX); - longest_outgoing_path = get_longest_path(G); + longestOutgoingPath_ = GetLongestPath(g); - std::set> readySet; + std::set> readySet; - std::vector nrPredecRemain(N); - for (vertex_idx node = 0; node < N; node++) { - nrPredecRemain[node] = static_cast(G.in_degree(node)); - if (G.in_degree(node) == 0) { - readySet.emplace(-longest_outgoing_path[node], node); + std::vector nrPredecRemain(n); + for (VertexIdx node = 0; node < n; node++) { + nrPredecRemain[node] = static_cast(g.InDegree(node)); + if (g.InDegree(node) == 0) { + readySet.emplace(-longestOutgoingPath_[node], node); } } - std::set free_procs; - for (unsigned proc = 0; proc < P; ++proc) { - free_procs.insert(proc); + std::set freeProcs; + for (unsigned proc = 0; proc < p; ++proc) { + freeProcs.insert(proc); } - std::vector node_finish_time(N, 0); + std::vector nodeFinishTime(n, 0); - std::set> finishTimes; + std::set> finishTimes; finishTimes.emplace(0, std::numeric_limits::max()); while (!readySet.empty() || !finishTimes.empty()) { @@ -129,18 +129,18 @@ RETURN_STATUS SubproblemMultiScheduling::computeMultiSchedule(const Bsp // Find new ready jobs while (!finishTimes.empty() && fabs(finishTimes.begin()->first - time) < 0.0001) { - const vertex_idx node = finishTimes.begin()->second; + const VertexIdx node = finishTimes.begin()->second; finishTimes.erase(finishTimes.begin()); if (node != std::numeric_limits::max()) { - for (const vertex_idx &succ : G.children(node)) { + for (const VertexIdx &succ : 
g.Children(node)) { nrPredecRemain[succ]--; if (nrPredecRemain[succ] == 0) { - readySet.emplace(-longest_outgoing_path[succ], succ); + readySet.emplace(-longestOutgoingPath_[succ], succ); } } - for (unsigned proc : processors_to_node[node]) { - free_procs.insert(proc); + for (unsigned proc : processorsToNode[node]) { + freeProcs.insert(proc); } } } @@ -148,100 +148,99 @@ RETURN_STATUS SubproblemMultiScheduling::computeMultiSchedule(const Bsp // Assign new jobs to idle processors // first assign free processors to ready nodes - std::vector> new_assingments = makeAssignment(instance, readySet, free_procs); + std::vector> newAssingments = MakeAssignment(instance, readySet, freeProcs); - for (auto entry : new_assingments) { - vertex_idx node = entry.first; + for (auto entry : newAssingments) { + VertexIdx node = entry.first; unsigned proc = entry.second; - processors_to_node[node].insert(proc); - proc_task_lists[proc].push_back(node); - finishTimes.emplace(time + G.vertex_work_weight(node), node); - node_finish_time[node] = time + G.vertex_work_weight(node); - last_node_on_proc[proc] = node; - free_procs.erase(proc); - readySet.erase({-longest_outgoing_path[node], node}); + processorsToNode[node].insert(proc); + procTaskLists_[proc].push_back(node); + finishTimes.emplace(time + g.VertexWorkWeight(node), node); + nodeFinishTime[node] = time + g.VertexWorkWeight(node); + lastNodeOnProc_[proc] = node; + freeProcs.erase(proc); + readySet.erase({-longestOutgoingPath_[node], node}); } // assign remaining free processors to already started nodes, if it helps decltype(finishTimes.rbegin()) itr = finishTimes.rbegin(); - while (!free_procs.empty() && itr != finishTimes.rend()) { - double last_finish_time = itr->first; - - decltype(finishTimes.rbegin()) itr_latest = itr; - std::set> possible_nodes; - while (itr_latest != finishTimes.rend() && itr_latest->first + 0.0001 > last_finish_time) { - vertex_idx node = itr_latest->second; - double new_finish_time = time - + 
static_cast(G.vertex_work_weight(node)) - / (static_cast(processors_to_node[node].size()) + 1); - if (new_finish_time + 0.0001 < itr_latest->first) { - possible_nodes.emplace(-longest_outgoing_path[node], node); + while (!freeProcs.empty() && itr != finishTimes.rend()) { + double lastFinishTime = itr->first; + + decltype(finishTimes.rbegin()) itrLatest = itr; + std::set> possibleNodes; + while (itrLatest != finishTimes.rend() && itrLatest->first + 0.0001 > lastFinishTime) { + VertexIdx node = itrLatest->second; + double newFinishTime + = time + + static_cast(g.VertexWorkWeight(node)) / (static_cast(processorsToNode[node].size()) + 1); + if (newFinishTime + 0.0001 < itrLatest->first) { + possibleNodes.emplace(-longestOutgoingPath_[node], node); } - ++itr_latest; + ++itrLatest; } - new_assingments = makeAssignment(instance, possible_nodes, free_procs); - for (auto entry : new_assingments) { - vertex_idx node = entry.first; + newAssingments = MakeAssignment(instance, possibleNodes, freeProcs); + for (auto entry : newAssingments) { + VertexIdx node = entry.first; unsigned proc = entry.second; - processors_to_node[node].insert(proc); - proc_task_lists[proc].push_back(node); - finishTimes.erase({node_finish_time[node], node}); - double new_finish_time - = time - + static_cast(G.vertex_work_weight(node)) / (static_cast(processors_to_node[node].size())); - finishTimes.emplace(new_finish_time, node); - node_finish_time[node] = new_finish_time; - last_node_on_proc[proc] = node; - free_procs.erase(proc); + processorsToNode[node].insert(proc); + procTaskLists_[proc].push_back(node); + finishTimes.erase({nodeFinishTime[node], node}); + double newFinishTime + = time + static_cast(g.VertexWorkWeight(node)) / (static_cast(processorsToNode[node].size())); + finishTimes.emplace(newFinishTime, node); + nodeFinishTime[node] = newFinishTime; + lastNodeOnProc_[proc] = node; + freeProcs.erase(proc); } - if (new_assingments.empty()) { - itr = itr_latest; + if (newAssingments.empty()) { + 
itr = itrLatest; } } } - return RETURN_STATUS::OSP_SUCCESS; + return ReturnStatus::OSP_SUCCESS; } -template -std::vector, unsigned>> SubproblemMultiScheduling::makeAssignment( - const BspInstance &instance, - const std::set> &nodes_available, - const std::set &procs_available) const { - std::vector> assignments; - if (nodes_available.empty() || procs_available.empty()) { +template +std::vector, unsigned>> SubproblemMultiScheduling::MakeAssignment( + const BspInstance &instance, + const std::set> &nodesAvailable, + const std::set &procsAvailable) const { + std::vector> assignments; + if (nodesAvailable.empty() || procsAvailable.empty()) { return assignments; } - std::set assigned_nodes; - std::vector assigned_procs(instance.numberOfProcessors(), false); + std::set assignedNodes; + std::vector assignedProcs(instance.NumberOfProcessors(), false); - for (unsigned proc : procs_available) { - if (last_node_on_proc[proc] == UINT_MAX) { + for (unsigned proc : procsAvailable) { + if (lastNodeOnProc_[proc] == UINT_MAX) { continue; } - for (const auto &succ : instance.getComputationalDag().children(last_node_on_proc[proc])) { - if (nodes_available.find({-longest_outgoing_path[succ], succ}) != nodes_available.end() - && instance.isCompatible(succ, proc) && assigned_nodes.find(succ) == assigned_nodes.end()) { + for (const auto &succ : instance.GetComputationalDag().Children(lastNodeOnProc_[proc])) { + if (nodesAvailable.find({-longestOutgoingPath_[succ], succ}) != nodesAvailable.end() + && instance.IsCompatible(succ, proc) && assignedNodes.find(succ) == assignedNodes.end()) { assignments.emplace_back(succ, proc); - assigned_nodes.insert(succ); - assigned_procs[proc] = true; + assignedNodes.insert(succ); + assignedProcs[proc] = true; break; } } } - for (unsigned proc : procs_available) { - if (!assigned_procs[proc]) { - for (auto itr = nodes_available.begin(); itr != nodes_available.end(); ++itr) { - vertex_idx node = itr->second; - if (instance.isCompatible(node, proc) && 
assigned_nodes.find(node) == assigned_nodes.end()) { + for (unsigned proc : procsAvailable) { + if (!assignedProcs[proc]) { + for (auto itr = nodesAvailable.begin(); itr != nodesAvailable.end(); ++itr) { + VertexIdx node = itr->second; + if (instance.IsCompatible(node, proc) && assignedNodes.find(node) == assignedNodes.end()) { assignments.emplace_back(node, proc); - assigned_nodes.insert(node); + assignedNodes.insert(node); break; } } @@ -251,9 +250,9 @@ std::vector, unsigned>> SubproblemMultiSchedulin return assignments; } -template -RETURN_STATUS SubproblemMultiScheduling::computeSchedule(BspSchedule &) { - return RETURN_STATUS::ERROR; +template +ReturnStatus SubproblemMultiScheduling::ComputeSchedule(BspSchedule &) { + return ReturnStatus::ERROR; } } // namespace osp diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt index 3579c1a0..ebc5c6cf 100644 --- a/tests/CMakeLists.txt +++ b/tests/CMakeLists.txt @@ -52,8 +52,6 @@ _add_test( directed_graph_util ) _add_test( directed_graph_top_sort ) -#_add_test( kl ) - _add_test( kl_total ) _add_test( kl_lambda ) @@ -129,6 +127,8 @@ _add_test( permutations ) _add_test( bit_mask ) +_add_test( hash_pair ) + ## io _add_test( filereader DATA ) diff --git a/tests/balanced_coin_flips.cpp b/tests/balanced_coin_flips.cpp index f30b64f0..a1462ca3 100644 --- a/tests/balanced_coin_flips.cpp +++ b/tests/balanced_coin_flips.cpp @@ -25,33 +25,33 @@ limitations under the License. 
using namespace osp; -bool thue_morse_gen(long unsigned int n) { +bool ThueMorseGen(long unsigned int n) { // std::bitset bits(n); - unsigned long int bin_sum = 0; + unsigned long int binSum = 0; while (n != 0) { - bin_sum += n % 2; + binSum += n % 2; n /= 2; } - return bool(bin_sum % 2); // (bits.count()%2); + return bool(binSum % 2); // (bits.count()%2); } -BOOST_AUTO_TEST_CASE(Random_Biased_Coin) { +BOOST_AUTO_TEST_CASE(RandomBiasedCoin) { std::cout << "True: " << true << " False: " << false << std::endl; - Biased_Random Coin; + BiasedRandom coin; std::cout << "Biased Coin: "; for (int i = 0; i < 200; i++) { - std::cout << Coin.get_flip(); + std::cout << coin.GetFlip(); } std::cout << std::endl << std::endl; } -BOOST_AUTO_TEST_CASE(Thue__Morse) { - Thue_Morse_Sequence Coin(0); +BOOST_AUTO_TEST_CASE(ThueMorse) { + ThueMorseSequence coin(0); std::vector beginning({0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1}); std::vector generated; for (long unsigned i = 0; i < beginning.size(); i++) { - bool next = Coin.get_flip(); + bool next = coin.GetFlip(); generated.emplace_back(next); // std::cout << next; } @@ -59,120 +59,120 @@ BOOST_AUTO_TEST_CASE(Thue__Morse) { BOOST_CHECK(beginning == generated); - Thue_Morse_Sequence Test_Coin_in_seq(0); + ThueMorseSequence testCoinInSeq(0); for (unsigned i = 0; i < 200; i++) { - BOOST_CHECK_EQUAL(Test_Coin_in_seq.get_flip(), thue_morse_gen(i)); + BOOST_CHECK_EQUAL(testCoinInSeq.GetFlip(), ThueMorseGen(i)); // std::cout << "hi " << i << std::endl; } for (int i = 0; i < 100; i++) { - unsigned ind = static_cast(randInt(1048575)); - Thue_Morse_Sequence Test_Coin_random(ind); - BOOST_CHECK_EQUAL(Test_Coin_random.get_flip(), thue_morse_gen(ind)); + unsigned ind = static_cast(RandInt(1048575)); + ThueMorseSequence testCoinRandom(ind); + BOOST_CHECK_EQUAL(testCoinRandom.GetFlip(), ThueMorseGen(ind)); // std::cout << "bye " << i << std::endl; } } -BOOST_AUTO_TEST_CASE(Repeater_Coin) { - Repeat_Chance 
Coin; +BOOST_AUTO_TEST_CASE(RepeaterCoin) { + RepeatChance coin; std::cout << "Repeater Coin: "; for (int i = 0; i < 200; i++) { - std::cout << Coin.get_flip(); + std::cout << coin.GetFlip(); } std::cout << std::endl << std::endl; } -BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_1) { - Biased_Random_with_side_bias Coin({1, 1}); - int true_count = 0; - int false_count = 0; +BOOST_AUTO_TEST_CASE(RandomBiasedCoinWithSideBias11) { + BiasedRandomWithSideBias coin({1, 1}); + int trueCount = 0; + int falseCount = 0; std::cout << "Biased Coin with side bias 1:1 : "; for (int i = 0; i < 200; i++) { - bool flip = Coin.get_flip(); + bool flip = coin.GetFlip(); if (flip) { - true_count++; + trueCount++; } else { - false_count++; + falseCount++; } std::cout << flip; } std::cout << std::endl; - std::cout << "True count: " << true_count << " False count: " << false_count << std::endl; + std::cout << "True count: " << trueCount << " False count: " << falseCount << std::endl; std::cout << std::endl; } -BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_0) { - Biased_Random_with_side_bias Coin({1, 0}); - int true_count = 0; - int false_count = 0; +BOOST_AUTO_TEST_CASE(RandomBiasedCoinWithSideBias10) { + BiasedRandomWithSideBias coin({1, 0}); + int trueCount = 0; + int falseCount = 0; std::cout << "Biased Coin with side bias 1:0 : "; for (int i = 0; i < 200; i++) { - bool flip = Coin.get_flip(); + bool flip = coin.GetFlip(); if (flip) { - true_count++; + trueCount++; } else { - false_count++; + falseCount++; } std::cout << flip; } std::cout << std::endl; - std::cout << "True count: " << true_count << " False count: " << false_count << std::endl; + std::cout << "True count: " << trueCount << " False count: " << falseCount << std::endl; std::cout << std::endl; } -BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_0_1) { - Biased_Random_with_side_bias Coin({0, 1}); - int true_count = 0; - int false_count = 0; +BOOST_AUTO_TEST_CASE(RandomBiasedCoinWithSideBias01) { + 
BiasedRandomWithSideBias coin({0, 1}); + int trueCount = 0; + int falseCount = 0; std::cout << "Biased Coin with side bias 0:1 : "; for (int i = 0; i < 200; i++) { - bool flip = Coin.get_flip(); + bool flip = coin.GetFlip(); if (flip) { - true_count++; + trueCount++; } else { - false_count++; + falseCount++; } std::cout << flip; } std::cout << std::endl; - std::cout << "True count: " << true_count << " False count: " << false_count << std::endl; + std::cout << "True count: " << trueCount << " False count: " << falseCount << std::endl; std::cout << std::endl; } -BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_2) { - Biased_Random_with_side_bias Coin({3, 2}); - int true_count = 0; - int false_count = 0; +BOOST_AUTO_TEST_CASE(RandomBiasedCoinWithSideBias32) { + BiasedRandomWithSideBias coin({3, 2}); + int trueCount = 0; + int falseCount = 0; std::cout << "Biased Coin with side bias 3:2 : "; for (int i = 0; i < 200; i++) { - bool flip = Coin.get_flip(); + bool flip = coin.GetFlip(); if (flip) { - true_count++; + trueCount++; } else { - false_count++; + falseCount++; } std::cout << flip; } std::cout << std::endl; - std::cout << "True count: " << true_count << " False count: " << false_count << std::endl; + std::cout << "True count: " << trueCount << " False count: " << falseCount << std::endl; std::cout << std::endl; } -BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_1) { - Biased_Random_with_side_bias Coin({3, 1}); - int true_count = 0; - int false_count = 0; +BOOST_AUTO_TEST_CASE(RandomBiasedCoinWithSideBias31) { + BiasedRandomWithSideBias coin({3, 1}); + int trueCount = 0; + int falseCount = 0; std::cout << "Biased Coin with side bias 3:1 : "; for (int i = 0; i < 200; i++) { - bool flip = Coin.get_flip(); + bool flip = coin.GetFlip(); if (flip) { - true_count++; + trueCount++; } else { - false_count++; + falseCount++; } std::cout << flip; } std::cout << std::endl; - std::cout << "True count: " << true_count << " False count: " << false_count << 
std::endl; + std::cout << "True count: " << trueCount << " False count: " << falseCount << std::endl; std::cout << std::endl; } diff --git a/tests/bit_mask.cpp b/tests/bit_mask.cpp index 5ba648b5..654fb673 100644 --- a/tests/bit_mask.cpp +++ b/tests/bit_mask.cpp @@ -23,40 +23,40 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(BitMaskTest_1) { - const std::size_t num_flags = 4U; - BitMask mask(num_flags); +BOOST_AUTO_TEST_CASE(BitMaskTest1) { + const std::size_t numFlags = 4U; + BitMask mask(numFlags); for (unsigned i = 0; i < 25U; ++i) { - for (std::size_t j = 0; j < num_flags; ++j) { - BOOST_CHECK_EQUAL(mask.mask[j], bool(i & (1U << j))); + for (std::size_t j = 0; j < numFlags; ++j) { + BOOST_CHECK_EQUAL(mask.mask_[j], bool(i & (1U << j))); } ++mask; } } -BOOST_AUTO_TEST_CASE(BitMaskTest_2) { - const std::size_t num_flags = 6U; - BitMask mask(num_flags); +BOOST_AUTO_TEST_CASE(BitMaskTest2) { + const std::size_t numFlags = 6U; + BitMask mask(numFlags); for (unsigned i = 0; i < 256U; ++i) { BitMask tmp = mask; BitMask post = mask++; - for (std::size_t j = 0; j < num_flags; ++j) { - BOOST_CHECK_EQUAL(tmp.mask[j], post.mask[j]); + for (std::size_t j = 0; j < numFlags; ++j) { + BOOST_CHECK_EQUAL(tmp.mask_[j], post.mask_[j]); } } } -BOOST_AUTO_TEST_CASE(BitMaskTest_3) { - const std::size_t num_flags = 5U; - BitMask mask(num_flags); +BOOST_AUTO_TEST_CASE(BitMaskTest3) { + const std::size_t numFlags = 5U; + BitMask mask(numFlags); for (unsigned i = 0; i < 256U; ++i) { BitMask tmp = mask++; ++tmp; - for (std::size_t j = 0; j < num_flags; ++j) { - BOOST_CHECK_EQUAL(tmp.mask[j], mask.mask[j]); + for (std::size_t j = 0; j < numFlags; ++j) { + BOOST_CHECK_EQUAL(tmp.mask_[j], mask.mask_[j]); } } } diff --git a/tests/boost_graph_adaptor.cpp b/tests/boost_graph_adaptor.cpp index 401cf216..e0544feb 100644 --- a/tests/boost_graph_adaptor.cpp +++ b/tests/boost_graph_adaptor.cpp @@ -29,84 +29,84 @@ limitations under the License. 
using namespace osp; -boost_graph_int_t constr_graph_1() { - boost_graph_int_t graph; +BoostGraphIntT ConstrGraph1() { + BoostGraphIntT graph; - using vertex_idx = boost_graph_int_t::vertex_idx; + using VertexIdx = BoostGraphIntT::VertexIdx; - vertex_idx v1 = graph.add_vertex(1, 2, 3, 4); - vertex_idx v2 = graph.add_vertex(5, 6, 7, 8); - vertex_idx v3 = graph.add_vertex(9, 10, 11, 12); - vertex_idx v4 = graph.add_vertex(13, 14, 15, 16); - vertex_idx v5 = graph.add_vertex(17, 18, 19, 20); - vertex_idx v6 = graph.add_vertex(21, 22, 23, 24); - vertex_idx v7 = graph.add_vertex(25, 26, 27, 28); - vertex_idx v8 = graph.add_vertex(29, 30, 31, 32); + VertexIdx v1 = graph.AddVertex(1, 2, 3, 4); + VertexIdx v2 = graph.AddVertex(5, 6, 7, 8); + VertexIdx v3 = graph.AddVertex(9, 10, 11, 12); + VertexIdx v4 = graph.AddVertex(13, 14, 15, 16); + VertexIdx v5 = graph.AddVertex(17, 18, 19, 20); + VertexIdx v6 = graph.AddVertex(21, 22, 23, 24); + VertexIdx v7 = graph.AddVertex(25, 26, 27, 28); + VertexIdx v8 = graph.AddVertex(29, 30, 31, 32); - auto pair = graph.add_edge(v1, v2); + auto pair = graph.AddEdge(v1, v2); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v1, v3); + pair = graph.AddEdge(v1, v3); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v1, v4); + pair = graph.AddEdge(v1, v4); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v2, v5); + pair = graph.AddEdge(v2, v5); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v2, v7); + pair = graph.AddEdge(v2, v7); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v3, v5); + pair = graph.AddEdge(v3, v5); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v3, v6); + pair = graph.AddEdge(v3, v6); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v4, v8); + pair = graph.AddEdge(v4, v8); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v5, v8); + pair = graph.AddEdge(v5, v8); BOOST_CHECK_EQUAL(pair.second, true); - 
BOOST_CHECK_EQUAL(graph.num_edges(), 9); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 9); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); return graph; } -BOOST_AUTO_TEST_CASE(test_empty_dag_boost_graph_adapter) { - boost_graph_int_t graph; - BOOST_CHECK_EQUAL(graph.num_edges(), 0); - BOOST_CHECK_EQUAL(graph.num_vertices(), 0); +BOOST_AUTO_TEST_CASE(TestEmptyDagBoostGraphAdapter) { + BoostGraphIntT graph; + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 0); } -BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) { - boost_graph_int_t graph = constr_graph_1(); +BOOST_AUTO_TEST_CASE(TestBoostGraphAdapter1) { + BoostGraphIntT graph = ConstrGraph1(); - using vertex_idx = boost_graph_int_t::vertex_idx; + using VertexIdx = BoostGraphIntT::VertexIdx; - std::vector edge_sources{0, 0, 0, 1, 1, 2, 2, 3, 4}; - std::vector edge_targets{1, 2, 3, 4, 6, 4, 5, 7, 7}; + std::vector edgeSources{0, 0, 0, 1, 1, 2, 2, 3, 4}; + std::vector edgeTargets{1, 2, 3, 4, 6, 4, 5, 7, 7}; - size_t edge_idx = 0; - for (const auto &edge : graph.edges()) { - BOOST_CHECK_EQUAL(source(edge, graph), edge_sources[edge_idx]); - BOOST_CHECK_EQUAL(target(edge, graph), edge_targets[edge_idx]); - edge_idx++; + size_t edgeIdx = 0; + for (const auto &edge : graph.Edges()) { + BOOST_CHECK_EQUAL(Source(edge, graph), edgeSources[edgeIdx]); + BOOST_CHECK_EQUAL(Target(edge, graph), edgeTargets[edgeIdx]); + edgeIdx++; } - edge_idx = 0; - for (const auto &edge : edges(graph)) { - BOOST_CHECK_EQUAL(source(edge, graph), edge_sources[edge_idx]); - BOOST_CHECK_EQUAL(target(edge, graph), edge_targets[edge_idx]); - edge_idx++; + edgeIdx = 0; + for (const auto &edge : Edges(graph)) { + BOOST_CHECK_EQUAL(Source(edge, graph), edgeSources[edgeIdx]); + BOOST_CHECK_EQUAL(Target(edge, graph), edgeTargets[edgeIdx]); + edgeIdx++; } - std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; + std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; - std::vector> out_neighbors{ + 
std::vector> outNeighbors{ {1, 2, 3}, {4, 6}, {4, 5}, @@ -117,7 +117,7 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) { {} }; - std::vector> in_neighbors{ + std::vector> inNeighbors{ {}, {0}, {0}, @@ -130,241 +130,241 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) { size_t idx = 0; - for (const auto &v : graph.vertices()) { + for (const auto &v : graph.Vertices()) { BOOST_CHECK_EQUAL(v, vertices[idx++]); size_t i = 0; - for (const auto &e : graph.children(v)) { - BOOST_CHECK_EQUAL(e, out_neighbors[v][i++]); + for (const auto &e : graph.Children(v)) { + BOOST_CHECK_EQUAL(e, outNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.out_edges(v)) { - BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[v][i++]); + for (const auto &e : graph.OutEdges(v)) { + BOOST_CHECK_EQUAL(Target(e, graph), outNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.out_edges(v)) { - BOOST_CHECK_EQUAL(graph.target(e), out_neighbors[v][i++]); + for (const auto &e : graph.OutEdges(v)) { + BOOST_CHECK_EQUAL(graph.Target(e), outNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.parents(v)) { - BOOST_CHECK_EQUAL(e, in_neighbors[v][i++]); + for (const auto &e : graph.Parents(v)) { + BOOST_CHECK_EQUAL(e, inNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.in_edges(v)) { - BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[v][i++]); + for (const auto &e : graph.InEdges(v)) { + BOOST_CHECK_EQUAL(Source(e, graph), inNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.in_edges(v)) { - BOOST_CHECK_EQUAL(graph.source(e), in_neighbors[v][i++]); + for (const auto &e : graph.InEdges(v)) { + BOOST_CHECK_EQUAL(graph.Source(e), inNeighbors[v][i++]); } i = 0; - for (const auto &e : in_edges(v, graph)) { - BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[v][i++]); + for (const auto &e : InEdges(v, graph)) { + BOOST_CHECK_EQUAL(Source(e, graph), inNeighbors[v][i++]); } i = 0; - for (const auto &e : out_edges(v, graph)) { - BOOST_CHECK_EQUAL(target(e, graph), 
out_neighbors[v][i++]); + for (const auto &e : OutEdges(v, graph)) { + BOOST_CHECK_EQUAL(Target(e, graph), outNeighbors[v][i++]); } - BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[v].size()); - BOOST_CHECK_EQUAL(graph.out_degree(v), out_neighbors[v].size()); + BOOST_CHECK_EQUAL(graph.InDegree(v), inNeighbors[v].size()); + BOOST_CHECK_EQUAL(graph.OutDegree(v), outNeighbors[v].size()); } } -BOOST_AUTO_TEST_CASE(test_util_1) { - const boost_graph_int_t graph = constr_graph_1(); +BOOST_AUTO_TEST_CASE(TestUtil1) { + const BoostGraphIntT graph = ConstrGraph1(); - BOOST_CHECK_EQUAL(graph.num_edges(), 9); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 9); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); - auto sources = source_vertices(graph); + auto sources = SourceVertices(graph); BOOST_CHECK_EQUAL(sources.size(), 1); BOOST_CHECK_EQUAL(sources[0], 0); - auto sinks = sink_vertices(graph); + auto sinks = SinkVertices(graph); BOOST_CHECK_EQUAL(sinks.size(), 3); BOOST_CHECK_EQUAL(sinks[0], 5); BOOST_CHECK_EQUAL(sinks[1], 6); BOOST_CHECK_EQUAL(sinks[2], 7); - BOOST_CHECK_EQUAL(has_path(0, 1, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 2, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 3, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 4, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 5, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 6, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(1, 4, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 6, graph), true); - BOOST_CHECK_EQUAL(has_path(2, 4, 
graph), true); - BOOST_CHECK_EQUAL(has_path(2, 5, graph), true); - BOOST_CHECK_EQUAL(has_path(2, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(3, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(4, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(1, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 7, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 7, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(0, 1, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 2, graph), true); + 
BOOST_CHECK_EQUAL(HasPath(0, 3, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 4, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 5, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 6, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(1, 4, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 6, graph), true); + BOOST_CHECK_EQUAL(HasPath(2, 4, graph), true); + BOOST_CHECK_EQUAL(HasPath(2, 5, graph), true); + BOOST_CHECK_EQUAL(HasPath(2, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(3, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(4, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(1, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 6, graph), false); + 
BOOST_CHECK_EQUAL(HasPath(5, 7, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 7, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 6, graph), false); } -BOOST_AUTO_TEST_CASE(test_constr_dag) { - boost_graph_int_t graph; - - graph.add_vertex(1, 2, 3); - graph.add_vertex(5, 6, 7); - graph.add_vertex(9, 10, 11); - graph.add_vertex(13, 14, 15); - - graph.add_edge(0, 1); - graph.add_edge(0, 2); - graph.add_edge(0, 3); - - boost_graph_int_t graph_2(graph); - - BOOST_CHECK_EQUAL(graph_2.num_edges(), 3); - BOOST_CHECK_EQUAL(graph_2.num_vertices(), 4); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(0), 1); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(0), 2); - BOOST_CHECK_EQUAL(graph_2.vertex_mem_weight(0), 3); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(1), 5); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(1), 6); - BOOST_CHECK_EQUAL(graph_2.vertex_mem_weight(1), 7); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(2), 9); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(2), 10); - BOOST_CHECK_EQUAL(graph_2.vertex_mem_weight(2), 11); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(3), 13); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(3), 14); - - computational_dag_edge_idx_vector_impl_def_int_t graph_other; - - graph_other.add_vertex(1, 2, 3, 4); - graph_other.add_vertex(5, 6, 7, 8); - graph_other.add_edge(0, 1, 9); - - boost_graph_int_t graph_3(graph_other); - - BOOST_CHECK_EQUAL(graph_3.num_edges(), 1); - BOOST_CHECK_EQUAL(graph_3.num_vertices(), 2); - 
BOOST_CHECK_EQUAL(graph_3.vertex_work_weight(0), 1); - BOOST_CHECK_EQUAL(graph_3.vertex_comm_weight(0), 2); - BOOST_CHECK_EQUAL(graph_3.vertex_mem_weight(0), 3); - BOOST_CHECK_EQUAL(graph_3.vertex_work_weight(1), 5); - BOOST_CHECK_EQUAL(graph_3.vertex_comm_weight(1), 6); - BOOST_CHECK_EQUAL(graph_3.vertex_mem_weight(1), 7); +BOOST_AUTO_TEST_CASE(TestConstrDag) { + BoostGraphIntT graph; + + graph.AddVertex(1, 2, 3); + graph.AddVertex(5, 6, 7); + graph.AddVertex(9, 10, 11); + graph.AddVertex(13, 14, 15); + + graph.AddEdge(0, 1); + graph.AddEdge(0, 2); + graph.AddEdge(0, 3); + + BoostGraphIntT graph2(graph); + + BOOST_CHECK_EQUAL(graph2.NumEdges(), 3); + BOOST_CHECK_EQUAL(graph2.NumVertices(), 4); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(0), 1); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(0), 2); + BOOST_CHECK_EQUAL(graph2.VertexMemWeight(0), 3); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(1), 5); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(1), 6); + BOOST_CHECK_EQUAL(graph2.VertexMemWeight(1), 7); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(2), 9); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(2), 10); + BOOST_CHECK_EQUAL(graph2.VertexMemWeight(2), 11); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(3), 13); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(3), 14); + + ComputationalDagEdgeIdxVectorImplDefIntT graphOther; + + graphOther.AddVertex(1, 2, 3, 4); + graphOther.AddVertex(5, 6, 7, 8); + graphOther.AddEdge(0, 1, 9); + + BoostGraphIntT graph3(graphOther); + + BOOST_CHECK_EQUAL(graph3.NumEdges(), 1); + BOOST_CHECK_EQUAL(graph3.NumVertices(), 2); + BOOST_CHECK_EQUAL(graph3.VertexWorkWeight(0), 1); + BOOST_CHECK_EQUAL(graph3.VertexCommWeight(0), 2); + BOOST_CHECK_EQUAL(graph3.VertexMemWeight(0), 3); + BOOST_CHECK_EQUAL(graph3.VertexWorkWeight(1), 5); + BOOST_CHECK_EQUAL(graph3.VertexCommWeight(1), 6); + BOOST_CHECK_EQUAL(graph3.VertexMemWeight(1), 7); } -BOOST_AUTO_TEST_CASE(test_boost_graph_const_1) { - boost_graph_int_t graph(10u); - 
BOOST_CHECK_EQUAL(graph.num_edges(), 0); - BOOST_CHECK_EQUAL(graph.num_vertices(), 10); +BOOST_AUTO_TEST_CASE(TestBoostGraphConst1) { + BoostGraphIntT graph(10u); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 10); } -BOOST_AUTO_TEST_CASE(test_boost_graph_const_2) { - boost_graph_int_t graph_1 = constr_graph_1(); - - boost_graph_int_t graph_copy(graph_1); - BOOST_CHECK_EQUAL(graph_copy.num_edges(), 9); - BOOST_CHECK_EQUAL(graph_copy.num_vertices(), 8); - - BOOST_CHECK_EQUAL(has_path(2, 7, graph_copy), true); - BOOST_CHECK_EQUAL(has_path(3, 7, graph_copy), true); - BOOST_CHECK_EQUAL(has_path(4, 7, graph_copy), true); - BOOST_CHECK_EQUAL(has_path(1, 2, graph_copy), false); - BOOST_CHECK_EQUAL(has_path(1, 3, graph_copy), false); - BOOST_CHECK_EQUAL(has_path(2, 1, graph_copy), false); - - boost_graph_int_t graph_copy_2 = graph_1; - - BOOST_CHECK_EQUAL(graph_1.num_edges(), 9); - BOOST_CHECK_EQUAL(graph_1.num_vertices(), 8); - - BOOST_CHECK_EQUAL(has_path(2, 7, graph_1), true); - BOOST_CHECK_EQUAL(has_path(3, 7, graph_1), true); - BOOST_CHECK_EQUAL(has_path(4, 7, graph_1), true); - BOOST_CHECK_EQUAL(has_path(1, 2, graph_1), false); - BOOST_CHECK_EQUAL(has_path(1, 3, graph_1), false); - BOOST_CHECK_EQUAL(has_path(2, 1, graph_1), false); - - BOOST_CHECK_EQUAL(graph_copy_2.num_edges(), 9); - BOOST_CHECK_EQUAL(graph_copy_2.num_vertices(), 8); - - BOOST_CHECK_EQUAL(has_path(2, 7, graph_copy_2), true); - BOOST_CHECK_EQUAL(has_path(3, 7, graph_copy_2), true); - BOOST_CHECK_EQUAL(has_path(4, 7, graph_copy_2), true); - BOOST_CHECK_EQUAL(has_path(1, 2, graph_copy_2), false); - BOOST_CHECK_EQUAL(has_path(1, 3, graph_copy_2), false); - BOOST_CHECK_EQUAL(has_path(2, 1, graph_copy_2), false); - - boost_graph_int_t graph_move_1(std::move(graph_copy)); - - BOOST_CHECK_EQUAL(graph_copy.num_edges(), 0); - BOOST_CHECK_EQUAL(graph_copy.num_vertices(), 0); - - BOOST_CHECK_EQUAL(graph_move_1.num_edges(), 9); - BOOST_CHECK_EQUAL(graph_move_1.num_vertices(), 
8); - - BOOST_CHECK_EQUAL(has_path(2, 7, graph_move_1), true); - BOOST_CHECK_EQUAL(has_path(3, 7, graph_move_1), true); - BOOST_CHECK_EQUAL(has_path(4, 7, graph_move_1), true); - BOOST_CHECK_EQUAL(has_path(1, 2, graph_move_1), false); - BOOST_CHECK_EQUAL(has_path(1, 3, graph_move_1), false); - BOOST_CHECK_EQUAL(has_path(2, 1, graph_move_1), false); - - boost_graph_int_t graph_move_2 = std::move(graph_copy_2); - BOOST_CHECK_EQUAL(graph_copy_2.num_edges(), 0); - BOOST_CHECK_EQUAL(graph_copy_2.num_vertices(), 0); - - BOOST_CHECK_EQUAL(graph_move_2.num_edges(), 9); - BOOST_CHECK_EQUAL(graph_move_2.num_vertices(), 8); - - BOOST_CHECK_EQUAL(has_path(2, 7, graph_move_2), true); - BOOST_CHECK_EQUAL(has_path(3, 7, graph_move_2), true); - BOOST_CHECK_EQUAL(has_path(4, 7, graph_move_2), true); - BOOST_CHECK_EQUAL(has_path(1, 2, graph_move_2), false); - BOOST_CHECK_EQUAL(has_path(1, 3, graph_move_2), false); - BOOST_CHECK_EQUAL(has_path(2, 1, graph_move_2), false); +BOOST_AUTO_TEST_CASE(TestBoostGraphConst2) { + BoostGraphIntT graph1 = ConstrGraph1(); + + BoostGraphIntT graphCopy(graph1); + BOOST_CHECK_EQUAL(graphCopy.NumEdges(), 9); + BOOST_CHECK_EQUAL(graphCopy.NumVertices(), 8); + + BOOST_CHECK_EQUAL(HasPath(2, 7, graphCopy), true); + BOOST_CHECK_EQUAL(HasPath(3, 7, graphCopy), true); + BOOST_CHECK_EQUAL(HasPath(4, 7, graphCopy), true); + BOOST_CHECK_EQUAL(HasPath(1, 2, graphCopy), false); + BOOST_CHECK_EQUAL(HasPath(1, 3, graphCopy), false); + BOOST_CHECK_EQUAL(HasPath(2, 1, graphCopy), false); + + BoostGraphIntT graphCopy2 = graph1; + + BOOST_CHECK_EQUAL(graph1.NumEdges(), 9); + BOOST_CHECK_EQUAL(graph1.NumVertices(), 8); + + BOOST_CHECK_EQUAL(HasPath(2, 7, graph1), true); + BOOST_CHECK_EQUAL(HasPath(3, 7, graph1), true); + BOOST_CHECK_EQUAL(HasPath(4, 7, graph1), true); + BOOST_CHECK_EQUAL(HasPath(1, 2, graph1), false); + BOOST_CHECK_EQUAL(HasPath(1, 3, graph1), false); + BOOST_CHECK_EQUAL(HasPath(2, 1, graph1), false); + + BOOST_CHECK_EQUAL(graphCopy2.NumEdges(), 9); + 
BOOST_CHECK_EQUAL(graphCopy2.NumVertices(), 8); + + BOOST_CHECK_EQUAL(HasPath(2, 7, graphCopy2), true); + BOOST_CHECK_EQUAL(HasPath(3, 7, graphCopy2), true); + BOOST_CHECK_EQUAL(HasPath(4, 7, graphCopy2), true); + BOOST_CHECK_EQUAL(HasPath(1, 2, graphCopy2), false); + BOOST_CHECK_EQUAL(HasPath(1, 3, graphCopy2), false); + BOOST_CHECK_EQUAL(HasPath(2, 1, graphCopy2), false); + + BoostGraphIntT graphMove1(std::move(graphCopy)); + + BOOST_CHECK_EQUAL(graphCopy.NumEdges(), 0); + BOOST_CHECK_EQUAL(graphCopy.NumVertices(), 0); + + BOOST_CHECK_EQUAL(graphMove1.NumEdges(), 9); + BOOST_CHECK_EQUAL(graphMove1.NumVertices(), 8); + + BOOST_CHECK_EQUAL(HasPath(2, 7, graphMove1), true); + BOOST_CHECK_EQUAL(HasPath(3, 7, graphMove1), true); + BOOST_CHECK_EQUAL(HasPath(4, 7, graphMove1), true); + BOOST_CHECK_EQUAL(HasPath(1, 2, graphMove1), false); + BOOST_CHECK_EQUAL(HasPath(1, 3, graphMove1), false); + BOOST_CHECK_EQUAL(HasPath(2, 1, graphMove1), false); + + BoostGraphIntT graphMove2 = std::move(graphCopy2); + BOOST_CHECK_EQUAL(graphCopy2.NumEdges(), 0); + BOOST_CHECK_EQUAL(graphCopy2.NumVertices(), 0); + + BOOST_CHECK_EQUAL(graphMove2.NumEdges(), 9); + BOOST_CHECK_EQUAL(graphMove2.NumVertices(), 8); + + BOOST_CHECK_EQUAL(HasPath(2, 7, graphMove2), true); + BOOST_CHECK_EQUAL(HasPath(3, 7, graphMove2), true); + BOOST_CHECK_EQUAL(HasPath(4, 7, graphMove2), true); + BOOST_CHECK_EQUAL(HasPath(1, 2, graphMove2), false); + BOOST_CHECK_EQUAL(HasPath(1, 3, graphMove2), false); + BOOST_CHECK_EQUAL(HasPath(2, 1, graphMove2), false); } diff --git a/tests/bsp_architecture.cpp b/tests/bsp_architecture.cpp index 16e221bc..2cbba44b 100644 --- a/tests/bsp_architecture.cpp +++ b/tests/bsp_architecture.cpp @@ -25,47 +25,47 @@ limitations under the License. 
using namespace osp; BOOST_AUTO_TEST_CASE(ParameterizedConstructorTest) { - std::vector> uniform_sent_costs = { + std::vector> uniformSentCosts = { {0, 1, 1, 1}, {1, 0, 1, 1}, {1, 1, 0, 1}, {1, 1, 1, 0} }; - BspArchitecture architecture(4, 2, 3); - BOOST_TEST(architecture.numberOfProcessors() == 4); - BOOST_TEST(architecture.communicationCosts() == 2); - BOOST_TEST(architecture.synchronisationCosts() == 3); - BOOST_CHECK_EQUAL(architecture.getMemoryConstraintType(), MEMORY_CONSTRAINT_TYPE::NONE); - BOOST_CHECK_EQUAL(architecture.getNumberOfProcessorTypes(), 1); - BOOST_CHECK_EQUAL(architecture.isNumaArchitecture(), false); + BspArchitecture architecture(4, 2, 3); + BOOST_TEST(architecture.NumberOfProcessors() == 4); + BOOST_TEST(architecture.CommunicationCosts() == 2); + BOOST_TEST(architecture.SynchronisationCosts() == 3); + BOOST_CHECK_EQUAL(architecture.GetMemoryConstraintType(), MemoryConstraintType::NONE); + BOOST_CHECK_EQUAL(architecture.GetNumberOfProcessorTypes(), 1); + BOOST_CHECK_EQUAL(architecture.IsNumaArchitecture(), false); - BOOST_CHECK_EQUAL(architecture.memoryBound(0), 100); - BOOST_CHECK_EQUAL(architecture.memoryBound(1), 100); - BOOST_CHECK_EQUAL(architecture.memoryBound(2), 100); - BOOST_CHECK_EQUAL(architecture.memoryBound(3), 100); + BOOST_CHECK_EQUAL(architecture.MemoryBound(0), 100); + BOOST_CHECK_EQUAL(architecture.MemoryBound(1), 100); + BOOST_CHECK_EQUAL(architecture.MemoryBound(2), 100); + BOOST_CHECK_EQUAL(architecture.MemoryBound(3), 100); - BOOST_CHECK_EQUAL(architecture.processorTypes()[0], 0); - BOOST_CHECK_EQUAL(architecture.processorTypes()[1], 0); - BOOST_CHECK_EQUAL(architecture.processorTypes()[2], 0); - BOOST_CHECK_EQUAL(architecture.processorTypes()[3], 0); + BOOST_CHECK_EQUAL(architecture.ProcessorTypes()[0], 0); + BOOST_CHECK_EQUAL(architecture.ProcessorTypes()[1], 0); + BOOST_CHECK_EQUAL(architecture.ProcessorTypes()[2], 0); + BOOST_CHECK_EQUAL(architecture.ProcessorTypes()[3], 0); - 
BOOST_CHECK_EQUAL(architecture.processorType(0), 0); - BOOST_CHECK_EQUAL(architecture.processorType(1), 0); - BOOST_CHECK_EQUAL(architecture.processorType(2), 0); - BOOST_CHECK_EQUAL(architecture.processorType(3), 0); + BOOST_CHECK_EQUAL(architecture.ProcessorType(0), 0); + BOOST_CHECK_EQUAL(architecture.ProcessorType(1), 0); + BOOST_CHECK_EQUAL(architecture.ProcessorType(2), 0); + BOOST_CHECK_EQUAL(architecture.ProcessorType(3), 0); - BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 1), 2); - BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 0), 0); + BOOST_CHECK_EQUAL(architecture.CommunicationCosts(0, 1), 2); + BOOST_CHECK_EQUAL(architecture.CommunicationCosts(0, 0), 0); - BOOST_CHECK_EQUAL(architecture.getProcessorTypeCount().size(), 1); - BOOST_CHECK_EQUAL(architecture.getProcessorTypeCount()[0], 4); + BOOST_CHECK_EQUAL(architecture.GetProcessorTypeCount().size(), 1); + BOOST_CHECK_EQUAL(architecture.GetProcessorTypeCount()[0], 4); - BOOST_CHECK_EQUAL(architecture.getNumberOfProcessorTypes(), 1); + BOOST_CHECK_EQUAL(architecture.GetNumberOfProcessorTypes(), 1); - BOOST_CHECK_EQUAL(architecture.maxMemoryBoundProcType(0), 100); + BOOST_CHECK_EQUAL(architecture.MaxMemoryBoundProcType(0), 100); - BOOST_TEST(architecture.sendCost() == uniform_sent_costs); + BOOST_TEST(architecture.SendCost() == uniformSentCosts); std::vector> expectedSendCosts = { {0, 2, 2, 2}, @@ -75,81 +75,81 @@ BOOST_AUTO_TEST_CASE(ParameterizedConstructorTest) { }; architecture.SetSendCosts(expectedSendCosts); - BOOST_TEST(architecture.sendCost() == expectedSendCosts); + BOOST_TEST(architecture.SendCost() == expectedSendCosts); - BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 1), 4); - BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 0), 0); + BOOST_CHECK_EQUAL(architecture.CommunicationCosts(0, 1), 4); + BOOST_CHECK_EQUAL(architecture.CommunicationCosts(0, 0), 0); architecture.SetUniformSendCost(); - BOOST_TEST(architecture.sendCost() == uniform_sent_costs); + 
BOOST_TEST(architecture.SendCost() == uniformSentCosts); - BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 1), 2); - BOOST_CHECK_EQUAL(architecture.communicationCosts(0, 0), 0); + BOOST_CHECK_EQUAL(architecture.CommunicationCosts(0, 1), 2); + BOOST_CHECK_EQUAL(architecture.CommunicationCosts(0, 0), 0); } BOOST_AUTO_TEST_CASE(Architecture) { // default constructor - BspArchitecture test; - BOOST_CHECK_EQUAL(test.numberOfProcessors(), 2); - BOOST_CHECK_EQUAL(test.communicationCosts(), 1); - BOOST_CHECK_EQUAL(test.synchronisationCosts(), 2); - BOOST_CHECK_EQUAL(test.isNumaArchitecture(), false); - BOOST_CHECK_EQUAL(test.sendCosts(0, 1), 1); - BOOST_CHECK_EQUAL(test.sendCosts(0, 0), 0); - BOOST_CHECK_EQUAL(test.sendCosts(1, 1), 0); - BOOST_CHECK_EQUAL(test.sendCosts(1, 0), 1); + BspArchitecture test; + BOOST_CHECK_EQUAL(test.NumberOfProcessors(), 2); + BOOST_CHECK_EQUAL(test.CommunicationCosts(), 1); + BOOST_CHECK_EQUAL(test.SynchronisationCosts(), 2); + BOOST_CHECK_EQUAL(test.IsNumaArchitecture(), false); + BOOST_CHECK_EQUAL(test.SendCosts(0, 1), 1); + BOOST_CHECK_EQUAL(test.SendCosts(0, 0), 0); + BOOST_CHECK_EQUAL(test.SendCosts(1, 1), 0); + BOOST_CHECK_EQUAL(test.SendCosts(1, 0), 1); // constructor - BspArchitecture test2(5, 7, 14); - BOOST_CHECK_EQUAL(test2.numberOfProcessors(), 5); - BOOST_CHECK_EQUAL(test2.communicationCosts(), 7); - BOOST_CHECK_EQUAL(test2.synchronisationCosts(), 14); - BOOST_CHECK_EQUAL(test2.isNumaArchitecture(), false); + BspArchitecture test2(5, 7, 14); + BOOST_CHECK_EQUAL(test2.NumberOfProcessors(), 5); + BOOST_CHECK_EQUAL(test2.CommunicationCosts(), 7); + BOOST_CHECK_EQUAL(test2.SynchronisationCosts(), 14); + BOOST_CHECK_EQUAL(test2.IsNumaArchitecture(), false); for (unsigned i = 0; i < 5; i++) { for (unsigned j = 0; j < 5; j++) { if (i == j) { - BOOST_CHECK_EQUAL(test2.sendCosts(i, j), 0); - BOOST_CHECK_EQUAL(test2.communicationCosts(i, j), 0); + BOOST_CHECK_EQUAL(test2.SendCosts(i, j), 0); + 
BOOST_CHECK_EQUAL(test2.CommunicationCosts(i, j), 0); } else { - BOOST_CHECK_EQUAL(test2.sendCosts(i, j), 1); - BOOST_CHECK_EQUAL(test2.communicationCosts(i, j), 7); + BOOST_CHECK_EQUAL(test2.SendCosts(i, j), 1); + BOOST_CHECK_EQUAL(test2.CommunicationCosts(i, j), 7); } } } - test2.setCommunicationCosts(14); - BOOST_CHECK_EQUAL(test2.communicationCosts(), 14); + test2.SetCommunicationCosts(14); + BOOST_CHECK_EQUAL(test2.CommunicationCosts(), 14); for (unsigned i = 0; i < 5; i++) { for (unsigned j = 0; j < 5; j++) { if (i == j) { - BOOST_CHECK_EQUAL(test2.sendCosts(i, j), 0); - BOOST_CHECK_EQUAL(test2.communicationCosts(i, j), 0); + BOOST_CHECK_EQUAL(test2.SendCosts(i, j), 0); + BOOST_CHECK_EQUAL(test2.CommunicationCosts(i, j), 0); } else { - BOOST_CHECK_EQUAL(test2.sendCosts(i, j), 1); - BOOST_CHECK_EQUAL(test2.communicationCosts(i, j), 14); + BOOST_CHECK_EQUAL(test2.SendCosts(i, j), 1); + BOOST_CHECK_EQUAL(test2.CommunicationCosts(i, j), 14); } } } - test2.setCommunicationCosts(0); - BOOST_CHECK_EQUAL(test2.communicationCosts(), 0); + test2.SetCommunicationCosts(0); + BOOST_CHECK_EQUAL(test2.CommunicationCosts(), 0); for (unsigned i = 0; i < 5; i++) { for (unsigned j = 0; j < 5; j++) { if (i == j) { - BOOST_CHECK_EQUAL(test2.sendCosts(i, j), 0); - BOOST_CHECK_EQUAL(test2.communicationCosts(i, j), 0); + BOOST_CHECK_EQUAL(test2.SendCosts(i, j), 0); + BOOST_CHECK_EQUAL(test2.CommunicationCosts(i, j), 0); } else { - BOOST_CHECK_EQUAL(test2.sendCosts(i, j), 1); - BOOST_CHECK_EQUAL(test2.communicationCosts(i, j), 0); + BOOST_CHECK_EQUAL(test2.SendCosts(i, j), 1); + BOOST_CHECK_EQUAL(test2.CommunicationCosts(i, j), 0); } } } // constructor - std::vector> send_costs = { + std::vector> sendCosts = { {0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, @@ -158,32 +158,30 @@ BOOST_AUTO_TEST_CASE(Architecture) { {1, 1, 1, 1, 1, 0} }; - BOOST_CHECK_THROW(BspArchitecture test31(7, 42942, 0, send_costs), - std::invalid_argument); - BOOST_CHECK_THROW(BspArchitecture 
test32(5, 42942, 0, send_costs), - std::invalid_argument); + BOOST_CHECK_THROW(BspArchitecture test31(7, 42942, 0, sendCosts), std::invalid_argument); + BOOST_CHECK_THROW(BspArchitecture test32(5, 42942, 0, sendCosts), std::invalid_argument); - BspArchitecture test3(6, 47295, 0, send_costs); - BOOST_CHECK_EQUAL(test3.numberOfProcessors(), 6); - BOOST_CHECK_EQUAL(test3.communicationCosts(), 47295); - BOOST_CHECK_EQUAL(test3.synchronisationCosts(), 0); - BOOST_CHECK_EQUAL(test3.isNumaArchitecture(), false); + BspArchitecture test3(6, 47295, 0, sendCosts); + BOOST_CHECK_EQUAL(test3.NumberOfProcessors(), 6); + BOOST_CHECK_EQUAL(test3.CommunicationCosts(), 47295); + BOOST_CHECK_EQUAL(test3.SynchronisationCosts(), 0); + BOOST_CHECK_EQUAL(test3.IsNumaArchitecture(), false); for (unsigned i = 0; i < 6; i++) { for (unsigned j = 0; j < 6; j++) { if (i == j) { - BOOST_CHECK_EQUAL(test3.sendCosts(i, j), 0); - BOOST_CHECK_EQUAL(test3.communicationCosts(i, j), 0); + BOOST_CHECK_EQUAL(test3.SendCosts(i, j), 0); + BOOST_CHECK_EQUAL(test3.CommunicationCosts(i, j), 0); } else { - BOOST_CHECK_EQUAL(test3.sendCosts(i, j), 1); - BOOST_CHECK_EQUAL(test3.communicationCosts(i, j), 47295); + BOOST_CHECK_EQUAL(test3.SendCosts(i, j), 1); + BOOST_CHECK_EQUAL(test3.CommunicationCosts(i, j), 47295); } } } // constructor - std::vector> send_costs2 = { + std::vector> sendCosts2 = { {0, 1, 2, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, @@ -191,7 +189,7 @@ BOOST_AUTO_TEST_CASE(Architecture) { {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0} }; - std::vector> send_costs3 = { + std::vector> sendCosts3 = { {0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, @@ -200,34 +198,34 @@ BOOST_AUTO_TEST_CASE(Architecture) { {1, 1, 1, 1, 1, 0} }; - BspArchitecture test4(6, 0, 4294965, send_costs2); - BOOST_CHECK_EQUAL(test4.numberOfProcessors(), 6); - BOOST_CHECK_EQUAL(test4.communicationCosts(), 0); - BOOST_CHECK_EQUAL(test4.synchronisationCosts(), 4294965); - BOOST_CHECK_EQUAL(test4.isNumaArchitecture(), 
true); - BOOST_CHECK_EQUAL(test4.sendCosts(0, 2), 2); - - BspArchitecture test5(6, 0, 4294965, send_costs3); - BOOST_CHECK_EQUAL(test5.numberOfProcessors(), 6); - BOOST_CHECK_EQUAL(test5.communicationCosts(), 0); - BOOST_CHECK_EQUAL(test5.synchronisationCosts(), 4294965); - BOOST_CHECK_EQUAL(test5.isNumaArchitecture(), true); - BOOST_CHECK_EQUAL(test5.sendCosts(3, 0), 3); - - test5.setNumberOfProcessors(8); - BOOST_CHECK_EQUAL(test5.numberOfProcessors(), 8); - BOOST_CHECK_EQUAL(test5.communicationCosts(), 0); - BOOST_CHECK_EQUAL(test5.synchronisationCosts(), 4294965); - BOOST_CHECK_EQUAL(test5.sendCosts(3, 0), 1); - BOOST_CHECK_EQUAL(test5.sendCosts(7, 7), 0); - BOOST_CHECK_EQUAL(test5.sendCosts(7, 6), 1); - BOOST_CHECK_EQUAL(test5.sendCosts(3, 5), 1); - BOOST_CHECK_EQUAL(test5.isNumaArchitecture(), false); - - test.setNumberOfProcessors(5); - BOOST_CHECK_EQUAL(test.numberOfProcessors(), 5); - BOOST_CHECK_EQUAL(test.communicationCosts(), 1); - BOOST_CHECK_EQUAL(test.synchronisationCosts(), 2); - BOOST_CHECK_EQUAL(test.sendCosts(4, 3), 1); - BOOST_CHECK_EQUAL(test.isNumaArchitecture(), false); + BspArchitecture test4(6, 0, 4294965, sendCosts2); + BOOST_CHECK_EQUAL(test4.NumberOfProcessors(), 6); + BOOST_CHECK_EQUAL(test4.CommunicationCosts(), 0); + BOOST_CHECK_EQUAL(test4.SynchronisationCosts(), 4294965); + BOOST_CHECK_EQUAL(test4.IsNumaArchitecture(), true); + BOOST_CHECK_EQUAL(test4.SendCosts(0, 2), 2); + + BspArchitecture test5(6, 0, 4294965, sendCosts3); + BOOST_CHECK_EQUAL(test5.NumberOfProcessors(), 6); + BOOST_CHECK_EQUAL(test5.CommunicationCosts(), 0); + BOOST_CHECK_EQUAL(test5.SynchronisationCosts(), 4294965); + BOOST_CHECK_EQUAL(test5.IsNumaArchitecture(), true); + BOOST_CHECK_EQUAL(test5.SendCosts(3, 0), 3); + + test5.SetNumberOfProcessors(8); + BOOST_CHECK_EQUAL(test5.NumberOfProcessors(), 8); + BOOST_CHECK_EQUAL(test5.CommunicationCosts(), 0); + BOOST_CHECK_EQUAL(test5.SynchronisationCosts(), 4294965); + BOOST_CHECK_EQUAL(test5.SendCosts(3, 0), 1); + 
BOOST_CHECK_EQUAL(test5.SendCosts(7, 7), 0); + BOOST_CHECK_EQUAL(test5.SendCosts(7, 6), 1); + BOOST_CHECK_EQUAL(test5.SendCosts(3, 5), 1); + BOOST_CHECK_EQUAL(test5.IsNumaArchitecture(), false); + + test.SetNumberOfProcessors(5); + BOOST_CHECK_EQUAL(test.NumberOfProcessors(), 5); + BOOST_CHECK_EQUAL(test.CommunicationCosts(), 1); + BOOST_CHECK_EQUAL(test.SynchronisationCosts(), 2); + BOOST_CHECK_EQUAL(test.SendCosts(4, 3), 1); + BOOST_CHECK_EQUAL(test.IsNumaArchitecture(), false); } diff --git a/tests/bsp_greedy_recomputer.cpp b/tests/bsp_greedy_recomputer.cpp index cd3ab446..95bcee5d 100644 --- a/tests/bsp_greedy_recomputer.cpp +++ b/tests/bsp_greedy_recomputer.cpp @@ -28,45 +28,45 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(test_recomputer) { - using graph = computational_dag_vector_impl_def_t; - - BspInstance instance1; - instance1.setNumberOfProcessors(2); - instance1.setCommunicationCosts(1); - instance1.setSynchronisationCosts(1); - - instance1.getComputationalDag().add_vertex(10, 1, 0); - instance1.getComputationalDag().add_vertex(10, 1, 0); - instance1.getComputationalDag().add_vertex(10, 1, 0); - instance1.getComputationalDag().add_edge(0, 1); - instance1.getComputationalDag().add_edge(0, 2); - - BspSchedule schedule_init1(instance1); - schedule_init1.setAssignedProcessor(0, 0); - schedule_init1.setAssignedSuperstep(0, 0); - schedule_init1.setAssignedProcessor(1, 0); - schedule_init1.setAssignedSuperstep(1, 1); - schedule_init1.setAssignedProcessor(2, 1); - schedule_init1.setAssignedSuperstep(2, 1); - BOOST_CHECK(schedule_init1.satisfiesPrecedenceConstraints()); - BspScheduleCS schedule_init_cs1(schedule_init1); - BOOST_CHECK(schedule_init_cs1.hasValidCommSchedule()); - - BspScheduleRecomp schedule(instance1); - GreedyRecomputer scheduler; - scheduler.computeRecompSchedule(schedule_init_cs1, schedule); - BOOST_CHECK(schedule.satisfiesConstraints()); - BOOST_CHECK(schedule.computeCosts() < schedule_init_cs1.computeCosts()); 
- std::cout << "Cost decrease by greedy recomp: " << schedule_init_cs1.computeCosts() << " -> " << schedule.computeCosts() +BOOST_AUTO_TEST_CASE(TestRecomputer) { + using Graph = ComputationalDagVectorImplDefUnsignedT; + + BspInstance instance1; + instance1.SetNumberOfProcessors(2); + instance1.SetCommunicationCosts(1); + instance1.SetSynchronisationCosts(1); + + instance1.GetComputationalDag().AddVertex(10, 1, 0); + instance1.GetComputationalDag().AddVertex(10, 1, 0); + instance1.GetComputationalDag().AddVertex(10, 1, 0); + instance1.GetComputationalDag().AddEdge(0, 1); + instance1.GetComputationalDag().AddEdge(0, 2); + + BspSchedule scheduleInit1(instance1); + scheduleInit1.SetAssignedProcessor(0, 0); + scheduleInit1.SetAssignedSuperstep(0, 0); + scheduleInit1.SetAssignedProcessor(1, 0); + scheduleInit1.SetAssignedSuperstep(1, 1); + scheduleInit1.SetAssignedProcessor(2, 1); + scheduleInit1.SetAssignedSuperstep(2, 1); + BOOST_CHECK(scheduleInit1.SatisfiesPrecedenceConstraints()); + BspScheduleCS scheduleInitCs1(scheduleInit1); + BOOST_CHECK(scheduleInitCs1.HasValidCommSchedule()); + + BspScheduleRecomp schedule(instance1); + GreedyRecomputer scheduler; + scheduler.ComputeRecompSchedule(scheduleInitCs1, schedule); + BOOST_CHECK(schedule.SatisfiesConstraints()); + BOOST_CHECK(schedule.ComputeCosts() < scheduleInitCs1.ComputeCosts()); + std::cout << "Cost decrease by greedy recomp: " << scheduleInitCs1.ComputeCosts() << " -> " << schedule.ComputeCosts() << std::endl; // non-toy instance - BspInstance instance2; - instance2.setNumberOfProcessors(4); - instance2.setCommunicationCosts(5); - instance2.setSynchronisationCosts(20); + BspInstance instance2; + instance2.SetNumberOfProcessors(4); + instance2.SetCommunicationCosts(5); + instance2.SetSynchronisationCosts(20); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -76,21 +76,21 @@ BOOST_AUTO_TEST_CASE(test_recomputer) { std::cout << cwd << std::endl; } - bool status = 
file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), - instance2.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), + instance2.GetComputationalDag()); BOOST_CHECK(status); - BspSchedule schedule_init2(instance2); - BspLocking greedy; - greedy.computeSchedule(schedule_init2); - BOOST_CHECK(schedule_init2.satisfiesPrecedenceConstraints()); - BspScheduleCS schedule_init_cs2(schedule_init2); - BOOST_CHECK(schedule_init_cs2.hasValidCommSchedule()); - - scheduler.computeRecompSchedule(schedule_init_cs2, schedule); - BOOST_CHECK(schedule.satisfiesConstraints()); - BOOST_CHECK(schedule.computeCosts() < schedule_init_cs2.computeCosts()); - std::cout << "Cost decrease by greedy recomp: " << schedule_init_cs2.computeCosts() << " -> " << schedule.computeCosts() + BspSchedule scheduleInit2(instance2); + BspLocking greedy; + greedy.ComputeSchedule(scheduleInit2); + BOOST_CHECK(scheduleInit2.SatisfiesPrecedenceConstraints()); + BspScheduleCS scheduleInitCs2(scheduleInit2); + BOOST_CHECK(scheduleInitCs2.HasValidCommSchedule()); + + scheduler.ComputeRecompSchedule(scheduleInitCs2, schedule); + BOOST_CHECK(schedule.SatisfiesConstraints()); + BOOST_CHECK(schedule.ComputeCosts() < scheduleInitCs2.ComputeCosts()); + std::cout << "Cost decrease by greedy recomp: " << scheduleInitCs2.ComputeCosts() << " -> " << schedule.ComputeCosts() << std::endl; } diff --git a/tests/bsp_improvementschedulers.cpp b/tests/bsp_improvementschedulers.cpp index f1695297..523650bf 100644 --- a/tests/bsp_improvementschedulers.cpp +++ b/tests/bsp_improvementschedulers.cpp @@ -36,11 +36,11 @@ limitations under the License. 
// void print_bsp_schedule(const BspSchedule &bsp_schedule) { // std::vector>> schedule( -// bsp_schedule.numberOfSupersteps(), -// std::vector>(bsp_schedule.getInstance().numberOfProcessors(), std::vector())); +// bsp_schedule.NumberOfSupersteps(), +// std::vector>(bsp_schedule.GetInstance().NumberOfProcessors(), std::vector())); -// for (size_t node = 0; node < bsp_schedule.getInstance().numberOfVertices(); node++) { -// schedule[bsp_schedule.assignedSuperstep(node)][bsp_schedule.assignedProcessor(node)].push_back(node); +// for (size_t node = 0; node < bsp_schedule.GetInstance().NumberOfVertices(); node++) { +// schedule[bsp_schedule.AssignedSuperstep(node)][bsp_schedule.AssignedProcessor(node)].push_back(node); // } // std::cout << std::endl << "Schedule:" << std::endl; @@ -80,9 +80,9 @@ limitations under the License. // std::cout << "Architecture: " << name_machine << std::endl; // auto [status_graph, graph] = -// FileReader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string()); +// FileReader::ReadComputationalDagHyperdagFormatDB((cwd / filename_graph).string()); // auto [status_architecture, architecture] = -// FileReader::readBspArchitecture((cwd / filename_machine).string()); +// FileReader::ReadBspArchitecture((cwd / filename_machine).string()); // if (!status_graph || !status_architecture) { @@ -94,45 +94,45 @@ limitations under the License. 
// RandomBadGreedy test0; -// std::pair result0 = test0.computeSchedule(instance); -// test_improver->improveSchedule(result0.second); +// std::pair result0 = test0.ComputeSchedule(instance); +// test_improver->ImproveSchedule(result0.second); // print_bsp_schedule(result0.second); // BOOST_CHECK_EQUAL(SUCCESS, result0.first); -// BOOST_CHECK(result0.second.satisfiesPrecedenceConstraints()); -// BOOST_CHECK(result0.second.hasValidCommSchedule()); +// BOOST_CHECK(result0.second.SatisfiesPrecedenceConstraints()); +// BOOST_CHECK(result0.second.HasValidCommSchedule()); // BalDMixR test1; -// std::pair result1 = test1.computeSchedule(instance); -// test_improver->improveSchedule(result1.second); +// std::pair result1 = test1.ComputeSchedule(instance); +// test_improver->ImproveSchedule(result1.second); // print_bsp_schedule(result1.second); // BOOST_CHECK_EQUAL(SUCCESS, result1.first); -// BOOST_CHECK(result1.second.satisfiesPrecedenceConstraints()); -// BOOST_CHECK(result1.second.hasValidCommSchedule()); +// BOOST_CHECK(result1.second.SatisfiesPrecedenceConstraints()); +// BOOST_CHECK(result1.second.HasValidCommSchedule()); // HDagg_simple test2; -// std::pair result2 = test2.computeSchedule(instance); -// test_improver->improveSchedule(result2.second); +// std::pair result2 = test2.ComputeSchedule(instance); +// test_improver->ImproveSchedule(result2.second); // print_bsp_schedule(result2.second); // BOOST_CHECK_EQUAL(SUCCESS, result2.first); -// BOOST_CHECK(result2.second.satisfiesPrecedenceConstraints()); -// BOOST_CHECK(result2.second.hasValidCommSchedule()); +// BOOST_CHECK(result2.second.SatisfiesPrecedenceConstraints()); +// BOOST_CHECK(result2.second.HasValidCommSchedule()); // } // } // }; -BOOST_AUTO_TEST_CASE(Hungarian_alg_process_permuter_test) { +BOOST_AUTO_TEST_CASE(HungarianAlgProcessPermuterTest) { // Hungarian_alg_process_permuter test; // run_test(&test); } @@ -145,7 +145,7 @@ BOOST_AUTO_TEST_CASE(Hungarian_alg_process_permuter_test) { // 
BOOST_AUTO_TEST_CASE(LKTotalCommScheduler_test) { // kl_total_comm test; -// test.setTimeLimitSeconds(10); +// test.SetTimeLimitSeconds(10); // test.set_compute_with_time_limit(true); // run_test(&test); // } diff --git a/tests/bsp_instance.cpp b/tests/bsp_instance.cpp index d87d8259..854fd8df 100644 --- a/tests/bsp_instance.cpp +++ b/tests/bsp_instance.cpp @@ -31,32 +31,32 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(test_1) { - BspArchitecture architecture(4, 2, 3); - computational_dag_vector_impl_def_t graph; +BOOST_AUTO_TEST_CASE(Test1) { + BspArchitecture architecture(4, 2, 3); + ComputationalDagEdgeIdxVectorImplDefT graph; BspInstance instance(graph, architecture); - BOOST_CHECK_EQUAL(instance.numberOfVertices(), 0); - BOOST_CHECK_EQUAL(instance.numberOfProcessors(), 4); - BOOST_CHECK_EQUAL(instance.synchronisationCosts(), 3); - BOOST_CHECK_EQUAL(instance.communicationCosts(), 2); + BOOST_CHECK_EQUAL(instance.NumberOfVertices(), 0); + BOOST_CHECK_EQUAL(instance.NumberOfProcessors(), 4); + BOOST_CHECK_EQUAL(instance.SynchronisationCosts(), 3); + BOOST_CHECK_EQUAL(instance.CommunicationCosts(), 2); - BspArchitecture architecture_2(6, 3, 1); + BspArchitecture architecture2(6, 3, 1); - instance.getArchitecture() = architecture_2; + instance.GetArchitecture() = architecture2; - BOOST_CHECK_EQUAL(instance.numberOfProcessors(), 6); - BOOST_CHECK_EQUAL(instance.synchronisationCosts(), 1); - BOOST_CHECK_EQUAL(instance.communicationCosts(), 3); - BOOST_CHECK_EQUAL(instance.numberOfVertices(), 0); + BOOST_CHECK_EQUAL(instance.NumberOfProcessors(), 6); + BOOST_CHECK_EQUAL(instance.SynchronisationCosts(), 1); + BOOST_CHECK_EQUAL(instance.CommunicationCosts(), 3); + BOOST_CHECK_EQUAL(instance.NumberOfVertices(), 0); } -BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { - BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(2); - instance.setSynchronisationCosts(3); 
+BOOST_AUTO_TEST_CASE(TestInstanceBicgstab) { + BspInstance instance; + instance.SetNumberOfProcessors(4); + instance.SetCommunicationCosts(2); + instance.SetSynchronisationCosts(3); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -66,97 +66,97 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), - instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), + instance.GetComputationalDag()); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54); + BOOST_CHECK_EQUAL(instance.GetComputationalDag().NumVertices(), 54); - BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertex_types(), 1); + BOOST_CHECK_EQUAL(instance.GetComputationalDag().NumVertexTypes(), 1); - instance.getComputationalDag().set_vertex_type(0, 1); + instance.GetComputationalDag().SetVertexType(0, 1); - BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertex_types(), 2); + BOOST_CHECK_EQUAL(instance.GetComputationalDag().NumVertexTypes(), 2); - instance.getArchitecture().setProcessorType(0, 1); - instance.setDiagonalCompatibilityMatrix(2); + instance.GetArchitecture().SetProcessorType(0, 1); + instance.SetDiagonalCompatibilityMatrix(2); - BOOST_CHECK_EQUAL(instance.isCompatible(0, 0), true); - BOOST_CHECK_EQUAL(instance.isCompatible(1, 0), false); + BOOST_CHECK_EQUAL(instance.IsCompatible(0, 0), true); + BOOST_CHECK_EQUAL(instance.IsCompatible(1, 0), false); CompatibleProcessorRange range(instance); - BOOST_CHECK_EQUAL(range.compatible_processors_type(0).size(), 3); - BOOST_CHECK_EQUAL(range.compatible_processors_type(1).size(), 1); + BOOST_CHECK_EQUAL(range.CompatibleProcessorsType(0).size(), 3); + BOOST_CHECK_EQUAL(range.CompatibleProcessorsType(1).size(), 1); 
std::cout << "Compatible processors type 0: " << std::endl; - for (const auto &p : range.compatible_processors_type(0)) { + for (const auto &p : range.CompatibleProcessorsType(0)) { std::cout << p; } std::cout << std::endl; std::cout << "Compatible processors type 1: " << std::endl; - for (const auto &p : range.compatible_processors_type(1)) { + for (const auto &p : range.CompatibleProcessorsType(1)) { std::cout << p; } std::cout << std::endl; - BOOST_CHECK_EQUAL(range.compatible_processors_vertex(0).size(), 1); - BOOST_CHECK_EQUAL(range.compatible_processors_vertex(1).size(), 3); - BOOST_CHECK_EQUAL(range.compatible_processors_vertex(2).size(), 3); - BOOST_CHECK_EQUAL(range.compatible_processors_vertex(3).size(), 3); - - BOOST_CHECK_EQUAL(range.compatible_processors_type(1)[0], 0); - BOOST_CHECK_EQUAL(range.compatible_processors_type(0)[0], 1); - BOOST_CHECK_EQUAL(range.compatible_processors_type(0)[1], 2); - BOOST_CHECK_EQUAL(range.compatible_processors_type(0)[2], 3); - - BspInstance instance_t2(instance); - - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); - BOOST_CHECK_EQUAL(instance_t2.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors()); - BOOST_CHECK_EQUAL(instance_t2.getArchitecture().getNumberOfProcessorTypes(), - instance.getArchitecture().getNumberOfProcessorTypes()); - BOOST_CHECK_EQUAL(instance_t2.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts()); - BOOST_CHECK_EQUAL(instance_t2.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts()); - - BspInstance instance_t3; - - instance_t3 = instance; - - 
BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); - BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); - BOOST_CHECK_EQUAL(instance_t3.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors()); - BOOST_CHECK_EQUAL(instance_t3.getArchitecture().getNumberOfProcessorTypes(), - instance.getArchitecture().getNumberOfProcessorTypes()); - BOOST_CHECK_EQUAL(instance_t3.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts()); - BOOST_CHECK_EQUAL(instance_t3.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts()); - - BspInstance instance_t4(std::move(instance_t3)); - - BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); - BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); - BOOST_CHECK_EQUAL(instance_t4.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors()); - BOOST_CHECK_EQUAL(instance_t4.getArchitecture().getNumberOfProcessorTypes(), - instance.getArchitecture().getNumberOfProcessorTypes()); - BOOST_CHECK_EQUAL(instance_t4.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts()); - BOOST_CHECK_EQUAL(instance_t4.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts()); - - BspInstance instance_t5; - - instance_t5 = std::move(instance_t4); - BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - 
BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); - BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); - BOOST_CHECK_EQUAL(instance_t5.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors()); - BOOST_CHECK_EQUAL(instance_t5.getArchitecture().getNumberOfProcessorTypes(), - instance.getArchitecture().getNumberOfProcessorTypes()); - BOOST_CHECK_EQUAL(instance_t5.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts()); - BOOST_CHECK_EQUAL(instance_t5.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts()); + BOOST_CHECK_EQUAL(range.CompatibleProcessorsVertex(0).size(), 1); + BOOST_CHECK_EQUAL(range.CompatibleProcessorsVertex(1).size(), 3); + BOOST_CHECK_EQUAL(range.CompatibleProcessorsVertex(2).size(), 3); + BOOST_CHECK_EQUAL(range.CompatibleProcessorsVertex(3).size(), 3); + + BOOST_CHECK_EQUAL(range.CompatibleProcessorsType(1)[0], 0); + BOOST_CHECK_EQUAL(range.CompatibleProcessorsType(0)[0], 1); + BOOST_CHECK_EQUAL(range.CompatibleProcessorsType(0)[1], 2); + BOOST_CHECK_EQUAL(range.CompatibleProcessorsType(0)[2], 3); + + BspInstance instanceT2(instance); + + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().NumVertexTypes(), instance.GetComputationalDag().NumVertexTypes()); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().NumEdges(), instance.GetComputationalDag().NumEdges()); + BOOST_CHECK_EQUAL(instanceT2.GetArchitecture().NumberOfProcessors(), instance.GetArchitecture().NumberOfProcessors()); + BOOST_CHECK_EQUAL(instanceT2.GetArchitecture().GetNumberOfProcessorTypes(), + instance.GetArchitecture().GetNumberOfProcessorTypes()); + BOOST_CHECK_EQUAL(instanceT2.GetArchitecture().CommunicationCosts(), 
instance.GetArchitecture().CommunicationCosts()); + BOOST_CHECK_EQUAL(instanceT2.GetArchitecture().SynchronisationCosts(), instance.GetArchitecture().SynchronisationCosts()); + + BspInstance instanceT3; + + instanceT3 = instance; + + BOOST_CHECK_EQUAL(instanceT3.GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK_EQUAL(instanceT3.GetComputationalDag().NumVertexTypes(), instance.GetComputationalDag().NumVertexTypes()); + BOOST_CHECK_EQUAL(instanceT3.GetComputationalDag().NumEdges(), instance.GetComputationalDag().NumEdges()); + BOOST_CHECK_EQUAL(instanceT3.GetArchitecture().NumberOfProcessors(), instance.GetArchitecture().NumberOfProcessors()); + BOOST_CHECK_EQUAL(instanceT3.GetArchitecture().GetNumberOfProcessorTypes(), + instance.GetArchitecture().GetNumberOfProcessorTypes()); + BOOST_CHECK_EQUAL(instanceT3.GetArchitecture().CommunicationCosts(), instance.GetArchitecture().CommunicationCosts()); + BOOST_CHECK_EQUAL(instanceT3.GetArchitecture().SynchronisationCosts(), instance.GetArchitecture().SynchronisationCosts()); + + BspInstance instanceT4(std::move(instanceT3)); + + BOOST_CHECK_EQUAL(instanceT4.GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK_EQUAL(instanceT4.GetComputationalDag().NumVertexTypes(), instance.GetComputationalDag().NumVertexTypes()); + BOOST_CHECK_EQUAL(instanceT4.GetComputationalDag().NumEdges(), instance.GetComputationalDag().NumEdges()); + BOOST_CHECK_EQUAL(instanceT4.GetArchitecture().NumberOfProcessors(), instance.GetArchitecture().NumberOfProcessors()); + BOOST_CHECK_EQUAL(instanceT4.GetArchitecture().GetNumberOfProcessorTypes(), + instance.GetArchitecture().GetNumberOfProcessorTypes()); + BOOST_CHECK_EQUAL(instanceT4.GetArchitecture().CommunicationCosts(), instance.GetArchitecture().CommunicationCosts()); + BOOST_CHECK_EQUAL(instanceT4.GetArchitecture().SynchronisationCosts(), instance.GetArchitecture().SynchronisationCosts()); + + BspInstance 
instanceT5; + + instanceT5 = std::move(instanceT4); + BOOST_CHECK_EQUAL(instanceT5.GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK_EQUAL(instanceT5.GetComputationalDag().NumVertexTypes(), instance.GetComputationalDag().NumVertexTypes()); + BOOST_CHECK_EQUAL(instanceT5.GetComputationalDag().NumEdges(), instance.GetComputationalDag().NumEdges()); + BOOST_CHECK_EQUAL(instanceT5.GetArchitecture().NumberOfProcessors(), instance.GetArchitecture().NumberOfProcessors()); + BOOST_CHECK_EQUAL(instanceT5.GetArchitecture().GetNumberOfProcessorTypes(), + instance.GetArchitecture().GetNumberOfProcessorTypes()); + BOOST_CHECK_EQUAL(instanceT5.GetArchitecture().CommunicationCosts(), instance.GetArchitecture().CommunicationCosts()); + BOOST_CHECK_EQUAL(instanceT5.GetArchitecture().SynchronisationCosts(), instance.GetArchitecture().SynchronisationCosts()); } diff --git a/tests/bsp_schedule.cpp b/tests/bsp_schedule.cpp index 60cdf53e..683159c1 100644 --- a/tests/bsp_schedule.cpp +++ b/tests/bsp_schedule.cpp @@ -49,13 +49,13 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; +BOOST_AUTO_TEST_CASE(TestInstanceBicgstab) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; - BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + BspInstance instance; + instance.SetNumberOfProcessors(4); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -65,72 +65,72 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { std::cout << cwd << std::endl; } - bool status = file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::ReadGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.GetComputationalDag()); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54); - BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertex_types(), 1); + BOOST_CHECK_EQUAL(instance.GetComputationalDag().NumVertices(), 54); + BOOST_CHECK_EQUAL(instance.GetComputationalDag().NumVertexTypes(), 1); - std::vector *> schedulers = {new BspLocking(), - new EtfScheduler(), - new GreedyBspScheduler(), - new GreedyChildren(), - new GrowLocalAutoCores(), - new VarianceFillup()}; + std::vector *> schedulers = {new BspLocking(), + new EtfScheduler(), + new GreedyBspScheduler(), + new GreedyChildren(), + new GrowLocalAutoCores(), + new VarianceFillup()}; - std::vector expected_bsp_costs = {92, 108, 100, 108, 102, 110}; - std::vector expected_total_costs = {74, 87, 84.25, 80.25, 91.25, 86.75}; - std::vector expected_buffered_sending_costs = {92, 111, 103, 105, 102, 113}; - std::vector expected_supersteps = {6, 7, 7, 5, 3, 7}; + std::vector expectedBspCosts = {92, 108, 100, 108, 102, 110}; + std::vector 
expectedTotalCosts = {74, 87, 84.25, 80.25, 91.25, 86.75}; + std::vector expectedBufferedSendingCosts = {92, 111, 103, 105, 102, 113}; + std::vector expectedSupersteps = {6, 7, 7, 5, 3, 7}; - std::vector expected_bsp_cs_costs = {86, 99, 97, 99, 102, 107}; + std::vector expectedBspCsCosts = {86, 99, 97, 99, 102, 107}; size_t i = 0; for (auto &scheduler : schedulers) { - BspSchedule schedule(instance); + BspSchedule schedule(instance); - const auto result = scheduler->computeSchedule(schedule); + const auto result = scheduler->ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BOOST_CHECK_EQUAL(schedule.computeCosts(), expected_bsp_costs[i]); - BOOST_CHECK_EQUAL(TotalCommunicationCost()(schedule), expected_total_costs[i]); - BOOST_CHECK_EQUAL(BufferedSendingCost()(schedule), expected_buffered_sending_costs[i]); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), expected_supersteps[i]); + BOOST_CHECK_EQUAL(schedule.ComputeCosts(), expectedBspCosts[i]); + BOOST_CHECK_EQUAL(TotalCommunicationCost()(schedule), expectedTotalCosts[i]); + BOOST_CHECK_EQUAL(BufferedSendingCost()(schedule), expectedBufferedSendingCosts[i]); + BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), expectedSupersteps[i]); - BspScheduleCS schedule_cs(instance); + BspScheduleCS scheduleCs(instance); - const auto result_cs = scheduler->computeScheduleCS(schedule_cs); + const auto resultCs = scheduler->ComputeScheduleCS(scheduleCs); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result_cs); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, resultCs); - BOOST_CHECK(schedule_cs.hasValidCommSchedule()); + BOOST_CHECK(scheduleCs.HasValidCommSchedule()); - 
BOOST_CHECK_EQUAL(schedule_cs.computeCosts(), expected_bsp_cs_costs[i]); + BOOST_CHECK_EQUAL(scheduleCs.ComputeCosts(), expectedBspCsCosts[i]); i++; delete scheduler; } - BspSchedule schedule(instance); - Serial serial; - const auto result = serial.computeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 1); + BspSchedule schedule(instance); + Serial serial; + const auto result = serial.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 1); } -BOOST_AUTO_TEST_CASE(test_schedule_writer) { - using graph_t1 = computational_dag_edge_idx_vector_impl_def_int_t; - using graph_t2 = computational_dag_vector_impl_def_int_t; +BOOST_AUTO_TEST_CASE(TestScheduleWriter) { + using GraphT1 = ComputationalDagEdgeIdxVectorImplDefIntT; + using GraphT2 = ComputationalDagVectorImplDefIntT; - BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + BspInstance instance; + instance.SetNumberOfProcessors(4); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -140,81 +140,77 @@ BOOST_AUTO_TEST_CASE(test_schedule_writer) { std::cout << cwd << std::endl; } - bool status = file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::ReadGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.GetComputationalDag()); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54); - BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertex_types(), 1); + 
BOOST_CHECK_EQUAL(instance.GetComputationalDag().NumVertices(), 54); + BOOST_CHECK_EQUAL(instance.GetComputationalDag().NumVertexTypes(), 1); - BspLocking scheduler; - BspSchedule schedule(instance); - const auto result = scheduler.computeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BspLocking scheduler; + BspSchedule schedule(instance); + const auto result = scheduler.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - DotFileWriter sched_writer; + DotFileWriter schedWriter; std::cout << "Writing Graph" << std::endl; - sched_writer.write_graph(std::cout, instance.getComputationalDag()); + schedWriter.WriteGraph(std::cout, instance.GetComputationalDag()); std::cout << "Writing schedule_t1" << std::endl; - sched_writer.write_schedule(std::cout, schedule); + schedWriter.WriteSchedule(std::cout, schedule); - BspInstance instance_t2(instance); - BspSchedule schedule_t2(instance_t2); + BspInstance instanceT2(instance); + BspSchedule scheduleT2(instanceT2); - BOOST_CHECK_EQUAL(schedule_t2.getInstance().getComputationalDag().num_vertices(), - instance.getComputationalDag().num_vertices()); - BOOST_CHECK(schedule_t2.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(scheduleT2.GetInstance().GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK(scheduleT2.SatisfiesPrecedenceConstraints()); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().NumVertices(), 
instance.GetComputationalDag().NumVertices()); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().NumVertexTypes(), instance.GetComputationalDag().NumVertexTypes()); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().NumEdges(), instance.GetComputationalDag().NumEdges()); - for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_work_weight(v), - instance.getComputationalDag().vertex_work_weight(v)); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_comm_weight(v), - instance.getComputationalDag().vertex_comm_weight(v)); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().VertexWorkWeight(v), instance.GetComputationalDag().VertexWorkWeight(v)); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().VertexCommWeight(v), instance.GetComputationalDag().VertexCommWeight(v)); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_mem_weight(v), - instance.getComputationalDag().vertex_mem_weight(v)); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().VertexMemWeight(v), instance.GetComputationalDag().VertexMemWeight(v)); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_type(v), instance.getComputationalDag().vertex_type(v)); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().VertexType(v), instance.GetComputationalDag().VertexType(v)); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().out_degree(v), instance.getComputationalDag().out_degree(v)); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().OutDegree(v), instance.GetComputationalDag().OutDegree(v)); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().in_degree(v), instance.getComputationalDag().in_degree(v)); + BOOST_CHECK_EQUAL(instanceT2.GetComputationalDag().InDegree(v), instance.GetComputationalDag().InDegree(v)); } std::cout << "Writing schedule_t2" << std::endl; - sched_writer.write_schedule(std::cout, schedule_t2); + 
schedWriter.WriteSchedule(std::cout, scheduleT2); - BspScheduleRecomp schedule_recomp(schedule_t2); + BspScheduleRecomp scheduleRecomp(scheduleT2); - schedule_recomp.assignments(0).emplace_back(1, 0); - schedule_recomp.assignments(0).emplace_back(2, 0); - schedule_recomp.assignments(0).emplace_back(3, 0); + scheduleRecomp.Assignments(0).emplace_back(1, 0); + scheduleRecomp.Assignments(0).emplace_back(2, 0); + scheduleRecomp.Assignments(0).emplace_back(3, 0); std::cout << "Writing schedule_recomp" << std::endl; - sched_writer.write_schedule_recomp(std::cout, schedule_recomp); + schedWriter.WriteScheduleRecomp(std::cout, scheduleRecomp); std::cout << "Writing schedule_recomp_duplicate" << std::endl; - sched_writer.write_schedule_recomp_duplicate(std::cout, schedule_recomp); + schedWriter.WriteScheduleRecompDuplicate(std::cout, scheduleRecomp); std::cout << "Writing schedule_t2 CS" << std::endl; - BspScheduleCS schedule_cs(schedule_t2); - sched_writer.write_schedule_cs(std::cout, schedule_cs); + BspScheduleCS scheduleCs(scheduleT2); + schedWriter.WriteScheduleCS(std::cout, scheduleCs); } -BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; +BOOST_AUTO_TEST_CASE(TestBspScheduleCs) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; - BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + BspInstance instance; + instance.SetNumberOfProcessors(4); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -224,132 +220,125 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { std::cout << cwd << std::endl; } - file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); + file_reader::ReadGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), 
instance.GetComputationalDag()); - BspSchedule schedule(instance); - BspLocking scheduler; + BspSchedule schedule(instance); + BspLocking scheduler; - const auto result = scheduler.computeSchedule(schedule); + const auto result = scheduler.ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BspSchedule schedule_t2(schedule); + BspSchedule scheduleT2(schedule); - BOOST_CHECK_EQUAL(schedule_t2.getInstance().getComputationalDag().num_vertices(), - instance.getComputationalDag().num_vertices()); - BOOST_CHECK(schedule_t2.satisfiesPrecedenceConstraints()); - BOOST_CHECK_EQUAL(schedule_t2.numberOfSupersteps(), schedule.numberOfSupersteps()); + BOOST_CHECK_EQUAL(scheduleT2.GetInstance().GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK(scheduleT2.SatisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(scheduleT2.NumberOfSupersteps(), schedule.NumberOfSupersteps()); - for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t2.assignedSuperstep(v), schedule.assignedSuperstep(v)); - BOOST_CHECK_EQUAL(schedule_t2.assignedProcessor(v), schedule.assignedProcessor(v)); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + BOOST_CHECK_EQUAL(scheduleT2.AssignedSuperstep(v), schedule.AssignedSuperstep(v)); + BOOST_CHECK_EQUAL(scheduleT2.AssignedProcessor(v), schedule.AssignedProcessor(v)); } - BspSchedule schedule_t3(instance); - schedule_t3 = schedule_t2; - BOOST_CHECK_EQUAL(schedule_t3.getInstance().getComputationalDag().num_vertices(), - instance.getComputationalDag().num_vertices()); - BOOST_CHECK(schedule_t3.satisfiesPrecedenceConstraints()); - 
BOOST_CHECK_EQUAL(schedule_t3.numberOfSupersteps(), schedule.numberOfSupersteps()); + BspSchedule scheduleT3(instance); + scheduleT3 = scheduleT2; + BOOST_CHECK_EQUAL(scheduleT3.GetInstance().GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK(scheduleT3.SatisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(scheduleT3.NumberOfSupersteps(), schedule.NumberOfSupersteps()); - for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t3.assignedSuperstep(v), schedule.assignedSuperstep(v)); - BOOST_CHECK_EQUAL(schedule_t3.assignedProcessor(v), schedule.assignedProcessor(v)); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + BOOST_CHECK_EQUAL(scheduleT3.AssignedSuperstep(v), schedule.AssignedSuperstep(v)); + BOOST_CHECK_EQUAL(scheduleT3.AssignedProcessor(v), schedule.AssignedProcessor(v)); } - BspSchedule schedule_t4(instance); - schedule_t4 = std::move(schedule_t3); + BspSchedule scheduleT4(instance); + scheduleT4 = std::move(scheduleT3); - BOOST_CHECK_EQUAL(schedule_t4.getInstance().getComputationalDag().num_vertices(), - instance.getComputationalDag().num_vertices()); - BOOST_CHECK(schedule_t4.satisfiesPrecedenceConstraints()); - BOOST_CHECK_EQUAL(schedule_t4.numberOfSupersteps(), schedule.numberOfSupersteps()); - for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t4.assignedSuperstep(v), schedule.assignedSuperstep(v)); - BOOST_CHECK_EQUAL(schedule_t4.assignedProcessor(v), schedule.assignedProcessor(v)); + BOOST_CHECK_EQUAL(scheduleT4.GetInstance().GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK(scheduleT4.SatisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(scheduleT4.NumberOfSupersteps(), schedule.NumberOfSupersteps()); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + BOOST_CHECK_EQUAL(scheduleT4.AssignedSuperstep(v), schedule.AssignedSuperstep(v)); 
+ BOOST_CHECK_EQUAL(scheduleT4.AssignedProcessor(v), schedule.AssignedProcessor(v)); } - BspSchedule schedule_t5(std::move(schedule_t4)); - BOOST_CHECK_EQUAL(schedule_t5.getInstance().getComputationalDag().num_vertices(), - instance.getComputationalDag().num_vertices()); - BOOST_CHECK(schedule_t5.satisfiesPrecedenceConstraints()); - BOOST_CHECK_EQUAL(schedule_t5.numberOfSupersteps(), schedule.numberOfSupersteps()); + BspSchedule scheduleT5(std::move(scheduleT4)); + BOOST_CHECK_EQUAL(scheduleT5.GetInstance().GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK(scheduleT5.SatisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(scheduleT5.NumberOfSupersteps(), schedule.NumberOfSupersteps()); - for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t5.assignedSuperstep(v), schedule.assignedSuperstep(v)); - BOOST_CHECK_EQUAL(schedule_t5.assignedProcessor(v), schedule.assignedProcessor(v)); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + BOOST_CHECK_EQUAL(scheduleT5.AssignedSuperstep(v), schedule.AssignedSuperstep(v)); + BOOST_CHECK_EQUAL(scheduleT5.AssignedProcessor(v), schedule.AssignedProcessor(v)); } - BspScheduleCS schedule_cs(schedule_t5); - BOOST_CHECK_EQUAL(schedule_cs.getInstance().getComputationalDag().num_vertices(), - instance.getComputationalDag().num_vertices()); - BOOST_CHECK(schedule_cs.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule_cs.hasValidCommSchedule()); - BOOST_CHECK_EQUAL(schedule_cs.numberOfSupersteps(), schedule.numberOfSupersteps()); + BspScheduleCS scheduleCs(scheduleT5); + BOOST_CHECK_EQUAL(scheduleCs.GetInstance().GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK(scheduleCs.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(scheduleCs.HasValidCommSchedule()); + BOOST_CHECK_EQUAL(scheduleCs.NumberOfSupersteps(), schedule.NumberOfSupersteps()); - for (const auto &v : 
instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_cs.assignedSuperstep(v), schedule.assignedSuperstep(v)); - BOOST_CHECK_EQUAL(schedule_cs.assignedProcessor(v), schedule.assignedProcessor(v)); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + BOOST_CHECK_EQUAL(scheduleCs.AssignedSuperstep(v), schedule.AssignedSuperstep(v)); + BOOST_CHECK_EQUAL(scheduleCs.AssignedProcessor(v), schedule.AssignedProcessor(v)); } // schedule_t5 is still valid - BOOST_CHECK_EQUAL(schedule_t5.getInstance().getComputationalDag().num_vertices(), - instance.getComputationalDag().num_vertices()); - BOOST_CHECK(schedule_t5.satisfiesPrecedenceConstraints()); - BOOST_CHECK_EQUAL(schedule_t5.numberOfSupersteps(), schedule.numberOfSupersteps()); - - for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t5.assignedSuperstep(v), schedule.assignedSuperstep(v)); - BOOST_CHECK_EQUAL(schedule_t5.assignedProcessor(v), schedule.assignedProcessor(v)); + BOOST_CHECK_EQUAL(scheduleT5.GetInstance().GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK(scheduleT5.SatisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(scheduleT5.NumberOfSupersteps(), schedule.NumberOfSupersteps()); + + for (const auto &v : instance.GetComputationalDag().Vertices()) { + BOOST_CHECK_EQUAL(scheduleT5.AssignedSuperstep(v), schedule.AssignedSuperstep(v)); + BOOST_CHECK_EQUAL(scheduleT5.AssignedProcessor(v), schedule.AssignedProcessor(v)); } - BspScheduleCS schedule_cs_t2(std::move(schedule_t5)); - BOOST_CHECK_EQUAL(schedule_cs_t2.getInstance().getComputationalDag().num_vertices(), - instance.getComputationalDag().num_vertices()); - BOOST_CHECK(schedule_cs_t2.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule_cs_t2.hasValidCommSchedule()); - BOOST_CHECK_EQUAL(schedule_cs_t2.numberOfSupersteps(), schedule.numberOfSupersteps()); + BspScheduleCS scheduleCsT2(std::move(scheduleT5)); + 
BOOST_CHECK_EQUAL(scheduleCsT2.GetInstance().GetComputationalDag().NumVertices(), instance.GetComputationalDag().NumVertices()); + BOOST_CHECK(scheduleCsT2.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(scheduleCsT2.HasValidCommSchedule()); + BOOST_CHECK_EQUAL(scheduleCsT2.NumberOfSupersteps(), schedule.NumberOfSupersteps()); - for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_cs_t2.assignedSuperstep(v), schedule.assignedSuperstep(v)); - BOOST_CHECK_EQUAL(schedule_cs_t2.assignedProcessor(v), schedule.assignedProcessor(v)); + for (const auto &v : instance.GetComputationalDag().Vertices()) { + BOOST_CHECK_EQUAL(scheduleCsT2.AssignedSuperstep(v), schedule.AssignedSuperstep(v)); + BOOST_CHECK_EQUAL(scheduleCsT2.AssignedProcessor(v), schedule.AssignedProcessor(v)); } } -BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; +BOOST_AUTO_TEST_CASE(TestMaxBspSchedule) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; - BspInstance instance; - instance.setNumberOfProcessors(2); - instance.setCommunicationCosts(10); // g=10 - instance.setSynchronisationCosts(100); // l=100 (not used in MaxBspSchedule cost model) + BspInstance instance; + instance.SetNumberOfProcessors(2); + instance.SetCommunicationCosts(10); // g=10 + instance.SetSynchronisationCosts(100); // l=100 (not used in MaxBspSchedule cost model) - auto &dag = instance.getComputationalDag(); - dag.add_vertex(10, 1, 0); // Node 0 - dag.add_vertex(5, 2, 0); // Node 1 - dag.add_vertex(5, 3, 0); // Node 2 - dag.add_vertex(10, 4, 0); // Node 3 - dag.add_edge(0, 1); - dag.add_edge(0, 2); - dag.add_edge(1, 3); - dag.add_edge(2, 3); + auto &dag = instance.GetComputationalDag(); + dag.AddVertex(10, 1, 0); // Node 0 + dag.AddVertex(5, 2, 0); // Node 1 + dag.AddVertex(5, 3, 0); // Node 2 + dag.AddVertex(10, 4, 0); // Node 3 + dag.AddEdge(0, 1); + dag.AddEdge(0, 2); + dag.AddEdge(1, 3); + dag.AddEdge(2, 3); // Test 
a valid schedule with staleness = 2 { - MaxBspSchedule schedule(instance); - schedule.setAssignedProcessor(0, 0); - schedule.setAssignedSuperstep(0, 0); - schedule.setAssignedProcessor(1, 0); - schedule.setAssignedSuperstep(1, 1); - schedule.setAssignedProcessor(2, 1); - schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 - schedule.setAssignedProcessor(3, 0); - schedule.setAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2 - schedule.updateNumberOfSupersteps(); - - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + MaxBspSchedule schedule(instance); + schedule.SetAssignedProcessor(0, 0); + schedule.SetAssignedSuperstep(0, 0); + schedule.SetAssignedProcessor(1, 0); + schedule.SetAssignedSuperstep(1, 1); + schedule.SetAssignedProcessor(2, 1); + schedule.SetAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 + schedule.SetAssignedProcessor(3, 0); + schedule.SetAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2 + schedule.UpdateNumberOfSupersteps(); + + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); // Manual cost calculation: // Superstep 0: work = {10, 0} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10. @@ -358,23 +347,23 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) { // = {0, 0} -> max_work = 0. comm from SS2: 2->3 (P1->P0) needed at SS4, comm sent in SS2. comm=3*10=30. Cost = max(0,l+30) = 130. // Superstep 4: work = {10, 0} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10. 
// Total cost = 10 + 110 + 5 + 130 + 10 = 265 - BOOST_CHECK_EQUAL(schedule.computeCosts(), 265); + BOOST_CHECK_EQUAL(schedule.ComputeCosts(), 265); } // Test another valid schedule { - MaxBspSchedule schedule(instance); - schedule.setAssignedProcessor(0, 0); - schedule.setAssignedSuperstep(0, 0); - schedule.setAssignedProcessor(1, 1); - schedule.setAssignedSuperstep(1, 2); // 0->1 is cross-proc, 2 >= 0+2 - schedule.setAssignedProcessor(2, 1); - schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 - schedule.setAssignedProcessor(3, 0); - schedule.setAssignedSuperstep(3, 4); // 1->3, 2->3 are cross-proc, 4 >= 2+2 - schedule.updateNumberOfSupersteps(); - - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + MaxBspSchedule schedule(instance); + schedule.SetAssignedProcessor(0, 0); + schedule.SetAssignedSuperstep(0, 0); + schedule.SetAssignedProcessor(1, 1); + schedule.SetAssignedSuperstep(1, 2); // 0->1 is cross-proc, 2 >= 0+2 + schedule.SetAssignedProcessor(2, 1); + schedule.SetAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 + schedule.SetAssignedProcessor(3, 0); + schedule.SetAssignedSuperstep(3, 4); // 1->3, 2->3 are cross-proc, 4 >= 2+2 + schedule.UpdateNumberOfSupersteps(); + + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); // Manual cost calculation: // Superstep 0: work = {10, 0} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10. @@ -383,60 +372,60 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) { // Superstep 3: work = {0, 0} -> max_work = 0. comm from SS2: 1->3, 2->3 (P1->P0) needed at SS4, comm sent in SS2. // comm=(2+3)*10=50. Cost = max(0,l+50)=150. Superstep 4: work = {10, 0} -> max_work = 10. Cost = max(10, 0) = 10. 
Total // cost = 10 + 110 + 10 + 150 + 10 = 290 - BOOST_CHECK_EQUAL(schedule.computeCosts(), 290); + BOOST_CHECK_EQUAL(schedule.ComputeCosts(), 290); } // Test an invalid schedule (violates staleness=2) { - MaxBspSchedule schedule(instance); - schedule.setAssignedProcessor(0, 0); - schedule.setAssignedSuperstep(0, 0); - schedule.setAssignedProcessor(1, 1); // 0->1 on different procs - schedule.setAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1) - schedule.updateNumberOfSupersteps(); - - BOOST_CHECK(!schedule.satisfiesPrecedenceConstraints()); + MaxBspSchedule schedule(instance); + schedule.SetAssignedProcessor(0, 0); + schedule.SetAssignedSuperstep(0, 0); + schedule.SetAssignedProcessor(1, 1); // 0->1 on different procs + schedule.SetAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1) + schedule.UpdateNumberOfSupersteps(); + + BOOST_CHECK(!schedule.SatisfiesPrecedenceConstraints()); } } -BOOST_AUTO_TEST_CASE(test_max_bsp_schedule_cs) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; +BOOST_AUTO_TEST_CASE(TestMaxBspScheduleCs) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; - BspInstance instance; - instance.setNumberOfProcessors(2); - instance.setCommunicationCosts(10); // g=10 - instance.setSynchronisationCosts(100); // l=100 + BspInstance instance; + instance.SetNumberOfProcessors(2); + instance.SetCommunicationCosts(10); // g=10 + instance.SetSynchronisationCosts(100); // l=100 - auto &dag = instance.getComputationalDag(); - dag.add_vertex(10, 1, 0); // Node 0 - dag.add_vertex(5, 2, 0); // Node 1 - dag.add_vertex(5, 3, 0); // Node 2 - dag.add_vertex(10, 4, 0); // Node 3 - dag.add_edge(0, 1); - dag.add_edge(0, 2); - dag.add_edge(1, 3); - dag.add_edge(2, 3); + auto &dag = instance.GetComputationalDag(); + dag.AddVertex(10, 1, 0); // Node 0 + dag.AddVertex(5, 2, 0); // Node 1 + dag.AddVertex(5, 3, 0); // Node 2 + dag.AddVertex(10, 4, 0); // Node 3 + dag.AddEdge(0, 1); + dag.AddEdge(0, 2); + 
dag.AddEdge(1, 3); + dag.AddEdge(2, 3); // Test a valid schedule with staleness = 2 { - MaxBspScheduleCS schedule(instance); - schedule.setAssignedProcessor(0, 0); - schedule.setAssignedSuperstep(0, 0); - schedule.setAssignedProcessor(1, 0); - schedule.setAssignedSuperstep(1, 1); - schedule.setAssignedProcessor(2, 1); - schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 - schedule.setAssignedProcessor(3, 0); - schedule.setAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2 - schedule.updateNumberOfSupersteps(); - - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + MaxBspScheduleCS schedule(instance); + schedule.SetAssignedProcessor(0, 0); + schedule.SetAssignedSuperstep(0, 0); + schedule.SetAssignedProcessor(1, 0); + schedule.SetAssignedSuperstep(1, 1); + schedule.SetAssignedProcessor(2, 1); + schedule.SetAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 + schedule.SetAssignedProcessor(3, 0); + schedule.SetAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2 + schedule.UpdateNumberOfSupersteps(); + + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); // Set communication schedule (eager) - schedule.addCommunicationScheduleEntry(0, 0, 1, 0); // 0->2 (P0->P1) sent in SS0 - schedule.addCommunicationScheduleEntry(2, 1, 0, 2); // 2->3 (P1->P0) sent in SS2 + schedule.AddCommunicationScheduleEntry(0, 0, 1, 0); // 0->2 (P0->P1) sent in SS0 + schedule.AddCommunicationScheduleEntry(2, 1, 0, 2); // 2->3 (P1->P0) sent in SS2 - BOOST_CHECK(schedule.hasValidCommSchedule()); + BOOST_CHECK(schedule.HasValidCommSchedule()); // Manual cost calculation: // SS0: work={10,0}, max_work=10. comm_send(P0)=1, comm_rec(P1)=0. max_comm_h=1. Cost=max(10, 0)=10. @@ -445,18 +434,18 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule_cs) { // SS3: work={0,0}, max_work=0. comm from SS2: h=3, cost=3*10=30. Cost=max(0,30)+l=30+100=130. // SS4: work={10,0}, max_work=10. comm from SS3: h=0, cost=0. Cost=max(10,0)=10. 
// Total cost = 10 + 110 + 5 + 130 + 10 = 265 - BOOST_CHECK_EQUAL(schedule.computeCosts(), 265); + BOOST_CHECK_EQUAL(schedule.ComputeCosts(), 265); } // Test an invalid schedule (violates staleness=2) { - MaxBspScheduleCS schedule(instance); - schedule.setAssignedProcessor(0, 0); - schedule.setAssignedSuperstep(0, 0); - schedule.setAssignedProcessor(1, 1); // 0->1 on different procs - schedule.setAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1) - schedule.updateNumberOfSupersteps(); - - BOOST_CHECK(!schedule.satisfiesPrecedenceConstraints()); + MaxBspScheduleCS schedule(instance); + schedule.SetAssignedProcessor(0, 0); + schedule.SetAssignedSuperstep(0, 0); + schedule.SetAssignedProcessor(1, 1); // 0->1 on different procs + schedule.SetAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1) + schedule.UpdateNumberOfSupersteps(); + + BOOST_CHECK(!schedule.SatisfiesPrecedenceConstraints()); } } diff --git a/tests/bsp_schedule_recomp.cpp b/tests/bsp_schedule_recomp.cpp index 6d1ce9f1..8c1c0612 100644 --- a/tests/bsp_schedule_recomp.cpp +++ b/tests/bsp_schedule_recomp.cpp @@ -28,13 +28,13 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) { - using graph = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(BspScheduleRecompTest) { + using Graph = ComputationalDagVectorImplDefUnsignedT; - BspInstance instance; - instance.setNumberOfProcessors(3); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + BspInstance instance; + instance.SetNumberOfProcessors(3); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -44,27 +44,27 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) { std::cout << cwd << std::endl; } - file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), - instance.getComputationalDag()); + file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), + instance.GetComputationalDag()); - BspSchedule schedule(instance); - GreedyBspScheduler scheduler; - const auto result = scheduler.computeSchedule(schedule); + BspSchedule schedule(instance); + GreedyBspScheduler scheduler; + const auto result = scheduler.ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); BspScheduleCS scheduleCS(schedule); - BspScheduleRecomp schedule_recomp(schedule); + BspScheduleRecomp scheduleRecomp(schedule); - BOOST_CHECK(schedule_recomp.satisfiesConstraints()); - BOOST_CHECK_EQUAL(schedule_recomp.getTotalAssignments(), instance.numberOfVertices()); - BOOST_CHECK_EQUAL(schedule_recomp.computeWorkCosts(), schedule.computeWorkCosts()); - BOOST_CHECK_EQUAL(schedule_recomp.computeCosts(), 
scheduleCS.computeCosts()); + BOOST_CHECK(scheduleRecomp.SatisfiesConstraints()); + BOOST_CHECK_EQUAL(scheduleRecomp.GetTotalAssignments(), instance.NumberOfVertices()); + BOOST_CHECK_EQUAL(scheduleRecomp.ComputeWorkCosts(), schedule.ComputeWorkCosts()); + BOOST_CHECK_EQUAL(scheduleRecomp.ComputeCosts(), scheduleCS.ComputeCosts()); - BspScheduleRecomp schedule_recomp_from_cs(scheduleCS); - BOOST_CHECK(schedule_recomp_from_cs.satisfiesConstraints()); - BOOST_CHECK_EQUAL(schedule_recomp_from_cs.computeCosts(), scheduleCS.computeCosts()); + BspScheduleRecomp scheduleRecompFromCs(scheduleCS); + BOOST_CHECK(scheduleRecompFromCs.SatisfiesConstraints()); + BOOST_CHECK_EQUAL(scheduleRecompFromCs.ComputeCosts(), scheduleCS.ComputeCosts()); } diff --git a/tests/bsp_schedulers.cpp b/tests/bsp_schedulers.cpp index d9f16d41..a25a011a 100644 --- a/tests/bsp_schedulers.cpp +++ b/tests/bsp_schedulers.cpp @@ -48,13 +48,13 @@ limitations under the License. using namespace osp; -std::vector test_architectures() { return {"data/machine_params/p3.arch"}; } +std::vector TestArchitectures() { return {"data/machine_params/p3.arch"}; } -template -void run_test(Scheduler *test_scheduler) { +template +void RunTest(Scheduler *testScheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = tiny_spaa_graphs(); - std::vector filenames_architectures = test_architectures(); + std::vector filenamesGraph = TinySpaaGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -64,42 +64,42 @@ void run_test(Scheduler *test_scheduler) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - for (auto &filename_machine : filenames_architectures) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string 
name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - std::cout << std::endl << "Scheduler: " << test_scheduler->getScheduleName() << std::endl; - std::cout << "Graph: " << name_graph << std::endl; - std::cout << "Architecture: " << name_machine << std::endl; + std::cout << std::endl << "Scheduler: " << testScheduler->GetScheduleName() << std::endl; + std::cout << "Graph: " << nameGraph << std::endl; + std::cout << "Architecture: " << nameMachine << std::endl; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." 
<< std::endl; BOOST_CHECK(false); } - BspSchedule schedule(instance); - const auto result = test_scheduler->computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler->ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); } } } -template -void run_test_2(Scheduler *test_scheduler) { +template +void RunTest2(Scheduler *testScheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = tiny_spaa_graphs(); - std::vector filenames_architectures = test_architectures(); + std::vector filenamesGraph = TinySpaaGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -109,239 +109,239 @@ void run_test_2(Scheduler *test_scheduler) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - for (auto &filename_machine : filenames_architectures) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - std::cout << std::endl << "Scheduler: " << test_scheduler->getScheduleName() << std::endl; - 
std::cout << "Graph: " << name_graph << std::endl; - std::cout << "Architecture: " << name_machine << std::endl; + std::cout << std::endl << "Scheduler: " << testScheduler->GetScheduleName() << std::endl; + std::cout << "Graph: " << nameGraph << std::endl; + std::cout << "Architecture: " << nameMachine << std::endl; - computational_dag_edge_idx_vector_impl_def_t graph; - BspArchitecture arch; + ComputationalDagEdgeIdxVectorImplDefT graph; + BspArchitecture arch; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), graph); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), graph); + bool statusArchitecture = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - BspInstance instance(graph, arch); + BspInstance instance(graph, arch); - BspSchedule schedule(instance); - const auto result = test_scheduler->computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler->ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); } } } -BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test) { - GreedyBspScheduler test; - run_test(&test); +BOOST_AUTO_TEST_CASE(GreedyBspSchedulerTest) { + GreedyBspScheduler test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test_2) { - GreedyBspScheduler test; - run_test(&test); +BOOST_AUTO_TEST_CASE(GreedyBspSchedulerTest2) { + GreedyBspScheduler test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(Serial_test) { - Serial test; - run_test(&test); 
+BOOST_AUTO_TEST_CASE(SerialTest) { + Serial test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(cilk_test_1) { - CilkScheduler test; - test.setMode(CILK); - run_test(&test); +BOOST_AUTO_TEST_CASE(CilkTest1) { + CilkScheduler test; + test.SetMode(CILK); + RunTest(&test); } -BOOST_AUTO_TEST_CASE(cilk_test_2) { - CilkScheduler test; - test.setMode(SJF); - run_test(&test); +BOOST_AUTO_TEST_CASE(CilkTest2) { + CilkScheduler test; + test.SetMode(SJF); + RunTest(&test); } -BOOST_AUTO_TEST_CASE(etf_test) { - EtfScheduler test; - run_test(&test); +BOOST_AUTO_TEST_CASE(EtfTest) { + EtfScheduler test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(random_test) { - RandomGreedy test; - run_test(&test); +BOOST_AUTO_TEST_CASE(RandomTest) { + RandomGreedy test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(children_test) { - GreedyChildren test; - run_test(&test); +BOOST_AUTO_TEST_CASE(ChildrenTest) { + GreedyChildren test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(locking_test) { - BspLocking test; - run_test(&test); +BOOST_AUTO_TEST_CASE(LockingTest) { + BspLocking test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(variancefillup_test) { - VarianceFillup test; - run_test(&test); +BOOST_AUTO_TEST_CASE(VariancefillupTest) { + VarianceFillup test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(etf_test_edge_desc_impl) { - EtfScheduler test; - run_test(&test); +BOOST_AUTO_TEST_CASE(EtfTestEdgeDescImpl) { + EtfScheduler test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(grow_local_auto_test_edge_desc_impl) { - GrowLocalAutoCores test; - run_test(&test); +BOOST_AUTO_TEST_CASE(GrowLocalAutoTestEdgeDescImpl) { + GrowLocalAutoCores test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(grow_local_auto_parallel_top_test_1) { +BOOST_AUTO_TEST_CASE(GrowLocalAutoParallelTopTest1) { { - using Graph_t = computational_dag_vector_impl_def_t; - GrowLocalAutoCoresParallel_Params, v_workw_t> params; + using GraphT = ComputationalDagVectorImplDefUnsignedT; + GrowLocalAutoCoresParallelParams, VWorkwT> params; - params.numThreads 
= 1; + params.numThreads_ = 1; - GrowLocalAutoCoresParallel test(params); - run_test(&test); + GrowLocalAutoCoresParallel test(params); + RunTest(&test); } } -BOOST_AUTO_TEST_CASE(grow_local_auto_parallel_top_test_2) { +BOOST_AUTO_TEST_CASE(GrowLocalAutoParallelTopTest2) { { - using Graph_t = computational_dag_vector_impl_def_t; - GrowLocalAutoCoresParallel_Params, v_workw_t> params; + using GraphT = ComputationalDagVectorImplDefUnsignedT; + GrowLocalAutoCoresParallelParams, VWorkwT> params; - params.numThreads = 2; + params.numThreads_ = 2; - GrowLocalAutoCoresParallel test(params); - run_test(&test); + GrowLocalAutoCoresParallel test(params); + RunTest(&test); } } -BOOST_AUTO_TEST_CASE(grow_local_auto_parallel_top_test_5) { +BOOST_AUTO_TEST_CASE(GrowLocalAutoParallelTopTest5) { { - using Graph_t = computational_dag_vector_impl_def_t; - GrowLocalAutoCoresParallel_Params, v_workw_t> params; + using GraphT = ComputationalDagVectorImplDefUnsignedT; + GrowLocalAutoCoresParallelParams, VWorkwT> params; - params.numThreads = 5; + params.numThreads_ = 5; - GrowLocalAutoCoresParallel test(params); - run_test(&test); + GrowLocalAutoCoresParallel test(params); + RunTest(&test); } } -BOOST_AUTO_TEST_CASE(grow_local_auto_parallel_test_1) { +BOOST_AUTO_TEST_CASE(GrowLocalAutoParallelTest1) { { - using Graph_t = Compact_Sparse_Graph; - GrowLocalAutoCoresParallel_Params, v_workw_t> params; + using GraphT = CompactSparseGraph; + GrowLocalAutoCoresParallelParams, VWorkwT> params; - params.numThreads = 1; + params.numThreads_ = 1; - GrowLocalAutoCoresParallel test(params); - run_test_2(&test); + GrowLocalAutoCoresParallel test(params); + RunTest2(&test); } } -BOOST_AUTO_TEST_CASE(grow_local_auto_parallel_test_2) { +BOOST_AUTO_TEST_CASE(GrowLocalAutoParallelTest2) { { - using Graph_t = Compact_Sparse_Graph; - GrowLocalAutoCoresParallel_Params, v_workw_t> params; + using GraphT = CompactSparseGraph; + GrowLocalAutoCoresParallelParams, VWorkwT> params; - params.numThreads = 2; + 
params.numThreads_ = 2; - GrowLocalAutoCoresParallel test(params); - run_test_2(&test); + GrowLocalAutoCoresParallel test(params); + RunTest2(&test); } } -BOOST_AUTO_TEST_CASE(grow_local_auto_parallel_test_5) { +BOOST_AUTO_TEST_CASE(GrowLocalAutoParallelTest5) { { - using Graph_t = Compact_Sparse_Graph; - GrowLocalAutoCoresParallel_Params, v_workw_t> params; + using GraphT = CompactSparseGraph; + GrowLocalAutoCoresParallelParams, VWorkwT> params; - params.numThreads = 5; + params.numThreads_ = 5; - GrowLocalAutoCoresParallel test(params); - run_test_2(&test); + GrowLocalAutoCoresParallel test(params); + RunTest2(&test); } } -BOOST_AUTO_TEST_CASE(VariancePartitioner_test) { - VariancePartitioner test_linear; - run_test(&test_linear); +BOOST_AUTO_TEST_CASE(VariancePartitionerTest) { + VariancePartitioner testLinear; + RunTest(&testLinear); - VariancePartitioner test_flat; - run_test(&test_flat); + VariancePartitioner testFlat; + RunTest(&testFlat); - VariancePartitioner test_superstep; - run_test(&test_superstep); + VariancePartitioner testSuperstep; + RunTest(&testSuperstep); - VariancePartitioner test_global; - run_test(&test_global); + VariancePartitioner testGlobal; + RunTest(&testGlobal); } -BOOST_AUTO_TEST_CASE(LightEdgeVariancePartitioner_test) { - LightEdgeVariancePartitioner test_linear; - run_test(&test_linear); +BOOST_AUTO_TEST_CASE(LightEdgeVariancePartitionerTest) { + LightEdgeVariancePartitioner testLinear; + RunTest(&testLinear); - LightEdgeVariancePartitioner test_flat; - run_test(&test_flat); + LightEdgeVariancePartitioner testFlat; + RunTest(&testFlat); - LightEdgeVariancePartitioner test_superstep; - run_test(&test_superstep); + LightEdgeVariancePartitioner testSuperstep; + RunTest(&testSuperstep); - LightEdgeVariancePartitioner test_global; - run_test(&test_global); + LightEdgeVariancePartitioner testGlobal; + RunTest(&testGlobal); } -BOOST_AUTO_TEST_CASE(SquashAMul_test) { - GreedyBspScheduler sched; +BOOST_AUTO_TEST_CASE(SquashAMulTest) { + 
GreedyBspScheduler sched; - SquashAMul ml_coarsen; - MultilevelCoarseAndSchedule - coarsen_test(sched, ml_coarsen); + SquashAMul mlCoarsen; + MultilevelCoarseAndSchedule coarsenTest( + sched, mlCoarsen); - run_test(&coarsen_test); + RunTest(&coarsenTest); } -BOOST_AUTO_TEST_CASE(SquashAMul_improver_test) { - GreedyBspScheduler sched; - HillClimbingScheduler improver; +BOOST_AUTO_TEST_CASE(SquashAMulImproverTest) { + GreedyBspScheduler sched; + HillClimbingScheduler improver; - SquashAMul ml_coarsen; - MultilevelCoarseAndSchedule - coarsen_test(sched, improver, ml_coarsen); + SquashAMul mlCoarsen; + MultilevelCoarseAndSchedule coarsenTest( + sched, improver, mlCoarsen); - run_test(&coarsen_test); + RunTest(&coarsenTest); } -BOOST_AUTO_TEST_CASE(SarkarMul_test) { - GreedyBspScheduler sched; +BOOST_AUTO_TEST_CASE(SarkarMulTest) { + GreedyBspScheduler sched; - SarkarMul ml_coarsen; - MultilevelCoarseAndSchedule - coarsen_test(sched, ml_coarsen); + SarkarMul mlCoarsen; + MultilevelCoarseAndSchedule coarsenTest( + sched, mlCoarsen); - run_test(&coarsen_test); + RunTest(&coarsenTest); } -BOOST_AUTO_TEST_CASE(SarkarMul_improver_test) { - GreedyBspScheduler sched; - HillClimbingScheduler improver; +BOOST_AUTO_TEST_CASE(SarkarMulImproverTest) { + GreedyBspScheduler sched; + HillClimbingScheduler improver; - SarkarMul ml_coarsen; - MultilevelCoarseAndSchedule - coarsen_test(sched, improver, ml_coarsen); + SarkarMul mlCoarsen; + MultilevelCoarseAndSchedule coarsenTest( + sched, improver, mlCoarsen); - run_test(&coarsen_test); + RunTest(&coarsenTest); } diff --git a/tests/bsp_schedulers_mem_const.cpp b/tests/bsp_schedulers_mem_const.cpp index f1f39a34..0e0c0fda 100644 --- a/tests/bsp_schedulers_mem_const.cpp +++ b/tests/bsp_schedulers_mem_const.cpp @@ -31,8 +31,6 @@ limitations under the License. 
#include "osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" #include "osp/bsp/scheduler/Serial.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" @@ -40,24 +38,24 @@ limitations under the License. using namespace osp; -std::vector test_architectures() { return {"data/machine_params/p3.arch"}; } +std::vector TestArchitectures() { return {"data/machine_params/p3.arch"}; } -template -void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; - int comm_weight = 1; +template +void AddMemWeights(GraphT &dag) { + int memWeight = 1; + int commWeight = 1; - for (const auto &v : dag.vertices()) { - dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 3 + 1)); - dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 3 + 1)); + for (const auto &v : dag.Vertices()) { + dag.SetVertexMemWeight(v, static_cast>(memWeight++ % 3 + 1)); + dag.SetVertexCommWeight(v, static_cast>(commWeight++ % 3 + 1)); } } -template -void run_test_local_memory(Scheduler *test_scheduler) { +template +void RunTestLocalMemory(Scheduler *testScheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = test_graphs(); - std::vector filenames_architectures = test_architectures(); + std::vector filenamesGraph = TestGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -67,54 +65,54 @@ void run_test_local_memory(Scheduler *test_scheduler) { std::cout << cwd << std::endl; } - for (auto &filename_graph : 
filenames_graph) { - for (auto &filename_machine : filenames_architectures) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - std::cout << std::endl << "Scheduler: " << test_scheduler->getScheduleName() << std::endl; - std::cout << "Graph: " << name_graph << std::endl; - std::cout << "Architecture: " << name_machine << std::endl; + std::cout << std::endl << "Scheduler: " << testScheduler->GetScheduleName() << std::endl; + std::cout << "Graph: " << nameGraph << std::endl; + std::cout << "Architecture: " << nameMachine << std::endl; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), + instance.GetComputationalDag()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - add_mem_weights(instance.getComputationalDag()); - instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL); + 
AddMemWeights(instance.GetComputationalDag()); + instance.GetArchitecture().SetMemoryConstraintType(MemoryConstraintType::LOCAL); std::cout << "Memory constraint type: LOCAL" << std::endl; - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - const std::vector> bounds_to_test = {10, 20, 50, 100}; + const std::vector> boundsToTest = {10, 20, 50, 100}; - for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); + for (const auto &bound : boundsToTest) { + instance.GetArchitecture().SetMemoryBound(bound); - BspSchedule schedule(instance); - const auto result = test_scheduler->computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler->ComputeSchedule(schedule); - BOOST_CHECK(RETURN_STATUS::OSP_SUCCESS == result || RETURN_STATUS::BEST_FOUND == result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.satisfiesMemoryConstraints()); + BOOST_CHECK(ReturnStatus::OSP_SUCCESS == result || ReturnStatus::BEST_FOUND == result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesMemoryConstraints()); } } } } -template -void run_test_persistent_transient_memory(Scheduler *test_scheduler) { +template +void RunTestPersistentTransientMemory(Scheduler *testScheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = test_graphs(); - std::vector filenames_architectures = test_architectures(); + std::vector filenamesGraph = TestGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -124,54 +122,54 @@ void run_test_persistent_transient_memory(Scheduler *test_scheduler) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - for (auto 
&filename_machine : filenames_architectures) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - std::cout << std::endl << "Scheduler: " << test_scheduler->getScheduleName() << std::endl; - std::cout << "Graph: " << name_graph << std::endl; - std::cout << "Architecture: " << name_machine << std::endl; + std::cout << std::endl << "Scheduler: " << testScheduler->GetScheduleName() << std::endl; + std::cout << "Graph: " << nameGraph << std::endl; + std::cout << "Architecture: " << nameMachine << std::endl; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), + instance.GetComputationalDag()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - add_mem_weights(instance.getComputationalDag()); - instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT); + 
AddMemWeights(instance.GetComputationalDag()); + instance.GetArchitecture().SetMemoryConstraintType(MemoryConstraintType::PERSISTENT_AND_TRANSIENT); std::cout << "Memory constraint type: PERSISTENT_AND_TRANSIENT" << std::endl; - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - const std::vector> bounds_to_test = {50, 100}; + const std::vector> boundsToTest = {50, 100}; - for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); + for (const auto &bound : boundsToTest) { + instance.GetArchitecture().SetMemoryBound(bound); - BspSchedule schedule(instance); - const auto result = test_scheduler->computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler->ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.satisfiesMemoryConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesMemoryConstraints()); } } } } -template -void run_test_local_in_out_memory(Scheduler *test_scheduler) { +template +void RunTestLocalInOutMemory(Scheduler *testScheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = test_graphs(); - std::vector filenames_architectures = test_architectures(); + std::vector filenamesGraph = TestGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -181,54 +179,54 @@ void run_test_local_in_out_memory(Scheduler *test_scheduler) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - for (auto &filename_machine : filenames_architectures) { - std::string name_graph = 
filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - std::cout << std::endl << "Scheduler: " << test_scheduler->getScheduleName() << std::endl; - std::cout << "Graph: " << name_graph << std::endl; - std::cout << "Architecture: " << name_machine << std::endl; + std::cout << std::endl << "Scheduler: " << testScheduler->GetScheduleName() << std::endl; + std::cout << "Graph: " << nameGraph << std::endl; + std::cout << "Architecture: " << nameMachine << std::endl; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), + instance.GetComputationalDag()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - add_mem_weights(instance.getComputationalDag()); - instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT); + AddMemWeights(instance.GetComputationalDag()); + 
instance.GetArchitecture().SetMemoryConstraintType(MemoryConstraintType::LOCAL_IN_OUT); std::cout << "Memory constraint type: LOCAL_IN_OUT" << std::endl; - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - const std::vector> bounds_to_test = {10, 20, 50, 100}; + const std::vector> boundsToTest = {10, 20, 50, 100}; - for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); + for (const auto &bound : boundsToTest) { + instance.GetArchitecture().SetMemoryBound(bound); - BspSchedule schedule(instance); - const auto result = test_scheduler->computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler->ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.satisfiesMemoryConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesMemoryConstraints()); } } } } -template -void run_test_local_inc_edges_memory(Scheduler *test_scheduler) { +template +void RunTestLocalIncEdgesMemory(Scheduler *testScheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = test_graphs(); - std::vector filenames_architectures = test_architectures(); + std::vector filenamesGraph = TestGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -238,54 +236,54 @@ void run_test_local_inc_edges_memory(Scheduler *test_scheduler) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - for (auto &filename_machine : filenames_architectures) { - std::string name_graph = 
filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - std::cout << std::endl << "Scheduler: " << test_scheduler->getScheduleName() << std::endl; - std::cout << "Graph: " << name_graph << std::endl; - std::cout << "Architecture: " << name_machine << std::endl; + std::cout << std::endl << "Scheduler: " << testScheduler->GetScheduleName() << std::endl; + std::cout << "Graph: " << nameGraph << std::endl; + std::cout << "Architecture: " << nameMachine << std::endl; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), + instance.GetComputationalDag()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - add_mem_weights(instance.getComputationalDag()); - instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES); + AddMemWeights(instance.GetComputationalDag()); + 
instance.GetArchitecture().SetMemoryConstraintType(MemoryConstraintType::LOCAL_INC_EDGES); std::cout << "Memory constraint type: LOCAL_INC_EDGES" << std::endl; - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - const std::vector> bounds_to_test = {50, 100}; + const std::vector> boundsToTest = {50, 100}; - for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); + for (const auto &bound : boundsToTest) { + instance.GetArchitecture().SetMemoryBound(bound); - BspSchedule schedule(instance); - const auto result = test_scheduler->computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler->ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.satisfiesMemoryConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesMemoryConstraints()); } } } } -template -void run_test_local_inc_edges_2_memory(Scheduler *test_scheduler) { +template +void RunTestLocalIncEdges2Memory(Scheduler *testScheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = test_graphs(); - std::vector filenames_architectures = test_architectures(); + std::vector filenamesGraph = TestGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -295,227 +293,223 @@ void run_test_local_inc_edges_2_memory(Scheduler *test_scheduler) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - for (auto &filename_machine : filenames_architectures) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") 
+ 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - std::cout << std::endl << "Scheduler: " << test_scheduler->getScheduleName() << std::endl; - std::cout << "Graph: " << name_graph << std::endl; - std::cout << "Architecture: " << name_machine << std::endl; + std::cout << std::endl << "Scheduler: " << testScheduler->GetScheduleName() << std::endl; + std::cout << "Graph: " << nameGraph << std::endl; + std::cout << "Architecture: " << nameMachine << std::endl; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), + instance.GetComputationalDag()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - add_mem_weights(instance.getComputationalDag()); - instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES); + AddMemWeights(instance.GetComputationalDag()); + instance.GetArchitecture().SetMemoryConstraintType(MemoryConstraintType::LOCAL_SOURCES_INC_EDGES); std::cout << "Memory 
constraint type: LOCAL_SOURCES_INC_EDGES" << std::endl; - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - const std::vector> bounds_to_test = {20, 50, 100}; + const std::vector> boundsToTest = {20, 50, 100}; - for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); + for (const auto &bound : boundsToTest) { + instance.GetArchitecture().SetMemoryBound(bound); - BspSchedule schedule(instance); - const auto result = test_scheduler->computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler->ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.satisfiesMemoryConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesMemoryConstraints()); } } } } -BOOST_AUTO_TEST_CASE(GreedyBspScheduler_local_test) { - using graph_impl_t = computational_dag_edge_idx_vector_impl_def_int_t; +BOOST_AUTO_TEST_CASE(GreedyBspSchedulerLocalTest) { + using GraphImplT = ComputationalDagEdgeIdxVectorImplDefIntT; - GreedyBspScheduler> test_1; - run_test_local_memory(&test_1); + GreedyBspScheduler> test1; + RunTestLocalMemory(&test1); - GreedyBspScheduler> test_2; - run_test_local_in_out_memory(&test_2); + GreedyBspScheduler> test2; + RunTestLocalInOutMemory(&test2); - GreedyBspScheduler> test_3; - run_test_local_inc_edges_memory(&test_3); + GreedyBspScheduler> test3; + RunTestLocalIncEdgesMemory(&test3); - GreedyBspScheduler> test_4; - run_test_local_inc_edges_2_memory(&test_4); + GreedyBspScheduler> test4; + RunTestLocalIncEdges2Memory(&test4); } -BOOST_AUTO_TEST_CASE(GrowLocalAutoCores_local_test) { - using graph_impl_t = computational_dag_edge_idx_vector_impl_def_int_t; 
+BOOST_AUTO_TEST_CASE(GrowLocalAutoCoresLocalTest) { + using GraphImplT = ComputationalDagEdgeIdxVectorImplDefIntT; - GrowLocalAutoCores> test_1; - run_test_local_memory(&test_1); + GrowLocalAutoCores> test1; + RunTestLocalMemory(&test1); - GrowLocalAutoCores> test_2; - run_test_local_in_out_memory(&test_2); + GrowLocalAutoCores> test2; + RunTestLocalInOutMemory(&test2); - GrowLocalAutoCores> test_3; - run_test_local_inc_edges_memory(&test_3); + GrowLocalAutoCores> test3; + RunTestLocalIncEdgesMemory(&test3); - GrowLocalAutoCores> test_4; - run_test_local_inc_edges_2_memory(&test_4); + GrowLocalAutoCores> test4; + RunTestLocalIncEdges2Memory(&test4); } -BOOST_AUTO_TEST_CASE(BspLocking_local_test) { - using graph_impl_t = computational_dag_edge_idx_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(BspLockingLocalTest) { + using GraphImplT = ComputationalDagEdgeIdxVectorImplDefT; - BspLocking> test_1; - run_test_local_memory(&test_1); + BspLocking> test1; + RunTestLocalMemory(&test1); - BspLocking> test_2; - run_test_local_in_out_memory(&test_2); + BspLocking> test2; + RunTestLocalInOutMemory(&test2); - BspLocking> test_3; - run_test_local_inc_edges_memory(&test_3); + BspLocking> test3; + RunTestLocalIncEdgesMemory(&test3); - BspLocking> test_4; - run_test_local_inc_edges_2_memory(&test_4); + BspLocking> test4; + RunTestLocalIncEdges2Memory(&test4); } -BOOST_AUTO_TEST_CASE(variance_local_test) { - VarianceFillup> - test; - run_test_local_memory(&test); +BOOST_AUTO_TEST_CASE(VarianceLocalTest) { + VarianceFillup> test; + RunTestLocalMemory(&test); } // BOOST_AUTO_TEST_CASE(kl_local_test) { -// VarianceFillup> +// VarianceFillup> // test; -// kl_total_comm> kl; +// kl_total_comm> kl; -// ComboScheduler combo_test(test, kl); +// ComboScheduler combo_test(test, kl); // run_test_local_memory(&combo_test); // }; -BOOST_AUTO_TEST_CASE(GreedyBspScheduler_persistent_transient_test) { - GreedyBspScheduler> +BOOST_AUTO_TEST_CASE(GreedyBspSchedulerPersistentTransientTest) { + 
GreedyBspScheduler> test; - run_test_persistent_transient_memory(&test); + RunTestPersistentTransientMemory(&test); } -BOOST_AUTO_TEST_CASE(EtfScheduler_persistent_transient_test) { - EtfScheduler> - test; - run_test_persistent_transient_memory(&test); +BOOST_AUTO_TEST_CASE(EtfSchedulerPersistentTransientTest) { + EtfScheduler> test; + RunTestPersistentTransientMemory(&test); } -BOOST_AUTO_TEST_CASE(VariancePartitioner_test) { - VariancePartitioner> - test_linear; - run_test_local_memory(&test_linear); - - VariancePartitioner> - test_flat; - run_test_local_memory(&test_flat); - - VariancePartitioner> - test_superstep; - run_test_local_memory(&test_superstep); - - VariancePartitioner> - test_global; - run_test_local_memory(&test_global); - - VariancePartitioner> - test_linear_tp; - run_test_persistent_transient_memory(&test_linear_tp); - - VariancePartitioner> - test_flat_tp; - run_test_persistent_transient_memory(&test_flat_tp); - - VariancePartitioner> - test_superstep_tp; - run_test_persistent_transient_memory(&test_superstep_tp); - - VariancePartitioner> - test_global_tp; - run_test_persistent_transient_memory(&test_global_tp); +BOOST_AUTO_TEST_CASE(VariancePartitionerTest) { + VariancePartitioner> + testLinear; + RunTestLocalMemory(&testLinear); + + VariancePartitioner> + testFlat; + RunTestLocalMemory(&testFlat); + + VariancePartitioner> + testSuperstep; + RunTestLocalMemory(&testSuperstep); + + VariancePartitioner> + testGlobal; + RunTestLocalMemory(&testGlobal); + + VariancePartitioner> + testLinearTp; + RunTestPersistentTransientMemory(&testLinearTp); + + VariancePartitioner> + testFlatTp; + RunTestPersistentTransientMemory(&testFlatTp); + + VariancePartitioner> + testSuperstepTp; + RunTestPersistentTransientMemory(&testSuperstepTp); + + VariancePartitioner> + testGlobalTp; + RunTestPersistentTransientMemory(&testGlobalTp); } -BOOST_AUTO_TEST_CASE(LightEdgeVariancePartitioner_test) { - LightEdgeVariancePartitioner> - test_linear; - 
run_test_local_memory(&test_linear); - - LightEdgeVariancePartitioner> - test_flat; - run_test_local_memory(&test_flat); - - LightEdgeVariancePartitioner> - test_superstep; - run_test_local_memory(&test_superstep); - - LightEdgeVariancePartitioner> - test_global; - run_test_local_memory(&test_global); - - LightEdgeVariancePartitioner> - test_linear_tp; - run_test_persistent_transient_memory(&test_linear_tp); - - LightEdgeVariancePartitioner> - test_flat_tp; - run_test_persistent_transient_memory(&test_flat_tp); - - LightEdgeVariancePartitioner> - test_superstep_tp; - run_test_persistent_transient_memory(&test_superstep_tp); - - LightEdgeVariancePartitioner> - test_global_tp; - run_test_persistent_transient_memory(&test_global_tp); +BOOST_AUTO_TEST_CASE(LightEdgeVariancePartitionerTest) { + LightEdgeVariancePartitioner> + testLinear; + RunTestLocalMemory(&testLinear); + + LightEdgeVariancePartitioner> + testFlat; + RunTestLocalMemory(&testFlat); + + LightEdgeVariancePartitioner> + testSuperstep; + RunTestLocalMemory(&testSuperstep); + + LightEdgeVariancePartitioner> + testGlobal; + RunTestLocalMemory(&testGlobal); + + LightEdgeVariancePartitioner> + testLinearTp; + RunTestPersistentTransientMemory(&testLinearTp); + + LightEdgeVariancePartitioner> + testFlatTp; + RunTestPersistentTransientMemory(&testFlatTp); + + LightEdgeVariancePartitioner> + testSuperstepTp; + RunTestPersistentTransientMemory(&testSuperstepTp); + + LightEdgeVariancePartitioner> + testGlobalTp; + RunTestPersistentTransientMemory(&testGlobalTp); } diff --git a/tests/coarser.cpp b/tests/coarser.cpp index d0882512..8bcf61f8 100644 --- a/tests/coarser.cpp +++ b/tests/coarser.cpp @@ -44,9 +44,9 @@ limitations under the License. 
using namespace osp; -using VertexType = vertex_idx_t; +using VertexType = VertexIdxT; -bool check_vertex_map(std::vector> &map, std::size_t size) { +bool CheckVertexMap(std::vector> &map, std::size_t size) { std::unordered_set vertices; for (auto &v : map) { @@ -62,47 +62,47 @@ bool check_vertex_map(std::vector> &map, std::size_t siz } template -bool check_vertex_map_constraints(std::vector> &map, - ComputationalDag &dag, - v_type_t size_threshold, - v_memw_t memory_threshold, - v_workw_t work_threshold, - v_commw_t communication_threshold) { +bool CheckVertexMapConstraints(std::vector> &map, + ComputationalDag &dag, + VTypeT sizeThreshold, + VMemwT memoryThreshold, + VWorkwT workThreshold, + VCommwT communicationThreshold) { std::unordered_set vertices; - for (auto &super_node : map) { - v_memw_t memory = 0; - v_workw_t work = 0; - v_commw_t communication = 0; + for (auto &superNode : map) { + VMemwT memory = 0; + VWorkwT work = 0; + VCommwT communication = 0; - if (super_node.size() > size_threshold) { + if (superNode.size() > sizeThreshold) { return false; } - if (super_node.size() == 0) { + if (superNode.size() == 0) { return false; } - for (auto &v : super_node) { - memory += dag.vertex_mem_weight(v); - work += dag.vertex_work_weight(v); - communication += dag.vertex_comm_weight(v); + for (auto &v : superNode) { + memory += dag.VertexMemWeight(v); + work += dag.VertexWorkWeight(v); + communication += dag.VertexCommWeight(v); - if (dag.vertex_type(v) != dag.vertex_type(super_node[0])) { + if (dag.VertexType(v) != dag.VertexType(superNode[0])) { return false; } } - if (memory > memory_threshold || work > work_threshold || communication > communication_threshold) { + if (memory > memoryThreshold || work > workThreshold || communication > communicationThreshold) { return false; } } return true; } -BOOST_AUTO_TEST_CASE(coarser_hdagg_test) { +BOOST_AUTO_TEST_CASE(CoarserHdaggTest) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - 
std::vector filenames_graph = tiny_spaa_graphs(); + std::vector filenamesGraph = TinySpaaGraphs(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -112,66 +112,66 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + for (auto &filenameGraph : filenamesGraph) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << nameGraph << std::endl; - using graph_t = computational_dag_edge_idx_vector_impl_def_t; + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." 
<< std::endl; BOOST_CHECK(false); } - BspInstance coarse_instance; - coarse_instance.getArchitecture() = instance.getArchitecture(); - std::vector> vertex_map; - std::vector reverse_vertex_map; + BspInstance coarseInstance; + coarseInstance.GetArchitecture() = instance.GetArchitecture(); + std::vector> vertexMap; + std::vector reverseVertexMap; - hdagg_coarser coarser; + HdaggCoarser coarser; - BOOST_CHECK_EQUAL(coarser.getCoarserName(), "hdagg_coarser"); + BOOST_CHECK_EQUAL(coarser.GetCoarserName(), "hdagg_coarser"); - coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); + coarser.CoarsenDag(instance.GetComputationalDag(), coarseInstance.GetComputationalDag(), reverseVertexMap); - vertex_map = coarser_util::invert_vertex_contraction_map(reverse_vertex_map); + vertexMap = coarser_util::InvertVertexContractionMap(reverseVertexMap); - BOOST_CHECK(check_vertex_map(vertex_map, instance.getComputationalDag().num_vertices())); + BOOST_CHECK(CheckVertexMap(vertexMap, instance.GetComputationalDag().NumVertices())); - GreedyBspScheduler scheduler; - BspSchedule schedule(coarse_instance); + GreedyBspScheduler scheduler; + BspSchedule schedule(coarseInstance); - const auto status_sched = scheduler.computeSchedule(schedule); + const auto statusSched = scheduler.ComputeSchedule(schedule); - BOOST_CHECK(status_sched == RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK(statusSched == ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BspSchedule schedule_out(instance); + BspSchedule scheduleOut(instance); - BOOST_CHECK_EQUAL(coarser_util::pull_back_schedule(schedule, vertex_map, schedule_out), true); - BOOST_CHECK(schedule_out.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(coarser_util::PullBackSchedule(schedule, vertexMap, scheduleOut), true); + BOOST_CHECK(scheduleOut.SatisfiesPrecedenceConstraints()); - CoarseAndSchedule 
coarse_and_schedule(coarser, scheduler); - BspSchedule schedule2(instance); + CoarseAndSchedule coarseAndSchedule(coarser, scheduler); + BspSchedule schedule2(instance); - const auto status = coarse_and_schedule.computeSchedule(schedule2); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule2.satisfiesPrecedenceConstraints()); + const auto status = coarseAndSchedule.ComputeSchedule(schedule2); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule2.SatisfiesPrecedenceConstraints()); } } -BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) { +BOOST_AUTO_TEST_CASE(CoarserHdaggTestDiffGraphImpl) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = tiny_spaa_graphs(); + std::vector filenamesGraph = TinySpaaGraphs(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -181,66 +181,66 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + for (auto &filenameGraph : filenamesGraph) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << nameGraph << std::endl; - using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; - using graph_t2 = computational_dag_vector_impl_def_t; + using GraphT1 = ComputationalDagEdgeIdxVectorImplDefT; + using GraphT2 = ComputationalDagVectorImplDefUnsignedT; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / 
filename_graph).string(), instance.getComputationalDag()); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - BspInstance coarse_instance; - BspArchitecture architecture_t2(instance.getArchitecture()); - coarse_instance.getArchitecture() = architecture_t2; - std::vector> vertex_map; - std::vector reverse_vertex_map; + BspInstance coarseInstance; + BspArchitecture architectureT2(instance.GetArchitecture()); + coarseInstance.GetArchitecture() = architectureT2; + std::vector> vertexMap; + std::vector reverseVertexMap; - hdagg_coarser coarser; + HdaggCoarser coarser; - coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); + coarser.CoarsenDag(instance.GetComputationalDag(), coarseInstance.GetComputationalDag(), reverseVertexMap); - vertex_map = coarser_util::invert_vertex_contraction_map(reverse_vertex_map); + vertexMap = coarser_util::InvertVertexContractionMap(reverseVertexMap); - BOOST_CHECK(check_vertex_map(vertex_map, instance.getComputationalDag().num_vertices())); + BOOST_CHECK(CheckVertexMap(vertexMap, instance.GetComputationalDag().NumVertices())); - GreedyBspScheduler scheduler; - BspSchedule schedule(coarse_instance); + GreedyBspScheduler scheduler; + BspSchedule schedule(coarseInstance); - auto status_sched = scheduler.computeSchedule(schedule); + auto statusSched = scheduler.ComputeSchedule(schedule); - BOOST_CHECK(status_sched == RETURN_STATUS::OSP_SUCCESS); - 
BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK(statusSched == ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BspSchedule schedule_out(instance); + BspSchedule scheduleOut(instance); - BOOST_CHECK_EQUAL(coarser_util::pull_back_schedule(schedule, vertex_map, schedule_out), true); - BOOST_CHECK(schedule_out.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(coarser_util::PullBackSchedule(schedule, vertexMap, scheduleOut), true); + BOOST_CHECK(scheduleOut.SatisfiesPrecedenceConstraints()); - CoarseAndSchedule coarse_and_schedule(coarser, scheduler); - BspSchedule schedule2(instance); + CoarseAndSchedule coarseAndSchedule(coarser, scheduler); + BspSchedule schedule2(instance); - auto status = coarse_and_schedule.computeSchedule(schedule2); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule2.satisfiesPrecedenceConstraints()); + auto status = coarseAndSchedule.ComputeSchedule(schedule2); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule2.SatisfiesPrecedenceConstraints()); } } -BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { +BOOST_AUTO_TEST_CASE(CoarserBspscheduleTest) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = tiny_spaa_graphs(); + std::vector filenamesGraph = TinySpaaGraphs(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -250,73 +250,73 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + for (auto &filenameGraph : filenamesGraph) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = 
nameGraph.substr(0, nameGraph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << nameGraph << std::endl; - using graph_t = computational_dag_edge_idx_vector_impl_def_t; + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - BspInstance coarse_instance; - coarse_instance.getArchitecture() = instance.getArchitecture(); - std::vector> vertex_map; - std::vector reverse_vertex_map; + BspInstance coarseInstance; + coarseInstance.GetArchitecture() = instance.GetArchitecture(); + std::vector> vertexMap; + std::vector reverseVertexMap; - GreedyBspScheduler scheduler; - BspSchedule schedule_orig(instance); + GreedyBspScheduler scheduler; + BspSchedule scheduleOrig(instance); - const auto status_sched_orig = scheduler.computeSchedule(schedule_orig); + const auto statusSchedOrig = scheduler.ComputeSchedule(scheduleOrig); - BOOST_CHECK(status_sched_orig == RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule_orig.satisfiesPrecedenceConstraints()); + BOOST_CHECK(statusSchedOrig == ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(scheduleOrig.SatisfiesPrecedenceConstraints()); - BspScheduleCoarser coarser(schedule_orig); + BspScheduleCoarser coarser(scheduleOrig); - 
coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); + coarser.CoarsenDag(instance.GetComputationalDag(), coarseInstance.GetComputationalDag(), reverseVertexMap); - vertex_map = coarser_util::invert_vertex_contraction_map(reverse_vertex_map); + vertexMap = coarser_util::InvertVertexContractionMap(reverseVertexMap); - BOOST_CHECK(check_vertex_map(vertex_map, instance.getComputationalDag().num_vertices())); + BOOST_CHECK(CheckVertexMap(vertexMap, instance.GetComputationalDag().NumVertices())); - BspSchedule schedule(coarse_instance); + BspSchedule schedule(coarseInstance); - const auto status_sched = scheduler.computeSchedule(schedule); + const auto statusSched = scheduler.ComputeSchedule(schedule); - BOOST_CHECK(status_sched == RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK(statusSched == ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BspSchedule schedule_out(instance); + BspSchedule scheduleOut(instance); - BOOST_CHECK_EQUAL(coarser_util::pull_back_schedule(schedule, vertex_map, schedule_out), true); - BOOST_CHECK(schedule_out.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(coarser_util::PullBackSchedule(schedule, vertexMap, scheduleOut), true); + BOOST_CHECK(scheduleOut.SatisfiesPrecedenceConstraints()); - CoarseAndSchedule coarse_and_schedule(coarser, scheduler); - BspSchedule schedule2(instance); + CoarseAndSchedule coarseAndSchedule(coarser, scheduler); + BspSchedule schedule2(instance); - const auto status = coarse_and_schedule.computeSchedule(schedule2); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule2.satisfiesPrecedenceConstraints()); + const auto status = coarseAndSchedule.ComputeSchedule(schedule2); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + 
BOOST_CHECK(schedule2.SatisfiesPrecedenceConstraints()); } } -template -void test_coarser_same_graph(Coarser &coarser) { +template +void TestCoarserSameGraph(Coarser &coarser) { // BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = tiny_spaa_graphs(); + std::vector filenamesGraph = TinySpaaGraphs(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -326,125 +326,125 @@ void test_coarser_same_graph(Coarser &coarser) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + for (auto &filenameGraph : filenamesGraph) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << nameGraph << std::endl; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." 
<< std::endl; BOOST_CHECK(false); } - BspInstance coarse_instance; - coarse_instance.getArchitecture() = instance.getArchitecture(); - std::vector> vertex_map; - std::vector reverse_vertex_map; + BspInstance coarseInstance; + coarseInstance.GetArchitecture() = instance.GetArchitecture(); + std::vector> vertexMap; + std::vector reverseVertexMap; - GreedyBspScheduler scheduler; + GreedyBspScheduler scheduler; - bool coarse_success - = coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); - BOOST_CHECK(coarse_success); + bool coarseSuccess + = coarser.CoarsenDag(instance.GetComputationalDag(), coarseInstance.GetComputationalDag(), reverseVertexMap); + BOOST_CHECK(coarseSuccess); - vertex_map = coarser_util::invert_vertex_contraction_map(reverse_vertex_map); + vertexMap = coarser_util::InvertVertexContractionMap(reverseVertexMap); - BOOST_CHECK(check_vertex_map(vertex_map, instance.getComputationalDag().num_vertices())); + BOOST_CHECK(CheckVertexMap(vertexMap, instance.GetComputationalDag().NumVertices())); - BspSchedule schedule(coarse_instance); + BspSchedule schedule(coarseInstance); - const auto status_sched = scheduler.computeSchedule(schedule); + const auto statusSched = scheduler.ComputeSchedule(schedule); - BOOST_CHECK(status_sched == RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK(statusSched == ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BspSchedule schedule_out(instance); + BspSchedule scheduleOut(instance); - BOOST_CHECK_EQUAL(coarser_util::pull_back_schedule(schedule, vertex_map, schedule_out), true); - BOOST_CHECK(schedule_out.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(coarser_util::PullBackSchedule(schedule, vertexMap, scheduleOut), true); + BOOST_CHECK(scheduleOut.SatisfiesPrecedenceConstraints()); - CoarseAndSchedule coarse_and_schedule(coarser, scheduler); - BspSchedule 
schedule2(instance); + CoarseAndSchedule coarseAndSchedule(coarser, scheduler); + BspSchedule schedule2(instance); - const auto status = coarse_and_schedule.computeSchedule(schedule2); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule2.satisfiesPrecedenceConstraints()); + const auto status = coarseAndSchedule.ComputeSchedule(schedule2); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule2.SatisfiesPrecedenceConstraints()); } } -BOOST_AUTO_TEST_CASE(coarser_funndel_bfs_test) { - using graph_t = computational_dag_edge_idx_vector_impl_def_t; - FunnelBfs coarser; +BOOST_AUTO_TEST_CASE(CoarserFunndelBfsTest) { + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; + FunnelBfs coarser; - test_coarser_same_graph(coarser); + TestCoarserSameGraph(coarser); - FunnelBfs::FunnelBfs_parameters params{std::numeric_limits>::max(), - std::numeric_limits>::max(), - std::numeric_limits::max(), - false, - true}; + FunnelBfs::FunnelBfsParameters params{std::numeric_limits>::max(), + std::numeric_limits>::max(), + std::numeric_limits::max(), + false, + true}; - FunnelBfs coarser_params(params); + FunnelBfs coarserParams(params); - test_coarser_same_graph(coarser_params); + TestCoarserSameGraph(coarserParams); - params.max_depth = 2; - FunnelBfs coarser_params_2(params); + params.maxDepth_ = 2; + FunnelBfs coarserParams2(params); - test_coarser_same_graph(coarser_params_2); + TestCoarserSameGraph(coarserParams2); } -BOOST_AUTO_TEST_CASE(coarser_top_sort_test) { - using graph_t = computational_dag_edge_idx_vector_impl_def_t; - top_order_coarser coarser; +BOOST_AUTO_TEST_CASE(CoarserTopSortTest) { + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; + TopOrderCoarser coarser; - test_coarser_same_graph(coarser); + TestCoarserSameGraph(coarser); - top_order_coarser coarser_2; + TopOrderCoarser coarser2; - test_coarser_same_graph(coarser_2); + 
TestCoarserSameGraph(coarser2); - top_order_coarser coarser_3; + TopOrderCoarser coarser3; - test_coarser_same_graph(coarser_3); + TestCoarserSameGraph(coarser3); } -BOOST_AUTO_TEST_CASE(squashA_test) { - using graph_t = computational_dag_edge_idx_vector_impl_def_t; - // using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(SquashATest) { + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; + // using graph_t = ComputationalDagVectorImplDefUnsignedT; - SquashAParams::Parameters params; - params.mode = SquashAParams::Mode::EDGE_WEIGHT; - params.use_structured_poset = false; + squash_a_params::Parameters params; + params.mode_ = squash_a_params::Mode::EDGE_WEIGHT; + params.useStructuredPoset_ = false; - SquashA coarser(params); + SquashA coarser(params); - test_coarser_same_graph(coarser); + TestCoarserSameGraph(coarser); - params.mode = SquashAParams::Mode::TRIANGLES; - params.use_structured_poset = true; - params.use_top_poset = true; - coarser.setParams(params); + params.mode_ = squash_a_params::Mode::TRIANGLES; + params.useStructuredPoset_ = true; + params.useTopPoset_ = true; + coarser.SetParams(params); - test_coarser_same_graph(coarser); + TestCoarserSameGraph(coarser); - params.use_top_poset = false; - coarser.setParams(params); + params.useTopPoset_ = false; + coarser.SetParams(params); - test_coarser_same_graph(coarser); + TestCoarserSameGraph(coarser); } -BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { +BOOST_AUTO_TEST_CASE(CoarserSquashATestDiffGraphImplCsg) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = tiny_spaa_graphs(); + std::vector filenamesGraph = TinySpaaGraphs(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -454,70 +454,70 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - std::string name_graph = 
filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + for (auto &filenameGraph : filenamesGraph) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << nameGraph << std::endl; - using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; - using graph_t2 = CSG; + using GraphT1 = ComputationalDagEdgeIdxVectorImplDefT; + using GraphT2 = CSG; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." 
<< std::endl; BOOST_CHECK(false); } - BspInstance coarse_instance; - BspArchitecture architecture_t2(instance.getArchitecture()); - coarse_instance.getArchitecture() = architecture_t2; - std::vector> vertex_map; - std::vector reverse_vertex_map; + BspInstance coarseInstance; + BspArchitecture architectureT2(instance.GetArchitecture()); + coarseInstance.GetArchitecture() = architectureT2; + std::vector> vertexMap; + std::vector reverseVertexMap; - SquashAParams::Parameters params; - params.mode = SquashAParams::Mode::EDGE_WEIGHT; - params.use_structured_poset = false; + squash_a_params::Parameters params; + params.mode_ = squash_a_params::Mode::EDGE_WEIGHT; + params.useStructuredPoset_ = false; - SquashA coarser(params); + SquashA coarser(params); - coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); + coarser.CoarsenDag(instance.GetComputationalDag(), coarseInstance.GetComputationalDag(), reverseVertexMap); - vertex_map = coarser_util::invert_vertex_contraction_map(reverse_vertex_map); + vertexMap = coarser_util::InvertVertexContractionMap(reverseVertexMap); - BOOST_CHECK(check_vertex_map(vertex_map, instance.getComputationalDag().num_vertices())); + BOOST_CHECK(CheckVertexMap(vertexMap, instance.GetComputationalDag().NumVertices())); - GreedyBspScheduler scheduler; - BspSchedule schedule(coarse_instance); + GreedyBspScheduler scheduler; + BspSchedule schedule(coarseInstance); - auto status_sched = scheduler.computeSchedule(schedule); + auto statusSched = scheduler.ComputeSchedule(schedule); - BOOST_CHECK(status_sched == RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK(statusSched == ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BspSchedule schedule_out(instance); + BspSchedule scheduleOut(instance); - BOOST_CHECK_EQUAL(coarser_util::pull_back_schedule(schedule, vertex_map, schedule_out), true); - 
BOOST_CHECK(schedule_out.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(coarser_util::PullBackSchedule(schedule, vertexMap, scheduleOut), true); + BOOST_CHECK(scheduleOut.SatisfiesPrecedenceConstraints()); - CoarseAndSchedule coarse_and_schedule(coarser, scheduler); - BspSchedule schedule2(instance); + CoarseAndSchedule coarseAndSchedule(coarser, scheduler); + BspSchedule schedule2(instance); - auto status = coarse_and_schedule.computeSchedule(schedule2); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule2.satisfiesPrecedenceConstraints()); + auto status = coarseAndSchedule.ComputeSchedule(schedule2); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule2.SatisfiesPrecedenceConstraints()); } } -BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { +BOOST_AUTO_TEST_CASE(CoarserSquashATestDiffGraphImplCsge) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = tiny_spaa_graphs(); + std::vector filenamesGraph = TinySpaaGraphs(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -527,153 +527,153 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); + for (auto &filenameGraph : filenamesGraph) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << nameGraph << std::endl; - using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; - using graph_t2 = CSGE; + using GraphT1 = 
ComputationalDagEdgeIdxVectorImplDefT; + using GraphT2 = CSGE; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - BspInstance coarse_instance; - BspArchitecture architecture_t2(instance.getArchitecture()); - coarse_instance.getArchitecture() = architecture_t2; - std::vector> vertex_map; - std::vector reverse_vertex_map; + BspInstance coarseInstance; + BspArchitecture architectureT2(instance.GetArchitecture()); + coarseInstance.GetArchitecture() = architectureT2; + std::vector> vertexMap; + std::vector reverseVertexMap; - SquashAParams::Parameters params; - params.mode = SquashAParams::Mode::EDGE_WEIGHT; - params.use_structured_poset = false; + squash_a_params::Parameters params; + params.mode_ = squash_a_params::Mode::EDGE_WEIGHT; + params.useStructuredPoset_ = false; - SquashA coarser(params); + SquashA coarser(params); - coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); + coarser.CoarsenDag(instance.GetComputationalDag(), coarseInstance.GetComputationalDag(), reverseVertexMap); - vertex_map = coarser_util::invert_vertex_contraction_map(reverse_vertex_map); + vertexMap = coarser_util::InvertVertexContractionMap(reverseVertexMap); - BOOST_CHECK(check_vertex_map(vertex_map, instance.getComputationalDag().num_vertices())); + 
BOOST_CHECK(CheckVertexMap(vertexMap, instance.GetComputationalDag().NumVertices())); - GreedyBspScheduler scheduler; - BspSchedule schedule(coarse_instance); + GreedyBspScheduler scheduler; + BspSchedule schedule(coarseInstance); - auto status_sched = scheduler.computeSchedule(schedule); + auto statusSched = scheduler.ComputeSchedule(schedule); - BOOST_CHECK(status_sched == RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK(statusSched == ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BspSchedule schedule_out(instance); + BspSchedule scheduleOut(instance); - BOOST_CHECK_EQUAL(coarser_util::pull_back_schedule(schedule, vertex_map, schedule_out), true); - BOOST_CHECK(schedule_out.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(coarser_util::PullBackSchedule(schedule, vertexMap, scheduleOut), true); + BOOST_CHECK(scheduleOut.SatisfiesPrecedenceConstraints()); - CoarseAndSchedule coarse_and_schedule(coarser, scheduler); - BspSchedule schedule2(instance); + CoarseAndSchedule coarseAndSchedule(coarser, scheduler); + BspSchedule schedule2(instance); - auto status = coarse_and_schedule.computeSchedule(schedule2); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule2.satisfiesPrecedenceConstraints()); + auto status = coarseAndSchedule.ComputeSchedule(schedule2); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule2.SatisfiesPrecedenceConstraints()); } } -BOOST_AUTO_TEST_CASE(Sarkar_test) { - using graph_t = computational_dag_edge_idx_vector_impl_def_t; - // using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(SarkarTest) { + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; + // using graph_t = ComputationalDagVectorImplDefUnsignedT; - SarkarParams::Parameters> params; - params.mode = SarkarParams::Mode::LINES; - params.commCost = 
100; - params.useTopPoset = true; + sarkar_params::Parameters> params; + params.mode_ = sarkar_params::Mode::LINES; + params.commCost_ = 100; + params.useTopPoset_ = true; - Sarkar coarser(params); + Sarkar coarser(params); - test_coarser_same_graph(coarser); + TestCoarserSameGraph(coarser); - params.useTopPoset = false; - coarser.setParameters(params); - test_coarser_same_graph(coarser); + params.useTopPoset_ = false; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); - params.mode = SarkarParams::Mode::FAN_IN_FULL; - coarser.setParameters(params); - test_coarser_same_graph(coarser); + params.mode_ = sarkar_params::Mode::FAN_IN_FULL; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); - params.mode = SarkarParams::Mode::FAN_IN_PARTIAL; - coarser.setParameters(params); - test_coarser_same_graph(coarser); + params.mode_ = sarkar_params::Mode::FAN_IN_PARTIAL; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); - params.mode = SarkarParams::Mode::FAN_OUT_FULL; - coarser.setParameters(params); - test_coarser_same_graph(coarser); + params.mode_ = sarkar_params::Mode::FAN_OUT_FULL; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); - params.mode = SarkarParams::Mode::FAN_OUT_PARTIAL; - coarser.setParameters(params); - test_coarser_same_graph(coarser); + params.mode_ = sarkar_params::Mode::FAN_OUT_PARTIAL; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); - params.mode = SarkarParams::Mode::LEVEL_EVEN; - coarser.setParameters(params); - test_coarser_same_graph(coarser); + params.mode_ = sarkar_params::Mode::LEVEL_EVEN; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); - params.mode = SarkarParams::Mode::LEVEL_ODD; - coarser.setParameters(params); - test_coarser_same_graph(coarser); + params.mode_ = sarkar_params::Mode::LEVEL_ODD; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); - params.mode = SarkarParams::Mode::FAN_IN_BUFFER; - coarser.setParameters(params); - 
test_coarser_same_graph(coarser); + params.mode_ = sarkar_params::Mode::FAN_IN_BUFFER; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); - params.mode = SarkarParams::Mode::FAN_OUT_BUFFER; - coarser.setParameters(params); - test_coarser_same_graph(coarser); + params.mode_ = sarkar_params::Mode::FAN_OUT_BUFFER; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); - params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER; - coarser.setParameters(params); - test_coarser_same_graph(coarser); + params.mode_ = sarkar_params::Mode::HOMOGENEOUS_BUFFER; + coarser.SetParameters(params); + TestCoarserSameGraph(coarser); } -BOOST_AUTO_TEST_CASE(SarkarML_test) { - using graph_t = computational_dag_edge_idx_vector_impl_def_t; - // using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(SarkarMlTest) { + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; + // using graph_t = ComputationalDagVectorImplDefUnsignedT; - SarkarParams::MulParameters> params; - params.commCostVec = {100}; + sarkar_params::MulParameters> params; + params.commCostVec_ = {100}; - SarkarMul coarser; - coarser.setParameters(params); + SarkarMul coarser; + coarser.SetParameters(params); - test_coarser_same_graph(coarser); + TestCoarserSameGraph(coarser); } -BOOST_AUTO_TEST_CASE(SarkarMLBufferMerge_test) { - using graph_t = computational_dag_edge_idx_vector_impl_def_t; - // using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(SarkarMlBufferMergeTest) { + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; + // using graph_t = ComputationalDagVectorImplDefUnsignedT; - SarkarParams::MulParameters> params; - params.commCostVec = {1, 2, 10, 50, 100}; - params.buffer_merge_mode = SarkarParams::BufferMergeMode::FULL; + sarkar_params::MulParameters> params; + params.commCostVec_ = {1, 2, 10, 50, 100}; + params.bufferMergeMode_ = sarkar_params::BufferMergeMode::FULL; - SarkarMul coarser; - coarser.setParameters(params); + SarkarMul coarser; + 
coarser.SetParameters(params); - test_coarser_same_graph(coarser); + TestCoarserSameGraph(coarser); } -BOOST_AUTO_TEST_CASE(SquashAML_test) { - using graph_t = computational_dag_edge_idx_vector_impl_def_t; - // using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(SquashAmlTest) { + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; + // using graph_t = ComputationalDagVectorImplDefUnsignedT; - SquashAMul coarser; + SquashAMul coarser; - test_coarser_same_graph(coarser); + TestCoarserSameGraph(coarser); } diff --git a/tests/coarser_util.cpp b/tests/coarser_util.cpp index 61b418d7..aa7657c6 100644 --- a/tests/coarser_util.cpp +++ b/tests/coarser_util.cpp @@ -27,50 +27,50 @@ limitations under the License. using namespace osp; using namespace osp::coarser_util; -using GraphType = Compact_Sparse_Graph; +using GraphType = CompactSparseGraph; BOOST_AUTO_TEST_CASE(ContractionMapValidity) { - const std::vector> contractionmap1 = {0, 1, 2, 3}; - BOOST_CHECK(check_valid_contraction_map(contractionmap1)); + const std::vector> contractionmap1 = {0, 1, 2, 3}; + BOOST_CHECK(CheckValidContractionMap(contractionmap1)); - const std::vector> contractionmap2 = {1, 2, 3}; - BOOST_CHECK(not check_valid_contraction_map(contractionmap2)); + const std::vector> contractionmap2 = {1, 2, 3}; + BOOST_CHECK(not CheckValidContractionMap(contractionmap2)); - const std::vector> contractionmap3 = {0, 1, 3, 4}; - BOOST_CHECK(not check_valid_contraction_map(contractionmap3)); + const std::vector> contractionmap3 = {0, 1, 3, 4}; + BOOST_CHECK(not CheckValidContractionMap(contractionmap3)); - const std::vector> contractionmap4 = {0, 1, 0, 1}; - BOOST_CHECK(check_valid_contraction_map(contractionmap4)); + const std::vector> contractionmap4 = {0, 1, 0, 1}; + BOOST_CHECK(CheckValidContractionMap(contractionmap4)); - const std::vector> contractionmap5 = {2, 1, 2, 0, 1, 1}; - BOOST_CHECK(check_valid_contraction_map(contractionmap5)); + const std::vector> contractionmap5 = {2, 1, 2, 
0, 1, 1}; + BOOST_CHECK(CheckValidContractionMap(contractionmap5)); } BOOST_AUTO_TEST_CASE(ExpansionMapValidity) { - const std::vector>> expansionmap1 = {{0}, {1}, {2}, {3}}; - BOOST_CHECK(check_valid_expansion_map(expansionmap1)); + const std::vector>> expansionmap1 = {{0}, {1}, {2}, {3}}; + BOOST_CHECK(CheckValidExpansionMap(expansionmap1)); - const std::vector>> expansionmap2 = {{0}, {2}, {3}}; - BOOST_CHECK(not check_valid_expansion_map(expansionmap2)); + const std::vector>> expansionmap2 = {{0}, {2}, {3}}; + BOOST_CHECK(not CheckValidExpansionMap(expansionmap2)); - const std::vector>> expansionmap3 = { + const std::vector>> expansionmap3 = { {0, 3} }; - BOOST_CHECK(not check_valid_expansion_map(expansionmap3)); + BOOST_CHECK(not CheckValidExpansionMap(expansionmap3)); - const std::vector>> expansionmap4 = { + const std::vector>> expansionmap4 = { {0, 3}, {2, 1, 4}, {5} }; - BOOST_CHECK(check_valid_expansion_map(expansionmap4)); + BOOST_CHECK(CheckValidExpansionMap(expansionmap4)); - const std::vector>> expansionmap5 = {{0}, {}, {2}, {3}, {1}}; - BOOST_CHECK(not check_valid_expansion_map(expansionmap5)); + const std::vector>> expansionmap5 = {{0}, {}, {2}, {3}, {1}}; + BOOST_CHECK(not CheckValidExpansionMap(expansionmap5)); } BOOST_AUTO_TEST_CASE(ContractionMapCoarsening) { - std::set, vertex_idx_t>> edges({ + std::set, VertexIdxT>> edges({ {0, 1}, {1, 2} }); @@ -78,26 +78,26 @@ BOOST_AUTO_TEST_CASE(ContractionMapCoarsening) { GraphType coarseGraph1; - std::vector> contractionMap({0, 0, 1, 1, 2, 3}); - BOOST_CHECK(construct_coarse_dag(graph, coarseGraph1, contractionMap)); - BOOST_CHECK(contractionMap == std::vector>({0, 0, 1, 1, 2, 3})); + std::vector> contractionMap({0, 0, 1, 1, 2, 3}); + BOOST_CHECK(ConstructCoarseDag(graph, coarseGraph1, contractionMap)); + BOOST_CHECK(contractionMap == std::vector>({0, 0, 1, 1, 2, 3})); - BOOST_CHECK_EQUAL(coarseGraph1.num_vertices(), 4); - BOOST_CHECK_EQUAL(coarseGraph1.num_edges(), 1); + 
BOOST_CHECK_EQUAL(coarseGraph1.NumVertices(), 4); + BOOST_CHECK_EQUAL(coarseGraph1.NumEdges(), 1); - BOOST_CHECK_EQUAL(coarseGraph1.out_degree(0), 1); - BOOST_CHECK_EQUAL(coarseGraph1.out_degree(1), 0); - BOOST_CHECK_EQUAL(coarseGraph1.out_degree(2), 0); + BOOST_CHECK_EQUAL(coarseGraph1.OutDegree(0), 1); + BOOST_CHECK_EQUAL(coarseGraph1.OutDegree(1), 0); + BOOST_CHECK_EQUAL(coarseGraph1.OutDegree(2), 0); - BOOST_CHECK_EQUAL(coarseGraph1.in_degree(0), 0); - BOOST_CHECK_EQUAL(coarseGraph1.in_degree(1), 1); - BOOST_CHECK_EQUAL(coarseGraph1.in_degree(2), 0); + BOOST_CHECK_EQUAL(coarseGraph1.InDegree(0), 0); + BOOST_CHECK_EQUAL(coarseGraph1.InDegree(1), 1); + BOOST_CHECK_EQUAL(coarseGraph1.InDegree(2), 0); - for (const auto &vert : coarseGraph1.children(0)) { + for (const auto &vert : coarseGraph1.Children(0)) { BOOST_CHECK_EQUAL(vert, 1); } - for (const auto &vert : coarseGraph1.parents(1)) { + for (const auto &vert : coarseGraph1.Parents(1)) { BOOST_CHECK_EQUAL(vert, 0); } } diff --git a/tests/compact_sparse_graph.cpp b/tests/compact_sparse_graph.cpp index ba191d70..9374b16f 100644 --- a/tests/compact_sparse_graph.cpp +++ b/tests/compact_sparse_graph.cpp @@ -25,44 +25,44 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(Empty_Graph_keep_order) { - Compact_Sparse_Graph graph; +BOOST_AUTO_TEST_CASE(EmptyGraphKeepOrder) { + CompactSparseGraph graph; - BOOST_CHECK_EQUAL(graph.num_vertices(), 0); - BOOST_CHECK_EQUAL(graph.num_edges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 0); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); } -BOOST_AUTO_TEST_CASE(Empty_Graph_reorder) { - Compact_Sparse_Graph graph; +BOOST_AUTO_TEST_CASE(EmptyGraphReorder) { + CompactSparseGraph graph; - BOOST_CHECK_EQUAL(graph.num_vertices(), 0); - BOOST_CHECK_EQUAL(graph.num_edges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 0); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); } -BOOST_AUTO_TEST_CASE(No_Edges_Graph_keep_order) { +BOOST_AUTO_TEST_CASE(NoEdgesGraphKeepOrder) { const std::vector> edges({}); - Compact_Sparse_Graph graph(10, edges); + CompactSparseGraph graph(10, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 10); - BOOST_CHECK_EQUAL(graph.num_edges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 10); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); } -BOOST_AUTO_TEST_CASE(No_Edges_Graph_reorder) { +BOOST_AUTO_TEST_CASE(NoEdgesGraphReorder) { const std::vector> edges({}); - Compact_Sparse_Graph graph(10, edges); + CompactSparseGraph graph(10, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 10); - BOOST_CHECK_EQUAL(graph.num_edges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 10); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); std::vector perm(10, 0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = graph.GetPullbackPermutation(); + BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); } -BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { +BOOST_AUTO_TEST_CASE(LineGraphKeepOrder) { const std::set> edges({ {0, 1}, {1, 2}, @@ -73,63 +73,63 @@ 
BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { {6, 7} }); - Compact_Sparse_Graph graph(8, edges); + CompactSparseGraph graph(8, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(graph.num_edges(), 7); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 7); std::size_t cntr = 0; - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { BOOST_CHECK_EQUAL(vert, cntr); ++cntr; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr); + BOOST_CHECK_EQUAL(graph.NumVertices(), cntr); - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 7) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), 1); - for (const std::size_t &chld : graph.children(vert)) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), 1); + for (const std::size_t &chld : graph.Children(vert)) { BOOST_CHECK_EQUAL(chld, vert + 1); } - auto chldren = graph.children(vert); - BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.out_degree(vert)); + auto chldren = graph.Children(vert); + BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.OutDegree(vert)); for (auto it = chldren.crbegin(); it != chldren.crend(); ++it) { BOOST_CHECK_EQUAL(*it, vert + 1); } } else { - BOOST_CHECK_EQUAL(graph.out_degree(vert), 0); - for (const std::size_t &chld : graph.children(vert)) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), 0); + for (const std::size_t &chld : graph.Children(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(chld, 100); } - auto chldren = graph.children(vert); - BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.out_degree(vert)); + auto chldren = graph.Children(vert); + BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.OutDegree(vert)); for (auto it = chldren.crbegin(); it != chldren.crend(); ++it) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(*it, 100); } } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 0) { 
- BOOST_CHECK_EQUAL(graph.in_degree(vert), 1); - for (const std::size_t &par : graph.parents(vert)) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), 1); + for (const std::size_t &par : graph.Parents(vert)) { BOOST_CHECK_EQUAL(par, vert - 1); } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { BOOST_CHECK_EQUAL(*it, vert - 1); } } else { - BOOST_CHECK_EQUAL(graph.in_degree(vert), 0); - for (const std::size_t &par : graph.parents(vert)) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), 0); + for (const std::size_t &par : graph.Parents(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(par, 100); } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(*it, 100); @@ -137,20 +137,20 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 0) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 2); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 2); } else { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1); } } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } } -BOOST_AUTO_TEST_CASE(LineGraph_reorder) { +BOOST_AUTO_TEST_CASE(LineGraphReorder) { const std::vector> edges({ {0, 1}, {1, 2}, @@ -161,62 +161,62 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) { {6, 7} }); - Compact_Sparse_Graph graph(8, 
edges); + CompactSparseGraph graph(8, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(graph.num_edges(), 7); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 7); std::size_t cntr = 0; - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { BOOST_CHECK_EQUAL(vert, cntr); ++cntr; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr); + BOOST_CHECK_EQUAL(graph.NumVertices(), cntr); - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 7) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), 1); - for (const std::size_t &chld : graph.children(vert)) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), 1); + for (const std::size_t &chld : graph.Children(vert)) { BOOST_CHECK_EQUAL(chld, vert + 1); } - auto chldren = graph.children(vert); - BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.out_degree(vert)); + auto chldren = graph.Children(vert); + BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.OutDegree(vert)); for (auto it = chldren.crbegin(); it != chldren.crend(); ++it) { BOOST_CHECK_EQUAL(*it, vert + 1); } } else { - BOOST_CHECK_EQUAL(graph.out_degree(vert), 0); - for (const std::size_t &chld : graph.children(vert)) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), 0); + for (const std::size_t &chld : graph.Children(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(chld, 100); } - auto chldren = graph.children(vert); - BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.out_degree(vert)); + auto chldren = graph.Children(vert); + BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.OutDegree(vert)); for (auto it = chldren.crbegin(); it != chldren.crend(); ++it) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(*it, 100); } } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 0) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), 1); - for (const std::size_t &par : 
graph.parents(vert)) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), 1); + for (const std::size_t &par : graph.Parents(vert)) { BOOST_CHECK_EQUAL(par, vert - 1); } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { BOOST_CHECK_EQUAL(*it, vert - 1); } } else { - BOOST_CHECK_EQUAL(graph.in_degree(vert), 0); - for (const std::size_t &par : graph.parents(vert)) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), 0); + for (const std::size_t &par : graph.Parents(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(par, 100); } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(*it, 100); @@ -224,29 +224,29 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) { } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 0) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 2); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 2); } else { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1); } } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } std::vector perm(8, 0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = graph.GetPullbackPermutation(); + 
BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(perm[vert], graph_perm[vert]); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(perm[vert], graphPerm[vert]); } } -BOOST_AUTO_TEST_CASE(Graph1_keep_order) { +BOOST_AUTO_TEST_CASE(Graph1KeepOrder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -261,19 +261,19 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { {1, 2} }); - Compact_Sparse_Graph graph(11, edges); + CompactSparseGraph graph(11, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 11); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 11); - std::size_t cntr_0 = 0; - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(vert, cntr_0); - ++cntr_0; + std::size_t cntr0 = 0; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(vert, cntr0); + ++cntr0; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr_0); + BOOST_CHECK_EQUAL(graph.NumVertices(), cntr0); - std::vector> out_edges({ + std::vector> outEdges({ {1, 2}, {2, 6}, {3}, @@ -287,27 +287,27 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { {} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[vert].size()); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), outEdges[vert].size()); std::size_t cntr = 0; - for (const auto &chld : graph.children(vert)) { - BOOST_CHECK_EQUAL(chld, out_edges[vert][cntr]); + for (const auto &chld : graph.Children(vert)) { + BOOST_CHECK_EQUAL(chld, outEdges[vert][cntr]); ++cntr; } - auto chldrn = graph.children(vert); - BOOST_CHECK_EQUAL(chldrn.crend() - chldrn.crbegin(), graph.out_degree(vert)); + auto chldrn = graph.Children(vert); + BOOST_CHECK_EQUAL(chldrn.crend() - chldrn.crbegin(), graph.OutDegree(vert)); for (auto it = chldrn.crbegin(); it != chldrn.crend(); ++it) { 
--cntr; - BOOST_CHECK_EQUAL(*it, out_edges[vert][cntr]); + BOOST_CHECK_EQUAL(*it, outEdges[vert][cntr]); } cntr = 0; - for (const auto &e : osp::out_edges(vert, graph)) { - BOOST_CHECK_EQUAL(target(e, graph), out_edges[vert][cntr++]); + for (const auto &e : osp::OutEdges(vert, graph)) { + BOOST_CHECK_EQUAL(Target(e, graph), outEdges[vert][cntr++]); } } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0, 1}, @@ -321,44 +321,44 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { {6} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[vert].size()); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), inEdges[vert].size()); std::size_t cntr = 0; - for (const auto &par : graph.parents(vert)) { - BOOST_CHECK_EQUAL(par, in_edges[vert][cntr]); + for (const auto &par : graph.Parents(vert)) { + BOOST_CHECK_EQUAL(par, inEdges[vert][cntr]); ++cntr; } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { --cntr; - BOOST_CHECK_EQUAL(*it, in_edges[vert][cntr]); + BOOST_CHECK_EQUAL(*it, inEdges[vert][cntr]); } cntr = 0; - for (const auto &e : osp::in_edges(vert, graph)) { - BOOST_CHECK_EQUAL(source(e, graph), in_edges[vert][cntr++]); + for (const auto &e : osp::InEdges(vert, graph)) { + BOOST_CHECK_EQUAL(Source(e, graph), inEdges[vert][cntr++]); } } unsigned count = 0; - for (const auto &e : osp::edges(graph)) { - std::cout << e.source << " -> " << e.target << std::endl; + for (const auto &e : osp::Edges(graph)) { + std::cout << e.source_ << " -> " << e.target_ << std::endl; count++; } BOOST_CHECK_EQUAL(count, 11); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[vert].size()); + for (const auto &vert : 
graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1 + inEdges[vert].size()); } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } } -BOOST_AUTO_TEST_CASE(Graph1_reorder) { +BOOST_AUTO_TEST_CASE(Graph1Reorder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -373,24 +373,24 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { {1, 2} }); - Compact_Sparse_Graph graph(11, edges); + CompactSparseGraph graph(11, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 11); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 11); - std::size_t cntr_0 = 0; - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(vert, cntr_0); - ++cntr_0; + std::size_t cntr0 = 0; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(vert, cntr0); + ++cntr0; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr_0); + BOOST_CHECK_EQUAL(graph.NumVertices(), cntr0); std::vector perm(11, 0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = graph.GetPullbackPermutation(); + BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); - std::vector> out_edges({ + std::vector> outEdges({ {1, 2}, {2, 6}, {3}, @@ -404,39 +404,39 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { {} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), outEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_chld = 0; + 
std::size_t previousChld = 0; std::size_t cntr = 0; - for (const auto &chld : graph.children(vert)) { + for (const auto &chld : graph.Children(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_chld, chld); + BOOST_CHECK_LE(previousChld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) - != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(outEdges[oriVert].cbegin(), outEdges[oriVert].cend(), graphPerm[chld]) + != outEdges[oriVert].cend()); - previous_chld = chld; + previousChld = chld; ++cntr; } - auto chldrn = graph.children(vert); - BOOST_CHECK_EQUAL(chldrn.crend() - chldrn.crbegin(), graph.out_degree(vert)); + auto chldrn = graph.Children(vert); + BOOST_CHECK_EQUAL(chldrn.crend() - chldrn.crbegin(), graph.OutDegree(vert)); for (auto it = chldrn.crbegin(); it != chldrn.crend(); ++it) { - if (cntr < graph.out_degree(vert)) { - BOOST_CHECK_GE(previous_chld, *it); + if (cntr < graph.OutDegree(vert)) { + BOOST_CHECK_GE(previousChld, *it); } --cntr; - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[*it]) - != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(outEdges[oriVert].cbegin(), outEdges[oriVert].cend(), graphPerm[*it]) + != outEdges[oriVert].cend()); - previous_chld = *it; + previousChld = *it; } } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0, 1}, @@ -450,96 +450,94 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { {6} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), inEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_par = 0; + std::size_t previousPar = 0; std::size_t cntr = 0; - for (const auto &par : graph.parents(vert)) { + for (const auto &par : graph.Parents(vert)) { if (cntr > 0) { - 
BOOST_CHECK_LE(previous_par, par); + BOOST_CHECK_LE(previousPar, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) - != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(inEdges[oriVert].cbegin(), inEdges[oriVert].cend(), graphPerm[par]) != inEdges[oriVert].cend()); - previous_par = par; + previousPar = par; ++cntr; } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { - if (cntr < graph.out_degree(vert)) { - BOOST_CHECK_GE(previous_par, *it); + if (cntr < graph.OutDegree(vert)) { + BOOST_CHECK_GE(previousPar, *it); } --cntr; - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it]) - != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(inEdges[oriVert].cbegin(), inEdges[oriVert].cend(), graphPerm[*it]) != inEdges[oriVert].cend()); - previous_par = *it; + previousPar = *it; } } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size()); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1 + inEdges[graphPerm[vert]].size()); } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } } -BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { - computational_dag_edge_idx_vector_impl_def_t graph; +BOOST_AUTO_TEST_CASE(GraphEdgeContruction) { + ComputationalDagEdgeIdxVectorImplDefT graph; - using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx; + using VertexIdx = ComputationalDagEdgeIdxVectorImplDefT::VertexIdx; - vertex_idx v1 = graph.add_vertex(1, 2, 3, 4); - vertex_idx v2 = 
graph.add_vertex(5, 6, 7, 8); - vertex_idx v3 = graph.add_vertex(9, 10, 11, 12); - vertex_idx v4 = graph.add_vertex(13, 14, 15, 16); - vertex_idx v5 = graph.add_vertex(17, 18, 19, 20); - vertex_idx v6 = graph.add_vertex(21, 22, 23, 24); - vertex_idx v7 = graph.add_vertex(25, 26, 27, 28); - vertex_idx v8 = graph.add_vertex(29, 30, 31, 32); + VertexIdx v1 = graph.AddVertex(1, 2, 3, 4); + VertexIdx v2 = graph.AddVertex(5, 6, 7, 8); + VertexIdx v3 = graph.AddVertex(9, 10, 11, 12); + VertexIdx v4 = graph.AddVertex(13, 14, 15, 16); + VertexIdx v5 = graph.AddVertex(17, 18, 19, 20); + VertexIdx v6 = graph.AddVertex(21, 22, 23, 24); + VertexIdx v7 = graph.AddVertex(25, 26, 27, 28); + VertexIdx v8 = graph.AddVertex(29, 30, 31, 32); - auto pair = graph.add_edge(v1, v2); + auto pair = graph.AddEdge(v1, v2); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v1, v3); + pair = graph.AddEdge(v1, v3); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v1, v4); + pair = graph.AddEdge(v1, v4); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v2, v5); + pair = graph.AddEdge(v2, v5); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v2, v7); + pair = graph.AddEdge(v2, v7); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v3, v5); + pair = graph.AddEdge(v3, v5); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v3, v6); + pair = graph.AddEdge(v3, v6); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v4, v8); + pair = graph.AddEdge(v4, v8); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v5, v8); + pair = graph.AddEdge(v5, v8); BOOST_CHECK_EQUAL(pair.second, true); - BOOST_CHECK_EQUAL(graph.num_edges(), 9); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 9); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); - Compact_Sparse_Graph copy_graph(graph.num_vertices(), edge_view(graph)); - BOOST_CHECK_EQUAL(copy_graph.num_vertices(), 8); - 
BOOST_CHECK_EQUAL(copy_graph.num_edges(), 9); + CompactSparseGraph copyGraph(graph.NumVertices(), EdgeView(graph)); + BOOST_CHECK_EQUAL(copyGraph.NumVertices(), 8); + BOOST_CHECK_EQUAL(copyGraph.NumEdges(), 9); - std::vector> out_edges({ + std::vector> outEdges({ {1, 2, 3}, {4, 6}, {4, 5}, @@ -550,16 +548,16 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { {} }); - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.out_degree(vert), out_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.OutDegree(vert), outEdges[vert].size()); std::size_t cntr = 0; - for (const auto &chld : copy_graph.children(vert)) { - BOOST_CHECK_EQUAL(chld, out_edges[vert][cntr]); + for (const auto &chld : copyGraph.Children(vert)) { + BOOST_CHECK_EQUAL(chld, outEdges[vert][cntr]); ++cntr; } } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0}, @@ -570,64 +568,63 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { {3, 4} }); - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.in_degree(vert), in_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.InDegree(vert), inEdges[vert].size()); std::size_t cntr = 0; - for (const auto &par : copy_graph.parents(vert)) { - BOOST_CHECK_EQUAL(par, in_edges[vert][cntr]); + for (const auto &par : copyGraph.Parents(vert)) { + BOOST_CHECK_EQUAL(par, inEdges[vert][cntr]); ++cntr; } } - Compact_Sparse_Graph reorder_graph(graph.num_vertices(), edge_view(graph)); - BOOST_CHECK_EQUAL(reorder_graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(reorder_graph.num_edges(), 9); + CompactSparseGraph reorderGraph(graph.NumVertices(), EdgeView(graph)); + BOOST_CHECK_EQUAL(reorderGraph.NumVertices(), 8); + BOOST_CHECK_EQUAL(reorderGraph.NumEdges(), 9); std::vector perm(8, 0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = reorder_graph.get_pullback_permutation(); - 
BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = reorderGraph.GetPullbackPermutation(); + BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); - for (const auto &vert : reorder_graph.vertices()) { - BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : reorderGraph.Vertices()) { + BOOST_CHECK_EQUAL(reorderGraph.OutDegree(vert), outEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_chld = 0; + std::size_t previousChld = 0; std::size_t cntr = 0; - for (const auto &chld : reorder_graph.children(vert)) { + for (const auto &chld : reorderGraph.Children(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_chld, chld); + BOOST_CHECK_LE(previousChld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) - != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(outEdges[oriVert].cbegin(), outEdges[oriVert].cend(), graphPerm[chld]) + != outEdges[oriVert].cend()); - previous_chld = chld; + previousChld = chld; ++cntr; } } - for (const auto &vert : reorder_graph.vertices()) { - BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : reorderGraph.Vertices()) { + BOOST_CHECK_EQUAL(reorderGraph.InDegree(vert), inEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_par = 0; + std::size_t previousPar = 0; std::size_t cntr = 0; - for (const auto &par : reorder_graph.parents(vert)) { + for (const auto &par : reorderGraph.Parents(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_par, par); + BOOST_CHECK_LE(previousPar, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), 
graph_perm[par]) - != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(inEdges[oriVert].cbegin(), inEdges[oriVert].cend(), graphPerm[par]) != inEdges[oriVert].cend()); - previous_par = par; + previousPar = par; ++cntr; } } } -BOOST_AUTO_TEST_CASE(Graph_work_weights_keep_order) { +BOOST_AUTO_TEST_CASE(GraphWorkWeightsKeepOrder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -645,18 +642,18 @@ BOOST_AUTO_TEST_CASE(Graph_work_weights_keep_order) { std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); - Compact_Sparse_Graph graph(11, edges, ww); + CompactSparseGraph graph(11, edges, ww); - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), ww[vert]); const unsigned wt = static_cast(rand()); - graph.set_vertex_work_weight(vert, wt); - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), wt); + graph.SetVertexWorkWeight(vert, wt); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), wt); } } -BOOST_AUTO_TEST_CASE(Graph_work_weights_reorder) { +BOOST_AUTO_TEST_CASE(GraphWorkWeightsReorder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -674,20 +671,20 @@ BOOST_AUTO_TEST_CASE(Graph_work_weights_reorder) { std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); - Compact_Sparse_Graph graph(11, edges, ww); + CompactSparseGraph graph(11, edges, ww); - const std::vector &graph_perm = graph.get_pullback_permutation(); + const std::vector &graphPerm = graph.GetPullbackPermutation(); - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), ww[graphPerm[vert]]); const unsigned wt = static_cast(rand()); - graph.set_vertex_work_weight(graph_perm[vert], wt); - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), wt); + graph.SetVertexWorkWeight(graphPerm[vert], wt); + 
BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), wt); } } -BOOST_AUTO_TEST_CASE(Graph_comm_weights_keep_order) { +BOOST_AUTO_TEST_CASE(GraphCommWeightsKeepOrder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -708,22 +705,22 @@ BOOST_AUTO_TEST_CASE(Graph_comm_weights_keep_order) { std::vector cw(11); std::iota(cw.begin(), cw.end(), 11); - Compact_Sparse_Graph graph(11, edges, ww, cw); + CompactSparseGraph graph(11, edges, ww, cw); - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), ww[vert]); } - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[vert]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), cw[vert]); const unsigned wt = static_cast(rand()); - graph.set_vertex_comm_weight(vert, wt); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), wt); + graph.SetVertexCommWeight(vert, wt); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), wt); } } -BOOST_AUTO_TEST_CASE(Graph_comm_weights_reorder) { +BOOST_AUTO_TEST_CASE(GraphCommWeightsReorder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -744,24 +741,24 @@ BOOST_AUTO_TEST_CASE(Graph_comm_weights_reorder) { std::vector cw(11); std::iota(cw.begin(), cw.end(), 11); - Compact_Sparse_Graph graph(11, edges, ww, cw); + CompactSparseGraph graph(11, edges, ww, cw); - const std::vector &graph_perm = graph.get_pullback_permutation(); + const std::vector &graphPerm = graph.GetPullbackPermutation(); - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), ww[graphPerm[vert]]); } - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[graph_perm[vert]]); + for (auto vert : graph.Vertices()) { + 
BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), cw[graphPerm[vert]]); const unsigned wt = static_cast(rand()); - graph.set_vertex_comm_weight(graph_perm[vert], wt); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), wt); + graph.SetVertexCommWeight(graphPerm[vert], wt); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), wt); } } -BOOST_AUTO_TEST_CASE(Graph_mem_weights_keep_order) { +BOOST_AUTO_TEST_CASE(GraphMemWeightsKeepOrder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -785,23 +782,23 @@ BOOST_AUTO_TEST_CASE(Graph_mem_weights_keep_order) { std::vector mw(11); std::iota(mw.begin(), mw.end(), 22); - Compact_Sparse_Graph graph(11, edges, ww, cw, mw); + CompactSparseGraph graph(11, edges, ww, cw, mw); - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[vert]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), ww[vert]); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), cw[vert]); } - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[vert]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexMemWeight(vert), mw[vert]); const unsigned wt = static_cast(rand()); - graph.set_vertex_mem_weight(vert, wt); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), wt); + graph.SetVertexMemWeight(vert, wt); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(vert), wt); } } -BOOST_AUTO_TEST_CASE(Graph_mem_weights_reorder) { +BOOST_AUTO_TEST_CASE(GraphMemWeightsReorder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -825,25 +822,25 @@ BOOST_AUTO_TEST_CASE(Graph_mem_weights_reorder) { std::vector mw(11); std::iota(mw.begin(), mw.end(), 22); - Compact_Sparse_Graph graph(11, edges, ww, cw, mw); + CompactSparseGraph graph(11, edges, ww, cw, mw); - const std::vector &graph_perm = graph.get_pullback_permutation(); + const std::vector &graphPerm = graph.GetPullbackPermutation(); - for (auto 
vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[graph_perm[vert]]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), ww[graphPerm[vert]]); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), cw[graphPerm[vert]]); } - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[graph_perm[vert]]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexMemWeight(vert), mw[graphPerm[vert]]); const unsigned wt = static_cast(rand()); - graph.set_vertex_mem_weight(graph_perm[vert], wt); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), wt); + graph.SetVertexMemWeight(graphPerm[vert], wt); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(vert), wt); } } -BOOST_AUTO_TEST_CASE(Graph_vtype_keep_order) { +BOOST_AUTO_TEST_CASE(GraphVtypeKeepOrder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -870,24 +867,24 @@ BOOST_AUTO_TEST_CASE(Graph_vtype_keep_order) { std::vector vt(11); std::iota(vt.begin(), vt.end(), 33); - Compact_Sparse_Graph graph(11, edges, ww, cw, mw, vt); + CompactSparseGraph graph(11, edges, ww, cw, mw, vt); - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[vert]); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[vert]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), ww[vert]); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), cw[vert]); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(vert), mw[vert]); } - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), vt[vert]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), vt[vert]); const unsigned wt = static_cast(rand()); - graph.set_vertex_type(vert, wt); - BOOST_CHECK_EQUAL(graph.vertex_type(vert), wt); + 
graph.SetVertexType(vert, wt); + BOOST_CHECK_EQUAL(graph.VertexType(vert), wt); } } -BOOST_AUTO_TEST_CASE(Graph_vtype_reorder) { +BOOST_AUTO_TEST_CASE(GraphVtypeReorder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -914,85 +911,85 @@ BOOST_AUTO_TEST_CASE(Graph_vtype_reorder) { std::vector vt(11); std::iota(vt.begin(), vt.end(), 33); - Compact_Sparse_Graph graph(11, edges, ww, cw, mw, vt); + CompactSparseGraph graph(11, edges, ww, cw, mw, vt); - const std::vector &graph_perm = graph.get_pullback_permutation(); + const std::vector &graphPerm = graph.GetPullbackPermutation(); - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[graph_perm[vert]]); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[graph_perm[vert]]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), ww[graphPerm[vert]]); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), cw[graphPerm[vert]]); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(vert), mw[graphPerm[vert]]); } - for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), vt[graph_perm[vert]]); + for (auto vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), vt[graphPerm[vert]]); const unsigned wt = static_cast(rand()); - graph.set_vertex_type(graph_perm[vert], wt); - BOOST_CHECK_EQUAL(graph.vertex_type(vert), wt); + graph.SetVertexType(graphPerm[vert], wt); + BOOST_CHECK_EQUAL(graph.VertexType(vert), wt); } } -BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { - computational_dag_edge_idx_vector_impl_def_t graph; +BOOST_AUTO_TEST_CASE(GraphTypeCopyContruction) { + ComputationalDagEdgeIdxVectorImplDefT graph; - using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx; + using VertexIdx = ComputationalDagEdgeIdxVectorImplDefT::VertexIdx; - vertex_idx v1 = graph.add_vertex(1, 2, 3, 4); - vertex_idx v2 = graph.add_vertex(5, 6, 7, 8); - 
vertex_idx v3 = graph.add_vertex(9, 10, 11, 12); - vertex_idx v4 = graph.add_vertex(13, 14, 15, 16); - vertex_idx v5 = graph.add_vertex(17, 18, 19, 20); - vertex_idx v6 = graph.add_vertex(21, 22, 23, 24); - vertex_idx v7 = graph.add_vertex(25, 26, 27, 28); - vertex_idx v8 = graph.add_vertex(29, 30, 31, 32); + VertexIdx v1 = graph.AddVertex(1, 2, 3, 4); + VertexIdx v2 = graph.AddVertex(5, 6, 7, 8); + VertexIdx v3 = graph.AddVertex(9, 10, 11, 12); + VertexIdx v4 = graph.AddVertex(13, 14, 15, 16); + VertexIdx v5 = graph.AddVertex(17, 18, 19, 20); + VertexIdx v6 = graph.AddVertex(21, 22, 23, 24); + VertexIdx v7 = graph.AddVertex(25, 26, 27, 28); + VertexIdx v8 = graph.AddVertex(29, 30, 31, 32); - auto pair = graph.add_edge(v1, v2); + auto pair = graph.AddEdge(v1, v2); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v1, v3); + pair = graph.AddEdge(v1, v3); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v1, v4); + pair = graph.AddEdge(v1, v4); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v2, v5); + pair = graph.AddEdge(v2, v5); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v2, v7); + pair = graph.AddEdge(v2, v7); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v3, v5); + pair = graph.AddEdge(v3, v5); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v3, v6); + pair = graph.AddEdge(v3, v6); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v4, v8); + pair = graph.AddEdge(v4, v8); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v5, v8); + pair = graph.AddEdge(v5, v8); BOOST_CHECK_EQUAL(pair.second, true); - BOOST_CHECK_EQUAL(graph.num_edges(), 9); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - - Compact_Sparse_Graph - copy_graph(graph); - BOOST_CHECK_EQUAL(copy_graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(copy_graph.num_edges(), 9); - - std::vector> out_edges({ + BOOST_CHECK_EQUAL(graph.NumEdges(), 9); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); + + 
CompactSparseGraph + copyGraph(graph); + BOOST_CHECK_EQUAL(copyGraph.NumVertices(), 8); + BOOST_CHECK_EQUAL(copyGraph.NumEdges(), 9); + + std::vector> outEdges({ {1, 2, 3}, {4, 6}, {4, 5}, @@ -1003,23 +1000,23 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { {} }); - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), copy_graph.vertex_work_weight(vert)); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), copy_graph.vertex_comm_weight(vert)); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), copy_graph.vertex_mem_weight(vert)); - BOOST_CHECK_EQUAL(graph.vertex_type(vert), copy_graph.vertex_type(vert)); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), copyGraph.VertexWorkWeight(vert)); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), copyGraph.VertexCommWeight(vert)); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(vert), copyGraph.VertexMemWeight(vert)); + BOOST_CHECK_EQUAL(graph.VertexType(vert), copyGraph.VertexType(vert)); } - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.out_degree(vert), out_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.OutDegree(vert), outEdges[vert].size()); std::size_t cntr = 0; - for (const auto &chld : copy_graph.children(vert)) { - BOOST_CHECK_EQUAL(chld, out_edges[vert][cntr]); + for (const auto &chld : copyGraph.Children(vert)) { + BOOST_CHECK_EQUAL(chld, outEdges[vert][cntr]); ++cntr; } } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0}, @@ -1030,82 +1027,81 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { {3, 4} }); - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.in_degree(vert), in_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.InDegree(vert), inEdges[vert].size()); std::size_t cntr = 0; - for (const auto &par : copy_graph.parents(vert)) 
{ - BOOST_CHECK_EQUAL(par, in_edges[vert][cntr]); + for (const auto &par : copyGraph.Parents(vert)) { + BOOST_CHECK_EQUAL(par, inEdges[vert][cntr]); ++cntr; } } - Compact_Sparse_Graph - reorder_graph(graph); - BOOST_CHECK_EQUAL(reorder_graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(reorder_graph.num_edges(), 9); + CompactSparseGraph + reorderGraph(graph); + BOOST_CHECK_EQUAL(reorderGraph.NumVertices(), 8); + BOOST_CHECK_EQUAL(reorderGraph.NumEdges(), 9); std::vector perm(8, 0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = reorder_graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = reorderGraph.GetPullbackPermutation(); + BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); - for (const auto &vert : reorder_graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(graph_perm[vert]), reorder_graph.vertex_work_weight(vert)); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(graph_perm[vert]), reorder_graph.vertex_comm_weight(vert)); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(graph_perm[vert]), reorder_graph.vertex_mem_weight(vert)); - BOOST_CHECK_EQUAL(graph.vertex_type(graph_perm[vert]), reorder_graph.vertex_type(vert)); + for (const auto &vert : reorderGraph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(graphPerm[vert]), reorderGraph.VertexWorkWeight(vert)); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(graphPerm[vert]), reorderGraph.VertexCommWeight(vert)); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(graphPerm[vert]), reorderGraph.VertexMemWeight(vert)); + BOOST_CHECK_EQUAL(graph.VertexType(graphPerm[vert]), reorderGraph.VertexType(vert)); } - for (const auto &vert : reorder_graph.vertices()) { - BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : 
reorderGraph.Vertices()) { + BOOST_CHECK_EQUAL(reorderGraph.OutDegree(vert), outEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_chld = 0; + std::size_t previousChld = 0; std::size_t cntr = 0; - for (const auto &chld : reorder_graph.children(vert)) { + for (const auto &chld : reorderGraph.Children(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_chld, chld); + BOOST_CHECK_LE(previousChld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) - != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(outEdges[oriVert].cbegin(), outEdges[oriVert].cend(), graphPerm[chld]) + != outEdges[oriVert].cend()); - previous_chld = chld; + previousChld = chld; ++cntr; } } - for (const auto &vert : reorder_graph.vertices()) { - BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : reorderGraph.Vertices()) { + BOOST_CHECK_EQUAL(reorderGraph.InDegree(vert), inEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_par = 0; + std::size_t previousPar = 0; std::size_t cntr = 0; - for (const auto &par : reorder_graph.parents(vert)) { + for (const auto &par : reorderGraph.Parents(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_par, par); + BOOST_CHECK_LE(previousPar, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) - != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(inEdges[oriVert].cbegin(), inEdges[oriVert].cend(), graphPerm[par]) != inEdges[oriVert].cend()); - previous_par = par; + previousPar = par; ++cntr; } } } -BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) { +BOOST_AUTO_TEST_CASE(Graph1CopyKeepOrder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -1120,20 +1116,20 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) { {1, 2} }); - Compact_Sparse_Graph graph(11, edges); - 
Compact_Sparse_Graph copy_graph(graph); + CompactSparseGraph graph(11, edges); + CompactSparseGraph copyGraph(graph); - BOOST_CHECK_EQUAL(copy_graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(copy_graph.num_edges(), 11); + BOOST_CHECK_EQUAL(copyGraph.NumVertices(), 11); + BOOST_CHECK_EQUAL(copyGraph.NumEdges(), 11); - std::size_t cntr_0 = 0; - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(vert, cntr_0); - ++cntr_0; + std::size_t cntr0 = 0; + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(vert, cntr0); + ++cntr0; } - BOOST_CHECK_EQUAL(copy_graph.num_vertices(), cntr_0); + BOOST_CHECK_EQUAL(copyGraph.NumVertices(), cntr0); - std::vector> out_edges({ + std::vector> outEdges({ {1, 2}, {2, 6}, {3}, @@ -1147,16 +1143,16 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) { {} }); - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.out_degree(vert), out_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.OutDegree(vert), outEdges[vert].size()); std::size_t cntr = 0; - for (const auto &chld : copy_graph.children(vert)) { - BOOST_CHECK_EQUAL(chld, out_edges[vert][cntr]); + for (const auto &chld : copyGraph.Children(vert)) { + BOOST_CHECK_EQUAL(chld, outEdges[vert][cntr]); ++cntr; } } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0, 1}, @@ -1170,25 +1166,25 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) { {6} }); - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.in_degree(vert), in_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.InDegree(vert), inEdges[vert].size()); std::size_t cntr = 0; - for (const auto &par : copy_graph.parents(vert)) { - BOOST_CHECK_EQUAL(par, in_edges[vert][cntr]); + for (const auto &par : copyGraph.Parents(vert)) { + BOOST_CHECK_EQUAL(par, inEdges[vert][cntr]); ++cntr; } } - for (const auto &vert : copy_graph.vertices()) { - 
BOOST_CHECK_EQUAL(copy_graph.vertex_work_weight(vert), 1 + in_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.VertexWorkWeight(vert), 1 + inEdges[vert].size()); } - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.vertex_type(vert), 0); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.VertexType(vert), 0); } } -BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) { +BOOST_AUTO_TEST_CASE(Graph1MoveKeepOrder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -1203,20 +1199,20 @@ BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) { {1, 2} }); - Compact_Sparse_Graph graph(11, edges); - Compact_Sparse_Graph copy_graph(std::move(graph)); + CompactSparseGraph graph(11, edges); + CompactSparseGraph copyGraph(std::move(graph)); - BOOST_CHECK_EQUAL(copy_graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(copy_graph.num_edges(), 11); + BOOST_CHECK_EQUAL(copyGraph.NumVertices(), 11); + BOOST_CHECK_EQUAL(copyGraph.NumEdges(), 11); - std::size_t cntr_0 = 0; - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(vert, cntr_0); - ++cntr_0; + std::size_t cntr0 = 0; + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(vert, cntr0); + ++cntr0; } - BOOST_CHECK_EQUAL(copy_graph.num_vertices(), cntr_0); + BOOST_CHECK_EQUAL(copyGraph.NumVertices(), cntr0); - std::vector> out_edges({ + std::vector> outEdges({ {1, 2}, {2, 6}, {3}, @@ -1230,16 +1226,16 @@ BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) { {} }); - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.out_degree(vert), out_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.OutDegree(vert), outEdges[vert].size()); std::size_t cntr = 0; - for (const auto &chld : copy_graph.children(vert)) { - BOOST_CHECK_EQUAL(chld, out_edges[vert][cntr]); + for (const auto &chld : copyGraph.Children(vert)) { + BOOST_CHECK_EQUAL(chld, 
outEdges[vert][cntr]); ++cntr; } } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0, 1}, @@ -1253,25 +1249,25 @@ BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) { {6} }); - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.in_degree(vert), in_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.InDegree(vert), inEdges[vert].size()); std::size_t cntr = 0; - for (const auto &par : copy_graph.parents(vert)) { - BOOST_CHECK_EQUAL(par, in_edges[vert][cntr]); + for (const auto &par : copyGraph.Parents(vert)) { + BOOST_CHECK_EQUAL(par, inEdges[vert][cntr]); ++cntr; } } - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.vertex_work_weight(vert), 1 + in_edges[vert].size()); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.VertexWorkWeight(vert), 1 + inEdges[vert].size()); } - for (const auto &vert : copy_graph.vertices()) { - BOOST_CHECK_EQUAL(copy_graph.vertex_type(vert), 0); + for (const auto &vert : copyGraph.Vertices()) { + BOOST_CHECK_EQUAL(copyGraph.VertexType(vert), 0); } } -BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { +BOOST_AUTO_TEST_CASE(Graph1CopyReorder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -1286,25 +1282,25 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { {1, 2} }); - Compact_Sparse_Graph ori_graph(11, edges); - Compact_Sparse_Graph graph(ori_graph); + CompactSparseGraph oriGraph(11, edges); + CompactSparseGraph graph(oriGraph); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 11); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 11); - std::size_t cntr_0 = 0; - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(vert, cntr_0); - ++cntr_0; + std::size_t cntr0 = 0; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(vert, cntr0); + ++cntr0; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr_0); + 
BOOST_CHECK_EQUAL(graph.NumVertices(), cntr0); std::vector perm(11, 0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = graph.GetPullbackPermutation(); + BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); - std::vector> out_edges({ + std::vector> outEdges({ {1, 2}, {2, 6}, {3}, @@ -1318,26 +1314,26 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { {} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), outEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_chld = 0; + std::size_t previousChld = 0; std::size_t cntr = 0; - for (const auto &chld : graph.children(vert)) { + for (const auto &chld : graph.Children(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_chld, chld); + BOOST_CHECK_LE(previousChld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) - != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(outEdges[oriVert].cbegin(), outEdges[oriVert].cend(), graphPerm[chld]) + != outEdges[oriVert].cend()); - previous_chld = chld; + previousChld = chld; ++cntr; } } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0, 1}, @@ -1351,35 +1347,34 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { {6} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), inEdges[graphPerm[vert]].size()); + std::size_t oriVert = 
graphPerm[vert]; - std::size_t previous_par = 0; + std::size_t previousPar = 0; std::size_t cntr = 0; - for (const auto &par : graph.parents(vert)) { + for (const auto &par : graph.Parents(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_par, par); + BOOST_CHECK_LE(previousPar, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) - != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(inEdges[oriVert].cbegin(), inEdges[oriVert].cend(), graphPerm[par]) != inEdges[oriVert].cend()); - previous_par = par; + previousPar = par; ++cntr; } } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size()); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1 + inEdges[graphPerm[vert]].size()); } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } } -BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { +BOOST_AUTO_TEST_CASE(Graph1MoveReorder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -1394,25 +1389,25 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { {1, 2} }); - Compact_Sparse_Graph ori_graph(11, edges); - Compact_Sparse_Graph graph(std::move(ori_graph)); + CompactSparseGraph oriGraph(11, edges); + CompactSparseGraph graph(std::move(oriGraph)); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 11); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 11); - std::size_t cntr_0 = 0; - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(vert, cntr_0); - ++cntr_0; + std::size_t cntr0 = 0; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(vert, cntr0); + ++cntr0; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr_0); + BOOST_CHECK_EQUAL(graph.NumVertices(), cntr0); std::vector perm(11, 
0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = graph.GetPullbackPermutation(); + BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); - std::vector> out_edges({ + std::vector> outEdges({ {1, 2}, {2, 6}, {3}, @@ -1426,26 +1421,26 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { {} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), outEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_chld = 0; + std::size_t previousChld = 0; std::size_t cntr = 0; - for (const auto &chld : graph.children(vert)) { + for (const auto &chld : graph.Children(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_chld, chld); + BOOST_CHECK_LE(previousChld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) - != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(outEdges[oriVert].cbegin(), outEdges[oriVert].cend(), graphPerm[chld]) + != outEdges[oriVert].cend()); - previous_chld = chld; + previousChld = chld; ++cntr; } } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0, 1}, @@ -1459,30 +1454,29 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { {6} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), inEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_par = 0; + std::size_t 
previousPar = 0; std::size_t cntr = 0; - for (const auto &par : graph.parents(vert)) { + for (const auto &par : graph.Parents(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_par, par); + BOOST_CHECK_LE(previousPar, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) - != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(inEdges[oriVert].cbegin(), inEdges[oriVert].cend(), graphPerm[par]) != inEdges[oriVert].cend()); - previous_par = par; + previousPar = par; ++cntr; } } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size()); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1 + inEdges[graphPerm[vert]].size()); } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } } diff --git a/tests/compact_sparse_graph_edge_desc.cpp b/tests/compact_sparse_graph_edge_desc.cpp index a8a4957b..8e87816e 100644 --- a/tests/compact_sparse_graph_edge_desc.cpp +++ b/tests/compact_sparse_graph_edge_desc.cpp @@ -23,125 +23,125 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(Empty_Graph_keep_order) { - Compact_Sparse_Graph_EdgeDesc graph; +BOOST_AUTO_TEST_CASE(EmptyGraphKeepOrder) { + CompactSparseGraphEdgeDesc graph; - BOOST_CHECK_EQUAL(graph.num_vertices(), 0); - BOOST_CHECK_EQUAL(graph.num_edges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 0); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); - for (const auto &edge : graph.edges()) { + for (const auto &edge : graph.Edges()) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); - BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); + BOOST_CHECK_EQUAL(edge, graph.Edge(graph.Source(edge), graph.Target(edge))); } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(vert, 100); - for (const auto &edge : graph.in_edges(vert)) { + for (const auto &edge : graph.InEdges(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); } - for (const auto &edge : graph.out_edges(vert)) { + for (const auto &edge : graph.OutEdges(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); } } } -BOOST_AUTO_TEST_CASE(Empty_Graph_reorder) { - Compact_Sparse_Graph_EdgeDesc graph; +BOOST_AUTO_TEST_CASE(EmptyGraphReorder) { + CompactSparseGraphEdgeDesc graph; - BOOST_CHECK_EQUAL(graph.num_vertices(), 0); - BOOST_CHECK_EQUAL(graph.num_edges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 0); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); - for (const auto &edge : graph.edges()) { + for (const auto &edge : graph.Edges()) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); - BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); + BOOST_CHECK_EQUAL(edge, graph.Edge(graph.Source(edge), graph.Target(edge))); } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(vert, 100); - for (const auto &edge : graph.in_edges(vert)) { + for (const auto &edge : graph.InEdges(vert)) { 
BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); } - for (const auto &edge : graph.out_edges(vert)) { + for (const auto &edge : graph.OutEdges(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); } } } -BOOST_AUTO_TEST_CASE(No_Edges_Graph_keep_order) { +BOOST_AUTO_TEST_CASE(NoEdgesGraphKeepOrder) { const std::vector> edges({}); - Compact_Sparse_Graph_EdgeDesc graph(10, edges); + CompactSparseGraphEdgeDesc graph(10, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 10); - BOOST_CHECK_EQUAL(graph.num_edges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 10); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); - for (const auto &edge : graph.edges()) { + for (const auto &edge : graph.Edges()) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); - BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); + BOOST_CHECK_EQUAL(edge, graph.Edge(graph.Source(edge), graph.Target(edge))); } - std::size_t vert_counter = 0; - for (const auto &vert : graph.vertices()) { - vert_counter++; + std::size_t vertCounter = 0; + for (const auto &vert : graph.Vertices()) { + vertCounter++; - for (const auto &edge : graph.in_edges(vert)) { + for (const auto &edge : graph.InEdges(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); } - for (const auto &edge : graph.out_edges(vert)) { + for (const auto &edge : graph.OutEdges(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); } } - BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices()); + BOOST_CHECK_EQUAL(vertCounter, graph.NumVertices()); } -BOOST_AUTO_TEST_CASE(No_Edges_Graph_reorder) { +BOOST_AUTO_TEST_CASE(NoEdgesGraphReorder) { const std::vector> edges({}); - Compact_Sparse_Graph_EdgeDesc graph(10, edges); + CompactSparseGraphEdgeDesc graph(10, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 10); - BOOST_CHECK_EQUAL(graph.num_edges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 10); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); - std::size_t vert_counter = 0; - for (const auto &vert : graph.vertices()) { - 
vert_counter++; + std::size_t vertCounter = 0; + for (const auto &vert : graph.Vertices()) { + vertCounter++; - for (const auto &edge : graph.in_edges(vert)) { + for (const auto &edge : graph.InEdges(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); } - for (const auto &edge : graph.out_edges(vert)) { + for (const auto &edge : graph.OutEdges(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(edge, 100); } } - BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices()); + BOOST_CHECK_EQUAL(vertCounter, graph.NumVertices()); std::vector perm(10, 0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = graph.GetPullbackPermutation(); + BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); } -BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { +BOOST_AUTO_TEST_CASE(LineGraphKeepOrder) { const std::vector> edges({ {0, 1}, {1, 2}, @@ -152,63 +152,63 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { {6, 7} }); - Compact_Sparse_Graph_EdgeDesc graph(8, edges); + CompactSparseGraphEdgeDesc graph(8, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(graph.num_edges(), 7); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 7); std::size_t cntr = 0; - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { BOOST_CHECK_EQUAL(vert, cntr); ++cntr; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr); + BOOST_CHECK_EQUAL(graph.NumVertices(), cntr); - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 7) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), 1); - for (const std::size_t &chld : graph.children(vert)) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), 1); + for (const std::size_t &chld : graph.Children(vert)) { 
BOOST_CHECK_EQUAL(chld, vert + 1); } - auto chldren = graph.children(vert); - BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.out_degree(vert)); + auto chldren = graph.Children(vert); + BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.OutDegree(vert)); for (auto it = chldren.crbegin(); it != chldren.crend(); ++it) { BOOST_CHECK_EQUAL(*it, vert + 1); } } else { - BOOST_CHECK_EQUAL(graph.out_degree(vert), 0); - for (const std::size_t &chld : graph.children(vert)) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), 0); + for (const std::size_t &chld : graph.Children(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(chld, 100); } - auto chldren = graph.children(vert); - BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.out_degree(vert)); + auto chldren = graph.Children(vert); + BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.OutDegree(vert)); for (auto it = chldren.crbegin(); it != chldren.crend(); ++it) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(*it, 100); } } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 0) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), 1); - for (const std::size_t &par : graph.parents(vert)) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), 1); + for (const std::size_t &par : graph.Parents(vert)) { BOOST_CHECK_EQUAL(par, vert - 1); } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { BOOST_CHECK_EQUAL(*it, vert - 1); } } else { - BOOST_CHECK_EQUAL(graph.in_degree(vert), 0); - for (const std::size_t &par : graph.parents(vert)) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), 0); + for (const std::size_t &par : graph.Parents(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(par, 100); } - auto prnts = graph.parents(vert); - 
BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(*it, 100); @@ -216,68 +216,68 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 0) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 2); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 2); } else { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1); } } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } - std::size_t edge_counter = 0; - for (const auto &edge : graph.edges()) { - BOOST_CHECK_EQUAL(graph.source(edge), edge_counter); - BOOST_CHECK_EQUAL(graph.target(edge), edge_counter + 1); + std::size_t edgeCounter = 0; + for (const auto &edge : graph.Edges()) { + BOOST_CHECK_EQUAL(graph.Source(edge), edgeCounter); + BOOST_CHECK_EQUAL(graph.Target(edge), edgeCounter + 1); - BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); + BOOST_CHECK_EQUAL(edge, graph.Edge(graph.Source(edge), graph.Target(edge))); - ++edge_counter; + ++edgeCounter; } - BOOST_CHECK_EQUAL(edge_counter, graph.num_edges()); + BOOST_CHECK_EQUAL(edgeCounter, graph.NumEdges()); - edge_counter = 0; - for (const auto &edge : osp::edges(graph)) { - BOOST_CHECK_EQUAL(source(edge, graph), edge_counter); - BOOST_CHECK_EQUAL(target(edge, graph), edge_counter + 1); + edgeCounter = 0; + for (const auto &edge : osp::Edges(graph)) { + BOOST_CHECK_EQUAL(Source(edge, graph), edgeCounter); + BOOST_CHECK_EQUAL(Target(edge, graph), edgeCounter + 1); - BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), 
graph.target(edge))); + BOOST_CHECK_EQUAL(edge, graph.Edge(graph.Source(edge), graph.Target(edge))); - ++edge_counter; + ++edgeCounter; } - BOOST_CHECK_EQUAL(edge_counter, graph.num_edges()); + BOOST_CHECK_EQUAL(edgeCounter, graph.NumEdges()); - std::size_t vert_counter = 0; - for (const auto &vert : graph.vertices()) { - for (const auto &edge : graph.in_edges(vert)) { - BOOST_CHECK_EQUAL(graph.source(edge), vert - 1); - BOOST_CHECK_EQUAL(graph.target(edge), vert); + std::size_t vertCounter = 0; + for (const auto &vert : graph.Vertices()) { + for (const auto &edge : graph.InEdges(vert)) { + BOOST_CHECK_EQUAL(graph.Source(edge), vert - 1); + BOOST_CHECK_EQUAL(graph.Target(edge), vert); } - for (const auto &edge : in_edges(vert, graph)) { - BOOST_CHECK_EQUAL(source(edge, graph), vert - 1); - BOOST_CHECK_EQUAL(target(edge, graph), vert); + for (const auto &edge : InEdges(vert, graph)) { + BOOST_CHECK_EQUAL(Source(edge, graph), vert - 1); + BOOST_CHECK_EQUAL(Target(edge, graph), vert); } - for (const auto &edge : graph.out_edges(vert)) { - BOOST_CHECK_EQUAL(graph.source(edge), vert); - BOOST_CHECK_EQUAL(graph.target(edge), vert + 1); + for (const auto &edge : graph.OutEdges(vert)) { + BOOST_CHECK_EQUAL(graph.Source(edge), vert); + BOOST_CHECK_EQUAL(graph.Target(edge), vert + 1); } - for (const auto &edge : out_edges(vert, graph)) { - BOOST_CHECK_EQUAL(source(edge, graph), vert); - BOOST_CHECK_EQUAL(target(edge, graph), vert + 1); + for (const auto &edge : OutEdges(vert, graph)) { + BOOST_CHECK_EQUAL(Source(edge, graph), vert); + BOOST_CHECK_EQUAL(Target(edge, graph), vert + 1); } - ++vert_counter; + ++vertCounter; } - BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices()); + BOOST_CHECK_EQUAL(vertCounter, graph.NumVertices()); } -BOOST_AUTO_TEST_CASE(LineGraph_reorder) { +BOOST_AUTO_TEST_CASE(LineGraphReorder) { const std::vector> edges({ {0, 1}, {1, 2}, @@ -288,62 +288,62 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) { {6, 7} }); - Compact_Sparse_Graph_EdgeDesc graph(8, 
edges); + CompactSparseGraphEdgeDesc graph(8, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(graph.num_edges(), 7); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 7); std::size_t cntr = 0; - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { BOOST_CHECK_EQUAL(vert, cntr); ++cntr; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr); + BOOST_CHECK_EQUAL(graph.NumVertices(), cntr); - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 7) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), 1); - for (const std::size_t &chld : graph.children(vert)) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), 1); + for (const std::size_t &chld : graph.Children(vert)) { BOOST_CHECK_EQUAL(chld, vert + 1); } - auto chldren = graph.children(vert); - BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.out_degree(vert)); + auto chldren = graph.Children(vert); + BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.OutDegree(vert)); for (auto it = chldren.crbegin(); it != chldren.crend(); ++it) { BOOST_CHECK_EQUAL(*it, vert + 1); } } else { - BOOST_CHECK_EQUAL(graph.out_degree(vert), 0); - for (const std::size_t &chld : graph.children(vert)) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), 0); + for (const std::size_t &chld : graph.Children(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(chld, 100); } - auto chldren = graph.children(vert); - BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.out_degree(vert)); + auto chldren = graph.Children(vert); + BOOST_CHECK_EQUAL(chldren.crend() - chldren.crbegin(), graph.OutDegree(vert)); for (auto it = chldren.crbegin(); it != chldren.crend(); ++it) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(*it, 100); } } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 0) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), 1); - for (const std::size_t &par : 
graph.parents(vert)) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), 1); + for (const std::size_t &par : graph.Parents(vert)) { BOOST_CHECK_EQUAL(par, vert - 1); } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { BOOST_CHECK_EQUAL(*it, vert - 1); } } else { - BOOST_CHECK_EQUAL(graph.in_degree(vert), 0); - for (const std::size_t &par : graph.parents(vert)) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), 0); + for (const std::size_t &par : graph.Parents(vert)) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(par, 100); } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { BOOST_CHECK(false); BOOST_CHECK_EQUAL(*it, 100); @@ -351,55 +351,55 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) { } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { if (vert != 0) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 2); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 2); } else { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1); } } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } std::vector perm(8, 0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = graph.GetPullbackPermutation(); + 
BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(perm[vert], graph_perm[vert]); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(perm[vert], graphPerm[vert]); } - std::size_t edge_counter = 0; - for (const auto &edge : graph.edges()) { - BOOST_CHECK_EQUAL(graph.source(edge), edge_counter); - BOOST_CHECK_EQUAL(graph.target(edge), edge_counter + 1); + std::size_t edgeCounter = 0; + for (const auto &edge : graph.Edges()) { + BOOST_CHECK_EQUAL(graph.Source(edge), edgeCounter); + BOOST_CHECK_EQUAL(graph.Target(edge), edgeCounter + 1); - BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); + BOOST_CHECK_EQUAL(edge, graph.Edge(graph.Source(edge), graph.Target(edge))); - ++edge_counter; + ++edgeCounter; } - BOOST_CHECK_EQUAL(edge_counter, graph.num_edges()); + BOOST_CHECK_EQUAL(edgeCounter, graph.NumEdges()); - std::size_t vert_counter = 0; - for (const auto &vert : graph.vertices()) { - for (const auto &edge : graph.in_edges(vert)) { - BOOST_CHECK_EQUAL(graph.source(edge), vert - 1); - BOOST_CHECK_EQUAL(graph.target(edge), vert); + std::size_t vertCounter = 0; + for (const auto &vert : graph.Vertices()) { + for (const auto &edge : graph.InEdges(vert)) { + BOOST_CHECK_EQUAL(graph.Source(edge), vert - 1); + BOOST_CHECK_EQUAL(graph.Target(edge), vert); } - for (const auto &edge : graph.out_edges(vert)) { - BOOST_CHECK_EQUAL(graph.source(edge), vert); - BOOST_CHECK_EQUAL(graph.target(edge), vert + 1); + for (const auto &edge : graph.OutEdges(vert)) { + BOOST_CHECK_EQUAL(graph.Source(edge), vert); + BOOST_CHECK_EQUAL(graph.Target(edge), vert + 1); } - ++vert_counter; + ++vertCounter; } - BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices()); + BOOST_CHECK_EQUAL(vertCounter, graph.NumVertices()); } -BOOST_AUTO_TEST_CASE(Graph1_keep_order) { +BOOST_AUTO_TEST_CASE(Graph1KeepOrder) { const std::vector> edges({ {0, 1}, {2, 3}, 
@@ -414,19 +414,19 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { {1, 2} }); - Compact_Sparse_Graph_EdgeDesc graph(11, edges); + CompactSparseGraphEdgeDesc graph(11, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 11); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 11); - std::size_t cntr_0 = 0; - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(vert, cntr_0); - ++cntr_0; + std::size_t cntr0 = 0; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(vert, cntr0); + ++cntr0; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr_0); + BOOST_CHECK_EQUAL(graph.NumVertices(), cntr0); - std::vector> out_edges({ + std::vector> outEdges({ {1, 2}, {2, 6}, {3}, @@ -440,32 +440,32 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { {} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[vert].size()); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), outEdges[vert].size()); std::size_t cntr = 0; - for (const auto &chld : graph.children(vert)) { - BOOST_CHECK_EQUAL(chld, out_edges[vert][cntr]); + for (const auto &chld : graph.Children(vert)) { + BOOST_CHECK_EQUAL(chld, outEdges[vert][cntr]); ++cntr; } - auto chldrn = graph.children(vert); - BOOST_CHECK_EQUAL(chldrn.crend() - chldrn.crbegin(), graph.out_degree(vert)); + auto chldrn = graph.Children(vert); + BOOST_CHECK_EQUAL(chldrn.crend() - chldrn.crbegin(), graph.OutDegree(vert)); for (auto it = chldrn.crbegin(); it != chldrn.crend(); ++it) { --cntr; - BOOST_CHECK_EQUAL(*it, out_edges[vert][cntr]); + BOOST_CHECK_EQUAL(*it, outEdges[vert][cntr]); } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { std::size_t cntr = 0; - for (const auto &edge : graph.out_edges(vert)) { - BOOST_CHECK_EQUAL(graph.source(edge), vert); - BOOST_CHECK_EQUAL(graph.target(edge), out_edges[vert][cntr]); + for (const auto &edge 
: graph.OutEdges(vert)) { + BOOST_CHECK_EQUAL(graph.Source(edge), vert); + BOOST_CHECK_EQUAL(graph.Target(edge), outEdges[vert][cntr]); ++cntr; } - BOOST_CHECK_EQUAL(cntr, graph.out_degree(vert)); + BOOST_CHECK_EQUAL(cntr, graph.OutDegree(vert)); } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0, 1}, @@ -479,60 +479,60 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { {6} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[vert].size()); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), inEdges[vert].size()); std::size_t cntr = 0; - for (const auto &par : graph.parents(vert)) { - BOOST_CHECK_EQUAL(par, in_edges[vert][cntr]); + for (const auto &par : graph.Parents(vert)) { + BOOST_CHECK_EQUAL(par, inEdges[vert][cntr]); ++cntr; } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { --cntr; - BOOST_CHECK_EQUAL(*it, in_edges[vert][cntr]); + BOOST_CHECK_EQUAL(*it, inEdges[vert][cntr]); } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { std::size_t cntr = 0; - for (const auto &edge : graph.in_edges(vert)) { - BOOST_CHECK_EQUAL(graph.source(edge), in_edges[vert][cntr]); - BOOST_CHECK_EQUAL(graph.target(edge), vert); + for (const auto &edge : graph.InEdges(vert)) { + BOOST_CHECK_EQUAL(graph.Source(edge), inEdges[vert][cntr]); + BOOST_CHECK_EQUAL(graph.Target(edge), vert); ++cntr; } - BOOST_CHECK_EQUAL(cntr, graph.in_degree(vert)); + BOOST_CHECK_EQUAL(cntr, graph.InDegree(vert)); } - std::size_t edge_cntr = 0; - for (const auto &edge : graph.edges()) { - BOOST_CHECK_EQUAL(edge, edge_cntr); + std::size_t edgeCntr = 0; + for (const auto &edge : graph.Edges()) { + BOOST_CHECK_EQUAL(edge, edgeCntr); - 
BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); + BOOST_CHECK_EQUAL(edge, graph.Edge(graph.Source(edge), graph.Target(edge))); - ++edge_cntr; + ++edgeCntr; } - BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges()); + BOOST_CHECK_EQUAL(edgeCntr, graph.NumEdges()); - edge_cntr = 0; - for (const auto &vert : graph.vertices()) { - for (const auto &edge : graph.out_edges(vert)) { - BOOST_CHECK_EQUAL(edge, edge_cntr); - ++edge_cntr; + edgeCntr = 0; + for (const auto &vert : graph.Vertices()) { + for (const auto &edge : graph.OutEdges(vert)) { + BOOST_CHECK_EQUAL(edge, edgeCntr); + ++edgeCntr; } } - BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges()); + BOOST_CHECK_EQUAL(edgeCntr, graph.NumEdges()); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[vert].size()); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1 + inEdges[vert].size()); } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } } -BOOST_AUTO_TEST_CASE(Graph1_reorder) { +BOOST_AUTO_TEST_CASE(Graph1Reorder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -547,24 +547,24 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { {1, 2} }); - Compact_Sparse_Graph_EdgeDesc graph(11, edges); + CompactSparseGraphEdgeDesc graph(11, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 11); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 11); - std::size_t cntr_0 = 0; - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(vert, cntr_0); - ++cntr_0; + std::size_t cntr0 = 0; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(vert, cntr0); + ++cntr0; } - BOOST_CHECK_EQUAL(graph.num_vertices(), cntr_0); + BOOST_CHECK_EQUAL(graph.NumVertices(), cntr0); std::vector perm(11, 0); 
std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = graph.GetPullbackPermutation(); + BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); - std::vector> out_edges({ + std::vector> outEdges({ {1, 2}, {2, 6}, {3}, @@ -578,48 +578,48 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { {} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.OutDegree(vert), outEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_chld = 0; + std::size_t previousChld = 0; std::size_t cntr = 0; - for (const auto &chld : graph.children(vert)) { + for (const auto &chld : graph.Children(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_chld, chld); + BOOST_CHECK_LE(previousChld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) - != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(outEdges[oriVert].cbegin(), outEdges[oriVert].cend(), graphPerm[chld]) + != outEdges[oriVert].cend()); - previous_chld = chld; + previousChld = chld; ++cntr; } - auto chldrn = graph.children(vert); - BOOST_CHECK_EQUAL(chldrn.crend() - chldrn.crbegin(), graph.out_degree(vert)); + auto chldrn = graph.Children(vert); + BOOST_CHECK_EQUAL(chldrn.crend() - chldrn.crbegin(), graph.OutDegree(vert)); for (auto it = chldrn.crbegin(); it != chldrn.crend(); ++it) { - if (cntr < graph.out_degree(vert)) { - BOOST_CHECK_GE(previous_chld, *it); + if (cntr < graph.OutDegree(vert)) { + BOOST_CHECK_GE(previousChld, *it); } --cntr; - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), 
graph_perm[*it]) - != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(outEdges[oriVert].cbegin(), outEdges[oriVert].cend(), graphPerm[*it]) + != outEdges[oriVert].cend()); - previous_chld = *it; + previousChld = *it; } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { std::size_t cntr = 0; - for (const auto &edge : graph.out_edges(vert)) { - BOOST_CHECK_EQUAL(graph.source(edge), vert); + for (const auto &edge : graph.OutEdges(vert)) { + BOOST_CHECK_EQUAL(graph.Source(edge), vert); ++cntr; } - BOOST_CHECK_EQUAL(cntr, graph.out_degree(vert)); + BOOST_CHECK_EQUAL(cntr, graph.OutDegree(vert)); } - std::vector> in_edges({ + std::vector> inEdges({ {}, {0}, {0, 1}, @@ -633,76 +633,74 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { {6} }); - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size()); - std::size_t ori_vert = graph_perm[vert]; + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.InDegree(vert), inEdges[graphPerm[vert]].size()); + std::size_t oriVert = graphPerm[vert]; - std::size_t previous_par = 0; + std::size_t previousPar = 0; std::size_t cntr = 0; - for (const auto &par : graph.parents(vert)) { + for (const auto &par : graph.Parents(vert)) { if (cntr > 0) { - BOOST_CHECK_LE(previous_par, par); + BOOST_CHECK_LE(previousPar, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) - != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(inEdges[oriVert].cbegin(), inEdges[oriVert].cend(), graphPerm[par]) != inEdges[oriVert].cend()); - previous_par = par; + previousPar = par; ++cntr; } - auto prnts = graph.parents(vert); - BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.in_degree(vert)); + auto prnts = graph.Parents(vert); + BOOST_CHECK_EQUAL(prnts.crend() - prnts.crbegin(), graph.InDegree(vert)); for (auto it = prnts.crbegin(); it != prnts.crend(); ++it) { - if (cntr < 
graph.out_degree(vert)) { - BOOST_CHECK_GE(previous_par, *it); + if (cntr < graph.OutDegree(vert)) { + BOOST_CHECK_GE(previousPar, *it); } --cntr; - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it]) - != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(inEdges[oriVert].cbegin(), inEdges[oriVert].cend(), graphPerm[*it]) != inEdges[oriVert].cend()); - previous_par = *it; + previousPar = *it; } } - for (const auto &vert : graph.vertices()) { + for (const auto &vert : graph.Vertices()) { std::size_t cntr = 0; - for (const auto &edge : graph.in_edges(vert)) { - BOOST_CHECK_EQUAL(graph.target(edge), vert); + for (const auto &edge : graph.InEdges(vert)) { + BOOST_CHECK_EQUAL(graph.Target(edge), vert); ++cntr; } - BOOST_CHECK_EQUAL(cntr, graph.in_degree(vert)); + BOOST_CHECK_EQUAL(cntr, graph.InDegree(vert)); } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size()); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), 1 + inEdges[graphPerm[vert]].size()); } - for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); + for (const auto &vert : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexType(vert), 0); } - std::size_t edge_cntr = 0; - for (const auto &edge : graph.edges()) { - BOOST_CHECK_EQUAL(edge, edge_cntr); + std::size_t edgeCntr = 0; + for (const auto &edge : graph.Edges()) { + BOOST_CHECK_EQUAL(edge, edgeCntr); - BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); + BOOST_CHECK_EQUAL(edge, graph.Edge(graph.Source(edge), graph.Target(edge))); - ++edge_cntr; + ++edgeCntr; } - BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges()); + BOOST_CHECK_EQUAL(edgeCntr, graph.NumEdges()); - edge_cntr = 0; - for (const auto &vert : graph.vertices()) { - for (const auto &edge : graph.out_edges(vert)) { - BOOST_CHECK_EQUAL(edge, edge_cntr); - 
++edge_cntr; + edgeCntr = 0; + for (const auto &vert : graph.Vertices()) { + for (const auto &edge : graph.OutEdges(vert)) { + BOOST_CHECK_EQUAL(edge, edgeCntr); + ++edgeCntr; } } - BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges()); + BOOST_CHECK_EQUAL(edgeCntr, graph.NumEdges()); } -BOOST_AUTO_TEST_CASE(Graph1_e_comm_keep_order) { +BOOST_AUTO_TEST_CASE(Graph1ECommKeepOrder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -716,31 +714,31 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_keep_order) { {3, 7}, {1, 2} }); - const std::vector edge_weights({3, 6, 12, 874, 134, 67, 234, 980, 123, 152, 34}); + const std::vector edgeWeights({3, 6, 12, 874, 134, 67, 234, 980, 123, 152, 34}); - Compact_Sparse_Graph_EdgeDesc graph(11, edges); + CompactSparseGraphEdgeDesc graph(11, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 11); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 11); for (std::size_t i = 0; i < edges.size(); ++i) { const auto &[src, tgt] = edges[i]; - graph.set_edge_comm_weight(src, tgt, edge_weights[i]); + graph.SetEdgeCommWeight(src, tgt, edgeWeights[i]); } - for (const auto &edge : graph.edges()) { - const auto src = graph.source(edge); - const auto tgt = graph.target(edge); + for (const auto &edge : graph.Edges()) { + const auto src = graph.Source(edge); + const auto tgt = graph.Target(edge); auto it = std::find(edges.cbegin(), edges.cend(), std::make_pair(src, tgt)); BOOST_CHECK(it != edges.cend()); auto ind = std::distance(edges.cbegin(), it); - BOOST_CHECK_EQUAL(edge_weights[static_cast(ind)], graph.edge_comm_weight(edge)); + BOOST_CHECK_EQUAL(edgeWeights[static_cast(ind)], graph.EdgeCommWeight(edge)); } } -BOOST_AUTO_TEST_CASE(Graph1_e_comm_reorder) { +BOOST_AUTO_TEST_CASE(Graph1ECommReorder) { const std::vector> edges({ {0, 1}, {2, 3}, @@ -754,31 +752,31 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_reorder) { {3, 7}, {1, 2} }); - const std::vector edge_weights({3, 6, 12, 874, 134, 67, 234, 
980, 123, 152, 34}); + const std::vector edgeWeights({3, 6, 12, 874, 134, 67, 234, 980, 123, 152, 34}); - Compact_Sparse_Graph_EdgeDesc graph(11, edges); + CompactSparseGraphEdgeDesc graph(11, edges); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 11); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 11); std::vector perm(11, 0); std::iota(perm.begin(), perm.end(), 0); - const std::vector &graph_perm = graph.get_pullback_permutation(); - BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); + const std::vector &graphPerm = graph.GetPullbackPermutation(); + BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graphPerm.cbegin(), graphPerm.cend())); for (std::size_t i = 0; i < edges.size(); ++i) { const auto &[src, tgt] = edges[i]; - graph.set_edge_comm_weight(src, tgt, edge_weights[i]); + graph.SetEdgeCommWeight(src, tgt, edgeWeights[i]); } - for (const auto &edge : graph.edges()) { - const auto src = graph_perm[graph.source(edge)]; - const auto tgt = graph_perm[graph.target(edge)]; + for (const auto &edge : graph.Edges()) { + const auto src = graphPerm[graph.Source(edge)]; + const auto tgt = graphPerm[graph.Target(edge)]; auto it = std::find(edges.cbegin(), edges.cend(), std::make_pair(src, tgt)); BOOST_CHECK(it != edges.cend()); auto ind = std::distance(edges.cbegin(), it); - BOOST_CHECK_EQUAL(edge_weights[static_cast(ind)], graph.edge_comm_weight(edge)); + BOOST_CHECK_EQUAL(edgeWeights[static_cast(ind)], graph.EdgeCommWeight(edge)); } } diff --git a/tests/connected_components_part.cpp b/tests/connected_components_part.cpp index 57031311..6944c637 100644 --- a/tests/connected_components_part.cpp +++ b/tests/connected_components_part.cpp @@ -27,97 +27,97 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(ConnectedComponentPart_test) { - BspInstance instance; - computational_dag_vector_impl_def_int_t &dag = instance.getComputationalDag(); - using VertexType = vertex_idx_t; +BOOST_AUTO_TEST_CASE(ConnectedComponentPartTest) { + BspInstance instance; + ComputationalDagVectorImplDefIntT &dag = instance.GetComputationalDag(); + using VertexType = VertexIdxT; - BOOST_CHECK_EQUAL(dag.num_vertices(), 0); - BOOST_CHECK_EQUAL(dag.num_edges(), 0); + BOOST_CHECK_EQUAL(dag.NumVertices(), 0); + BOOST_CHECK_EQUAL(dag.NumEdges(), 0); - VertexType v1 = dag.add_vertex(2, 1, 2); - VertexType v2 = dag.add_vertex(3, 1, 2); - VertexType v3 = dag.add_vertex(4, 1, 2); - VertexType v4 = dag.add_vertex(5, 1, 2); - VertexType v5 = dag.add_vertex(6, 1, 2); - VertexType v6 = dag.add_vertex(7, 1, 2); - VertexType v7 = dag.add_vertex(8, 1, 2); - VertexType v8 = dag.add_vertex(9, 1, 2); + VertexType v1 = dag.AddVertex(2, 1, 2); + VertexType v2 = dag.AddVertex(3, 1, 2); + VertexType v3 = dag.AddVertex(4, 1, 2); + VertexType v4 = dag.AddVertex(5, 1, 2); + VertexType v5 = dag.AddVertex(6, 1, 2); + VertexType v6 = dag.AddVertex(7, 1, 2); + VertexType v7 = dag.AddVertex(8, 1, 2); + VertexType v8 = dag.AddVertex(9, 1, 2); - BOOST_CHECK_EQUAL(dag.num_vertices(), 8); - BOOST_CHECK_EQUAL(dag.num_edges(), 0); + BOOST_CHECK_EQUAL(dag.NumVertices(), 8); + BOOST_CHECK_EQUAL(dag.NumEdges(), 0); - dag.add_edge(v1, v2); - dag.add_edge(v1, v3); - dag.add_edge(v1, v4); - dag.add_edge(v2, v5); - dag.add_edge(v3, v6); - dag.add_edge(v3, v5); - dag.add_edge(v2, v7); - dag.add_edge(v5, v8); - dag.add_edge(v4, v8); + dag.AddEdge(v1, v2); + dag.AddEdge(v1, v3); + dag.AddEdge(v1, v4); + dag.AddEdge(v2, v5); + dag.AddEdge(v3, v6); + dag.AddEdge(v3, v5); + dag.AddEdge(v2, v7); + dag.AddEdge(v5, v8); + dag.AddEdge(v4, v8); - ConnectedComponentDivider partitioner; + ConnectedComponentDivider partitioner; - partitioner.divide(dag); + partitioner.Divide(dag); - GreedyBspScheduler 
bsp_scheduler; - ConnectedComponentScheduler scheduler(bsp_scheduler); + GreedyBspScheduler bspScheduler; + ConnectedComponentScheduler scheduler(bspScheduler); - BspArchitecture arch = instance.getArchitecture(); - arch.setNumberOfProcessors(6); + BspArchitecture arch = instance.GetArchitecture(); + arch.SetNumberOfProcessors(6); - BspSchedule schedule(instance); - auto status = scheduler.computeSchedule(schedule); + BspSchedule schedule(instance); + auto status = scheduler.ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(status, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(status, ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BOOST_CHECK(partitioner.get_sub_dags().size() == 1); - BOOST_CHECK(partitioner.get_sub_dags()[0].num_vertices() == 8); - BOOST_CHECK(partitioner.get_sub_dags()[0].num_edges() == 9); + BOOST_CHECK(partitioner.GetSubDags().size() == 1); + BOOST_CHECK(partitioner.GetSubDags()[0].NumVertices() == 8); + BOOST_CHECK(partitioner.GetSubDags()[0].NumEdges() == 9); for (unsigned i = 0; i < 8; i++) { - BOOST_CHECK_EQUAL(partitioner.get_component()[i], 0); - BOOST_CHECK(partitioner.get_vertex_map()[i] <= i + 1); - BOOST_CHECK(partitioner.get_vertex_mapping()[0].at(i) <= 1 + i); + BOOST_CHECK_EQUAL(partitioner.GetComponent()[i], 0); + BOOST_CHECK(partitioner.GetVertexMap()[i] <= i + 1); + BOOST_CHECK(partitioner.GetVertexMapping()[0].at(i) <= 1 + i); } - VertexType v9 = dag.add_vertex(2, 1, 4); - VertexType v10 = dag.add_vertex(3, 1, 6); - VertexType v11 = dag.add_vertex(4, 1, 6); - VertexType v12 = dag.add_vertex(5, 1, 6); + VertexType v9 = dag.AddVertex(2, 1, 4); + VertexType v10 = dag.AddVertex(3, 1, 6); + VertexType v11 = dag.AddVertex(4, 1, 6); + VertexType v12 = dag.AddVertex(5, 1, 6); - dag.add_edge(v9, v10); - dag.add_edge(v9, v11); - dag.add_edge(v9, v12); - dag.add_edge(v10, v11); + dag.AddEdge(v9, v10); + dag.AddEdge(v9, v11); + 
dag.AddEdge(v9, v12); + dag.AddEdge(v10, v11); - partitioner.compute_connected_components(dag); + partitioner.ComputeConnectedComponents(dag); - BOOST_CHECK_EQUAL(partitioner.get_sub_dags().size(), 2); - BOOST_CHECK_EQUAL(partitioner.get_sub_dags()[0].num_vertices(), 8); - BOOST_CHECK_EQUAL(partitioner.get_sub_dags()[0].num_edges(), 9); - BOOST_CHECK_EQUAL(partitioner.get_sub_dags()[1].num_vertices(), 4); - BOOST_CHECK_EQUAL(partitioner.get_sub_dags()[1].num_edges(), 4); + BOOST_CHECK_EQUAL(partitioner.GetSubDags().size(), 2); + BOOST_CHECK_EQUAL(partitioner.GetSubDags()[0].NumVertices(), 8); + BOOST_CHECK_EQUAL(partitioner.GetSubDags()[0].NumEdges(), 9); + BOOST_CHECK_EQUAL(partitioner.GetSubDags()[1].NumVertices(), 4); + BOOST_CHECK_EQUAL(partitioner.GetSubDags()[1].NumEdges(), 4); for (unsigned i = 0; i < 8; i++) { - BOOST_CHECK_EQUAL(partitioner.get_component()[i], 0); - BOOST_CHECK(partitioner.get_vertex_map()[i] <= i + 1); - BOOST_CHECK(partitioner.get_vertex_mapping()[0].at(i) <= 1 + i); + BOOST_CHECK_EQUAL(partitioner.GetComponent()[i], 0); + BOOST_CHECK(partitioner.GetVertexMap()[i] <= i + 1); + BOOST_CHECK(partitioner.GetVertexMapping()[0].at(i) <= 1 + i); } for (unsigned i = 8; i < 12; i++) { - BOOST_CHECK_EQUAL(partitioner.get_component()[i], 1); - BOOST_CHECK(partitioner.get_vertex_map()[i] <= 1 + i - 8); - BOOST_CHECK(partitioner.get_vertex_mapping()[1].at(i - 8) <= 1 + i); + BOOST_CHECK_EQUAL(partitioner.GetComponent()[i], 1); + BOOST_CHECK(partitioner.GetVertexMap()[i] <= 1 + i - 8); + BOOST_CHECK(partitioner.GetVertexMapping()[1].at(i - 8) <= 1 + i); } - BspInstance instance_new(dag, arch); - BspSchedule schedule_new(instance_new); + BspInstance instanceNew(dag, arch); + BspSchedule scheduleNew(instanceNew); - auto status_new = scheduler.computeSchedule(schedule_new); + auto statusNew = scheduler.ComputeSchedule(scheduleNew); - BOOST_CHECK_EQUAL(status_new, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule_new.satisfiesPrecedenceConstraints()); + 
BOOST_CHECK_EQUAL(statusNew, ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(scheduleNew.SatisfiesPrecedenceConstraints()); } diff --git a/tests/cost_evaluation.cpp b/tests/cost_evaluation.cpp index 9375f4c8..0acf2c4f 100644 --- a/tests/cost_evaluation.cpp +++ b/tests/cost_evaluation.cpp @@ -29,42 +29,42 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(test_cost_models_simple_dag) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - - BspInstance instance; - instance.setNumberOfProcessors(2); - instance.setCommunicationCosts(10); - instance.setSynchronisationCosts(5); - - auto &dag = instance.getComputationalDag(); - dag.add_vertex(10, 1, 0); - dag.add_vertex(20, 2, 0); - dag.add_vertex(30, 3, 0); - dag.add_vertex(40, 4, 0); - dag.add_vertex(50, 5, 0); - dag.add_edge(0, 1); - dag.add_edge(0, 2); - dag.add_edge(1, 4); - dag.add_edge(2, 3); - dag.add_edge(3, 4); - - BspSchedule schedule(instance); - - schedule.setAssignedProcessor(0, 0); - schedule.setAssignedSuperstep(0, 0); - schedule.setAssignedProcessor(1, 0); - schedule.setAssignedSuperstep(1, 1); - schedule.setAssignedProcessor(2, 1); - schedule.setAssignedSuperstep(2, 1); - schedule.setAssignedProcessor(3, 1); - schedule.setAssignedSuperstep(3, 2); - schedule.setAssignedProcessor(4, 1); - schedule.setAssignedSuperstep(4, 3); - schedule.updateNumberOfSupersteps(); - - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 4); +BOOST_AUTO_TEST_CASE(TestCostModelsSimpleDag) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + + BspInstance instance; + instance.SetNumberOfProcessors(2); + instance.SetCommunicationCosts(10); + instance.SetSynchronisationCosts(5); + + auto &dag = instance.GetComputationalDag(); + dag.AddVertex(10, 1, 0); + dag.AddVertex(20, 2, 0); + dag.AddVertex(30, 3, 0); + dag.AddVertex(40, 4, 0); + dag.AddVertex(50, 5, 0); + dag.AddEdge(0, 1); + dag.AddEdge(0, 2); + dag.AddEdge(1, 4); + 
dag.AddEdge(2, 3); + dag.AddEdge(3, 4); + + BspSchedule schedule(instance); + + schedule.SetAssignedProcessor(0, 0); + schedule.SetAssignedSuperstep(0, 0); + schedule.SetAssignedProcessor(1, 0); + schedule.SetAssignedSuperstep(1, 1); + schedule.SetAssignedProcessor(2, 1); + schedule.SetAssignedSuperstep(2, 1); + schedule.SetAssignedProcessor(3, 1); + schedule.SetAssignedSuperstep(3, 2); + schedule.SetAssignedProcessor(4, 1); + schedule.SetAssignedSuperstep(4, 3); + schedule.UpdateNumberOfSupersteps(); + + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 4); // Work cost (BSP model) = sum of max work per superstep across processors // SS0: max(P0=10, P1=0) = 10 @@ -72,7 +72,7 @@ BOOST_AUTO_TEST_CASE(test_cost_models_simple_dag) { // SS2: max(P0=0, P1=40) = 40 // SS3: max(P0=0, P1=50) = 50 // Total work = 10 + 30 + 40 + 50 = 130 - BOOST_CHECK_EQUAL(schedule.computeWorkCosts(), 130); + BOOST_CHECK_EQUAL(schedule.ComputeWorkCosts(), 130); // LazyCommunicationCost // Sends/receives at step_needed - staleness (staleness=1) @@ -82,7 +82,7 @@ BOOST_AUTO_TEST_CASE(test_cost_models_simple_dag) { // Comm = 10 + 20 = 30 // Syncs = 2 * L = 2 * 5 = 10 (only steps with comm) // Total = 30 + 10 + 130 = 170 - BOOST_CHECK_EQUAL(LazyCommunicationCost()(schedule), 170); + BOOST_CHECK_EQUAL(LazyCommunicationCost()(schedule), 170); // BufferedSendingCost // Send at producer step, receive at step_needed - staleness @@ -94,7 +94,7 @@ BOOST_AUTO_TEST_CASE(test_cost_models_simple_dag) { // Comm = 10 + 20 + 20 = 50 // Syncs = 3 * L = 3 * 5 = 15 (all steps with comm) // Total = 50 + 15 + 130 = 195 - BOOST_CHECK_EQUAL(BufferedSendingCost()(schedule), 195); + BOOST_CHECK_EQUAL(BufferedSendingCost()(schedule), 195); // TotalCommunicationCost // Sum of cross-processor edge comm weights * g / P @@ -104,7 +104,7 @@ BOOST_AUTO_TEST_CASE(test_cost_models_simple_dag) { // Work = 130 // Sync = 3 * 5 = 15 (number_of_supersteps - 1) // Total = 15 
+ 130 + 15 = 160 - BOOST_CHECK_EQUAL(TotalCommunicationCost()(schedule), 160); + BOOST_CHECK_EQUAL(TotalCommunicationCost()(schedule), 160); // TotalLambdaCommunicationCost // For each node, sum comm_weight * sendCosts over unique target processors @@ -113,8 +113,8 @@ BOOST_AUTO_TEST_CASE(test_cost_models_simple_dag) { // Node 1 (P0, cw=2): target_procs={P1} → 2*1 = 2 // Node 2 (P1, cw=3): target_procs={P1} → 3*0 = 0 // Node 3 (P1, cw=4): target_procs={P1} → 4*0 = 0 - // comm_costs = 1+2+0+0 = 3, comm_cost = 3 * (1/2) * 10 = 15 + // comm_costs = 1+2+0+0 = 3, commCost = 3 * (1/2) * 10 = 15 // Work = 130, Sync = 3 * 5 = 15 // Total = 15 + 130 + 15 = 160 - BOOST_CHECK_EQUAL(TotalLambdaCommunicationCost()(schedule), 160); + BOOST_CHECK_EQUAL(TotalLambdaCommunicationCost()(schedule), 160); } diff --git a/tests/cuthill_mckee.cpp b/tests/cuthill_mckee.cpp index 89cf42f0..2da653d1 100644 --- a/tests/cuthill_mckee.cpp +++ b/tests/cuthill_mckee.cpp @@ -29,106 +29,104 @@ limitations under the License. 
using namespace osp; -using ComputationalDag = boost_graph_int_t; -using VertexType = vertex_idx_t; +using ComputationalDag = BoostGraphIntT; +using VertexType = VertexIdxT; -BOOST_AUTO_TEST_CASE(cuthill_mckee_1) { +BOOST_AUTO_TEST_CASE(CuthillMckee1) { ComputationalDag dag; - dag.add_vertex(2, 9); - dag.add_vertex(3, 8); - dag.add_vertex(4, 7); - dag.add_vertex(5, 6); - dag.add_vertex(6, 5); - dag.add_vertex(7, 4); - dag.add_vertex(8, 3); - dag.add_vertex(9, 2); - - dag.add_edge(0, 1, 2); - dag.add_edge(0, 2, 3); - dag.add_edge(0, 3, 4); - dag.add_edge(1, 4, 5); - dag.add_edge(2, 4, 6); - dag.add_edge(2, 5, 7); - dag.add_edge(1, 6, 8); - dag.add_edge(4, 7, 9); - dag.add_edge(3, 7, 9); - - std::vector cm_wavefront = cuthill_mckee_wavefront(dag); - std::vector expected_cm_wavefront = {0, 3, 1, 2, 6, 4, 5, 7}; + dag.AddVertex(2, 9); + dag.AddVertex(3, 8); + dag.AddVertex(4, 7); + dag.AddVertex(5, 6); + dag.AddVertex(6, 5); + dag.AddVertex(7, 4); + dag.AddVertex(8, 3); + dag.AddVertex(9, 2); + + dag.AddEdge(0, 1, 2); + dag.AddEdge(0, 2, 3); + dag.AddEdge(0, 3, 4); + dag.AddEdge(1, 4, 5); + dag.AddEdge(2, 4, 6); + dag.AddEdge(2, 5, 7); + dag.AddEdge(1, 6, 8); + dag.AddEdge(4, 7, 9); + dag.AddEdge(3, 7, 9); + + std::vector cmWavefront = CuthillMckeeWavefront(dag); + std::vector expectedCmWavefront = {0, 3, 1, 2, 6, 4, 5, 7}; + BOOST_CHECK_EQUAL_COLLECTIONS(cmWavefront.begin(), cmWavefront.end(), expectedCmWavefront.begin(), expectedCmWavefront.end()); + + cmWavefront = CuthillMckeeWavefront(dag, true); + expectedCmWavefront = {0, 2, 3, 1, 5, 6, 4, 7}; + + BOOST_CHECK_EQUAL_COLLECTIONS(cmWavefront.begin(), cmWavefront.end(), expectedCmWavefront.begin(), expectedCmWavefront.end()); + + std::vector cmUndirected; + std::vector expectedCmUndirected; + + cmUndirected = CuthillMckeeUndirected(dag, true); + expectedCmUndirected = {7, 3, 4, 0, 1, 2, 6, 5}; BOOST_CHECK_EQUAL_COLLECTIONS( - cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(), 
expected_cm_wavefront.end()); + cmUndirected.begin(), cmUndirected.end(), expectedCmUndirected.begin(), expectedCmUndirected.end()); - cm_wavefront = cuthill_mckee_wavefront(dag, true); - expected_cm_wavefront = {0, 2, 3, 1, 5, 6, 4, 7}; - - BOOST_CHECK_EQUAL_COLLECTIONS( - cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(), expected_cm_wavefront.end()); - - std::vector cm_undirected; - std::vector expected_cm_undirected; - - cm_undirected = cuthill_mckee_undirected(dag, true); - expected_cm_undirected = {7, 3, 4, 0, 1, 2, 6, 5}; - BOOST_CHECK_EQUAL_COLLECTIONS( - cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); - - cm_undirected = cuthill_mckee_undirected(dag, false); - expected_cm_undirected = {0, 3, 1, 2, 7, 6, 4, 5}; + cmUndirected = CuthillMckeeUndirected(dag, false); + expectedCmUndirected = {0, 3, 1, 2, 7, 6, 4, 5}; BOOST_CHECK_EQUAL_COLLECTIONS( - cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); + cmUndirected.begin(), cmUndirected.end(), expectedCmUndirected.begin(), expectedCmUndirected.end()); - cm_undirected = cuthill_mckee_undirected(dag, true, true); - expected_cm_undirected = {3, 4, 5, 1, 2, 7, 6, 0}; + cmUndirected = CuthillMckeeUndirected(dag, true, true); + expectedCmUndirected = {3, 4, 5, 1, 2, 7, 6, 0}; BOOST_CHECK_EQUAL_COLLECTIONS( - cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); + cmUndirected.begin(), cmUndirected.end(), expectedCmUndirected.begin(), expectedCmUndirected.end()); - std::vector top_sort; - for (const auto &vertex : priority_vec_top_sort_view(dag, cm_undirected)) { - top_sort.push_back(vertex); + std::vector topSort; + for (const auto &vertex : PriorityVecTopSortView(dag, cmUndirected)) { + topSort.push_back(vertex); } - std::vector expected_top_sort = {0, 2, 5, 1, 6, 4, 3, 7}; + std::vector expectedTopSort = {0, 2, 5, 1, 6, 4, 3, 7}; 
- BOOST_CHECK_EQUAL_COLLECTIONS(top_sort.begin(), top_sort.end(), expected_top_sort.begin(), expected_top_sort.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(topSort.begin(), topSort.end(), expectedTopSort.begin(), expectedTopSort.end()); - cm_undirected = cuthill_mckee_undirected(dag, false, true); - expected_cm_undirected = {0, 2, 3, 1, 6, 7, 5, 4}; + cmUndirected = CuthillMckeeUndirected(dag, false, true); + expectedCmUndirected = {0, 2, 3, 1, 6, 7, 5, 4}; BOOST_CHECK_EQUAL_COLLECTIONS( - cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); + cmUndirected.begin(), cmUndirected.end(), expectedCmUndirected.begin(), expectedCmUndirected.end()); - dag.add_edge(8, 9); - dag.add_edge(9, 10); + dag.AddEdge(8, 9); + dag.AddEdge(9, 10); - cm_undirected = cuthill_mckee_undirected(dag, true); - expected_cm_undirected = {7, 3, 4, 0, 1, 2, 6, 5, 10, 9, 8}; + cmUndirected = CuthillMckeeUndirected(dag, true); + expectedCmUndirected = {7, 3, 4, 0, 1, 2, 6, 5, 10, 9, 8}; BOOST_CHECK_EQUAL_COLLECTIONS( - cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); + cmUndirected.begin(), cmUndirected.end(), expectedCmUndirected.begin(), expectedCmUndirected.end()); - cm_undirected = cuthill_mckee_undirected(dag, false); - expected_cm_undirected = {0, 3, 1, 2, 7, 6, 4, 5, 8, 9, 10}; + cmUndirected = CuthillMckeeUndirected(dag, false); + expectedCmUndirected = {0, 3, 1, 2, 7, 6, 4, 5, 8, 9, 10}; BOOST_CHECK_EQUAL_COLLECTIONS( - cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); + cmUndirected.begin(), cmUndirected.end(), expectedCmUndirected.begin(), expectedCmUndirected.end()); } -bool is_permutation(const std::vector &vec) { - std::vector sorted_vec = vec; - std::sort(sorted_vec.begin(), sorted_vec.end()); - for (unsigned i = 0; i < sorted_vec.size(); ++i) { - if (sorted_vec[i] != i) { +bool IsPermutation(const std::vector &vec) { + 
std::vector sortedVec = vec; + std::sort(sortedVec.begin(), sortedVec.end()); + for (unsigned i = 0; i < sortedVec.size(); ++i) { + if (sortedVec[i] != i) { return false; } } return true; } -bool is_top_sort(const std::vector &vec, const ComputationalDag &dag) { +bool IsTopSort(const std::vector &vec, const ComputationalDag &dag) { std::unordered_map position; for (VertexType i = 0; i < vec.size(); ++i) { position[vec[i]] = i; } - for (const auto &vertex : dag.vertices()) { - for (const auto &child : dag.children(vertex)) { + for (const auto &vertex : dag.Vertices()) { + for (const auto &child : dag.Children(vertex)) { if (position[vertex] > position[child]) { return false; } @@ -138,8 +136,8 @@ bool is_top_sort(const std::vector &vec, const ComputationalDag &dag return true; } -BOOST_AUTO_TEST_CASE(cuthill_mckee_2) { - std::vector filenames_graph = tiny_spaa_graphs(); +BOOST_AUTO_TEST_CASE(CuthillMckee2) { + std::vector filenamesGraph = TinySpaaGraphs(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -149,33 +147,33 @@ BOOST_AUTO_TEST_CASE(cuthill_mckee_2) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { + for (auto &filenameGraph : filenamesGraph) { ComputationalDag graph; - auto status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), graph); + auto statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), graph); - if (!status_graph) { + if (!statusGraph) { std::cout << "Reading files failed." 
<< std::endl; BOOST_CHECK(false); } else { - std::cout << "File read:" << filename_graph << std::endl; + std::cout << "File read:" << filenameGraph << std::endl; } - std::vector wavefront = cuthill_mckee_wavefront(graph); - BOOST_CHECK(is_permutation(wavefront)); + std::vector wavefront = CuthillMckeeWavefront(graph); + BOOST_CHECK(IsPermutation(wavefront)); - wavefront = cuthill_mckee_wavefront(graph, true); - BOOST_CHECK(is_permutation(wavefront)); + wavefront = CuthillMckeeWavefront(graph, true); + BOOST_CHECK(IsPermutation(wavefront)); - const auto cm_undirected = cuthill_mckee_undirected(graph, true, true); - BOOST_CHECK(is_permutation(cm_undirected)); + const auto cmUndirected = CuthillMckeeUndirected(graph, true, true); + BOOST_CHECK(IsPermutation(cmUndirected)); - std::vector top_sort; + std::vector topSort; - for (const auto &vertex : priority_vec_top_sort_view(graph, cm_undirected)) { - top_sort.push_back(vertex); + for (const auto &vertex : PriorityVecTopSortView(graph, cmUndirected)) { + topSort.push_back(vertex); } - BOOST_CHECK(is_permutation(top_sort)); - BOOST_CHECK(is_top_sort(top_sort, graph)); + BOOST_CHECK(IsPermutation(topSort)); + BOOST_CHECK(IsTopSort(topSort, graph)); } } diff --git a/tests/debug_merkle_divider.cpp b/tests/debug_merkle_divider.cpp index a9a7ed1e..7d0f752f 100644 --- a/tests/debug_merkle_divider.cpp +++ b/tests/debug_merkle_divider.cpp @@ -24,7 +24,7 @@ limitations under the License. 
#include "osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp" #include "osp/bsp/scheduler/Serial.hpp" #include "osp/coarser/coarser_util.hpp" #include "osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp" @@ -33,10 +33,10 @@ limitations under the License. using namespace osp; template -void check_partition_type_homogeneity(const GraphT &dag, const std::vector> &partition) { +void CheckPartitionTypeHomogeneity(const GraphT &dag, const std::vector> &partition) { // Group partitions by their ID - std::map, std::vector>> partitions; - for (vertex_idx_t i = 0; i < dag.num_vertices(); ++i) { + std::map, std::vector>> partitions; + for (VertexIdxT i = 0; i < dag.NumVertices(); ++i) { partitions[partition[i]].push_back(i); } @@ -45,9 +45,9 @@ void check_partition_type_homogeneity(const GraphT &dag, const std::vector instance; - if (!file_reader::readComputationalDagDotFormat(dot_file_path, instance.getComputationalDag())) { - std::cerr << "Failed to read graph from " << dot_file_path << std::endl; + BspInstance instance; + if (!file_reader::ReadComputationalDagDotFormat(dotFilePath, instance.GetComputationalDag())) { + std::cerr << "Failed to read graph from " << dotFilePath << std::endl; return 1; } - std::cout << "Graph loaded successfully. " << instance.numberOfVertices() << " vertices." << std::endl; + std::cout << "Graph loaded successfully. " << instance.NumberOfVertices() << " vertices." 
<< std::endl; - for (auto v : instance.getComputationalDag().vertices()) { - instance.getComputationalDag().set_vertex_comm_weight( - v, static_cast>(instance.getComputationalDag().vertex_comm_weight(v) * 0.01)); + for (auto v : instance.GetComputationalDag().Vertices()) { + instance.GetComputationalDag().SetVertexCommWeight( + v, static_cast>(instance.GetComputationalDag().VertexCommWeight(v) * 0.01)); } // Set up architecture - instance.getArchitecture().SetProcessorsConsequTypes({24, 48}, {100, 100}); - instance.setDiagonalCompatibilityMatrix(2); - instance.setSynchronisationCosts(2000); - instance.setCommunicationCosts(1); + instance.GetArchitecture().SetProcessorsConsequTypes({24, 48}, {100, 100}); + instance.SetDiagonalCompatibilityMatrix(2); + instance.SetSynchronisationCosts(2000); + instance.SetCommunicationCosts(1); // Set up the scheduler - GrowLocalAutoCores growlocal; - BspLocking locking; - GreedyChildren children; - kl_total_lambda_comm_improver kl(42); - kl.setSuperstepRemoveStrengthParameter(1.0); - kl.setTimeQualityParameter(1.0); - ComboScheduler growlocal_kl(growlocal, kl); - ComboScheduler locking_kl(locking, kl); - ComboScheduler children_kl(children, kl); - - GreedyMetaScheduler scheduler; + GrowLocalAutoCores growlocal; + BspLocking locking; + GreedyChildren children; + KlTotalCommImprover kl(42); + kl.SetSuperstepRemoveStrengthParameter(1.0); + kl.SetTimeQualityParameter(1.0); + ComboScheduler growlocalKl(growlocal, kl); + ComboScheduler lockingKl(locking, kl); + ComboScheduler childrenKl(children, kl); + + GreedyMetaScheduler scheduler; // scheduler.addScheduler(growlocal_kl); - scheduler.addScheduler(locking_kl); - scheduler.addScheduler(children_kl); - scheduler.addSerialScheduler(); - - IsomorphicSubgraphScheduler iso_scheduler(scheduler); - iso_scheduler.setMergeDifferentTypes(false); - iso_scheduler.setWorkThreshold(100); - iso_scheduler.setCriticalPathThreshold(500); - iso_scheduler.setOrbitLockRatio(0.5); - 
iso_scheduler.setAllowTrimmedScheduler(false); - iso_scheduler.set_plot_dot_graphs(true); // Enable plotting for debug + scheduler.AddScheduler(lockingKl); + scheduler.AddScheduler(childrenKl); + scheduler.AddSerialScheduler(); + + IsomorphicSubgraphScheduler isoScheduler(scheduler); + isoScheduler.SetMergeDifferentTypes(false); + isoScheduler.SetWorkThreshold(100); + isoScheduler.SetCriticalPathThreshold(500); + isoScheduler.SetOrbitLockRatio(0.5); + isoScheduler.SetAllowTrimmedScheduler(false); + isoScheduler.SetPlotDotGraphs(true); // Enable plotting for debug std::cout << "Starting partition computation..." << std::endl; // This is the call that is expected to throw the exception - auto partition = iso_scheduler.compute_partition(instance); + auto partition = isoScheduler.ComputePartition(instance); - check_partition_type_homogeneity(instance.getComputationalDag(), partition); + CheckPartitionTypeHomogeneity(instance.GetComputationalDag(), partition); - graph_t corase_graph; - coarser_util::construct_coarse_dag(instance.getComputationalDag(), corase_graph, partition); - bool acyc = is_acyclic(corase_graph); + GraphT coraseGraph; + coarser_util::ConstructCoarseDag(instance.GetComputationalDag(), coraseGraph, partition); + bool acyc = IsAcyclic(coraseGraph); std::cout << "Partition is " << (acyc ? "acyclic." : "not acyclic."); std::cout << "Partition computation finished." << std::endl; - std::cout << "Generated " << std::set>(partition.begin(), partition.end()).size() << " partitions." + std::cout << "Generated " << std::set>(partition.begin(), partition.end()).size() << " partitions." << std::endl; return 0; diff --git a/tests/directed_graph_algorithms.cpp b/tests/directed_graph_algorithms.cpp index da141811..9cfe95b4 100644 --- a/tests/directed_graph_algorithms.cpp +++ b/tests/directed_graph_algorithms.cpp @@ -37,45 +37,45 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(longest_edge_triangle_parallel) { - using graph_t = boost_graph_int_t; +BOOST_AUTO_TEST_CASE(LongestEdgeTriangleParallel) { + using GraphT = BoostGraphIntT; // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = large_spaa_graphs(); + std::vector filenamesGraph = LargeSpaaGraphs(); - const auto project_root = get_project_root(); + const auto projectRoot = GetProjectRoot(); - for (auto &filename_graph : filenames_graph) { - graph_t graph; + for (auto &filenameGraph : filenamesGraph) { + GraphT graph; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((project_root / filename_graph).string(), graph); + bool statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((projectRoot / filenameGraph).string(), graph); - BOOST_CHECK(status_graph); + BOOST_CHECK(statusGraph); - auto start_time = std::chrono::high_resolution_clock::now(); - auto deleted_edges = long_edges_in_triangles(graph); - auto finish_time = std::chrono::high_resolution_clock::now(); + auto startTime = std::chrono::high_resolution_clock::now(); + auto deletedEdges = LongEdgesInTriangles(graph); + auto finishTime = std::chrono::high_resolution_clock::now(); - std::cout << "\n" << filename_graph << std::endl; + std::cout << "\n" << filenameGraph << std::endl; std::cout << "Time for long_edges_in_triangles: " - << std::chrono::duration_cast(finish_time - start_time).count() << "ms" << std::endl; + << std::chrono::duration_cast(finishTime - startTime).count() << "ms" << std::endl; - start_time = std::chrono::high_resolution_clock::now(); - auto deleted_edges_parallel = long_edges_in_triangles_parallel(graph); - finish_time = std::chrono::high_resolution_clock::now(); + startTime = std::chrono::high_resolution_clock::now(); + auto deletedEdgesParallel = LongEdgesInTrianglesParallel(graph); + finishTime = std::chrono::high_resolution_clock::now(); std::cout << "Time for 
long_edges_in_triangles_parallel: " - << std::chrono::duration_cast(finish_time - start_time).count() << "ms" << std::endl; + << std::chrono::duration_cast(finishTime - startTime).count() << "ms" << std::endl; - BOOST_CHECK_EQUAL(deleted_edges.size(), deleted_edges_parallel.size()); + BOOST_CHECK_EQUAL(deletedEdges.size(), deletedEdgesParallel.size()); - for (const auto &edge : deleted_edges) { - BOOST_CHECK(deleted_edges_parallel.find(edge) != deleted_edges_parallel.cend()); + for (const auto &edge : deletedEdges) { + BOOST_CHECK(deletedEdgesParallel.find(edge) != deletedEdgesParallel.cend()); } - for (const auto &edge : deleted_edges_parallel) { - BOOST_CHECK(deleted_edges.find(edge) != deleted_edges.cend()); + for (const auto &edge : deletedEdgesParallel) { + BOOST_CHECK(deletedEdges.find(edge) != deletedEdges.cend()); } } } diff --git a/tests/directed_graph_top_sort.cpp b/tests/directed_graph_top_sort.cpp index 3b2703f9..e1c2cea8 100644 --- a/tests/directed_graph_top_sort.cpp +++ b/tests/directed_graph_top_sort.cpp @@ -34,42 +34,42 @@ limitations under the License. 
using namespace osp; -computational_dag_vector_impl_def_t constr_graph_1() { - computational_dag_vector_impl_def_t graph; - - using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; - - vertex_idx v1 = graph.add_vertex(1, 2, 3, 4); - vertex_idx v2 = graph.add_vertex(5, 6, 7, 8); - vertex_idx v3 = graph.add_vertex(9, 10, 11, 12); - vertex_idx v4 = graph.add_vertex(13, 14, 15, 16); - vertex_idx v5 = graph.add_vertex(17, 18, 19, 20); - vertex_idx v6 = graph.add_vertex(21, 22, 23, 24); - vertex_idx v7 = graph.add_vertex(25, 26, 27, 28); - vertex_idx v8 = graph.add_vertex(29, 30, 31, 32); - - graph.add_edge(v1, v2); - graph.add_edge(v1, v3); - graph.add_edge(v1, v4); - graph.add_edge(v2, v5); - - graph.add_edge(v3, v5); - graph.add_edge(v3, v6); - graph.add_edge(v2, v7); - graph.add_edge(v5, v8); - graph.add_edge(v4, v8); +ComputationalDagVectorImplDefUnsignedT ConstrGraph1() { + ComputationalDagVectorImplDefUnsignedT graph; + + using VertexIdx = ComputationalDagVectorImplDefUnsignedT::VertexIdx; + + VertexIdx v1 = graph.AddVertex(1, 2, 3, 4); + VertexIdx v2 = graph.AddVertex(5, 6, 7, 8); + VertexIdx v3 = graph.AddVertex(9, 10, 11, 12); + VertexIdx v4 = graph.AddVertex(13, 14, 15, 16); + VertexIdx v5 = graph.AddVertex(17, 18, 19, 20); + VertexIdx v6 = graph.AddVertex(21, 22, 23, 24); + VertexIdx v7 = graph.AddVertex(25, 26, 27, 28); + VertexIdx v8 = graph.AddVertex(29, 30, 31, 32); + + graph.AddEdge(v1, v2); + graph.AddEdge(v1, v3); + graph.AddEdge(v1, v4); + graph.AddEdge(v2, v5); + + graph.AddEdge(v3, v5); + graph.AddEdge(v3, v6); + graph.AddEdge(v2, v7); + graph.AddEdge(v5, v8); + graph.AddEdge(v4, v8); return graph; } -BOOST_AUTO_TEST_CASE(test_util_1) { - const computational_dag_vector_impl_def_t graph = constr_graph_1(); +BOOST_AUTO_TEST_CASE(TestUtil1) { + const ComputationalDagVectorImplDefUnsignedT graph = ConstrGraph1(); - // using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; + // using VertexIdx = 
ComputationalDagVectorImplDefUnsignedT::VertexIdx; } BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; const std::vector> out({ {7}, @@ -86,195 +86,195 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { const std::vector workW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); const std::vector commW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); - const boost_graph_int_t graph(out, workW, commW); - const boost_graph_int_t graph_empty; + const BoostGraphIntT graph(out, workW, commW); + const BoostGraphIntT graphEmpty; - std::vector top_order; - std::vector index_in_top_order; + std::vector topOrder; + std::vector indexInTopOrder; - top_order = GetTopOrderReverse(graph); + topOrder = GetTopOrderReverse(graph); - BOOST_CHECK(top_order.size() == graph.num_vertices()); + BOOST_CHECK(topOrder.size() == graph.NumVertices()); - index_in_top_order = sorting_arrangement(top_order); + indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_GT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_GT(indexInTopOrder[i], indexInTopOrder[j]); } } - top_order = GetTopOrderMaxChildren(graph); + topOrder = GetTopOrderMaxChildren(graph); - BOOST_CHECK(top_order.size() == graph.num_vertices()); + BOOST_CHECK(topOrder.size() == graph.NumVertices()); - index_in_top_order = sorting_arrangement(top_order); + indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - top_order = GetTopOrderRandom(graph); + topOrder = GetTopOrderRandom(graph); - BOOST_CHECK(top_order.size() == graph.num_vertices()); + 
BOOST_CHECK(topOrder.size() == graph.NumVertices()); - index_in_top_order = sorting_arrangement(top_order); + indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - top_order = GetTopOrderMinIndex(graph); + topOrder = GetTopOrderMinIndex(graph); - BOOST_CHECK(top_order.size() == graph.num_vertices()); + BOOST_CHECK(topOrder.size() == graph.NumVertices()); - index_in_top_order = sorting_arrangement(top_order); + indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - top_order = GetTopOrderGorder(graph); + topOrder = GetTopOrderGorder(graph); - BOOST_CHECK(top_order.size() == graph.num_vertices()); + BOOST_CHECK(topOrder.size() == graph.NumVertices()); - index_in_top_order = sorting_arrangement(top_order); + indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - top_order = GetTopOrder(graph); + topOrder = GetTopOrder(graph); - BOOST_CHECK(top_order.size() == graph.num_vertices()); - BOOST_CHECK(GetTopOrder(graph_empty).size() == graph_empty.num_vertices()); + BOOST_CHECK(topOrder.size() == graph.NumVertices()); + BOOST_CHECK(GetTopOrder(graphEmpty).size() == graphEmpty.NumVertices()); - index_in_top_order = sorting_arrangement(top_order); + 
indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } size_t idx = 0; - std::vector bfs_view_top_sort; - for (const auto &v : bfs_top_sort_view(graph)) { - bfs_view_top_sort.push_back(v); - BOOST_CHECK_EQUAL(top_order[idx], v); + std::vector bfsViewTopSort; + for (const auto &v : BfsTopSortView(graph)) { + bfsViewTopSort.push_back(v); + BOOST_CHECK_EQUAL(topOrder[idx], v); ++idx; } - BOOST_CHECK_EQUAL(bfs_view_top_sort.size(), graph.num_vertices()); + BOOST_CHECK_EQUAL(bfsViewTopSort.size(), graph.NumVertices()); - index_in_top_order = sorting_arrangement(bfs_view_top_sort); - for (const auto &i : bfs_view_top_sort) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + indexInTopOrder = SortingArrangement(bfsViewTopSort); + for (const auto &i : bfsViewTopSort) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - std::vector dfs_view_top_sort; - for (const auto &v : top_sort_view(graph)) { - dfs_view_top_sort.push_back(v); + std::vector dfsViewTopSort; + for (const auto &v : TopSortView(graph)) { + dfsViewTopSort.push_back(v); } - BOOST_CHECK_EQUAL(dfs_view_top_sort.size(), graph.num_vertices()); + BOOST_CHECK_EQUAL(dfsViewTopSort.size(), graph.NumVertices()); - index_in_top_order = sorting_arrangement(dfs_view_top_sort); - for (const auto &i : dfs_view_top_sort) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + indexInTopOrder = SortingArrangement(dfsViewTopSort); + for (const auto &i : dfsViewTopSort) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - 
BOOST_CHECK_EQUAL(dfs_view_top_sort[0], 9); - BOOST_CHECK_EQUAL(dfs_view_top_sort[1], 5); - BOOST_CHECK_EQUAL(dfs_view_top_sort[2], 6); - BOOST_CHECK_EQUAL(dfs_view_top_sort[3], 1); - BOOST_CHECK_EQUAL(dfs_view_top_sort[4], 8); - BOOST_CHECK_EQUAL(dfs_view_top_sort[5], 4); - BOOST_CHECK_EQUAL(dfs_view_top_sort[6], 3); - BOOST_CHECK_EQUAL(dfs_view_top_sort[7], 2); - BOOST_CHECK_EQUAL(dfs_view_top_sort[8], 0); - BOOST_CHECK_EQUAL(dfs_view_top_sort[9], 7); - - std::vector loc_view_top_sort; - - for (const auto &v : locality_top_sort_view(graph)) { - loc_view_top_sort.push_back(v); + BOOST_CHECK_EQUAL(dfsViewTopSort[0], 9); + BOOST_CHECK_EQUAL(dfsViewTopSort[1], 5); + BOOST_CHECK_EQUAL(dfsViewTopSort[2], 6); + BOOST_CHECK_EQUAL(dfsViewTopSort[3], 1); + BOOST_CHECK_EQUAL(dfsViewTopSort[4], 8); + BOOST_CHECK_EQUAL(dfsViewTopSort[5], 4); + BOOST_CHECK_EQUAL(dfsViewTopSort[6], 3); + BOOST_CHECK_EQUAL(dfsViewTopSort[7], 2); + BOOST_CHECK_EQUAL(dfsViewTopSort[8], 0); + BOOST_CHECK_EQUAL(dfsViewTopSort[9], 7); + + std::vector locViewTopSort; + + for (const auto &v : LocalityTopSortView(graph)) { + locViewTopSort.push_back(v); } - BOOST_CHECK_EQUAL(loc_view_top_sort.size(), graph.num_vertices()); + BOOST_CHECK_EQUAL(locViewTopSort.size(), graph.NumVertices()); - index_in_top_order = sorting_arrangement(loc_view_top_sort); - for (const auto &i : loc_view_top_sort) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + indexInTopOrder = SortingArrangement(locViewTopSort); + for (const auto &i : locViewTopSort) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - BOOST_CHECK_EQUAL(loc_view_top_sort[0], 3); - BOOST_CHECK_EQUAL(loc_view_top_sort[1], 8); - BOOST_CHECK_EQUAL(loc_view_top_sort[2], 4); - BOOST_CHECK_EQUAL(loc_view_top_sort[3], 9); - BOOST_CHECK_EQUAL(loc_view_top_sort[4], 5); - BOOST_CHECK_EQUAL(loc_view_top_sort[5], 6); - 
BOOST_CHECK_EQUAL(loc_view_top_sort[6], 1); - BOOST_CHECK_EQUAL(loc_view_top_sort[7], 2); - BOOST_CHECK_EQUAL(loc_view_top_sort[8], 0); - BOOST_CHECK_EQUAL(loc_view_top_sort[9], 7); - - std::vector max_children_view_top_sort; - for (const auto &v : max_children_top_sort_view(graph)) { - max_children_view_top_sort.push_back(v); + BOOST_CHECK_EQUAL(locViewTopSort[0], 3); + BOOST_CHECK_EQUAL(locViewTopSort[1], 8); + BOOST_CHECK_EQUAL(locViewTopSort[2], 4); + BOOST_CHECK_EQUAL(locViewTopSort[3], 9); + BOOST_CHECK_EQUAL(locViewTopSort[4], 5); + BOOST_CHECK_EQUAL(locViewTopSort[5], 6); + BOOST_CHECK_EQUAL(locViewTopSort[6], 1); + BOOST_CHECK_EQUAL(locViewTopSort[7], 2); + BOOST_CHECK_EQUAL(locViewTopSort[8], 0); + BOOST_CHECK_EQUAL(locViewTopSort[9], 7); + + std::vector maxChildrenViewTopSort; + for (const auto &v : MaxChildrenTopSortView(graph)) { + maxChildrenViewTopSort.push_back(v); } - BOOST_CHECK_EQUAL(max_children_view_top_sort.size(), graph.num_vertices()); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort.size(), graph.NumVertices()); - index_in_top_order = sorting_arrangement(max_children_view_top_sort); - for (const auto &i : max_children_view_top_sort) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + indexInTopOrder = SortingArrangement(maxChildrenViewTopSort); + for (const auto &i : maxChildrenViewTopSort) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - BOOST_CHECK_EQUAL(max_children_view_top_sort[0], 9); - BOOST_CHECK_EQUAL(max_children_view_top_sort[1], 6); - BOOST_CHECK_EQUAL(max_children_view_top_sort[2], 5); - BOOST_CHECK_EQUAL(max_children_view_top_sort[3], 3); - BOOST_CHECK_EQUAL(max_children_view_top_sort[4], 2); - BOOST_CHECK_EQUAL(max_children_view_top_sort[5], 0); - BOOST_CHECK_EQUAL(max_children_view_top_sort[6], 8); - BOOST_CHECK_EQUAL(max_children_view_top_sort[7], 1); - BOOST_CHECK_EQUAL(max_children_view_top_sort[8], 4); - 
BOOST_CHECK_EQUAL(max_children_view_top_sort[9], 7); - - std::vector random_view_top_sort; - for (const auto &v : random_top_sort_view(graph)) { - random_view_top_sort.push_back(v); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[0], 9); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[1], 6); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[2], 5); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[3], 3); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[4], 2); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[5], 0); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[6], 8); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[7], 1); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[8], 4); + BOOST_CHECK_EQUAL(maxChildrenViewTopSort[9], 7); + + std::vector randomViewTopSort; + for (const auto &v : RandomTopSortView(graph)) { + randomViewTopSort.push_back(v); } - BOOST_CHECK_EQUAL(random_view_top_sort.size(), graph.num_vertices()); + BOOST_CHECK_EQUAL(randomViewTopSort.size(), graph.NumVertices()); - index_in_top_order = sorting_arrangement(random_view_top_sort); + indexInTopOrder = SortingArrangement(randomViewTopSort); - for (const auto &i : random_view_top_sort) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : randomViewTopSort) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } } -BOOST_AUTO_TEST_CASE(top_sort_template_overload_csr) { - using VertexType = vertex_idx_t; +BOOST_AUTO_TEST_CASE(TopSortTemplateOverloadCsr) { + using VertexType = VertexIdxT; const std::vector> out({ {7}, @@ -291,22 +291,22 @@ BOOST_AUTO_TEST_CASE(top_sort_template_overload_csr) { const std::vector workW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); const std::vector commW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); - const boost_graph_int_t graph(out, workW, commW); + const BoostGraphIntT graph(out, workW, commW); - Compact_Sparse_Graph graph_csr(graph); + CompactSparseGraph graphCsr(graph); - 
BOOST_CHECK_EQUAL(graph_csr.num_vertices(), 10); - BOOST_CHECK_EQUAL(graph_csr.num_edges(), 12); + BOOST_CHECK_EQUAL(graphCsr.NumVertices(), 10); + BOOST_CHECK_EQUAL(graphCsr.NumEdges(), 12); - auto top_order = GetTopOrder(graph_csr); - BOOST_CHECK_EQUAL(top_order.size(), graph_csr.num_vertices()); + auto topOrder = GetTopOrder(graphCsr); + BOOST_CHECK_EQUAL(topOrder.size(), graphCsr.NumVertices()); - std::vector expected_top_order{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; + std::vector expectedTopOrder{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; size_t idx = 0; - for (const auto &v : top_sort_view(graph_csr)) { - BOOST_CHECK_EQUAL(top_order[idx], v); - BOOST_CHECK_EQUAL(expected_top_order[idx], v); + for (const auto &v : TopSortView(graphCsr)) { + BOOST_CHECK_EQUAL(topOrder[idx], v); + BOOST_CHECK_EQUAL(expectedTopOrder[idx], v); ++idx; } } diff --git a/tests/directed_graph_util.cpp b/tests/directed_graph_util.cpp index fe2c53bc..b69c2a67 100644 --- a/tests/directed_graph_util.cpp +++ b/tests/directed_graph_util.cpp @@ -34,90 +34,90 @@ limitations under the License. 
using namespace osp; -computational_dag_vector_impl_def_t constr_graph_1() { - computational_dag_vector_impl_def_t graph; - - using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; - - vertex_idx v1 = graph.add_vertex(1, 2, 3, 4); - vertex_idx v2 = graph.add_vertex(5, 6, 7, 8); - vertex_idx v3 = graph.add_vertex(9, 10, 11, 12); - vertex_idx v4 = graph.add_vertex(13, 14, 15, 16); - vertex_idx v5 = graph.add_vertex(17, 18, 19, 20); - vertex_idx v6 = graph.add_vertex(21, 22, 23, 24); - vertex_idx v7 = graph.add_vertex(25, 26, 27, 28); - vertex_idx v8 = graph.add_vertex(29, 30, 31, 32); - - graph.add_edge(v1, v2); - graph.add_edge(v1, v3); - graph.add_edge(v1, v4); - graph.add_edge(v2, v5); - - graph.add_edge(v3, v5); - graph.add_edge(v3, v6); - graph.add_edge(v2, v7); - graph.add_edge(v5, v8); - graph.add_edge(v4, v8); +ComputationalDagVectorImplDefUnsignedT ConstrGraph1() { + ComputationalDagVectorImplDefUnsignedT graph; + + using VertexIdx = ComputationalDagVectorImplDefUnsignedT::VertexIdx; + + VertexIdx v1 = graph.AddVertex(1, 2, 3, 4); + VertexIdx v2 = graph.AddVertex(5, 6, 7, 8); + VertexIdx v3 = graph.AddVertex(9, 10, 11, 12); + VertexIdx v4 = graph.AddVertex(13, 14, 15, 16); + VertexIdx v5 = graph.AddVertex(17, 18, 19, 20); + VertexIdx v6 = graph.AddVertex(21, 22, 23, 24); + VertexIdx v7 = graph.AddVertex(25, 26, 27, 28); + VertexIdx v8 = graph.AddVertex(29, 30, 31, 32); + + graph.AddEdge(v1, v2); + graph.AddEdge(v1, v3); + graph.AddEdge(v1, v4); + graph.AddEdge(v2, v5); + + graph.AddEdge(v3, v5); + graph.AddEdge(v3, v6); + graph.AddEdge(v2, v7); + graph.AddEdge(v5, v8); + graph.AddEdge(v4, v8); return graph; } -BOOST_AUTO_TEST_CASE(test_empty_graph) { - computational_dag_vector_impl_def_t graph; +BOOST_AUTO_TEST_CASE(TestEmptyGraph) { + ComputationalDagVectorImplDefUnsignedT graph; - using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; + using VertexIdx = ComputationalDagVectorImplDefUnsignedT::VertexIdx; - 
BOOST_CHECK_EQUAL(graph.num_edges(), 0); - BOOST_CHECK_EQUAL(graph.num_vertices(), 0); + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 0); - std::vector sources = source_vertices(graph); + std::vector sources = SourceVertices(graph); BOOST_CHECK_EQUAL(sources.size(), 0); - std::vector sinks = sink_vertices(graph); + std::vector sinks = SinkVertices(graph); BOOST_CHECK_EQUAL(sinks.size(), 0); - BOOST_CHECK_EQUAL(is_acyclic(graph), true); - BOOST_CHECK_EQUAL(is_connected(graph), true); + BOOST_CHECK_EQUAL(IsAcyclic(graph), true); + BOOST_CHECK_EQUAL(IsConnected(graph), true); } -BOOST_AUTO_TEST_CASE(test_util_1) { - computational_dag_vector_impl_def_t graph = constr_graph_1(); +BOOST_AUTO_TEST_CASE(TestUtil1) { + ComputationalDagVectorImplDefUnsignedT graph = ConstrGraph1(); - using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; + using VertexIdx = ComputationalDagVectorImplDefUnsignedT::VertexIdx; - BOOST_CHECK_EQUAL(graph.num_edges(), 9); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 9); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); - std::vector sources = source_vertices(graph); + std::vector sources = SourceVertices(graph); BOOST_CHECK_EQUAL(sources.size(), 1); BOOST_CHECK_EQUAL(sources[0], 0); - std::vector sources_s; - for (const auto &v : source_vertices_view(graph)) { - sources_s.push_back(v); + std::vector sourcesS; + for (const auto &v : SourceVerticesView(graph)) { + sourcesS.push_back(v); } - BOOST_CHECK_EQUAL(sources_s.size(), 1); - BOOST_CHECK_EQUAL(sources_s[0], 0); + BOOST_CHECK_EQUAL(sourcesS.size(), 1); + BOOST_CHECK_EQUAL(sourcesS[0], 0); - std::vector sinks = sink_vertices(graph); + std::vector sinks = SinkVertices(graph); BOOST_CHECK_EQUAL(sinks.size(), 3); BOOST_CHECK_EQUAL(sinks[0], 5); BOOST_CHECK_EQUAL(sinks[1], 6); BOOST_CHECK_EQUAL(sinks[2], 7); - std::vector sinks_s; - for (const auto &v : sink_vertices_view(graph)) { - sinks_s.push_back(v); + 
std::vector sinksS; + for (const auto &v : SinkVerticesView(graph)) { + sinksS.push_back(v); } - BOOST_CHECK_EQUAL(sinks_s.size(), 3); - BOOST_CHECK_EQUAL(sinks_s[0], 5); - BOOST_CHECK_EQUAL(sinks_s[1], 6); - BOOST_CHECK_EQUAL(sinks_s[2], 7); + BOOST_CHECK_EQUAL(sinksS.size(), 3); + BOOST_CHECK_EQUAL(sinksS[0], 5); + BOOST_CHECK_EQUAL(sinksS[1], 6); + BOOST_CHECK_EQUAL(sinksS[2], 7); - std::vector bfs; + std::vector bfs; - for (const auto &v : bfs_view(graph, 1)) { + for (const auto &v : BfsView(graph, 1)) { bfs.push_back(v); } @@ -127,24 +127,24 @@ BOOST_AUTO_TEST_CASE(test_util_1) { BOOST_CHECK_EQUAL(bfs[2], 6); BOOST_CHECK_EQUAL(bfs[3], 7); - auto t = successors(1, graph); + auto t = Successors(1, graph); BOOST_CHECK_EQUAL_COLLECTIONS(bfs.begin(), bfs.end(), t.begin(), t.end()); bfs.clear(); - for (const auto &v : bfs_view(graph, 5)) { + for (const auto &v : BfsView(graph, 5)) { bfs.push_back(v); } BOOST_CHECK_EQUAL(bfs.size(), 1); BOOST_CHECK_EQUAL(bfs[0], 5); - t = successors(5, graph); + t = Successors(5, graph); BOOST_CHECK_EQUAL_COLLECTIONS(bfs.begin(), bfs.end(), t.begin(), t.end()); bfs.clear(); - for (const auto &v : bfs_view(graph, 0)) { + for (const auto &v : BfsView(graph, 0)) { bfs.push_back(v); } @@ -158,12 +158,12 @@ BOOST_AUTO_TEST_CASE(test_util_1) { BOOST_CHECK_EQUAL(bfs[6], 5); BOOST_CHECK_EQUAL(bfs[7], 7); - t = successors(0, graph); + t = Successors(0, graph); BOOST_CHECK_EQUAL_COLLECTIONS(bfs.begin(), bfs.end(), t.begin(), t.end()); - std::vector dfs; + std::vector dfs; - for (const auto &v : dfs_view(graph, 1)) { + for (const auto &v : DfsView(graph, 1)) { dfs.push_back(v); } @@ -174,7 +174,7 @@ BOOST_AUTO_TEST_CASE(test_util_1) { BOOST_CHECK_EQUAL(dfs[3], 7); dfs.clear(); - for (const auto &v : dfs_view(graph, 5)) { + for (const auto &v : DfsView(graph, 5)) { dfs.push_back(v); } @@ -183,7 +183,7 @@ BOOST_AUTO_TEST_CASE(test_util_1) { dfs.clear(); - for (const auto &v : dfs_view(graph, 0)) { + for (const auto &v : DfsView(graph, 0)) { 
dfs.push_back(v); } @@ -197,230 +197,230 @@ BOOST_AUTO_TEST_CASE(test_util_1) { BOOST_CHECK_EQUAL(dfs[6], 1); BOOST_CHECK_EQUAL(dfs[7], 6); - std::vector bfs_reverse; + std::vector bfsReverse; - for (const auto &v : bfs_reverse_view(graph, 1)) { - bfs_reverse.push_back(v); + for (const auto &v : BfsReverseView(graph, 1)) { + bfsReverse.push_back(v); } - BOOST_CHECK_EQUAL(bfs_reverse.size(), 2); - BOOST_CHECK_EQUAL(bfs_reverse[0], 1); - BOOST_CHECK_EQUAL(bfs_reverse[1], 0); + BOOST_CHECK_EQUAL(bfsReverse.size(), 2); + BOOST_CHECK_EQUAL(bfsReverse[0], 1); + BOOST_CHECK_EQUAL(bfsReverse[1], 0); - t = ancestors(1, graph); - BOOST_CHECK_EQUAL_COLLECTIONS(bfs_reverse.begin(), bfs_reverse.end(), t.begin(), t.end()); + t = Ancestors(1, graph); + BOOST_CHECK_EQUAL_COLLECTIONS(bfsReverse.begin(), bfsReverse.end(), t.begin(), t.end()); - bfs_reverse.clear(); + bfsReverse.clear(); - for (const auto &v : bfs_reverse_view(graph, 5)) { - bfs_reverse.push_back(v); + for (const auto &v : BfsReverseView(graph, 5)) { + bfsReverse.push_back(v); } - BOOST_CHECK_EQUAL(bfs_reverse.size(), 3); - BOOST_CHECK_EQUAL(bfs_reverse[0], 5); - BOOST_CHECK_EQUAL(bfs_reverse[1], 2); - BOOST_CHECK_EQUAL(bfs_reverse[2], 0); + BOOST_CHECK_EQUAL(bfsReverse.size(), 3); + BOOST_CHECK_EQUAL(bfsReverse[0], 5); + BOOST_CHECK_EQUAL(bfsReverse[1], 2); + BOOST_CHECK_EQUAL(bfsReverse[2], 0); - t = ancestors(5, graph); - BOOST_CHECK_EQUAL_COLLECTIONS(bfs_reverse.begin(), bfs_reverse.end(), t.begin(), t.end()); + t = Ancestors(5, graph); + BOOST_CHECK_EQUAL_COLLECTIONS(bfsReverse.begin(), bfsReverse.end(), t.begin(), t.end()); - bfs_reverse.clear(); + bfsReverse.clear(); - for (const auto &v : bfs_reverse_view(graph, 0)) { - bfs_reverse.push_back(v); + for (const auto &v : BfsReverseView(graph, 0)) { + bfsReverse.push_back(v); } - BOOST_CHECK_EQUAL(bfs_reverse.size(), 1); - BOOST_CHECK_EQUAL(bfs_reverse[0], 0); + BOOST_CHECK_EQUAL(bfsReverse.size(), 1); + BOOST_CHECK_EQUAL(bfsReverse[0], 0); - t = ancestors(0, 
graph); - BOOST_CHECK_EQUAL_COLLECTIONS(bfs_reverse.begin(), bfs_reverse.end(), t.begin(), t.end()); + t = Ancestors(0, graph); + BOOST_CHECK_EQUAL_COLLECTIONS(bfsReverse.begin(), bfsReverse.end(), t.begin(), t.end()); - bfs_reverse.clear(); + bfsReverse.clear(); - for (const auto &v : bfs_reverse_view(graph, 7)) { - bfs_reverse.push_back(v); + for (const auto &v : BfsReverseView(graph, 7)) { + bfsReverse.push_back(v); } - BOOST_CHECK_EQUAL(bfs_reverse.size(), 6); - BOOST_CHECK_EQUAL(bfs_reverse[0], 7); - BOOST_CHECK_EQUAL(bfs_reverse[1], 4); - BOOST_CHECK_EQUAL(bfs_reverse[2], 3); - BOOST_CHECK_EQUAL(bfs_reverse[3], 1); - BOOST_CHECK_EQUAL(bfs_reverse[4], 2); - BOOST_CHECK_EQUAL(bfs_reverse[5], 0); - - t = ancestors(7, graph); - BOOST_CHECK_EQUAL_COLLECTIONS(bfs_reverse.begin(), bfs_reverse.end(), t.begin(), t.end()); - - BOOST_CHECK_EQUAL(edge(0, 1, graph), true); - BOOST_CHECK_EQUAL(edge(0, 2, graph), true); - BOOST_CHECK_EQUAL(edge(0, 3, graph), true); - BOOST_CHECK_EQUAL(edge(0, 4, graph), false); - BOOST_CHECK_EQUAL(edge(0, 5, graph), false); - BOOST_CHECK_EQUAL(edge(0, 6, graph), false); - BOOST_CHECK_EQUAL(edge(0, 7, graph), false); - - BOOST_CHECK_EQUAL(edge(1, 0, graph), false); - BOOST_CHECK_EQUAL(edge(1, 1, graph), false); - BOOST_CHECK_EQUAL(edge(1, 2, graph), false); - BOOST_CHECK_EQUAL(edge(1, 3, graph), false); - BOOST_CHECK_EQUAL(edge(1, 4, graph), true); - BOOST_CHECK_EQUAL(edge(1, 5, graph), false); - BOOST_CHECK_EQUAL(edge(1, 6, graph), true); - BOOST_CHECK_EQUAL(edge(1, 7, graph), false); - - BOOST_CHECK_EQUAL(edge(2, 0, graph), false); - BOOST_CHECK_EQUAL(edge(2, 1, graph), false); - BOOST_CHECK_EQUAL(edge(2, 2, graph), false); - BOOST_CHECK_EQUAL(edge(2, 3, graph), false); - BOOST_CHECK_EQUAL(edge(2, 4, graph), true); - BOOST_CHECK_EQUAL(edge(2, 5, graph), true); - BOOST_CHECK_EQUAL(edge(2, 6, graph), false); - BOOST_CHECK_EQUAL(edge(2, 7, graph), false); - - BOOST_CHECK_EQUAL(edge(3, 0, graph), false); - BOOST_CHECK_EQUAL(edge(3, 1, graph), 
false); - BOOST_CHECK_EQUAL(edge(3, 2, graph), false); - BOOST_CHECK_EQUAL(edge(3, 3, graph), false); - BOOST_CHECK_EQUAL(edge(3, 4, graph), false); - BOOST_CHECK_EQUAL(edge(3, 5, graph), false); - BOOST_CHECK_EQUAL(edge(3, 6, graph), false); - BOOST_CHECK_EQUAL(edge(3, 7, graph), true); - - BOOST_CHECK_EQUAL(edge(4, 0, graph), false); - BOOST_CHECK_EQUAL(edge(4, 1, graph), false); - BOOST_CHECK_EQUAL(edge(4, 2, graph), false); - BOOST_CHECK_EQUAL(edge(4, 3, graph), false); - BOOST_CHECK_EQUAL(edge(4, 4, graph), false); - BOOST_CHECK_EQUAL(edge(4, 5, graph), false); - BOOST_CHECK_EQUAL(edge(4, 6, graph), false); - BOOST_CHECK_EQUAL(edge(4, 7, graph), true); - - BOOST_CHECK_EQUAL(edge(5, 0, graph), false); - BOOST_CHECK_EQUAL(edge(5, 1, graph), false); - BOOST_CHECK_EQUAL(edge(5, 2, graph), false); - BOOST_CHECK_EQUAL(edge(5, 3, graph), false); - BOOST_CHECK_EQUAL(edge(5, 4, graph), false); - BOOST_CHECK_EQUAL(edge(5, 5, graph), false); - BOOST_CHECK_EQUAL(edge(5, 6, graph), false); - BOOST_CHECK_EQUAL(edge(5, 7, graph), false); - - BOOST_CHECK_EQUAL(edge(6, 0, graph), false); - BOOST_CHECK_EQUAL(edge(6, 1, graph), false); - BOOST_CHECK_EQUAL(edge(6, 2, graph), false); - BOOST_CHECK_EQUAL(edge(6, 3, graph), false); - BOOST_CHECK_EQUAL(edge(6, 4, graph), false); - BOOST_CHECK_EQUAL(edge(6, 5, graph), false); - BOOST_CHECK_EQUAL(edge(6, 6, graph), false); - BOOST_CHECK_EQUAL(edge(6, 7, graph), false); - - BOOST_CHECK_EQUAL(edge(7, 0, graph), false); - BOOST_CHECK_EQUAL(edge(7, 1, graph), false); - BOOST_CHECK_EQUAL(edge(7, 2, graph), false); - BOOST_CHECK_EQUAL(edge(7, 3, graph), false); - BOOST_CHECK_EQUAL(edge(7, 4, graph), false); - BOOST_CHECK_EQUAL(edge(7, 5, graph), false); - BOOST_CHECK_EQUAL(edge(7, 6, graph), false); - - BOOST_CHECK_EQUAL(is_source(0, graph), true); - BOOST_CHECK_EQUAL(is_source(1, graph), false); - BOOST_CHECK_EQUAL(is_source(2, graph), false); - BOOST_CHECK_EQUAL(is_source(3, graph), false); - BOOST_CHECK_EQUAL(is_source(4, graph), false); 
- BOOST_CHECK_EQUAL(is_source(5, graph), false); - BOOST_CHECK_EQUAL(is_source(6, graph), false); - BOOST_CHECK_EQUAL(is_source(7, graph), false); - - BOOST_CHECK_EQUAL(is_sink(0, graph), false); - BOOST_CHECK_EQUAL(is_sink(1, graph), false); - BOOST_CHECK_EQUAL(is_sink(2, graph), false); - BOOST_CHECK_EQUAL(is_sink(3, graph), false); - BOOST_CHECK_EQUAL(is_sink(4, graph), false); - BOOST_CHECK_EQUAL(is_sink(5, graph), true); - BOOST_CHECK_EQUAL(is_sink(6, graph), true); - BOOST_CHECK_EQUAL(is_sink(7, graph), true); - - BOOST_CHECK_EQUAL(has_path(0, 1, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 2, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 3, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 4, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 5, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 6, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(1, 4, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 6, graph), true); - BOOST_CHECK_EQUAL(has_path(2, 4, graph), true); - BOOST_CHECK_EQUAL(has_path(2, 5, graph), true); - BOOST_CHECK_EQUAL(has_path(2, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(3, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(4, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(1, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 2, graph), false); - 
BOOST_CHECK_EQUAL(has_path(3, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 7, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 7, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 6, graph), false); - - std::vector edge_source = {0, 0, 0, 1, 1, 2, 2, 3, 4}; - std::vector edge_target = {1, 2, 3, 4, 6, 4, 5, 7, 7}; + BOOST_CHECK_EQUAL(bfsReverse.size(), 6); + BOOST_CHECK_EQUAL(bfsReverse[0], 7); + BOOST_CHECK_EQUAL(bfsReverse[1], 4); + BOOST_CHECK_EQUAL(bfsReverse[2], 3); + BOOST_CHECK_EQUAL(bfsReverse[3], 1); + BOOST_CHECK_EQUAL(bfsReverse[4], 2); + BOOST_CHECK_EQUAL(bfsReverse[5], 0); + + t = Ancestors(7, graph); + BOOST_CHECK_EQUAL_COLLECTIONS(bfsReverse.begin(), bfsReverse.end(), t.begin(), t.end()); + + BOOST_CHECK_EQUAL(Edge(0, 1, graph), true); + BOOST_CHECK_EQUAL(Edge(0, 2, graph), true); + BOOST_CHECK_EQUAL(Edge(0, 3, graph), true); + 
BOOST_CHECK_EQUAL(Edge(0, 4, graph), false); + BOOST_CHECK_EQUAL(Edge(0, 5, graph), false); + BOOST_CHECK_EQUAL(Edge(0, 6, graph), false); + BOOST_CHECK_EQUAL(Edge(0, 7, graph), false); + + BOOST_CHECK_EQUAL(Edge(1, 0, graph), false); + BOOST_CHECK_EQUAL(Edge(1, 1, graph), false); + BOOST_CHECK_EQUAL(Edge(1, 2, graph), false); + BOOST_CHECK_EQUAL(Edge(1, 3, graph), false); + BOOST_CHECK_EQUAL(Edge(1, 4, graph), true); + BOOST_CHECK_EQUAL(Edge(1, 5, graph), false); + BOOST_CHECK_EQUAL(Edge(1, 6, graph), true); + BOOST_CHECK_EQUAL(Edge(1, 7, graph), false); + + BOOST_CHECK_EQUAL(Edge(2, 0, graph), false); + BOOST_CHECK_EQUAL(Edge(2, 1, graph), false); + BOOST_CHECK_EQUAL(Edge(2, 2, graph), false); + BOOST_CHECK_EQUAL(Edge(2, 3, graph), false); + BOOST_CHECK_EQUAL(Edge(2, 4, graph), true); + BOOST_CHECK_EQUAL(Edge(2, 5, graph), true); + BOOST_CHECK_EQUAL(Edge(2, 6, graph), false); + BOOST_CHECK_EQUAL(Edge(2, 7, graph), false); + + BOOST_CHECK_EQUAL(Edge(3, 0, graph), false); + BOOST_CHECK_EQUAL(Edge(3, 1, graph), false); + BOOST_CHECK_EQUAL(Edge(3, 2, graph), false); + BOOST_CHECK_EQUAL(Edge(3, 3, graph), false); + BOOST_CHECK_EQUAL(Edge(3, 4, graph), false); + BOOST_CHECK_EQUAL(Edge(3, 5, graph), false); + BOOST_CHECK_EQUAL(Edge(3, 6, graph), false); + BOOST_CHECK_EQUAL(Edge(3, 7, graph), true); + + BOOST_CHECK_EQUAL(Edge(4, 0, graph), false); + BOOST_CHECK_EQUAL(Edge(4, 1, graph), false); + BOOST_CHECK_EQUAL(Edge(4, 2, graph), false); + BOOST_CHECK_EQUAL(Edge(4, 3, graph), false); + BOOST_CHECK_EQUAL(Edge(4, 4, graph), false); + BOOST_CHECK_EQUAL(Edge(4, 5, graph), false); + BOOST_CHECK_EQUAL(Edge(4, 6, graph), false); + BOOST_CHECK_EQUAL(Edge(4, 7, graph), true); + + BOOST_CHECK_EQUAL(Edge(5, 0, graph), false); + BOOST_CHECK_EQUAL(Edge(5, 1, graph), false); + BOOST_CHECK_EQUAL(Edge(5, 2, graph), false); + BOOST_CHECK_EQUAL(Edge(5, 3, graph), false); + BOOST_CHECK_EQUAL(Edge(5, 4, graph), false); + BOOST_CHECK_EQUAL(Edge(5, 5, graph), false); + 
BOOST_CHECK_EQUAL(Edge(5, 6, graph), false); + BOOST_CHECK_EQUAL(Edge(5, 7, graph), false); + + BOOST_CHECK_EQUAL(Edge(6, 0, graph), false); + BOOST_CHECK_EQUAL(Edge(6, 1, graph), false); + BOOST_CHECK_EQUAL(Edge(6, 2, graph), false); + BOOST_CHECK_EQUAL(Edge(6, 3, graph), false); + BOOST_CHECK_EQUAL(Edge(6, 4, graph), false); + BOOST_CHECK_EQUAL(Edge(6, 5, graph), false); + BOOST_CHECK_EQUAL(Edge(6, 6, graph), false); + BOOST_CHECK_EQUAL(Edge(6, 7, graph), false); + + BOOST_CHECK_EQUAL(Edge(7, 0, graph), false); + BOOST_CHECK_EQUAL(Edge(7, 1, graph), false); + BOOST_CHECK_EQUAL(Edge(7, 2, graph), false); + BOOST_CHECK_EQUAL(Edge(7, 3, graph), false); + BOOST_CHECK_EQUAL(Edge(7, 4, graph), false); + BOOST_CHECK_EQUAL(Edge(7, 5, graph), false); + BOOST_CHECK_EQUAL(Edge(7, 6, graph), false); + + BOOST_CHECK_EQUAL(IsSource(0, graph), true); + BOOST_CHECK_EQUAL(IsSource(1, graph), false); + BOOST_CHECK_EQUAL(IsSource(2, graph), false); + BOOST_CHECK_EQUAL(IsSource(3, graph), false); + BOOST_CHECK_EQUAL(IsSource(4, graph), false); + BOOST_CHECK_EQUAL(IsSource(5, graph), false); + BOOST_CHECK_EQUAL(IsSource(6, graph), false); + BOOST_CHECK_EQUAL(IsSource(7, graph), false); + + BOOST_CHECK_EQUAL(IsSink(0, graph), false); + BOOST_CHECK_EQUAL(IsSink(1, graph), false); + BOOST_CHECK_EQUAL(IsSink(2, graph), false); + BOOST_CHECK_EQUAL(IsSink(3, graph), false); + BOOST_CHECK_EQUAL(IsSink(4, graph), false); + BOOST_CHECK_EQUAL(IsSink(5, graph), true); + BOOST_CHECK_EQUAL(IsSink(6, graph), true); + BOOST_CHECK_EQUAL(IsSink(7, graph), true); + + BOOST_CHECK_EQUAL(HasPath(0, 1, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 2, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 3, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 4, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 5, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 6, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 0, graph), false); + 
BOOST_CHECK_EQUAL(HasPath(3, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(1, 4, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 6, graph), true); + BOOST_CHECK_EQUAL(HasPath(2, 4, graph), true); + BOOST_CHECK_EQUAL(HasPath(2, 5, graph), true); + BOOST_CHECK_EQUAL(HasPath(2, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(3, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(4, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(1, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 7, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 7, graph), false); + 
BOOST_CHECK_EQUAL(HasPath(7, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 6, graph), false); + + std::vector edgeSource = {0, 0, 0, 1, 1, 2, 2, 3, 4}; + std::vector edgeTarget = {1, 2, 3, 4, 6, 4, 5, 7, 7}; size_t i = 0; - for (const auto &e : edge_view(graph)) { - BOOST_CHECK_EQUAL(e.source, edge_source[i]); - BOOST_CHECK_EQUAL(e.target, edge_target[i]); + for (const auto &e : EdgeView(graph)) { + BOOST_CHECK_EQUAL(e.source_, edgeSource[i]); + BOOST_CHECK_EQUAL(e.target_, edgeTarget[i]); ++i; } - BOOST_CHECK_EQUAL(is_acyclic(graph), true); - BOOST_CHECK_EQUAL(is_connected(graph), true); + BOOST_CHECK_EQUAL(IsAcyclic(graph), true); + BOOST_CHECK_EQUAL(IsConnected(graph), true); - graph.add_edge(7, 5); - BOOST_CHECK_EQUAL(is_acyclic(graph), true); - graph.add_edge(7, 0); - BOOST_CHECK_EQUAL(is_acyclic(graph), false); + graph.AddEdge(7, 5); + BOOST_CHECK_EQUAL(IsAcyclic(graph), true); + graph.AddEdge(7, 0); + BOOST_CHECK_EQUAL(IsAcyclic(graph), false); - graph.add_vertex(1, 2, 3, 4); - BOOST_CHECK_EQUAL(is_connected(graph), false); + graph.AddVertex(1, 2, 3, 4); + BOOST_CHECK_EQUAL(IsConnected(graph), false); } BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { - using VertexType = vertex_idx_t; + using VertexType = VertexIdxT; const std::vector> out({ {7}, @@ -437,173 +437,173 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { const std::vector workW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); const std::vector commW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); - const boost_graph_int_t graph(out, workW, commW); - const boost_graph_int_t graph_empty; + const BoostGraphIntT graph(out, workW, commW); + const BoostGraphIntT graphEmpty; - BOOST_CHECK_EQUAL(graph.num_edges(), 12); - BOOST_CHECK_EQUAL(graph.num_vertices(), 10); - BOOST_CHECK_EQUAL(graph_empty.num_edges(), 0); - 
BOOST_CHECK_EQUAL(graph_empty.num_vertices(), 0); - BOOST_CHECK_EQUAL(graph.num_vertex_types(), 1); + BOOST_CHECK_EQUAL(graph.NumEdges(), 12); + BOOST_CHECK_EQUAL(graph.NumVertices(), 10); + BOOST_CHECK_EQUAL(graphEmpty.NumEdges(), 0); + BOOST_CHECK_EQUAL(graphEmpty.NumVertices(), 0); + BOOST_CHECK_EQUAL(graph.NumVertexTypes(), 1); - BOOST_CHECK_EQUAL(is_acyclic(graph), true); - BOOST_CHECK_EQUAL(is_acyclic(graph_empty), true); - BOOST_CHECK_EQUAL(is_connected(graph), false); - BOOST_CHECK_EQUAL(is_connected(graph_empty), true); + BOOST_CHECK_EQUAL(IsAcyclic(graph), true); + BOOST_CHECK_EQUAL(IsAcyclic(graphEmpty), true); + BOOST_CHECK_EQUAL(IsConnected(graph), false); + BOOST_CHECK_EQUAL(IsConnected(graphEmpty), true); - const auto long_edges = long_edges_in_triangles(graph); + const auto longEdges = LongEdgesInTriangles(graph); - BOOST_CHECK_EQUAL(graph.num_vertices(), std::distance(graph.vertices().begin(), graph.vertices().end())); - BOOST_CHECK_EQUAL(graph.num_edges(), std::distance(edges(graph).begin(), edges(graph).end())); - for (const auto &v : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(v), std::distance(graph.parents(v).begin(), graph.parents(v).end())); - BOOST_CHECK_EQUAL(graph.out_degree(v), std::distance(graph.children(v).begin(), graph.children(v).end())); + BOOST_CHECK_EQUAL(graph.NumVertices(), std::distance(graph.Vertices().begin(), graph.Vertices().end())); + BOOST_CHECK_EQUAL(graph.NumEdges(), std::distance(Edges(graph).begin(), Edges(graph).end())); + for (const auto &v : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.InDegree(v), std::distance(graph.Parents(v).begin(), graph.Parents(v).end())); + BOOST_CHECK_EQUAL(graph.OutDegree(v), std::distance(graph.Children(v).begin(), graph.Children(v).end())); } - for (const auto i : graph.vertices()) { - const auto v = graph.get_boost_graph()[i]; - BOOST_CHECK_EQUAL(v.workWeight, workW[i]); - BOOST_CHECK_EQUAL(v.workWeight, graph.vertex_work_weight(i)); - 
BOOST_CHECK_EQUAL(v.communicationWeight, commW[i]); - BOOST_CHECK_EQUAL(v.communicationWeight, graph.vertex_comm_weight(i)); + for (const auto i : graph.Vertices()) { + const auto v = graph.GetBoostGraph()[i]; + BOOST_CHECK_EQUAL(v.workWeight_, workW[i]); + BOOST_CHECK_EQUAL(v.workWeight_, graph.VertexWorkWeight(i)); + BOOST_CHECK_EQUAL(v.communicationWeight_, commW[i]); + BOOST_CHECK_EQUAL(v.communicationWeight_, graph.VertexCommWeight(i)); } - BOOST_CHECK_EQUAL(sumOfVerticesWorkWeights({0, 1}, graph), 2); + BOOST_CHECK_EQUAL(SumOfVerticesWorkWeights({0, 1}, graph), 2); { - int sum_of_work_weights = graph.vertex_work_weight(0) + graph.vertex_work_weight(1); - BOOST_CHECK_EQUAL(2, sum_of_work_weights); + int sumOfWorkWeights = graph.VertexWorkWeight(0) + graph.VertexWorkWeight(1); + BOOST_CHECK_EQUAL(2, sumOfWorkWeights); } - BOOST_CHECK_EQUAL(sumOfVerticesWorkWeights({5, 3}, graph), 4); - BOOST_CHECK_EQUAL(sumOfVerticesWorkWeights({}, graph), 0); - BOOST_CHECK_EQUAL(sumOfVerticesWorkWeights({0, 1, 2, 3, 4, 5}, graph), 9); + BOOST_CHECK_EQUAL(SumOfVerticesWorkWeights({5, 3}, graph), 4); + BOOST_CHECK_EQUAL(SumOfVerticesWorkWeights({}, graph), 0); + BOOST_CHECK_EQUAL(SumOfVerticesWorkWeights({0, 1, 2, 3, 4, 5}, graph), 9); - BOOST_CHECK_EQUAL(sumOfVerticesWorkWeights({}, graph_empty), 0); + BOOST_CHECK_EQUAL(SumOfVerticesWorkWeights({}, graphEmpty), 0); - std::size_t num_edges = 0; - for (const auto &vertex : graph.vertices()) { - num_edges += graph.out_degree(vertex); - for (const auto &parent : graph.parents(vertex)) { + std::size_t numEdges = 0; + for (const auto &vertex : graph.Vertices()) { + numEdges += graph.OutDegree(vertex); + for (const auto &parent : graph.Parents(vertex)) { BOOST_CHECK(std::any_of( - graph.children(parent).cbegin(), graph.children(parent).cend(), [vertex](VertexType k) { return k == vertex; })); + graph.Children(parent).cbegin(), graph.Children(parent).cend(), [vertex](VertexType k) { return k == vertex; })); } } - for (const auto 
&vertex : graph.vertices()) { - for (const auto &child : graph.children(vertex)) { + for (const auto &vertex : graph.Vertices()) { + for (const auto &child : graph.Children(vertex)) { BOOST_CHECK(std::any_of( - graph.parents(child).cbegin(), graph.parents(child).cend(), [vertex](VertexType k) { return k == vertex; })); + graph.Parents(child).cbegin(), graph.Parents(child).cend(), [vertex](VertexType k) { return k == vertex; })); } } - std::vector top_order = GetTopOrder(graph); - BOOST_CHECK(top_order.size() == graph.num_vertices()); - BOOST_CHECK(GetTopOrder(graph_empty).size() == graph_empty.num_vertices()); + std::vector topOrder = GetTopOrder(graph); + BOOST_CHECK(topOrder.size() == graph.NumVertices()); + BOOST_CHECK(GetTopOrder(graphEmpty).size() == graphEmpty.NumVertices()); - std::vector index_in_top_order = sorting_arrangement(top_order); + std::vector indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - top_order = GetTopOrderMaxChildren(graph); - BOOST_CHECK(top_order.size() == graph.num_vertices()); - BOOST_CHECK(GetTopOrder(graph_empty).size() == graph_empty.num_vertices()); + topOrder = GetTopOrderMaxChildren(graph); + BOOST_CHECK(topOrder.size() == graph.NumVertices()); + BOOST_CHECK(GetTopOrder(graphEmpty).size() == graphEmpty.NumVertices()); - index_in_top_order = sorting_arrangement(top_order); + indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - top_order = GetTopOrderRandom(graph); - 
BOOST_CHECK(top_order.size() == graph.num_vertices()); - BOOST_CHECK(GetTopOrderRandom(graph_empty).size() == graph_empty.num_vertices()); + topOrder = GetTopOrderRandom(graph); + BOOST_CHECK(topOrder.size() == graph.NumVertices()); + BOOST_CHECK(GetTopOrderRandom(graphEmpty).size() == graphEmpty.NumVertices()); - index_in_top_order = sorting_arrangement(top_order); + indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - top_order = GetTopOrderMinIndex(graph); - BOOST_CHECK(top_order.size() == graph.num_vertices()); - BOOST_CHECK(GetTopOrderMinIndex(graph_empty).size() == graph_empty.num_vertices()); + topOrder = GetTopOrderMinIndex(graph); + BOOST_CHECK(topOrder.size() == graph.NumVertices()); + BOOST_CHECK(GetTopOrderMinIndex(graphEmpty).size() == graphEmpty.NumVertices()); - index_in_top_order = sorting_arrangement(top_order); + indexInTopOrder = SortingArrangement(topOrder); - for (const auto &i : top_order) { - for (const auto &j : graph.children(i)) { - BOOST_CHECK_LT(index_in_top_order[i], index_in_top_order[j]); + for (const auto &i : topOrder) { + for (const auto &j : graph.Children(i)) { + BOOST_CHECK_LT(indexInTopOrder[i], indexInTopOrder[j]); } } - std::set all_nodes; - for (const auto &vertex : graph.vertices()) { - all_nodes.emplace(vertex); + std::set allNodes; + for (const auto &vertex : graph.Vertices()) { + allNodes.emplace(vertex); } - std::set nodes_a({8, 0}); - std::set nodes_b({6, 2, 5, 3}); - std::set nodes_c({6, 9, 1}); + std::set nodesA({8, 0}); + std::set nodesB({6, 2, 5, 3}); + std::set nodesC({6, 9, 1}); - std::vector bool_a(graph.num_vertices(), false); - std::vector bool_b(graph.num_vertices(), false); - std::vector bool_c(graph.num_vertices(), false); + 
std::vector boolA(graph.NumVertices(), false); + std::vector boolB(graph.NumVertices(), false); + std::vector boolC(graph.NumVertices(), false); - for (auto &i : nodes_a) { - bool_a[i] = true; + for (auto &i : nodesA) { + boolA[i] = true; } - for (auto &i : nodes_b) { - bool_b[i] = true; + for (auto &i : nodesB) { + boolB[i] = true; } - for (auto &i : nodes_c) { - bool_c[i] = true; + for (auto &i : nodesC) { + boolC[i] = true; } - BOOST_CHECK(GetFilteredTopOrder(bool_a, graph) == std::vector({0, 8}) - || GetFilteredTopOrder(bool_a, graph) == std::vector({8, 0})); - BOOST_CHECK(GetFilteredTopOrder(bool_b, graph)[3] == 2); - BOOST_CHECK(GetFilteredTopOrder(bool_c, graph) == std::vector({9, 6, 1})); + BOOST_CHECK(GetFilteredTopOrder(boolA, graph) == std::vector({0, 8}) + || GetFilteredTopOrder(boolA, graph) == std::vector({8, 0})); + BOOST_CHECK(GetFilteredTopOrder(boolB, graph)[3] == 2); + BOOST_CHECK(GetFilteredTopOrder(boolC, graph) == std::vector({9, 6, 1})); - BOOST_CHECK_EQUAL(longestPath(all_nodes, graph), 4); - BOOST_CHECK_EQUAL(longestPath(nodes_a, graph), 0); - BOOST_CHECK_EQUAL(longestPath(nodes_b, graph), 1); - BOOST_CHECK_EQUAL(longestPath(nodes_c, graph), 2); + BOOST_CHECK_EQUAL(LongestPath(allNodes, graph), 4); + BOOST_CHECK_EQUAL(LongestPath(nodesA, graph), 0); + BOOST_CHECK_EQUAL(LongestPath(nodesB, graph), 1); + BOOST_CHECK_EQUAL(LongestPath(nodesC, graph), 2); - BOOST_CHECK_EQUAL(longestPath({}, graph_empty), 0); + BOOST_CHECK_EQUAL(LongestPath({}, graphEmpty), 0); - std::vector longest_path = longestChain(graph); + std::vector longestPath = LongestChain(graph); - std::vector long_chain1({9, 6, 2, 0, 7}); - std::vector long_chain2({9, 5, 2, 0, 7}); + std::vector longChain1({9, 6, 2, 0, 7}); + std::vector longChain2({9, 5, 2, 0, 7}); - BOOST_CHECK_EQUAL(longestPath(all_nodes, graph) + 1, longestChain(graph).size()); - BOOST_CHECK(longest_path == long_chain1 || longest_path == long_chain2); + BOOST_CHECK_EQUAL(LongestPath(allNodes, graph) + 1, 
LongestChain(graph).size()); + BOOST_CHECK(longestPath == longChain1 || longestPath == longChain2); - BOOST_CHECK(longestChain(graph_empty) == std::vector({})); + BOOST_CHECK(LongestChain(graphEmpty) == std::vector({})); - BOOST_CHECK(ancestors(9, graph) == std::vector({9})); - BOOST_CHECK(ancestors(2, graph) == std::vector({2, 3, 5, 6, 9})); - BOOST_CHECK(ancestors(4, graph) == std::vector({4, 8})); - BOOST_CHECK(ancestors(5, graph) == std::vector({5, 9})); - BOOST_CHECK(successors(9, graph) == std::vector({9, 6, 1, 5, 2, 0, 7})); - BOOST_CHECK(successors(3, graph) == std::vector({3, 2, 0, 7})); - BOOST_CHECK(successors(0, graph) == std::vector({0, 7})); - BOOST_CHECK(successors(8, graph) == std::vector({8, 4})); - BOOST_CHECK(successors(4, graph) == std::vector({4})); + BOOST_CHECK(Ancestors(9, graph) == std::vector({9})); + BOOST_CHECK(Ancestors(2, graph) == std::vector({2, 3, 5, 6, 9})); + BOOST_CHECK(Ancestors(4, graph) == std::vector({4, 8})); + BOOST_CHECK(Ancestors(5, graph) == std::vector({5, 9})); + BOOST_CHECK(Successors(9, graph) == std::vector({9, 6, 1, 5, 2, 0, 7})); + BOOST_CHECK(Successors(3, graph) == std::vector({3, 2, 0, 7})); + BOOST_CHECK(Successors(0, graph) == std::vector({0, 7})); + BOOST_CHECK(Successors(8, graph) == std::vector({8, 4})); + BOOST_CHECK(Successors(4, graph) == std::vector({4})); - std::vector top_dist({4, 3, 3, 1, 2, 2, 2, 5, 1, 1}); - std::vector bottom_dist({2, 1, 3, 4, 1, 4, 4, 1, 2, 5}); + std::vector topDist({4, 3, 3, 1, 2, 2, 2, 5, 1, 1}); + std::vector bottomDist({2, 1, 3, 4, 1, 4, 4, 1, 2, 5}); - BOOST_CHECK(get_top_node_distance(graph) == top_dist); - BOOST_CHECK(get_bottom_node_distance(graph) == bottom_dist); + BOOST_CHECK(GetTopNodeDistance(graph) == topDist); + BOOST_CHECK(GetBottomNodeDistance(graph) == bottomDist); - const std::vector> graph_second_Out = { + const std::vector> graphSecondOut = { {1, 2}, {3, 4}, {4, 5}, @@ -612,38 +612,38 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { {6}, {}, }; - 
const std::vector graph_second_workW = {1, 1, 1, 1, 1, 1, 3}; - const std::vector graph_second_commW = graph_second_workW; + const std::vector graphSecondWorkW = {1, 1, 1, 1, 1, 1, 3}; + const std::vector graphSecondCommW = graphSecondWorkW; - boost_graph_int_t graph_second(graph_second_Out, graph_second_workW, graph_second_commW); + BoostGraphIntT graphSecond(graphSecondOut, graphSecondWorkW, graphSecondCommW); - std::vector top_dist_second({1, 2, 2, 3, 3, 3, 4}); - std::vector bottom_dist_second({4, 3, 3, 2, 1, 2, 1}); + std::vector topDistSecond({1, 2, 2, 3, 3, 3, 4}); + std::vector bottomDistSecond({4, 3, 3, 2, 1, 2, 1}); - BOOST_CHECK(get_top_node_distance(graph_second) == top_dist_second); - BOOST_CHECK(get_bottom_node_distance(graph_second) == bottom_dist_second); + BOOST_CHECK(GetTopNodeDistance(graphSecond) == topDistSecond); + BOOST_CHECK(GetBottomNodeDistance(graphSecond) == bottomDistSecond); - std::vector poisson_params({0.0000001, 0.08, 0.1, 0.2, 0.5, 1, 4}); + std::vector poissonParams({0.0000001, 0.08, 0.1, 0.2, 0.5, 1, 4}); for (unsigned loops = 0; loops < 10; loops++) { for (unsigned noise = 0; noise < 6; noise++) { - for (auto &pois_para : poisson_params) { - std::vector poset_int_map = get_strict_poset_integer_map(noise, pois_para, graph); + for (auto &poisPara : poissonParams) { + std::vector posetIntMap = GetStrictPosetIntegerMap(noise, poisPara, graph); - for (const auto &vertex : graph.vertices()) { - for (const auto &child : graph.children(vertex)) { - BOOST_CHECK_LE(poset_int_map[vertex] + 1, poset_int_map[child]); + for (const auto &vertex : graph.Vertices()) { + for (const auto &child : graph.Children(vertex)) { + BOOST_CHECK_LE(posetIntMap[vertex] + 1, posetIntMap[child]); } } } } } - BOOST_CHECK(critical_path_weight(graph) == 7); + BOOST_CHECK(CriticalPathWeight(graph) == 7); - auto wavefronts = compute_wavefronts(graph); + auto wavefronts = ComputeWavefronts(graph); - std::vector> expected_wavefronts = { + std::vector> 
expectedWavefronts = { {3, 8, 9}, {4, 6, 5}, {1, 2}, @@ -658,21 +658,21 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { BOOST_CHECK(!wavefront.empty()); BOOST_CHECK_EQUAL_COLLECTIONS( - wavefront.begin(), wavefront.end(), expected_wavefronts[counter].begin(), expected_wavefronts[counter].end()); + wavefront.begin(), wavefront.end(), expectedWavefronts[counter].begin(), expectedWavefronts[counter].end()); counter++; } - BOOST_CHECK_EQUAL(size, graph.num_vertices()); + BOOST_CHECK_EQUAL(size, graph.NumVertices()); // const std::pair, ComputationalDag> rev_graph_pair = graph.reverse_graph(); // const std::vector &vertex_mapping_rev_graph = rev_graph_pair.first; // const ComputationalDag &rev_graph = rev_graph_pair.second; - // BOOST_CHECK_EQUAL(graph.numberOfVertices(), rev_graph.numberOfVertices()); + // BOOST_CHECK_EQUAL(graph.NumberOfVertices(), rev_graph.NumberOfVertices()); // BOOST_CHECK_EQUAL(graph.numberOfEdges(), rev_graph.numberOfEdges()); - // for (VertexType vert = 0; vert < graph.numberOfVertices(); vert++) { + // for (VertexType vert = 0; vert < graph.NumberOfVertices(); vert++) { // BOOST_CHECK_EQUAL(graph.nodeWorkWeight(vert), rev_graph.nodeWorkWeight(vertex_mapping_rev_graph[vert])); // BOOST_CHECK_EQUAL(graph.nodeCommunicationWeight(vert), // rev_graph.nodeCommunicationWeight(vertex_mapping_rev_graph[vert])); @@ -680,8 +680,8 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { // BOOST_CHECK_EQUAL(graph.nodeType(vert), rev_graph.nodeType(vertex_mapping_rev_graph[vert])); // } - // for (VertexType vert_1 = 0; vert_1 < graph.numberOfVertices(); vert_1++) { - // for (VertexType vert_2 = 0; vert_2 < graph.numberOfVertices(); vert_2++) { + // for (VertexType vert_1 = 0; vert_1 < graph.NumberOfVertices(); vert_1++) { + // for (VertexType vert_2 = 0; vert_2 < graph.NumberOfVertices(); vert_2++) { // bool edge_in_graph = boost::edge(vert_1, vert_2, graph.getGraph()).second; // bool rev_edge_in_rev_graph = 
boost::edge(vertex_mapping_rev_graph[vert_2], // vertex_mapping_rev_graph[vert_1], rev_graph.getGraph()).second; BOOST_CHECK_EQUAL(edge_in_graph, @@ -690,28 +690,28 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { // } } -BOOST_AUTO_TEST_CASE(test_edge_view_indexed_access) { - computational_dag_vector_impl_def_t graph = constr_graph_1(); - auto all_edges = edge_view(graph); +BOOST_AUTO_TEST_CASE(TestEdgeViewIndexedAccess) { + ComputationalDagVectorImplDefUnsignedT graph = ConstrGraph1(); + auto allEdges = EdgeView(graph); - // Check initial iterator - auto it = all_edges.begin(); + // Check initial Iterator + auto it = allEdges.begin(); // Check each edge by index - for (size_t i = 0; i < graph.num_edges(); ++i) { - // Construct iterator directly to index i - auto indexed_it = decltype(all_edges)::iterator(i, graph); - BOOST_CHECK(indexed_it == it); - BOOST_CHECK(*indexed_it == *it); + for (size_t i = 0; i < graph.NumEdges(); ++i) { + // Construct Iterator directly to index i + auto indexedIt = decltype(allEdges)::Iterator(i, graph); + BOOST_CHECK(indexedIt == it); + BOOST_CHECK(*indexedIt == *it); ++it; } // Check end condition - auto end_it = decltype(all_edges)::iterator(graph.num_edges(), graph); - BOOST_CHECK(end_it == all_edges.end()); + auto endIt = decltype(allEdges)::Iterator(graph.NumEdges(), graph); + BOOST_CHECK(endIt == allEdges.end()); // Check out of bounds - auto oob_it = decltype(all_edges)::iterator(graph.num_edges() + 5, graph); - BOOST_CHECK(oob_it == all_edges.end()); + auto oobIt = decltype(allEdges)::Iterator(graph.NumEdges() + 5, graph); + BOOST_CHECK(oobIt == allEdges.end()); } diff --git a/tests/divisors.cpp b/tests/divisors.cpp index f9f7956c..07006186 100644 --- a/tests/divisors.cpp +++ b/tests/divisors.cpp @@ -26,20 +26,20 @@ using namespace osp; BOOST_AUTO_TEST_CASE(IntegerSqrt) { for (std::size_t root = 1U; root < 200U; ++root) { for (std::size_t num = root * root; num < (root + 1U) * (root + 1U); ++num) { - 
BOOST_CHECK_EQUAL(intSqrtFloor(num), root); + BOOST_CHECK_EQUAL(IntSqrtFloor(num), root); } } for (int root = 1; root < 300; ++root) { for (int num = root * root; num < (root + 1) * (root + 1); ++num) { - BOOST_CHECK_EQUAL(intSqrtFloor(num), root); + BOOST_CHECK_EQUAL(IntSqrtFloor(num), root); } } } BOOST_AUTO_TEST_CASE(Divisors) { for (std::size_t num = 1U; num < 1000U; ++num) { - const std::vector divs = divisorsList(num); + const std::vector divs = DivisorsList(num); for (const std::size_t &div : divs) { std::cout << div << ", "; BOOST_CHECK_EQUAL(num % div, 0U); diff --git a/tests/eft_subgraph_scheduler.cpp b/tests/eft_subgraph_scheduler.cpp index 3869b8ec..329863dd 100644 --- a/tests/eft_subgraph_scheduler.cpp +++ b/tests/eft_subgraph_scheduler.cpp @@ -25,91 +25,91 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_SimpleChain) { - using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(EftSubgraphSchedulerSimpleChain) { + using GraphT = ComputationalDagVectorImplDefUnsignedT; // 1. Setup Instance - BspInstance instance; - auto &dag = instance.getComputationalDag(); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); // Create a simple coarse-grained DAG: 0 -> 1 -> 2 - dag.add_vertex(100, 1, 0); // node 0 - dag.add_vertex(200, 1, 0); // node 1 - dag.add_vertex(300, 1, 0); // node 2 - dag.add_edge(0, 1); - dag.add_edge(1, 2); + dag.AddVertex(100, 1, 0); // node 0 + dag.AddVertex(200, 1, 0); // node 1 + dag.AddVertex(300, 1, 0); // node 2 + dag.AddEdge(0, 1); + dag.AddEdge(1, 2); // Setup Architecture: 2 processors of type 0, 2 of type 1 - instance.getArchitecture().setProcessorsWithTypes({0, 0, 1, 1}); - instance.setDiagonalCompatibilityMatrix(2); + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 1, 1}); + instance.SetDiagonalCompatibilityMatrix(2); // 2. 
Setup Scheduler Inputs std::vector multiplicities = {1, 2, 1}; - std::vector max_procs = {100, 100, 100}; - std::vector>> required_proc_types(3); + std::vector maxProcs = {100, 100, 100}; + std::vector>> requiredProcTypes(3); // Node 0: work 100, mult 1. Needs type 0. - required_proc_types[0] = {100, 0}; + requiredProcTypes[0] = {100, 0}; // Node 1: work 200, mult 2. Needs type 0 and 1. - required_proc_types[1] = {100, 100}; + requiredProcTypes[1] = {100, 100}; // Node 2: work 300, mult 1. Needs type 1. - required_proc_types[2] = {0, 300}; + requiredProcTypes[2] = {0, 300}; // 3. Run Scheduler - EftSubgraphScheduler scheduler; - scheduler.setMinWorkPerProcessor(1); - SubgraphSchedule schedule = scheduler.run(instance, multiplicities, required_proc_types, max_procs); + EftSubgraphScheduler scheduler; + scheduler.SetMinWorkPerProcessor(1); + SubgraphSchedule schedule = scheduler.Run(instance, multiplicities, requiredProcTypes, maxProcs); // 4. Assertions - BOOST_CHECK_CLOSE(schedule.makespan, 250.0, 1e-9); - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type.size(), 3); - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type[0].size(), 2); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[0][0], 2); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[0][1], 0); - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type[1].size(), 2); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[1][0], 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[1][1], 1); - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type[2].size(), 2); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[2][0], 0); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[2][1], 2); + BOOST_CHECK_CLOSE(schedule.makespan_, 250.0, 1e-9); + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_.size(), 3); + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_[0].size(), 2); + 
BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[0][0], 2); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[0][1], 0); + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_[1].size(), 2); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[1][0], 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[1][1], 1); + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_[2].size(), 2); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[2][0], 0); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[2][1], 2); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin) { - using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(EftSubgraphSchedulerForkJoin) { + using GraphT = ComputationalDagVectorImplDefUnsignedT; // 1. Setup Instance - BspInstance instance; - auto &dag = instance.getComputationalDag(); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); // Create a fork-join DAG: 0 -> {1,2} -> 3 - dag.add_vertex(100, 1, 0); // node 0 - dag.add_vertex(200, 1, 0); // node 1 - dag.add_vertex(300, 1, 0); // node 2 - dag.add_vertex(100, 1, 0); // node 3 - dag.add_edge(0, 1); - dag.add_edge(0, 2); - dag.add_edge(1, 3); - dag.add_edge(2, 3); + dag.AddVertex(100, 1, 0); // node 0 + dag.AddVertex(200, 1, 0); // node 1 + dag.AddVertex(300, 1, 0); // node 2 + dag.AddVertex(100, 1, 0); // node 3 + dag.AddEdge(0, 1); + dag.AddEdge(0, 2); + dag.AddEdge(1, 3); + dag.AddEdge(2, 3); // Setup Architecture: 4 processors of type 0 - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0}); - instance.setDiagonalCompatibilityMatrix(1); + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0}); + instance.SetDiagonalCompatibilityMatrix(1); // 2. 
Setup Scheduler Inputs std::vector multiplicities = {1, 2, 1, 4}; - std::vector max_procs = {100, 100, 100, 100}; - std::vector>> required_proc_types(4); + std::vector maxProcs = {100, 100, 100, 100}; + std::vector>> requiredProcTypes(4); // All nodes need type 0 - required_proc_types[0] = {100}; - required_proc_types[1] = {200}; - required_proc_types[2] = {300}; - required_proc_types[3] = {100}; + requiredProcTypes[0] = {100}; + requiredProcTypes[1] = {200}; + requiredProcTypes[2] = {300}; + requiredProcTypes[3] = {100}; // 3. Run Scheduler - EftSubgraphScheduler scheduler; - scheduler.setMinWorkPerProcessor(1); - SubgraphSchedule schedule = scheduler.run(instance, multiplicities, required_proc_types, max_procs); + EftSubgraphScheduler scheduler; + scheduler.SetMinWorkPerProcessor(1); + SubgraphSchedule schedule = scheduler.Run(instance, multiplicities, requiredProcTypes, maxProcs); // 4. Assertions // Manual calculation: @@ -124,141 +124,141 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin) { // Job 1 (work 200, 2w) duration 100. Finishes at 25 + 100 = 125. // T=125: Job 1 finishes. // T=175: Job 2 finishes. Job 3 becomes ready. Starts with 4w. Duration 100/4=25. Ends 200. 
- BOOST_CHECK_CLOSE(schedule.makespan, 200.0, 1e-9); - - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type.size(), 4); - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type[0].size(), 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[0][0], 4); - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type[1].size(), 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[1][0], 1); - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type[2].size(), 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[2][0], 2); - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type[3].size(), 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[3][0], 1); + BOOST_CHECK_CLOSE(schedule.makespan_, 200.0, 1e-9); + + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_.size(), 4); + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_[0].size(), 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[0][0], 4); + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_[1].size(), 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[1][0], 1); + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_[2].size(), 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[2][0], 2); + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_[3].size(), 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[3][0], 1); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_Deadlock) { - using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(EftSubgraphSchedulerDeadlock) { + using GraphT = ComputationalDagVectorImplDefUnsignedT; // 1. 
Setup Instance - BspInstance instance; - auto &dag = instance.getComputationalDag(); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); // Create a single-node DAG - dag.add_vertex(100, 1, 0); // node 0 + dag.AddVertex(100, 1, 0); // node 0 // Setup Architecture: 1 processor of type 0 - instance.getArchitecture().setProcessorsWithTypes({0}); - instance.setDiagonalCompatibilityMatrix(1); + instance.GetArchitecture().SetProcessorsWithTypes({0}); + instance.SetDiagonalCompatibilityMatrix(1); // 2. Setup Scheduler Inputs // Job needs 2 workers (multiplicity), but only 1 is available std::vector multiplicities = {2}; - std::vector max_procs = {100}; - std::vector>> required_proc_types(1); - required_proc_types[0] = {100}; + std::vector maxProcs = {100}; + std::vector>> requiredProcTypes(1); + requiredProcTypes[0] = {100}; // 3. Run Scheduler - EftSubgraphScheduler scheduler; - scheduler.setMinWorkPerProcessor(1); - SubgraphSchedule schedule = scheduler.run(instance, multiplicities, required_proc_types, max_procs); + EftSubgraphScheduler scheduler; + scheduler.SetMinWorkPerProcessor(1); + SubgraphSchedule schedule = scheduler.Run(instance, multiplicities, requiredProcTypes, maxProcs); // 4. Assertions // Expect a deadlock, indicated by a negative makespan - BOOST_CHECK_LT(schedule.makespan, 0.0); + BOOST_CHECK_LT(schedule.makespan_, 0.0); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ComplexDAG) { - using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(EftSubgraphSchedulerComplexDag) { + using GraphT = ComputationalDagVectorImplDefUnsignedT; // 1. 
Setup Instance - BspInstance instance; - auto &dag = instance.getComputationalDag(); - - dag.add_vertex(50, 1, 0); // 0 - dag.add_vertex(100, 1, 0); // 1 - dag.add_vertex(150, 1, 0); // 2 - dag.add_vertex(80, 1, 0); // 3 - dag.add_vertex(120, 1, 0); // 4 - dag.add_vertex(60, 1, 0); // 5 - dag.add_edge(0, 1); - dag.add_edge(0, 2); - dag.add_edge(1, 3); - dag.add_edge(2, 3); - dag.add_edge(2, 4); - dag.add_edge(3, 5); - dag.add_edge(4, 5); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); + + dag.AddVertex(50, 1, 0); // 0 + dag.AddVertex(100, 1, 0); // 1 + dag.AddVertex(150, 1, 0); // 2 + dag.AddVertex(80, 1, 0); // 3 + dag.AddVertex(120, 1, 0); // 4 + dag.AddVertex(60, 1, 0); // 5 + dag.AddEdge(0, 1); + dag.AddEdge(0, 2); + dag.AddEdge(1, 3); + dag.AddEdge(2, 3); + dag.AddEdge(2, 4); + dag.AddEdge(3, 5); + dag.AddEdge(4, 5); // Setup Architecture: 4 processors of type 0, 4 of type 1 - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 1, 1, 1, 1}); - instance.setDiagonalCompatibilityMatrix(2); + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0, 1, 1, 1, 1}); + instance.SetDiagonalCompatibilityMatrix(2); // 2. 
Setup Scheduler Inputs std::vector multiplicities = {1, 2, 1, 4, 2, 1}; - std::vector max_procs = {100, 100, 100, 100, 100, 100}; - std::vector>> required_proc_types(6); - required_proc_types[0] = {50, 0}; // Job 0: needs T0 - required_proc_types[1] = {100, 0}; // Job 1: needs T0 - required_proc_types[2] = {0, 150}; // Job 2: needs T1 - required_proc_types[3] = {40, 40}; // Job 3: needs T0 & T1 - required_proc_types[4] = {0, 120}; // Job 4: needs T1 - required_proc_types[5] = {60, 0}; // Job 5: needs T0 + std::vector maxProcs = {100, 100, 100, 100, 100, 100}; + std::vector>> requiredProcTypes(6); + requiredProcTypes[0] = {50, 0}; // Job 0: needs T0 + requiredProcTypes[1] = {100, 0}; // Job 1: needs T0 + requiredProcTypes[2] = {0, 150}; // Job 2: needs T1 + requiredProcTypes[3] = {40, 40}; // Job 3: needs T0 & T1 + requiredProcTypes[4] = {0, 120}; // Job 4: needs T1 + requiredProcTypes[5] = {60, 0}; // Job 5: needs T0 // 3. Run Scheduler - EftSubgraphScheduler scheduler; - scheduler.setMinWorkPerProcessor(1); - SubgraphSchedule schedule = scheduler.run(instance, multiplicities, required_proc_types, max_procs); - - BOOST_CHECK_CLOSE(schedule.makespan, 105.0, 1e-9); - - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type.size(), 6); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[0][0], 4); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[1][0], 2); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[2][1], 4); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[3][0], 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[3][1], 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[4][1], 2); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[5][0], 4); + EftSubgraphScheduler scheduler; + scheduler.SetMinWorkPerProcessor(1); + SubgraphSchedule schedule = scheduler.Run(instance, multiplicities, requiredProcTypes, maxProcs); + + BOOST_CHECK_CLOSE(schedule.makespan_, 105.0, 1e-9); + + 
BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_.size(), 6); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[0][0], 4); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[1][0], 2); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[2][1], 4); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[3][0], 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[3][1], 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[4][1], 2); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[5][0], 4); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ResourceContention) { - using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(EftSubgraphSchedulerResourceContention) { + using GraphT = ComputationalDagVectorImplDefUnsignedT; // 1. Setup Instance - BspInstance instance; - auto &dag = instance.getComputationalDag(); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); // Create a fork-join DAG: 0 -> {1,2,3} -> 4 - dag.add_vertex(10, 1, 0); // 0 - dag.add_vertex(100, 1, 0); // 1 (high rank) - dag.add_vertex(50, 1, 0); // 2 (mid rank) - dag.add_vertex(20, 1, 0); // 3 (low rank) - dag.add_vertex(10, 1, 0); // 4 - dag.add_edge(0, 1); - dag.add_edge(0, 2); - dag.add_edge(0, 3); - dag.add_edge(1, 4); - dag.add_edge(2, 4); - dag.add_edge(3, 4); + dag.AddVertex(10, 1, 0); // 0 + dag.AddVertex(100, 1, 0); // 1 (high rank) + dag.AddVertex(50, 1, 0); // 2 (mid rank) + dag.AddVertex(20, 1, 0); // 3 (low rank) + dag.AddVertex(10, 1, 0); // 4 + dag.AddEdge(0, 1); + dag.AddEdge(0, 2); + dag.AddEdge(0, 3); + dag.AddEdge(1, 4); + dag.AddEdge(2, 4); + dag.AddEdge(3, 4); // Setup Architecture: 4 processors of type 0 - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0}); - instance.setDiagonalCompatibilityMatrix(1); + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0}); + instance.SetDiagonalCompatibilityMatrix(1); // 2. 
Setup Scheduler Inputs std::vector multiplicities = {1, 2, 2, 2, 1}; - std::vector max_procs = {100, 100, 100, 100, 100}; - std::vector>> required_proc_types(5); - required_proc_types[0] = {10}; - required_proc_types[1] = {100}; - required_proc_types[2] = {50}; - required_proc_types[3] = {20}; - required_proc_types[4] = {10}; + std::vector maxProcs = {100, 100, 100, 100, 100}; + std::vector>> requiredProcTypes(5); + requiredProcTypes[0] = {10}; + requiredProcTypes[1] = {100}; + requiredProcTypes[2] = {50}; + requiredProcTypes[3] = {20}; + requiredProcTypes[4] = {10}; // 3. Run Scheduler - EftSubgraphScheduler scheduler; - scheduler.setMinWorkPerProcessor(1); - SubgraphSchedule schedule = scheduler.run(instance, multiplicities, required_proc_types, max_procs); + EftSubgraphScheduler scheduler; + scheduler.SetMinWorkPerProcessor(1); + SubgraphSchedule schedule = scheduler.Run(instance, multiplicities, requiredProcTypes, maxProcs); // 4. Assertions // Manual calculation: @@ -271,46 +271,46 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ResourceContention) { // T=27.5: Job 2 finishes. 2 workers free. Job 3 starts. Duration 20/2=10 (ends 37.5). // T=37.5: Job 3 finishes. // T=52.5: Job 1 finishes. Job 4 becomes ready. Starts with 4 workers. Duration 10/4=2.5 (ends 55.0). 
- BOOST_CHECK_CLOSE(schedule.makespan, 55.0, 1e-9); - - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type.size(), 5); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[0][0], 4); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[1][0], 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[2][0], 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[3][0], 1); - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[4][0], 4); + BOOST_CHECK_CLOSE(schedule.makespan_, 55.0, 1e-9); + + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_.size(), 5); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[0][0], 4); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[1][0], 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[2][0], 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[3][0], 1); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[4][0], 4); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ProportionalAllocation) { - using graph_t = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(EftSubgraphSchedulerProportionalAllocation) { + using GraphT = ComputationalDagVectorImplDefUnsignedT; // 1. 
Setup Instance - BspInstance instance; - auto &dag = instance.getComputationalDag(); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); // Create a fork DAG: 0 -> {1,2} - dag.add_vertex(10, 1, 0); // 0 - dag.add_vertex(300, 1, 0); // 1 (high rank) - dag.add_vertex(100, 1, 0); // 2 (low rank) - dag.add_edge(0, 1); - dag.add_edge(0, 2); + dag.AddVertex(10, 1, 0); // 0 + dag.AddVertex(300, 1, 0); // 1 (high rank) + dag.AddVertex(100, 1, 0); // 2 (low rank) + dag.AddEdge(0, 1); + dag.AddEdge(0, 2); // Setup Architecture: 10 processors of type 0 - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); - instance.setDiagonalCompatibilityMatrix(1); + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0}); + instance.SetDiagonalCompatibilityMatrix(1); // 2. Setup Scheduler Inputs std::vector multiplicities = {1, 1, 1}; - std::vector max_procs = {100, 100, 100}; - std::vector>> required_proc_types(3); - required_proc_types[0] = {10}; - required_proc_types[1] = {300}; - required_proc_types[2] = {100}; + std::vector maxProcs = {100, 100, 100}; + std::vector>> requiredProcTypes(3); + requiredProcTypes[0] = {10}; + requiredProcTypes[1] = {300}; + requiredProcTypes[2] = {100}; // 3. Run Scheduler - EftSubgraphScheduler scheduler; - scheduler.setMinWorkPerProcessor(1); - SubgraphSchedule schedule = scheduler.run(instance, multiplicities, required_proc_types, max_procs); + EftSubgraphScheduler scheduler; + scheduler.SetMinWorkPerProcessor(1); + SubgraphSchedule schedule = scheduler.Run(instance, multiplicities, requiredProcTypes, maxProcs); // 4. Assertions // Manual calculation: @@ -324,13 +324,13 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ProportionalAllocation) { // Job 1 finishes at 1 + 300/7 = 1 + 42.857... = 43.857... // Job 2 finishes at 1 + 100/3 = 1 + 33.333... = 34.333... // Makespan is 43.857... 
- BOOST_CHECK_CLOSE(schedule.makespan, 1.0 + 300.0 / 7.0, 1e-9); + BOOST_CHECK_CLOSE(schedule.makespan_, 1.0 + 300.0 / 7.0, 1e-9); - BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type.size(), 3); + BOOST_REQUIRE_EQUAL(schedule.nodeAssignedWorkerPerType_.size(), 3); // Job 0: 10 workers - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[0][0], 10); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[0][0], 10); // Job 1 (high rank): gets 7 workers (75% of 10, floored) - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[1][0], 7); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[1][0], 7); // Job 2 (low rank): gets 3 workers - BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[2][0], 3); + BOOST_CHECK_EQUAL(schedule.nodeAssignedWorkerPerType_[2][0], 3); } diff --git a/tests/filereader.cpp b/tests/filereader.cpp index e95ad03c..0f6c0917 100644 --- a/tests/filereader.cpp +++ b/tests/filereader.cpp @@ -31,7 +31,7 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) { +BOOST_AUTO_TEST_CASE(TestMtxComputationalDagVectorImpl) { // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); std::cout << cwd << std::endl; @@ -40,67 +40,67 @@ BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) { std::cout << cwd << std::endl; } - computational_dag_vector_impl_def_t graph; + ComputationalDagVectorImplDefUnsignedT graph; bool status - = file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph); + = file_reader::ReadComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph); std::cout << "STATUS:" << status << std::endl; BOOST_CHECK(status); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(graph.num_edges(), 19); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 19); // ---- Node 0 std::vector 
p0{}; std::vector c0{4, 6, 3, 5, 2}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(0).begin(), graph.parents(0).end(), p0.begin(), p0.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(0).begin(), graph.children(0).end(), c0.begin(), c0.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(0).begin(), graph.Parents(0).end(), p0.begin(), p0.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(0).begin(), graph.Children(0).end(), c0.begin(), c0.end()); // ---- Node 1 std::vector p1{}; std::vector c1{3, 5, 2, 6}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(1).begin(), graph.parents(1).end(), p1.begin(), p1.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(1).begin(), graph.children(1).end(), c1.begin(), c1.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(1).begin(), graph.Parents(1).end(), p1.begin(), p1.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(1).begin(), graph.Children(1).end(), c1.begin(), c1.end()); // ---- Node 2 std::vector p2{0, 1}; std::vector c2{3, 5}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(2).begin(), graph.parents(2).end(), p2.begin(), p2.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(2).begin(), graph.children(2).end(), c2.begin(), c2.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(2).begin(), graph.Parents(2).end(), p2.begin(), p2.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(2).begin(), graph.Children(2).end(), c2.begin(), c2.end()); // ---- Node 3 std::vector p3{0, 1, 2}; std::vector c3{5, 4, 6, 7}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(3).begin(), graph.parents(3).end(), p3.begin(), p3.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(3).begin(), graph.children(3).end(), c3.begin(), c3.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(3).begin(), graph.Parents(3).end(), p3.begin(), p3.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(3).begin(), graph.Children(3).end(), c3.begin(), c3.end()); // ---- Node 4 std::vector p4{0, 3}; std::vector c4{5, 6, 7}; - 
BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(4).begin(), graph.parents(4).end(), p4.begin(), p4.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(4).begin(), graph.children(4).end(), c4.begin(), c4.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(4).begin(), graph.Parents(4).end(), p4.begin(), p4.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(4).begin(), graph.Children(4).end(), c4.begin(), c4.end()); // ---- Node 5 std::vector p5{0, 1, 2, 3, 4}; std::vector c5{}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(5).begin(), graph.parents(5).end(), p5.begin(), p5.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(5).begin(), graph.children(5).end(), c5.begin(), c5.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(5).begin(), graph.Parents(5).end(), p5.begin(), p5.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(5).begin(), graph.Children(5).end(), c5.begin(), c5.end()); // ---- Node 6 std::vector p6{0, 1, 3, 4}; std::vector c6{7}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(6).begin(), graph.parents(6).end(), p6.begin(), p6.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(6).begin(), graph.children(6).end(), c6.begin(), c6.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(6).begin(), graph.Parents(6).end(), p6.begin(), p6.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(6).begin(), graph.Children(6).end(), c6.begin(), c6.end()); // ---- Node 7 std::vector p7{3, 4, 6}; std::vector c7{}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(7).begin(), graph.parents(7).end(), p7.begin(), p7.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(7).begin(), graph.children(7).end(), c7.begin(), c7.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(7).begin(), graph.Parents(7).end(), p7.begin(), p7.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(7).begin(), graph.Children(7).end(), c7.begin(), c7.end()); } -BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) { +BOOST_AUTO_TEST_CASE(TestMtxBoostGraph) { // Getting root git directory 
std::filesystem::path cwd = std::filesystem::current_path(); std::cout << cwd << std::endl; @@ -109,67 +109,67 @@ BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) { std::cout << cwd << std::endl; } - boost_graph_int_t graph; + BoostGraphIntT graph; bool status - = file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph); + = file_reader::ReadComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph); std::cout << "STATUS:" << status << std::endl; BOOST_CHECK(status); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(graph.num_edges(), 19); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 19); // ---- Node 0 std::vector p0{}; std::vector c0{4, 6, 3, 5, 2}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(0).begin(), graph.parents(0).end(), p0.begin(), p0.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(0).begin(), graph.children(0).end(), c0.begin(), c0.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(0).begin(), graph.Parents(0).end(), p0.begin(), p0.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(0).begin(), graph.Children(0).end(), c0.begin(), c0.end()); // ---- Node 1 std::vector p1{}; std::vector c1{3, 5, 2, 6}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(1).begin(), graph.parents(1).end(), p1.begin(), p1.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(1).begin(), graph.children(1).end(), c1.begin(), c1.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(1).begin(), graph.Parents(1).end(), p1.begin(), p1.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(1).begin(), graph.Children(1).end(), c1.begin(), c1.end()); // ---- Node 2 std::vector p2{0, 1}; std::vector c2{3, 5}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(2).begin(), graph.parents(2).end(), p2.begin(), p2.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(2).begin(), graph.children(2).end(), c2.begin(), c2.end()); + 
BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(2).begin(), graph.Parents(2).end(), p2.begin(), p2.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(2).begin(), graph.Children(2).end(), c2.begin(), c2.end()); // ---- Node 3 std::vector p3{0, 1, 2}; std::vector c3{5, 4, 6, 7}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(3).begin(), graph.parents(3).end(), p3.begin(), p3.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(3).begin(), graph.children(3).end(), c3.begin(), c3.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(3).begin(), graph.Parents(3).end(), p3.begin(), p3.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(3).begin(), graph.Children(3).end(), c3.begin(), c3.end()); // ---- Node 4 std::vector p4{0, 3}; std::vector c4{5, 6, 7}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(4).begin(), graph.parents(4).end(), p4.begin(), p4.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(4).begin(), graph.children(4).end(), c4.begin(), c4.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(4).begin(), graph.Parents(4).end(), p4.begin(), p4.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(4).begin(), graph.Children(4).end(), c4.begin(), c4.end()); // ---- Node 5 std::vector p5{0, 1, 2, 3, 4}; std::vector c5{}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(5).begin(), graph.parents(5).end(), p5.begin(), p5.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(5).begin(), graph.children(5).end(), c5.begin(), c5.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(5).begin(), graph.Parents(5).end(), p5.begin(), p5.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(5).begin(), graph.Children(5).end(), c5.begin(), c5.end()); // ---- Node 6 std::vector p6{0, 1, 3, 4}; std::vector c6{7}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(6).begin(), graph.parents(6).end(), p6.begin(), p6.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(6).begin(), graph.children(6).end(), c6.begin(), c6.end()); + 
BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(6).begin(), graph.Parents(6).end(), p6.begin(), p6.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(6).begin(), graph.Children(6).end(), c6.begin(), c6.end()); // ---- Node 7 std::vector p7{3, 4, 6}; std::vector c7{}; - BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(7).begin(), graph.parents(7).end(), p7.begin(), p7.end()); - BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(7).begin(), graph.children(7).end(), c7.begin(), c7.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Parents(7).begin(), graph.Parents(7).end(), p7.begin(), p7.end()); + BOOST_CHECK_EQUAL_COLLECTIONS(graph.Children(7).begin(), graph.Children(7).end(), c7.begin(), c7.end()); } -BOOST_AUTO_TEST_CASE(test_bicgstab) { +BOOST_AUTO_TEST_CASE(TestBicgstab) { // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); std::cout << cwd << std::endl; @@ -178,16 +178,16 @@ BOOST_AUTO_TEST_CASE(test_bicgstab) { std::cout << cwd << std::endl; } - computational_dag_vector_impl_def_t graph; + ComputationalDagVectorImplDefUnsignedT graph; bool status - = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph); + = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(graph.num_vertices(), 54); + BOOST_CHECK_EQUAL(graph.NumVertices(), 54); } -BOOST_AUTO_TEST_CASE(test_hdag_boost) { +BOOST_AUTO_TEST_CASE(TestHdagBoost) { // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); std::cout << cwd << std::endl; @@ -196,52 +196,52 @@ BOOST_AUTO_TEST_CASE(test_hdag_boost) { std::cout << cwd << std::endl; } - boost_graph_int_t graph; + BoostGraphIntT graph; bool status - = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph); + = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / 
"data/spaa/tiny/instance_bicgstab.hdag").string(), graph); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(graph.num_vertices(), 54); + BOOST_CHECK_EQUAL(graph.NumVertices(), 54); } -BOOST_AUTO_TEST_CASE(test_arch_smpl) { +BOOST_AUTO_TEST_CASE(TestArchSmpl) { std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { cwd = cwd.parent_path(); } - BspArchitecture arch; + BspArchitecture arch; - bool status = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch); + bool status = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(arch.numberOfProcessors(), 3); - BOOST_CHECK_EQUAL(arch.communicationCosts(), 3); - BOOST_CHECK_EQUAL(arch.synchronisationCosts(), 5); - BOOST_CHECK_EQUAL(arch.getMemoryConstraintType(), MEMORY_CONSTRAINT_TYPE::NONE); + BOOST_CHECK_EQUAL(arch.NumberOfProcessors(), 3); + BOOST_CHECK_EQUAL(arch.CommunicationCosts(), 3); + BOOST_CHECK_EQUAL(arch.SynchronisationCosts(), 5); + BOOST_CHECK_EQUAL(arch.GetMemoryConstraintType(), MemoryConstraintType::NONE); } -BOOST_AUTO_TEST_CASE(test_arch_smpl_signed) { +BOOST_AUTO_TEST_CASE(TestArchSmplSigned) { std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { cwd = cwd.parent_path(); } - BspArchitecture arch; + BspArchitecture arch; - bool status = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch); + bool status = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(arch.numberOfProcessors(), 3); - BOOST_CHECK_EQUAL(arch.communicationCosts(), 3); - BOOST_CHECK_EQUAL(arch.synchronisationCosts(), 5); - BOOST_CHECK_EQUAL(arch.getMemoryConstraintType(), MEMORY_CONSTRAINT_TYPE::NONE); + BOOST_CHECK_EQUAL(arch.NumberOfProcessors(), 3); + 
BOOST_CHECK_EQUAL(arch.CommunicationCosts(), 3); + BOOST_CHECK_EQUAL(arch.SynchronisationCosts(), 5); + BOOST_CHECK_EQUAL(arch.GetMemoryConstraintType(), MemoryConstraintType::NONE); } -BOOST_AUTO_TEST_CASE(test_k_means) { +BOOST_AUTO_TEST_CASE(TestKMeans) { std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { @@ -253,34 +253,34 @@ BOOST_AUTO_TEST_CASE(test_k_means) { std::vector comm{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0}; - computational_dag_vector_impl_def_t graph; + ComputationalDagVectorImplDefUnsignedT graph; - bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(graph.num_vertices(), 40); - BOOST_CHECK_EQUAL(graph.num_edges(), 45); + BOOST_CHECK_EQUAL(graph.NumVertices(), 40); + BOOST_CHECK_EQUAL(graph.NumEdges(), 45); - for (const auto &v : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(v), work[v]); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(v), comm[v]); + for (const auto &v : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(v), work[v]); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(v), comm[v]); } - computational_dag_edge_idx_vector_impl_def_t graph2; + ComputationalDagEdgeIdxVectorImplDefT graph2; - status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph2); + status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph2); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(graph2.num_vertices(), 40); - BOOST_CHECK_EQUAL(graph2.num_edges(), 45); + BOOST_CHECK_EQUAL(graph2.NumVertices(), 40); + 
BOOST_CHECK_EQUAL(graph2.NumEdges(), 45); - for (const auto &v : graph2.vertices()) { - BOOST_CHECK_EQUAL(graph2.vertex_work_weight(v), work[v]); - BOOST_CHECK_EQUAL(graph2.vertex_comm_weight(v), comm[v]); + for (const auto &v : graph2.Vertices()) { + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(v), work[v]); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(v), comm[v]); } } -BOOST_AUTO_TEST_CASE(test_dot_graph) { +BOOST_AUTO_TEST_CASE(TestDotGraph) { std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { @@ -292,24 +292,24 @@ BOOST_AUTO_TEST_CASE(test_dot_graph) { std::vector mem{3, 5, 5, 3, 5, 5, 3, 5, 5, 5, 5}; std::vector type{0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0}; - computational_dag_vector_impl_def_t graph; + ComputationalDagVectorImplDefUnsignedT graph; - bool status = file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph); + bool status = file_reader::ReadComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 10); - BOOST_CHECK_EQUAL(graph.num_vertex_types(), 2); - - for (const auto &v : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(v), work[v]); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(v), comm[v]); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(v), mem[v]); - BOOST_CHECK_EQUAL(graph.vertex_type(v), type[v]); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 10); + BOOST_CHECK_EQUAL(graph.NumVertexTypes(), 2); + + for (const auto &v : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(v), work[v]); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(v), comm[v]); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(v), mem[v]); + BOOST_CHECK_EQUAL(graph.VertexType(v), type[v]); } } -BOOST_AUTO_TEST_CASE(test_dot_graph_boost) { +BOOST_AUTO_TEST_CASE(TestDotGraphBoost) { 
std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { @@ -321,19 +321,19 @@ BOOST_AUTO_TEST_CASE(test_dot_graph_boost) { std::vector mem{3, 5, 5, 3, 5, 5, 3, 5, 5, 5, 5}; std::vector type{0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0}; - boost_graph_int_t graph; + BoostGraphIntT graph; - bool status = file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph); + bool status = file_reader::ReadComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(graph.num_vertices(), 11); - BOOST_CHECK_EQUAL(graph.num_edges(), 10); - BOOST_CHECK_EQUAL(graph.num_vertex_types(), 2); - - for (const auto &v : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(v), work[v]); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(v), comm[v]); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(v), mem[v]); - BOOST_CHECK_EQUAL(graph.vertex_type(v), type[v]); + BOOST_CHECK_EQUAL(graph.NumVertices(), 11); + BOOST_CHECK_EQUAL(graph.NumEdges(), 10); + BOOST_CHECK_EQUAL(graph.NumVertexTypes(), 2); + + for (const auto &v : graph.Vertices()) { + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(v), work[v]); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(v), comm[v]); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(v), mem[v]); + BOOST_CHECK_EQUAL(graph.VertexType(v), type[v]); } } diff --git a/tests/graph_vector_adapter.cpp b/tests/graph_vector_adapter.cpp index 66fd7595..dc501542 100644 --- a/tests/graph_vector_adapter.cpp +++ b/tests/graph_vector_adapter.cpp @@ -26,7 +26,7 @@ limitations under the License. 
#include "osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp" #include "osp/bsp/scheduler/Serial.hpp" #include "osp/coarser/Sarkar/Sarkar.hpp" #include "osp/coarser/Sarkar/SarkarMul.hpp" @@ -41,8 +41,8 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(test_dag_vector_adapter_edge) { - std::vector> out_neighbors{ +BOOST_AUTO_TEST_CASE(TestDagVectorAdapterEdge) { + std::vector> outNeighbors{ {1, 2, 3}, {4, 6}, {4, 5}, @@ -53,7 +53,7 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter_edge) { {} }; - std::vector> in_neighbors{ + std::vector> inNeighbors{ {}, {0}, {0}, @@ -64,74 +64,74 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter_edge) { {4, 3} }; - using v_impl = cdag_vertex_impl; - using graph_t = dag_vector_adapter; - using graph_constr_t = computational_dag_edge_idx_vector_impl; - using CoarseGraphType = Compact_Sparse_Graph, - std::size_t, - v_workw_t, - v_workw_t, - v_workw_t, - v_type_t>; - - graph_t graph(out_neighbors, in_neighbors); - - for (auto v : graph.vertices()) { - graph.set_vertex_work_weight(v, 10); + using VImpl = CDagVertexImpl; + using GraphT = DagVectorAdapter; + using GraphConstrT = ComputationalDagEdgeIdxVectorImpl; + using CoarseGraphType = CompactSparseGraph, + std::size_t, + VWorkwT, + VWorkwT, + VWorkwT, + VTypeT>; + + GraphT graph(outNeighbors, inNeighbors); + + for (auto v : graph.Vertices()) { + graph.SetVertexWorkWeight(v, 10); } - BspInstance instance; - instance.getComputationalDag() = graph; + BspInstance instance; + instance.GetComputationalDag() = graph; - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 
instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - instance.setDiagonalCompatibilityMatrix(2); - instance.setSynchronisationCosts(1000); - instance.setCommunicationCosts(1); + instance.SetDiagonalCompatibilityMatrix(2); + instance.SetSynchronisationCosts(1000); + instance.SetCommunicationCosts(1); // Set up the scheduler - GrowLocalAutoCores growlocal; - BspLocking locking; - GreedyChildren children; - kl_total_lambda_comm_improver kl(42); - kl.setSuperstepRemoveStrengthParameter(2.0); - kl.setTimeQualityParameter(5.0); - ComboScheduler growlocal_kl(growlocal, kl); - ComboScheduler locking_kl(locking, kl); - ComboScheduler children_kl(children, kl); - - GreedyMetaScheduler scheduler; - scheduler.addScheduler(locking_kl); - scheduler.addScheduler(children_kl); - scheduler.addSerialScheduler(); - - IsomorphicSubgraphScheduler iso_scheduler(scheduler); - - auto partition = iso_scheduler.compute_partition(instance); - - graph_constr_t corase_graph; - coarser_util::construct_coarse_dag(instance.getComputationalDag(), corase_graph, partition); - bool acyc = is_acyclic(corase_graph); + GrowLocalAutoCores growlocal; + BspLocking locking; + GreedyChildren children; + KlTotalLambdaCommImprover kl(42); + kl.SetSuperstepRemoveStrengthParameter(2.0); + kl.SetTimeQualityParameter(5.0); + ComboScheduler growlocalKl(growlocal, kl); + ComboScheduler lockingKl(locking, kl); + ComboScheduler childrenKl(children, kl); + + GreedyMetaScheduler scheduler; + scheduler.AddScheduler(lockingKl); + scheduler.AddScheduler(childrenKl); + scheduler.AddSerialScheduler(); + + IsomorphicSubgraphScheduler isoScheduler(scheduler); + + auto partition = isoScheduler.ComputePartition(instance); + + GraphConstrT coraseGraph; + 
coarser_util::ConstructCoarseDag(instance.GetComputationalDag(), coraseGraph, partition); + bool acyc = IsAcyclic(coraseGraph); BOOST_CHECK(acyc); - SarkarMul coarser; + SarkarMul coarser; - CoarseGraphType coarse_dag; - std::vector reverse_vertex_map; - coarser.coarsenDag(graph, coarse_dag, reverse_vertex_map); + CoarseGraphType coarseDag; + std::vector reverseVertexMap; + coarser.CoarsenDag(graph, coarseDag, reverseVertexMap); - acyc = is_acyclic(coarse_dag); + acyc = IsAcyclic(coarseDag); BOOST_CHECK(acyc); } -BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { - std::vector> out_neighbors{ +BOOST_AUTO_TEST_CASE(TestDagVectorAdapter) { + std::vector> outNeighbors{ {1, 2, 3}, {4, 6}, {4, 5}, @@ -142,7 +142,7 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { {} }; - std::vector> in_neighbors{ + std::vector> inNeighbors{ {}, {0}, {0}, @@ -153,68 +153,68 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { {4, 3} }; - using v_impl = cdag_vertex_impl; - using graph_t = dag_vector_adapter; - using graph_constr_t = computational_dag_vector_impl; - using CoarseGraphType = Compact_Sparse_Graph, - std::size_t, - v_workw_t, - v_workw_t, - v_workw_t, - v_type_t>; - - graph_t graph(out_neighbors, in_neighbors); - - for (auto v : graph.vertices()) { - graph.set_vertex_work_weight(v, 10); + using VImpl = CDagVertexImpl; + using GraphT = DagVectorAdapter; + using GraphConstrT = ComputationalDagVectorImpl; + using CoarseGraphType = CompactSparseGraph, + std::size_t, + VWorkwT, + VWorkwT, + VWorkwT, + VTypeT>; + + GraphT graph(outNeighbors, inNeighbors); + + for (auto v : graph.Vertices()) { + graph.SetVertexWorkWeight(v, 10); } - BspInstance instance; - instance.getComputationalDag() = graph; + BspInstance instance; + instance.GetComputationalDag() = graph; - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); - instance.setDiagonalCompatibilityMatrix(2); - instance.setSynchronisationCosts(1000); - instance.setCommunicationCosts(1); + instance.SetDiagonalCompatibilityMatrix(2); + instance.SetSynchronisationCosts(1000); + instance.SetCommunicationCosts(1); // Set up the scheduler - GrowLocalAutoCores growlocal; - BspLocking locking; - GreedyChildren children; - kl_total_lambda_comm_improver kl(42); - kl.setSuperstepRemoveStrengthParameter(2.0); - kl.setTimeQualityParameter(5.0); - ComboScheduler growlocal_kl(growlocal, kl); - ComboScheduler locking_kl(locking, kl); - ComboScheduler children_kl(children, kl); - - GreedyMetaScheduler scheduler; - scheduler.addScheduler(locking_kl); - scheduler.addScheduler(children_kl); - scheduler.addSerialScheduler(); - - IsomorphicSubgraphScheduler iso_scheduler(scheduler); - - auto partition = iso_scheduler.compute_partition(instance); - - graph_constr_t corase_graph; - coarser_util::construct_coarse_dag(instance.getComputationalDag(), corase_graph, partition); - bool acyc = is_acyclic(corase_graph); + GrowLocalAutoCores growlocal; + BspLocking locking; + GreedyChildren children; + KlTotalLambdaCommImprover kl(42); + kl.SetSuperstepRemoveStrengthParameter(2.0); + kl.SetTimeQualityParameter(5.0); + ComboScheduler growlocalKl(growlocal, kl); + ComboScheduler lockingKl(locking, kl); + ComboScheduler childrenKl(children, kl); + + GreedyMetaScheduler scheduler; + scheduler.AddScheduler(lockingKl); + scheduler.AddScheduler(childrenKl); + scheduler.AddSerialScheduler(); + + IsomorphicSubgraphScheduler isoScheduler(scheduler); + + auto partition = isoScheduler.ComputePartition(instance); + + GraphConstrT coraseGraph; + coarser_util::ConstructCoarseDag(instance.GetComputationalDag(), coraseGraph, partition); + bool acyc = IsAcyclic(coraseGraph); BOOST_CHECK(acyc); - SarkarMul 
coarser; + SarkarMul coarser; - CoarseGraphType coarse_dag; - std::vector reverse_vertex_map; - coarser.coarsenDag(graph, coarse_dag, reverse_vertex_map); + CoarseGraphType coarseDag; + std::vector reverseVertexMap; + coarser.CoarsenDag(graph, coarseDag, reverseVertexMap); - acyc = is_acyclic(coarse_dag); + acyc = IsAcyclic(coarseDag); BOOST_CHECK(acyc); } diff --git a/tests/graph_vector_edge_desc_impl.cpp b/tests/graph_vector_edge_desc_impl.cpp index 1c6770c9..df91b022 100644 --- a/tests/graph_vector_edge_desc_impl.cpp +++ b/tests/graph_vector_edge_desc_impl.cpp @@ -30,105 +30,105 @@ limitations under the License. using namespace osp; -computational_dag_edge_idx_vector_impl_def_t constr_graph_1() { - computational_dag_edge_idx_vector_impl_def_t graph; +ComputationalDagEdgeIdxVectorImplDefT ConstrGraph1() { + ComputationalDagEdgeIdxVectorImplDefT graph; - using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx; + using VertexIdx = ComputationalDagEdgeIdxVectorImplDefT::VertexIdx; - vertex_idx v1 = graph.add_vertex(1, 2, 3, 4); - vertex_idx v2 = graph.add_vertex(5, 6, 7, 8); - vertex_idx v3 = graph.add_vertex(9, 10, 11, 12); - vertex_idx v4 = graph.add_vertex(13, 14, 15, 16); - vertex_idx v5 = graph.add_vertex(17, 18, 19, 20); - vertex_idx v6 = graph.add_vertex(21, 22, 23, 24); - vertex_idx v7 = graph.add_vertex(25, 26, 27, 28); - vertex_idx v8 = graph.add_vertex(29, 30, 31, 32); + VertexIdx v1 = graph.AddVertex(1, 2, 3, 4); + VertexIdx v2 = graph.AddVertex(5, 6, 7, 8); + VertexIdx v3 = graph.AddVertex(9, 10, 11, 12); + VertexIdx v4 = graph.AddVertex(13, 14, 15, 16); + VertexIdx v5 = graph.AddVertex(17, 18, 19, 20); + VertexIdx v6 = graph.AddVertex(21, 22, 23, 24); + VertexIdx v7 = graph.AddVertex(25, 26, 27, 28); + VertexIdx v8 = graph.AddVertex(29, 30, 31, 32); - auto pair = graph.add_edge(v1, v2); + auto pair = graph.AddEdge(v1, v2); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v1, v3); + pair = graph.AddEdge(v1, v3); 
BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v1, v4); + pair = graph.AddEdge(v1, v4); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v2, v5); + pair = graph.AddEdge(v2, v5); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v2, v7); + pair = graph.AddEdge(v2, v7); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v3, v5); + pair = graph.AddEdge(v3, v5); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v3, v6); + pair = graph.AddEdge(v3, v6); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v4, v8); + pair = graph.AddEdge(v4, v8); BOOST_CHECK_EQUAL(pair.second, true); - pair = graph.add_edge(v5, v8); + pair = graph.AddEdge(v5, v8); BOOST_CHECK_EQUAL(pair.second, true); - BOOST_CHECK_EQUAL(graph.num_edges(), 9); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 9); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); return graph; } -BOOST_AUTO_TEST_CASE(test_empty_dag_edge_idx) { - computational_dag_edge_idx_vector_impl_def_t graph; - BOOST_CHECK_EQUAL(graph.num_edges(), 0); - BOOST_CHECK_EQUAL(graph.num_vertices(), 0); +BOOST_AUTO_TEST_CASE(TestEmptyDagEdgeIdx) { + ComputationalDagEdgeIdxVectorImplDefT graph; + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 0); } -BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { - computational_dag_edge_idx_vector_impl_def_t graph = constr_graph_1(); +BOOST_AUTO_TEST_CASE(TestDagEdgeIdx) { + ComputationalDagEdgeIdxVectorImplDefT graph = ConstrGraph1(); - using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx; + using VertexIdx = ComputationalDagEdgeIdxVectorImplDefT::VertexIdx; - std::vector edge_sources{0, 0, 0, 1, 1, 2, 2, 3, 4}; - std::vector edge_targets{1, 2, 3, 4, 6, 4, 5, 7, 7}; + std::vector edgeSources{0, 0, 0, 1, 1, 2, 2, 3, 4}; + std::vector edgeTargets{1, 2, 3, 4, 6, 4, 5, 7, 7}; - size_t edge_idx = 0; - for (const auto &edge : graph.edges()) { - 
BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); - BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); - edge_idx++; + size_t edgeIdx = 0; + for (const auto &edge : graph.Edges()) { + BOOST_CHECK_EQUAL(edge.source_, edgeSources[edgeIdx]); + BOOST_CHECK_EQUAL(edge.target_, edgeTargets[edgeIdx]); + edgeIdx++; } - edge_idx = 0; - for (const auto &edge : edges(graph)) { - BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); - BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); - edge_idx++; + edgeIdx = 0; + for (const auto &edge : Edges(graph)) { + BOOST_CHECK_EQUAL(edge.source_, edgeSources[edgeIdx]); + BOOST_CHECK_EQUAL(edge.target_, edgeTargets[edgeIdx]); + edgeIdx++; } - edge_idx = 0; - for (auto &edge : edges(graph)) { - BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); - BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); - edge_idx++; + edgeIdx = 0; + for (auto &edge : Edges(graph)) { + BOOST_CHECK_EQUAL(edge.source_, edgeSources[edgeIdx]); + BOOST_CHECK_EQUAL(edge.target_, edgeTargets[edgeIdx]); + edgeIdx++; } - edge_idx = 0; - for (const auto edge : edges(graph)) { - BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); - BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); - edge_idx++; + edgeIdx = 0; + for (const auto edge : Edges(graph)) { + BOOST_CHECK_EQUAL(edge.source_, edgeSources[edgeIdx]); + BOOST_CHECK_EQUAL(edge.target_, edgeTargets[edgeIdx]); + edgeIdx++; } - edge_idx = 0; - for (auto edge : edges(graph)) { - BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); - BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); - edge_idx++; + edgeIdx = 0; + for (auto edge : Edges(graph)) { + BOOST_CHECK_EQUAL(edge.source_, edgeSources[edgeIdx]); + BOOST_CHECK_EQUAL(edge.target_, edgeTargets[edgeIdx]); + edgeIdx++; } - std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; + std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; - std::vector> out_neighbors{ + std::vector> outNeighbors{ {1, 2, 3}, {4, 6}, {4, 5}, @@ -139,7 +139,7 @@ 
BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { {} }; - std::vector> in_neighbors{ + std::vector> inNeighbors{ {}, {0}, {0}, @@ -152,200 +152,200 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { size_t idx = 0; - for (const auto &v : graph.vertices()) { + for (const auto &v : graph.Vertices()) { BOOST_CHECK_EQUAL(v, vertices[idx++]); size_t i = 0; - for (const auto &e : graph.children(v)) { - BOOST_CHECK_EQUAL(e, out_neighbors[v][i++]); + for (const auto &e : graph.Children(v)) { + BOOST_CHECK_EQUAL(e, outNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.out_edges(v)) { - BOOST_CHECK_EQUAL(e.target, out_neighbors[v][i++]); + for (const auto &e : graph.OutEdges(v)) { + BOOST_CHECK_EQUAL(e.target_, outNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.out_edges(v)) { - BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[v][i++]); + for (const auto &e : graph.OutEdges(v)) { + BOOST_CHECK_EQUAL(Target(e, graph), outNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.out_edges(v)) { - BOOST_CHECK_EQUAL(graph.target(e), out_neighbors[v][i++]); + for (const auto &e : graph.OutEdges(v)) { + BOOST_CHECK_EQUAL(graph.Target(e), outNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.parents(v)) { - BOOST_CHECK_EQUAL(e, in_neighbors[v][i++]); + for (const auto &e : graph.Parents(v)) { + BOOST_CHECK_EQUAL(e, inNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.in_edges(v)) { - BOOST_CHECK_EQUAL(e.source, in_neighbors[v][i++]); + for (const auto &e : graph.InEdges(v)) { + BOOST_CHECK_EQUAL(e.source_, inNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.in_edges(v)) { - BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[v][i++]); + for (const auto &e : graph.InEdges(v)) { + BOOST_CHECK_EQUAL(Source(e, graph), inNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.in_edges(v)) { - BOOST_CHECK_EQUAL(graph.source(e), in_neighbors[v][i++]); + for (const auto &e : graph.InEdges(v)) { + BOOST_CHECK_EQUAL(graph.Source(e), inNeighbors[v][i++]); } i = 0; - 
for (const auto &e : in_edges(v, graph)) { - BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[v][i++]); + for (const auto &e : InEdges(v, graph)) { + BOOST_CHECK_EQUAL(Source(e, graph), inNeighbors[v][i++]); } i = 0; - for (const auto &e : out_edges(v, graph)) { - BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[v][i++]); + for (const auto &e : OutEdges(v, graph)) { + BOOST_CHECK_EQUAL(Target(e, graph), outNeighbors[v][i++]); } - BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[v].size()); - BOOST_CHECK_EQUAL(graph.out_degree(v), out_neighbors[v].size()); + BOOST_CHECK_EQUAL(graph.InDegree(v), inNeighbors[v].size()); + BOOST_CHECK_EQUAL(graph.OutDegree(v), outNeighbors[v].size()); } } -BOOST_AUTO_TEST_CASE(test_util_1) { - const computational_dag_edge_idx_vector_impl_def_t graph = constr_graph_1(); +BOOST_AUTO_TEST_CASE(TestUtil1) { + const ComputationalDagEdgeIdxVectorImplDefT graph = ConstrGraph1(); - BOOST_CHECK_EQUAL(graph.num_edges(), 9); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 9); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); - auto sources = source_vertices(graph); + auto sources = SourceVertices(graph); BOOST_CHECK_EQUAL(sources.size(), 1); BOOST_CHECK_EQUAL(sources[0], 0); - auto sinks = sink_vertices(graph); + auto sinks = SinkVertices(graph); BOOST_CHECK_EQUAL(sinks.size(), 3); BOOST_CHECK_EQUAL(sinks[0], 5); BOOST_CHECK_EQUAL(sinks[1], 6); BOOST_CHECK_EQUAL(sinks[2], 7); - const auto pair = edge_desc(0, 1, graph); + const auto pair = EdgeDesc(0, 1, graph); BOOST_CHECK_EQUAL(pair.second, true); - BOOST_CHECK_EQUAL(source(pair.first, graph), 0); - BOOST_CHECK_EQUAL(target(pair.first, graph), 1); - BOOST_CHECK_EQUAL(edge(0, 1, graph), true); + BOOST_CHECK_EQUAL(Source(pair.first, graph), 0); + BOOST_CHECK_EQUAL(Target(pair.first, graph), 1); + BOOST_CHECK_EQUAL(Edge(0, 1, graph), true); - const auto pair2 = edge_desc(0, 4, graph); + const auto pair2 = EdgeDesc(0, 4, graph); BOOST_CHECK_EQUAL(pair2.second, 
false); - BOOST_CHECK_EQUAL(edge(0, 4, graph), false); + BOOST_CHECK_EQUAL(Edge(0, 4, graph), false); - const auto pair3 = edge_desc(1, 4, graph); + const auto pair3 = EdgeDesc(1, 4, graph); BOOST_CHECK_EQUAL(pair3.second, true); - BOOST_CHECK_EQUAL(source(pair3.first, graph), 1); - BOOST_CHECK_EQUAL(target(pair3.first, graph), 4); - BOOST_CHECK_EQUAL(edge(1, 4, graph), true); - - BOOST_CHECK_EQUAL(has_path(0, 1, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 2, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 3, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 4, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 5, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 6, graph), true); - BOOST_CHECK_EQUAL(has_path(0, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 0, graph), false); - BOOST_CHECK_EQUAL(has_path(1, 4, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 6, graph), true); - BOOST_CHECK_EQUAL(has_path(2, 4, graph), true); - BOOST_CHECK_EQUAL(has_path(2, 5, graph), true); - BOOST_CHECK_EQUAL(has_path(2, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(3, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(4, 7, graph), true); - BOOST_CHECK_EQUAL(has_path(1, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(1, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(2, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(3, 6, graph), false); 
- BOOST_CHECK_EQUAL(has_path(4, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(4, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 6, graph), false); - BOOST_CHECK_EQUAL(has_path(5, 7, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(6, 7, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 1, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 2, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 3, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 4, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 5, graph), false); - BOOST_CHECK_EQUAL(has_path(7, 6, graph), false); - - const auto long_edges = long_edges_in_triangles(graph); - - BOOST_CHECK_EQUAL(long_edges.size(), 0); + BOOST_CHECK_EQUAL(Source(pair3.first, graph), 1); + BOOST_CHECK_EQUAL(Target(pair3.first, graph), 4); + BOOST_CHECK_EQUAL(Edge(1, 4, graph), true); + + BOOST_CHECK_EQUAL(HasPath(0, 1, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 2, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 3, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 4, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 5, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 6, graph), true); + BOOST_CHECK_EQUAL(HasPath(0, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 0, graph), false); + 
BOOST_CHECK_EQUAL(HasPath(5, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 0, graph), false); + BOOST_CHECK_EQUAL(HasPath(1, 4, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 6, graph), true); + BOOST_CHECK_EQUAL(HasPath(2, 4, graph), true); + BOOST_CHECK_EQUAL(HasPath(2, 5, graph), true); + BOOST_CHECK_EQUAL(HasPath(2, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(3, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(4, 7, graph), true); + BOOST_CHECK_EQUAL(HasPath(1, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(1, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(2, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(3, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(4, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 6, graph), false); + BOOST_CHECK_EQUAL(HasPath(5, 7, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 2, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(6, 7, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 1, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 2, graph), false); + 
BOOST_CHECK_EQUAL(HasPath(7, 3, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 4, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 5, graph), false); + BOOST_CHECK_EQUAL(HasPath(7, 6, graph), false); + + const auto longEdges = LongEdgesInTriangles(graph); + + BOOST_CHECK_EQUAL(longEdges.size(), 0); } -BOOST_AUTO_TEST_CASE(test_constr_dag) { - computational_dag_edge_idx_vector_impl_def_int_t graph; - - graph.add_vertex(1, 2, 3); - graph.add_vertex(5, 6, 7); - graph.add_vertex(9, 10, 11); - graph.add_vertex(13, 14, 15); - - graph.add_edge(0, 1); - graph.add_edge(0, 2); - graph.add_edge(0, 3); - - computational_dag_edge_idx_vector_impl_def_int_t graph_2(graph); - - BOOST_CHECK_EQUAL(graph_2.num_edges(), 3); - BOOST_CHECK_EQUAL(graph_2.num_vertices(), 4); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(0), 1); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(0), 2); - BOOST_CHECK_EQUAL(graph_2.vertex_mem_weight(0), 3); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(1), 5); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(1), 6); - BOOST_CHECK_EQUAL(graph_2.vertex_mem_weight(1), 7); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(2), 9); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(2), 10); - BOOST_CHECK_EQUAL(graph_2.vertex_mem_weight(2), 11); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(3), 13); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(3), 14); - - boost_graph_int_t b_g1; - - b_g1.add_vertex(1, 2, 3, 4); - b_g1.add_vertex(5, 6, 7, 8); - - b_g1.add_edge(0, 1, 9); - - computational_dag_edge_idx_vector_impl_def_int_t graph_3(b_g1); - - BOOST_CHECK_EQUAL(graph_3.num_edges(), 1); - BOOST_CHECK_EQUAL(graph_3.num_vertices(), 2); - BOOST_CHECK_EQUAL(graph_3.vertex_work_weight(0), 1); - BOOST_CHECK_EQUAL(graph_3.vertex_comm_weight(0), 2); - BOOST_CHECK_EQUAL(graph_3.vertex_mem_weight(0), 3); - BOOST_CHECK_EQUAL(graph_3.vertex_work_weight(1), 5); - BOOST_CHECK_EQUAL(graph_3.vertex_comm_weight(1), 6); - BOOST_CHECK_EQUAL(graph_3.vertex_mem_weight(1), 7); 
+BOOST_AUTO_TEST_CASE(TestConstrDag) { + ComputationalDagEdgeIdxVectorImplDefIntT graph; + + graph.AddVertex(1, 2, 3); + graph.AddVertex(5, 6, 7); + graph.AddVertex(9, 10, 11); + graph.AddVertex(13, 14, 15); + + graph.AddEdge(0, 1); + graph.AddEdge(0, 2); + graph.AddEdge(0, 3); + + ComputationalDagEdgeIdxVectorImplDefIntT graph2(graph); + + BOOST_CHECK_EQUAL(graph2.NumEdges(), 3); + BOOST_CHECK_EQUAL(graph2.NumVertices(), 4); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(0), 1); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(0), 2); + BOOST_CHECK_EQUAL(graph2.VertexMemWeight(0), 3); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(1), 5); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(1), 6); + BOOST_CHECK_EQUAL(graph2.VertexMemWeight(1), 7); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(2), 9); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(2), 10); + BOOST_CHECK_EQUAL(graph2.VertexMemWeight(2), 11); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(3), 13); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(3), 14); + + BoostGraphIntT bG1; + + bG1.AddVertex(1, 2, 3, 4); + bG1.AddVertex(5, 6, 7, 8); + + bG1.AddEdge(0, 1, 9); + + ComputationalDagEdgeIdxVectorImplDefIntT graph3(bG1); + + BOOST_CHECK_EQUAL(graph3.NumEdges(), 1); + BOOST_CHECK_EQUAL(graph3.NumVertices(), 2); + BOOST_CHECK_EQUAL(graph3.VertexWorkWeight(0), 1); + BOOST_CHECK_EQUAL(graph3.VertexCommWeight(0), 2); + BOOST_CHECK_EQUAL(graph3.VertexMemWeight(0), 3); + BOOST_CHECK_EQUAL(graph3.VertexWorkWeight(1), 5); + BOOST_CHECK_EQUAL(graph3.VertexCommWeight(1), 6); + BOOST_CHECK_EQUAL(graph3.VertexMemWeight(1), 7); } diff --git a/tests/graph_vector_impl.cpp b/tests/graph_vector_impl.cpp index e0677f93..4b82b09d 100644 --- a/tests/graph_vector_impl.cpp +++ b/tests/graph_vector_impl.cpp @@ -29,58 +29,58 @@ limitations under the License. 
using namespace osp; -computational_dag_vector_impl_def_t constr_graph_1() { - computational_dag_vector_impl_def_t graph; - - using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; - - vertex_idx v1 = graph.add_vertex(1, 2, 3, 4); - vertex_idx v2 = graph.add_vertex(5, 6, 7, 8); - vertex_idx v3 = graph.add_vertex(9, 10, 11, 12); - vertex_idx v4 = graph.add_vertex(13, 14, 15, 16); - vertex_idx v5 = graph.add_vertex(17, 18, 19, 20); - vertex_idx v6 = graph.add_vertex(21, 22, 23, 24); - vertex_idx v7 = graph.add_vertex(25, 26, 27, 28); - vertex_idx v8 = graph.add_vertex(29, 30, 31, 32); - - graph.add_edge(v1, v2); - graph.add_edge(v1, v3); - graph.add_edge(v1, v4); - graph.add_edge(v2, v5); - - graph.add_edge(v3, v5); - graph.add_edge(v3, v6); - graph.add_edge(v2, v7); - graph.add_edge(v5, v8); - graph.add_edge(v4, v8); +ComputationalDagVectorImplDefUnsignedT ConstrGraph1() { + ComputationalDagVectorImplDefUnsignedT graph; + + using VertexIdx = ComputationalDagVectorImplDefUnsignedT::VertexIdx; + + VertexIdx v1 = graph.AddVertex(1, 2, 3, 4); + VertexIdx v2 = graph.AddVertex(5, 6, 7, 8); + VertexIdx v3 = graph.AddVertex(9, 10, 11, 12); + VertexIdx v4 = graph.AddVertex(13, 14, 15, 16); + VertexIdx v5 = graph.AddVertex(17, 18, 19, 20); + VertexIdx v6 = graph.AddVertex(21, 22, 23, 24); + VertexIdx v7 = graph.AddVertex(25, 26, 27, 28); + VertexIdx v8 = graph.AddVertex(29, 30, 31, 32); + + graph.AddEdge(v1, v2); + graph.AddEdge(v1, v3); + graph.AddEdge(v1, v4); + graph.AddEdge(v2, v5); + + graph.AddEdge(v3, v5); + graph.AddEdge(v3, v6); + graph.AddEdge(v2, v7); + graph.AddEdge(v5, v8); + graph.AddEdge(v4, v8); return graph; } -BOOST_AUTO_TEST_CASE(test_empty_dag) { - computational_dag_vector_impl_def_t graph; - BOOST_CHECK_EQUAL(graph.num_edges(), 0); - BOOST_CHECK_EQUAL(graph.num_vertices(), 0); +BOOST_AUTO_TEST_CASE(TestEmptyDag) { + ComputationalDagVectorImplDefUnsignedT graph; + BOOST_CHECK_EQUAL(graph.NumEdges(), 0); + BOOST_CHECK_EQUAL(graph.NumVertices(), 
0); size_t idx = 0; - for (const auto &v : graph.vertices()) { - graph.in_degree(v); + for (const auto &v : graph.Vertices()) { + graph.InDegree(v); idx++; } BOOST_CHECK_EQUAL(idx, 0); } -BOOST_AUTO_TEST_CASE(test_dag) { - const computational_dag_vector_impl_def_t graph = constr_graph_1(); +BOOST_AUTO_TEST_CASE(TestDag) { + const ComputationalDagVectorImplDefUnsignedT graph = ConstrGraph1(); - using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; + using VertexIdx = ComputationalDagVectorImplDefUnsignedT::VertexIdx; - BOOST_CHECK_EQUAL(graph.num_edges(), 9); - BOOST_CHECK_EQUAL(graph.num_vertices(), 8); + BOOST_CHECK_EQUAL(graph.NumEdges(), 9); + BOOST_CHECK_EQUAL(graph.NumVertices(), 8); - std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; + std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; - std::vector> out_neighbors{ + std::vector> outNeighbors{ {1, 2, 3}, {4, 6}, {4, 5}, @@ -91,7 +91,7 @@ BOOST_AUTO_TEST_CASE(test_dag) { {} }; - std::vector> in_neighbors{ + std::vector> inNeighbors{ {}, {0}, {0}, @@ -104,117 +104,117 @@ BOOST_AUTO_TEST_CASE(test_dag) { size_t idx = 0; - for (const auto &v : graph.vertices()) { + for (const auto &v : graph.Vertices()) { BOOST_CHECK_EQUAL(v, vertices[idx++]); size_t i = 0; - for (const auto &e : graph.children(v)) { - BOOST_CHECK_EQUAL(e, out_neighbors[v][i++]); + for (const auto &e : graph.Children(v)) { + BOOST_CHECK_EQUAL(e, outNeighbors[v][i++]); } i = 0; - for (const auto &e : graph.parents(v)) { - BOOST_CHECK_EQUAL(e, in_neighbors[v][i++]); + for (const auto &e : graph.Parents(v)) { + BOOST_CHECK_EQUAL(e, inNeighbors[v][i++]); } i = 0; - for (const auto &e : out_edges(v, graph)) { - BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[v][i++]); + for (const auto &e : OutEdges(v, graph)) { + BOOST_CHECK_EQUAL(Target(e, graph), outNeighbors[v][i++]); } i = 0; - for (const auto &e : in_edges(v, graph)) { - BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[v][i++]); + for (const auto &e : InEdges(v, graph)) { + 
BOOST_CHECK_EQUAL(Source(e, graph), inNeighbors[v][i++]); } - BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[v].size()); - BOOST_CHECK_EQUAL(graph.out_degree(v), out_neighbors[v].size()); + BOOST_CHECK_EQUAL(graph.InDegree(v), inNeighbors[v].size()); + BOOST_CHECK_EQUAL(graph.OutDegree(v), outNeighbors[v].size()); } unsigned count = 0; - for (const auto &e : edges(graph)) { - std::cout << e.source << " -> " << e.target << std::endl; + for (const auto &e : Edges(graph)) { + std::cout << e.source_ << " -> " << e.target_ << std::endl; count++; } BOOST_CHECK_EQUAL(count, 9); } -BOOST_AUTO_TEST_CASE(test_constr_dag) { - computational_dag_vector_impl_def_int_t graph; - - graph.add_vertex(1, 2, 3); - graph.add_vertex(5, 6, 7); - graph.add_vertex(9, 10, 11); - graph.add_vertex(13, 14, 15); - - graph.add_edge(0, 1); - graph.add_edge(0, 2); - graph.add_edge(0, 3); - - computational_dag_vector_impl_def_int_t graph_2(graph); - - BOOST_CHECK_EQUAL(graph_2.num_edges(), 3); - BOOST_CHECK_EQUAL(graph_2.num_vertices(), 4); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(0), 1); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(0), 2); - BOOST_CHECK_EQUAL(graph_2.vertex_mem_weight(0), 3); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(1), 5); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(1), 6); - BOOST_CHECK_EQUAL(graph_2.vertex_mem_weight(1), 7); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(2), 9); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(2), 10); - BOOST_CHECK_EQUAL(graph_2.vertex_mem_weight(2), 11); - BOOST_CHECK_EQUAL(graph_2.vertex_work_weight(3), 13); - BOOST_CHECK_EQUAL(graph_2.vertex_comm_weight(3), 14); - - boost_graph_int_t b_g1; - - b_g1.add_vertex(1, 2, 3, 4); - b_g1.add_vertex(5, 6, 7, 8); - - b_g1.add_edge(0, 1, 9); - - computational_dag_vector_impl_def_int_t graph_3(b_g1); - - BOOST_CHECK_EQUAL(graph_3.num_edges(), 1); - BOOST_CHECK_EQUAL(graph_3.num_vertices(), 2); - BOOST_CHECK_EQUAL(graph_3.vertex_work_weight(0), 1); - 
BOOST_CHECK_EQUAL(graph_3.vertex_comm_weight(0), 2); - BOOST_CHECK_EQUAL(graph_3.vertex_mem_weight(0), 3); - BOOST_CHECK_EQUAL(graph_3.vertex_work_weight(1), 5); - BOOST_CHECK_EQUAL(graph_3.vertex_comm_weight(1), 6); - BOOST_CHECK_EQUAL(graph_3.vertex_mem_weight(1), 7); - - computational_dag_vector_impl_def_int_t graph_4(graph_3); - - BOOST_CHECK_EQUAL(graph_4.num_edges(), 1); - BOOST_CHECK_EQUAL(graph_4.num_vertices(), 2); - BOOST_CHECK_EQUAL(graph_4.vertex_work_weight(0), 1); - BOOST_CHECK_EQUAL(graph_4.vertex_comm_weight(0), 2); - BOOST_CHECK_EQUAL(graph_4.vertex_mem_weight(0), 3); - BOOST_CHECK_EQUAL(graph_4.vertex_work_weight(1), 5); - BOOST_CHECK_EQUAL(graph_4.vertex_comm_weight(1), 6); - BOOST_CHECK_EQUAL(graph_4.vertex_mem_weight(1), 7); - - computational_dag_vector_impl_def_int_t graph_move_1(std::move(graph_4)); - - BOOST_CHECK_EQUAL(graph_4.num_edges(), 0); - BOOST_CHECK_EQUAL(graph_4.num_vertices(), 0); - - BOOST_CHECK_EQUAL(graph_move_1.num_edges(), 1); - BOOST_CHECK_EQUAL(graph_move_1.num_vertices(), 2); - BOOST_CHECK_EQUAL(graph_move_1.vertex_work_weight(0), 1); - BOOST_CHECK_EQUAL(graph_move_1.vertex_comm_weight(0), 2); - BOOST_CHECK_EQUAL(graph_move_1.vertex_mem_weight(0), 3); - BOOST_CHECK_EQUAL(graph_move_1.vertex_work_weight(1), 5); - BOOST_CHECK_EQUAL(graph_move_1.vertex_comm_weight(1), 6); - BOOST_CHECK_EQUAL(graph_move_1.vertex_mem_weight(1), 7); +BOOST_AUTO_TEST_CASE(TestConstrDag) { + ComputationalDagVectorImplDefIntT graph; + + graph.AddVertex(1, 2, 3); + graph.AddVertex(5, 6, 7); + graph.AddVertex(9, 10, 11); + graph.AddVertex(13, 14, 15); + + graph.AddEdge(0, 1); + graph.AddEdge(0, 2); + graph.AddEdge(0, 3); + + ComputationalDagVectorImplDefIntT graph2(graph); + + BOOST_CHECK_EQUAL(graph2.NumEdges(), 3); + BOOST_CHECK_EQUAL(graph2.NumVertices(), 4); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(0), 1); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(0), 2); + BOOST_CHECK_EQUAL(graph2.VertexMemWeight(0), 3); + 
BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(1), 5); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(1), 6); + BOOST_CHECK_EQUAL(graph2.VertexMemWeight(1), 7); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(2), 9); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(2), 10); + BOOST_CHECK_EQUAL(graph2.VertexMemWeight(2), 11); + BOOST_CHECK_EQUAL(graph2.VertexWorkWeight(3), 13); + BOOST_CHECK_EQUAL(graph2.VertexCommWeight(3), 14); + + BoostGraphIntT bG1; + + bG1.AddVertex(1, 2, 3, 4); + bG1.AddVertex(5, 6, 7, 8); + + bG1.AddEdge(0, 1, 9); + + ComputationalDagVectorImplDefIntT graph3(bG1); + + BOOST_CHECK_EQUAL(graph3.NumEdges(), 1); + BOOST_CHECK_EQUAL(graph3.NumVertices(), 2); + BOOST_CHECK_EQUAL(graph3.VertexWorkWeight(0), 1); + BOOST_CHECK_EQUAL(graph3.VertexCommWeight(0), 2); + BOOST_CHECK_EQUAL(graph3.VertexMemWeight(0), 3); + BOOST_CHECK_EQUAL(graph3.VertexWorkWeight(1), 5); + BOOST_CHECK_EQUAL(graph3.VertexCommWeight(1), 6); + BOOST_CHECK_EQUAL(graph3.VertexMemWeight(1), 7); + + ComputationalDagVectorImplDefIntT graph4(graph3); + + BOOST_CHECK_EQUAL(graph4.NumEdges(), 1); + BOOST_CHECK_EQUAL(graph4.NumVertices(), 2); + BOOST_CHECK_EQUAL(graph4.VertexWorkWeight(0), 1); + BOOST_CHECK_EQUAL(graph4.VertexCommWeight(0), 2); + BOOST_CHECK_EQUAL(graph4.VertexMemWeight(0), 3); + BOOST_CHECK_EQUAL(graph4.VertexWorkWeight(1), 5); + BOOST_CHECK_EQUAL(graph4.VertexCommWeight(1), 6); + BOOST_CHECK_EQUAL(graph4.VertexMemWeight(1), 7); + + ComputationalDagVectorImplDefIntT graphMove1(std::move(graph4)); + + BOOST_CHECK_EQUAL(graph4.NumEdges(), 0); + BOOST_CHECK_EQUAL(graph4.NumVertices(), 0); + + BOOST_CHECK_EQUAL(graphMove1.NumEdges(), 1); + BOOST_CHECK_EQUAL(graphMove1.NumVertices(), 2); + BOOST_CHECK_EQUAL(graphMove1.VertexWorkWeight(0), 1); + BOOST_CHECK_EQUAL(graphMove1.VertexCommWeight(0), 2); + BOOST_CHECK_EQUAL(graphMove1.VertexMemWeight(0), 3); + BOOST_CHECK_EQUAL(graphMove1.VertexWorkWeight(1), 5); + BOOST_CHECK_EQUAL(graphMove1.VertexCommWeight(1), 6); + 
BOOST_CHECK_EQUAL(graphMove1.VertexMemWeight(1), 7); } -BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { +BOOST_AUTO_TEST_CASE(TestDagVectorAdapter) { std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; - std::vector> out_neighbors{ + std::vector> outNeighbors{ {1, 2, 3}, {4, 6}, {4, 5}, @@ -225,7 +225,7 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { {} }; - std::vector> in_neighbors{ + std::vector> inNeighbors{ {}, {0}, {0}, @@ -236,45 +236,45 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { {4, 3} }; - using v_impl = cdag_vertex_impl; - using graph_t = dag_vector_adapter; + using VImpl = CDagVertexImpl; + using GraphT = DagVectorAdapter; - graph_t graph(out_neighbors, in_neighbors); + GraphT graph(outNeighbors, inNeighbors); size_t idx = 0; - for (const auto &v : graph.vertices()) { + for (const auto &v : graph.Vertices()) { BOOST_CHECK_EQUAL(v, vertices[idx++]); unsigned vv = static_cast(v); size_t i = 0; - for (const auto &e : graph.children(v)) { - BOOST_CHECK_EQUAL(e, out_neighbors[vv][i++]); + for (const auto &e : graph.Children(v)) { + BOOST_CHECK_EQUAL(e, outNeighbors[vv][i++]); } i = 0; - for (const auto &e : graph.parents(v)) { - BOOST_CHECK_EQUAL(e, in_neighbors[vv][i++]); + for (const auto &e : graph.Parents(v)) { + BOOST_CHECK_EQUAL(e, inNeighbors[vv][i++]); } i = 0; - for (const auto &e : out_edges(v, graph)) { - BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[vv][i++]); + for (const auto &e : OutEdges(v, graph)) { + BOOST_CHECK_EQUAL(Target(e, graph), outNeighbors[vv][i++]); } i = 0; - for (const auto &e : in_edges(v, graph)) { - BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[vv][i++]); + for (const auto &e : InEdges(v, graph)) { + BOOST_CHECK_EQUAL(Source(e, graph), inNeighbors[vv][i++]); } - BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[vv].size()); - BOOST_CHECK_EQUAL(graph.out_degree(v), out_neighbors[vv].size()); + BOOST_CHECK_EQUAL(graph.InDegree(v), inNeighbors[vv].size()); + BOOST_CHECK_EQUAL(graph.OutDegree(v), 
outNeighbors[vv].size()); } unsigned count = 0; - for (const auto &e : edges(graph)) { - std::cout << e.source << " -> " << e.target << std::endl; + for (const auto &e : Edges(graph)) { + std::cout << e.source_ << " -> " << e.target_ << std::endl; count++; } BOOST_CHECK_EQUAL(count, 9); diff --git a/tests/hash_pair.cpp b/tests/hash_pair.cpp index 070c7406..9e4dcb79 100644 --- a/tests/hash_pair.cpp +++ b/tests/hash_pair.cpp @@ -19,11 +19,11 @@ limitations under the License. #define BOOST_TEST_MODULE Hash_Pair #include -#include "osp/auxiliary/misc.hpp" +#include "osp/auxiliary/hash_util.hpp" using namespace osp; -BOOST_AUTO_TEST_CASE(Hash_Pair) { +BOOST_AUTO_TEST_CASE(HashPair) { std::pair p1({0, 0}); std::pair p2({1, 1}); std::pair p3({1, 2}); @@ -32,7 +32,7 @@ BOOST_AUTO_TEST_CASE(Hash_Pair) { std::pair p6({2, 6}); std::pair p7 = p6; - pair_hash hasher; + PairHash hasher; BOOST_CHECK(hasher(p7) == hasher(p6)); diff --git a/tests/heaps.cpp b/tests/heaps.cpp index de177ed1..86a41aaf 100644 --- a/tests/heaps.cpp +++ b/tests/heaps.cpp @@ -34,91 +34,91 @@ limitations under the License. 
namespace osp::test { // Wrapper for boost::heap::fibonacci_heap to match the test interface -template +template class BoostFibonacciHeapWrapper { private: struct Node { - Key key; - Value value; + Key key_; + Value value_; }; struct NodeCompare { bool operator()(const Node &a, const Node &b) const { - if constexpr (IsMinHeap) { - return a.value > b.value; // For min-heap + if constexpr (isMinHeap) { + return a.value_ > b.value_; // For min-heap } else { - return a.value < b.value; // For max-heap + return a.value_ < b.value_; // For max-heap } } }; using BoostHeap = boost::heap::fibonacci_heap>; - using handle_type = typename BoostHeap::handle_type; + using HandleType = typename BoostHeap::handle_type; - BoostHeap heap; - std::unordered_map handles; + BoostHeap heap_; + std::unordered_map handles_; public: BoostFibonacciHeapWrapper() = default; - bool is_empty() const { return heap.empty(); } + bool IsEmpty() const { return heap_.empty(); } - size_t size() const { return heap.size(); } + size_t size() const { return heap_.size(); } - bool contains(const Key &key) const { return handles.count(key); } + bool Contains(const Key &key) const { return handles_.count(key); } - const Key &top() const { - if (is_empty()) { + const Key &Top() const { + if (IsEmpty()) { throw std::out_of_range("Heap is empty"); } - return heap.top().key; + return heap_.top().key_; } - Key pop() { - if (is_empty()) { + Key Pop() { + if (IsEmpty()) { throw std::out_of_range("Heap is empty"); } - Key top_key = heap.top().key; - heap.pop(); - handles.erase(top_key); - return top_key; + Key topKey = heap_.top().key_; + heap_.pop(); + handles_.erase(topKey); + return topKey; } - void push(const Key &key, const Value &value) { - if (contains(key)) { + void Push(const Key &key, const Value &value) { + if (Contains(key)) { throw std::invalid_argument("Key already exists"); } - handle_type handle = heap.push({key, value}); - handles[key] = handle; + HandleType handle = heap_.push({key, value}); + 
handles_[key] = handle; } - Value get_value(const Key &key) const { - if (!contains(key)) { + Value GetValue(const Key &key) const { + if (!Contains(key)) { throw std::out_of_range("Key not found"); } - return (*handles.at(key)).value; + return (*handles_.at(key)).value_; } - void update(const Key &key, const Value &new_value) { - if (!contains(key)) { - throw std::invalid_argument("Key not found for update"); + void Update(const Key &key, const Value &newValue) { + if (!Contains(key)) { + throw std::invalid_argument("Key not found for Update"); } - handle_type handle = handles.at(key); - (*handle).value = new_value; - heap.update(handle); + HandleType handle = handles_.at(key); + (*handle).value_ = newValue; + heap_.update(handle); } - void erase(const Key &key) { - if (!contains(key)) { - throw std::invalid_argument("Key not found for erase"); + void Erase(const Key &key) { + if (!Contains(key)) { + throw std::invalid_argument("Key not found for Erase"); } - heap.erase(handles.at(key)); - handles.erase(key); + heap_.erase(handles_.at(key)); + handles_.erase(key); } - void clear() { - heap.clear(); - handles.clear(); + void Clear() { + heap_.clear(); + handles_.clear(); } }; @@ -129,13 +129,13 @@ template using MaxBoostFibonacciHeap = BoostFibonacciHeapWrapper; // Wrapper for std::set to match the test interface -template +template class StdSetWrapper { private: struct NodeCompare { bool operator()(const std::pair &a, const std::pair &b) const { if (a.first != b.first) { - if constexpr (IsMinHeap) { + if constexpr (isMinHeap) { return a.first < b.first; // For min-heap } else { return a.first > b.first; // For max-heap @@ -146,75 +146,75 @@ class StdSetWrapper { }; using SetType = std::set, NodeCompare>; - SetType data_set; - std::unordered_map value_map; + SetType dataSet_; + std::unordered_map valueMap_; public: StdSetWrapper() = default; - bool is_empty() const { return data_set.empty(); } + bool IsEmpty() const { return dataSet_.empty(); } - size_t size() 
const { return data_set.size(); } + size_t size() const { return dataSet_.size(); } - bool contains(const Key &key) const { return value_map.count(key); } + bool Contains(const Key &key) const { return valueMap_.count(key); } - const Key &top() const { - if (is_empty()) { + const Key &Top() const { + if (IsEmpty()) { throw std::out_of_range("Heap is empty"); } - return data_set.begin()->second; + return dataSet_.begin()->second; } - Key pop() { - if (is_empty()) { + Key Pop() { + if (IsEmpty()) { throw std::out_of_range("Heap is empty"); } - auto top_node = *data_set.begin(); - data_set.erase(data_set.begin()); - value_map.erase(top_node.second); - return top_node.second; + auto topNode = *dataSet_.begin(); + dataSet_.erase(dataSet_.begin()); + valueMap_.erase(topNode.second); + return topNode.second; } - void push(const Key &key, const Value &value) { - if (contains(key)) { + void Push(const Key &key, const Value &value) { + if (Contains(key)) { throw std::invalid_argument("Key already exists"); } - data_set.insert({value, key}); - value_map[key] = value; + dataSet_.insert({value, key}); + valueMap_[key] = value; } - Value get_value(const Key &key) const { - if (!contains(key)) { + Value GetValue(const Key &key) const { + if (!Contains(key)) { throw std::out_of_range("Key not found"); } - return value_map.at(key); + return valueMap_.at(key); } - void update(const Key &key, const Value &new_value) { - if (!contains(key)) { - throw std::invalid_argument("Key not found for update"); + void Update(const Key &key, const Value &newValue) { + if (!Contains(key)) { + throw std::invalid_argument("Key not found for Update"); } - Value old_value = value_map.at(key); - if (old_value == new_value) { + Value oldValue = valueMap_.at(key); + if (oldValue == newValue) { return; } - data_set.erase({old_value, key}); - data_set.insert({new_value, key}); - value_map[key] = new_value; + dataSet_.erase({oldValue, key}); + dataSet_.insert({newValue, key}); + valueMap_[key] = newValue; } 
- void erase(const Key &key) { - if (!contains(key)) { - throw std::invalid_argument("Key not found for erase"); + void Erase(const Key &key) { + if (!Contains(key)) { + throw std::invalid_argument("Key not found for Erase"); } - Value value = value_map.at(key); - data_set.erase({value, key}); - value_map.erase(key); + Value value = valueMap_.at(key); + dataSet_.erase({value, key}); + valueMap_.erase(key); } - void clear() { - data_set.clear(); - value_map.clear(); + void Clear() { + dataSet_.clear(); + valueMap_.clear(); } }; @@ -226,137 +226,137 @@ using MaxStdSetHeap = StdSetWrapper; // Generic test suite for any min-heap implementation that follows the API. template -void test_min_heap_functionality() { +void TestMinHeapFunctionality() { HeapType heap; // Basic properties of an empty heap - BOOST_CHECK(heap.is_empty()); + BOOST_CHECK(heap.IsEmpty()); BOOST_CHECK_EQUAL(heap.size(), 0); - BOOST_CHECK(!heap.contains("A")); - BOOST_CHECK_THROW(heap.top(), std::out_of_range); - BOOST_CHECK_THROW(heap.pop(), std::out_of_range); + BOOST_CHECK(!heap.Contains("A")); + BOOST_CHECK_THROW(heap.Top(), std::out_of_range); + BOOST_CHECK_THROW(heap.Pop(), std::out_of_range); // Push elements - heap.push("A", 10); - heap.push("B", 5); - heap.push("C", 15); + heap.Push("A", 10); + heap.Push("B", 5); + heap.Push("C", 15); - BOOST_CHECK(!heap.is_empty()); + BOOST_CHECK(!heap.IsEmpty()); BOOST_CHECK_EQUAL(heap.size(), 3); - BOOST_CHECK(heap.contains("A")); - BOOST_CHECK(heap.contains("B")); - BOOST_CHECK(heap.contains("C")); - BOOST_CHECK(!heap.contains("D")); + BOOST_CHECK(heap.Contains("A")); + BOOST_CHECK(heap.Contains("B")); + BOOST_CHECK(heap.Contains("C")); + BOOST_CHECK(!heap.Contains("D")); // Check for duplicate key insertion - BOOST_CHECK_THROW(heap.push("A", 20), std::invalid_argument); + BOOST_CHECK_THROW(heap.Push("A", 20), std::invalid_argument); - // Test top() and pop() for min-heap - BOOST_CHECK_EQUAL(heap.top(), "B"); - BOOST_CHECK_EQUAL(heap.pop(), "B"); + // 
Test Top() and Pop() for min-heap + BOOST_CHECK_EQUAL(heap.Top(), "B"); + BOOST_CHECK_EQUAL(heap.Pop(), "B"); BOOST_CHECK_EQUAL(heap.size(), 2); - BOOST_CHECK(!heap.contains("B")); - - BOOST_CHECK_EQUAL(heap.top(), "A"); - BOOST_CHECK_EQUAL(heap.pop(), "A"); - - BOOST_CHECK_EQUAL(heap.top(), "C"); - BOOST_CHECK_EQUAL(heap.pop(), "C"); - BOOST_CHECK(heap.is_empty()); - - // Repopulate for update/erase tests - heap.push("A", 10); - heap.push("B", 5); - heap.push("C", 15); - heap.push("D", 2); - heap.push("E", 20); - - // Test get_value - BOOST_CHECK_EQUAL(heap.get_value("A"), 10); - BOOST_CHECK_EQUAL(heap.get_value("D"), 2); - BOOST_CHECK_THROW(heap.get_value("Z"), std::out_of_range); - - // Test update (decrease-key) - heap.update("B", 1); // B: 5 -> 1. Should be new top. - BOOST_CHECK_EQUAL(heap.top(), "B"); - BOOST_CHECK_EQUAL(heap.get_value("B"), 1); - - // Test update (increase-key) - heap.update("B", 25); // B: 1 -> 25. D (2) should be new top. - BOOST_CHECK_EQUAL(heap.top(), "D"); - BOOST_CHECK_EQUAL(heap.get_value("B"), 25); - - // Test update with same value - heap.update("A", 10); - BOOST_CHECK_EQUAL(heap.get_value("A"), 10); - - // Test erase - heap.erase("D"); // Erase top element + BOOST_CHECK(!heap.Contains("B")); + + BOOST_CHECK_EQUAL(heap.Top(), "A"); + BOOST_CHECK_EQUAL(heap.Pop(), "A"); + + BOOST_CHECK_EQUAL(heap.Top(), "C"); + BOOST_CHECK_EQUAL(heap.Pop(), "C"); + BOOST_CHECK(heap.IsEmpty()); + + // Repopulate for Update/Erase tests + heap.Push("A", 10); + heap.Push("B", 5); + heap.Push("C", 15); + heap.Push("D", 2); + heap.Push("E", 20); + + // Test GetValue + BOOST_CHECK_EQUAL(heap.GetValue("A"), 10); + BOOST_CHECK_EQUAL(heap.GetValue("D"), 2); + BOOST_CHECK_THROW(heap.GetValue("Z"), std::out_of_range); + + // Test Update (decrease-key) + heap.Update("B", 1); // B: 5 -> 1. Should be new Top. + BOOST_CHECK_EQUAL(heap.Top(), "B"); + BOOST_CHECK_EQUAL(heap.GetValue("B"), 1); + + // Test Update (increase-key) + heap.Update("B", 25); // B: 1 -> 25. 
D (2) should be new Top. + BOOST_CHECK_EQUAL(heap.Top(), "D"); + BOOST_CHECK_EQUAL(heap.GetValue("B"), 25); + + // Test Update with same value + heap.Update("A", 10); + BOOST_CHECK_EQUAL(heap.GetValue("A"), 10); + + // Test Erase + heap.Erase("D"); // Erase Top element BOOST_CHECK_EQUAL(heap.size(), 4); - BOOST_CHECK(!heap.contains("D")); - BOOST_CHECK_EQUAL(heap.top(), "A"); // A (10) is new top + BOOST_CHECK(!heap.Contains("D")); + BOOST_CHECK_EQUAL(heap.Top(), "A"); // A (10) is new Top - heap.erase("E"); // Erase non-top element + heap.Erase("E"); // Erase non-Top element BOOST_CHECK_EQUAL(heap.size(), 3); - BOOST_CHECK(!heap.contains("E")); - BOOST_CHECK_THROW(heap.erase("Z"), std::invalid_argument); + BOOST_CHECK(!heap.Contains("E")); + BOOST_CHECK_THROW(heap.Erase("Z"), std::invalid_argument); - // Test clear - heap.clear(); - BOOST_CHECK(heap.is_empty()); + // Test Clear + heap.Clear(); + BOOST_CHECK(heap.IsEmpty()); BOOST_CHECK_EQUAL(heap.size(), 0); } template -void test_max_heap_functionality() { +void TestMaxHeapFunctionality() { HeapType heap; - heap.push("A", 10); - heap.push("B", 5); - heap.push("C", 15); - - // Test pop order for max-heap - BOOST_CHECK_EQUAL(heap.top(), "C"); - heap.pop(); - BOOST_CHECK_EQUAL(heap.top(), "A"); - heap.pop(); - BOOST_CHECK_EQUAL(heap.top(), "B"); + heap.Push("A", 10); + heap.Push("B", 5); + heap.Push("C", 15); + + // Test Pop order for max-heap + BOOST_CHECK_EQUAL(heap.Top(), "C"); + heap.Pop(); + BOOST_CHECK_EQUAL(heap.Top(), "A"); + heap.Pop(); + BOOST_CHECK_EQUAL(heap.Top(), "B"); } // Stress test with a larger number of elements template -void stress_test_heap() { +void StressTestHeap() { HeapType heap; - const int num_items = 1000; + const int numItems = 1000; - for (int i = 0; i < num_items; ++i) { - heap.push(std::to_string(i), i); + for (int i = 0; i < numItems; ++i) { + heap.Push(std::to_string(i), i); } - for (int i = 0; i < num_items / 2; ++i) { - heap.update(std::to_string(i), i - num_items); + for (int i 
= 0; i < numItems / 2; ++i) { + heap.Update(std::to_string(i), i - numItems); } - std::vector popped_values; - while (!heap.is_empty()) { - popped_values.push_back(heap.get_value(heap.top())); - heap.pop(); + std::vector poppedValues; + while (!heap.IsEmpty()) { + poppedValues.push_back(heap.GetValue(heap.Top())); + heap.Pop(); } - BOOST_CHECK_EQUAL(popped_values.size(), num_items); - BOOST_CHECK(std::is_sorted(popped_values.begin(), popped_values.end())); + BOOST_CHECK_EQUAL(poppedValues.size(), numItems); + BOOST_CHECK(std::is_sorted(poppedValues.begin(), poppedValues.end())); } // Performance test suite for different heap workloads. template -void run_performance_test(const std::string &heap_name, size_t num_items, size_t num_updates, size_t num_random_ops) { - std::cout << "\n--- Performance Test for " << heap_name << " ---" << std::endl; +void RunPerformanceTest(const std::string &heapName, size_t numItems, size_t numUpdates, size_t numRandomOps) { + std::cout << "\n--- Performance Test for " << heapName << " ---" << std::endl; - std::vector keys(num_items); - std::vector priorities(num_items); + std::vector keys(numItems); + std::vector priorities(numItems); std::random_device rd; std::mt19937 gen(rd()); - std::uniform_int_distribution distrib(0, static_cast(num_items * 10)); + std::uniform_int_distribution distrib(0, static_cast(numItems * 10)); - for (size_t i = 0; i < num_items; ++i) { + for (size_t i = 0; i < numItems; ++i) { keys[i] = std::to_string(i); priorities[i] = distrib(gen); } @@ -365,133 +365,133 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t // Scenario 1: Bulk Insert auto start = std::chrono::high_resolution_clock::now(); - for (size_t i = 0; i < num_items; ++i) { - heap.push(keys[i], priorities[i]); + for (size_t i = 0; i < numItems; ++i) { + heap.Push(keys[i], priorities[i]); } auto end = std::chrono::high_resolution_clock::now(); std::chrono::duration duration = end - start; - std::cout << "Bulk Insert 
(" << num_items << " items): " << duration.count() << " ms" << std::endl; + std::cout << "Bulk Insert (" << numItems << " items): " << duration.count() << " ms" << std::endl; // Scenario 2: Decrease Key - std::uniform_int_distribution key_distrib(0, num_items - 1); - std::uniform_int_distribution<> dec_dist(1, 100); + std::uniform_int_distribution keyDistrib(0, numItems - 1); + std::uniform_int_distribution<> decDist(1, 100); start = std::chrono::high_resolution_clock::now(); - for (size_t i = 0; i < num_updates; ++i) { - size_t key_idx = key_distrib(gen); - int new_prio = heap.get_value(keys[key_idx]) - dec_dist(gen); - heap.update(keys[key_idx], new_prio); + for (size_t i = 0; i < numUpdates; ++i) { + size_t keyIdx = keyDistrib(gen); + int newPrio = heap.GetValue(keys[keyIdx]) - decDist(gen); + heap.Update(keys[keyIdx], newPrio); } end = std::chrono::high_resolution_clock::now(); duration = end - start; - std::cout << "Decrease Key (" << num_updates << " updates): " << duration.count() << " ms" << std::endl; + std::cout << "Decrease Key (" << numUpdates << " updates): " << duration.count() << " ms" << std::endl; // Scenario 3: Bulk Pop start = std::chrono::high_resolution_clock::now(); - while (!heap.is_empty()) { - heap.pop(); + while (!heap.IsEmpty()) { + heap.Pop(); } end = std::chrono::high_resolution_clock::now(); duration = end - start; - std::cout << "Bulk Pop (" << num_items << " items): " << duration.count() << " ms" << std::endl; + std::cout << "Bulk Pop (" << numItems << " items): " << duration.count() << " ms" << std::endl; - BOOST_CHECK(heap.is_empty()); + BOOST_CHECK(heap.IsEmpty()); // Scenario 4: Random Operations (Push, Erase, Update) - heap.clear(); - std::vector present_keys; - present_keys.reserve(num_items); - std::vector key_in_heap(num_items, false); - std::uniform_int_distribution op_dist(0, 2); // 0: push, 1: erase, 2: update + heap.Clear(); + std::vector presentKeys; + presentKeys.reserve(numItems); + std::vector keyInHeap(numItems, 
false); + std::uniform_int_distribution opDist(0, 2); // 0: Push, 1: Erase, 2: Update start = std::chrono::high_resolution_clock::now(); - for (size_t i = 0; i < num_random_ops; ++i) { - int op = op_dist(gen); - if (op == 0 || present_keys.empty()) { // Push - size_t key_idx = key_distrib(gen); - if (!key_in_heap[key_idx]) { - heap.push(keys[key_idx], priorities[key_idx]); - present_keys.push_back(keys[key_idx]); - key_in_heap[key_idx] = true; + for (size_t i = 0; i < numRandomOps; ++i) { + int op = opDist(gen); + if (op == 0 || presentKeys.empty()) { // Push + size_t keyIdx = keyDistrib(gen); + if (!keyInHeap[keyIdx]) { + heap.Push(keys[keyIdx], priorities[keyIdx]); + presentKeys.push_back(keys[keyIdx]); + keyInHeap[keyIdx] = true; } } else { // Erase or Update - std::uniform_int_distribution present_key_dist(0, present_keys.size() - 1); - size_t present_key_vec_idx = present_key_dist(gen); - std::string key_to_op = present_keys[present_key_vec_idx]; + std::uniform_int_distribution presentKeyDist(0, presentKeys.size() - 1); + size_t presentKeyVecIdx = presentKeyDist(gen); + std::string keyToOp = presentKeys[presentKeyVecIdx]; if (op == 1) { // Erase a random element - heap.erase(key_to_op); - key_in_heap[std::stoul(key_to_op)] = false; - std::swap(present_keys[present_key_vec_idx], present_keys.back()); - present_keys.pop_back(); + heap.Erase(keyToOp); + keyInHeap[std::stoul(keyToOp)] = false; + std::swap(presentKeys[presentKeyVecIdx], presentKeys.back()); + presentKeys.pop_back(); } else { // op == 2, Update a random element (decrease key) - int new_prio = heap.get_value(key_to_op) - dec_dist(gen); - heap.update(key_to_op, new_prio); + int newPrio = heap.GetValue(keyToOp) - decDist(gen); + heap.Update(keyToOp, newPrio); } } } end = std::chrono::high_resolution_clock::now(); duration = end - start; - std::cout << "Random Ops (" << num_random_ops << " ops of push/erase/update): " << duration.count() << " ms" << std::endl; + std::cout << "Random Ops (" << 
numRandomOps << " ops of Push/Erase/Update): " << duration.count() << " ms" << std::endl; // Scenario 5: Mixed Workload with Re-initialization - const size_t num_outer_loops_s5 = 500; - const size_t num_inner_loops_s5 = 10; - const size_t num_initial_pushes_s5 = 100; - const size_t num_pushes_per_iter_s5 = 25; - const size_t num_updates_per_iter_s5 = 25; + const size_t numOuterLoopsS5 = 500; + const size_t numInnerLoopsS5 = 10; + const size_t numInitialPushesS5 = 100; + const size_t numPushesPerIterS5 = 25; + const size_t numUpdatesPerIterS5 = 25; // A large pool of keys to draw from for pushes, to avoid collisions. - const size_t key_pool_size_s5 = num_outer_loops_s5 * (num_initial_pushes_s5 + num_inner_loops_s5 * num_pushes_per_iter_s5); - std::vector keys_s5(key_pool_size_s5); - std::vector priorities_s5(key_pool_size_s5); - for (size_t i = 0; i < key_pool_size_s5; ++i) { - keys_s5[i] = "s5_" + std::to_string(i); - priorities_s5[i] = distrib(gen); + const size_t keyPoolSizeS5 = numOuterLoopsS5 * (numInitialPushesS5 + numInnerLoopsS5 * numPushesPerIterS5); + std::vector keysS5(keyPoolSizeS5); + std::vector prioritiesS5(keyPoolSizeS5); + for (size_t i = 0; i < keyPoolSizeS5; ++i) { + keysS5[i] = "s5_" + std::to_string(i); + prioritiesS5[i] = distrib(gen); } - size_t key_idx_counter_s5 = 0; + size_t keyIdxCounterS5 = 0; start = std::chrono::high_resolution_clock::now(); - for (size_t outer_i = 0; outer_i < num_outer_loops_s5; ++outer_i) { - heap.clear(); - std::vector present_keys_s5; - present_keys_s5.reserve(num_initial_pushes_s5 + num_inner_loops_s5 * (num_pushes_per_iter_s5 - 1)); - - // Initial push - for (size_t i = 0; i < num_initial_pushes_s5; ++i) { - const auto &key = keys_s5[key_idx_counter_s5]; - heap.push(key, priorities_s5[key_idx_counter_s5]); - present_keys_s5.push_back(key); - key_idx_counter_s5++; + for (size_t outerI = 0; outerI < numOuterLoopsS5; ++outerI) { + heap.Clear(); + std::vector presentKeysS5; + presentKeysS5.reserve(numInitialPushesS5 
+ numInnerLoopsS5 * (numPushesPerIterS5 - 1)); + + // Initial Push + for (size_t i = 0; i < numInitialPushesS5; ++i) { + const auto &key = keysS5[keyIdxCounterS5]; + heap.Push(key, prioritiesS5[keyIdxCounterS5]); + presentKeysS5.push_back(key); + keyIdxCounterS5++; } - for (size_t inner_i = 0; inner_i < num_inner_loops_s5; ++inner_i) { + for (size_t innerI = 0; innerI < numInnerLoopsS5; ++innerI) { // 1. Pop once - if (!heap.is_empty()) { - std::string popped_key = heap.pop(); + if (!heap.IsEmpty()) { + std::string poppedKey = heap.Pop(); // Remove from present_keys_s5 efficiently - auto it = std::find(present_keys_s5.begin(), present_keys_s5.end(), popped_key); - if (it != present_keys_s5.end()) { - std::swap(*it, present_keys_s5.back()); - present_keys_s5.pop_back(); + auto it = std::find(presentKeysS5.begin(), presentKeysS5.end(), poppedKey); + if (it != presentKeysS5.end()) { + std::swap(*it, presentKeysS5.back()); + presentKeysS5.pop_back(); } } // 2. Push 25 keys - for (size_t j = 0; j < num_pushes_per_iter_s5; ++j) { - const auto &key = keys_s5[key_idx_counter_s5]; - heap.push(key, priorities_s5[key_idx_counter_s5]); - present_keys_s5.push_back(key); - key_idx_counter_s5++; + for (size_t j = 0; j < numPushesPerIterS5; ++j) { + const auto &key = keysS5[keyIdxCounterS5]; + heap.Push(key, prioritiesS5[keyIdxCounterS5]); + presentKeysS5.push_back(key); + keyIdxCounterS5++; } // 3. 
Update 25 keys - if (!present_keys_s5.empty()) { - std::uniform_int_distribution present_key_dist(0, present_keys_s5.size() - 1); - for (size_t j = 0; j < num_updates_per_iter_s5; ++j) { - const auto &key_to_update = present_keys_s5[present_key_dist(gen)]; - heap.update(key_to_update, heap.get_value(key_to_update) - dec_dist(gen)); + if (!presentKeysS5.empty()) { + std::uniform_int_distribution presentKeyDist(0, presentKeysS5.size() - 1); + for (size_t j = 0; j < numUpdatesPerIterS5; ++j) { + const auto &keyToUpdate = presentKeysS5[presentKeyDist(gen)]; + heap.Update(keyToUpdate, heap.GetValue(keyToUpdate) - decDist(gen)); } } } @@ -499,56 +499,56 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t end = std::chrono::high_resolution_clock::now(); duration = end - start; - std::cout << "Mixed Re-Init (" << num_outer_loops_s5 << " runs of init + " << num_inner_loops_s5 - << "x(pop/push/update)): " << duration.count() << " ms" << std::endl; + std::cout << "Mixed Re-Init (" << numOuterLoopsS5 << " runs of init + " << numInnerLoopsS5 + << "x(Pop/Push/Update)): " << duration.count() << " ms" << std::endl; } -BOOST_AUTO_TEST_SUITE(HeapTests) +BOOST_AUTO_TEST_SUITE(heap_tests) -BOOST_AUTO_TEST_CASE(PairingHeapTest) { test_min_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(PairingHeapTest) { TestMinHeapFunctionality>(); } -BOOST_AUTO_TEST_CASE(MaxPairingHeapTest) { test_max_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(MaxPairingHeapTest) { TestMaxHeapFunctionality>(); } -BOOST_AUTO_TEST_CASE(PairingHeapStressTest) { stress_test_heap>(); } +BOOST_AUTO_TEST_CASE(PairingHeapStressTest) { StressTestHeap>(); } -BOOST_AUTO_TEST_CASE(BoostFibonacciHeapTest) { test_min_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(BoostFibonacciHeapTest) { TestMinHeapFunctionality>(); } -BOOST_AUTO_TEST_CASE(MaxBoostFibonacciHeapTest) { test_max_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(MaxBoostFibonacciHeapTest) { TestMaxHeapFunctionality>(); } 
-BOOST_AUTO_TEST_CASE(BoostFibonacciHeapStressTest) { stress_test_heap>(); } +BOOST_AUTO_TEST_CASE(BoostFibonacciHeapStressTest) { StressTestHeap>(); } -BOOST_AUTO_TEST_CASE(StdSetHeapTest) { test_min_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(StdSetHeapTest) { TestMinHeapFunctionality>(); } -BOOST_AUTO_TEST_CASE(MaxStdSetHeapTest) { test_max_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(MaxStdSetHeapTest) { TestMaxHeapFunctionality>(); } -BOOST_AUTO_TEST_CASE(StdSetHeapStressTest) { stress_test_heap>(); } +BOOST_AUTO_TEST_CASE(StdSetHeapStressTest) { StressTestHeap>(); } -BOOST_AUTO_TEST_CASE(DaryHeap_D2_Test) { test_min_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(DaryHeapD2Test) { TestMinHeapFunctionality>(); } -BOOST_AUTO_TEST_CASE(MaxDaryHeap_D2_Test) { test_max_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(MaxDaryHeapD2Test) { TestMaxHeapFunctionality>(); } -BOOST_AUTO_TEST_CASE(DaryHeap_D2_StressTest) { stress_test_heap>(); } +BOOST_AUTO_TEST_CASE(DaryHeapD2StressTest) { StressTestHeap>(); } -BOOST_AUTO_TEST_CASE(DaryHeap_D4_Test) { test_min_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(DaryHeapD4Test) { TestMinHeapFunctionality>(); } -BOOST_AUTO_TEST_CASE(MaxDaryHeap_D4_Test) { test_max_heap_functionality>(); } +BOOST_AUTO_TEST_CASE(MaxDaryHeapD4Test) { TestMaxHeapFunctionality>(); } -BOOST_AUTO_TEST_CASE(DaryHeap_D4_StressTest) { stress_test_heap>(); } +BOOST_AUTO_TEST_CASE(DaryHeapD4StressTest) { StressTestHeap>(); } BOOST_AUTO_TEST_SUITE_END() -BOOST_AUTO_TEST_SUITE(HeapPerformanceTests) +BOOST_AUTO_TEST_SUITE(heap_performance_tests) BOOST_AUTO_TEST_CASE(HeapPerformanceComparison) { - const size_t num_items = 10000; - const size_t num_updates = 5000; - const size_t num_random_ops = 40000; - - run_performance_test>("Pairing Heap", num_items, num_updates, num_random_ops); - run_performance_test>("Boost Fibonacci Heap", num_items, num_updates, num_random_ops); - run_performance_test>("std::set", num_items, num_updates, num_random_ops); - 
run_performance_test>("Binary Heap (d=2)", num_items, num_updates, num_random_ops); - run_performance_test>("4-ary Heap (d=4)", num_items, num_updates, num_random_ops); - run_performance_test>("8-ary Heap (d=8)", num_items, num_updates, num_random_ops); + const size_t numItems = 10000; + const size_t numUpdates = 5000; + const size_t numRandomOps = 40000; + + RunPerformanceTest>("Pairing Heap", numItems, numUpdates, numRandomOps); + RunPerformanceTest>("Boost Fibonacci Heap", numItems, numUpdates, numRandomOps); + RunPerformanceTest>("std::set", numItems, numUpdates, numRandomOps); + RunPerformanceTest>("Binary Heap (d=2)", numItems, numUpdates, numRandomOps); + RunPerformanceTest>("4-ary Heap (d=4)", numItems, numUpdates, numRandomOps); + RunPerformanceTest>("8-ary Heap (d=8)", numItems, numUpdates, numRandomOps); } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/heavy_edge_preprocessing.cpp b/tests/heavy_edge_preprocessing.cpp index 6fcda0c2..2c1f3e33 100644 --- a/tests/heavy_edge_preprocessing.cpp +++ b/tests/heavy_edge_preprocessing.cpp @@ -12,9 +12,9 @@ using namespace osp; BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) { - using Graph_t = boost_graph_int_t; + using GraphT = BoostGraphIntT; - std::vector filenames_graph = test_graphs(); + std::vector filenamesGraph = TestGraphs(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -24,34 +24,34 @@ BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1, filename_graph.find_last_of(".")); + for (auto &filenameGraph : filenamesGraph) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1, filenameGraph.find_last_of(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << nameGraph << std::endl; - Graph_t graph; + GraphT graph; - 
bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), graph); + bool statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), graph); - if (!status_graph) { + if (!statusGraph) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } int weight = 0; - for (const auto &e : edges(graph)) { - graph.set_edge_comm_weight(e, 1 + (weight + 100 % 500)); + for (const auto &e : Edges(graph)) { + graph.SetEdgeCommWeight(e, 1 + (weight + 100 % 500)); } - auto partition = heavy_edge_preprocess(graph, 5.0, 0.7f, 0.34f); - std::vector vertex_in_partition(graph.num_vertices(), false); + auto partition = HeavyEdgePreprocess(graph, 5.0, 0.7f, 0.34f); + std::vector vertexInPartition(graph.NumVertices(), false); for (const auto &part : partition) { for (const auto &vert : part) { - BOOST_CHECK(!vertex_in_partition[vert]); - vertex_in_partition[vert] = true; + BOOST_CHECK(!vertexInPartition[vert]); + vertexInPartition[vert] = true; } } - for (const bool value : vertex_in_partition) { + for (const bool value : vertexInPartition) { BOOST_CHECK(value); } } diff --git a/tests/hill_climbing.cpp b/tests/hill_climbing.cpp index 3bbfcde1..d9a0d17b 100644 --- a/tests/hill_climbing.cpp +++ b/tests/hill_climbing.cpp @@ -30,13 +30,13 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(hill_climbing) { - using graph = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(HillClimbing) { + using Graph = ComputationalDagVectorImplDefUnsignedT; - BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + BspInstance instance; + instance.SetNumberOfProcessors(4); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -46,43 +46,43 @@ BOOST_AUTO_TEST_CASE(hill_climbing) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB( + (cwd / "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag").string(), instance.GetComputationalDag()); BOOST_CHECK(status); - GreedyBspScheduler greedy; - BspSchedule bsp_initial(instance); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, greedy.computeSchedule(bsp_initial)); - BOOST_CHECK_EQUAL(bsp_initial.satisfiesPrecedenceConstraints(), true); - - HillClimbingScheduler scheduler; - BspSchedule schedule1 = bsp_initial; - scheduler.improveSchedule(schedule1); - BOOST_CHECK_EQUAL(schedule1.satisfiesPrecedenceConstraints(), true); - - scheduler.setSteepestAscend(true); - BspSchedule schedule2 = bsp_initial; - scheduler.improveSchedule(schedule2); - BOOST_CHECK_EQUAL(schedule2.satisfiesPrecedenceConstraints(), true); - - BspSchedule schedule3 = bsp_initial; - scheduler.setTimeLimitSeconds(1U); - scheduler.improveScheduleWithTimeLimit(schedule3); - BOOST_CHECK_EQUAL(schedule3.satisfiesPrecedenceConstraints(), true); - - BspSchedule schedule4 = bsp_initial; - scheduler.improveScheduleWithStepLimit(schedule4, 5); - BOOST_CHECK_EQUAL(schedule4.satisfiesPrecedenceConstraints(), true); + 
GreedyBspScheduler greedy; + BspSchedule bspInitial(instance); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, greedy.ComputeSchedule(bspInitial)); + BOOST_CHECK_EQUAL(bspInitial.SatisfiesPrecedenceConstraints(), true); + + HillClimbingScheduler scheduler; + BspSchedule schedule1 = bspInitial; + scheduler.ImproveSchedule(schedule1); + BOOST_CHECK_EQUAL(schedule1.SatisfiesPrecedenceConstraints(), true); + + scheduler.SetSteepestAscend(true); + BspSchedule schedule2 = bspInitial; + scheduler.ImproveSchedule(schedule2); + BOOST_CHECK_EQUAL(schedule2.SatisfiesPrecedenceConstraints(), true); + + BspSchedule schedule3 = bspInitial; + scheduler.SetTimeLimitSeconds(1U); + scheduler.ImproveScheduleWithTimeLimit(schedule3); + BOOST_CHECK_EQUAL(schedule3.SatisfiesPrecedenceConstraints(), true); + + BspSchedule schedule4 = bspInitial; + scheduler.ImproveScheduleWithStepLimit(schedule4, 5); + BOOST_CHECK_EQUAL(schedule4.SatisfiesPrecedenceConstraints(), true); } -BOOST_AUTO_TEST_CASE(hill_climbing_for_comm_schedule) { - using graph = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(HillClimbingForCommSchedule) { + using Graph = ComputationalDagVectorImplDefUnsignedT; - BspInstance instance; - instance.setNumberOfProcessors(2); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + BspInstance instance; + instance.SetNumberOfProcessors(2); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -92,33 +92,35 @@ BOOST_AUTO_TEST_CASE(hill_climbing_for_comm_schedule) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB( + (cwd / "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag").string(), instance.GetComputationalDag()); 
BOOST_CHECK(status); - GreedyBspScheduler greedy; - BspSchedule initial(instance); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, greedy.computeSchedule(initial)); - BOOST_CHECK_EQUAL(initial.satisfiesPrecedenceConstraints(), true); - - HillClimbingScheduler hc; - hc.improveSchedule(initial); - BOOST_CHECK_EQUAL(initial.satisfiesPrecedenceConstraints(), true); - - BspSchedule schedule = initial; - BspScheduleCS initial_cs(std::move(initial)); - // initial_cs.setAutoCommunicationSchedule(); - initial_cs.setEagerCommunicationSchedule(); - BOOST_CHECK_EQUAL(initial_cs.hasValidCommSchedule(), true); - - HillClimbingForCommSteps hc_cs; - BspScheduleCS schedule1 = initial_cs; - hc_cs.improveSchedule(schedule1); - BOOST_CHECK_EQUAL(schedule1.hasValidCommSchedule(), true); - - BspScheduleCS schedule2 = initial_cs; - hc_cs.setSteepestAscend(true); - hc_cs.improveSchedule(schedule2); - BOOST_CHECK_EQUAL(schedule2.hasValidCommSchedule(), true); + GreedyBspScheduler greedy; + BspSchedule initial(instance); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, greedy.ComputeSchedule(initial)); + BOOST_CHECK_EQUAL(initial.SatisfiesPrecedenceConstraints(), true); + + HillClimbingScheduler hc; + hc.ImproveSchedule(initial); + BOOST_CHECK_EQUAL(initial.SatisfiesPrecedenceConstraints(), true); + + BspSchedule schedule = initial; + BspScheduleCS initialCs(std::move(initial)); + // initial_cs.SetAutoCommunicationSchedule(); + BOOST_CHECK(initialCs.SatisfiesPrecedenceConstraints()); + initialCs.UpdateNumberOfSupersteps(); + initialCs.SetEagerCommunicationSchedule(); + BOOST_CHECK_EQUAL(initialCs.HasValidCommSchedule(), true); + + HillClimbingForCommSteps hcCs; + BspScheduleCS schedule1 = initialCs; + hcCs.ImproveSchedule(schedule1); + BOOST_CHECK_EQUAL(schedule1.HasValidCommSchedule(), true); + + BspScheduleCS schedule2 = initialCs; + hcCs.SetSteepestAscend(true); + hcCs.ImproveSchedule(schedule2); + BOOST_CHECK_EQUAL(schedule2.HasValidCommSchedule(), true); } diff --git 
a/tests/hypergraph_and_partition.cpp b/tests/hypergraph_and_partition.cpp index 4d934454..9e42f9b5 100644 --- a/tests/hypergraph_and_partition.cpp +++ b/tests/hypergraph_and_partition.cpp @@ -33,9 +33,9 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { - using graph = computational_dag_vector_impl_def_int_t; - using hypergraph = Hypergraph_def_t; +BOOST_AUTO_TEST_CASE(HypergraphAndPartitionTest) { + using Graph = ComputationalDagVectorImplDefIntT; + using HypergraphImpl = HypergraphDefT; // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -45,176 +45,176 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { std::cout << cwd << std::endl; } - graph DAG; + Graph dag; - bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), dag); BOOST_CHECK(status); - hypergraph Hgraph; + HypergraphImpl hgraph; // Matrix format, one hyperedge for each row/column - status = file_reader::readHypergraphMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), Hgraph); + status = file_reader::ReadHypergraphMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), hgraph); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(Hgraph.num_vertices(), 27); - BOOST_CHECK_EQUAL(Hgraph.num_hyperedges(), 16); + BOOST_CHECK_EQUAL(hgraph.NumVertices(), 27); + BOOST_CHECK_EQUAL(hgraph.NumHyperedges(), 16); // DAG format, all hyperedges have size 2 - Hgraph = convert_from_cdag_as_dag(DAG); - BOOST_CHECK_EQUAL(DAG.num_vertices(), Hgraph.num_vertices()); - BOOST_CHECK_EQUAL(DAG.num_edges(), Hgraph.num_hyperedges()); - BOOST_CHECK_EQUAL(DAG.num_edges() * 2, Hgraph.num_pins()); + hgraph = ConvertFromCdagAsDag(dag); + BOOST_CHECK_EQUAL(dag.NumVertices(), hgraph.NumVertices()); + 
BOOST_CHECK_EQUAL(dag.NumEdges(), hgraph.NumHyperedges()); + BOOST_CHECK_EQUAL(dag.NumEdges() * 2, hgraph.NumPins()); // HyperDAG format, one hypredge for each non-sink node - unsigned nr_of_non_sinks = 0; - for (const auto &node : DAG.vertices()) { - if (DAG.out_degree(node) > 0) { - ++nr_of_non_sinks; + unsigned nrOfNonSinks = 0; + for (const auto &node : dag.Vertices()) { + if (dag.OutDegree(node) > 0) { + ++nrOfNonSinks; } } - Hgraph = convert_from_cdag_as_hyperdag(DAG); - BOOST_CHECK_EQUAL(DAG.num_vertices(), Hgraph.num_vertices()); - BOOST_CHECK_EQUAL(nr_of_non_sinks, Hgraph.num_hyperedges()); - BOOST_CHECK_EQUAL(DAG.num_edges() + nr_of_non_sinks, Hgraph.num_pins()); + hgraph = ConvertFromCdagAsHyperdag(dag); + BOOST_CHECK_EQUAL(dag.NumVertices(), hgraph.NumVertices()); + BOOST_CHECK_EQUAL(nrOfNonSinks, hgraph.NumHyperedges()); + BOOST_CHECK_EQUAL(dag.NumEdges() + nrOfNonSinks, hgraph.NumPins()); // Dummy partitioning - PartitioningProblem instance(Hgraph, 3, 30); + PartitioningProblem instance(hgraph, 3, 30); Partitioning partition(instance); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - partition.setAssignedPartition(node, node % 3); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partition.SetAssignedPartition(node, node % 3); } - BOOST_CHECK(partition.satisfiesBalanceConstraint()); - int cutNetCost = partition.computeCutNetCost(); - int connectivityCost = partition.computeConnectivityCost(); + BOOST_CHECK(partition.SatisfiesBalanceConstraint()); + int cutNetCost = partition.ComputeCutNetCost(); + int connectivityCost = partition.ComputeConnectivityCost(); BOOST_CHECK(connectivityCost >= cutNetCost); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - instance.getHypergraph().set_vertex_work_weight(node, 1); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + instance.GetHypergraph().SetVertexWorkWeight(node, 1); } - instance.setMaxWorkWeightViaImbalanceFactor(0); - 
BOOST_CHECK(partition.satisfiesBalanceConstraint()); + instance.SetMaxWorkWeightViaImbalanceFactor(0); + BOOST_CHECK(partition.SatisfiesBalanceConstraint()); - instance.setNumberOfPartitions(5); - instance.setMaxWorkWeightViaImbalanceFactor(0); - BOOST_CHECK(!partition.satisfiesBalanceConstraint()); + instance.SetNumberOfPartitions(5); + instance.SetMaxWorkWeightViaImbalanceFactor(0); + BOOST_CHECK(!partition.SatisfiesBalanceConstraint()); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - partition.setAssignedPartition(node, node % 5); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partition.SetAssignedPartition(node, node % 5); } - BOOST_CHECK(partition.satisfiesBalanceConstraint()); - BOOST_CHECK(partition.computeConnectivityCost() >= partition.computeCutNetCost()); + BOOST_CHECK(partition.SatisfiesBalanceConstraint()); + BOOST_CHECK(partition.ComputeConnectivityCost() >= partition.ComputeCutNetCost()); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - instance.getHypergraph().set_vertex_memory_weight(node, 1); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + instance.GetHypergraph().SetVertexMemoryWeight(node, 1); } - instance.setMaxMemoryWeightExplicitly(10); - BOOST_CHECK(partition.satisfiesBalanceConstraint() == false); - instance.setMaxMemoryWeightExplicitly(std::numeric_limits::max()); + instance.SetMaxMemoryWeightExplicitly(10); + BOOST_CHECK(partition.SatisfiesBalanceConstraint() == false); + instance.SetMaxMemoryWeightExplicitly(std::numeric_limits::max()); - file_writer::write_txt(std::cout, partition); + file_writer::WriteTxt(std::cout, partition); // Dummy partitioning with replication - instance.setHypergraph(convert_from_cdag_as_hyperdag(DAG)); - instance.setNumberOfPartitions(3); - instance.setMaxWorkWeightExplicitly(30); - PartitioningWithReplication partition_with_rep(instance); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - 
partition_with_rep.setAssignedPartitions(node, {node % 3}); + instance.SetHypergraph(ConvertFromCdagAsHyperdag(dag)); + instance.SetNumberOfPartitions(3); + instance.SetMaxWorkWeightExplicitly(30); + PartitioningWithReplication partitionWithRep(instance); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partitionWithRep.SetAssignedPartitions(node, {node % 3}); } - BOOST_CHECK(partition_with_rep.satisfiesBalanceConstraint()); - BOOST_CHECK(partition_with_rep.computeCutNetCost() == cutNetCost); - BOOST_CHECK(partition_with_rep.computeConnectivityCost() == connectivityCost); + BOOST_CHECK(partitionWithRep.SatisfiesBalanceConstraint()); + BOOST_CHECK(partitionWithRep.ComputeCutNetCost() == cutNetCost); + BOOST_CHECK(partitionWithRep.ComputeConnectivityCost() == connectivityCost); - instance.setMaxWorkWeightExplicitly(60); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - partition_with_rep.setAssignedPartitions(node, {node % 3, (node + 1) % 3}); + instance.SetMaxWorkWeightExplicitly(60); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partitionWithRep.SetAssignedPartitions(node, {node % 3, (node + 1) % 3}); } - BOOST_CHECK(partition_with_rep.satisfiesBalanceConstraint()); - BOOST_CHECK(partition_with_rep.computeConnectivityCost() >= partition_with_rep.computeCutNetCost()); + BOOST_CHECK(partitionWithRep.SatisfiesBalanceConstraint()); + BOOST_CHECK(partitionWithRep.ComputeConnectivityCost() >= partitionWithRep.ComputeCutNetCost()); - instance.setMaxWorkWeightExplicitly(compute_total_vertex_work_weight(Hgraph)); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - partition_with_rep.setAssignedPartitions(node, {0, 1, 2}); + instance.SetMaxWorkWeightExplicitly(ComputeTotalVertexWorkWeight(hgraph)); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partitionWithRep.SetAssignedPartitions(node, {0, 1, 2}); } - BOOST_CHECK(partition_with_rep.satisfiesBalanceConstraint()); - 
BOOST_CHECK(partition_with_rep.computeConnectivityCost() == 0); - BOOST_CHECK(partition_with_rep.computeCutNetCost() == 0); + BOOST_CHECK(partitionWithRep.SatisfiesBalanceConstraint()); + BOOST_CHECK(partitionWithRep.ComputeConnectivityCost() == 0); + BOOST_CHECK(partitionWithRep.ComputeCutNetCost() == 0); - file_writer::write_txt(std::cout, partition_with_rep); + file_writer::WriteTxt(std::cout, partitionWithRep); // Generic FM - instance.setNumberOfPartitions(2); - instance.setMaxWorkWeightExplicitly(35); - for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { - instance.getHypergraph().set_vertex_work_weight(node, 1); + instance.SetNumberOfPartitions(2); + instance.SetMaxWorkWeightExplicitly(35); + for (unsigned node = 0; node < instance.GetHypergraph().NumVertices(); ++node) { + instance.GetHypergraph().SetVertexWorkWeight(node, 1); } - Partitioning partition_to_improve(instance); - for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { - partition_to_improve.setAssignedPartition(node, node % 2); + Partitioning partitionToImprove(instance); + for (unsigned node = 0; node < instance.GetHypergraph().NumVertices(); ++node) { + partitionToImprove.SetAssignedPartition(node, node % 2); } - int original_cost = partition_to_improve.computeConnectivityCost(); + int originalCost = partitionToImprove.ComputeConnectivityCost(); - GenericFM fm; - fm.ImprovePartitioning(partition_to_improve); - int new_cost = partition_to_improve.computeConnectivityCost(); + GenericFM fm; + fm.ImprovePartitioning(partitionToImprove); + int newCost = partitionToImprove.ComputeConnectivityCost(); - BOOST_CHECK(partition_to_improve.satisfiesBalanceConstraint()); - BOOST_CHECK(new_cost <= original_cost); - std::cout << original_cost << " --> " << new_cost << std::endl; + BOOST_CHECK(partitionToImprove.SatisfiesBalanceConstraint()); + BOOST_CHECK(newCost <= originalCost); + std::cout << originalCost << " --> " << newCost << std::endl; - graph 
larger_DAG; - file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(), - larger_DAG); - instance.setHypergraph(convert_from_cdag_as_hyperdag(larger_DAG)); + Graph largerDag; + file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(), + largerDag); + instance.SetHypergraph(ConvertFromCdagAsHyperdag(largerDag)); - instance.setMaxWorkWeightExplicitly(4000); - for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { - instance.getHypergraph().set_vertex_work_weight(node, 1); + instance.SetMaxWorkWeightExplicitly(4000); + for (unsigned node = 0; node < instance.GetHypergraph().NumVertices(); ++node) { + instance.GetHypergraph().SetVertexWorkWeight(node, 1); } - partition_to_improve.resetPartition(); - for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { - partition_to_improve.setAssignedPartition(node, node % 2); + partitionToImprove.ResetPartition(); + for (unsigned node = 0; node < instance.GetHypergraph().NumVertices(); ++node) { + partitionToImprove.SetAssignedPartition(node, node % 2); } - original_cost = partition_to_improve.computeConnectivityCost(); + originalCost = partitionToImprove.ComputeConnectivityCost(); - fm.setMaxNodesInPart(0); - fm.ImprovePartitioning(partition_to_improve); - new_cost = partition_to_improve.computeConnectivityCost(); + fm.SetMaxNodesInPart(0); + fm.ImprovePartitioning(partitionToImprove); + newCost = partitionToImprove.ComputeConnectivityCost(); - BOOST_CHECK(partition_to_improve.satisfiesBalanceConstraint()); - BOOST_CHECK(new_cost <= original_cost); - std::cout << original_cost << " --> " << new_cost << std::endl; + BOOST_CHECK(partitionToImprove.SatisfiesBalanceConstraint()); + BOOST_CHECK(newCost <= originalCost); + std::cout << originalCost << " --> " << newCost << std::endl; // Recursive FM - instance.setNumberOfPartitions(16); - 
instance.setMaxWorkWeightViaImbalanceFactor(0.3); + instance.SetNumberOfPartitions(16); + instance.SetMaxWorkWeightViaImbalanceFactor(0.3); - for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { - partition_to_improve.setAssignedPartition(node, node % 16); + for (unsigned node = 0; node < instance.GetHypergraph().NumVertices(); ++node) { + partitionToImprove.SetAssignedPartition(node, node % 16); } - original_cost = partition_to_improve.computeConnectivityCost(); + originalCost = partitionToImprove.ComputeConnectivityCost(); - fm.setMaxNodesInPart(0); - fm.RecursiveFM(partition_to_improve); - new_cost = partition_to_improve.computeConnectivityCost(); + fm.SetMaxNodesInPart(0); + fm.RecursiveFM(partitionToImprove); + newCost = partitionToImprove.ComputeConnectivityCost(); - BOOST_CHECK(partition_to_improve.satisfiesBalanceConstraint()); - BOOST_CHECK(new_cost <= original_cost); - std::cout << original_cost << " --> " << new_cost << std::endl; + BOOST_CHECK(partitionToImprove.SatisfiesBalanceConstraint()); + BOOST_CHECK(newCost <= originalCost); + std::cout << originalCost << " --> " << newCost << std::endl; } diff --git a/tests/ilp_bsp_scheduler.cpp b/tests/ilp_bsp_scheduler.cpp index 201dd393..1b7edcc0 100644 --- a/tests/ilp_bsp_scheduler.cpp +++ b/tests/ilp_bsp_scheduler.cpp @@ -35,13 +35,13 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(test_total) { - using graph = computational_dag_edge_idx_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(TestTotal) { + using graph = ComputationalDagEdgeIdxVectorImplDefT; BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + instance.SetNumberOfProcessors(4); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -51,37 +51,37 @@ BOOST_AUTO_TEST_CASE(test_total) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_spmv_N6_nzP0d4.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB( + (cwd / "data/spaa/tiny/instance_spmv_N6_nzP0d4.hdag").string(), instance.GetComputationalDag()); BOOST_CHECK(status); - BspSchedule schedule_to(instance); + BspSchedule scheduleTo(instance); - TotalCommunicationScheduler scheduler_to; - scheduler_to.setTimeLimitSeconds(10); + TotalCommunicationScheduler schedulerTo; + schedulerTo.SetTimeLimitSeconds(10); - const auto result_to = scheduler_to.computeSchedule(schedule_to); - BOOST_CHECK(result_to == RETURN_STATUS::OSP_SUCCESS || result_to == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule_to.satisfiesPrecedenceConstraints()); + const auto resultTo = schedulerTo.ComputeSchedule(scheduleTo); + BOOST_CHECK(resultTo == ReturnStatus::OSP_SUCCESS || resultTo == ReturnStatus::BEST_FOUND); + BOOST_CHECK(scheduleTo.SatisfiesPrecedenceConstraints()); BspSchedule schedule(instance); TotalCommunicationScheduler scheduler; - scheduler.setTimeLimitSeconds(3600); - const auto result = scheduler.computeSchedule(schedule); + scheduler.SetTimeLimitSeconds(3600); + const auto result = scheduler.ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - 
BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); -}; + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); +} -BOOST_AUTO_TEST_CASE(test_full) { - using graph = computational_dag_edge_idx_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(TestFull) { + using graph = ComputationalDagEdgeIdxVectorImplDefT; BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + instance.SetNumberOfProcessors(4); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -91,117 +91,117 @@ BOOST_AUTO_TEST_CASE(test_full) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_spmv_N6_nzP0d4.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB( + (cwd / "data/spaa/tiny/instance_spmv_N6_nzP0d4.hdag").string(), instance.GetComputationalDag()); BOOST_CHECK(status); - BspScheduleCS schedule_to(instance); + BspScheduleCS scheduleTo(instance); - CoptFullScheduler scheduler_to; - scheduler_to.setTimeLimitSeconds(10); + CoptFullScheduler schedulerTo; + schedulerTo.SetTimeLimitSeconds(10); - const auto result_to = scheduler_to.computeScheduleCS(schedule_to); - BOOST_CHECK_EQUAL(RETURN_STATUS::BEST_FOUND, result_to); - BOOST_CHECK(schedule_to.satisfiesPrecedenceConstraints()); + const auto resultTo = schedulerTo.ComputeScheduleCS(scheduleTo); + BOOST_CHECK_EQUAL(ReturnStatus::BEST_FOUND, resultTo); + BOOST_CHECK(scheduleTo.SatisfiesPrecedenceConstraints()); - CoptFullScheduler scheduler_recomp; - BspScheduleRecomp schedule_recomp(instance); - scheduler_recomp.setTimeLimitSeconds(10); - scheduler_recomp.computeScheduleRecomp(schedule_recomp); - BOOST_CHECK(schedule_recomp.satisfiesConstraints()); + 
CoptFullScheduler schedulerRecomp; + BspScheduleRecomp scheduleRecomp(instance); + schedulerRecomp.SetTimeLimitSeconds(10); + schedulerRecomp.ComputeScheduleRecomp(scheduleRecomp); + BOOST_CHECK(scheduleRecomp.SatisfiesConstraints()); // WITH INITIALIZATION - BspSchedule schedule_init(instance); + BspSchedule scheduleInit(instance); GreedyBspScheduler greedy; - greedy.computeSchedule(schedule_init); - BOOST_CHECK(schedule_init.satisfiesPrecedenceConstraints()); - BspScheduleCS schedule_init_cs(schedule_init); - BOOST_CHECK(schedule_init_cs.hasValidCommSchedule()); + greedy.ComputeSchedule(scheduleInit); + BOOST_CHECK(scheduleInit.SatisfiesPrecedenceConstraints()); + BspScheduleCS scheduleInitCs(scheduleInit); + BOOST_CHECK(scheduleInitCs.HasValidCommSchedule()); // initialize with standard schedule, return standard schedule - CoptFullScheduler scheduler_init; - BspScheduleCS schedule_improved(instance); - scheduler_init.setTimeLimitSeconds(10); - scheduler_init.setInitialSolutionFromBspSchedule(schedule_init_cs); - const auto result_init = scheduler_init.computeScheduleCS(schedule_improved); - BOOST_CHECK_EQUAL(RETURN_STATUS::BEST_FOUND, result_init); - BOOST_CHECK(schedule_improved.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule_improved.hasValidCommSchedule()); + CoptFullScheduler schedulerInit; + BspScheduleCS scheduleImproved(instance); + schedulerInit.SetTimeLimitSeconds(10); + schedulerInit.SetInitialSolutionFromBspSchedule(scheduleInitCs); + const auto resultInit = schedulerInit.ComputeScheduleCS(scheduleImproved); + BOOST_CHECK_EQUAL(ReturnStatus::BEST_FOUND, resultInit); + BOOST_CHECK(scheduleImproved.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(scheduleImproved.HasValidCommSchedule()); // initialize with standard schedule, return recomputing schedule - CoptFullScheduler scheduler_init2(schedule_init_cs); - BspScheduleRecomp schedule_improved2(instance); - scheduler_init2.setTimeLimitSeconds(10); - const auto result_init2 = 
scheduler_init2.computeScheduleRecomp(schedule_improved2); - BOOST_CHECK_EQUAL(RETURN_STATUS::BEST_FOUND, result_init2); - BOOST_CHECK(schedule_improved2.satisfiesConstraints()); + CoptFullScheduler schedulerInit2(scheduleInitCs); + BspScheduleRecomp scheduleImproved2(instance); + schedulerInit2.SetTimeLimitSeconds(10); + const auto resultInit2 = schedulerInit2.ComputeScheduleRecomp(scheduleImproved2); + BOOST_CHECK_EQUAL(ReturnStatus::BEST_FOUND, resultInit2); + BOOST_CHECK(scheduleImproved2.SatisfiesConstraints()); // initialize with recomputing schedule, return recomputing schedule - BspScheduleRecomp schedule_improved3(instance), schedule_init3(schedule_init_cs); - CoptFullScheduler scheduler_init3(schedule_init3); - scheduler_init3.setTimeLimitSeconds(10); - const auto result_init3 = scheduler_init3.computeScheduleRecomp(schedule_improved3); - BOOST_CHECK_EQUAL(RETURN_STATUS::BEST_FOUND, result_init3); - BOOST_CHECK(schedule_improved3.satisfiesConstraints()); + BspScheduleRecomp scheduleImproved3(instance), scheduleInit3(scheduleInitCs); + CoptFullScheduler schedulerInit3(scheduleInit3); + schedulerInit3.SetTimeLimitSeconds(10); + const auto resultInit3 = schedulerInit3.ComputeScheduleRecomp(scheduleImproved3); + BOOST_CHECK_EQUAL(ReturnStatus::BEST_FOUND, resultInit3); + BOOST_CHECK(scheduleImproved3.SatisfiesConstraints()); // with vertex types - BspInstance instance_typed = instance; - instance_typed.getArchitecture().setProcessorType(0, 1); - instance_typed.getArchitecture().setProcessorType(1, 1); - for (vertex_idx_t node = 0; node < static_cast >(instance_typed.numberOfVertices()); ++node) { - instance_typed.getComputationalDag().set_vertex_type(node, node % 2); + BspInstance instanceTyped = instance; + instanceTyped.GetArchitecture().SetProcessorType(0, 1); + instanceTyped.GetArchitecture().SetProcessorType(1, 1); + for (VertexIdxT node = 0; node < static_cast >(instanceTyped.NumberOfVertices()); ++node) { + 
instanceTyped.GetComputationalDag().SetVertexType(node, node % 2); } - instance_typed.setDiagonalCompatibilityMatrix(2); - - BspSchedule schedule_typed(instance_typed); - greedy.computeSchedule(schedule_typed); - BOOST_CHECK(schedule_typed.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule_typed.satisfiesNodeTypeConstraints()); - - CoptFullScheduler scheduler_typed; - BspScheduleCS schedule_typed_cs(schedule_typed); - scheduler_typed.setTimeLimitSeconds(10); - scheduler_typed.setInitialSolutionFromBspSchedule(schedule_typed_cs); - const auto result_typed = scheduler_typed.computeSchedule(schedule_typed); - BOOST_CHECK_EQUAL(RETURN_STATUS::BEST_FOUND, result_typed); - BOOST_CHECK(schedule_typed.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule_typed.satisfiesNodeTypeConstraints()); + instanceTyped.SetDiagonalCompatibilityMatrix(2); + + BspSchedule scheduleTyped(instanceTyped); + greedy.ComputeSchedule(scheduleTyped); + BOOST_CHECK(scheduleTyped.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(scheduleTyped.SatisfiesNodeTypeConstraints()); + + CoptFullScheduler schedulerTyped; + BspScheduleCS scheduleTypedCs(scheduleTyped); + schedulerTyped.SetTimeLimitSeconds(10); + schedulerTyped.SetInitialSolutionFromBspSchedule(scheduleTypedCs); + const auto resultTyped = schedulerTyped.ComputeSchedule(scheduleTyped); + BOOST_CHECK_EQUAL(ReturnStatus::BEST_FOUND, resultTyped); + BOOST_CHECK(scheduleTyped.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(scheduleTyped.SatisfiesNodeTypeConstraints()); // with MaxBSP schedule - CoptFullScheduler scheduler_max; - MaxBspScheduleCS schedule_max(instance); - scheduler_max.setTimeLimitSeconds(10); - const auto result_max = scheduler_max.computeMaxBspScheduleCS(schedule_max); - BOOST_CHECK(result_max == RETURN_STATUS::OSP_SUCCESS || result_max == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule_max.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule_max.hasValidCommSchedule()); - - 
scheduler_max.setInitialSolutionFromBspSchedule(schedule_max); - const auto result_max2 = scheduler_max.computeMaxBspScheduleCS(schedule_max); - BOOST_CHECK(result_max2 == RETURN_STATUS::OSP_SUCCESS || result_max2 == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule_max.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule_max.hasValidCommSchedule()); + CoptFullScheduler schedulerMax; + MaxBspScheduleCS scheduleMax(instance); + schedulerMax.SetTimeLimitSeconds(10); + const auto resultMax = schedulerMax.ComputeMaxBspScheduleCs(scheduleMax); + BOOST_CHECK(resultMax == ReturnStatus::OSP_SUCCESS || resultMax == ReturnStatus::BEST_FOUND); + BOOST_CHECK(scheduleMax.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(scheduleMax.HasValidCommSchedule()); + + schedulerMax.SetInitialSolutionFromBspSchedule(scheduleMax); + const auto resultMax2 = schedulerMax.ComputeMaxBspScheduleCs(scheduleMax); + BOOST_CHECK(resultMax2 == ReturnStatus::OSP_SUCCESS || resultMax2 == ReturnStatus::BEST_FOUND); + BOOST_CHECK(scheduleMax.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(scheduleMax.HasValidCommSchedule()); // longer time BspScheduleCS schedule(instance); CoptFullScheduler scheduler; - scheduler.setTimeLimitSeconds(3600); - const auto result = scheduler.computeScheduleCS(schedule); + scheduler.SetTimeLimitSeconds(3600); + const auto result = scheduler.ComputeScheduleCS(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); -}; + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); +} -BOOST_AUTO_TEST_CASE(test_cs) { - using graph = computational_dag_edge_idx_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(TestCs) { + using graph = ComputationalDagEdgeIdxVectorImplDefT; BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + instance.SetNumberOfProcessors(4); + 
instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -211,38 +211,38 @@ BOOST_AUTO_TEST_CASE(test_cs) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(), - instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(), + instance.GetComputationalDag()); BOOST_CHECK(status); BspSchedule schedule(instance); GreedyBspScheduler greedy; - greedy.computeSchedule(schedule); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BspScheduleCS schedule_cs(schedule); - BOOST_CHECK(schedule_cs.hasValidCommSchedule()); + greedy.ComputeSchedule(schedule); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BspScheduleCS scheduleCs(schedule); + BOOST_CHECK(scheduleCs.HasValidCommSchedule()); CoptCommScheduleOptimizer scheduler; - scheduler.setTimeLimitSeconds(10); - const auto before = schedule_cs.compute_cs_communication_costs(); - const auto result = scheduler.improveSchedule(schedule_cs); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - const auto after = schedule_cs.compute_cs_communication_costs(); + scheduler.SetTimeLimitSeconds(10); + const auto before = scheduleCs.ComputeCsCommunicationCosts(); + const auto result = scheduler.ImproveSchedule(scheduleCs); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + const auto after = scheduleCs.ComputeCsCommunicationCosts(); std::cout << before << " --cs--> " << after << std::endl; - BOOST_CHECK(schedule_cs.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule_cs.hasValidCommSchedule()); + BOOST_CHECK(scheduleCs.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(scheduleCs.HasValidCommSchedule()); BOOST_CHECK(before >= after); -}; +} -BOOST_AUTO_TEST_CASE(test_partial) { - using graph = 
computational_dag_edge_idx_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(TestPartial) { + using graph = ComputationalDagEdgeIdxVectorImplDefT; BspInstance instance; - instance.setNumberOfProcessors(3); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + instance.SetNumberOfProcessors(3); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -252,33 +252,33 @@ BOOST_AUTO_TEST_CASE(test_partial) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(), - instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(), + instance.GetComputationalDag()); BOOST_CHECK(status); - BspSchedule schedule_init(instance); + BspSchedule scheduleInit(instance); GreedyBspScheduler greedy; - greedy.computeSchedule(schedule_init); - BOOST_CHECK(schedule_init.satisfiesPrecedenceConstraints()); - BspScheduleCS schedule(schedule_init); - BOOST_CHECK(schedule.hasValidCommSchedule()); + greedy.ComputeSchedule(scheduleInit); + BOOST_CHECK(scheduleInit.SatisfiesPrecedenceConstraints()); + BspScheduleCS schedule(scheduleInit); + BOOST_CHECK(schedule.HasValidCommSchedule()); CoptPartialScheduler scheduler; - scheduler.setTimeLimitSeconds(10); - scheduler.setStartAndEndSuperstep(0, 2); - auto cost_before = schedule.computeCosts(); - auto result = scheduler.improveSchedule(schedule); - BOOST_CHECK(result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.hasValidCommSchedule()); - auto cost_mid = schedule.computeCosts(); - BOOST_CHECK(cost_mid <= cost_before); - scheduler.setStartAndEndSuperstep(2, 5); - result = scheduler.improveSchedule(schedule); - BOOST_CHECK(result == 
RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.hasValidCommSchedule()); - auto cost_after = schedule.computeCosts(); - BOOST_CHECK(cost_after <= cost_mid); -}; + scheduler.SetTimeLimitSeconds(10); + scheduler.SetStartAndEndSuperstep(0, 2); + auto costBefore = schedule.ComputeCosts(); + auto result = scheduler.ImproveSchedule(schedule); + BOOST_CHECK(result == ReturnStatus::OSP_SUCCESS || result == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.HasValidCommSchedule()); + auto costMid = schedule.ComputeCosts(); + BOOST_CHECK(costMid <= costBefore); + scheduler.SetStartAndEndSuperstep(2, 5); + result = scheduler.ImproveSchedule(schedule); + BOOST_CHECK(result == ReturnStatus::OSP_SUCCESS || result == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.HasValidCommSchedule()); + auto costAfter = schedule.ComputeCosts(); + BOOST_CHECK(costAfter <= costMid); +} diff --git a/tests/ilp_hypergraph_partitioning.cpp b/tests/ilp_hypergraph_partitioning.cpp index 636c6545..8596c849 100644 --- a/tests/ilp_hypergraph_partitioning.cpp +++ b/tests/ilp_hypergraph_partitioning.cpp @@ -28,9 +28,9 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(test_full) { - using graph = computational_dag_vector_impl_def_int_t; - using Hypergraph = Hypergraph_def_t; +BOOST_AUTO_TEST_CASE(TestFull) { + using graph = ComputationalDagVectorImplDefIntT; + using HypergraphImpl = HypergraphDefT; // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -40,98 +40,98 @@ BOOST_AUTO_TEST_CASE(test_full) { std::cout << cwd << std::endl; } - graph DAG; + graph dag; - bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), dag); BOOST_CHECK(status); - Hypergraph Hgraph = convert_from_cdag_as_hyperdag(DAG); - BOOST_CHECK_EQUAL(DAG.num_vertices(), Hgraph.num_vertices()); + HypergraphImpl hgraph = ConvertFromCdagAsHyperdag(dag); + BOOST_CHECK_EQUAL(dag.NumVertices(), hgraph.NumVertices()); - PartitioningProblem instance(Hgraph, 3, 35); + PartitioningProblem instance(hgraph, 3, 35); Partitioning partition(instance); // ILP without replication - HypergraphPartitioningILP partitioner; - partitioner.setTimeLimitSeconds(60); - partitioner.computePartitioning(partition); + HypergraphPartitioningILP partitioner; + partitioner.SetTimeLimitSeconds(60); + partitioner.ComputePartitioning(partition); - BOOST_CHECK(partition.satisfiesBalanceConstraint()); - BOOST_CHECK(partition.computeConnectivityCost() >= partition.computeCutNetCost()); + BOOST_CHECK(partition.SatisfiesBalanceConstraint()); + BOOST_CHECK(partition.ComputeConnectivityCost() >= partition.ComputeCutNetCost()); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - partition.setAssignedPartition(node, node % 3); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partition.SetAssignedPartition(node, node % 3); } - partitioner.setUseInitialSolution(true); - 
partitioner.computePartitioning(partition); + partitioner.SetUseInitialSolution(true); + partitioner.ComputePartitioning(partition); - BOOST_CHECK(partition.satisfiesBalanceConstraint()); - int cutNetCost = partition.computeCutNetCost(), connectivityCost = partition.computeConnectivityCost(); + BOOST_CHECK(partition.SatisfiesBalanceConstraint()); + int cutNetCost = partition.ComputeCutNetCost(), connectivityCost = partition.ComputeConnectivityCost(); BOOST_CHECK(connectivityCost >= cutNetCost); - instance.setMaxMemoryWeightExplicitly(37); - partitioner.computePartitioning(partition); - BOOST_CHECK(partition.satisfiesBalanceConstraint()); - BOOST_CHECK(cutNetCost == partition.computeCutNetCost()); - BOOST_CHECK(connectivityCost == partition.computeConnectivityCost()); - instance.setMaxMemoryWeightExplicitly(std::numeric_limits::max()); + instance.SetMaxMemoryWeightExplicitly(37); + partitioner.ComputePartitioning(partition); + BOOST_CHECK(partition.SatisfiesBalanceConstraint()); + BOOST_CHECK(cutNetCost == partition.ComputeCutNetCost()); + BOOST_CHECK(connectivityCost == partition.ComputeConnectivityCost()); + instance.SetMaxMemoryWeightExplicitly(std::numeric_limits::max()); // ILP with replication - HypergraphPartitioningILPWithReplication partitioner_rep; - PartitioningWithReplication partition_rep(instance); + HypergraphPartitioningILPWithReplication partitionerRep; + PartitioningWithReplication partitionRep(instance); - partitioner_rep.setTimeLimitSeconds(60); - partitioner_rep.computePartitioning(partition_rep); + partitionerRep.SetTimeLimitSeconds(60); + partitionerRep.ComputePartitioning(partitionRep); - BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); - BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); + BOOST_CHECK(partitionRep.SatisfiesBalanceConstraint()); + BOOST_CHECK(partitionRep.ComputeConnectivityCost() == 0); - partitioner_rep.setUseInitialSolution(true); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - 
partition_rep.setAssignedPartitions(node, {node % 3}); + partitionerRep.SetUseInitialSolution(true); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partitionRep.SetAssignedPartitions(node, {node % 3}); } - partitioner_rep.computePartitioning(partition_rep); - BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); - BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); + partitionerRep.ComputePartitioning(partitionRep); + BOOST_CHECK(partitionRep.SatisfiesBalanceConstraint()); + BOOST_CHECK(partitionRep.ComputeConnectivityCost() == 0); - instance.setMaxWorkWeightExplicitly(60); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - partition_rep.setAssignedPartitions(node, {node % 3, (node + 1) % 3}); + instance.SetMaxWorkWeightExplicitly(60); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partitionRep.SetAssignedPartitions(node, {node % 3, (node + 1) % 3}); } - partitioner_rep.computePartitioning(partition_rep); - BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); - BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); + partitionerRep.ComputePartitioning(partitionRep); + BOOST_CHECK(partitionRep.SatisfiesBalanceConstraint()); + BOOST_CHECK(partitionRep.ComputeConnectivityCost() == 0); // same tests with other replication formulation - instance.setMaxWorkWeightExplicitly(35); - partitioner_rep.setReplicationModel(HypergraphPartitioningILPWithReplication::REPLICATION_MODEL_IN_ILP::GENERAL); - partitioner_rep.setUseInitialSolution(false); - partitioner_rep.computePartitioning(partition_rep); + instance.SetMaxWorkWeightExplicitly(35); + partitionerRep.SetReplicationModel(HypergraphPartitioningILPWithReplication::ReplicationModelInIlp::GENERAL); + partitionerRep.SetUseInitialSolution(false); + partitionerRep.ComputePartitioning(partitionRep); - BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); - BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); + 
BOOST_CHECK(partitionRep.SatisfiesBalanceConstraint()); + BOOST_CHECK(partitionRep.ComputeConnectivityCost() == 0); - partitioner_rep.setUseInitialSolution(true); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - partition_rep.setAssignedPartitions(node, {node % 3}); + partitionerRep.SetUseInitialSolution(true); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partitionRep.SetAssignedPartitions(node, {node % 3}); } - partitioner_rep.computePartitioning(partition_rep); - BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); - BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); + partitionerRep.ComputePartitioning(partitionRep); + BOOST_CHECK(partitionRep.SatisfiesBalanceConstraint()); + BOOST_CHECK(partitionRep.ComputeConnectivityCost() == 0); - instance.setMaxWorkWeightExplicitly(60); - for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { - partition_rep.setAssignedPartitions(node, {node % 3, (node + 1) % 3}); + instance.SetMaxWorkWeightExplicitly(60); + for (unsigned node = 0; node < hgraph.NumVertices(); ++node) { + partitionRep.SetAssignedPartitions(node, {node % 3, (node + 1) % 3}); } - partitioner_rep.computePartitioning(partition_rep); - BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); - BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); -}; + partitionerRep.ComputePartitioning(partitionRep); + BOOST_CHECK(partitionRep.SatisfiesBalanceConstraint()); + BOOST_CHECK(partitionRep.ComputeConnectivityCost() == 0); +} diff --git a/tests/ilp_pebbling_scheduler.cpp b/tests/ilp_pebbling_scheduler.cpp index 0d8b810f..f5a82327 100644 --- a/tests/ilp_pebbling_scheduler.cpp +++ b/tests/ilp_pebbling_scheduler.cpp @@ -31,13 +31,13 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(test_full) { - using graph = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(TestFull) { + using graph = ComputationalDagVectorImplDefUnsignedT; BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + instance.SetNumberOfProcessors(4); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -47,38 +47,37 @@ BOOST_AUTO_TEST_CASE(test_full) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_spmv_N6_nzP0d4.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB( + (cwd / "data/spaa/tiny/instance_spmv_N6_nzP0d4.hdag").string(), instance.GetComputationalDag()); BOOST_CHECK(status); GreedyBspScheduler greedy; - BspSchedule bsp_initial(instance); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, greedy.computeSchedule(bsp_initial)); + BspSchedule bspInitial(instance); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, greedy.ComputeSchedule(bspInitial)); - std::vector > minimum_memory_required_vector - = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); - v_memw_t max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end()); - instance.getArchitecture().setMemoryBound(max_required); + std::vector > minimumMemoryRequiredVector = PebblingSchedule::MinimumMemoryRequiredPerNodeType(instance); + VMemwT maxRequired = *std::max_element(minimumMemoryRequiredVector.begin(), minimumMemoryRequiredVector.end()); + instance.GetArchitecture().SetMemoryBound(maxRequired); - PebblingSchedule initial_sol(bsp_initial, PebblingSchedule::CACHE_EVICTION_STRATEGY::FORESIGHT); - BOOST_CHECK(initial_sol.isValid()); + PebblingSchedule 
initialSol(bspInitial, PebblingSchedule::CacheEvictionStrategy::FORESIGHT); + BOOST_CHECK(initialSol.IsValid()); MultiProcessorPebbling mpp; - mpp.setTimeLimitSeconds(10); + mpp.SetTimeLimitSeconds(10); PebblingSchedule schedule(instance); - mpp.computePebblingWithInitialSolution(initial_sol, schedule); - schedule.cleanSchedule(); - BOOST_CHECK(schedule.isValid()); -}; + mpp.ComputePebblingWithInitialSolution(initialSol, schedule); + schedule.CleanSchedule(); + BOOST_CHECK(schedule.IsValid()); +} -BOOST_AUTO_TEST_CASE(test_partial) { - using graph = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(TestPartial) { + using graph = ComputationalDagVectorImplDefUnsignedT; BspInstance instance; - instance.setNumberOfProcessors(2); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + instance.SetNumberOfProcessors(2); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -88,20 +87,19 @@ BOOST_AUTO_TEST_CASE(test_partial) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB( + (cwd / "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag").string(), instance.GetComputationalDag()); BOOST_CHECK(status); - std::vector > minimum_memory_required_vector - = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); - v_memw_t max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end()); - instance.getArchitecture().setMemoryBound(max_required); + std::vector > minimumMemoryRequiredVector = PebblingSchedule::MinimumMemoryRequiredPerNodeType(instance); + VMemwT maxRequired = *std::max_element(minimumMemoryRequiredVector.begin(), minimumMemoryRequiredVector.end()); + 
instance.GetArchitecture().SetMemoryBound(maxRequired); PebblingPartialILP mpp; - mpp.setMinSize(15); - mpp.setSecondsForSubILP(5); + mpp.SetMinSize(15); + mpp.SetSecondsForSubIlp(5); PebblingSchedule schedule(instance); - mpp.computePebbling(schedule); - BOOST_CHECK(schedule.isValid()); -}; + mpp.ComputePebbling(schedule); + BOOST_CHECK(schedule.IsValid()); +} diff --git a/tests/intpower.cpp b/tests/intpower.cpp index 53ab140d..01660c6a 100644 --- a/tests/intpower.cpp +++ b/tests/intpower.cpp @@ -24,33 +24,33 @@ limitations under the License. using namespace osp; BOOST_AUTO_TEST_CASE(IntegerPowers) { - BOOST_CHECK_EQUAL(intpow(0, 0), 1); - BOOST_CHECK_EQUAL(intpow(5, 0), 1); - BOOST_CHECK_EQUAL(intpow(9, 1), 9); - BOOST_CHECK_EQUAL(intpow(2, 10), 1024); - BOOST_CHECK_EQUAL(intpow(7, 3), 343); - BOOST_CHECK_EQUAL(intpow(1, 349), 1); - BOOST_CHECK_EQUAL(intpow(1, 4), 1); - BOOST_CHECK_EQUAL(intpow(3, 2), 9); - BOOST_CHECK_EQUAL(intpow(4, 3), 64); + BOOST_CHECK_EQUAL(Intpow(0, 0), 1); + BOOST_CHECK_EQUAL(Intpow(5, 0), 1); + BOOST_CHECK_EQUAL(Intpow(9, 1), 9); + BOOST_CHECK_EQUAL(Intpow(2, 10), 1024); + BOOST_CHECK_EQUAL(Intpow(7, 3), 343); + BOOST_CHECK_EQUAL(Intpow(1, 349), 1); + BOOST_CHECK_EQUAL(Intpow(1, 4), 1); + BOOST_CHECK_EQUAL(Intpow(3, 2), 9); + BOOST_CHECK_EQUAL(Intpow(4, 3), 64); } -BOOST_AUTO_TEST_CASE(Median_set) { +BOOST_AUTO_TEST_CASE(MedianSet) { std::set a({0, 10, 20}); std::set b({-5, 8, 10, 732}); std::set c({-5, 10, 9, 732}); - BOOST_CHECK_EQUAL(Get_Median(a), 10); - BOOST_CHECK_EQUAL(Get_Median(b), 9); - BOOST_CHECK_EQUAL(Get_Median(c), 9); + BOOST_CHECK_EQUAL(GetMedian(a), 10); + BOOST_CHECK_EQUAL(GetMedian(b), 9); + BOOST_CHECK_EQUAL(GetMedian(c), 9); } -BOOST_AUTO_TEST_CASE(Median_multiset) { +BOOST_AUTO_TEST_CASE(MedianMultiset) { std::multiset a({0, 10, 20, 10}); std::multiset b({0, 0, 1}); std::multiset c({2, 4, 7, 233}); - BOOST_CHECK_EQUAL(Get_Median(a), 10); - BOOST_CHECK_EQUAL(Get_Median(b), 0); - BOOST_CHECK_EQUAL(Get_Median(c), 5); + 
BOOST_CHECK_EQUAL(GetMedian(a), 10); + BOOST_CHECK_EQUAL(GetMedian(b), 0); + BOOST_CHECK_EQUAL(GetMedian(c), 5); } diff --git a/tests/isomorphic_subgraph_scheduler.cpp b/tests/isomorphic_subgraph_scheduler.cpp index e5abd52b..aeae479a 100644 --- a/tests/isomorphic_subgraph_scheduler.cpp +++ b/tests/isomorphic_subgraph_scheduler.cpp @@ -28,218 +28,218 @@ limitations under the License. using namespace osp; -using graph_t = computational_dag_vector_impl_def_t; -using constr_graph_t = computational_dag_vector_impl_def_t; +using GraphT = ComputationalDagVectorImplDefUnsignedT; +using ConstrGraphT = ComputationalDagVectorImplDefUnsignedT; -using group_t = typename OrbitGraphProcessor::Group; +using GroupT = typename OrbitGraphProcessor::Group; // A test class to expose private methods of IsomorphicSubgraphScheduler -template -class IsomorphicSubgraphSchedulerTester : public IsomorphicSubgraphScheduler { +template +class IsomorphicSubgraphSchedulerTester : public IsomorphicSubgraphScheduler { public: - using IsomorphicSubgraphScheduler::IsomorphicSubgraphScheduler; + using IsomorphicSubgraphScheduler::IsomorphicSubgraphScheduler; - void test_trim_subgraph_groups(std::vector &isomorphic_groups, - const BspInstance &instance, - std::vector &was_trimmed) { - this->trim_subgraph_groups(isomorphic_groups, instance, was_trimmed); + void TestTrimSubgraphGroups(std::vector &isomorphicGroups, + const BspInstance &instance, + std::vector &wasTrimmed) { + this->TrimSubgraphGroups(isomorphicGroups, instance, wasTrimmed); } - void test_schedule_isomorphic_group(const BspInstance &instance, - const std::vector &isomorphic_groups, - const SubgraphSchedule &sub_sched, - std::vector> &partition) { - this->schedule_isomorphic_group(instance, isomorphic_groups, sub_sched, partition); + void TestScheduleIsomorphicGroup(const BspInstance &instance, + const std::vector &isomorphicGroups, + const SubgraphSchedule &subSched, + std::vector> &partition) { + this->ScheduleIsomorphicGroup(instance, 
isomorphicGroups, subSched, partition); } }; -BOOST_AUTO_TEST_SUITE(IsomorphicSubgraphSchedulerTestSuite) +BOOST_AUTO_TEST_SUITE(isomorphic_subgraph_scheduler_test_suite) BOOST_AUTO_TEST_CASE(EmptyGraphTest) { - BspInstance instance; - instance.getArchitecture().setNumberOfProcessors(4); + BspInstance instance; + instance.GetArchitecture().SetNumberOfProcessors(4); - GreedyBspScheduler greedy_scheduler; - IsomorphicSubgraphScheduler iso_scheduler(greedy_scheduler); + GreedyBspScheduler greedyScheduler; + IsomorphicSubgraphScheduler isoScheduler(greedyScheduler); - auto partition = iso_scheduler.compute_partition(instance); + auto partition = isoScheduler.ComputePartition(instance); BOOST_CHECK(partition.empty()); } -BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_NoTrim) { - GreedyBspScheduler greedy_scheduler; - IsomorphicSubgraphSchedulerTester tester(greedy_scheduler); +BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTestNoTrim) { + GreedyBspScheduler greedyScheduler; + IsomorphicSubgraphSchedulerTester tester(greedyScheduler); - BspInstance instance; - auto &dag = instance.getComputationalDag(); - dag.add_vertex(1, 1, 1, 0); // 0 - dag.add_vertex(1, 1, 1, 0); // 1 - dag.add_vertex(1, 1, 1, 0); // 2 - dag.add_vertex(1, 1, 1, 0); // 3 - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0}); // 8 processors of type 0 - instance.setDiagonalCompatibilityMatrix(1); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); + dag.AddVertex(1, 1, 1, 0); // 0 + dag.AddVertex(1, 1, 1, 0); // 1 + dag.AddVertex(1, 1, 1, 0); // 2 + dag.AddVertex(1, 1, 1, 0); // 3 + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0}); // 8 processors of type 0 + instance.SetDiagonalCompatibilityMatrix(1); // A single group with 4 subgraphs, each with 1 node. 
- std::vector iso_groups = {group_t{{{0}, {1}, {2}, {3}}}}; + std::vector isoGroups = {GroupT{{{0}, {1}, {2}, {3}}}}; - std::vector was_trimmed(iso_groups.size()); + std::vector wasTrimmed(isoGroups.size()); // Group size (4) is a divisor of processor count for type 0 (8), so no trim. - tester.test_trim_subgraph_groups(iso_groups, instance, was_trimmed); + tester.TestTrimSubgraphGroups(isoGroups, instance, wasTrimmed); - BOOST_REQUIRE_EQUAL(was_trimmed.size(), 1); - BOOST_CHECK(!was_trimmed[0]); - BOOST_CHECK_EQUAL(iso_groups.size(), 1); - BOOST_CHECK_EQUAL(iso_groups[0].subgraphs.size(), 4); // Still 4 subgraphs in the group + BOOST_REQUIRE_EQUAL(wasTrimmed.size(), 1); + BOOST_CHECK(!wasTrimmed[0]); + BOOST_CHECK_EQUAL(isoGroups.size(), 1); + BOOST_CHECK_EQUAL(isoGroups[0].subgraphs_.size(), 4); // Still 4 subgraphs in the group } -BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_WithTrim) { - GreedyBspScheduler greedy_scheduler; - IsomorphicSubgraphSchedulerTester tester(greedy_scheduler); - tester.setAllowTrimmedScheduler(false); - - BspInstance instance; - auto &dag = instance.getComputationalDag(); - dag.add_vertex(10, 1, 1, 0); // 0 - dag.add_vertex(10, 1, 1, 0); // 1 - dag.add_vertex(10, 1, 1, 0); // 2 - dag.add_vertex(10, 1, 1, 0); // 3 - dag.add_vertex(10, 1, 1, 0); // 4 - dag.add_vertex(10, 1, 1, 0); // 5 - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0}); // 8 processors of type 0 - instance.setDiagonalCompatibilityMatrix(1); +BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTestWithTrim) { + GreedyBspScheduler greedyScheduler; + IsomorphicSubgraphSchedulerTester tester(greedyScheduler); + tester.SetAllowTrimmedScheduler(false); + + BspInstance instance; + auto &dag = instance.GetComputationalDag(); + dag.AddVertex(10, 1, 1, 0); // 0 + dag.AddVertex(10, 1, 1, 0); // 1 + dag.AddVertex(10, 1, 1, 0); // 2 + dag.AddVertex(10, 1, 1, 0); // 3 + dag.AddVertex(10, 1, 1, 0); // 4 + dag.AddVertex(10, 1, 1, 0); // 5 + 
instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0}); // 8 processors of type 0 + instance.SetDiagonalCompatibilityMatrix(1); // 6 subgraphs, each with 1 node and work weight 10. - std::vector iso_groups = {group_t{{{0}, {1}, {2}, {3}, {4}, {5}}}}; + std::vector isoGroups = {GroupT{{{0}, {1}, {2}, {3}, {4}, {5}}}}; - std::vector was_trimmed(iso_groups.size()); + std::vector wasTrimmed(isoGroups.size()); // Group size (6) is not a divisor of processor count for type 0 (8). // gcd(6, 8) = 2. // merge_size = 6 / 2 = 3. // The 6 subgraphs should be merged into 2 new subgraphs, each containing 3 old ones. - tester.test_trim_subgraph_groups(iso_groups, instance, was_trimmed); + tester.TestTrimSubgraphGroups(isoGroups, instance, wasTrimmed); - BOOST_REQUIRE_EQUAL(was_trimmed.size(), 1); - BOOST_CHECK(was_trimmed[0]); - BOOST_CHECK_EQUAL(iso_groups.size(), 1); - BOOST_REQUIRE_EQUAL(iso_groups[0].subgraphs.size(), 2); // Group now contains 2 merged subgraphs + BOOST_REQUIRE_EQUAL(wasTrimmed.size(), 1); + BOOST_CHECK(wasTrimmed[0]); + BOOST_CHECK_EQUAL(isoGroups.size(), 1); + BOOST_REQUIRE_EQUAL(isoGroups[0].subgraphs_.size(), 2); // Group now contains 2 merged subgraphs // Check that the new subgraphs are correctly merged. 
- BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[0].size(), 3); - BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[1].size(), 3); - - const auto &final_sgs = iso_groups[0].subgraphs; - std::set vertices_sg0(final_sgs[0].begin(), final_sgs[0].end()); - std::set vertices_sg1(final_sgs[1].begin(), final_sgs[1].end()); - std::set expected_sg0 = {0, 1, 2}; - std::set expected_sg1 = {3, 4, 5}; - BOOST_CHECK(vertices_sg0 == expected_sg0); - BOOST_CHECK(vertices_sg1 == expected_sg1); + BOOST_CHECK_EQUAL(isoGroups[0].subgraphs_[0].size(), 3); + BOOST_CHECK_EQUAL(isoGroups[0].subgraphs_[1].size(), 3); + + const auto &finalSgs = isoGroups[0].subgraphs_; + std::set verticesSg0(finalSgs[0].begin(), finalSgs[0].end()); + std::set verticesSg1(finalSgs[1].begin(), finalSgs[1].end()); + std::set expectedSg0 = {0, 1, 2}; + std::set expectedSg1 = {3, 4, 5}; + BOOST_CHECK(verticesSg0 == expectedSg0); + BOOST_CHECK(verticesSg1 == expectedSg1); } -BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_MultipleGroups) { - GreedyBspScheduler greedy_scheduler; - IsomorphicSubgraphSchedulerTester tester(greedy_scheduler); - tester.setAllowTrimmedScheduler(false); +BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTestMultipleGroups) { + GreedyBspScheduler greedyScheduler; + IsomorphicSubgraphSchedulerTester tester(greedyScheduler); + tester.SetAllowTrimmedScheduler(false); - BspInstance instance; - auto &dag = instance.getComputationalDag(); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); for (int i = 0; i < 6; ++i) { - dag.add_vertex(1, 1, 1, 0); // 0-5 + dag.AddVertex(1, 1, 1, 0); // 0-5 } for (int i = 0; i < 3; ++i) { - dag.add_vertex(1, 1, 1, 0); // 6-8, but we will use 10-12 in test + dag.AddVertex(1, 1, 1, 0); // 6-8, but we will use 10-12 in test } for (int i = 0; i < 2; ++i) { - dag.add_vertex(1, 1, 1, 0); // 9-10 + dag.AddVertex(1, 1, 1, 0); // 9-10 } for (int i = 0; i < 2; ++i) { - dag.add_vertex(1, 1, 1, 0); // 11-12 + dag.AddVertex(1, 1, 1, 0); // 11-12 } for (int i = 0; i < 8; ++i) { - 
dag.add_vertex(1, 1, 1, 0); // 13-20 + dag.AddVertex(1, 1, 1, 0); // 13-20 } for (int i = 0; i < 5; ++i) { - dag.add_vertex(1, 1, 1, 0); // 21-25 + dag.AddVertex(1, 1, 1, 0); // 21-25 } // Make sure all vertices used in iso_groups exist. // All are type 0. - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0}); // 9 processors of type 0 - instance.setDiagonalCompatibilityMatrix(1); + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0}); // 9 processors of type 0 + instance.SetDiagonalCompatibilityMatrix(1); // Group 1: size 6. gcd(6, 9) = 3. merge_size = 6/3 = 2. -> 3 subgraphs of size 2. // Group 2: size 3. gcd(3, 9) = 3. merge_size = 3/3 = 1. -> no trim. // Group 3: size 5. gcd(5, 9) = 1. merge_size = 5/1 = 5. -> 1 subgraph of size 5. - std::vector iso_groups = { - group_t{{{0}, {1}, {2}, {3}, {4}, {5}}}, // Group 1 - group_t{{{10}, {11}, {12}}}, // Group 2 - group_t{{{20}, {21}, {22}, {23}, {24}}} // Group 3 + std::vector isoGroups = { + GroupT{{{0}, {1}, {2}, {3}, {4}, {5}}}, // Group 1 + GroupT{{{10}, {11}, {12}}}, // Group 2 + GroupT{{{20}, {21}, {22}, {23}, {24}}} // Group 3 }; - std::vector was_trimmed(iso_groups.size()); - tester.test_trim_subgraph_groups(iso_groups, instance, was_trimmed); + std::vector wasTrimmed(isoGroups.size()); + tester.TestTrimSubgraphGroups(isoGroups, instance, wasTrimmed); - BOOST_REQUIRE_EQUAL(iso_groups.size(), 3); - BOOST_REQUIRE_EQUAL(was_trimmed.size(), 3); + BOOST_REQUIRE_EQUAL(isoGroups.size(), 3); + BOOST_REQUIRE_EQUAL(wasTrimmed.size(), 3); - BOOST_CHECK(was_trimmed[0]); // Group 1 should be trimmed - BOOST_CHECK(!was_trimmed[1]); // Group 2 should not be trimmed - BOOST_CHECK(was_trimmed[2]); // Group 3 should be trimmed + BOOST_CHECK(wasTrimmed[0]); // Group 1 should be trimmed + BOOST_CHECK(!wasTrimmed[1]); // Group 2 should not be trimmed + BOOST_CHECK(wasTrimmed[2]); // Group 3 should be trimmed // Check Group 1 - BOOST_REQUIRE_EQUAL(iso_groups[0].subgraphs.size(), 
3); - BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[0].size(), 2); - BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[1].size(), 2); - BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[2].size(), 2); + BOOST_REQUIRE_EQUAL(isoGroups[0].subgraphs_.size(), 3); + BOOST_CHECK_EQUAL(isoGroups[0].subgraphs_[0].size(), 2); + BOOST_CHECK_EQUAL(isoGroups[0].subgraphs_[1].size(), 2); + BOOST_CHECK_EQUAL(isoGroups[0].subgraphs_[2].size(), 2); // Check Group 2 - BOOST_REQUIRE_EQUAL(iso_groups[1].subgraphs.size(), 3); - BOOST_CHECK_EQUAL(iso_groups[1].subgraphs[0].size(), 1); + BOOST_REQUIRE_EQUAL(isoGroups[1].subgraphs_.size(), 3); + BOOST_CHECK_EQUAL(isoGroups[1].subgraphs_[0].size(), 1); // Check Group 3 - BOOST_REQUIRE_EQUAL(iso_groups[2].subgraphs.size(), 1); - BOOST_CHECK_EQUAL(iso_groups[2].subgraphs[0].size(), 5); + BOOST_REQUIRE_EQUAL(isoGroups[2].subgraphs_.size(), 1); + BOOST_CHECK_EQUAL(isoGroups[2].subgraphs_[0].size(), 5); } -BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_HeterogeneousArch) { +BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroupHeterogeneousArch) { // --- Setup --- - BspInstance instance; - auto &dag = instance.getComputationalDag(); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); // Two isomorphic groups: // Group 0: {0,1}, {2,3} (type 0) // Group 1: {4}, {5} (type 1) - dag.add_vertex(10, 1, 1, 0); - dag.add_vertex(10, 1, 1, 0); // 0, 1 - dag.add_vertex(10, 1, 1, 0); - dag.add_vertex(10, 1, 1, 0); // 2, 3 - dag.add_vertex(20, 1, 1, 1); // 4 - dag.add_vertex(20, 1, 1, 1); // 5 - dag.add_edge(0, 1); - dag.add_edge(2, 3); - dag.add_edge(1, 4); - dag.add_edge(3, 5); + dag.AddVertex(10, 1, 1, 0); + dag.AddVertex(10, 1, 1, 0); // 0, 1 + dag.AddVertex(10, 1, 1, 0); + dag.AddVertex(10, 1, 1, 0); // 2, 3 + dag.AddVertex(20, 1, 1, 1); // 4 + dag.AddVertex(20, 1, 1, 1); // 5 + dag.AddEdge(0, 1); + dag.AddEdge(2, 3); + dag.AddEdge(1, 4); + dag.AddEdge(3, 5); // 2 procs of type 0, 2 procs of type 1 - instance.getArchitecture().setProcessorsWithTypes({0, 0, 1, 1}); - 
instance.setDiagonalCompatibilityMatrix(2); + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 1, 1}); + instance.SetDiagonalCompatibilityMatrix(2); - std::vector iso_groups = {group_t{{{0, 1}, {2, 3}}}, group_t{{{4}, {5}}}}; + std::vector isoGroups = {GroupT{{{0, 1}, {2, 3}}}, GroupT{{{4}, {5}}}}; // Mock SubgraphSchedule from EFT scheduler // Group 0 (2 subgraphs) gets 2 workers of type 0 // Group 1 (2 subgraphs) gets 2 workers of type 1 - SubgraphSchedule sub_sched; - sub_sched.node_assigned_worker_per_type.resize(2); - sub_sched.node_assigned_worker_per_type[0] = {2, 0}; // 2xT0 for group 0 - sub_sched.node_assigned_worker_per_type[1] = {0, 2}; // 2xT1 for group 1 - sub_sched.was_trimmed = {false, false}; // No trimming occurred + SubgraphSchedule subSched; + subSched.nodeAssignedWorkerPerType_.resize(2); + subSched.nodeAssignedWorkerPerType_[0] = {2, 0}; // 2xT0 for group 0 + subSched.nodeAssignedWorkerPerType_[1] = {0, 2}; // 2xT1 for group 1 + subSched.wasTrimmed_ = {false, false}; // No trimming occurred - std::vector> partition(dag.num_vertices()); + std::vector> partition(dag.NumVertices()); - GreedyBspScheduler greedy_scheduler; - IsomorphicSubgraphSchedulerTester tester(greedy_scheduler); + GreedyBspScheduler greedyScheduler; + IsomorphicSubgraphSchedulerTester tester(greedyScheduler); // --- Execute --- - tester.test_schedule_isomorphic_group(instance, iso_groups, sub_sched, partition); + tester.TestScheduleIsomorphicGroup(instance, isoGroups, subSched, partition); // --- Assert --- // Group 0 has 2 subgraphs, scheduled on 2 processors. 
@@ -260,53 +260,53 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_HeterogeneousArch) { BOOST_CHECK_NE(partition[2], partition[5]); // Verify all partitions are unique as expected - std::set> partition_ids; - for (const auto &p_id : partition) { - partition_ids.insert(p_id); + std::set> partitionIds; + for (const auto &pId : partition) { + partitionIds.insert(pId); } - BOOST_CHECK_EQUAL(partition_ids.size(), 4); + BOOST_CHECK_EQUAL(partitionIds.size(), 4); } -BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_ShuffledIDs) { +BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroupShuffledIDs) { // --- Setup --- // This test ensures that the isomorphism mapping works correctly even if // the vertex IDs of isomorphic subgraphs are not in the same relative order. - BspInstance instance; - auto &dag = instance.getComputationalDag(); + BspInstance instance; + auto &dag = instance.GetComputationalDag(); // Group 0, Subgraph 1: 0 -> 1 - dag.add_vertex(10, 1, 1, 0); // 0 - dag.add_vertex(20, 1, 1, 0); // 1 - dag.add_edge(0, 1); + dag.AddVertex(10, 1, 1, 0); // 0 + dag.AddVertex(20, 1, 1, 0); // 1 + dag.AddEdge(0, 1); // Group 0, Subgraph 2 (isomorphic to 1, but with shuffled IDs): 3 -> 2 - dag.add_vertex(20, 1, 1, 0); // 2 (work 20, corresponds to node 1) - dag.add_vertex(10, 1, 1, 0); // 3 (work 10, corresponds to node 0) - dag.add_edge(3, 2); + dag.AddVertex(20, 1, 1, 0); // 2 (work 20, corresponds to node 1) + dag.AddVertex(10, 1, 1, 0); // 3 (work 10, corresponds to node 0) + dag.AddEdge(3, 2); // Architecture: 2 processors, so each subgraph gets its own partition space. - instance.getArchitecture().setProcessorsWithTypes({0, 0}); - instance.setDiagonalCompatibilityMatrix(1); + instance.GetArchitecture().SetProcessorsWithTypes({0, 0}); + instance.SetDiagonalCompatibilityMatrix(1); // Manually define the isomorphic groups. 
// Subgraph 1 vertices: {0, 1} // Subgraph 2 vertices: {2, 3} - std::vector iso_groups = {group_t{{{0, 1}, {2, 3}}}}; + std::vector isoGroups = {GroupT{{{0, 1}, {2, 3}}}}; // Mock SubgraphSchedule: The single group gets all 2 processors. - SubgraphSchedule sub_sched; - sub_sched.node_assigned_worker_per_type.resize(1); - sub_sched.node_assigned_worker_per_type[0] = {2}; - sub_sched.was_trimmed = {false}; // No trimming occurred + SubgraphSchedule subSched; + subSched.nodeAssignedWorkerPerType_.resize(1); + subSched.nodeAssignedWorkerPerType_[0] = {2}; + subSched.wasTrimmed_ = {false}; // No trimming occurred - std::vector> partition(dag.num_vertices()); + std::vector> partition(dag.NumVertices()); // Use a simple greedy scheduler for the sub-problems. - GreedyBspScheduler greedy_scheduler; - IsomorphicSubgraphSchedulerTester tester(greedy_scheduler); + GreedyBspScheduler greedyScheduler; + IsomorphicSubgraphSchedulerTester tester(greedyScheduler); // --- Execute --- - tester.test_schedule_isomorphic_group(instance, iso_groups, sub_sched, partition); + tester.TestScheduleIsomorphicGroup(instance, isoGroups, subSched, partition); // --- Assert --- // The representative subgraph is {0, 1}. The greedy scheduler will likely put @@ -332,32 +332,32 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_ShuffledIDs) { // // This test uses a more complex structure (fork-join) with shuffled IDs // // to ensure the pattern replication is robust. 
// BspInstance instance; -// auto& dag = instance.getComputationalDag(); +// auto& dag = instance.GetComputationalDag(); // // Group 0, Subgraph 1: 0 -> {1,2} -> 3 -// dag.add_vertex(10, 1, 1, 0); // 0 (source) -// dag.add_vertex(20, 1, 1, 0); // 1 (middle) -// dag.add_vertex(20, 1, 1, 0); // 2 (middle) -// dag.add_vertex(30, 1, 1, 0); // 3 (sink) -// dag.add_edge(0, 1); -// dag.add_edge(0, 2); -// dag.add_edge(1, 3); -// dag.add_edge(2, 3); +// dag.AddVertex(10, 1, 1, 0); // 0 (source) +// dag.AddVertex(20, 1, 1, 0); // 1 (middle) +// dag.AddVertex(20, 1, 1, 0); // 2 (middle) +// dag.AddVertex(30, 1, 1, 0); // 3 (sink) +// dag.AddEdge(0, 1); +// dag.AddEdge(0, 2); +// dag.AddEdge(1, 3); +// dag.AddEdge(2, 3); // // Group 0, Subgraph 2 (isomorphic, but with shuffled IDs and different topology) // // Structure: 7 -> {5,4} -> 6 -// dag.add_vertex(20, 1, 1, 0); // 4 (middle, corresponds to node 2) -// dag.add_vertex(20, 1, 1, 0); // 5 (middle, corresponds to node 1) -// dag.add_vertex(30, 1, 1, 0); // 6 (sink, corresponds to node 3) -// dag.add_vertex(10, 1, 1, 0); // 7 (source, corresponds to node 0) -// dag.add_edge(7, 4); -// dag.add_edge(7, 5); -// dag.add_edge(4, 6); -// dag.add_edge(5, 6); +// dag.AddVertex(20, 1, 1, 0); // 4 (middle, corresponds to node 2) +// dag.AddVertex(20, 1, 1, 0); // 5 (middle, corresponds to node 1) +// dag.AddVertex(30, 1, 1, 0); // 6 (sink, corresponds to node 3) +// dag.AddVertex(10, 1, 1, 0); // 7 (source, corresponds to node 0) +// dag.AddEdge(7, 4); +// dag.AddEdge(7, 5); +// dag.AddEdge(4, 6); +// dag.AddEdge(5, 6); // // Architecture: 4 processors, so each subgraph gets its own partition space. -// instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0}); -// instance.setDiagonalCompatibilityMatrix(1); +// instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 0, 0}); +// instance.SetDiagonalCompatibilityMatrix(1); // // Manually define the isomorphic groups. 
// std::vector iso_groups = { @@ -369,7 +369,7 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_ShuffledIDs) { // sub_sched.node_assigned_worker_per_type.resize(1); // sub_sched.node_assigned_worker_per_type[0] = {4}; -// std::vector> partition(dag.num_vertices()); +// std::vector> partition(dag.NumVertices()); // GreedyBspScheduler greedy_scheduler; // IsomorphicSubgraphSchedulerTester tester(greedy_scheduler); diff --git a/tests/isomorphism_mapper.cpp b/tests/isomorphism_mapper.cpp index 72b6ecbb..630264ca 100644 --- a/tests/isomorphism_mapper.cpp +++ b/tests/isomorphism_mapper.cpp @@ -28,126 +28,126 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_SUITE(IsomorphismMapperTestSuite) +BOOST_AUTO_TEST_SUITE(isomorphism_mapper_test_suite) -using graph_t = computational_dag_vector_impl_def_t; -using constr_graph_t = computational_dag_vector_impl_def_t; +using GraphT = ComputationalDagVectorImplDefUnsignedT; +using ConstrGraphT = ComputationalDagVectorImplDefUnsignedT; -BOOST_AUTO_TEST_CASE(Mapper_SimpleChain) { +BOOST_AUTO_TEST_CASE(MapperSimpleChain) { // Rep: 0 -> 1 -> 2 - constr_graph_t rep_graph; - rep_graph.add_vertex(10, 1, 1); - rep_graph.add_vertex(20, 1, 1); - rep_graph.add_vertex(30, 1, 1); - rep_graph.add_edge(0, 1); - rep_graph.add_edge(1, 2); - std::vector> rep_map = {100, 101, 102}; + ConstrGraphT repGraph; + repGraph.AddVertex(10, 1, 1); + repGraph.AddVertex(20, 1, 1); + repGraph.AddVertex(30, 1, 1); + repGraph.AddEdge(0, 1); + repGraph.AddEdge(1, 2); + std::vector> repMap = {100, 101, 102}; // Current: 2 -> 0 -> 1 (isomorphic, but different local IDs) - constr_graph_t current_graph; - current_graph.add_vertex(20, 1, 1); // local 0 (work 20) - current_graph.add_vertex(30, 1, 1); // local 1 (work 30) - current_graph.add_vertex(10, 1, 1); // local 2 (work 10) - current_graph.add_edge(2, 0); - current_graph.add_edge(0, 1); - std::vector> current_map = {201, 202, 200}; + ConstrGraphT currentGraph; + currentGraph.AddVertex(20, 1, 1); // 
local 0 (work 20) + currentGraph.AddVertex(30, 1, 1); // local 1 (work 30) + currentGraph.AddVertex(10, 1, 1); // local 2 (work 10) + currentGraph.AddEdge(2, 0); + currentGraph.AddEdge(0, 1); + std::vector> currentMap = {201, 202, 200}; - IsomorphismMapper mapper(rep_graph); - auto result_map_local = mapper.find_mapping(current_graph); + IsomorphismMapper mapper(repGraph); + auto resultMapLocal = mapper.FindMapping(currentGraph); // Translate local map to global map for the test - std::unordered_map, vertex_idx_t> result_map; - for (const auto &[curr_local, rep_local] : result_map_local) { - result_map[current_map[curr_local]] = rep_map[rep_local]; + std::unordered_map, VertexIdxT> resultMap; + for (const auto &[curr_local, rep_local] : resultMapLocal) { + resultMap[currentMap[curr_local]] = repMap[rep_local]; } - BOOST_REQUIRE_EQUAL(result_map.size(), 3); + BOOST_REQUIRE_EQUAL(resultMap.size(), 3); // current global ID -> rep global ID // 200 (work 10) -> 100 (work 10) // 201 (work 20) -> 101 (work 20) // 202 (work 30) -> 102 (work 30) - BOOST_CHECK_EQUAL(result_map.at(200), 100); - BOOST_CHECK_EQUAL(result_map.at(201), 101); - BOOST_CHECK_EQUAL(result_map.at(202), 102); + BOOST_CHECK_EQUAL(resultMap.at(200), 100); + BOOST_CHECK_EQUAL(resultMap.at(201), 101); + BOOST_CHECK_EQUAL(resultMap.at(202), 102); } -BOOST_AUTO_TEST_CASE(Mapper_ForkJoin) { +BOOST_AUTO_TEST_CASE(MapperForkJoin) { // Rep: 0 -> {1,2} -> 3 - constr_graph_t rep_graph; - rep_graph.add_vertex(10, 1, 1); - rep_graph.add_vertex(20, 1, 1); - rep_graph.add_vertex(20, 1, 1); - rep_graph.add_vertex(30, 1, 1); - rep_graph.add_edge(0, 1); - rep_graph.add_edge(0, 2); - rep_graph.add_edge(1, 3); - rep_graph.add_edge(2, 3); - std::vector> rep_map = {10, 11, 12, 13}; + ConstrGraphT repGraph; + repGraph.AddVertex(10, 1, 1); + repGraph.AddVertex(20, 1, 1); + repGraph.AddVertex(20, 1, 1); + repGraph.AddVertex(30, 1, 1); + repGraph.AddEdge(0, 1); + repGraph.AddEdge(0, 2); + repGraph.AddEdge(1, 3); + 
repGraph.AddEdge(2, 3); + std::vector> repMap = {10, 11, 12, 13}; // Current: 3 -> {0,2} -> 1 - constr_graph_t current_graph; - current_graph.add_vertex(20, 1, 1); // local 0 - current_graph.add_vertex(30, 1, 1); // local 1 - current_graph.add_vertex(20, 1, 1); // local 2 - current_graph.add_vertex(10, 1, 1); // local 3 - current_graph.add_edge(3, 0); - current_graph.add_edge(3, 2); - current_graph.add_edge(0, 1); - current_graph.add_edge(2, 1); - std::vector> current_map = {21, 23, 22, 20}; - - IsomorphismMapper mapper(rep_graph); - auto result_map_local = mapper.find_mapping(current_graph); - - std::unordered_map, vertex_idx_t> result_map; - for (const auto &[curr_local, rep_local] : result_map_local) { - result_map[current_map[curr_local]] = rep_map[rep_local]; + ConstrGraphT currentGraph; + currentGraph.AddVertex(20, 1, 1); // local 0 + currentGraph.AddVertex(30, 1, 1); // local 1 + currentGraph.AddVertex(20, 1, 1); // local 2 + currentGraph.AddVertex(10, 1, 1); // local 3 + currentGraph.AddEdge(3, 0); + currentGraph.AddEdge(3, 2); + currentGraph.AddEdge(0, 1); + currentGraph.AddEdge(2, 1); + std::vector> currentMap = {21, 23, 22, 20}; + + IsomorphismMapper mapper(repGraph); + auto resultMapLocal = mapper.FindMapping(currentGraph); + + std::unordered_map, VertexIdxT> resultMap; + for (const auto &[curr_local, rep_local] : resultMapLocal) { + resultMap[currentMap[curr_local]] = repMap[rep_local]; } - BOOST_REQUIRE_EQUAL(result_map.size(), 4); + BOOST_REQUIRE_EQUAL(resultMap.size(), 4); // current global ID -> rep global ID // 20 (work 10) -> 10 (work 10) // 23 (work 30) -> 13 (work 30) - BOOST_CHECK_EQUAL(result_map.at(20), 10); - BOOST_CHECK_EQUAL(result_map.at(23), 13); + BOOST_CHECK_EQUAL(resultMap.at(20), 10); + BOOST_CHECK_EQUAL(resultMap.at(23), 13); // The two middle nodes are symmetric. The mapping could be either way. 
// current {21, 22} -> rep {11, 12} - bool mapping1 = (result_map.at(21) == 11 && result_map.at(22) == 12); - bool mapping2 = (result_map.at(21) == 12 && result_map.at(22) == 11); + bool mapping1 = (resultMap.at(21) == 11 && resultMap.at(22) == 12); + bool mapping2 = (resultMap.at(21) == 12 && resultMap.at(22) == 11); BOOST_CHECK(mapping1 || mapping2); } -BOOST_AUTO_TEST_CASE(Mapper_DisconnectedComponents) { +BOOST_AUTO_TEST_CASE(MapperDisconnectedComponents) { // Rep: {0->1}, {2->3}. Two identical but disconnected components. - constr_graph_t rep_graph; - rep_graph.add_vertex(10, 1, 1); - rep_graph.add_vertex(20, 1, 1); // 0, 1 - rep_graph.add_vertex(10, 1, 1); - rep_graph.add_vertex(20, 1, 1); // 2, 3 - rep_graph.add_edge(0, 1); - rep_graph.add_edge(2, 3); - std::vector> rep_map = {10, 11, 12, 13}; + ConstrGraphT repGraph; + repGraph.AddVertex(10, 1, 1); + repGraph.AddVertex(20, 1, 1); // 0, 1 + repGraph.AddVertex(10, 1, 1); + repGraph.AddVertex(20, 1, 1); // 2, 3 + repGraph.AddEdge(0, 1); + repGraph.AddEdge(2, 3); + std::vector> repMap = {10, 11, 12, 13}; // Current: {2->3}, {0->1}. Same components, but different local IDs. 
- constr_graph_t current_graph; - current_graph.add_vertex(10, 1, 1); - current_graph.add_vertex(20, 1, 1); // 0, 1 - current_graph.add_vertex(10, 1, 1); - current_graph.add_vertex(20, 1, 1); // 2, 3 - current_graph.add_edge(2, 3); - current_graph.add_edge(0, 1); - std::vector> current_map = {22, 23, 20, 21}; - - IsomorphismMapper mapper(rep_graph); - auto result_map_local = mapper.find_mapping(current_graph); - - std::unordered_map, vertex_idx_t> result_map; - for (const auto &[curr_local, rep_local] : result_map_local) { - result_map[current_map[curr_local]] = rep_map[rep_local]; + ConstrGraphT currentGraph; + currentGraph.AddVertex(10, 1, 1); + currentGraph.AddVertex(20, 1, 1); // 0, 1 + currentGraph.AddVertex(10, 1, 1); + currentGraph.AddVertex(20, 1, 1); // 2, 3 + currentGraph.AddEdge(2, 3); + currentGraph.AddEdge(0, 1); + std::vector> currentMap = {22, 23, 20, 21}; + + IsomorphismMapper mapper(repGraph); + auto resultMapLocal = mapper.FindMapping(currentGraph); + + std::unordered_map, VertexIdxT> resultMap; + for (const auto &[curr_local, rep_local] : resultMapLocal) { + resultMap[currentMap[curr_local]] = repMap[rep_local]; } - BOOST_REQUIRE_EQUAL(result_map.size(), 4); + BOOST_REQUIRE_EQUAL(resultMap.size(), 4); // The two components are symmetric. The mapping could be component {0,1} -> {0,1} // and {2,3} -> {2,3}, OR component {0,1} -> {2,3} and {2,3} -> {0,1}. 
@@ -155,65 +155,65 @@ BOOST_AUTO_TEST_CASE(Mapper_DisconnectedComponents) { // Mapping Option 1: // rep {10,11} -> current {20,21} // rep {12,13} -> current {22,23} - bool mapping1 = (result_map.at(20) == 12 && result_map.at(21) == 13 && result_map.at(22) == 10 && result_map.at(23) == 11); + bool mapping1 = (resultMap.at(20) == 12 && resultMap.at(21) == 13 && resultMap.at(22) == 10 && resultMap.at(23) == 11); // Mapping Option 2: // rep {10,11} -> current {22,23} // rep {12,13} -> current {20,21} - bool mapping2 = (result_map.at(22) == 12 && result_map.at(23) == 13 && result_map.at(20) == 10 && result_map.at(21) == 11); + bool mapping2 = (resultMap.at(22) == 12 && resultMap.at(23) == 13 && resultMap.at(20) == 10 && resultMap.at(21) == 11); BOOST_CHECK(mapping1 || mapping2); } -BOOST_AUTO_TEST_CASE(Mapper_MultiPipeline) { +BOOST_AUTO_TEST_CASE(MapperMultiPipeline) { // This test checks the mapping of a graph that is composed of multiple // isomorphic disconnected components (two parallel pipelines). // Rep: Two pipelines {0->1->2} and {3->4->5} // All nodes at the same stage have the same work weight. - constr_graph_t rep_graph = construct_multi_pipeline_dag(2, 3); - std::vector> rep_map = {10, 11, 12, 20, 21, 22}; + ConstrGraphT repGraph = ConstructMultiPipelineDag(2, 3); + std::vector> repMap = {10, 11, 12, 20, 21, 22}; // Current: Isomorphic to rep, but the pipelines are swapped and vertex IDs are shuffled. 
// Pipeline 1 (local IDs 0,1,2) corresponds to rep pipeline 2 (global 20,21,22) // Pipeline 2 (local IDs 3,4,5) corresponds to rep pipeline 1 (global 10,11,12) - constr_graph_t current_graph; - current_graph.add_vertex(10, 1, 1); // local 0, stage 0 - current_graph.add_vertex(20, 1, 1); // local 1, stage 1 - current_graph.add_vertex(30, 1, 1); // local 2, stage 2 - current_graph.add_vertex(10, 1, 1); // local 3, stage 0 - current_graph.add_vertex(20, 1, 1); // local 4, stage 1 - current_graph.add_vertex(30, 1, 1); // local 5, stage 2 - current_graph.add_edge(0, 1); - current_graph.add_edge(1, 2); // First pipeline - current_graph.add_edge(3, 4); - current_graph.add_edge(4, 5); // Second pipeline - std::vector> current_map = {120, 121, 122, 110, 111, 112}; - - IsomorphismMapper mapper(rep_graph); - auto result_map_local = mapper.find_mapping(current_graph); - - std::unordered_map, vertex_idx_t> result_map; - for (const auto &[curr_local, rep_local] : result_map_local) { - result_map[current_map[curr_local]] = rep_map[rep_local]; + ConstrGraphT currentGraph; + currentGraph.AddVertex(10, 1, 1); // local 0, stage 0 + currentGraph.AddVertex(20, 1, 1); // local 1, stage 1 + currentGraph.AddVertex(30, 1, 1); // local 2, stage 2 + currentGraph.AddVertex(10, 1, 1); // local 3, stage 0 + currentGraph.AddVertex(20, 1, 1); // local 4, stage 1 + currentGraph.AddVertex(30, 1, 1); // local 5, stage 2 + currentGraph.AddEdge(0, 1); + currentGraph.AddEdge(1, 2); // First pipeline + currentGraph.AddEdge(3, 4); + currentGraph.AddEdge(4, 5); // Second pipeline + std::vector> currentMap = {120, 121, 122, 110, 111, 112}; + + IsomorphismMapper mapper(repGraph); + auto resultMapLocal = mapper.FindMapping(currentGraph); + + std::unordered_map, VertexIdxT> resultMap; + for (const auto &[curr_local, rep_local] : resultMapLocal) { + resultMap[currentMap[curr_local]] = repMap[rep_local]; } - BOOST_REQUIRE_EQUAL(result_map.size(), 6); + BOOST_REQUIRE_EQUAL(resultMap.size(), 6); // The two 
pipelines are symmetric, so the mapping can go either way. // Mapping Option 1: current pipeline 1 -> rep pipeline 1, current pipeline 2 -> rep pipeline 2 - bool mapping1 = (result_map.at(110) == 10 && result_map.at(111) == 11 && result_map.at(112) == 12 && result_map.at(120) == 20 - && result_map.at(121) == 21 && result_map.at(122) == 22); + bool mapping1 = (resultMap.at(110) == 10 && resultMap.at(111) == 11 && resultMap.at(112) == 12 && resultMap.at(120) == 20 + && resultMap.at(121) == 21 && resultMap.at(122) == 22); // Mapping Option 2: current pipeline 1 -> rep pipeline 2, current pipeline 2 -> rep pipeline 1 - bool mapping2 = (result_map.at(110) == 20 && result_map.at(111) == 21 && result_map.at(112) == 22 && result_map.at(120) == 10 - && result_map.at(121) == 11 && result_map.at(122) == 12); + bool mapping2 = (resultMap.at(110) == 20 && resultMap.at(111) == 21 && resultMap.at(112) == 22 && resultMap.at(120) == 10 + && resultMap.at(121) == 11 && resultMap.at(122) == 12); BOOST_CHECK(mapping1 || mapping2); } -BOOST_AUTO_TEST_CASE(Mapper_ShuffledSymmetric) { +BOOST_AUTO_TEST_CASE(MapperShuffledSymmetric) { // This test uses a symmetric graph (a ladder) and shuffles the vertex IDs // of the 'current' graph to ensure the mapper correctly finds the structural // isomorphism, not just a naive index-based mapping. @@ -222,45 +222,45 @@ BOOST_AUTO_TEST_CASE(Mapper_ShuffledSymmetric) { // Structure: {0,1} -> {2,3} -> {4,5} // Nodes {0,2,4} have work 10 (left side). // Nodes {1,3,5} have work 20 (right side). - constr_graph_t rep_graph = construct_ladder_dag(2); - std::vector> rep_map = {10, 11, 12, 13, 14, 15}; + ConstrGraphT repGraph = ConstructLadderDag(2); + std::vector> repMap = {10, 11, 12, 13, 14, 15}; // Current: Isomorphic to rep, but with shuffled local IDs. // A naive mapping of local IDs (0->0, 1->1, etc.) would be incorrect // because the work weights would not match. 
- constr_graph_t current_graph; - current_graph.add_vertex(20, 1, 1); // local 0 (work 20, right) - current_graph.add_vertex(10, 1, 1); // local 1 (work 10, left) - current_graph.add_vertex(20, 1, 1); // local 2 (work 20, right) - current_graph.add_vertex(10, 1, 1); // local 3 (work 10, left) - current_graph.add_vertex(20, 1, 1); // local 4 (work 20, right) - current_graph.add_vertex(10, 1, 1); // local 5 (work 10, left) + ConstrGraphT currentGraph; + currentGraph.AddVertex(20, 1, 1); // local 0 (work 20, right) + currentGraph.AddVertex(10, 1, 1); // local 1 (work 10, left) + currentGraph.AddVertex(20, 1, 1); // local 2 (work 20, right) + currentGraph.AddVertex(10, 1, 1); // local 3 (work 10, left) + currentGraph.AddVertex(20, 1, 1); // local 4 (work 20, right) + currentGraph.AddVertex(10, 1, 1); // local 5 (work 10, left) // Edges for {5,0} -> {3,2} -> {1,4} - current_graph.add_edge(5, 3); - current_graph.add_edge(5, 2); // Rung 1 - current_graph.add_edge(0, 3); - current_graph.add_edge(0, 2); + currentGraph.AddEdge(5, 3); + currentGraph.AddEdge(5, 2); // Rung 1 + currentGraph.AddEdge(0, 3); + currentGraph.AddEdge(0, 2); - current_graph.add_edge(3, 1); - current_graph.add_edge(3, 4); // Rung 2 - current_graph.add_edge(2, 1); - current_graph.add_edge(2, 4); + currentGraph.AddEdge(3, 1); + currentGraph.AddEdge(3, 4); // Rung 2 + currentGraph.AddEdge(2, 1); + currentGraph.AddEdge(2, 4); - std::vector> current_map = {111, 114, 113, 112, 115, 110}; + std::vector> currentMap = {111, 114, 113, 112, 115, 110}; - IsomorphismMapper mapper(rep_graph); - auto result_map_local = mapper.find_mapping(current_graph); + IsomorphismMapper mapper(repGraph); + auto resultMapLocal = mapper.FindMapping(currentGraph); - std::unordered_map, vertex_idx_t> result_map; - for (const auto &[curr_local, rep_local] : result_map_local) { - result_map[current_map[curr_local]] = rep_map[rep_local]; + std::unordered_map, VertexIdxT> resultMap; + for (const auto &[curr_local, rep_local] : 
resultMapLocal) { + resultMap[currentMap[curr_local]] = repMap[rep_local]; } - BOOST_REQUIRE_EQUAL(result_map.size(), 6); + BOOST_REQUIRE_EQUAL(resultMap.size(), 6); // Check that structurally identical nodes are mapped, regardless of their original IDs. // E.g., current global 110 (from local 5, work 10) must map to a rep node with work 10. - BOOST_CHECK_EQUAL(result_map.at(110), 10); // current 5 (work 10) -> rep 0 (work 10) - BOOST_CHECK_EQUAL(result_map.at(111), 11); // current 0 (work 20) -> rep 1 (work 20) + BOOST_CHECK_EQUAL(resultMap.at(110), 10); // current 5 (work 10) -> rep 0 (work 10) + BOOST_CHECK_EQUAL(resultMap.at(111), 11); // current 0 (work 20) -> rep 1 (work 20) } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/iterators.cpp b/tests/iterators.cpp index 55c9052e..fc76577d 100644 --- a/tests/iterators.cpp +++ b/tests/iterators.cpp @@ -23,8 +23,8 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(integral_range_test) { - integral_range range(0, 10); +BOOST_AUTO_TEST_CASE(IntegralRangeTest) { + IntegralRange range(0, 10); BOOST_CHECK_EQUAL(range.size(), 10); int count = 0; @@ -53,7 +53,7 @@ BOOST_AUTO_TEST_CASE(integral_range_test) { BOOST_CHECK_EQUAL(count, -1); count = 0; - integral_range range2(10); + IntegralRange range2(10); BOOST_CHECK_EQUAL(range2.size(), 10); for (auto v : range2) { @@ -70,7 +70,7 @@ BOOST_AUTO_TEST_CASE(integral_range_test) { BOOST_CHECK_EQUAL(count, -1); count = 5; - integral_range range3(5, 15); + IntegralRange range3(5, 15); BOOST_CHECK_EQUAL(range3.size(), 10); for (auto v : range3) { diff --git a/tests/kl.cpp b/tests/kl.cpp deleted file mode 100644 index 3a8a506f..00000000 --- a/tests/kl.cpp +++ /dev/null @@ -1,323 +0,0 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#define BOOST_TEST_MODULE kl -#include -#include - -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "test_graphs.hpp" - -using namespace osp; - -template -void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; - int comm_weight = 1; - - for (const auto &v : dag.vertices()) { - dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 3 + 1)); - dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 3 + 1)); - } -} - -BOOST_AUTO_TEST_CASE(kl_base_1) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; - - graph dag; - - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - const VertexType v7 = dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); - - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - 
dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); - - BspArchitecture arch; - - BspInstance instance(dag, arch); - - BspSchedule schedule(instance); - - schedule.setAssignedProcessors({0, 0, 0, 0, 0, 0, 0, 0}); - schedule.setAssignedSupersteps({0, 0, 0, 0, 0, 0, 0, 0}); - - schedule.updateNumberOfSupersteps(); - - using kl_move = kl_move; - - kl_total_comm_test kl; - - kl.test_setup_schedule(schedule); - - auto &kl_current_schedule = kl.get_current_schedule(); - - BOOST_CHECK_EQUAL(kl_current_schedule.step_max_work[0], 44.0); - BOOST_CHECK_EQUAL(kl_current_schedule.step_second_max_work[0], 0.0); - BOOST_CHECK_EQUAL(kl_current_schedule.num_steps(), 1); - BOOST_CHECK_EQUAL(kl_current_schedule.current_cost, 44.0); - BOOST_CHECK_EQUAL(kl_current_schedule.current_feasible, true); - - kl_move move_1(v1, 0, 6.0 - 2.0, 0, 0, 1, 0); - - kl_current_schedule.apply_move(move_1); - - BOOST_CHECK_EQUAL(kl_current_schedule.step_max_work[0], 42.0); - BOOST_CHECK_EQUAL(kl_current_schedule.step_second_max_work[0], 2.0); - BOOST_CHECK_EQUAL(kl_current_schedule.num_steps(), 1); - BOOST_CHECK_EQUAL(kl_current_schedule.current_cost, 48.0); - BOOST_CHECK_EQUAL(kl_current_schedule.current_feasible, false); - BOOST_CHECK_EQUAL(kl_current_schedule.cost_f->compute_current_costs(), 48.0); - - kl_move move_2(v2, 0, 7.0, 0, 0, 1, 0); - - kl_current_schedule.apply_move(move_2); - - BOOST_CHECK_EQUAL(kl_current_schedule.step_max_work[0], 39.0); - BOOST_CHECK_EQUAL(kl_current_schedule.step_second_max_work[0], 5.0); - BOOST_CHECK_EQUAL(kl_current_schedule.num_steps(), 1); - BOOST_CHECK_EQUAL(kl_current_schedule.current_cost, 55.0); - BOOST_CHECK_EQUAL(kl_current_schedule.current_feasible, false); - BOOST_CHECK_EQUAL(kl_current_schedule.cost_f->compute_current_costs(), 55.0); - - kl.initialize_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); - - auto &node_gains = kl.get_node_gains(); - auto &node_change_in_costs = 
kl.get_node_change_in_costs(); - - BOOST_CHECK_EQUAL(node_gains[v1][0][1], 4.0); - BOOST_CHECK_EQUAL(node_change_in_costs[v1][0][1], 0.0); - - BOOST_CHECK_EQUAL(node_gains[v1][1][1], std::numeric_limits::lowest()); - BOOST_CHECK_EQUAL(node_change_in_costs[v1][1][1], 0.0); - - BOOST_CHECK_EQUAL(node_gains[v2][0][1], 19.0); - BOOST_CHECK_EQUAL(node_change_in_costs[v2][0][1], -7.0); - - kl_move move_3(v7, 0, 7.0, 0, 0, 1, 0); - kl_current_schedule.apply_move(move_3); - BOOST_CHECK_EQUAL(kl_current_schedule.current_feasible, false); - - kl_move move_4(v2, 0, 7.0, 1, 0, 0, 0); - kl_current_schedule.apply_move(move_4); - BOOST_CHECK_EQUAL(kl_current_schedule.current_feasible, false); - - kl_move move_5(v1, 0, 7.0, 1, 0, 0, 0); - kl_current_schedule.apply_move(move_5); - BOOST_CHECK_EQUAL(kl_current_schedule.current_feasible, true); -}; - -BOOST_AUTO_TEST_CASE(kl_total_comm_test_1) { - std::vector filenames_graph = test_graphs(); - - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - - // Getting root git directory - std::filesystem::path cwd = std::filesystem::current_path(); - std::cout << cwd << std::endl; - while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { - cwd = cwd.parent_path(); - std::cout << cwd << std::endl; - } - - GreedyBspScheduler test_scheduler; - - for (auto &filename_graph : filenames_graph) { - BspInstance instance; - - bool status_graph - = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); - - instance.getArchitecture().setSynchronisationCosts(5); - instance.getArchitecture().setCommunicationCosts(5); - instance.getArchitecture().setNumberOfProcessors(4); - - if (!status_graph) { - std::cout << "Reading files failed." 
<< std::endl; - BOOST_CHECK(false); - } - - BspSchedule schedule(instance); - const auto result = test_scheduler.computeSchedule(schedule); - - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - - kl_total_comm_test kl; - - auto status = kl.improve_schedule_test_1(schedule); - - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); - } -} - -BOOST_AUTO_TEST_CASE(kl_total_comm_test_2) { - std::vector filenames_graph = test_graphs(); - - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - - // Getting root git directory - std::filesystem::path cwd = std::filesystem::current_path(); - std::cout << cwd << std::endl; - while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { - cwd = cwd.parent_path(); - std::cout << cwd << std::endl; - } - - GreedyBspScheduler test_scheduler; - - for (auto &filename_graph : filenames_graph) { - BspInstance instance; - - bool status_graph - = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); - - instance.getArchitecture().setSynchronisationCosts(5); - instance.getArchitecture().setCommunicationCosts(5); - instance.getArchitecture().setNumberOfProcessors(4); - - if (!status_graph) { - std::cout << "Reading files failed." 
<< std::endl; - BOOST_CHECK(false); - } - - BspSchedule schedule(instance); - const auto result = test_scheduler.computeSchedule(schedule); - - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - - kl_total_comm_test kl; - - auto status = kl.improve_schedule_test_2(schedule); - - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); - } -} - -BOOST_AUTO_TEST_CASE(kl_total_cut_test_1) { - std::vector filenames_graph = test_graphs(); - - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - - // Getting root git directory - std::filesystem::path cwd = std::filesystem::current_path(); - std::cout << cwd << std::endl; - while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { - cwd = cwd.parent_path(); - std::cout << cwd << std::endl; - } - - GreedyBspScheduler test_scheduler; - - for (auto &filename_graph : filenames_graph) { - BspInstance instance; - - bool status_graph - = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); - instance.getArchitecture().setSynchronisationCosts(5); - instance.getArchitecture().setCommunicationCosts(5); - instance.getArchitecture().setNumberOfProcessors(4); - - if (!status_graph) { - std::cout << "Reading files failed." 
<< std::endl; - BOOST_CHECK(false); - } - - BspSchedule schedule(instance); - const auto result = test_scheduler.computeSchedule(schedule); - - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - - kl_total_cut_test kl; - - auto status = kl.improve_schedule_test_1(schedule); - - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); - } -} - -BOOST_AUTO_TEST_CASE(kl_total_cut_test_2) { - std::vector filenames_graph = test_graphs(); - - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - - // Getting root git directory - std::filesystem::path cwd = std::filesystem::current_path(); - std::cout << cwd << std::endl; - while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { - cwd = cwd.parent_path(); - std::cout << cwd << std::endl; - } - - GreedyBspScheduler test_scheduler; - - for (auto &filename_graph : filenames_graph) { - BspInstance instance; - - bool status_graph - = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); - instance.getArchitecture().setSynchronisationCosts(5); - instance.getArchitecture().setCommunicationCosts(5); - instance.getArchitecture().setNumberOfProcessors(4); - - if (!status_graph) { - std::cout << "Reading files failed." 
<< std::endl; - BOOST_CHECK(false); - } - - BspSchedule schedule(instance); - const auto result = test_scheduler.computeSchedule(schedule); - - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - - kl_total_cut_test kl; - - auto status = kl.improve_schedule_test_2(schedule); - - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); - } -} diff --git a/tests/kl_bsp_affinity_test.cpp b/tests/kl_bsp_affinity_test.cpp index 24418309..ea6e481e 100644 --- a/tests/kl_bsp_affinity_test.cpp +++ b/tests/kl_bsp_affinity_test.cpp @@ -3,730 +3,729 @@ #include #include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_bsp_comm_cost.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_test.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "test_graphs.hpp" using namespace osp; -using graph = computational_dag_edge_idx_vector_impl_def_int_t; -using kl_active_schedule_t = kl_active_schedule; +using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; +using KlActiveScheduleT = KlActiveSchedule; -BOOST_AUTO_TEST_CASE(simple_parent_child_test) { - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(SimpleParentChildTest) { + using VertexType = Graph::VertexIdx; - graph dag; - const VertexType v0 = dag.add_vertex(10, 5, 2); // work=10, mem=5, comm=2 - const VertexType v1 = dag.add_vertex(8, 4, 1); // work=8, mem=4, comm=1 - dag.add_edge(v0, v1, 3); // edge weight=3 + Graph dag; + const VertexType v0 = dag.AddVertex(10, 5, 2); // work=10, mem=5, comm=2 + 
const VertexType v1 = dag.AddVertex(8, 4, 1); // work=8, mem=4, comm=1 + dag.AddEdge(v0, v1, 3); // edge weight=3 - BspArchitecture arch; - arch.setNumberOfProcessors(2); + BspArchitecture arch; + arch.SetNumberOfProcessors(2); - BspInstance instance(dag, arch); - instance.setCommunicationCosts(10); // comm multiplier - instance.setSynchronisationCosts(5); + BspInstance instance(dag, arch); + instance.SetCommunicationCosts(10); // comm multiplier + instance.SetSynchronisationCosts(5); BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1}); // v0 on p0, v1 on p1 - schedule.setAssignedSupersteps({0, 1}); // v0 in step 0, v1 in step 1 - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({0, 1}); // v0 on p0, v1 on p1 + schedule.SetAssignedSupersteps({0, 1}); // v0 in step 0, v1 in step 1 + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + KlImproverTest kl; + kl.SetupSchedule(schedule); // Insert only v0 into gain heap to control which node moves - auto node_selection = kl.insert_gain_heap_test({0}); + auto nodeSelection = kl.InsertGainHeapTest({0}); // Run one iteration - this will move v0 to its best position - auto recompute_max_gain = kl.run_inner_iteration_test(); + auto recomputeMaxGain = kl.RunInnerIterationTest(); // Compare costs after move - double after_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_tracked = kl.get_current_cost(); + double afterRecomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterTracked = kl.GetCurrentCost(); - BOOST_CHECK_CLOSE(after_recomputed, after_tracked, 0.00001); + BOOST_CHECK_CLOSE(afterRecomputed, afterTracked, 0.00001); } /** * Helper to validate comm datastructures by comparing with freshly computed ones */ template -bool 
validate_comm_datastructures(const max_comm_datastructure &comm_ds_incremental, - kl_active_schedule_t &active_sched, - const BspInstance &instance, - const std::string &context) { +bool ValidateCommDatastructures(const MaxCommDatastructure &commDsIncremental, + KlActiveScheduleT &activeSched, + const BspInstance &instance, + const std::string &context) { // 1. Clone Schedule - BspSchedule current_schedule(instance); - active_sched.write_schedule(current_schedule); + BspSchedule currentSchedule(instance); + activeSched.WriteSchedule(currentSchedule); // 2. Fresh Computation - kl_active_schedule_t kl_sched_fresh; - kl_sched_fresh.initialize(current_schedule); + KlActiveScheduleT klSchedFresh; + klSchedFresh.Initialize(currentSchedule); - max_comm_datastructure comm_ds_fresh; - comm_ds_fresh.initialize(kl_sched_fresh); + MaxCommDatastructure commDsFresh; + commDsFresh.Initialize(klSchedFresh); // Compute for all steps - unsigned max_step = current_schedule.numberOfSupersteps(); - comm_ds_fresh.compute_comm_datastructures(0, max_step > 0 ? max_step - 1 : 0); + unsigned maxStep = currentSchedule.NumberOfSupersteps(); + commDsFresh.ComputeCommDatastructures(0, maxStep > 0 ? maxStep - 1 : 0); - bool all_match = true; + bool allMatch = true; // std::cout << "\nValidating comm datastructures " << context << ":" << std::endl; // 3. 
Validate Comm Costs - for (unsigned step = 0; step < max_step; ++step) { - for (unsigned p = 0; p < instance.numberOfProcessors(); ++p) { - auto send_inc = comm_ds_incremental.step_proc_send(step, p); - auto send_fresh = comm_ds_fresh.step_proc_send(step, p); - auto recv_inc = comm_ds_incremental.step_proc_receive(step, p); - auto recv_fresh = comm_ds_fresh.step_proc_receive(step, p); - - if (std::abs(send_inc - send_fresh) > 1e-6 || std::abs(recv_inc - recv_fresh) > 1e-6) { - all_match = false; + for (unsigned step = 0; step < maxStep; ++step) { + for (unsigned p = 0; p < instance.NumberOfProcessors(); ++p) { + auto sendInc = commDsIncremental.StepProcSend(step, p); + auto sendFresh = commDsFresh.StepProcSend(step, p); + auto recvInc = commDsIncremental.StepProcReceive(step, p); + auto recvFresh = commDsFresh.StepProcReceive(step, p); + + if (std::abs(sendInc - sendFresh) > 1e-6 || std::abs(recvInc - recvFresh) > 1e-6) { + allMatch = false; std::cout << " MISMATCH at step " << step << " proc " << p << ":" << std::endl; - std::cout << " Incremental: send=" << send_inc << ", recv=" << recv_inc << std::endl; - std::cout << " Fresh: send=" << send_fresh << ", recv=" << recv_fresh << std::endl; + std::cout << " Incremental: send=" << sendInc << ", recv=" << recvInc << std::endl; + std::cout << " Fresh: send=" << sendFresh << ", recv=" << recvFresh << std::endl; } } } // 4. 
Validate Lambda Maps - for (const auto v : instance.vertices()) { - for (unsigned p = 0; p < instance.numberOfProcessors(); ++p) { - unsigned count_inc = 0; - if (comm_ds_incremental.node_lambda_map.has_proc_entry(v, p)) { - count_inc = comm_ds_incremental.node_lambda_map.get_proc_entry(v, p); + for (const auto v : instance.Vertices()) { + for (unsigned p = 0; p < instance.NumberOfProcessors(); ++p) { + unsigned countInc = 0; + if (commDsIncremental.nodeLambdaMap_.HasProcEntry(v, p)) { + countInc = commDsIncremental.nodeLambdaMap_.GetProcEntry(v, p); } - unsigned count_fresh = 0; - if (comm_ds_fresh.node_lambda_map.has_proc_entry(v, p)) { - count_fresh = comm_ds_fresh.node_lambda_map.get_proc_entry(v, p); + unsigned countFresh = 0; + if (commDsFresh.nodeLambdaMap_.HasProcEntry(v, p)) { + countFresh = commDsFresh.nodeLambdaMap_.GetProcEntry(v, p); } - if (count_inc != count_fresh) { - all_match = false; + if (countInc != countFresh) { + allMatch = false; std::cout << " LAMBDA MISMATCH at node " << v << " proc " << p << ":" << std::endl; - std::cout << " Incremental: " << count_inc << std::endl; - std::cout << " Fresh: " << count_fresh << std::endl; + std::cout << " Incremental: " << countInc << std::endl; + std::cout << " Fresh: " << countFresh << std::endl; } } } - return all_match; + return allMatch; } /** * Helper to validate affinity tables by comparing with freshly computed ones */ -template -bool validate_affinity_tables(kl_improver_test &kl_incremental, - const BspInstance &instance, - const std::string &context) { +template +bool ValidateAffinityTables(KlImproverTest &klIncremental, + const BspInstance &instance, + const std::string &context) { // 1. Get current schedule from incremental - BspSchedule current_schedule(instance); - kl_incremental.get_active_schedule_test(current_schedule); + BspSchedule currentSchedule(instance); + klIncremental.GetActiveScheduleTest(currentSchedule); // 2. 
Create fresh kl_improver and compute all affinities from scratch - kl_improver_test kl_fresh; - kl_fresh.setup_schedule(current_schedule); + KlImproverTest klFresh; + klFresh.SetupSchedule(currentSchedule); // Get selected nodes from incremental - std::vector> selected_nodes; + std::vector> selectedNodes; - const size_t active_count = kl_incremental.get_affinity_table().size(); - for (size_t i = 0; i < active_count; ++i) { - selected_nodes.push_back(kl_incremental.get_affinity_table().get_selected_nodes()[i]); + const size_t activeCount = klIncremental.GetAffinityTable().size(); + for (size_t i = 0; i < activeCount; ++i) { + selectedNodes.push_back(klIncremental.GetAffinityTable().GetSelectedNodes()[i]); } - std::cout << "\n [" << context << "] Validating " << selected_nodes.size() << " selected nodes: { "; - for (const auto n : selected_nodes) { + std::cout << "\n [" << context << "] Validating " << selectedNodes.size() << " selected nodes: { "; + for (const auto n : selectedNodes) { std::cout << n << " "; } std::cout << "}" << std::endl; // Compute affinities for all selected nodes - kl_fresh.insert_gain_heap_test(selected_nodes); + klFresh.InsertGainHeapTest(selectedNodes); - bool all_match = true; - const unsigned num_procs = instance.numberOfProcessors(); - const unsigned num_steps = kl_incremental.get_active_schedule().num_steps(); + bool allMatch = true; + const unsigned numProcs = instance.NumberOfProcessors(); + const unsigned numSteps = klIncremental.GetActiveSchedule().NumSteps(); // 3. 
Compare affinity tables for each selected node - for (const auto &node : selected_nodes) { - const auto &affinity_inc = kl_incremental.get_affinity_table().get_affinity_table(node); - const auto &affinity_fresh = kl_fresh.get_affinity_table().get_affinity_table(node); + for (const auto &node : selectedNodes) { + const auto &affinityInc = klIncremental.GetAffinityTable().GetAffinityTable(node); + const auto &affinityFresh = klFresh.GetAffinityTable().GetAffinityTable(node); - unsigned node_step = kl_incremental.get_active_schedule().assigned_superstep(node); + unsigned nodeStep = klIncremental.GetActiveSchedule().AssignedSuperstep(node); - for (unsigned p = 0; p < num_procs; ++p) { - if (p >= affinity_inc.size() || p >= affinity_fresh.size()) { + for (unsigned p = 0; p < numProcs; ++p) { + if (p >= affinityInc.size() || p >= affinityFresh.size()) { continue; } - for (unsigned idx = 0; idx < affinity_inc[p].size() && idx < affinity_fresh[p].size(); ++idx) { - int step_offset = static_cast(idx) - static_cast(window_size); - int target_step_signed = static_cast(node_step) + step_offset; + for (unsigned idx = 0; idx < affinityInc[p].size() && idx < affinityFresh[p].size(); ++idx) { + int stepOffset = static_cast(idx) - static_cast(windowSize); + int targetStepSigned = static_cast(nodeStep) + stepOffset; // Skip affinities for supersteps that don't exist - if (target_step_signed < 0 || target_step_signed >= static_cast(num_steps)) { + if (targetStepSigned < 0 || targetStepSigned >= static_cast(numSteps)) { continue; } - double val_inc = affinity_inc[p][idx]; - double val_fresh = affinity_fresh[p][idx]; + double valInc = affinityInc[p][idx]; + double valFresh = affinityFresh[p][idx]; - if (std::abs(val_inc - val_fresh) > 1e-4) { - all_match = false; + if (std::abs(valInc - valFresh) > 1e-4) { + allMatch = false; std::cout << " AFFINITY MISMATCH [" << context << "]: node=" << node << " to P" << p << " S" - << target_step_signed << " (offset=" << step_offset << ")" << 
std::endl; - std::cout << " Incremental: " << val_inc << std::endl; - std::cout << " Fresh: " << val_fresh << std::endl; - std::cout << " Difference: " << (val_inc - val_fresh) << std::endl; + << targetStepSigned << " (offset=" << stepOffset << ")" << std::endl; + std::cout << " Incremental: " << valInc << std::endl; + std::cout << " Fresh: " << valFresh << std::endl; + std::cout << " Difference: " << (valInc - valFresh) << std::endl; } } } } - return all_match; + return allMatch; } -BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) { - graph dag; +BOOST_AUTO_TEST_CASE(TestUpdateDatastructureAfterMove) { + Graph dag; // Create 6 vertices with specific comm weights - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 5, 1); // 2 - dag.add_vertex(1, 1, 1); // 3 - dag.add_vertex(1, 2, 1); // 4 - dag.add_vertex(1, 1, 1); // 5 + dag.AddVertex(1, 10, 1); // 0 + dag.AddVertex(1, 1, 1); // 1 + dag.AddVertex(1, 5, 1); // 2 + dag.AddVertex(1, 1, 1); // 3 + dag.AddVertex(1, 2, 1); // 4 + dag.AddVertex(1, 1, 1); // 5 // Add edges - dag.add_edge(0, 1, 1); - dag.add_edge(2, 3, 1); - dag.add_edge(4, 5, 1); + dag.AddEdge(0, 1, 1); + dag.AddEdge(2, 3, 1); + dag.AddEdge(4, 5, 1); - BspArchitecture arch; - arch.setNumberOfProcessors(3); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(3); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Schedule: // Proc 0: Node 0, 4, 5 // Proc 1: Node 1, 2 // Proc 2: Node 3 - schedule.setAssignedProcessors({0, 1, 1, 2, 0, 0}); + schedule.SetAssignedProcessors({0, 1, 1, 2, 0, 0}); // Steps: 0, 1, 0, 1, 0, 0 - schedule.setAssignedSupersteps({0, 1, 0, 1, 0, 0}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedSupersteps({0, 1, 0, 1, 0, 0}); + 
schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + KlImproverTest kl; + kl.SetupSchedule(schedule); - kl.insert_gain_heap_test({0}); - kl.run_inner_iteration_test(); + kl.InsertGainHeapTest({0}); + kl.RunInnerIterationTest(); - double after_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_tracked = kl.get_current_cost(); + double afterRecomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterTracked = kl.GetCurrentCost(); - BOOST_CHECK(validate_comm_datastructures( - kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_update_datastructure_after_move")); - BOOST_CHECK_CLOSE(after_recomputed, after_tracked, 0.00001); + BOOST_CHECK(ValidateCommDatastructures( + kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "test_update_datastructure_after_move")); + BOOST_CHECK_CLOSE(afterRecomputed, afterTracked, 0.00001); } -BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) { - graph dag; +BOOST_AUTO_TEST_CASE(TestMultipleSequentialMoves) { + Graph dag; // Create a linear chain: 0 -> 1 -> 2 -> 3 - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 8, 1); // 1 - dag.add_vertex(1, 6, 1); // 2 - dag.add_vertex(1, 4, 1); // 3 + dag.AddVertex(1, 10, 1); // 0 + dag.AddVertex(1, 8, 1); // 1 + dag.AddVertex(1, 6, 1); // 2 + dag.AddVertex(1, 4, 1); // 3 - dag.add_edge(0, 1, 1); - dag.add_edge(1, 2, 1); - dag.add_edge(2, 3, 1); + dag.AddEdge(0, 1, 1); + dag.AddEdge(1, 2, 1); + dag.AddEdge(2, 3, 1); - BspArchitecture arch; - arch.setNumberOfProcessors(4); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(4); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule 
schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1, 2, 3}); - schedule.setAssignedSupersteps({0, 0, 0, 0}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({0, 1, 2, 3}); + schedule.SetAssignedSupersteps({0, 0, 0, 0}); + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + KlImproverTest kl; + kl.SetupSchedule(schedule); - kl.insert_gain_heap_test({1}); - kl.run_inner_iteration_test(); + kl.InsertGainHeapTest({1}); + kl.RunInnerIterationTest(); - double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures( - kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_multiple_sequential_moves_1")); - BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); + double afterMove1Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove1Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures( + kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "test_multiple_sequential_moves_1")); + BOOST_CHECK_CLOSE(afterMove1Recomputed, afterMove1Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures( - kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_multiple_sequential_moves_2")); - BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); + double afterMove2Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double 
afterMove2Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures( + kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "test_multiple_sequential_moves_2")); + BOOST_CHECK_CLOSE(afterMove2Recomputed, afterMove2Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures( - kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_multiple_sequential_moves_3")); - BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); + double afterMove3Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove3Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures( + kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "test_multiple_sequential_moves_3")); + BOOST_CHECK_CLOSE(afterMove3Recomputed, afterMove3Tracked, 0.00001); // After: Node 0 has 3 local children // Send cost = 10 * 0 = 0 (all local) // Work cost 4 - BOOST_CHECK_CLOSE(after_move3_tracked, 4.0, 0.00001); + BOOST_CHECK_CLOSE(afterMove3Tracked, 4.0, 0.00001); } -BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) { - graph dag; +BOOST_AUTO_TEST_CASE(TestNodeWithMultipleChildren) { + Graph dag; // Tree structure: Node 0 has three children (1, 2, 3) - dag.add_vertex(1, 1, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 1, 1); // 2 - dag.add_vertex(1, 1, 1); // 3 + dag.AddVertex(1, 1, 1); // 0 + dag.AddVertex(1, 1, 1); // 1 + dag.AddVertex(1, 1, 1); // 2 + dag.AddVertex(1, 1, 1); // 3 - dag.add_edge(0, 1, 1); - dag.add_edge(0, 2, 1); - dag.add_edge(0, 3, 1); + dag.AddEdge(0, 1, 1); + dag.AddEdge(0, 2, 1); + dag.AddEdge(0, 3, 1); - BspArchitecture arch; - arch.setNumberOfProcessors(4); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(4); 
+ arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1, 2, 3}); - schedule.setAssignedSupersteps({0, 0, 0, 0}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({0, 1, 2, 3}); + schedule.SetAssignedSupersteps({0, 0, 0, 0}); + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + KlImproverTest kl; + kl.SetupSchedule(schedule); - kl.insert_gain_heap_test({1}); - kl.get_comm_cost_f().compute_schedule_cost(); - kl.run_inner_iteration_test(); + kl.InsertGainHeapTest({1}); + kl.GetCommCostF().ComputeScheduleCost(); + kl.RunInnerIterationTest(); - double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures( - kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_node_with_multiple_children")); - BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); + double afterMove1Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove1Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures( + kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "test_node_with_multiple_children")); + BOOST_CHECK_CLOSE(afterMove1Recomputed, afterMove1Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures( - kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, 
"test_node_with_multiple_children_2")); - BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); + double afterMove2Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove2Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures( + kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "test_node_with_multiple_children_2")); + BOOST_CHECK_CLOSE(afterMove2Recomputed, afterMove2Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures( - kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_node_with_multiple_children_3")); - BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); + double afterMove3Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove3Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures( + kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "test_node_with_multiple_children_3")); + BOOST_CHECK_CLOSE(afterMove3Recomputed, afterMove3Tracked, 0.00001); // After: Node 0 has 3 local children // Send cost = 10 * 0 = 0 (all local) // Work cost 4 - BOOST_CHECK_CLOSE(after_move3_tracked, 4.0, 0.00001); + BOOST_CHECK_CLOSE(afterMove3Tracked, 4.0, 0.00001); } -BOOST_AUTO_TEST_CASE(test_cross_step_moves) { - graph dag; +BOOST_AUTO_TEST_CASE(TestCrossStepMoves) { + Graph dag; // 0 -> 1 -> 2 - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 8, 1); // 1 - dag.add_vertex(1, 6, 1); // 2 + dag.AddVertex(1, 10, 1); // 0 + dag.AddVertex(1, 8, 1); // 1 + dag.AddVertex(1, 6, 1); // 2 - dag.add_edge(0, 1, 1); - dag.add_edge(1, 2, 1); + dag.AddEdge(0, 1, 1); + dag.AddEdge(1, 2, 1); - BspArchitecture arch; - arch.setNumberOfProcessors(2); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture 
arch; + arch.SetNumberOfProcessors(2); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1, 0}); - schedule.setAssignedSupersteps({0, 1, 2}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({0, 1, 0}); + schedule.SetAssignedSupersteps({0, 1, 2}); + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + KlImproverTest kl; + kl.SetupSchedule(schedule); - kl.insert_gain_heap_test({1}); - kl.run_inner_iteration_test(); + kl.InsertGainHeapTest({1}); + kl.RunInnerIterationTest(); - double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures( - kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_cross_step_moves_1")); - BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); + double afterMove1Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove1Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "test_cross_step_moves_1")); + BOOST_CHECK_CLOSE(afterMove1Recomputed, afterMove1Tracked, 0.00001); } -BOOST_AUTO_TEST_CASE(test_complex_scenario) { +BOOST_AUTO_TEST_CASE(TestComplexScenario) { std::cout << "Test case complex scenario" << std::endl; - graph dag; - - const auto v1 = dag.add_vertex(2, 9, 2); - const auto v2 = dag.add_vertex(3, 8, 4); - const auto v3 = dag.add_vertex(4, 7, 3); - const auto v4 = dag.add_vertex(5, 6, 2); - const auto v5 = dag.add_vertex(6, 5, 6); - const auto v6 = 
dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); // v7 (index 6) - const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) - - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); - - BspArchitecture arch; - arch.setNumberOfProcessors(2); // P0, P1 - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); - - BspInstance instance(dag, arch); - BspSchedule schedule(instance); - - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); - - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; - - kl_improver_test kl; - kl.setup_schedule(schedule); - - kl.insert_gain_heap_test({v3, v1}); - kl.run_inner_iteration_test(); - - double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move1")); - BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); - - kl.run_inner_iteration_test(); - - double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move2")); - BOOST_CHECK(validate_affinity_tables(kl, instance, "complex_move2")); - BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); - - kl.run_inner_iteration_test(); - - double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, 
kl.get_active_schedule(), instance, "complex_move3")); - BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); - - kl.run_inner_iteration_test(); - - double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move4_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move4")); - BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001); - - kl.run_inner_iteration_test(); - - double after_move5_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move5_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move5")); - BOOST_CHECK_CLOSE(after_move5_recomputed, after_move5_tracked, 0.00001); + Graph dag; + + const auto v1 = dag.AddVertex(2, 9, 2); + const auto v2 = dag.AddVertex(3, 8, 4); + const auto v3 = dag.AddVertex(4, 7, 3); + const auto v4 = dag.AddVertex(5, 6, 2); + const auto v5 = dag.AddVertex(6, 5, 6); + const auto v6 = dag.AddVertex(7, 4, 2); + dag.AddVertex(8, 3, 4); // v7 (index 6) + const auto v8 = dag.AddVertex(9, 2, 1); // v8 (index 7) + + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); + + BspArchitecture arch; + arch.SetNumberOfProcessors(2); // P0, P1 + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); + + BspInstance instance(dag, arch); + BspSchedule schedule(instance); + + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.UpdateNumberOfSupersteps(); + + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; + + KlImproverTest kl; + kl.SetupSchedule(schedule); + + 
kl.InsertGainHeapTest({v3, v1}); + kl.RunInnerIterationTest(); + + double afterMove1Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove1Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "complex_move1")); + BOOST_CHECK_CLOSE(afterMove1Recomputed, afterMove1Tracked, 0.00001); + + kl.RunInnerIterationTest(); + + double afterMove2Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove2Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "complex_move2")); + BOOST_CHECK(ValidateAffinityTables(kl, instance, "complex_move2")); + BOOST_CHECK_CLOSE(afterMove2Recomputed, afterMove2Tracked, 0.00001); + + kl.RunInnerIterationTest(); + + double afterMove3Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove3Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "complex_move3")); + BOOST_CHECK_CLOSE(afterMove3Recomputed, afterMove3Tracked, 0.00001); + + kl.RunInnerIterationTest(); + + double afterMove4Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove4Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "complex_move4")); + BOOST_CHECK_CLOSE(afterMove4Recomputed, afterMove4Tracked, 0.00001); + + kl.RunInnerIterationTest(); + + double afterMove5Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove5Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "complex_move5")); + BOOST_CHECK_CLOSE(afterMove5Recomputed, afterMove5Tracked, 0.00001); } -BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { - graph dag; +BOOST_AUTO_TEST_CASE(TestComplexScenarioOnlyCompute) { + 
Graph dag; - const auto v1 = dag.add_vertex(2, 9, 2); - const auto v2 = dag.add_vertex(3, 8, 4); - const auto v3 = dag.add_vertex(4, 7, 3); - const auto v4 = dag.add_vertex(5, 6, 2); - const auto v5 = dag.add_vertex(6, 5, 6); - const auto v6 = dag.add_vertex(7, 4, 2); - const auto v7 = dag.add_vertex(8, 3, 4); // v7 (index 6) - const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) + const auto v1 = dag.AddVertex(2, 9, 2); + const auto v2 = dag.AddVertex(3, 8, 4); + const auto v3 = dag.AddVertex(4, 7, 3); + const auto v4 = dag.AddVertex(5, 6, 2); + const auto v5 = dag.AddVertex(6, 5, 6); + const auto v6 = dag.AddVertex(7, 4, 2); + const auto v7 = dag.AddVertex(8, 3, 4); // v7 (index 6) + const auto v8 = dag.AddVertex(9, 2, 1); // v8 (index 7) - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; - arch.setNumberOfProcessors(2); // P0, P1 - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(2); // P0, P1 + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = 
kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + KlImproverTest kl; + kl.SetupSchedule(schedule); - kl.insert_gain_heap_test({v1}); - kl.run_inner_iteration_test(); + kl.InsertGainHeapTest({v1}); + kl.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move1")); - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "complex_move1")); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - kl_improver_test kl2; - kl2.setup_schedule(schedule); + KlImproverTest kl2; + kl2.SetupSchedule(schedule); - kl2.insert_gain_heap_test({v2}); - kl2.run_inner_iteration_test(); + kl2.InsertGainHeapTest({v2}); + kl2.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance, "complex_move2")); - BOOST_CHECK_CLOSE(kl2.get_comm_cost_f().compute_schedule_cost_test(), kl2.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl2.GetCommCostF().commDs_, kl2.GetActiveSchedule(), instance, "complex_move2")); + BOOST_CHECK_CLOSE(kl2.GetCommCostF().ComputeScheduleCostTest(), kl2.GetCurrentCost(), 0.00001); - kl_improver_test kl3; - kl3.setup_schedule(schedule); + KlImproverTest kl3; + kl3.SetupSchedule(schedule); - kl3.insert_gain_heap_test({v3}); - kl3.run_inner_iteration_test(); + kl3.InsertGainHeapTest({v3}); + kl3.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance, "complex_move3")); - BOOST_CHECK_CLOSE(kl3.get_comm_cost_f().compute_schedule_cost_test(), kl3.get_current_cost(), 0.00001); + 
BOOST_CHECK(ValidateCommDatastructures(kl3.GetCommCostF().commDs_, kl3.GetActiveSchedule(), instance, "complex_move3")); + BOOST_CHECK_CLOSE(kl3.GetCommCostF().ComputeScheduleCostTest(), kl3.GetCurrentCost(), 0.00001); - kl_improver_test kl4; - kl4.setup_schedule(schedule); + KlImproverTest kl4; + kl4.SetupSchedule(schedule); - kl4.insert_gain_heap_test({v4}); - kl4.run_inner_iteration_test(); + kl4.InsertGainHeapTest({v4}); + kl4.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance, "complex_move4")); - BOOST_CHECK_CLOSE(kl4.get_comm_cost_f().compute_schedule_cost_test(), kl4.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl4.GetCommCostF().commDs_, kl4.GetActiveSchedule(), instance, "complex_move4")); + BOOST_CHECK_CLOSE(kl4.GetCommCostF().ComputeScheduleCostTest(), kl4.GetCurrentCost(), 0.00001); - kl_improver_test kl5; - kl5.setup_schedule(schedule); + KlImproverTest kl5; + kl5.SetupSchedule(schedule); - kl5.insert_gain_heap_test({v5}); - kl5.run_inner_iteration_test(); + kl5.InsertGainHeapTest({v5}); + kl5.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance, "complex_move5")); - BOOST_CHECK_CLOSE(kl5.get_comm_cost_f().compute_schedule_cost_test(), kl5.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl5.GetCommCostF().commDs_, kl5.GetActiveSchedule(), instance, "complex_move5")); + BOOST_CHECK_CLOSE(kl5.GetCommCostF().ComputeScheduleCostTest(), kl5.GetCurrentCost(), 0.00001); - kl_improver_test kl6; - kl6.setup_schedule(schedule); + KlImproverTest kl6; + kl6.SetupSchedule(schedule); - kl6.insert_gain_heap_test({v6}); - kl6.run_inner_iteration_test(); + kl6.InsertGainHeapTest({v6}); + kl6.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance, "complex_move6")); - 
BOOST_CHECK_CLOSE(kl6.get_comm_cost_f().compute_schedule_cost_test(), kl6.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl6.GetCommCostF().commDs_, kl6.GetActiveSchedule(), instance, "complex_move6")); + BOOST_CHECK_CLOSE(kl6.GetCommCostF().ComputeScheduleCostTest(), kl6.GetCurrentCost(), 0.00001); - kl_improver_test kl7; - kl7.setup_schedule(schedule); + KlImproverTest kl7; + kl7.SetupSchedule(schedule); - kl7.insert_gain_heap_test({v7}); - kl7.run_inner_iteration_test(); + kl7.InsertGainHeapTest({v7}); + kl7.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance, "complex_move7")); - BOOST_CHECK_CLOSE(kl7.get_comm_cost_f().compute_schedule_cost_test(), kl7.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl7.GetCommCostF().commDs_, kl7.GetActiveSchedule(), instance, "complex_move7")); + BOOST_CHECK_CLOSE(kl7.GetCommCostF().ComputeScheduleCostTest(), kl7.GetCurrentCost(), 0.00001); - kl_improver_test kl8; - kl8.setup_schedule(schedule); + KlImproverTest kl8; + kl8.SetupSchedule(schedule); - kl8.insert_gain_heap_test({v8}); - kl8.run_inner_iteration_test(); + kl8.InsertGainHeapTest({v8}); + kl8.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance, "complex_move8")); - BOOST_CHECK_CLOSE(kl8.get_comm_cost_f().compute_schedule_cost_test(), kl8.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl8.GetCommCostF().commDs_, kl8.GetActiveSchedule(), instance, "complex_move8")); + BOOST_CHECK_CLOSE(kl8.GetCommCostF().ComputeScheduleCostTest(), kl8.GetCurrentCost(), 0.00001); } -BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { - graph dag; +BOOST_AUTO_TEST_CASE(TestComplexScenarioOnlyCompute2) { + Graph dag; - const auto v1 = dag.add_vertex(2, 9, 2); - const auto v2 = dag.add_vertex(3, 8, 4); - const auto v3 = dag.add_vertex(4, 7, 3); - 
const auto v4 = dag.add_vertex(5, 6, 2); - const auto v5 = dag.add_vertex(6, 5, 6); - const auto v6 = dag.add_vertex(7, 4, 2); - const auto v7 = dag.add_vertex(8, 3, 4); // v7 (index 6) - const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) + const auto v1 = dag.AddVertex(2, 9, 2); + const auto v2 = dag.AddVertex(3, 8, 4); + const auto v3 = dag.AddVertex(4, 7, 3); + const auto v4 = dag.AddVertex(5, 6, 2); + const auto v5 = dag.AddVertex(6, 5, 6); + const auto v6 = dag.AddVertex(7, 4, 2); + const auto v7 = dag.AddVertex(8, 3, 4); // v7 (index 6) + const auto v8 = dag.AddVertex(9, 2, 1); // v8 (index 7) - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v5, 2); - dag.add_edge(v1, v6, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v2, v6, 2); - dag.add_edge(v2, v7, 2); - dag.add_edge(v2, v8, 2); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v3, v7, 2); - dag.add_edge(v3, v8, 2); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); - dag.add_edge(v5, v7, 2); - dag.add_edge(v6, v7, 2); - dag.add_edge(v7, v8, 2); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v5, 2); + dag.AddEdge(v1, v6, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v2, v6, 2); + dag.AddEdge(v2, v7, 2); + dag.AddEdge(v2, v8, 2); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v3, v7, 2); + dag.AddEdge(v3, v8, 2); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); + dag.AddEdge(v5, v7, 2); + dag.AddEdge(v6, v7, 2); + dag.AddEdge(v7, v8, 2); - BspArchitecture arch; - arch.setNumberOfProcessors(2); // P0, P1 - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(2); // P0, P1 + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); - 
schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + KlImproverTest kl; + kl.SetupSchedule(schedule); - kl.insert_gain_heap_test({v1}); - kl.run_inner_iteration_test(); + kl.InsertGainHeapTest({v1}); + kl.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move1")); - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "complex_move1")); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - kl_improver_test kl2; - kl2.setup_schedule(schedule); + KlImproverTest kl2; + kl2.SetupSchedule(schedule); - kl2.insert_gain_heap_test({v2}); - kl2.run_inner_iteration_test(); + kl2.InsertGainHeapTest({v2}); + kl2.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance, "complex_move2")); - BOOST_CHECK_CLOSE(kl2.get_comm_cost_f().compute_schedule_cost_test(), kl2.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl2.GetCommCostF().commDs_, kl2.GetActiveSchedule(), instance, "complex_move2")); + BOOST_CHECK_CLOSE(kl2.GetCommCostF().ComputeScheduleCostTest(), kl2.GetCurrentCost(), 0.00001); - kl_improver_test kl3; - kl3.setup_schedule(schedule); + KlImproverTest kl3; + kl3.SetupSchedule(schedule); - 
kl3.insert_gain_heap_test({v3}); - kl3.run_inner_iteration_test(); + kl3.InsertGainHeapTest({v3}); + kl3.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance, "complex_move3")); - BOOST_CHECK_CLOSE(kl3.get_comm_cost_f().compute_schedule_cost_test(), kl3.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl3.GetCommCostF().commDs_, kl3.GetActiveSchedule(), instance, "complex_move3")); + BOOST_CHECK_CLOSE(kl3.GetCommCostF().ComputeScheduleCostTest(), kl3.GetCurrentCost(), 0.00001); - kl_improver_test kl4; - kl4.setup_schedule(schedule); + KlImproverTest kl4; + kl4.SetupSchedule(schedule); - kl4.insert_gain_heap_test({v4}); - kl4.run_inner_iteration_test(); + kl4.InsertGainHeapTest({v4}); + kl4.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance, "complex_move4")); - BOOST_CHECK_CLOSE(kl4.get_comm_cost_f().compute_schedule_cost_test(), kl4.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl4.GetCommCostF().commDs_, kl4.GetActiveSchedule(), instance, "complex_move4")); + BOOST_CHECK_CLOSE(kl4.GetCommCostF().ComputeScheduleCostTest(), kl4.GetCurrentCost(), 0.00001); - kl_improver_test kl5; - kl5.setup_schedule(schedule); + KlImproverTest kl5; + kl5.SetupSchedule(schedule); - kl5.insert_gain_heap_test({v5}); - kl5.run_inner_iteration_test(); + kl5.InsertGainHeapTest({v5}); + kl5.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance, "complex_move5")); - BOOST_CHECK_CLOSE(kl5.get_comm_cost_f().compute_schedule_cost_test(), kl5.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl5.GetCommCostF().commDs_, kl5.GetActiveSchedule(), instance, "complex_move5")); + BOOST_CHECK_CLOSE(kl5.GetCommCostF().ComputeScheduleCostTest(), kl5.GetCurrentCost(), 0.00001); - 
kl_improver_test kl6; - kl6.setup_schedule(schedule); + KlImproverTest kl6; + kl6.SetupSchedule(schedule); - kl6.insert_gain_heap_test({v6}); - kl6.run_inner_iteration_test(); + kl6.InsertGainHeapTest({v6}); + kl6.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance, "complex_move6")); - BOOST_CHECK_CLOSE(kl6.get_comm_cost_f().compute_schedule_cost_test(), kl6.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl6.GetCommCostF().commDs_, kl6.GetActiveSchedule(), instance, "complex_move6")); + BOOST_CHECK_CLOSE(kl6.GetCommCostF().ComputeScheduleCostTest(), kl6.GetCurrentCost(), 0.00001); - kl_improver_test kl7; - kl7.setup_schedule(schedule); + KlImproverTest kl7; + kl7.SetupSchedule(schedule); - kl7.insert_gain_heap_test({v7}); - kl7.run_inner_iteration_test(); + kl7.InsertGainHeapTest({v7}); + kl7.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance, "complex_move7")); - BOOST_CHECK_CLOSE(kl7.get_comm_cost_f().compute_schedule_cost_test(), kl7.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl7.GetCommCostF().commDs_, kl7.GetActiveSchedule(), instance, "complex_move7")); + BOOST_CHECK_CLOSE(kl7.GetCommCostF().ComputeScheduleCostTest(), kl7.GetCurrentCost(), 0.00001); - kl_improver_test kl8; - kl8.setup_schedule(schedule); + KlImproverTest kl8; + kl8.SetupSchedule(schedule); - kl8.insert_gain_heap_test({v8}); - kl8.run_inner_iteration_test(); + kl8.InsertGainHeapTest({v8}); + kl8.RunInnerIterationTest(); - BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance, "complex_move8")); - BOOST_CHECK_CLOSE(kl8.get_comm_cost_f().compute_schedule_cost_test(), kl8.get_current_cost(), 0.00001); + BOOST_CHECK(ValidateCommDatastructures(kl8.GetCommCostF().commDs_, kl8.GetActiveSchedule(), instance, "complex_move8")); + 
BOOST_CHECK_CLOSE(kl8.GetCommCostF().ComputeScheduleCostTest(), kl8.GetCurrentCost(), 0.00001); } -BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) { +BOOST_AUTO_TEST_CASE(TestGridGraphComplexMoves) { // Construct 5x5 Grid Graph (25 nodes, indices 0-24) - graph dag = osp::construct_grid_dag(5, 5); + Graph dag = osp::ConstructGridDag(5, 5); - BspArchitecture arch; - arch.setNumberOfProcessors(4); // P0..P3 - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(4); // P0..P3 + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Assign Processors and Supersteps std::vector procs(25); @@ -752,58 +751,58 @@ BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) { procs[7] = 3; steps[7] = 1; - schedule.setAssignedProcessors(procs); - schedule.setAssignedSupersteps(steps); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors(procs); + schedule.SetAssignedSupersteps(steps); + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + KlImproverTest kl; + kl.SetupSchedule(schedule); - kl.insert_gain_heap_test({12, 8, 7}); - kl.run_inner_iteration_test(); + kl.InsertGainHeapTest({12, 8, 7}); + kl.RunInnerIterationTest(); - double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move1")); - BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); + double afterMove1Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + 
double afterMove1Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "grid_move1")); + BOOST_CHECK_CLOSE(afterMove1Recomputed, afterMove1Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move2")); - BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); + double afterMove2Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove2Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "grid_move2")); + BOOST_CHECK_CLOSE(afterMove2Recomputed, afterMove2Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move3")); - BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); + double afterMove3Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove3Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "grid_move3")); + BOOST_CHECK_CLOSE(afterMove3Recomputed, afterMove3Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move4_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move4")); 
- BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001); + double afterMove4Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove4Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "grid_move4")); + BOOST_CHECK_CLOSE(afterMove4Recomputed, afterMove4Tracked, 0.00001); } -BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { +BOOST_AUTO_TEST_CASE(TestButterflyGraphMoves) { // Stages=2 -> 3 levels of 4 nodes each = 12 nodes. // Level 0: 0-3. Level 1: 4-7. Level 2: 8-11. - graph dag = osp::construct_butterfly_dag(2); + Graph dag = osp::ConstructButterflyDag(2); - BspArchitecture arch; - arch.setNumberOfProcessors(2); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(2); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Assign: // Level 0: P0, Step 0 @@ -824,58 +823,58 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { } } - schedule.setAssignedProcessors(procs); - schedule.setAssignedSupersteps(steps); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors(procs); + schedule.SetAssignedSupersteps(steps); + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + KlImproverTest kl; + kl.SetupSchedule(schedule); - kl.insert_gain_heap_test({4, 6, 0}); - kl.run_inner_iteration_test(); + kl.InsertGainHeapTest({4, 6, 0}); + kl.RunInnerIterationTest(); - double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move1_tracked = kl.get_current_cost(); - 
BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move1")); - BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); + double afterMove1Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove1Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "butterfly_move1")); + BOOST_CHECK_CLOSE(afterMove1Recomputed, afterMove1Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move2")); - BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); + double afterMove2Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove2Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "butterfly_move2")); + BOOST_CHECK_CLOSE(afterMove2Recomputed, afterMove2Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move3")); - BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); + double afterMove3Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove3Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "butterfly_move3")); + BOOST_CHECK_CLOSE(afterMove3Recomputed, afterMove3Tracked, 0.00001); - 
kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move4_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move4")); - BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001); + double afterMove4Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove4Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "butterfly_move4")); + BOOST_CHECK_CLOSE(afterMove4Recomputed, afterMove4Tracked, 0.00001); } -BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { +BOOST_AUTO_TEST_CASE(TestLadderGraphMoves) { // Ladder with 5 rungs -> 6 pairs of nodes = 12 nodes. // Pairs: (0,1), (2,3), ... (10,11). - graph dag = osp::construct_ladder_dag(5); + Graph dag = osp::ConstructLadderDag(5); - BspArchitecture arch; - arch.setNumberOfProcessors(2); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(2); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Assign: // Even nodes (Left rail): P0 @@ -890,42 +889,42 @@ BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { steps[2 * i + 1] = i; } - schedule.setAssignedProcessors(procs); - schedule.setAssignedSupersteps(steps); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors(procs); + schedule.SetAssignedSupersteps(steps); + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; - kl.setup_schedule(schedule); + 
KlImproverTest kl; + kl.SetupSchedule(schedule); - kl.insert_gain_heap_test({1, 3, 0, 2}); - kl.run_inner_iteration_test(); + kl.InsertGainHeapTest({1, 3, 0, 2}); + kl.RunInnerIterationTest(); - double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move1")); - BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); + double afterMove1Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove1Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "ladder_move1")); + BOOST_CHECK_CLOSE(afterMove1Recomputed, afterMove1Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move2")); - BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); + double afterMove2Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove2Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "ladder_move2")); + BOOST_CHECK_CLOSE(afterMove2Recomputed, afterMove2Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move3")); - BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); + double 
afterMove3Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove3Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "ladder_move3")); + BOOST_CHECK_CLOSE(afterMove3Recomputed, afterMove3Tracked, 0.00001); - kl.run_inner_iteration_test(); + kl.RunInnerIterationTest(); - double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double after_move4_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move4")); - BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001); + double afterMove4Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double afterMove4Tracked = kl.GetCurrentCost(); + BOOST_CHECK(ValidateCommDatastructures(kl.GetCommCostF().commDs_, kl.GetActiveSchedule(), instance, "ladder_move4")); + BOOST_CHECK_CLOSE(afterMove4Recomputed, afterMove4Tracked, 0.00001); } diff --git a/tests/kl_bsp_cost.cpp b/tests/kl_bsp_cost.cpp index 01779f45..49c50201 100644 --- a/tests/kl_bsp_cost.cpp +++ b/tests/kl_bsp_cost.cpp @@ -20,199 +20,199 @@ limitations under the License. 
#include #include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_bsp_comm_cost.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/max_comm_datastructure.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_active_schedule.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_util.hpp" #include "osp/concepts/graph_traits.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "test_graphs.hpp" using namespace osp; -using graph = computational_dag_edge_idx_vector_impl_def_int_t; -using kl_active_schedule_t = kl_active_schedule; +using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; +using KlActiveScheduleT = KlActiveSchedule; -BOOST_AUTO_TEST_CASE(test_arrange_superstep_comm_data) { - graph dag; +BOOST_AUTO_TEST_CASE(TestArrangeSuperstepCommData) { + Graph dag; - dag.add_vertex(1, 1, 1); - dag.add_vertex(1, 1, 1); - dag.add_vertex(1, 1, 1); - dag.add_vertex(1, 1, 1); + dag.AddVertex(1, 1, 1); + dag.AddVertex(1, 1, 1); + dag.AddVertex(1, 1, 1); + dag.AddVertex(1, 1, 1); - BspArchitecture arch; - arch.setNumberOfProcessors(4); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(4); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Initialize schedule with 1 step - schedule.setAssignedProcessors({0, 1, 2, 3}); - schedule.setAssignedSupersteps({0, 
0, 0, 0}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({0, 1, 2, 3}); + schedule.SetAssignedSupersteps({0, 0, 0, 0}); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); unsigned step = 0; // Case 1: Unique Max - comm_ds.step_proc_send(step, 0) = 10; - comm_ds.step_proc_send(step, 1) = 5; - comm_ds.step_proc_send(step, 2) = 2; - comm_ds.step_proc_send(step, 3) = 1; + commDs.StepProcSend(step, 0) = 10; + commDs.StepProcSend(step, 1) = 5; + commDs.StepProcSend(step, 2) = 2; + commDs.StepProcSend(step, 3) = 1; - comm_ds.step_proc_receive(step, 0) = 8; - comm_ds.step_proc_receive(step, 1) = 8; - comm_ds.step_proc_receive(step, 2) = 2; - comm_ds.step_proc_receive(step, 3) = 1; + commDs.StepProcReceive(step, 0) = 8; + commDs.StepProcReceive(step, 1) = 8; + commDs.StepProcReceive(step, 2) = 2; + commDs.StepProcReceive(step, 3) = 1; - comm_ds.arrange_superstep_comm_data(step); + commDs.ArrangeSuperstepCommData(step); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 1); // Only proc 0 has 10 - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 8); // Next highest is 8 (from recv) + BOOST_CHECK_EQUAL(commDs.StepMaxComm(step), 10); + BOOST_CHECK_EQUAL(commDs.StepMaxCommCount(step), 1); // Only proc 0 has 10 + BOOST_CHECK_EQUAL(commDs.StepSecondMaxComm(step), 8); // Next highest is 8 (from recv) // Case 2: Shared Max - comm_ds.reset_superstep(step); - comm_ds.step_proc_send(step, 0) = 10; // Need to re-set this as reset clears it - comm_ds.step_proc_send(step, 1) = 10; - comm_ds.step_proc_send(step, 2) = 2; - comm_ds.step_proc_send(step, 3) = 1; - - comm_ds.step_proc_receive(step, 0) = 5; - comm_ds.step_proc_receive(step, 1) = 5; - 
comm_ds.step_proc_receive(step, 2) = 2; - comm_ds.step_proc_receive(step, 3) = 1; - comm_ds.arrange_superstep_comm_data(step); - - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 2); // Proc 0 and 1 - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 5); // Next highest is 5 (from recv) + commDs.ResetSuperstep(step); + commDs.StepProcSend(step, 0) = 10; // Need to re-set this as reset clears it + commDs.StepProcSend(step, 1) = 10; + commDs.StepProcSend(step, 2) = 2; + commDs.StepProcSend(step, 3) = 1; + + commDs.StepProcReceive(step, 0) = 5; + commDs.StepProcReceive(step, 1) = 5; + commDs.StepProcReceive(step, 2) = 2; + commDs.StepProcReceive(step, 3) = 1; + commDs.ArrangeSuperstepCommData(step); + + BOOST_CHECK_EQUAL(commDs.StepMaxComm(step), 10); + BOOST_CHECK_EQUAL(commDs.StepMaxCommCount(step), 2); // Proc 0 and 1 + BOOST_CHECK_EQUAL(commDs.StepSecondMaxComm(step), 5); // Next highest is 5 (from recv) // Case 3: Max in Recv - comm_ds.reset_superstep(step); + commDs.ResetSuperstep(step); - comm_ds.step_proc_send(step, 0) = 5; - comm_ds.step_proc_send(step, 1) = 5; - comm_ds.step_proc_send(step, 2) = 2; - comm_ds.step_proc_send(step, 3) = 1; + commDs.StepProcSend(step, 0) = 5; + commDs.StepProcSend(step, 1) = 5; + commDs.StepProcSend(step, 2) = 2; + commDs.StepProcSend(step, 3) = 1; - comm_ds.step_proc_receive(step, 0) = 12; - comm_ds.step_proc_receive(step, 1) = 8; - comm_ds.step_proc_receive(step, 2) = 2; - comm_ds.step_proc_receive(step, 3) = 1; - comm_ds.arrange_superstep_comm_data(step); + commDs.StepProcReceive(step, 0) = 12; + commDs.StepProcReceive(step, 1) = 8; + commDs.StepProcReceive(step, 2) = 2; + commDs.StepProcReceive(step, 3) = 1; + commDs.ArrangeSuperstepCommData(step); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 12); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 1); - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 8); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(step), 
12); + BOOST_CHECK_EQUAL(commDs.StepMaxCommCount(step), 1); + BOOST_CHECK_EQUAL(commDs.StepSecondMaxComm(step), 8); // Case 4: All same - comm_ds.reset_superstep(step); + commDs.ResetSuperstep(step); // Send: 10, 10, 10, 10 // Recv: 10, 10, 10, 10 for (unsigned i = 0; i < 4; ++i) { - comm_ds.step_proc_send(step, i) = 10; - comm_ds.step_proc_receive(step, i) = 10; + commDs.StepProcSend(step, i) = 10; + commDs.StepProcReceive(step, i) = 10; } - comm_ds.arrange_superstep_comm_data(step); + commDs.ArrangeSuperstepCommData(step); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 8); // 4 sends + 4 recvs - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 0); // If all removed, 0. + BOOST_CHECK_EQUAL(commDs.StepMaxComm(step), 10); + BOOST_CHECK_EQUAL(commDs.StepMaxCommCount(step), 8); // 4 sends + 4 recvs + BOOST_CHECK_EQUAL(commDs.StepSecondMaxComm(step), 0); // If all removed, 0. // Case 5: Max removed, second max is from same type (Send) - comm_ds.reset_superstep(step); - comm_ds.step_proc_send(step, 0) = 10; - comm_ds.step_proc_send(step, 1) = 8; - comm_ds.step_proc_send(step, 2) = 2; - comm_ds.step_proc_send(step, 3) = 1; + commDs.ResetSuperstep(step); + commDs.StepProcSend(step, 0) = 10; + commDs.StepProcSend(step, 1) = 8; + commDs.StepProcSend(step, 2) = 2; + commDs.StepProcSend(step, 3) = 1; for (unsigned i = 0; i < 4; ++i) { - comm_ds.step_proc_receive(step, i) = 5; + commDs.StepProcReceive(step, i) = 5; } - comm_ds.arrange_superstep_comm_data(step); + commDs.ArrangeSuperstepCommData(step); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 1); - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 8); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(step), 10); + BOOST_CHECK_EQUAL(commDs.StepMaxCommCount(step), 1); + BOOST_CHECK_EQUAL(commDs.StepSecondMaxComm(step), 8); // Case 6: Max removed, second max is from other type (Recv) - 
comm_ds.reset_superstep(step); + commDs.ResetSuperstep(step); - comm_ds.step_proc_send(step, 0) = 10; - comm_ds.step_proc_send(step, 1) = 4; - comm_ds.step_proc_send(step, 2) = 2; - comm_ds.step_proc_send(step, 3) = 1; + commDs.StepProcSend(step, 0) = 10; + commDs.StepProcSend(step, 1) = 4; + commDs.StepProcSend(step, 2) = 2; + commDs.StepProcSend(step, 3) = 1; - comm_ds.step_proc_receive(step, 0) = 8; - comm_ds.step_proc_receive(step, 1) = 5; - comm_ds.step_proc_receive(step, 2) = 2; - comm_ds.step_proc_receive(step, 3) = 1; + commDs.StepProcReceive(step, 0) = 8; + commDs.StepProcReceive(step, 1) = 5; + commDs.StepProcReceive(step, 2) = 2; + commDs.StepProcReceive(step, 3) = 1; - comm_ds.arrange_superstep_comm_data(step); + commDs.ArrangeSuperstepCommData(step); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 1); - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 8); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(step), 10); + BOOST_CHECK_EQUAL(commDs.StepMaxCommCount(step), 1); + BOOST_CHECK_EQUAL(commDs.StepSecondMaxComm(step), 8); } -BOOST_AUTO_TEST_CASE(test_compute_comm_datastructures) { - graph dag; +BOOST_AUTO_TEST_CASE(TestComputeCommDatastructures) { + Graph dag; // Create 6 vertices with specific comm weights // Node 0: weight 10 (sends to 1) - dag.add_vertex(1, 10, 1); + dag.AddVertex(1, 10, 1); // Node 1: weight 1 - dag.add_vertex(1, 1, 1); + dag.AddVertex(1, 1, 1); // Node 2: weight 5 (sends to 3) - dag.add_vertex(1, 5, 1); + dag.AddVertex(1, 5, 1); // Node 3: weight 1 - dag.add_vertex(1, 1, 1); + dag.AddVertex(1, 1, 1); // Node 4: weight 2 (local to 5) - dag.add_vertex(1, 2, 1); + dag.AddVertex(1, 2, 1); // Node 5: weight 1 - dag.add_vertex(1, 1, 1); + dag.AddVertex(1, 1, 1); // Add edges // 0 -> 1 - dag.add_edge(0, 1, 1); // Edge weight ignored by max_comm_datastructure + dag.AddEdge(0, 1, 1); // Edge weight ignored by max_comm_datastructure // 2 -> 3 - dag.add_edge(2, 3, 1); + dag.AddEdge(2, 
3, 1); // 4 -> 5 - dag.add_edge(4, 5, 1); + dag.AddEdge(4, 5, 1); - BspArchitecture arch; - arch.setNumberOfProcessors(3); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(3); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Schedule: // Proc 0: Node 0, 4, 5 // Proc 1: Node 1, 2 // Proc 2: Node 3 - schedule.setAssignedProcessors({0, 1, 1, 2, 0, 0}); - schedule.setAssignedSupersteps({0, 1, 0, 1, 0, 0}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({0, 1, 1, 2, 0, 0}); + schedule.SetAssignedSupersteps({0, 1, 0, 1, 0, 0}); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); // Compute for steps 0 and 1 - comm_ds.compute_comm_datastructures(0, 1); + commDs.ComputeCommDatastructures(0, 1); unsigned step = 0; @@ -224,13 +224,13 @@ BOOST_AUTO_TEST_CASE(test_compute_comm_datastructures) { // Proc 2 sends: 0 // Proc 0 receives: 0 - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(step, 0), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(step, 1), 5); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(step, 2), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(step, 0), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(step, 1), 5); + BOOST_CHECK_EQUAL(commDs.StepProcSend(step, 2), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(step, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(step, 1), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(step, 2), 5); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(step, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(step, 1), 10); + 
BOOST_CHECK_EQUAL(commDs.StepProcReceive(step, 2), 5); // Max Comm Calculation Step 0 // Send Max: 10 (P0) @@ -239,144 +239,144 @@ BOOST_AUTO_TEST_CASE(test_compute_comm_datastructures) { // Count: 2 (P0 send, P1 recv) // Second Max: 5 (P1 send, P2 recv) - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 2); - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 5); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(step), 10); + BOOST_CHECK_EQUAL(commDs.StepMaxCommCount(step), 2); + BOOST_CHECK_EQUAL(commDs.StepSecondMaxComm(step), 5); // Verify Step 1 (Should be empty as Nodes 1 and 3 are leaves) step = 1; - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(step, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(step, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(step, 2), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(step, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(step, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(step, 2), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(step, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(step, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(step, 2), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(step, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(step, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(step, 2), 0); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 0); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(step), 0); } /** * Helper to validate comm datastructures by comparing with freshly computed ones */ template -bool validate_comm_datastructures(const max_comm_datastructure &comm_ds_incremental, - kl_active_schedule_t &active_sched, - const BspInstance &instance, - const std::string &context) { +bool ValidateCommDatastructures(const MaxCommDatastructure &commDsIncremental, + KlActiveScheduleT &activeSched, + const BspInstance &instance, + const std::string &context) { // 1. 
Clone Schedule - BspSchedule current_schedule(instance); - active_sched.write_schedule(current_schedule); + BspSchedule currentSchedule(instance); + activeSched.WriteSchedule(currentSchedule); // 2. Fresh Computation - kl_active_schedule_t kl_sched_fresh; - kl_sched_fresh.initialize(current_schedule); + KlActiveScheduleT klSchedFresh; + klSchedFresh.Initialize(currentSchedule); - max_comm_datastructure comm_ds_fresh; - comm_ds_fresh.initialize(kl_sched_fresh); + MaxCommDatastructure commDsFresh; + commDsFresh.Initialize(klSchedFresh); // Compute for all steps - unsigned max_step = current_schedule.numberOfSupersteps(); - comm_ds_fresh.compute_comm_datastructures(0, max_step > 0 ? max_step - 1 : 0); + unsigned maxStep = currentSchedule.NumberOfSupersteps(); + commDsFresh.ComputeCommDatastructures(0, maxStep > 0 ? maxStep - 1 : 0); - bool all_match = true; + bool allMatch = true; // std::cout << "\nValidating comm datastructures " << context << ":" << std::endl; // 3. Validate Comm Costs - for (unsigned step = 0; step < max_step; ++step) { - for (unsigned p = 0; p < instance.numberOfProcessors(); ++p) { - auto send_inc = comm_ds_incremental.step_proc_send(step, p); - auto send_fresh = comm_ds_fresh.step_proc_send(step, p); - auto recv_inc = comm_ds_incremental.step_proc_receive(step, p); - auto recv_fresh = comm_ds_fresh.step_proc_receive(step, p); - - if (std::abs(send_inc - send_fresh) > 1e-6 || std::abs(recv_inc - recv_fresh) > 1e-6) { - all_match = false; + for (unsigned step = 0; step < maxStep; ++step) { + for (unsigned p = 0; p < instance.NumberOfProcessors(); ++p) { + auto sendInc = commDsIncremental.StepProcSend(step, p); + auto sendFresh = commDsFresh.StepProcSend(step, p); + auto recvInc = commDsIncremental.StepProcReceive(step, p); + auto recvFresh = commDsFresh.StepProcReceive(step, p); + + if (std::abs(sendInc - sendFresh) > 1e-6 || std::abs(recvInc - recvFresh) > 1e-6) { + allMatch = false; std::cout << " MISMATCH at step " << step << " proc " << p << 
":" << std::endl; - std::cout << " Incremental: send=" << send_inc << ", recv=" << recv_inc << std::endl; - std::cout << " Fresh: send=" << send_fresh << ", recv=" << recv_fresh << std::endl; + std::cout << " Incremental: send=" << sendInc << ", recv=" << recvInc << std::endl; + std::cout << " Fresh: send=" << sendFresh << ", recv=" << recvFresh << std::endl; } } } // 4. Validate Lambda Maps - for (const auto v : instance.vertices()) { - for (unsigned p = 0; p < instance.numberOfProcessors(); ++p) { - unsigned count_inc = 0; - if (comm_ds_incremental.node_lambda_map.has_proc_entry(v, p)) { - count_inc = comm_ds_incremental.node_lambda_map.get_proc_entry(v, p); + for (const auto v : instance.Vertices()) { + for (unsigned p = 0; p < instance.NumberOfProcessors(); ++p) { + unsigned countInc = 0; + if (commDsIncremental.nodeLambdaMap_.HasProcEntry(v, p)) { + countInc = commDsIncremental.nodeLambdaMap_.GetProcEntry(v, p); } - unsigned count_fresh = 0; - if (comm_ds_fresh.node_lambda_map.has_proc_entry(v, p)) { - count_fresh = comm_ds_fresh.node_lambda_map.get_proc_entry(v, p); + unsigned countFresh = 0; + if (commDsFresh.nodeLambdaMap_.HasProcEntry(v, p)) { + countFresh = commDsFresh.nodeLambdaMap_.GetProcEntry(v, p); } - if (count_inc != count_fresh) { - all_match = false; + if (countInc != countFresh) { + allMatch = false; std::cout << " LAMBDA MISMATCH at node " << v << " proc " << p << ":" << std::endl; - std::cout << " Incremental: " << count_inc << std::endl; - std::cout << " Fresh: " << count_fresh << std::endl; + std::cout << " Incremental: " << countInc << std::endl; + std::cout << " Fresh: " << countFresh << std::endl; } } } - return all_match; + return allMatch; } -BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) { - graph dag; +BOOST_AUTO_TEST_CASE(TestUpdateDatastructureAfterMove) { + Graph dag; // Create 6 vertices with specific comm weights - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 5, 1); // 2 - 
dag.add_vertex(1, 1, 1); // 3 - dag.add_vertex(1, 2, 1); // 4 - dag.add_vertex(1, 1, 1); // 5 + dag.AddVertex(1, 10, 1); // 0 + dag.AddVertex(1, 1, 1); // 1 + dag.AddVertex(1, 5, 1); // 2 + dag.AddVertex(1, 1, 1); // 3 + dag.AddVertex(1, 2, 1); // 4 + dag.AddVertex(1, 1, 1); // 5 // Add edges - dag.add_edge(0, 1, 1); - dag.add_edge(2, 3, 1); - dag.add_edge(4, 5, 1); + dag.AddEdge(0, 1, 1); + dag.AddEdge(2, 3, 1); + dag.AddEdge(4, 5, 1); - BspArchitecture arch; - arch.setNumberOfProcessors(3); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(3); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Schedule: // Proc 0: Node 0, 4, 5 // Proc 1: Node 1, 2 // Proc 2: Node 3 - schedule.setAssignedProcessors({0, 1, 1, 2, 0, 0}); + schedule.SetAssignedProcessors({0, 1, 1, 2, 0, 0}); // Steps: 0, 1, 0, 1, 0, 0 - schedule.setAssignedSupersteps({0, 1, 0, 1, 0, 0}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedSupersteps({0, 1, 0, 1, 0, 0}); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); - comm_ds.compute_comm_datastructures(0, 1); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); + commDs.ComputeCommDatastructures(0, 1); // Move Node 0 from Proc 0 (Step 0) to Proc 2 (Step 0) // kl_move_struct(node, gain, from_proc, from_step, to_proc, to_step) - using kl_move = kl_move_struct; - kl_move move(0, 0.0, 0, 0, 2, 0); + using KlMove = KlMoveStruct; + KlMove move(0, 0.0, 0, 0, 2, 0); // Apply the move to the schedule first - thread_local_active_schedule_data active_schedule_data; - active_schedule_data.initialize_cost(0.0); - kl_sched.apply_move(move, 
active_schedule_data); + ThreadLocalActiveScheduleData activeScheduleData; + activeScheduleData.InitializeCost(0.0); + klSched.ApplyMove(move, activeScheduleData); // Then update the communication datastructures - comm_ds.update_datastructure_after_move(move, 0, 1); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_update_datastructure_after_move")); + commDs.UpdateDatastructureAfterMove(move, 0, 1); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "test_update_datastructure_after_move")); unsigned step = 0; @@ -390,13 +390,13 @@ BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) { // P1 Send: 5 // P2 Recv: 5 - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(step, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(step, 1), 5); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(step, 2), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(step, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(step, 1), 5); + BOOST_CHECK_EQUAL(commDs.StepProcSend(step, 2), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(step, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(step, 1), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(step, 2), 5); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(step, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(step, 1), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(step, 2), 5); // Max Comm: // Send Max: 10 (P2) @@ -405,320 +405,320 @@ BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) { // Count: 2 (P2 send, P1 recv) // Second Max: 5 (P1 send, P2 recv) - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 2); - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 5); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(step), 10); + BOOST_CHECK_EQUAL(commDs.StepMaxCommCount(step), 2); + BOOST_CHECK_EQUAL(commDs.StepSecondMaxComm(step), 5); } -BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) { - graph dag; 
+BOOST_AUTO_TEST_CASE(TestMultipleSequentialMoves) { + Graph dag; // Create a linear chain: 0 -> 1 -> 2 -> 3 - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 8, 1); // 1 - dag.add_vertex(1, 6, 1); // 2 - dag.add_vertex(1, 4, 1); // 3 + dag.AddVertex(1, 10, 1); // 0 + dag.AddVertex(1, 8, 1); // 1 + dag.AddVertex(1, 6, 1); // 2 + dag.AddVertex(1, 4, 1); // 3 - dag.add_edge(0, 1, 1); - dag.add_edge(1, 2, 1); - dag.add_edge(2, 3, 1); + dag.AddEdge(0, 1, 1); + dag.AddEdge(1, 2, 1); + dag.AddEdge(2, 3, 1); - BspArchitecture arch; - arch.setNumberOfProcessors(4); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(4); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Initial: All at step 0, on different processors // 0@P0, 1@P1, 2@P2, 3@P3 - schedule.setAssignedProcessors({0, 1, 2, 3}); - schedule.setAssignedSupersteps({0, 0, 0, 0}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({0, 1, 2, 3}); + schedule.SetAssignedSupersteps({0, 0, 0, 0}); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); - comm_ds.compute_comm_datastructures(0, 0); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); + commDs.ComputeCommDatastructures(0, 0); // Initial state: // P0 sends to P1 (10), P1 sends to P2 (8), P2 sends to P3 (6) - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 8); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 2), 6); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 3), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 8); + 
BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 2), 6); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 3), 0); - using kl_move = kl_move_struct; - thread_local_active_schedule_data active_schedule_data; - active_schedule_data.initialize_cost(0.0); + using KlMove = KlMoveStruct; + ThreadLocalActiveScheduleData activeScheduleData; + activeScheduleData.InitializeCost(0.0); // Move 1: Move node 1 from P1 to P0 (make 0->1 local) - kl_move move1(1, 0.0, 1, 0, 0, 0); - kl_sched.apply_move(move1, active_schedule_data); - comm_ds.update_datastructure_after_move(move1, 0, 0); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_multiple_sequential_moves_1")); + KlMove move1(1, 0.0, 1, 0, 0, 0); + klSched.ApplyMove(move1, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move1, 0, 0); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "test_multiple_sequential_moves_1")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 8); // Node 1 sends - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0); // Node was moved away - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); // No receives at P0 + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 8); // Node 1 sends + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 0); // Node was moved away + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 0), 0); // No receives at P0 // Move 2: Move node 2 from P2 to P0 (chain more local) - kl_move move2(2, 0.0, 2, 0, 0, 0); - kl_sched.apply_move(move2, active_schedule_data); - comm_ds.update_datastructure_after_move(move2, 0, 0); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_multiple_sequential_moves_2")); + KlMove move2(2, 0.0, 2, 0, 0, 0); + klSched.ApplyMove(move2, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move2, 0, 0); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "test_multiple_sequential_moves_2")); // After move2: Nodes 0,1,2 all at P0, only 3 at P3 - 
BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 6); // Only node 2 sends off-proc - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 2), 0); // Node moved away - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 6); // P3 receives from node 2 + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 6); // Only node 2 sends off-proc + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 2), 0); // Node moved away + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 3), 6); // P3 receives from node 2 // Move 3: Move node 3 to P0 (everything local) - kl_move move3(3, 0.0, 3, 0, 0, 0); - kl_sched.apply_move(move3, active_schedule_data); - comm_ds.update_datastructure_after_move(move3, 0, 0); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_multiple_sequential_moves_3")); + KlMove move3(3, 0.0, 3, 0, 0, 0); + klSched.ApplyMove(move3, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move3, 0, 0); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "test_multiple_sequential_moves_3")); // After move3: All nodes at P0, all communication is local - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // All local - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(0), 0); // No communication cost + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 0); // All local + BOOST_CHECK_EQUAL(commDs.StepMaxComm(0), 0); // No communication cost } -BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) { - graph dag; +BOOST_AUTO_TEST_CASE(TestNodeWithMultipleChildren) { + Graph dag; // Tree structure: Node 0 has three children (1, 2, 3) - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 1, 1); // 2 - dag.add_vertex(1, 1, 1); // 3 + dag.AddVertex(1, 10, 1); // 0 + dag.AddVertex(1, 1, 1); // 1 + dag.AddVertex(1, 1, 1); // 2 + dag.AddVertex(1, 1, 1); // 3 - dag.add_edge(0, 1, 1); - dag.add_edge(0, 2, 1); - dag.add_edge(0, 3, 1); + dag.AddEdge(0, 1, 1); + dag.AddEdge(0, 2, 1); + dag.AddEdge(0, 3, 1); - BspArchitecture arch; - 
arch.setNumberOfProcessors(4); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(4); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1, 2, 3}); - schedule.setAssignedSupersteps({0, 0, 0, 0}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({0, 1, 2, 3}); + schedule.SetAssignedSupersteps({0, 0, 0, 0}); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); - comm_ds.compute_comm_datastructures(0, 0); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); + commDs.ComputeCommDatastructures(0, 0); // Initial: Node 0 has 3 children on P1, P2, P3 (3 unique off-proc) // Send cost = 10 * 3 = 30 - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 30); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 2), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 30); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 1), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 2), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 3), 10); - using kl_move = kl_move_struct; - thread_local_active_schedule_data active_schedule_data; - active_schedule_data.initialize_cost(0.0); + using KlMove = KlMoveStruct; + ThreadLocalActiveScheduleData activeScheduleData; + activeScheduleData.InitializeCost(0.0); // Move child 1 to P0 (same as parent) - kl_move move1(1, 0.0, 1, 0, 0, 0); - kl_sched.apply_move(move1, active_schedule_data); - comm_ds.update_datastructure_after_move(move1, 0, 0); - 
BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_node_with_multiple_children")); + KlMove move1(1, 0.0, 1, 0, 0, 0); + klSched.ApplyMove(move1, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move1, 0, 0); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "test_node_with_multiple_children")); // After: Node 0 has 1 local child, 2 off-proc (P2, P3) // Send cost = 10 * 2 = 20 - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 20); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 0); // No longer receives - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 2), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 20); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 1), 0); // No longer receives + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 2), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 3), 10); - kl_move move2(2, 0.0, 2, 0, 0, 0); - kl_sched.apply_move(move2, active_schedule_data); - comm_ds.update_datastructure_after_move(move2, 0, 0); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_node_with_multiple_children_2")); + KlMove move2(2, 0.0, 2, 0, 0, 0); + klSched.ApplyMove(move2, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move2, 0, 0); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "test_node_with_multiple_children_2")); // After: Node 0 has 2 local children, 1 off-proc (P3) // Send cost = 10 * 1 = 10 - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 2), 0); // No longer receives - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 2), 0); // No longer receives + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 3), 10); // Move child 3 to P0 (all local) - kl_move move3(3, 0.0, 3, 0, 0, 0); - kl_sched.apply_move(move3, 
active_schedule_data); - comm_ds.update_datastructure_after_move(move3, 0, 0); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_node_with_multiple_children_3")); + KlMove move3(3, 0.0, 3, 0, 0, 0); + klSched.ApplyMove(move3, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move3, 0, 0); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "test_node_with_multiple_children_3")); // After: Node 0 has 3 local children // Send cost = 10 * 0 = 0 (all local) - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 0); // No longer receives + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 3), 0); // No longer receives } -BOOST_AUTO_TEST_CASE(test_cross_step_moves) { - graph dag; +BOOST_AUTO_TEST_CASE(TestCrossStepMoves) { + Graph dag; // 0 -> 1 -> 2 - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 8, 1); // 1 - dag.add_vertex(1, 6, 1); // 2 + dag.AddVertex(1, 10, 1); // 0 + dag.AddVertex(1, 8, 1); // 1 + dag.AddVertex(1, 6, 1); // 2 - dag.add_edge(0, 1, 1); - dag.add_edge(1, 2, 1); + dag.AddEdge(0, 1, 1); + dag.AddEdge(1, 2, 1); - BspArchitecture arch; - arch.setNumberOfProcessors(2); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(2); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1, 0}); - schedule.setAssignedSupersteps({0, 1, 2}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors({0, 1, 0}); + schedule.SetAssignedSupersteps({0, 1, 2}); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure 
comm_ds; - comm_ds.initialize(kl_sched); - comm_ds.compute_comm_datastructures(0, 2); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); + commDs.ComputeCommDatastructures(0, 2); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 8); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(1, 0), 8); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 8); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 1), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(1, 0), 8); - using kl_move = kl_move_struct; - thread_local_active_schedule_data active_schedule_data; - active_schedule_data.initialize_cost(0.0); + using KlMove = KlMoveStruct; + ThreadLocalActiveScheduleData activeScheduleData; + activeScheduleData.InitializeCost(0.0); // Move node 1 from (P1, step1) to (P0, step1) // This makes 0->1 edge stay cross-step but changes processor - kl_move move1(1, 0.0, 1, 1, 0, 1); - kl_sched.apply_move(move1, active_schedule_data); - comm_ds.update_datastructure_after_move(move1, 0, 2); + KlMove move1(1, 0.0, 1, 1, 0, 1); + klSched.ApplyMove(move1, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move1, 0, 2); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // Local (same processor) - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); // No receive needed + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 0); // Local (same processor) + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 0), 0); // No receive needed - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0); // Local (same processor) - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 0); // Node moved away + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 0); // Local (same processor) + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 0); // Node moved away - kl_move move2(1, 0.0, 0, 1, 0, 0); - kl_sched.apply_move(move2, active_schedule_data); - 
comm_ds.update_datastructure_after_move(move2, 0, 2); + KlMove move2(1, 0.0, 0, 1, 0, 0); + klSched.ApplyMove(move2, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move2, 0, 2); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // All local at P0 + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 0); // All local at P0 } -BOOST_AUTO_TEST_CASE(test_complex_scenario_user_provided) { - graph dag; +BOOST_AUTO_TEST_CASE(TestComplexScenarioUserProvided) { + Graph dag; // Vertices from user request // v1(0): w=2, c=9, m=2 - const auto v1 = dag.add_vertex(2, 9, 2); - const auto v2 = dag.add_vertex(3, 8, 4); - const auto v3 = dag.add_vertex(4, 7, 3); - const auto v4 = dag.add_vertex(5, 6, 2); - const auto v5 = dag.add_vertex(6, 5, 6); - const auto v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); // v7 (index 6) - const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) + const auto v1 = dag.AddVertex(2, 9, 2); + const auto v2 = dag.AddVertex(3, 8, 4); + const auto v3 = dag.AddVertex(4, 7, 3); + const auto v4 = dag.AddVertex(5, 6, 2); + const auto v5 = dag.AddVertex(6, 5, 6); + const auto v6 = dag.AddVertex(7, 4, 2); + dag.AddVertex(8, 3, 4); // v7 (index 6) + const auto v8 = dag.AddVertex(9, 2, 1); // v8 (index 7) // Edges - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); - - BspArchitecture arch; - arch.setNumberOfProcessors(2); // P0, P1 - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); - - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); + + BspArchitecture arch; + arch.SetNumberOfProcessors(2); // P0, P1 + arch.SetCommunicationCosts(1); + 
arch.SetSynchronisationCosts(1); + + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Schedule: {1, 1, 0, 0, 1, 0, 0, 1} // v1@P1, v2@P1, v3@P0, v4@P0, v5@P1, v6@P0, v7@P0, v8@P1 - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); // Supersteps: {0, 0, 1, 1, 2, 2, 3, 3} - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); - comm_ds.compute_comm_datastructures(0, 3); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); + commDs.ComputeCommDatastructures(0, 3); // === Initial State Verification === // ... (Same as before) ... - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 9); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 9); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(0), 9); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 13); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(1, 1), 13); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(1), 13); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(2), 0); - - using kl_move = kl_move_struct; - thread_local_active_schedule_data active_schedule_data; - active_schedule_data.initialize_cost(0.0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 9); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 0), 9); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(0), 9); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 13); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(1, 1), 13); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(1), 13); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(2), 0); + + using KlMove = KlMoveStruct; + ThreadLocalActiveScheduleData activeScheduleData; + activeScheduleData.InitializeCost(0.0); // === Move 1: Move v3 from P0 to P1 
(at Step 1) === - kl_move move1(v3, 0.0, 0, 1, 1, 1); - kl_sched.apply_move(move1, active_schedule_data); - comm_ds.update_datastructure_after_move(move1, 0, 3); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "complex_move1")); + KlMove move1(v3, 0.0, 0, 1, 1, 1); + klSched.ApplyMove(move1, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move1, 0, 3); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "complex_move1")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 9); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 6); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 7); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(1), 7); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 9); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 6); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 7); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(1), 7); // === Move 2: Move v4 from P0 to P1 (at Step 1) === - kl_move move2(v4, 0.0, 0, 1, 1, 1); - kl_sched.apply_move(move2, active_schedule_data); - comm_ds.update_datastructure_after_move(move2, 0, 3); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "complex_move2")); + KlMove move2(v4, 0.0, 0, 1, 1, 1); + klSched.ApplyMove(move2, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move2, 0, 3); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "complex_move2")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 7); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(1), 7); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 7); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(1), 7); // === Move 3: Move v5 from P1 to P0 (at Step 2) === - kl_move move3(v5, 0.0, 1, 2, 0, 
2); - kl_sched.apply_move(move3, active_schedule_data); - comm_ds.update_datastructure_after_move(move3, 0, 3); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "complex_move3")); + KlMove move3(v5, 0.0, 1, 2, 0, 2); + klSched.ApplyMove(move3, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move3, 0, 3); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "complex_move3")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 8); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(0), 8); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 7); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(2, 0), 5); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(2), 5); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 8); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(0), 8); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 7); + BOOST_CHECK_EQUAL(commDs.StepProcSend(2, 0), 5); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(2), 5); // === Move 4: Move v6 from P0 to P1 (at Step 2) === // v6 is child of v3 (P1, S1). @@ -727,24 +727,24 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_user_provided) { // v3 also sends to v5(P0). // So v3 targets: {P0}. Count = 1. // Send Cost v3 = 7. Unchanged. - kl_move move4(v6, 0.0, 0, 2, 1, 2); - kl_sched.apply_move(move4, active_schedule_data); - comm_ds.update_datastructure_after_move(move4, 0, 3); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "complex_move4")); + KlMove move4(v6, 0.0, 0, 2, 1, 2); + klSched.ApplyMove(move4, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move4, 0, 3); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "complex_move4")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 7); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 7); // === Move 5: Move v5 from P0 to P1 (at Step 2) === // v5 moves back to P1. // v3(P1) -> v5(P1), v6(P1). All local. // Send Cost v3 = 0. 
- kl_move move5(v5, 0.0, 0, 2, 1, 2); - kl_sched.apply_move(move5, active_schedule_data); - comm_ds.update_datastructure_after_move(move5, 0, 3); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "complex_move5")); + KlMove move5(v5, 0.0, 0, 2, 1, 2); + klSched.ApplyMove(move5, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move5, 0, 3); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "complex_move5")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepMaxComm(1), 0); } /** @@ -752,17 +752,17 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_user_provided) { * Uses a 5x5 Grid Graph (25 nodes) with 6 Supersteps and 4 Processors. * Performs various moves to verify incremental updates in a dense graph. */ -BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) { +BOOST_AUTO_TEST_CASE(TestGridGraphComplexMoves) { // Construct 5x5 Grid Graph (25 nodes, indices 0-24) - graph dag = osp::construct_grid_dag(5, 5); + Graph dag = osp::ConstructGridDag(5, 5); - BspArchitecture arch; - arch.setNumberOfProcessors(4); // P0..P3 - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(4); // P0..P3 + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Assign Processors and Supersteps std::vector procs(25); @@ -788,55 +788,55 @@ BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) { procs[7] = 3; steps[7] = 1; - schedule.setAssignedProcessors(procs); - schedule.setAssignedSupersteps(steps); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors(procs); + schedule.SetAssignedSupersteps(steps); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - 
kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); - comm_ds.compute_comm_datastructures(0, 5); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); + commDs.ComputeCommDatastructures(0, 5); // Initial check - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 3), 2); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 3), 2); - using kl_move = kl_move_struct; - thread_local_active_schedule_data active_schedule_data; - active_schedule_data.initialize_cost(0.0); + using KlMove = KlMoveStruct; + ThreadLocalActiveScheduleData activeScheduleData; + activeScheduleData.InitializeCost(0.0); // === Move 1: Node 12 (P1->P0) === - kl_move move1(12, 0.0, 1, 2, 0, 2); - kl_sched.apply_move(move1, active_schedule_data); - comm_ds.update_datastructure_after_move(move1, 0, 5); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "grid_move1")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 3), 1); + KlMove move1(12, 0.0, 1, 2, 0, 2); + klSched.ApplyMove(move1, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move1, 0, 5); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "grid_move1")); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 3), 1); // === Move 2: Node 8 (P0->P3) === - kl_move move2(8, 0.0, 0, 1, 3, 1); - kl_sched.apply_move(move2, active_schedule_data); - comm_ds.update_datastructure_after_move(move2, 0, 5); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "grid_move2")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 3), 3); + KlMove move2(8, 0.0, 0, 1, 3, 1); + klSched.ApplyMove(move2, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move2, 0, 5); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "grid_move2")); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 3), 3); // === Move 3: Node 12 (P0->P3) === - kl_move move3(12, 0.0, 0, 2, 3, 2); - kl_sched.apply_move(move3, 
active_schedule_data); - comm_ds.update_datastructure_after_move(move3, 0, 5); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "grid_move3")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 3), 2); + KlMove move3(12, 0.0, 0, 2, 3, 2); + klSched.ApplyMove(move3, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move3, 0, 5); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "grid_move3")); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 3), 2); // === Move 4: Node 7 (P3->P0) === - kl_move move4(7, 0.0, 3, 1, 0, 1); - kl_sched.apply_move(move4, active_schedule_data); - comm_ds.update_datastructure_after_move(move4, 0, 5); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "grid_move4")); + KlMove move4(7, 0.0, 3, 1, 0, 1); + klSched.ApplyMove(move4, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move4, 0, 5); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "grid_move4")); // Check P0 send contribution from Node 7. // Node 7 contributes 10. // We can check if P0 send >= 10. - BOOST_CHECK_GE(comm_ds.step_proc_send(1, 0), 1); + BOOST_CHECK_GE(commDs.StepProcSend(1, 0), 1); } /** @@ -844,18 +844,18 @@ BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) { * Uses a Butterfly Graph (FFT pattern) to test structured communication patterns. * Stages = 2 (12 nodes). 3 Supersteps. 2 Processors. */ -BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { +BOOST_AUTO_TEST_CASE(TestButterflyGraphMoves) { // Stages=2 -> 3 levels of 4 nodes each = 12 nodes. // Level 0: 0-3. Level 1: 4-7. Level 2: 8-11. 
- graph dag = osp::construct_butterfly_dag(2); + Graph dag = osp::ConstructButterflyDag(2); - BspArchitecture arch; - arch.setNumberOfProcessors(2); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(2); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Assign: // Level 0: P0, Step 0 @@ -876,16 +876,16 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { } } - schedule.setAssignedProcessors(procs); - schedule.setAssignedSupersteps(steps); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors(procs); + schedule.SetAssignedSupersteps(steps); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); - comm_ds.compute_comm_datastructures(0, 2); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); + commDs.ComputeCommDatastructures(0, 2); // Initial State: // Step 0 (P0): Nodes 0-3 send to Level 1 (P1). @@ -893,15 +893,15 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { // 0 -> 4, 6. (Both P1). Count=1. Cost=10. // 1 -> 5, 7. (Both P1). Count=1. Cost=10. // ... All 4 nodes send to P1. Total P0 Send = 40. - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 4); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 4); // Step 1 (P1): Nodes 4-7 send to Level 2 (P0). // All 4 nodes send to P0. Total P1 Send = 40. 
- BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 4); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 4); - using kl_move = kl_move_struct; - thread_local_active_schedule_data active_schedule_data; - active_schedule_data.initialize_cost(0.0); + using KlMove = KlMoveStruct; + ThreadLocalActiveScheduleData activeScheduleData; + activeScheduleData.InitializeCost(0.0); // === Move 1: Move Node 4 (Level 1) P1 -> P0 === // Node 4 moves to P0. @@ -909,20 +909,20 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { // Node 0 -> 4(P0), 6(P1). Targets {P0, P1}. P0 is local. Targets {P1}. Count=1. // Node 1 -> 5(P1), 7(P1). Targets {P1}. Count=1. // Step 0 Send Cost unchanged (still 40). - kl_move move1(4, 0.0, 1, 1, 0, 1); - kl_sched.apply_move(move1, active_schedule_data); - comm_ds.update_datastructure_after_move(move1, 0, 2); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "butterfly_move1")); + KlMove move1(4, 0.0, 1, 1, 0, 1); + klSched.ApplyMove(move1, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move1, 0, 2); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "butterfly_move1")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 4); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 4); // Impact on Step 1 (Node 4): // Node 4 (P0) -> 8(P0), 10(P0). All local. // Node 4 stops sending. (Was 10). // P1 Send decreases by 10 -> 30. // P0 Send increases by 0 (all local). - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 3); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 3); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 0); // === Move 2: Move Node 6 (Level 1) P1 -> P0 === // Node 6 moves to P0. @@ -930,17 +930,17 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { // Node 0 -> 4(P0), 6(P0). All local. // Node 0 stops sending. (Was 10). // P0 Send decreases by 10 -> 30. 
- kl_move move2(6, 0.0, 1, 1, 0, 1); - kl_sched.apply_move(move2, active_schedule_data); - comm_ds.update_datastructure_after_move(move2, 0, 2); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "butterfly_move2")); + KlMove move2(6, 0.0, 1, 1, 0, 1); + klSched.ApplyMove(move2, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move2, 0, 2); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "butterfly_move2")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 2); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 2); // Impact on Step 1 (Node 6): // Node 6 (P0) -> 8(P0), 10(P0). All local. // P1 Send decreases by 10 -> 20. - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 2); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 2); // === Move 3: Move Node 0 (Level 0) P0 -> P1 === // Node 0 moves to P1. @@ -949,10 +949,10 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { // Node 1 (P0) -> 5(P1), 7(P1). Targets {P1}. Count=1. Cost=10. // P0 Send: 10 (from Node 1). // P1 Send: 10 (from Node 0). - kl_move move3(0, 0.0, 0, 0, 1, 0); - kl_sched.apply_move(move3, active_schedule_data); - comm_ds.update_datastructure_after_move(move3, 0, 2); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "butterfly_move3")); + KlMove move3(0, 0.0, 0, 0, 1, 0); + klSched.ApplyMove(move3, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move3, 0, 2); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "butterfly_move3")); // === Move 4: Move Node 8 (Level 2) P0 -> P1 === // Node 8 moves to P1. @@ -960,10 +960,10 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { // Node 4 (P0) -> 8(P1), 10(P0). Targets {P1}. Count=1. Cost=10. // Node 6 (P0) -> 8(P1), 10(P0). Targets {P1}. Count=1. Cost=10. // P0 Send increases. 
- kl_move move4(8, 0.0, 0, 2, 1, 2); - kl_sched.apply_move(move4, active_schedule_data); - comm_ds.update_datastructure_after_move(move4, 0, 2); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "butterfly_move4")); + KlMove move4(8, 0.0, 0, 2, 1, 2); + klSched.ApplyMove(move4, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move4, 0, 2); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "butterfly_move4")); } /** @@ -971,18 +971,18 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { * Uses a Ladder Graph (Rungs=5 -> 12 nodes). * Tests moving rungs between processors. */ -BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { +BOOST_AUTO_TEST_CASE(TestLadderGraphMoves) { // Ladder with 5 rungs -> 6 pairs of nodes = 12 nodes. // Pairs: (0,1), (2,3), ... (10,11). - graph dag = osp::construct_ladder_dag(5); + Graph dag = osp::ConstructLadderDag(5); - BspArchitecture arch; - arch.setNumberOfProcessors(2); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); + BspArchitecture arch; + arch.SetNumberOfProcessors(2); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(1); - BspInstance instance(dag, arch); - BspSchedule schedule(instance); + BspInstance instance(dag, arch); + BspSchedule schedule(instance); // Assign: // Even nodes (Left rail): P0 @@ -997,16 +997,16 @@ BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { steps[2 * i + 1] = i; } - schedule.setAssignedProcessors(procs); - schedule.setAssignedSupersteps(steps); - schedule.updateNumberOfSupersteps(); + schedule.SetAssignedProcessors(procs); + schedule.SetAssignedSupersteps(steps); + schedule.UpdateNumberOfSupersteps(); - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); - comm_ds.compute_comm_datastructures(0, 5); + MaxCommDatastructure commDs; + commDs.Initialize(klSched); + 
commDs.ComputeCommDatastructures(0, 5); // Initial State: // Rung i (u1, v1) connects to Rung i+1 (u2, v2). @@ -1015,13 +1015,13 @@ BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { // This applies for Steps 0 to 4. for (unsigned s = 0; s < 5; ++s) { - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(s, 0), 1); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(s, 1), 1); + BOOST_CHECK_EQUAL(commDs.StepProcSend(s, 0), 1); + BOOST_CHECK_EQUAL(commDs.StepProcSend(s, 1), 1); } - using kl_move = kl_move_struct; - thread_local_active_schedule_data active_schedule_data; - active_schedule_data.initialize_cost(0.0); + using KlMove = KlMoveStruct; + ThreadLocalActiveScheduleData activeScheduleData; + activeScheduleData.InitializeCost(0.0); // === Move 1: Move Node 1 (Rung 0, Right) P1 -> P0 === // Node 1 moves to P0. @@ -1031,13 +1031,13 @@ BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { // v1(1) -> u2(2, P0), v2(3, P1). Targets {P1}. Cost=10. // P0 Send = 10 + 10 = 20. // P1 Send = 0. - kl_move move1(1, 0.0, 1, 0, 0, 0); - kl_sched.apply_move(move1, active_schedule_data); - comm_ds.update_datastructure_after_move(move1, 0, 5); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "ladder_move1")); + KlMove move1(1, 0.0, 1, 0, 0, 0); + klSched.ApplyMove(move1, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move1, 0, 5); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "ladder_move1")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 2); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 2); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 0); // === Move 2: Move Node 3 (Rung 1, Right) P1 -> P0 === // Node 3 moves to P0. @@ -1046,18 +1046,18 @@ BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { // u1(0) -> u2(2, P0), v2(3, P0). All local. Cost=0. // v1(1) -> u2(2, P0), v2(3, P0). All local. Cost=0. // P0 Send at Step 0 = 0. 
- kl_move move2(3, 0.0, 1, 1, 0, 1); - kl_sched.apply_move(move2, active_schedule_data); - comm_ds.update_datastructure_after_move(move2, 0, 5); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "ladder_move2")); + KlMove move2(3, 0.0, 1, 1, 0, 1); + klSched.ApplyMove(move2, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move2, 0, 5); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "ladder_move2")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 0); // Impact on Step 1 (Nodes 2, 3): // u2(2, P0) -> u3(4, P0), v3(5, P1). Targets {P1}. Cost=10. // v2(3, P0) -> u3(4, P0), v3(5, P1). Targets {P1}. Cost=10. // P0 Send at Step 1 = 20. - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 2); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 2); // === Move 3: Move Node 0 (Rung 0, Left) P0 -> P1 === // Node 0 moves to P1. @@ -1067,10 +1067,10 @@ BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { // v1(1, P0) -> u2(2, P0), v2(3, P0). All local. Cost=0. // P0 Send: 0. // P1 Send: 10. - kl_move move3(0, 0.0, 0, 0, 1, 0); - kl_sched.apply_move(move3, active_schedule_data); - comm_ds.update_datastructure_after_move(move3, 0, 5); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "ladder_move3")); + KlMove move3(0, 0.0, 0, 0, 1, 0); + klSched.ApplyMove(move3, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move3, 0, 5); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "ladder_move3")); // === Move 4: Move Node 2 (Rung 1, Left) P0 -> P1 === // Node 2 moves to P1. @@ -1080,69 +1080,69 @@ BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { // v1(1, P0) -> u2(2, P1), v2(3, P0). Targets {P1}. Cost=10. // P0 Send: 10. // P1 Send: 10. 
- kl_move move4(2, 0.0, 0, 1, 1, 1); - kl_sched.apply_move(move4, active_schedule_data); - comm_ds.update_datastructure_after_move(move4, 0, 5); - BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "ladder_move4")); + KlMove move4(2, 0.0, 0, 1, 1, 1); + klSched.ApplyMove(move4, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move4, 0, 5); + BOOST_CHECK(ValidateCommDatastructures(commDs, klSched, instance, "ladder_move4")); } -BOOST_AUTO_TEST_CASE(test_lazy_and_buffered_modes) { +BOOST_AUTO_TEST_CASE(TestLazyAndBufferedModes) { std::cout << "Setup Graph" << std::endl; - graph instance; - instance.add_vertex(1, 10, 1); - instance.add_vertex(1, 10, 1); - instance.add_vertex(1, 10, 1); + Graph instance; + instance.AddVertex(1, 10, 1); + instance.AddVertex(1, 10, 1); + instance.AddVertex(1, 10, 1); - instance.add_edge(0, 1, 1); - instance.add_edge(0, 2, 1); + instance.AddEdge(0, 1, 1); + instance.AddEdge(0, 2, 1); std::cout << "Setup Arch" << std::endl; - osp::BspArchitecture arch; - arch.setNumberOfProcessors(2); - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(0); + osp::BspArchitecture arch; + arch.SetNumberOfProcessors(2); + arch.SetCommunicationCosts(1); + arch.SetSynchronisationCosts(0); std::cout << "Setup BspInstance" << std::endl; - osp::BspInstance bsp_instance(instance, arch); + osp::BspInstance bspInstance(instance, arch); std::cout << "Setup Schedule" << std::endl; - osp::BspSchedule schedule(bsp_instance); - schedule.setAssignedProcessor(0, 0); - schedule.setAssignedProcessor(1, 1); - schedule.setAssignedProcessor(2, 1); + osp::BspSchedule schedule(bspInstance); + schedule.SetAssignedProcessor(0, 0); + schedule.SetAssignedProcessor(1, 1); + schedule.SetAssignedProcessor(2, 1); - schedule.setAssignedSuperstep(0, 0); - schedule.setAssignedSuperstep(1, 2); - schedule.setAssignedSuperstep(2, 4); + schedule.SetAssignedSuperstep(0, 0); + schedule.SetAssignedSuperstep(1, 2); + schedule.SetAssignedSuperstep(2, 4); - 
schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); std::cout << "Setup KL Sched" << std::endl; - kl_active_schedule_t kl_sched; - kl_sched.initialize(schedule); + KlActiveScheduleT klSched; + klSched.Initialize(schedule); - thread_local_active_schedule_data active_schedule_data; - active_schedule_data.initialize_cost(0.0); + ThreadLocalActiveScheduleData activeScheduleData; + activeScheduleData.InitializeCost(0.0); std::cout << "Setup Complete" << std::endl; - std::cout << "Num Vertices: " << instance.num_vertices() << std::endl; - std::cout << "Num Procs: " << arch.numberOfProcessors() << std::endl; + std::cout << "Num Vertices: " << instance.NumVertices() << std::endl; + std::cout << "Num Procs: " << arch.NumberOfProcessors() << std::endl; std::cout << "Start Eager Test" << std::endl; { using CommPolicy = osp::EagerCommCostPolicy; - osp::max_comm_datastructure comm_ds; + MaxCommDatastructure commDs; std::cout << "Initialize Eager Comm DS" << std::endl; - comm_ds.initialize(kl_sched); + commDs.Initialize(klSched); std::cout << "Checking node_lambda_map" << std::endl; - std::cout << "node_lambda_vec size: " << comm_ds.node_lambda_map.node_lambda_vec.size() << std::endl; - if (comm_ds.node_lambda_map.node_lambda_vec.size() > 0) { - std::cout << "node_lambda_vec[0] size: " << comm_ds.node_lambda_map.node_lambda_vec[0].size() << std::endl; + std::cout << "node_lambda_vec size: " << commDs.nodeLambdaMap_.nodeLambdaVec_.size() << std::endl; + if (commDs.nodeLambdaMap_.nodeLambdaVec_.size() > 0) { + std::cout << "node_lambda_vec[0] size: " << commDs.nodeLambdaMap_.nodeLambdaVec_[0].size() << std::endl; } std::cout << "Compute Eager Comm DS" << std::endl; - comm_ds.compute_comm_datastructures(0, 4); + commDs.ComputeCommDatastructures(0, 4); std::cout << "Eager Done" << std::endl; } @@ -1150,11 +1150,11 @@ BOOST_AUTO_TEST_CASE(test_lazy_and_buffered_modes) { // --- Test Lazy Policy --- { using CommPolicy = osp::LazyCommCostPolicy; - 
osp::max_comm_datastructure comm_ds; + MaxCommDatastructure commDs; std::cout << "Initialize Comm DS" << std::endl; - comm_ds.initialize(kl_sched); + commDs.Initialize(klSched); std::cout << "Compute Comm DS" << std::endl; - comm_ds.compute_comm_datastructures(0, 4); + commDs.ComputeCommDatastructures(0, 4); // Expected Behavior for Lazy: // Node 0 (P0) sends to P1. @@ -1163,113 +1163,113 @@ BOOST_AUTO_TEST_CASE(test_lazy_and_buffered_modes) { // Cost = 10 * 1.0 = 10. // Lazy: Send and Recv at min(2, 4) - 1 = Step 1. - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(2, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(3, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(4, 0), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(2, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(3, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(4, 1), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(1, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(2, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(3, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 0), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(1, 1), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(2, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(3, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0); - - using kl_move = osp::kl_move_struct; - kl_move move(1, 0.0, 1, 2, 1, 3); // Node 1, Step 2->3, Proc 1->1 - kl_sched.apply_move(move, active_schedule_data); - comm_ds.update_datastructure_after_move(move, 0, 4); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 10); + 
BOOST_CHECK_EQUAL(commDs.StepProcSend(2, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(3, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(4, 0), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(2, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(3, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(4, 1), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(1, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(2, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(3, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(4, 0), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(1, 1), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(2, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(3, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(4, 1), 0); + + using KlMove = osp::KlMoveStruct; + KlMove move(1, 0.0, 1, 2, 1, 3); // Node 1, Step 2->3, Proc 1->1 + klSched.ApplyMove(move, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move, 0, 4); // After move: Children at {3, 4}. Min = 3. Send/Recv at Step 2. 
- BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(2, 0), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(3, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(4, 0), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(2, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(3, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(4, 1), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(1, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(2, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(3, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 0), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(1, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(2, 1), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(3, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(2, 0), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(3, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(4, 0), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(2, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(3, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(4, 1), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(1, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(2, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(3, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(4, 0), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 1), 0); + 
BOOST_CHECK_EQUAL(commDs.StepProcReceive(1, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(2, 1), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(3, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(4, 1), 0); // Reset Node 1 to Step 2 for next test - kl_move move_back(1, 0.0, 1, 3, 1, 2); - kl_sched.apply_move(move_back, active_schedule_data); + KlMove moveBack(1, 0.0, 1, 3, 1, 2); + klSched.ApplyMove(moveBack, activeScheduleData); } // --- Test Buffered Policy --- { using CommPolicy = osp::BufferedCommCostPolicy; - osp::max_comm_datastructure comm_ds; - comm_ds.initialize(kl_sched); - comm_ds.compute_comm_datastructures(0, 4); + osp::MaxCommDatastructure commDs; + commDs.Initialize(klSched); + commDs.ComputeCommDatastructures(0, 4); // Buffered: Send at Step 0. Recv at min(2, 4) - 1 = Step 1. - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(2, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(3, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(4, 0), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(2, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(3, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(4, 1), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(1, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(2, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(3, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 0), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(1, 1), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(2, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(3, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0); - - using kl_move = osp::kl_move_struct; - kl_move move(1, 0.0, 1, 2, 
1, 3); // Node 1, Step 2->3, Proc 1->1 - kl_sched.apply_move(move, active_schedule_data); - comm_ds.update_datastructure_after_move(move, 0, 4); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(2, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(3, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(4, 0), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(2, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(3, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(4, 1), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(1, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(2, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(3, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(4, 0), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(1, 1), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(2, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(3, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(4, 1), 0); + + using KlMove = osp::KlMoveStruct; + KlMove move(1, 0.0, 1, 2, 1, 3); // Node 1, Step 2->3, Proc 1->1 + klSched.ApplyMove(move, activeScheduleData); + commDs.UpdateDatastructureAfterMove(move, 0, 4); // After move: Children at {3, 4}. Min = 3. Recv at Step 2. Send still at Step 0. 
- BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(2, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(3, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(4, 0), 0); - - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(1, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(2, 1), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(3, 1), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(0, 0), 10); + BOOST_CHECK_EQUAL(commDs.StepProcSend(1, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(2, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(3, 0), 0); + BOOST_CHECK_EQUAL(commDs.StepProcSend(4, 0), 0); + + BOOST_CHECK_EQUAL(commDs.StepProcReceive(0, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(1, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(2, 1), 10); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(3, 1), 0); + BOOST_CHECK_EQUAL(commDs.StepProcReceive(4, 1), 0); } } diff --git a/tests/kl_bsp_improver_test.cpp b/tests/kl_bsp_improver_test.cpp index b0a0a3db..434b40cc 100644 --- a/tests/kl_bsp_improver_test.cpp +++ b/tests/kl_bsp_improver_test.cpp @@ -8,127 +8,127 @@ #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/comm_cost_modules/kl_bsp_comm_cost.hpp" +#include 
"osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_test.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "test_graphs.hpp" using namespace osp; -template -void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; - int comm_weight = 7; +template +void AddMemWeights(GraphT &dag) { + int memWeight = 1; + int commWeight = 7; - for (const auto &v : dag.vertices()) { - dag.set_vertex_work_weight(v, static_cast>(mem_weight++ % 10 + 2)); - dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 10 + 2)); - dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 10 + 2)); + for (const auto &v : dag.Vertices()) { + dag.SetVertexWorkWeight(v, static_cast>(memWeight++ % 10 + 2)); + dag.SetVertexMemWeight(v, static_cast>(memWeight++ % 10 + 2)); + dag.SetVertexCommWeight(v, static_cast>(commWeight++ % 10 + 2)); } } -BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlImproverInnerLoopTest) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = 
dag.AddVertex(7, 4, 2); + dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using comm_cost_t = kl_bsp_comm_cost_function; - using kl_improver_test = kl_improver_test; + using CommCostT = KlBspCommCostFunction; + using KlImproverTest = KlImproverTest; - kl_improver_test kl; + KlImproverTest kl; - kl.setup_schedule(schedule); + kl.SetupSchedule(schedule); - auto &kl_active_schedule = kl.get_active_schedule(); + auto &klActiveSchedule = kl.GetActiveSchedule(); // Verify work datastructures are set up correctly - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 5.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); - 
BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - - BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); - BOOST_CHECK_EQUAL(kl_active_schedule.is_feasible(), true); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 5.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(1), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(1), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(2), 7.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(2), 6.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(3), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(3), 8.0); + + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 4); + BOOST_CHECK_EQUAL(klActiveSchedule.IsFeasible(), true); // Check initial cost consistency - double initial_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double initial_tracked = kl.get_current_cost(); - BOOST_CHECK_CLOSE(initial_recomputed, initial_tracked, 0.00001); + double initialRecomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double initialTracked = kl.GetCurrentCost(); + BOOST_CHECK_CLOSE(initialRecomputed, initialTracked, 0.00001); // Insert nodes into gain heap - auto node_selection = kl.insert_gain_heap_test_penalty({2, 3}); + auto nodeSelection = kl.InsertGainHeapTestPenalty({2, 3}); // Run first iteration and check cost consistency - auto recompute_max_gain = kl.run_inner_iteration_test(); + auto recomputeMaxGain = kl.RunInnerIterationTest(); - double iter1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double iter1_tracked = kl.get_current_cost(); - BOOST_CHECK_CLOSE(iter1_recomputed, iter1_tracked, 0.00001); + double 
iter1Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double iter1Tracked = kl.GetCurrentCost(); + BOOST_CHECK_CLOSE(iter1Recomputed, iter1Tracked, 0.00001); // Run second iteration - auto &node3_affinity = kl.get_affinity_table()[3]; + auto &node3Affinity = kl.GetAffinityTable()[3]; - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); - double iter2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double iter2_tracked = kl.get_current_cost(); + double iter2Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double iter2Tracked = kl.GetCurrentCost(); - BOOST_CHECK_CLOSE(iter2_recomputed, iter2_tracked, 0.00001); + BOOST_CHECK_CLOSE(iter2Recomputed, iter2Tracked, 0.00001); // Run third iteration - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); - double iter3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double iter3_tracked = kl.get_current_cost(); - BOOST_CHECK_CLOSE(iter3_recomputed, iter3_tracked, 0.00001); + double iter3Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double iter3Tracked = kl.GetCurrentCost(); + BOOST_CHECK_CLOSE(iter3Recomputed, iter3Tracked, 0.00001); // Run fourth iteration - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); - double iter4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); - double iter4_tracked = kl.get_current_cost(); - BOOST_CHECK_CLOSE(iter4_recomputed, iter4_tracked, 0.00001); + double iter4Recomputed = kl.GetCommCostF().ComputeScheduleCostTest(); + double iter4Tracked = kl.GetCurrentCost(); + BOOST_CHECK_CLOSE(iter4Recomputed, iter4Tracked, 0.00001); } // BOOST_AUTO_TEST_CASE(kl_lambda_total_comm_large_test_graphs) { // std::vector filenames_graph = large_spaa_graphs(); -// using graph = computational_dag_edge_idx_vector_impl_def_int_t; +// using graph = 
ComputationalDagEdgeIdxVectorImplDefIntT; // // Getting root git directory // std::filesystem::path cwd = std::filesystem::current_path(); // std::cout << cwd << std::endl; @@ -138,18 +138,18 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { // } // for (auto &filename_graph : filenames_graph) { -// GreedyBspScheduler test_scheduler; +// GreedyBspScheduler test_scheduler; // BspInstance instance; -// bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), -// instance.getComputationalDag()); +// bool status_graph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), +// instance.GetComputationalDag()); -// instance.getArchitecture().setSynchronisationCosts(500); -// instance.getArchitecture().setCommunicationCosts(5); -// instance.getArchitecture().setNumberOfProcessors(4); +// instance.GetArchitecture().SetSynchronisationCosts(500); +// instance.GetArchitecture().SetCommunicationCosts(5); +// instance.GetArchitecture().SetNumberOfProcessors(4); // std::vector> send_cost = {{0, 1, 4, 4}, {1, 0, 4, 4}, {4, 4, 0, 1}, {4, 4, 1, 0}}; -// instance.getArchitecture().SetSendCosts(send_cost); +// instance.GetArchitecture().SetSendCosts(send_cost); // if (!status_graph) { @@ -157,91 +157,91 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { // BOOST_CHECK(false); // } -// add_mem_weights(instance.getComputationalDag()); +// add_mem_weights(instance.GetComputationalDag()); // BspSchedule schedule(instance); -// const auto result = test_scheduler.computeSchedule(schedule); +// const auto result = test_scheduler.ComputeSchedule(schedule); -// schedule.updateNumberOfSupersteps(); +// schedule.UpdateNumberOfSupersteps(); -// std::cout << "initial scedule with costs: " << schedule.computeCosts() << " and " -// << schedule.numberOfSupersteps() << " number of supersteps" << std::endl; +// std::cout << "initial scedule with costs: " << schedule.ComputeCosts() << " and " +// << 
schedule.NumberOfSupersteps() << " number of supersteps" << std::endl; // BspSchedule schedule_2(schedule); -// BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); -// BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); -// BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); +// BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); +// BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); +// BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); // kl_total_lambda_comm_improver kl_total_lambda; // auto start_time = std::chrono::high_resolution_clock::now(); -// auto status = kl_total_lambda.improveSchedule(schedule); +// auto status = kl_total_lambda.ImproveSchedule(schedule); // auto finish_time = std::chrono::high_resolution_clock::now(); // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); -// std::cout << "kl lambda new finished in " << duration << " seconds, costs: " << schedule.computeCosts() +// std::cout << "kl lambda new finished in " << duration << " seconds, costs: " << schedule.ComputeCosts() // << " and lambda costs: " << schedule.computeTotalLambdaCosts() << " with " -// << schedule.numberOfSupersteps() << " number of supersteps" << std::endl; +// << schedule.NumberOfSupersteps() << " number of supersteps" << std::endl; -// BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); +// BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); // kl_bsp_comm_improver_mt kl(42); // kl.setTimeQualityParameter(2.0); // start_time = std::chrono::high_resolution_clock::now(); -// status = kl.improveSchedule(schedule); +// status = kl.ImproveSchedule(schedule); // finish_time = std::chrono::high_resolution_clock::now(); // duration = std::chrono::duration_cast(finish_time - start_time).count(); -// std::cout << "kl 
new finished in " << duration << " seconds, costs: " << schedule.computeCosts() << " with " -// << schedule.numberOfSupersteps() << " number of supersteps" << std::endl; +// std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.ComputeCosts() << " with " +// << schedule.NumberOfSupersteps() << " number of supersteps" << std::endl; -// BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); +// BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); // BspScheduleCS schedule_cs(schedule); // HillClimbingForCommSteps hc_comm_steps; // start_time = std::chrono::high_resolution_clock::now(); -// status = hc_comm_steps.improveSchedule(schedule_cs); +// status = hc_comm_steps.ImproveSchedule(schedule_cs); // finish_time = std::chrono::high_resolution_clock::now(); // duration = std::chrono::duration_cast(finish_time - start_time).count(); -// std::cout << "hc_comm_steps finished in " << duration << " seconds, costs: " << schedule_cs.computeCosts() -// << " with " << schedule_cs.numberOfSupersteps() << " number of supersteps" << std::endl; +// std::cout << "hc_comm_steps finished in " << duration << " seconds, costs: " << schedule_cs.ComputeCosts() +// << " with " << schedule_cs.NumberOfSupersteps() << " number of supersteps" << std::endl; -// BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); +// BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); -// kl_total_lambda.improveSchedule(schedule_2); +// kl_total_lambda.ImproveSchedule(schedule_2); // HillClimbingScheduler hc; // start_time = 
std::chrono::high_resolution_clock::now(); -// status = hc.improveSchedule(schedule_2); +// status = hc.ImproveSchedule(schedule_2); // finish_time = std::chrono::high_resolution_clock::now(); // duration = std::chrono::duration_cast(finish_time - start_time).count(); -// std::cout << "hc finished in " << duration << " seconds, costs: " << schedule_2.computeCosts() << " with " -// << schedule_2.numberOfSupersteps() << " number of supersteps" << std::endl; +// std::cout << "hc finished in " << duration << " seconds, costs: " << schedule_2.ComputeCosts() << " with " +// << schedule_2.NumberOfSupersteps() << " number of supersteps" << std::endl; -// BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); +// BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// BOOST_CHECK_EQUAL(schedule_2.SatisfiesPrecedenceConstraints(), true); // BspScheduleCS schedule_cs_2(schedule_2); // start_time = std::chrono::high_resolution_clock::now(); -// status = hc_comm_steps.improveSchedule(schedule_cs_2); +// status = hc_comm_steps.ImproveSchedule(schedule_cs_2); // finish_time = std::chrono::high_resolution_clock::now(); // duration = std::chrono::duration_cast(finish_time - start_time).count(); -// std::cout << "hc_comm_steps finished in " << duration << " seconds, costs: " << schedule_cs_2.computeCosts() -// << " with " << schedule_cs_2.numberOfSupersteps() << " number of supersteps" << std::endl; +// std::cout << "hc_comm_steps finished in " << duration << " seconds, costs: " << schedule_cs_2.ComputeCosts() +// << " with " << schedule_cs_2.NumberOfSupersteps() << " number of supersteps" << std::endl; -// BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// BOOST_CHECK_EQUAL(schedule_cs_2.satisfiesPrecedenceConstraints(), true); +// BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == 
ReturnStatus::BEST_FOUND); +// BOOST_CHECK_EQUAL(schedule_cs_2.SatisfiesPrecedenceConstraints(), true); // } // } diff --git a/tests/kl_lambda.cpp b/tests/kl_lambda.cpp index 8090fa1d..bb0fcfc3 100644 --- a/tests/kl_lambda.cpp +++ b/tests/kl_lambda.cpp @@ -23,85 +23,82 @@ limitations under the License. #include "osp/auxiliary/io/arch_file_reader.hpp" #include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_test.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "test_graphs.hpp" using namespace osp; -template -void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; - int comm_weight = 7; +template +void AddMemWeights(GraphT &dag) { + int memWeight = 1; + int commWeight = 7; - for (const auto &v : dag.vertices()) { - dag.set_vertex_work_weight(v, static_cast>(mem_weight++ % 10 + 2)); - dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 10 + 2)); - dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 10 + 2)); + for (const auto &v : dag.Vertices()) { + dag.SetVertexWorkWeight(v, static_cast>(memWeight++ % 10 + 2)); + dag.SetVertexMemWeight(v, static_cast>(memWeight++ % 10 + 2)); + dag.SetVertexCommWeight(v, static_cast>(commWeight++ % 10 + 2)); } } -template -void add_node_types(Graph_t &dag) { 
- unsigned node_type = 0; +template +void AddNodeTypes(GraphT &dag) { + unsigned nodeType = 0; - for (const auto &v : dag.vertices()) { - dag.set_vertex_type(v, node_type++ % 2); + for (const auto &v : dag.Vertices()) { + dag.SetVertexType(v, nodeType++ % 2); } } -template -void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::set &nodes) { +template +void CheckEqualAffinityTable(TableT &table1, TableT &table2, const std::set &nodes) { for (auto i : nodes) { - BOOST_CHECK_EQUAL(table_1[i].size(), table_2[i].size()); - if (table_1[i].size() != table_2[i].size()) { + BOOST_CHECK_EQUAL(table1[i].size(), table2[i].size()); + if (table1[i].size() != table2[i].size()) { continue; } - for (size_t j = 0; j < table_1[i].size(); ++j) { - BOOST_CHECK_EQUAL(table_1[i][j].size(), table_2[i][j].size()); - if (table_1[i][j].size() != table_2[i][j].size()) { + for (size_t j = 0; j < table1[i].size(); ++j) { + BOOST_CHECK_EQUAL(table1[i][j].size(), table2[i][j].size()); + if (table1[i][j].size() != table2[i][j].size()) { continue; } - for (size_t k = 0; k < table_1[i][j].size(); ++k) { - BOOST_CHECK(std::abs(table_1[i][j][k] - table_2[i][j][k]) < 0.000001); + for (size_t k = 0; k < table1[i][j].size(); ++k) { + BOOST_CHECK(std::abs(table1[i][j][k] - table2[i][j][k]) < 0.000001); - if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) { - std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] - << ", table_2=" << table_2[i][j][k] << std::endl; + if (std::abs(table1[i][j][k] - table2[i][j][k]) > 0.000001) { + std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table1[i][j][k] + << ", table_2=" << table2[i][j][k] << std::endl; } } } } } -void check_equal_lambda_map(const std::vector> &map_1, - const std::vector> &map_2) { - BOOST_CHECK_EQUAL(map_1.size(), map_2.size()); - if (map_1.size() != map_2.size()) { +void CheckEqualLambdaMap(const std::vector> &map1, + const std::vector> 
&map2) { + BOOST_CHECK_EQUAL(map1.size(), map2.size()); + if (map1.size() != map2.size()) { return; } - for (size_t i = 0; i < map_1.size(); ++i) { - for (const auto &[key, value] : map_1[i]) { - BOOST_CHECK_EQUAL(value, map_2[i].at(key)); + for (size_t i = 0; i < map1.size(); ++i) { + for (const auto &[key, value] : map1[i]) { + BOOST_CHECK_EQUAL(value, map2[i].at(key)); - if (value != map_2[i].at(key)) { - std::cout << "Mismatch at [" << i << "][" << key << "]: map_1=" << value << ", map_2=" << map_2[i].at(key) + if (value != map2[i].at(key)) { + std::cout << "Mismatch at [" << i << "][" << key << "]: map_1=" << value << ", map_2=" << map2[i].at(key) << std::endl; } } } } -BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) { - std::vector filenames_graph = test_graphs(); +BOOST_AUTO_TEST_CASE(KlLambdaImproverWithNodeTypesTest) { + std::vector filenamesGraph = TestGraphs(); - using graph = computational_dag_edge_idx_vector_impl_def_int_t; + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -111,54 +108,54 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) { std::cout << cwd << std::endl; } - GreedyBspScheduler test_scheduler; + GreedyBspScheduler testScheduler; - for (auto &filename_graph : filenames_graph) { - BspInstance instance; + for (auto &filenameGraph : filenamesGraph) { + BspInstance instance; - bool status_graph - = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); + bool statusGraph + = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), instance.GetComputationalDag()); - instance.getArchitecture().setSynchronisationCosts(5); - instance.getArchitecture().setCommunicationCosts(5); - instance.getArchitecture().setNumberOfProcessors(4); + instance.GetArchitecture().SetSynchronisationCosts(5); + 
instance.GetArchitecture().SetCommunicationCosts(5); + instance.GetArchitecture().SetNumberOfProcessors(4); - if (!status_graph) { + if (!statusGraph) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - std::cout << "Instance: " << filename_graph << std::endl; + std::cout << "Instance: " << filenameGraph << std::endl; - add_mem_weights(instance.getComputationalDag()); - add_node_types(instance.getComputationalDag()); + AddMemWeights(instance.GetComputationalDag()); + AddNodeTypes(instance.GetComputationalDag()); - instance.getArchitecture().setProcessorsWithTypes({0, 0, 1, 1}); + instance.GetArchitecture().SetProcessorsWithTypes({0, 0, 1, 1}); - instance.setDiagonalCompatibilityMatrix(2); + instance.SetDiagonalCompatibilityMatrix(2); - BspSchedule schedule(instance); - const auto result = test_scheduler.computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler.ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.satisfiesNodeTypeConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesNodeTypeConstraints()); - kl_total_lambda_comm_improver kl; + KlTotalLambdaCommImprover kl; - auto status = kl.improveSchedule(schedule); + auto status = kl.ImproveSchedule(schedule); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.satisfiesNodeTypeConstraints()); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesNodeTypeConstraints()); } } 
-BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { - std::vector filenames_graph = test_graphs(); +BOOST_AUTO_TEST_CASE(KlLambdaImproverOnTestGraphs) { + std::vector filenamesGraph = TestGraphs(); - using graph = computational_dag_edge_idx_vector_impl_def_int_t; + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -168,72 +165,72 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { std::cout << cwd << std::endl; } - GreedyBspScheduler test_scheduler; + GreedyBspScheduler testScheduler; - for (auto &filename_graph : filenames_graph) { - BspInstance instance; + for (auto &filenameGraph : filenamesGraph) { + BspInstance instance; - bool status_graph - = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); + bool statusGraph + = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), instance.GetComputationalDag()); - instance.getArchitecture().setSynchronisationCosts(5); - instance.getArchitecture().setCommunicationCosts(5); - instance.getArchitecture().setNumberOfProcessors(4); + instance.GetArchitecture().SetSynchronisationCosts(5); + instance.GetArchitecture().SetCommunicationCosts(5); + instance.GetArchitecture().SetNumberOfProcessors(4); - if (!status_graph) { + if (!statusGraph) { std::cout << "Reading files failed." 
<< std::endl; BOOST_CHECK(false); } - std::cout << "Instance: " << filename_graph << std::endl; + std::cout << "Instance: " << filenameGraph << std::endl; - add_mem_weights(instance.getComputationalDag()); + AddMemWeights(instance.GetComputationalDag()); - BspSchedule schedule(instance); - const auto result = test_scheduler.computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler.ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - kl_total_lambda_comm_improver kl; + KlTotalLambdaCommImprover kl; - auto status = kl.improveSchedule(schedule); + auto status = kl.ImproveSchedule(schedule); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); } } // BOOST_AUTO_TEST_CASE(kl_lambda_improver_incremental_update_test) { -// using graph = computational_dag_edge_idx_vector_impl_def_int_t; -// using VertexType = graph::vertex_idx; -// using kl_move = kl_move_struct; +// using graph = ComputationalDagEdgeIdxVectorImplDefIntT; +// using VertexType = graph::VertexIdx; +// using KlMove = KlMoveStruct; // graph dag; -// const VertexType v1 = dag.add_vertex(2, 9, 2); -// const VertexType v2 = dag.add_vertex(3, 8, 4); -// const VertexType v3 = dag.add_vertex(4, 7, 3); -// const VertexType v4 = dag.add_vertex(5, 6, 2); -// const VertexType v5 = dag.add_vertex(6, 5, 6); -// const VertexType v6 = dag.add_vertex(7, 4, 2); -// const VertexType v7 = dag.add_vertex(8, 3, 4); 
-// const VertexType v8 = dag.add_vertex(9, 2, 1); - -// dag.add_edge(v1, v2, 2); -// dag.add_edge(v1, v3, 2); -// dag.add_edge(v1, v4, 2); -// dag.add_edge(v1, v5, 2); -// dag.add_edge(v1, v8, 2); -// dag.add_edge(v2, v5, 12); -// dag.add_edge(v2, v6, 12); -// dag.add_edge(v2, v8, 12); -// dag.add_edge(v3, v5, 6); -// dag.add_edge(v3, v6, 7); -// dag.add_edge(v5, v8, 9); -// dag.add_edge(v4, v8, 9); +// const VertexType v1 = dag.AddVertex(2, 9, 2); +// const VertexType v2 = dag.AddVertex(3, 8, 4); +// const VertexType v3 = dag.AddVertex(4, 7, 3); +// const VertexType v4 = dag.AddVertex(5, 6, 2); +// const VertexType v5 = dag.AddVertex(6, 5, 6); +// const VertexType v6 = dag.AddVertex(7, 4, 2); +// const VertexType v7 = dag.AddVertex(8, 3, 4); +// const VertexType v8 = dag.AddVertex(9, 2, 1); + +// dag.AddEdge(v1, v2, 2); +// dag.AddEdge(v1, v3, 2); +// dag.AddEdge(v1, v4, 2); +// dag.AddEdge(v1, v5, 2); +// dag.AddEdge(v1, v8, 2); +// dag.AddEdge(v2, v5, 12); +// dag.AddEdge(v2, v6, 12); +// dag.AddEdge(v2, v8, 12); +// dag.AddEdge(v3, v5, 6); +// dag.AddEdge(v3, v6, 7); +// dag.AddEdge(v5, v8, 9); +// dag.AddEdge(v4, v8, 9); // BspArchitecture arch; @@ -241,147 +238,147 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { // BspSchedule schedule(instance); -// schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); -// schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); +// schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); +// schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); -// schedule.updateNumberOfSupersteps(); +// schedule.UpdateNumberOfSupersteps(); -// using cost_f = kl_hyper_total_comm_cost_function; -// using kl_improver_test = kl_improver_test; -// kl_improver_test kl; +// using cost_f = KlHyperTotalCommCostFunction; +// using KlImproverTest = KlImproverTest; +// KlImproverTest kl; -// kl.setup_schedule(schedule); +// kl.SetupSchedule(schedule); -// auto &kl_active_schedule = kl.get_active_schedule(); +// auto 
&kl_active_schedule = kl.GetActiveSchedule(); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 5.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 0.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepMaxWork(0), 5.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepSecondMaxWork(0), 0.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepMaxWork(1), 9.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepSecondMaxWork(1), 0.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepMaxWork(2), 7.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepSecondMaxWork(2), 6.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepMaxWork(3), 9.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepSecondMaxWork(3), 8.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); -// BOOST_CHECK_EQUAL(kl_active_schedule.is_feasible(), true); +// BOOST_CHECK_EQUAL(kl_active_schedule.NumSteps(), 4); +// BOOST_CHECK_EQUAL(kl_active_schedule.IsFeasible(), true); -// auto node_selection = kl.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// auto node_selection = kl.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // std::set nodes_to_check = {0, 1, 2, 3, 4, 5, 6, 7}; -// auto& affinity = kl.get_affinity_table(); -// auto& lambda_map = kl.get_comm_cost_f().node_lambda_map; 
+// auto& affinity = kl.GetAffinityTable(); +// auto& lambda_map = kl.GetCommCostF().node_lambda_map; -// kl_move move_1(v7, 0.0, 0, 3, 0, 2); -// kl.update_affinity_table_test(move_1, node_selection); +// KlMove move_1(v7, 0.0, 0, 3, 0, 2); +// kl.UpdateAffinityTableTest(move_1, node_selection); // BspSchedule test_sched_1(instance); -// kl.get_active_schedule_test(test_sched_1); -// kl_improver_test kl_1; -// kl_1.setup_schedule(test_sched_1); -// kl_1.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_1); +// KlImproverTest kl_1; +// kl_1.SetupSchedule(test_sched_1); +// kl_1.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v7); -// check_equal_lambda_map(lambda_map, kl_1.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_1.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_1.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_1.GetAffinityTable(), nodes_to_check); -// kl_move move_2(v4, 0.0, 0, 1 , 0, 2); -// kl.update_affinity_table_test(move_2, node_selection); +// KlMove move_2(v4, 0.0, 0, 1 , 0, 2); +// kl.UpdateAffinityTableTest(move_2, node_selection); // BspSchedule test_sched_2(instance); -// kl.get_active_schedule_test(test_sched_2); -// kl_improver_test kl_2; -// kl_2.setup_schedule(test_sched_2); -// kl_2.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_2); +// KlImproverTest kl_2; +// kl_2.SetupSchedule(test_sched_2); +// kl_2.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v4); -// check_equal_lambda_map(lambda_map, kl_2.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_2.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_2.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_2.GetAffinityTable(), nodes_to_check); -// kl_move move_3(v2, 0.0, 1, 0 , 0, 0); -// 
kl.update_affinity_table_test(move_3, node_selection); +// KlMove move_3(v2, 0.0, 1, 0 , 0, 0); +// kl.UpdateAffinityTableTest(move_3, node_selection); // BspSchedule test_sched_3(instance); -// kl.get_active_schedule_test(test_sched_3); -// kl_improver_test kl_3; -// kl_3.setup_schedule(test_sched_3); -// kl_3.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_3); +// KlImproverTest kl_3; +// kl_3.SetupSchedule(test_sched_3); +// kl_3.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v2); -// check_equal_lambda_map(lambda_map, kl_3.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_3.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_3.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_3.GetAffinityTable(), nodes_to_check); -// kl_move move_4(v6, 0.0, 0, 2 , 1, 3); -// kl.update_affinity_table_test(move_4, node_selection); +// KlMove move_4(v6, 0.0, 0, 2 , 1, 3); +// kl.UpdateAffinityTableTest(move_4, node_selection); // BspSchedule test_sched_4(instance); -// kl.get_active_schedule_test(test_sched_4); -// kl_improver_test kl_4; -// kl_4.setup_schedule(test_sched_4); -// kl_4.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_4); +// KlImproverTest kl_4; +// kl_4.SetupSchedule(test_sched_4); +// kl_4.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v6); -// check_equal_lambda_map(lambda_map, kl_4.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_4.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_4.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_4.GetAffinityTable(), nodes_to_check); -// kl_move move_5(v8, 0.0, 1, 3 , 0, 2); -// kl.update_affinity_table_test(move_5, node_selection); +// KlMove move_5(v8, 0.0, 1, 3 , 0, 2); +// kl.UpdateAffinityTableTest(move_5, node_selection); // 
BspSchedule test_sched_5(instance); -// kl.get_active_schedule_test(test_sched_5); -// kl_improver_test kl_5; -// kl_5.setup_schedule(test_sched_5); -// kl_5.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_5); +// KlImproverTest kl_5; +// kl_5.SetupSchedule(test_sched_5); +// kl_5.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v8); -// check_equal_lambda_map(lambda_map, kl_5.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_5.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_5.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_5.GetAffinityTable(), nodes_to_check); -// kl_move move_6(v3, 0.0, 0, 1 , 1, 1); -// kl.update_affinity_table_test(move_6, node_selection); +// KlMove move_6(v3, 0.0, 0, 1 , 1, 1); +// kl.UpdateAffinityTableTest(move_6, node_selection); // BspSchedule test_sched_6(instance); -// kl.get_active_schedule_test(test_sched_6); -// kl_improver_test kl_6; -// kl_6.setup_schedule(test_sched_6); -// kl_6.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_6); +// KlImproverTest kl_6; +// kl_6.SetupSchedule(test_sched_6); +// kl_6.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v3); -// check_equal_lambda_map(lambda_map, kl_6.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_6.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_6.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_6.GetAffinityTable(), nodes_to_check); // }; // BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_test) { -// using graph = computational_dag_edge_idx_vector_impl_def_int_t; -// using VertexType = graph::vertex_idx; -// using kl_move = kl_move_struct; +// using graph = ComputationalDagEdgeIdxVectorImplDefIntT; +// using VertexType = graph::VertexIdx; +// using KlMove = KlMoveStruct; // graph dag; 
-// const VertexType v1 = dag.add_vertex(2, 9, 2); -// const VertexType v2 = dag.add_vertex(3, 8, 4); -// const VertexType v3 = dag.add_vertex(4, 7, 3); -// const VertexType v4 = dag.add_vertex(5, 6, 2); -// const VertexType v5 = dag.add_vertex(6, 5, 6); -// const VertexType v6 = dag.add_vertex(7, 4, 2); -// dag.add_vertex(8, 3, 4); -// const VertexType v8 = dag.add_vertex(9, 2, 1); - -// dag.add_edge(v1, v2, 2); -// dag.add_edge(v1, v3, 2); -// dag.add_edge(v1, v4, 2); -// dag.add_edge(v2, v5, 12); -// dag.add_edge(v3, v5, 6); -// dag.add_edge(v3, v6, 7); -// dag.add_edge(v5, v8, 9); -// dag.add_edge(v4, v8, 9); +// const VertexType v1 = dag.AddVertex(2, 9, 2); +// const VertexType v2 = dag.AddVertex(3, 8, 4); +// const VertexType v3 = dag.AddVertex(4, 7, 3); +// const VertexType v4 = dag.AddVertex(5, 6, 2); +// const VertexType v5 = dag.AddVertex(6, 5, 6); +// const VertexType v6 = dag.AddVertex(7, 4, 2); +// dag.AddVertex(8, 3, 4); +// const VertexType v8 = dag.AddVertex(9, 2, 1); + +// dag.AddEdge(v1, v2, 2); +// dag.AddEdge(v1, v3, 2); +// dag.AddEdge(v1, v4, 2); +// dag.AddEdge(v2, v5, 12); +// dag.AddEdge(v3, v5, 6); +// dag.AddEdge(v3, v6, 7); +// dag.AddEdge(v5, v8, 9); +// dag.AddEdge(v4, v8, 9); // BspArchitecture arch; @@ -389,304 +386,304 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { // BspSchedule schedule(instance); -// schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); -// schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); +// schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); +// schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); -// schedule.updateNumberOfSupersteps(); +// schedule.UpdateNumberOfSupersteps(); -// using cost_f = kl_hyper_total_comm_cost_function; -// using kl_improver_test = kl_improver_test; -// kl_improver_test kl; +// using cost_f = KlHyperTotalCommCostFunction; +// using KlImproverTest = KlImproverTest; +// KlImproverTest kl; -// kl.setup_schedule(schedule); +// 
kl.SetupSchedule(schedule); -// auto &kl_active_schedule = kl.get_active_schedule(); +// auto &kl_active_schedule = kl.GetActiveSchedule(); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 5.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 0.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); -// BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepMaxWork(0), 5.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepSecondMaxWork(0), 0.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepMaxWork(1), 9.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepSecondMaxWork(1), 0.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepMaxWork(2), 7.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepSecondMaxWork(2), 6.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepMaxWork(3), 9.0); +// BOOST_CHECK_EQUAL(kl_active_schedule.workDatastructures_.StepSecondMaxWork(3), 8.0); -// auto node_selection = kl.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// auto node_selection = kl.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // std::set nodes_to_check = {0, 1, 2, 3, 4, 5, 6, 7}; -// auto& affinity = kl.get_affinity_table(); -// auto& lambda_map = kl.get_comm_cost_f().node_lambda_map; +// auto& affinity = kl.GetAffinityTable(); +// auto& lambda_map = kl.GetCommCostF().node_lambda_map; -// kl_move move_2(v4, 0.0, 0, 1 , 1, 2); -// 
kl.update_affinity_table_test(move_2, node_selection); +// KlMove move_2(v4, 0.0, 0, 1 , 1, 2); +// kl.UpdateAffinityTableTest(move_2, node_selection); // BspSchedule test_sched_2(instance); -// kl.get_active_schedule_test(test_sched_2); -// kl_improver_test kl_2; -// kl_2.setup_schedule(test_sched_2); -// kl_2.insert_gain_heap_test_penalty({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_2); +// KlImproverTest kl_2; +// kl_2.SetupSchedule(test_sched_2); +// kl_2.InsertGainHeapTestPenalty({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v4); -// check_equal_lambda_map(lambda_map, kl_2.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_2.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_2.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_2.GetAffinityTable(), nodes_to_check); -// kl_move move_3(v2, 0.0, 1, 0 , 0, 1); -// kl.update_affinity_table_test(move_3, node_selection); +// KlMove move_3(v2, 0.0, 1, 0 , 0, 1); +// kl.UpdateAffinityTableTest(move_3, node_selection); // BspSchedule test_sched_3(instance); -// kl.get_active_schedule_test(test_sched_3); -// kl_improver_test kl_3; -// kl_3.setup_schedule(test_sched_3); -// kl_3.insert_gain_heap_test_penalty({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_3); +// KlImproverTest kl_3; +// kl_3.SetupSchedule(test_sched_3); +// kl_3.InsertGainHeapTestPenalty({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v2); -// check_equal_lambda_map(lambda_map, kl_3.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_3.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_3.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_3.GetAffinityTable(), nodes_to_check); -// kl_move move_4(v6, 0.0, 0, 2 , 1, 3); -// kl.update_affinity_table_test(move_4, node_selection); +// KlMove move_4(v6, 0.0, 0, 2 , 1, 3); +// 
kl.UpdateAffinityTableTest(move_4, node_selection); // BspSchedule test_sched_4(instance); -// kl.get_active_schedule_test(test_sched_4); -// kl_improver_test kl_4; -// kl_4.setup_schedule(test_sched_4); -// kl_4.insert_gain_heap_test_penalty({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_4); +// KlImproverTest kl_4; +// kl_4.SetupSchedule(test_sched_4); +// kl_4.InsertGainHeapTestPenalty({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v6); -// check_equal_lambda_map(lambda_map, kl_4.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_4.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_4.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_4.GetAffinityTable(), nodes_to_check); -// kl_move move_5(v8, 0.0, 1, 3 , 0, 3); -// kl.update_affinity_table_test(move_5, node_selection); +// KlMove move_5(v8, 0.0, 1, 3 , 0, 3); +// kl.UpdateAffinityTableTest(move_5, node_selection); // BspSchedule test_sched_5(instance); -// kl.get_active_schedule_test(test_sched_5); -// kl_improver_test kl_5; -// kl_5.setup_schedule(test_sched_5); -// kl_5.insert_gain_heap_test_penalty({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_5); +// KlImproverTest kl_5; +// kl_5.SetupSchedule(test_sched_5); +// kl_5.InsertGainHeapTestPenalty({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v8); -// check_equal_lambda_map(lambda_map, kl_5.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_5.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_5.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_5.GetAffinityTable(), nodes_to_check); -// kl_move move_6(v3, 0.0, 0, 1 , 1, 1); -// kl.update_affinity_table_test(move_6, node_selection); +// KlMove move_6(v3, 0.0, 0, 1 , 1, 1); +// kl.UpdateAffinityTableTest(move_6, node_selection); // BspSchedule test_sched_6(instance); -// 
kl.get_active_schedule_test(test_sched_6); -// kl_improver_test kl_6; -// kl_6.setup_schedule(test_sched_6); -// kl_6.insert_gain_heap_test_penalty({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_6); +// KlImproverTest kl_6; +// kl_6.SetupSchedule(test_sched_6); +// kl_6.InsertGainHeapTestPenalty({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v3); -// check_equal_lambda_map(lambda_map, kl_6.get_comm_cost_f().node_lambda_map); -// check_equal_affinity_table(affinity, kl_6.get_affinity_table(), nodes_to_check); +// CheckEqualLambdaMap(lambda_map, kl_6.GetCommCostF().node_lambda_map); +// CheckEqualAffinityTable(affinity, kl_6.GetAffinityTable(), nodes_to_check); // }; -BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlLambdaImproverInnerLoopPenaltyTest) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, 
v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using cost_f = kl_hyper_total_comm_cost_function; - using kl_improver_test = kl_improver_test; - kl_improver_test kl; + using CostF = KlHyperTotalCommCostFunction; + using KlImproverTest = KlImproverTest; + KlImproverTest kl; - kl.setup_schedule(schedule); + kl.SetupSchedule(schedule); - auto &kl_active_schedule = kl.get_active_schedule(); + auto &klActiveSchedule = kl.GetActiveSchedule(); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 5.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 5.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 0.0); + 
BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(1), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(1), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(2), 7.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(2), 6.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(3), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(3), 8.0); - BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); - BOOST_CHECK_EQUAL(kl_active_schedule.is_feasible(), true); + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 4); + BOOST_CHECK_EQUAL(klActiveSchedule.IsFeasible(), true); - auto node_selection = kl.insert_gain_heap_test_penalty({2, 3}); + auto nodeSelection = kl.InsertGainHeapTestPenalty({2, 3}); - auto recompute_max_gain = kl.run_inner_iteration_test(); // best move 3 + auto recomputeMaxGain = kl.RunInnerIterationTest(); // best move 3 std::cout << "------------------------recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); // best move 0 + recomputeMaxGain = kl.RunInnerIterationTest(); // best move 0 std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = 
kl.run_inner_iteration_test(); // best move 1 + recomputeMaxGain = kl.RunInnerIterationTest(); // best move 1 std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); } -BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlLambdaImproverInnerLoopLambdaMapTest) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - const VertexType v7 = dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 
= dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + const VertexType v7 = dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using cost_f = kl_hyper_total_comm_cost_function; - using kl_improver_test = kl_improver_test; - kl_improver_test kl; + using CostF = KlHyperTotalCommCostFunction; + using KlImproverTest = KlImproverTest; + KlImproverTest kl; - kl.setup_schedule(schedule); + kl.SetupSchedule(schedule); - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - auto node_selection = kl.insert_gain_heap_test_penalty({7}); + auto nodeSelection = kl.InsertGainHeapTestPenalty({7}); - auto recompute_max_gain = kl.run_inner_iteration_test(); + auto recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "-----------recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << 
std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - - auto &lambda_map = kl.get_comm_cost_f().node_lambda_map; - - BOOST_CHECK(lambda_map.get_proc_entry(v1, 0) == 2); - BOOST_CHECK(lambda_map.get_proc_entry(v1, 1) == 1); - BOOST_CHECK(lambda_map.has_no_proc_entry(v2, 0)); - BOOST_CHECK(lambda_map.get_proc_entry(v2, 1) == 1); - BOOST_CHECK(lambda_map.get_proc_entry(v3, 0) == 1); - BOOST_CHECK(lambda_map.get_proc_entry(v3, 1) == 1); - BOOST_CHECK(lambda_map.has_no_proc_entry(v4, 0)); - BOOST_CHECK(lambda_map.get_proc_entry(v4, 1) == 1); - BOOST_CHECK(lambda_map.has_no_proc_entry(v5, 0)); - BOOST_CHECK(lambda_map.get_proc_entry(v5, 1) == 1); - BOOST_CHECK(lambda_map.has_no_proc_entry(v6, 0)); - BOOST_CHECK(lambda_map.has_no_proc_entry(v6, 0)); - BOOST_CHECK(lambda_map.has_no_proc_entry(v7, 0)); - BOOST_CHECK(lambda_map.has_no_proc_entry(v7, 0)); - BOOST_CHECK(lambda_map.has_no_proc_entry(v8, 0)); - BOOST_CHECK(lambda_map.has_no_proc_entry(v8, 0)); - - recompute_max_gain = kl.run_inner_iteration_test(); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); + + auto &lambdaMap = kl.GetCommCostF().nodeLambdaMap_; + + BOOST_CHECK(lambdaMap.GetProcEntry(v1, 0) == 2); + BOOST_CHECK(lambdaMap.GetProcEntry(v1, 1) == 1); + BOOST_CHECK(lambdaMap.HasNoProcEntry(v2, 0)); + BOOST_CHECK(lambdaMap.GetProcEntry(v2, 1) == 1); + BOOST_CHECK(lambdaMap.GetProcEntry(v3, 0) == 1); + BOOST_CHECK(lambdaMap.GetProcEntry(v3, 1) == 1); + BOOST_CHECK(lambdaMap.HasNoProcEntry(v4, 0)); + BOOST_CHECK(lambdaMap.GetProcEntry(v4, 1) == 1); + BOOST_CHECK(lambdaMap.HasNoProcEntry(v5, 0)); + BOOST_CHECK(lambdaMap.GetProcEntry(v5, 1) == 1); + BOOST_CHECK(lambdaMap.HasNoProcEntry(v6, 0)); + BOOST_CHECK(lambdaMap.HasNoProcEntry(v6, 0)); + BOOST_CHECK(lambdaMap.HasNoProcEntry(v7, 0)); + BOOST_CHECK(lambdaMap.HasNoProcEntry(v7, 0)); + BOOST_CHECK(lambdaMap.HasNoProcEntry(v8, 0)); + 
BOOST_CHECK(lambdaMap.HasNoProcEntry(v8, 0)); + + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); } // BOOST_AUTO_TEST_CASE(kl_lambda_total_comm_large_test_graphs) { // std::vector filenames_graph = large_spaa_graphs(); -// using graph = computational_dag_edge_idx_vector_impl_def_int_t; +// using graph = ComputationalDagEdgeIdxVectorImplDefIntT; // // Getting root git directory // std::filesystem::path cwd = std::filesystem::current_path(); // std::cout << cwd << std::endl; @@ -696,21 +693,21 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // 
} // for (auto &filename_graph : filenames_graph) { -// GreedyBspScheduler test_scheduler; +// GreedyBspScheduler test_scheduler; // BspInstance instance; -// bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), -// instance.getComputationalDag()); +// bool status_graph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), +// instance.GetComputationalDag()); -// instance.getArchitecture().setSynchronisationCosts(500); -// instance.getArchitecture().setCommunicationCosts(5); -// instance.getArchitecture().setNumberOfProcessors(4); +// instance.GetArchitecture().SetSynchronisationCosts(500); +// instance.GetArchitecture().SetCommunicationCosts(5); +// instance.GetArchitecture().SetNumberOfProcessors(4); // std::vector> send_cost = {{0,1,4,4}, // {1,0,4,4}, // {4,4,0,1}, // {4,4,1,0}}; -// instance.getArchitecture().SetSendCosts(send_cost); +// instance.GetArchitecture().SetSendCosts(send_cost); // if (!status_graph) { @@ -718,33 +715,33 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // BOOST_CHECK(false); // } -// add_mem_weights(instance.getComputationalDag()); +// add_mem_weights(instance.GetComputationalDag()); // BspSchedule schedule(instance); -// const auto result = test_scheduler.computeSchedule(schedule); +// const auto result = test_scheduler.ComputeSchedule(schedule); -// schedule.updateNumberOfSupersteps(); +// schedule.UpdateNumberOfSupersteps(); // std::cout << "initial scedule with costs: " << schedule.computeTotalLambdaCosts() << " and " << -// schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// schedule.NumberOfSupersteps() << " number of supersteps"<< std::endl; // BspSchedule schedule_2(schedule); -// BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); -// BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); -// BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); +// BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, 
result); +// BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); +// BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); -// kl_total_lambda_comm_improver kl; +// KlTotalLambdaCommImprover kl; // auto start_time = std::chrono::high_resolution_clock::now(); -// auto status = kl.improveSchedule(schedule); +// auto status = kl.ImproveSchedule(schedule); // auto finish_time = std::chrono::high_resolution_clock::now(); // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); // std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " -// << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// << schedule.NumberOfSupersteps() << " number of supersteps"<< std::endl; -// BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); +// BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); // // kl_total_comm_test kl_old; @@ -755,17 +752,17 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // // duration = std::chrono::duration_cast(finish_time - start_time).count(); // // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " -// << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; +// << schedule_2.NumberOfSupersteps() << " number of supersteps"<< std::endl; -// // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); +// // BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// // BOOST_CHECK_EQUAL(schedule_2.SatisfiesPrecedenceConstraints(), true); // } // } // 
BOOST_AUTO_TEST_CASE(kl_lambda_total_comm_large_test_graphs_mt) { // std::vector filenames_graph = large_spaa_graphs(); -// using graph = computational_dag_edge_idx_vector_impl_def_int_t; +// using graph = ComputationalDagEdgeIdxVectorImplDefIntT; // // Getting root git directory // std::filesystem::path cwd = std::filesystem::current_path(); // std::cout << cwd << std::endl; @@ -775,21 +772,21 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // } // for (auto &filename_graph : filenames_graph) { -// GreedyBspScheduler test_scheduler; +// GreedyBspScheduler test_scheduler; // BspInstance instance; -// bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), -// instance.getComputationalDag()); +// bool status_graph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), +// instance.GetComputationalDag()); -// instance.getArchitecture().setSynchronisationCosts(500); -// instance.getArchitecture().setCommunicationCosts(5); -// instance.getArchitecture().setNumberOfProcessors(4); +// instance.GetArchitecture().SetSynchronisationCosts(500); +// instance.GetArchitecture().SetCommunicationCosts(5); +// instance.GetArchitecture().SetNumberOfProcessors(4); // std::vector> send_cost = {{0,1,4,4}, // {1,0,4,4}, // {4,4,0,1}, // {4,4,1,0}}; -// instance.getArchitecture().SetSendCosts(send_cost); +// instance.GetArchitecture().SetSendCosts(send_cost); // if (!status_graph) { @@ -797,33 +794,33 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // BOOST_CHECK(false); // } -// add_mem_weights(instance.getComputationalDag()); +// add_mem_weights(instance.GetComputationalDag()); // BspSchedule schedule(instance); -// const auto result = test_scheduler.computeSchedule(schedule); +// const auto result = test_scheduler.ComputeSchedule(schedule); -// schedule.updateNumberOfSupersteps(); +// schedule.UpdateNumberOfSupersteps(); // std::cout << "initial scedule with 
costs: " << schedule.computeTotalLambdaCosts() << " and " << -// schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// schedule.NumberOfSupersteps() << " number of supersteps"<< std::endl; // BspSchedule schedule_2(schedule); -// BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); -// BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); -// BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); +// BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); +// BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); +// BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); -// kl_total_lambda_comm_improver_mt kl; +// KlTotalLambdaCommImprover_mt kl; // auto start_time = std::chrono::high_resolution_clock::now(); -// auto status = kl.improveSchedule(schedule); +// auto status = kl.ImproveSchedule(schedule); // auto finish_time = std::chrono::high_resolution_clock::now(); // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); // std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " -// << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// << schedule.NumberOfSupersteps() << " number of supersteps"<< std::endl; -// BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); +// BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); // // kl_total_comm_test kl_old; @@ -834,10 +831,10 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // // duration = std::chrono::duration_cast(finish_time - start_time).count(); // // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " -// << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; 
+// << schedule_2.NumberOfSupersteps() << " number of supersteps"<< std::endl; -// // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); +// // BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// // BOOST_CHECK_EQUAL(schedule_2.SatisfiesPrecedenceConstraints(), true); // } // } diff --git a/tests/kl_mem_constr.cpp b/tests/kl_mem_constr.cpp index 1e2de362..ad3dd2f4 100644 --- a/tests/kl_mem_constr.cpp +++ b/tests/kl_mem_constr.cpp @@ -23,30 +23,27 @@ limitations under the License. #include "osp/auxiliary/io/arch_file_reader.hpp" #include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "test_graphs.hpp" using namespace osp; -template -void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; - int comm_weight = 1; +template +void AddMemWeights(GraphT &dag) { + int memWeight = 1; + int commWeight = 1; - for (const auto &v : dag.vertices()) { - dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 3 + 1)); - dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 3 + 1)); + for (const auto &v : dag.Vertices()) { + dag.SetVertexMemWeight(v, static_cast>(memWeight++ % 3 + 1)); + dag.SetVertexCommWeight(v, static_cast>(commWeight++ % 3 + 1)); } } -BOOST_AUTO_TEST_CASE(kl_local_memconst) { - std::vector filenames_graph = test_graphs(); +BOOST_AUTO_TEST_CASE(KlLocalMemconst) { + std::vector 
filenamesGraph = TestGraphs(); - using graph = computational_dag_edge_idx_vector_impl_def_int_t; + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -56,46 +53,46 @@ BOOST_AUTO_TEST_CASE(kl_local_memconst) { std::cout << cwd << std::endl; } - GreedyBspScheduler> test_scheduler; + GreedyBspScheduler> testScheduler; - for (auto &filename_graph : filenames_graph) { - std::cout << filename_graph << std::endl; - BspInstance instance; + for (auto &filenameGraph : filenamesGraph) { + std::cout << filenameGraph << std::endl; + BspInstance instance; - bool status_graph - = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); - instance.getArchitecture().setSynchronisationCosts(10); - instance.getArchitecture().setCommunicationCosts(5); - instance.getArchitecture().setNumberOfProcessors(4); - instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL); - instance.getArchitecture().setSynchronisationCosts(0); + bool statusGraph + = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), instance.GetComputationalDag()); + instance.GetArchitecture().SetSynchronisationCosts(10); + instance.GetArchitecture().SetCommunicationCosts(5); + instance.GetArchitecture().SetNumberOfProcessors(4); + instance.GetArchitecture().SetMemoryConstraintType(MemoryConstraintType::LOCAL); + instance.GetArchitecture().SetSynchronisationCosts(0); - const std::vector bounds_to_test = {10, 20}; + const std::vector boundsToTest = {10, 20}; - add_mem_weights(instance.getComputationalDag()); + AddMemWeights(instance.GetComputationalDag()); - if (!status_graph) { + if (!statusGraph) { std::cout << "Reading files failed." 
<< std::endl; BOOST_CHECK(false); } - for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); + for (const auto &bound : boundsToTest) { + instance.GetArchitecture().SetMemoryBound(bound); - BspSchedule schedule(instance); - const auto result = test_scheduler.computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler.ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.satisfiesMemoryConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesMemoryConstraints()); - kl_total_comm_improver_local_mem_constr kl; + KlTotalCommImproverLocalMemConstr kl; - auto status = kl.improveSchedule(schedule); + auto status = kl.ImproveSchedule(schedule); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK(schedule.satisfiesMemoryConstraints()); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesMemoryConstraints()); } } } diff --git a/tests/kl_total.cpp b/tests/kl_total.cpp index 21b565c0..28a441cd 100644 --- a/tests/kl_total.cpp +++ b/tests/kl_total.cpp @@ -23,92 +23,92 @@ limitations under the License. 
#include "osp/auxiliary/io/arch_file_reader.hpp" #include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_improver_test.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_include_mt.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "test_graphs.hpp" using namespace osp; -template -void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; - int comm_weight = 7; +template +void AddMemWeights(GraphT &dag) { + int memWeight = 1; + int commWeight = 7; - for (const auto &v : dag.vertices()) { - dag.set_vertex_work_weight(v, static_cast>(mem_weight++ % 10 + 2)); - dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 10 + 2)); - dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 10 + 2)); + for (const auto &v : dag.Vertices()) { + dag.SetVertexWorkWeight(v, static_cast>(memWeight++ % 10 + 2)); + dag.SetVertexMemWeight(v, static_cast>(memWeight++ % 10 + 2)); + dag.SetVertexCommWeight(v, static_cast>(commWeight++ % 10 + 2)); } } -template -void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::set &nodes) { - BOOST_CHECK_EQUAL(table_1.size(), table_2.size()); +template +void CheckEqualAffinityTable(TableT &table1, TableT &table2, const std::set &nodes) { + BOOST_CHECK_EQUAL(table1.size(), table2.size()); for (auto i : nodes) { - for (size_t j = 0; j < table_1[i].size(); ++j) { - for (size_t k = 0; k < table_1[i][j].size(); ++k) { - BOOST_CHECK(std::abs(table_1[i][j][k] - table_2[i][j][k]) < 0.000001); + for (size_t j = 0; j < table1[i].size(); ++j) { + for 
(size_t k = 0; k < table1[i][j].size(); ++k) { + BOOST_CHECK(std::abs(table1[i][j][k] - table2[i][j][k]) < 0.000001); - if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) { - std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] - << ", table_2=" << table_2[i][j][k] << std::endl; + if (std::abs(table1[i][j][k] - table2[i][j][k]) > 0.000001) { + std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table1[i][j][k] + << ", table_2=" << table2[i][j][k] << std::endl; } } } } } -BOOST_AUTO_TEST_CASE(kl_improver_smoke_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlImproverSmokeTest) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + 
dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using kl_improver_t = kl_total_comm_improver; - kl_improver_t kl; + using KlImproverT = KlTotalCommImprover; + KlImproverT kl; - auto status = kl.improveSchedule(schedule); + auto status = kl.ImproveSchedule(schedule); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); } -BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) { - std::vector filenames_graph = test_graphs(); +BOOST_AUTO_TEST_CASE(KlImproverOnTestGraphs) { + std::vector filenamesGraph = TestGraphs(); - using graph = computational_dag_edge_idx_vector_impl_def_int_t; + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -118,146 +118,146 @@ BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) { std::cout << cwd << std::endl; } - GreedyBspScheduler test_scheduler; + GreedyBspScheduler testScheduler; - for (auto &filename_graph : filenames_graph) { - BspInstance instance; + for (auto &filenameGraph : filenamesGraph) { + BspInstance instance; - bool status_graph - = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); + bool statusGraph + = 
file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), instance.GetComputationalDag()); - instance.getArchitecture().setSynchronisationCosts(5); - instance.getArchitecture().setCommunicationCosts(5); - instance.getArchitecture().setNumberOfProcessors(4); + instance.GetArchitecture().SetSynchronisationCosts(5); + instance.GetArchitecture().SetCommunicationCosts(5); + instance.GetArchitecture().SetNumberOfProcessors(4); - if (!status_graph) { + if (!statusGraph) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - add_mem_weights(instance.getComputationalDag()); + AddMemWeights(instance.GetComputationalDag()); - BspSchedule schedule(instance); - const auto result = test_scheduler.computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler.ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - kl_total_comm_improver kl; + KlTotalCommImprover kl; - auto status = kl.improveSchedule(schedule); + auto status = kl.ImproveSchedule(schedule); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); } } -BOOST_AUTO_TEST_CASE(kl_improver_superstep_removal_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlImproverSuperstepRemovalTest) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - 
graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(1, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(1, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v2, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v2, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - instance.getArchitecture().setSynchronisationCosts(50); + instance.GetArchitecture().SetSynchronisationCosts(50); // Create a schedule with an almost empty superstep (step 1) - schedule.setAssignedProcessors({0, 0, 0, 0, 1, 1, 1, 1}); - schedule.setAssignedSupersteps({0, 0, 0, 0, 1, 2, 2, 2}); + schedule.SetAssignedProcessors({0, 0, 0, 0, 1, 1, 1, 1}); + schedule.SetAssignedSupersteps({0, 0, 0, 0, 1, 2, 2, 2}); - schedule.updateNumberOfSupersteps(); - unsigned original_steps = schedule.numberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); + unsigned originalSteps = schedule.NumberOfSupersteps(); - using cost_f = kl_total_comm_cost_function; - 
kl_improver kl; + using CostF = KlTotalCommCostFunction; + KlImprover kl; - auto status = kl.improveSchedule(schedule); + auto status = kl.ImproveSchedule(schedule); - BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); - BOOST_CHECK_LT(schedule.numberOfSupersteps(), original_steps); + BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); + BOOST_CHECK_LT(schedule.NumberOfSupersteps(), originalSteps); } -BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlImproverInnerLoopTest) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, 
v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using cost_f = kl_total_comm_cost_function; - using kl_improver_test = kl_improver_test; - kl_improver_test kl; + using CostF = KlTotalCommCostFunction; + using KlImproverTest = KlImproverTest; + KlImproverTest kl; - kl.setup_schedule(schedule); + kl.SetupSchedule(schedule); - auto &kl_active_schedule = kl.get_active_schedule(); + auto &klActiveSchedule = kl.GetActiveSchedule(); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 5.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 5.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(1), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(1), 0.0); + 
BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(2), 7.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(2), 6.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(3), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(3), 8.0); - BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); - BOOST_CHECK_EQUAL(kl_active_schedule.is_feasible(), true); + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 4); + BOOST_CHECK_EQUAL(klActiveSchedule.IsFeasible(), true); - auto node_selection = kl.insert_gain_heap_test_penalty({2, 3}); + auto nodeSelection = kl.InsertGainHeapTestPenalty({2, 3}); - auto &affinity = kl.get_affinity_table(); + auto &affinity = kl.GetAffinityTable(); BOOST_CHECK_CLOSE(affinity[v3][0][0], 5.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v3][0][1], 4.0, 0.00001); @@ -273,251 +273,251 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { BOOST_CHECK_CLOSE(affinity[v4][1][1], -6.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v4][1][2], -3.5, 0.00001); - auto recompute_max_gain = kl.run_inner_iteration_test(); + auto recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "------------------------recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), 
kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); } -BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_penalty_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlImproverInnerLoopPenaltyTest) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = 
dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using cost_f = kl_total_comm_cost_function; - using kl_improver_test = kl_improver_test; - kl_improver_test kl; + using CostF = KlTotalCommCostFunction; + using KlImproverTest = KlImproverTest; + KlImproverTest kl; - kl.setup_schedule(schedule); + kl.SetupSchedule(schedule); - // auto &kl_active_schedule = kl.get_active_schedule(); + // auto &kl_active_schedule = kl.GetActiveSchedule(); - BOOST_CHECK_CLOSE(51.5, kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(51.5, kl.GetCurrentCost(), 0.00001); - auto node_selection = kl.insert_gain_heap_test_penalty({7}); + auto nodeSelection = kl.InsertGainHeapTestPenalty({7}); - auto recompute_max_gain = kl.run_inner_iteration_test(); + auto recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "-----------recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const 
auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); + recomputeMaxGain = kl.RunInnerIterationTest(); std::cout << "recompute max_gain: { "; - for (const auto &[key, value] : recompute_max_gain) { + for (const auto &[key, value] : recomputeMaxGain) { std::cout << key << " "; } std::cout << "}" << std::endl; - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCostTest(), kl.GetCurrentCost(), 0.00001); } -BOOST_AUTO_TEST_CASE(kl_improver_violation_handling_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; 
+BOOST_AUTO_TEST_CASE(KlImproverViolationHandlingTest) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1, 0, 0, 1, 0, 0, 1}); // v1->v2 is on same step, different procs - schedule.setAssignedSupersteps({0, 0, 2, 1, 2, 2, 3, 3}); + schedule.SetAssignedProcessors({0, 1, 0, 0, 1, 0, 0, 1}); // v1->v2 is on same step, different procs + schedule.SetAssignedSupersteps({0, 0, 2, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using cost_f = kl_total_comm_cost_function; - kl_improver_test kl; + using CostF = 
KlTotalCommCostFunction; + KlImproverTest kl; - kl.setup_schedule(schedule); + kl.SetupSchedule(schedule); - kl.compute_violations_test(); + kl.ComputeViolationsTest(); - BOOST_CHECK_EQUAL(kl.is_feasible(), false); + BOOST_CHECK_EQUAL(kl.IsFeasible(), false); - kl_improver kl_improver; - kl_improver.improveSchedule(schedule); + KlImprover klImprover; + klImprover.ImproveSchedule(schedule); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); } -BOOST_AUTO_TEST_CASE(kl_base_1) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlBase1) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - const VertexType v7 = dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + const VertexType v7 = dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + 
dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 0, 0, 0, 0, 0, 0, 0}); - schedule.setAssignedSupersteps({0, 0, 0, 0, 0, 0, 0, 0}); + schedule.SetAssignedProcessors({0, 0, 0, 0, 0, 0, 0, 0}); + schedule.SetAssignedSupersteps({0, 0, 0, 0, 0, 0, 0, 0}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using cost_f = kl_total_comm_cost_function; - kl_improver_test kl; + using CostF = KlTotalCommCostFunction; + KlImproverTest kl; - kl.setup_schedule(schedule); + kl.SetupSchedule(schedule); - auto &kl_active_schedule = kl.get_active_schedule(); + auto &klActiveSchedule = kl.GetActiveSchedule(); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 44.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 1); - BOOST_CHECK_CLOSE(kl.get_current_cost(), 44.0, 0.00001); - BOOST_CHECK_EQUAL(kl.is_feasible(), true); - BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost(), 44.0, 0.00001); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 44.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 1); + BOOST_CHECK_CLOSE(kl.GetCurrentCost(), 44.0, 0.00001); + BOOST_CHECK_EQUAL(kl.IsFeasible(), true); + BOOST_CHECK_CLOSE(kl.GetCommCostF().ComputeScheduleCost(), 44.0, 0.00001); - using kl_move = kl_move_struct; + using KlMove = KlMoveStruct; - kl_move move_1(v1, 2.0 - 13.5, 0, 0, 1, 0); + KlMove move1(v1, 2.0 - 13.5, 0, 0, 1, 0); - kl.apply_move_test(move_1); + kl.ApplyMoveTest(move1); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 42.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 2.0); - 
BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 1); - BOOST_CHECK_EQUAL(kl.is_feasible(), false); - BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 42.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 2.0); + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 1); + BOOST_CHECK_EQUAL(kl.IsFeasible(), false); + BOOST_CHECK_CLOSE(kl.GetCurrentCost(), kl.GetCommCostF().ComputeScheduleCost(), 0.00001); - kl_move move_2(v2, 3.0 + 4.5 - 4.0, 0, 0, 1, 0); + KlMove move2(v2, 3.0 + 4.5 - 4.0, 0, 0, 1, 0); - kl.apply_move_test(move_2); + kl.ApplyMoveTest(move2); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 39.0); // 42-3 - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 5.0); // 2+3 - BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 1); - BOOST_CHECK_EQUAL(kl.is_feasible(), false); - BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 39.0); // 42-3 + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 5.0); // 2+3 + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 1); + BOOST_CHECK_EQUAL(kl.IsFeasible(), false); + BOOST_CHECK_CLOSE(kl.GetCurrentCost(), kl.GetCommCostF().ComputeScheduleCost(), 0.00001); - kl.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); + kl.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); - auto &affinity = kl.get_affinity_table(); + auto &affinity = kl.GetAffinityTable(); BOOST_CHECK_CLOSE(affinity[v1][0][1], 2.0 - 4.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v1][1][1], 0.0, 0.00001); @@ -525,126 +525,126 @@ BOOST_AUTO_TEST_CASE(kl_base_1) { BOOST_CHECK_CLOSE(affinity[v3][0][1], 4.0, 0.00001); - kl_move move_3(v7, 7.0, 0, 0, 1, 0); - kl.apply_move_test(move_3); - BOOST_CHECK_EQUAL(kl.is_feasible(), false); 
+ KlMove move3(v7, 7.0, 0, 0, 1, 0); + kl.ApplyMoveTest(move3); + BOOST_CHECK_EQUAL(kl.IsFeasible(), false); - kl_move move_4(v2, 7.0, 1, 0, 0, 0); - kl.apply_move_test(move_4); - BOOST_CHECK_EQUAL(kl.is_feasible(), false); + KlMove move4(v2, 7.0, 1, 0, 0, 0); + kl.ApplyMoveTest(move4); + BOOST_CHECK_EQUAL(kl.IsFeasible(), false); - kl_move move_5(v1, 7.0, 1, 0, 0, 0); - kl.apply_move_test(move_5); - BOOST_CHECK_EQUAL(kl.is_feasible(), true); + KlMove move5(v1, 7.0, 1, 0, 0, 0); + kl.ApplyMoveTest(move5); + BOOST_CHECK_EQUAL(kl.IsFeasible(), true); } -BOOST_AUTO_TEST_CASE(kl_base_2) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlBase2) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - const VertexType v7 = dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + const VertexType v7 = dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 
7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 1, 1, 1, 2, 2, 3, 3}); + schedule.SetAssignedProcessors({0, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 1, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using cost_f = kl_total_comm_cost_function; - kl_improver_test kl; + using CostF = KlTotalCommCostFunction; + KlImproverTest kl; - kl.setup_schedule(schedule); + kl.SetupSchedule(schedule); - auto &kl_active_schedule = kl.get_active_schedule(); + auto &klActiveSchedule = kl.GetActiveSchedule(); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 2.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 3.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 2.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(1), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(1), 3.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(2), 7.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(2), 
6.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(3), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(3), 8.0); - BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); - BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); - BOOST_CHECK_EQUAL(kl.is_feasible(), true); + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 4); + BOOST_CHECK_CLOSE(kl.GetCurrentCost(), kl.GetCommCostF().ComputeScheduleCost(), 0.00001); + BOOST_CHECK_EQUAL(kl.IsFeasible(), true); - using kl_move = kl_move_struct; + using KlMove = KlMoveStruct; - kl_move move_1(v1, 0.0 - 4.5, 0, 0, 1, 0); + KlMove move1(v1, 0.0 - 4.5, 0, 0, 1, 0); - kl.apply_move_test(move_1); + kl.ApplyMoveTest(move1); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 2.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 3.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - BOOST_CHECK_EQUAL(kl.is_feasible(), true); - BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 2.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(1), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(1), 3.0); + 
BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(2), 7.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(2), 6.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(3), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(3), 8.0); + BOOST_CHECK_EQUAL(kl.IsFeasible(), true); + BOOST_CHECK_CLOSE(kl.GetCurrentCost(), kl.GetCommCostF().ComputeScheduleCost(), 0.00001); - kl_move move_2(v2, -1.0 - 8.5, 1, 1, 0, 0); + KlMove move2(v2, -1.0 - 8.5, 1, 1, 0, 0); - kl.apply_move_test(move_2); + kl.ApplyMoveTest(move2); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 3.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 2.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); - BOOST_CHECK_EQUAL(kl.is_feasible(), false); - BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 3.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 2.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(1), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(1), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(2), 7.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(2), 6.0); + 
BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(3), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(3), 8.0); + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 4); + BOOST_CHECK_EQUAL(kl.IsFeasible(), false); + BOOST_CHECK_CLOSE(kl.GetCurrentCost(), kl.GetCommCostF().ComputeScheduleCost(), 0.00001); - kl_move move_x(v2, -2.0 + 8.5, 0, 0, 1, 0); + KlMove moveX(v2, -2.0 + 8.5, 0, 0, 1, 0); - kl.apply_move_test(move_x); + kl.ApplyMoveTest(moveX); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 5.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); - BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); - BOOST_CHECK_EQUAL(kl.is_feasible(), true); - BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 5.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(1), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(1), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(2), 7.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(2), 6.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(3), 9.0); + 
BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(3), 8.0); + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 4); + BOOST_CHECK_EQUAL(kl.IsFeasible(), true); + BOOST_CHECK_CLOSE(kl.GetCurrentCost(), kl.GetCommCostF().ComputeScheduleCost(), 0.00001); - kl.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); + kl.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); - auto &affinity = kl.get_affinity_table(); + auto &affinity = kl.GetAffinityTable(); BOOST_CHECK_CLOSE(affinity[v1][0][1], -4.5, 0.00001); BOOST_CHECK_CLOSE(affinity[v1][0][2], -2.5, 0.00001); @@ -699,63 +699,63 @@ BOOST_AUTO_TEST_CASE(kl_base_2) { BOOST_CHECK_CLOSE(affinity[v8][1][1], 1.0, 0.00001); } -BOOST_AUTO_TEST_CASE(kl_base_3) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; - using VertexType = graph::vertex_idx; +BOOST_AUTO_TEST_CASE(KlBase3) { + using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; + using VertexType = Graph::VertexIdx; - graph dag; + Graph dag; - const VertexType v1 = dag.add_vertex(2, 9, 2); - const VertexType v2 = dag.add_vertex(3, 8, 4); - const VertexType v3 = dag.add_vertex(4, 7, 3); - const VertexType v4 = dag.add_vertex(5, 6, 2); - const VertexType v5 = dag.add_vertex(6, 5, 6); - const VertexType v6 = dag.add_vertex(7, 4, 2); - const VertexType v7 = dag.add_vertex(8, 3, 4); - const VertexType v8 = dag.add_vertex(9, 2, 1); + const VertexType v1 = dag.AddVertex(2, 9, 2); + const VertexType v2 = dag.AddVertex(3, 8, 4); + const VertexType v3 = dag.AddVertex(4, 7, 3); + const VertexType v4 = dag.AddVertex(5, 6, 2); + const VertexType v5 = dag.AddVertex(6, 5, 6); + const VertexType v6 = dag.AddVertex(7, 4, 2); + const VertexType v7 = dag.AddVertex(8, 3, 4); + const VertexType v8 = dag.AddVertex(9, 2, 1); - dag.add_edge(v1, v2, 2); - dag.add_edge(v1, v3, 2); - dag.add_edge(v1, v4, 2); - dag.add_edge(v2, v5, 12); - dag.add_edge(v3, v5, 6); - dag.add_edge(v3, v6, 7); - dag.add_edge(v5, v8, 9); - dag.add_edge(v4, v8, 9); + dag.AddEdge(v1, v2, 2); + 
dag.AddEdge(v1, v3, 2); + dag.AddEdge(v1, v4, 2); + dag.AddEdge(v2, v5, 12); + dag.AddEdge(v3, v5, 6); + dag.AddEdge(v3, v6, 7); + dag.AddEdge(v5, v8, 9); + dag.AddEdge(v4, v8, 9); - BspArchitecture arch; + BspArchitecture arch; - BspInstance instance(dag, arch); + BspInstance instance(dag, arch); BspSchedule schedule(instance); - schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); - schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); + schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); + schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); - schedule.updateNumberOfSupersteps(); + schedule.UpdateNumberOfSupersteps(); - using cost_f = kl_total_comm_cost_function; - kl_improver_test kl; + using CostF = KlTotalCommCostFunction; + KlImproverTest kl; - kl.setup_schedule(schedule); + kl.SetupSchedule(schedule); - auto &kl_active_schedule = kl.get_active_schedule(); + auto &klActiveSchedule = kl.GetActiveSchedule(); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 5.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(1), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(1), 0.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(2), 7.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(2), 6.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(3), 9.0); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(3), 8.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(0), 5.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(0), 0.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(1), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(1), 0.0); + 
BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(2), 7.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(2), 6.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepMaxWork(3), 9.0); + BOOST_CHECK_EQUAL(klActiveSchedule.workDatastructures_.StepSecondMaxWork(3), 8.0); - BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 4); - BOOST_CHECK_EQUAL(kl_active_schedule.is_feasible(), true); + BOOST_CHECK_EQUAL(klActiveSchedule.NumSteps(), 4); + BOOST_CHECK_EQUAL(klActiveSchedule.IsFeasible(), true); - kl.insert_gain_heap_test_penalty({0, 1, 2, 3, 4, 5, 6, 7}); + kl.InsertGainHeapTestPenalty({0, 1, 2, 3, 4, 5, 6, 7}); - auto &affinity = kl.get_affinity_table(); + auto &affinity = kl.GetAffinityTable(); BOOST_CHECK_CLOSE(affinity[v1][0][1], 1.0, 0.00001); BOOST_CHECK_CLOSE(affinity[v1][0][2], 3.0, 0.00001); @@ -810,31 +810,31 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { BOOST_CHECK_CLOSE(affinity[v8][1][1], 1.0, 0.00001); } -// BOOST_AUTO_TEST_CASE(kl_improver_incremental_update_test) { +// BOOST_AUTO_TEST_CASE(KlImprover_incremental_update_test) { -// using graph = computational_dag_edge_idx_vector_impl_def_int_t; -// using VertexType = graph::vertex_idx; -// using kl_move = kl_move_struct; +// using graph = ComputationalDagEdgeIdxVectorImplDefIntT; +// using VertexType = graph::VertexIdx; +// using KlMove = KlMoveStruct; // graph dag; -// const VertexType v1 = dag.add_vertex(2, 9, 2); -// const VertexType v2 = dag.add_vertex(3, 8, 4); -// const VertexType v3 = dag.add_vertex(4, 7, 3); -// const VertexType v4 = dag.add_vertex(5, 6, 2); -// const VertexType v5 = dag.add_vertex(6, 5, 6); -// const VertexType v6 = dag.add_vertex(7, 4, 2); -// const VertexType v7 = dag.add_vertex(8, 3, 4); -// const VertexType v8 = dag.add_vertex(9, 2, 1); - -// dag.add_edge(v1, v2, 2); -// dag.add_edge(v1, v3, 2); -// dag.add_edge(v1, v4, 2); -// dag.add_edge(v2, v5, 12); -// dag.add_edge(v3, v5, 6); -// dag.add_edge(v3, v6, 7); -// 
dag.add_edge(v5, v8, 9); -// dag.add_edge(v4, v8, 9); +// const VertexType v1 = dag.AddVertex(2, 9, 2); +// const VertexType v2 = dag.AddVertex(3, 8, 4); +// const VertexType v3 = dag.AddVertex(4, 7, 3); +// const VertexType v4 = dag.AddVertex(5, 6, 2); +// const VertexType v5 = dag.AddVertex(6, 5, 6); +// const VertexType v6 = dag.AddVertex(7, 4, 2); +// const VertexType v7 = dag.AddVertex(8, 3, 4); +// const VertexType v8 = dag.AddVertex(9, 2, 1); + +// dag.AddEdge(v1, v2, 2); +// dag.AddEdge(v1, v3, 2); +// dag.AddEdge(v1, v4, 2); +// dag.AddEdge(v2, v5, 12); +// dag.AddEdge(v3, v5, 6); +// dag.AddEdge(v3, v6, 7); +// dag.AddEdge(v5, v8, 9); +// dag.AddEdge(v4, v8, 9); // BspArchitecture arch; @@ -842,105 +842,105 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // BspSchedule schedule(instance); -// schedule.setAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); -// schedule.setAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); +// schedule.SetAssignedProcessors({1, 1, 0, 0, 1, 0, 0, 1}); +// schedule.SetAssignedSupersteps({0, 0, 1, 1, 2, 2, 3, 3}); -// schedule.updateNumberOfSupersteps(); +// schedule.UpdateNumberOfSupersteps(); -// using cost_f = kl_total_comm_cost_function; -// using kl_improver_test = kl_improver_test; -// kl_improver_test kl; +// using cost_f = KlTotalCommCostFunction; +// using KlImproverTest = KlImproverTest; +// KlImproverTest kl; -// kl.setup_schedule(schedule); +// kl.SetupSchedule(schedule); -// auto node_selection = kl.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// auto node_selection = kl.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // std::set nodes_to_check = {0, 1, 2, 3, 4, 5, 6, 7}; -// auto& affinity = kl.get_affinity_table(); +// auto& affinity = kl.GetAffinityTable(); -// kl_move move_1(v7, 0.0, 0, 3, 0, 2); -// kl.update_affinity_table_test(move_1, node_selection); +// KlMove move_1(v7, 0.0, 0, 3, 0, 2); +// kl.UpdateAffinityTableTest(move_1, node_selection); // BspSchedule test_sched_1(instance); -// 
kl.get_active_schedule_test(test_sched_1); -// kl_improver_test kl_1; -// kl_1.setup_schedule(test_sched_1); -// kl_1.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_1); +// KlImproverTest kl_1; +// kl_1.SetupSchedule(test_sched_1); +// kl_1.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v7); -// check_equal_affinity_table(affinity, kl_1.get_affinity_table(), nodes_to_check); +// check_equal_affinity_table(affinity, kl_1.GetAffinityTable(), nodes_to_check); -// kl_move move_2(v4, 0.0, 0, 1 , 0, 2); -// kl.update_affinity_table_test(move_2, node_selection); +// KlMove move_2(v4, 0.0, 0, 1 , 0, 2); +// kl.UpdateAffinityTableTest(move_2, node_selection); // BspSchedule test_sched_2(instance); -// kl.get_active_schedule_test(test_sched_2); -// kl_improver_test kl_2; -// kl_2.setup_schedule(test_sched_2); -// kl_2.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_2); +// KlImproverTest kl_2; +// kl_2.SetupSchedule(test_sched_2); +// kl_2.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v4); -// check_equal_affinity_table(affinity, kl_2.get_affinity_table(), nodes_to_check); +// check_equal_affinity_table(affinity, kl_2.GetAffinityTable(), nodes_to_check); -// kl_move move_3(v2, 0.0, 1, 0 , 0, 0); -// kl.update_affinity_table_test(move_3, node_selection); +// KlMove move_3(v2, 0.0, 1, 0 , 0, 0); +// kl.UpdateAffinityTableTest(move_3, node_selection); // BspSchedule test_sched_3(instance); -// kl.get_active_schedule_test(test_sched_3); -// kl_improver_test kl_3; -// kl_3.setup_schedule(test_sched_3); -// kl_3.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_3); +// KlImproverTest kl_3; +// kl_3.SetupSchedule(test_sched_3); +// kl_3.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v2); -// check_equal_affinity_table(affinity, kl_3.get_affinity_table(), nodes_to_check); +// 
check_equal_affinity_table(affinity, kl_3.GetAffinityTable(), nodes_to_check); -// kl_move move_4(v6, 0.0, 0, 2 , 1, 3); -// kl.update_affinity_table_test(move_4, node_selection); +// KlMove move_4(v6, 0.0, 0, 2 , 1, 3); +// kl.UpdateAffinityTableTest(move_4, node_selection); // BspSchedule test_sched_4(instance); -// kl.get_active_schedule_test(test_sched_4); -// kl_improver_test kl_4; -// kl_4.setup_schedule(test_sched_4); -// kl_4.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_4); +// KlImproverTest kl_4; +// kl_4.SetupSchedule(test_sched_4); +// kl_4.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v6); -// check_equal_affinity_table(affinity, kl_4.get_affinity_table(), nodes_to_check); +// check_equal_affinity_table(affinity, kl_4.GetAffinityTable(), nodes_to_check); -// kl_move move_5(v8, 0.0, 1, 3 , 0, 2); -// kl.update_affinity_table_test(move_5, node_selection); +// KlMove move_5(v8, 0.0, 1, 3 , 0, 2); +// kl.UpdateAffinityTableTest(move_5, node_selection); // BspSchedule test_sched_5(instance); -// kl.get_active_schedule_test(test_sched_5); -// kl_improver_test kl_5; -// kl_5.setup_schedule(test_sched_5); -// kl_5.insert_gain_heap_test({0, 1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_5); +// KlImproverTest kl_5; +// kl_5.SetupSchedule(test_sched_5); +// kl_5.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v8); -// check_equal_affinity_table(affinity, kl_5.get_affinity_table(), nodes_to_check); +// check_equal_affinity_table(affinity, kl_5.GetAffinityTable(), nodes_to_check); -// kl_move move_6(v3, 0.0, 0, 1 , 1, 1); -// kl.update_affinity_table_test(move_6, node_selection); +// KlMove move_6(v3, 0.0, 0, 1 , 1, 1); +// kl.UpdateAffinityTableTest(move_6, node_selection); // BspSchedule test_sched_6(instance); -// kl.get_active_schedule_test(test_sched_6); -// kl_improver_test kl_6; -// kl_6.setup_schedule(test_sched_6); -// kl_6.insert_gain_heap_test({0, 
1, 2, 3, 4, 5, 6, 7}); +// kl.GetActiveSchedule_test(test_sched_6); +// KlImproverTest kl_6; +// kl_6.SetupSchedule(test_sched_6); +// kl_6.InsertGainHeapTest({0, 1, 2, 3, 4, 5, 6, 7}); // nodes_to_check.erase(v3); -// check_equal_affinity_table(affinity, kl_6.get_affinity_table(), nodes_to_check); +// check_equal_affinity_table(affinity, kl_6.GetAffinityTable(), nodes_to_check); // }; // BOOST_AUTO_TEST_CASE(kl_total_comm_large_test_graphs) { // std::vector filenames_graph = large_spaa_graphs(); -// using graph = computational_dag_edge_idx_vector_impl_def_int_t; +// using graph = ComputationalDagEdgeIdxVectorImplDefIntT; // // Getting root git directory // std::filesystem::path cwd = std::filesystem::current_path(); @@ -951,21 +951,21 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // } // for (auto &filename_graph : filenames_graph) { -// GreedyBspScheduler test_scheduler; +// GreedyBspScheduler test_scheduler; // BspInstance instance; -// bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), -// instance.getComputationalDag()); +// bool status_graph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), +// instance.GetComputationalDag()); -// instance.getArchitecture().setSynchronisationCosts(500); -// instance.getArchitecture().setCommunicationCosts(5); -// instance.getArchitecture().setNumberOfProcessors(4); +// instance.GetArchitecture().SetSynchronisationCosts(500); +// instance.GetArchitecture().SetCommunicationCosts(5); +// instance.GetArchitecture().SetNumberOfProcessors(4); // std::vector> send_cost = {{0,1,4,4}, // {1,0,4,4}, // {4,4,0,1}, // {4,4,1,0}}; -// instance.getArchitecture().SetSendCosts(send_cost); +// instance.GetArchitecture().SetSendCosts(send_cost); // if (!status_graph) { @@ -973,35 +973,35 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // BOOST_CHECK(false); // } -// add_mem_weights(instance.getComputationalDag()); +// add_mem_weights(instance.GetComputationalDag()); // 
BspSchedule schedule(instance); -// const auto result = test_scheduler.computeSchedule(schedule); +// const auto result = test_scheduler.ComputeSchedule(schedule); -// schedule.updateNumberOfSupersteps(); +// schedule.UpdateNumberOfSupersteps(); -// std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps() +// std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.NumberOfSupersteps() // << " number of supersteps"<< std::endl; // BspSchedule schedule_2(schedule); -// BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); -// BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); -// BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); +// BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); +// BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); +// BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); -// kl_total_comm_improver kl; +// KlTotalCommImprover kl; // auto start_time = std::chrono::high_resolution_clock::now(); -// auto status = kl.improveSchedule(schedule); +// auto status = kl.ImproveSchedule(schedule); // auto finish_time = std::chrono::high_resolution_clock::now(); // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); // std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << -// schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// schedule.NumberOfSupersteps() << " number of supersteps"<< std::endl; -// BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); +// BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); // // kl_total_comm_test kl_old; @@ -1012,17 +1012,17 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // // duration = 
std::chrono::duration_cast(finish_time - start_time).count(); // // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " -// << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; +// << schedule_2.NumberOfSupersteps() << " number of supersteps"<< std::endl; -// // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); +// // BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// // BOOST_CHECK_EQUAL(schedule_2.SatisfiesPrecedenceConstraints(), true); // } // } // BOOST_AUTO_TEST_CASE(kl_total_comm_large_test_graphs_mt) { // std::vector filenames_graph = large_spaa_graphs(); -// using graph = computational_dag_edge_idx_vector_impl_def_int_t; +// using graph = ComputationalDagEdgeIdxVectorImplDefIntT; // // Getting root git directory // std::filesystem::path cwd = std::filesystem::current_path(); @@ -1033,21 +1033,21 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // } // for (auto &filename_graph : filenames_graph) { -// GreedyBspScheduler test_scheduler; +// GreedyBspScheduler test_scheduler; // BspInstance instance; -// bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), -// instance.getComputationalDag()); +// bool status_graph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), +// instance.GetComputationalDag()); -// instance.getArchitecture().setSynchronisationCosts(500); -// instance.getArchitecture().setCommunicationCosts(5); -// instance.getArchitecture().setNumberOfProcessors(4); +// instance.GetArchitecture().SetSynchronisationCosts(500); +// instance.GetArchitecture().SetCommunicationCosts(5); +// instance.GetArchitecture().SetNumberOfProcessors(4); // std::vector> send_cost = {{0,1,4,4}, // {1,0,4,4}, // {4,4,0,1}, // {4,4,1,0}}; -// 
instance.getArchitecture().SetSendCosts(send_cost); +// instance.GetArchitecture().SetSendCosts(send_cost); // if (!status_graph) { @@ -1055,35 +1055,35 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // BOOST_CHECK(false); // } -// add_mem_weights(instance.getComputationalDag()); +// add_mem_weights(instance.GetComputationalDag()); // BspSchedule schedule(instance); -// const auto result = test_scheduler.computeSchedule(schedule); +// const auto result = test_scheduler.ComputeSchedule(schedule); -// schedule.updateNumberOfSupersteps(); +// schedule.UpdateNumberOfSupersteps(); -// std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps() +// std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.NumberOfSupersteps() // << " number of supersteps"<< std::endl; // BspSchedule schedule_2(schedule); -// BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); -// BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); -// BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); +// BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); +// BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); +// BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); -// kl_total_comm_improver_mt kl; +// KlTotalCommImprover_mt kl; // auto start_time = std::chrono::high_resolution_clock::now(); -// auto status = kl.improveSchedule(schedule); +// auto status = kl.ImproveSchedule(schedule); // auto finish_time = std::chrono::high_resolution_clock::now(); // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); // std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << -// schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// schedule.NumberOfSupersteps() << " number of supersteps"<< std::endl; -// BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// 
BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); +// BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// BOOST_CHECK_EQUAL(schedule.SatisfiesPrecedenceConstraints(), true); // // kl_total_comm_test kl_old; @@ -1094,10 +1094,10 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // // duration = std::chrono::duration_cast(finish_time - start_time).count(); // // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " -// << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; +// << schedule_2.NumberOfSupersteps() << " number of supersteps"<< std::endl; -// // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); -// // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); +// // BOOST_CHECK(status == ReturnStatus::OSP_SUCCESS || status == ReturnStatus::BEST_FOUND); +// // BOOST_CHECK_EQUAL(schedule_2.SatisfiesPrecedenceConstraints(), true); // } // } diff --git a/tests/kl_util.cpp b/tests/kl_util.cpp index 0057779f..61919f9b 100644 --- a/tests/kl_util.cpp +++ b/tests/kl_util.cpp @@ -17,39 +17,39 @@ limitations under the License. 
*/ #define BOOST_TEST_MODULE kl_util -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_util.hpp" #include #include #include #include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_active_schedule.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" using namespace osp; -using graph = computational_dag_edge_idx_vector_impl_def_int_t; -using kl_active_schedule_t = kl_active_schedule; +using Graph = ComputationalDagEdgeIdxVectorImplDefIntT; +using KlActiveScheduleT = KlActiveSchedule; // Test fixture for setting up a schedule struct ScheduleFixture { - BspInstance instance; - BspSchedule schedule; - kl_active_schedule_t active_schedule; + BspInstance instance_; + BspSchedule schedule_; + KlActiveScheduleT activeSchedule_; - ScheduleFixture() : schedule(instance) { + ScheduleFixture() : schedule_(instance_) { // Setup a simple graph and schedule - auto &dag = instance.getComputationalDag(); + auto &dag = instance_.GetComputationalDag(); for (int i = 0; i < 20; ++i) { - dag.add_vertex(i + 1, i + 1, i + 1); + dag.AddVertex(i + 1, i + 1, i + 1); } for (unsigned i = 0; i < 19; ++i) { - dag.add_edge(i, i + 1, 1); + dag.AddEdge(i, i + 1, 1); } - instance.getArchitecture().setNumberOfProcessors(4); - instance.getArchitecture().setCommunicationCosts(1); - instance.getArchitecture().setSynchronisationCosts(10); + instance_.GetArchitecture().SetNumberOfProcessors(4); + instance_.GetArchitecture().SetCommunicationCosts(1); + instance_.GetArchitecture().SetSynchronisationCosts(10); std::vector procs(20); std::vector steps(20); @@ -59,441 +59,441 @@ struct ScheduleFixture { steps[i] = i; } - schedule.setAssignedProcessors(std::move(procs)); - schedule.setAssignedSupersteps(std::move(steps)); - schedule.updateNumberOfSupersteps(); + 
schedule_.SetAssignedProcessors(std::move(procs)); + schedule_.SetAssignedSupersteps(std::move(steps)); + schedule_.UpdateNumberOfSupersteps(); - active_schedule.initialize(schedule); + activeSchedule_.Initialize(schedule_); } }; BOOST_FIXTURE_TEST_SUITE(kl_util_tests, ScheduleFixture) -// Tests for reward_penalty_strategy -BOOST_AUTO_TEST_CASE(reward_penalty_strategy_test) { - reward_penalty_strategy rps; - rps.initialize(active_schedule, 10.0, 20.0); +// Tests for RewardPenaltyStrategy +BOOST_AUTO_TEST_CASE(RewardPenaltyStrategyTest) { + RewardPenaltyStrategy rps; + rps.Initialize(activeSchedule_, 10.0, 20.0); - BOOST_CHECK_EQUAL(rps.max_weight, 20.0); - BOOST_CHECK_CLOSE(rps.initial_penalty, std::sqrt(20.0), 1e-9); + BOOST_CHECK_EQUAL(rps.maxWeight_, 20.0); + BOOST_CHECK_CLOSE(rps.initialPenalty_, std::sqrt(20.0), 1e-9); - rps.init_reward_penalty(2.0); - BOOST_CHECK_CLOSE(rps.penalty, std::sqrt(20.0) * 2.0, 1e-9); - BOOST_CHECK_CLOSE(rps.reward, 20.0 * 2.0, 1e-9); + rps.InitRewardPenalty(2.0); + BOOST_CHECK_CLOSE(rps.penalty_, std::sqrt(20.0) * 2.0, 1e-9); + BOOST_CHECK_CLOSE(rps.reward_, 20.0 * 2.0, 1e-9); } // Tests for lock managers template -void test_lock_manager() { +void TestLockManager() { LockManager lm; - lm.initialize(10); - - BOOST_CHECK(!lm.is_locked(5)); - lm.lock(5); - BOOST_CHECK(lm.is_locked(5)); - BOOST_CHECK(!lm.is_locked(6)); - lm.unlock(5); - BOOST_CHECK(!lm.is_locked(5)); - - lm.lock(1); - lm.lock(3); - lm.lock(5); - BOOST_CHECK(lm.is_locked(3)); - lm.clear(); - BOOST_CHECK(!lm.is_locked(1)); - BOOST_CHECK(!lm.is_locked(3)); - BOOST_CHECK(!lm.is_locked(5)); + lm.Initialize(10); + + BOOST_CHECK(!lm.IsLocked(5)); + lm.Lock(5); + BOOST_CHECK(lm.IsLocked(5)); + BOOST_CHECK(!lm.IsLocked(6)); + lm.Unlock(5); + BOOST_CHECK(!lm.IsLocked(5)); + + lm.Lock(1); + lm.Lock(3); + lm.Lock(5); + BOOST_CHECK(lm.IsLocked(3)); + lm.Clear(); + BOOST_CHECK(!lm.IsLocked(1)); + BOOST_CHECK(!lm.IsLocked(3)); + BOOST_CHECK(!lm.IsLocked(5)); } 
-BOOST_AUTO_TEST_CASE(lock_managers_test) { - test_lock_manager>(); - test_lock_manager>(); +BOOST_AUTO_TEST_CASE(LockManagersTest) { + TestLockManager>(); + TestLockManager>(); } -// Tests for adaptive_affinity_table -BOOST_AUTO_TEST_CASE(adaptive_affinity_table_test) { - using affinity_table_t = adaptive_affinity_table; - affinity_table_t table; - table.initialize(active_schedule, 5); +// Tests for AdaptiveAffinityTable +BOOST_AUTO_TEST_CASE(AdaptiveAffinityTableTest) { + using AffinityTableT = AdaptiveAffinityTable; + AffinityTableT table; + table.Initialize(activeSchedule_, 5); BOOST_CHECK_EQUAL(table.size(), 0); // Insert - BOOST_CHECK(table.insert(0)); + BOOST_CHECK(table.Insert(0)); BOOST_CHECK_EQUAL(table.size(), 1); - BOOST_CHECK(table.is_selected(0)); - BOOST_CHECK(!table.is_selected(1)); - BOOST_CHECK(!table.insert(0)); // already present + BOOST_CHECK(table.IsSelected(0)); + BOOST_CHECK(!table.IsSelected(1)); + BOOST_CHECK(!table.Insert(0)); // already present // Remove - table.remove(0); + table.Remove(0); BOOST_CHECK_EQUAL(table.size(), 0); - BOOST_CHECK(!table.is_selected(0)); + BOOST_CHECK(!table.IsSelected(0)); // Insert more to test resizing for (unsigned i = 0; i < 10; ++i) { - BOOST_CHECK(table.insert(i)); + BOOST_CHECK(table.Insert(i)); } BOOST_CHECK_EQUAL(table.size(), 10); for (unsigned i = 0; i < 10; ++i) { - BOOST_CHECK(table.is_selected(i)); + BOOST_CHECK(table.IsSelected(i)); } // Test trim - table.remove(3); - table.remove(5); - table.remove(7); + table.Remove(3); + table.Remove(5); + table.Remove(7); BOOST_CHECK_EQUAL(table.size(), 7); - table.trim(); + table.Trim(); BOOST_CHECK_EQUAL(table.size(), 7); // After trim, the gaps should be filled. 
- std::set expected_selected = {0, 1, 2, 4, 6, 8, 9}; - std::set actual_selected; - const auto &selected_nodes_vec = table.get_selected_nodes(); + std::set expectedSelected = {0, 1, 2, 4, 6, 8, 9}; + std::set actualSelected; + const auto &selectedNodesVec = table.GetSelectedNodes(); for (size_t i = 0; i < table.size(); ++i) { - actual_selected.insert(static_cast(selected_nodes_vec[i])); + actualSelected.insert(static_cast(selectedNodesVec[i])); } - BOOST_CHECK(expected_selected == actual_selected); + BOOST_CHECK(expectedSelected == actualSelected); for (unsigned i = 0; i < 20; ++i) { - if (expected_selected.count(i)) { - BOOST_CHECK(table.is_selected(i)); + if (expectedSelected.count(i)) { + BOOST_CHECK(table.IsSelected(i)); } else { - BOOST_CHECK(!table.is_selected(i)); + BOOST_CHECK(!table.IsSelected(i)); } } // Check that indices are correct for (size_t i = 0; i < table.size(); ++i) { - BOOST_CHECK_EQUAL(table.get_selected_nodes_idx(selected_nodes_vec[i]), i); + BOOST_CHECK_EQUAL(table.GetSelectedNodesIdx(selectedNodesVec[i]), i); } // Test reset - table.reset_node_selection(); + table.ResetNodeSelection(); BOOST_CHECK_EQUAL(table.size(), 0); - BOOST_CHECK(!table.is_selected(0)); - BOOST_CHECK(!table.is_selected(1)); + BOOST_CHECK(!table.IsSelected(0)); + BOOST_CHECK(!table.IsSelected(1)); } -// Tests for static_affinity_table -BOOST_AUTO_TEST_CASE(static_affinity_table_test) { - using affinity_table_t = static_affinity_table; - affinity_table_t table; - table.initialize(active_schedule, 0); // size is ignored +// Tests for StaticAffinityTable +BOOST_AUTO_TEST_CASE(StaticAffinityTableTest) { + using AffinityTableT = StaticAffinityTable; + AffinityTableT table; + table.Initialize(activeSchedule_, 0); // size is ignored BOOST_CHECK_EQUAL(table.size(), 0); // Insert - BOOST_CHECK(table.insert(0)); + BOOST_CHECK(table.Insert(0)); BOOST_CHECK_EQUAL(table.size(), 1); - BOOST_CHECK(table.is_selected(0)); - BOOST_CHECK(!table.is_selected(1)); - table.insert(0); // 
should be a no-op on size + BOOST_CHECK(table.IsSelected(0)); + BOOST_CHECK(!table.IsSelected(1)); + table.Insert(0); // should be a no-op on size BOOST_CHECK_EQUAL(table.size(), 1); // Remove - table.remove(0); + table.Remove(0); BOOST_CHECK_EQUAL(table.size(), 0); - BOOST_CHECK(!table.is_selected(0)); + BOOST_CHECK(!table.IsSelected(0)); // Insert multiple for (unsigned i = 0; i < 10; ++i) { - table.insert(i); + table.Insert(i); } BOOST_CHECK_EQUAL(table.size(), 10); // Test reset - table.reset_node_selection(); + table.ResetNodeSelection(); BOOST_CHECK_EQUAL(table.size(), 0); - BOOST_CHECK(!table.is_selected(0)); + BOOST_CHECK(!table.IsSelected(0)); } -// Tests for vertex_selection_strategy -BOOST_AUTO_TEST_CASE(vertex_selection_strategy_test) { - using affinity_table_t = adaptive_affinity_table; - using selection_strategy_t = vertex_selection_strategy; +// Tests for VertexSelectionStrategy +BOOST_AUTO_TEST_CASE(VertexSelectionStrategyTest) { + using AffinityTableT = AdaptiveAffinityTable; + using SelectionStrategyT = VertexSelectionStrategy; - selection_strategy_t strategy; + SelectionStrategyT strategy; std::mt19937 gen(0); - const unsigned end_step = active_schedule.num_steps() - 1; - strategy.initialize(active_schedule, gen, 0, end_step); - strategy.selection_threshold = 5; + const unsigned endStep = activeSchedule_.NumSteps() - 1; + strategy.Initialize(activeSchedule_, gen, 0, endStep); + strategy.selectionThreshold_ = 5; // Test permutation selection - strategy.setup(0, end_step); - BOOST_CHECK_EQUAL(strategy.permutation.size(), 20); + strategy.Setup(0, endStep); + BOOST_CHECK_EQUAL(strategy.permutation_.size(), 20); - affinity_table_t table; - table.initialize(active_schedule, 20); + AffinityTableT table; + table.Initialize(activeSchedule_, 20); - strategy.select_nodes_permutation_threshold(5, table); + strategy.SelectNodesPermutationThreshold(5, table); BOOST_CHECK_EQUAL(table.size(), 5); - BOOST_CHECK_EQUAL(strategy.permutation_idx, 5); + 
 BOOST_CHECK_EQUAL(strategy.permutationIdx_, 5); - strategy.select_nodes_permutation_threshold(5, table); + strategy.SelectNodesPermutationThreshold(5, table); BOOST_CHECK_EQUAL(table.size(), 10); - BOOST_CHECK_EQUAL(strategy.permutation_idx, 10); + BOOST_CHECK_EQUAL(strategy.permutationIdx_, 10); - strategy.select_nodes_permutation_threshold(15, table); + strategy.SelectNodesPermutationThreshold(15, table); BOOST_CHECK_EQUAL(table.size(), 20); - BOOST_CHECK_EQUAL(strategy.permutation_idx, 0); // should wrap around and reshuffle + BOOST_CHECK_EQUAL(strategy.permutationIdx_, 0); // should wrap around and reshuffle - table.reset_node_selection(); - strategy.max_work_counter = 0; - strategy.select_nodes_max_work_proc(5, table, 0, 4); + table.ResetNodeSelection(); + strategy.maxWorkCounter_ = 0; + strategy.SelectNodesMaxWorkProc(5, table, 0, 4); // In the new fixture, steps 0-4 contain nodes 0-4 respectively. // select_nodes_max_work_proc will select one node from each step. BOOST_CHECK_EQUAL(table.size(), 5); - BOOST_CHECK(table.is_selected(0)); - BOOST_CHECK(table.is_selected(1)); - BOOST_CHECK(table.is_selected(2)); - BOOST_CHECK(table.is_selected(3)); - BOOST_CHECK(table.is_selected(4)); - BOOST_CHECK_EQUAL(strategy.max_work_counter, 5); + BOOST_CHECK(table.IsSelected(0)); + BOOST_CHECK(table.IsSelected(1)); + BOOST_CHECK(table.IsSelected(2)); + BOOST_CHECK(table.IsSelected(3)); + BOOST_CHECK(table.IsSelected(4)); + BOOST_CHECK_EQUAL(strategy.maxWorkCounter_, 5); } BOOST_AUTO_TEST_SUITE_END() -BOOST_FIXTURE_TEST_SUITE(kl_active_schedule_tests, ScheduleFixture) +BOOST_FIXTURE_TEST_SUITE(KlActiveScheduleTests, ScheduleFixture) -using VertexType = graph::vertex_idx; +using VertexType = Graph::VertexIdx; -BOOST_AUTO_TEST_CASE(kl_move_struct_test) { - using kl_move = kl_move_struct; - kl_move move(5, 10.0, 1, 2, 3, 4); +BOOST_AUTO_TEST_CASE(KlMoveStructTest) { + using KlMove = KlMoveStruct; + KlMove move(5, 10.0, 1, 2, 3, 4); - kl_move reversed = move.reverse_move(); + 
KlMove reversed = move.ReverseMove(); - BOOST_CHECK_EQUAL(reversed.node, 5); - BOOST_CHECK_EQUAL(reversed.gain, -10.0); - BOOST_CHECK_EQUAL(reversed.from_proc, 3); - BOOST_CHECK_EQUAL(reversed.from_step, 4); - BOOST_CHECK_EQUAL(reversed.to_proc, 1); - BOOST_CHECK_EQUAL(reversed.to_step, 2); + BOOST_CHECK_EQUAL(reversed.node_, 5); + BOOST_CHECK_EQUAL(reversed.gain_, -10.0); + BOOST_CHECK_EQUAL(reversed.fromProc_, 3); + BOOST_CHECK_EQUAL(reversed.fromStep_, 4); + BOOST_CHECK_EQUAL(reversed.toProc_, 1); + BOOST_CHECK_EQUAL(reversed.toStep_, 2); } -BOOST_AUTO_TEST_CASE(work_datastructures_initialization_test) { - auto &wd = active_schedule.work_datastructures; +BOOST_AUTO_TEST_CASE(WorkDatastructuresInitializationTest) { + auto &wd = activeSchedule_.workDatastructures_; // Step 0: node 0 on proc 0, work 1. Other procs have 0 work. - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 0), 1); - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 1), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 2), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 3), 0); - BOOST_CHECK_EQUAL(wd.step_max_work(0), 1); - BOOST_CHECK_EQUAL(wd.step_second_max_work(0), 0); - BOOST_CHECK_EQUAL(wd.step_max_work_processor_count[0], 1); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 0), 1); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 1), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 2), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 3), 0); + BOOST_CHECK_EQUAL(wd.StepMaxWork(0), 1); + BOOST_CHECK_EQUAL(wd.StepSecondMaxWork(0), 0); + BOOST_CHECK_EQUAL(wd.stepMaxWorkProcessorCount_[0], 1); // Step 4: node 4 on proc 0, work 5. 
- BOOST_CHECK_EQUAL(wd.step_proc_work(4, 0), 5); - BOOST_CHECK_EQUAL(wd.step_proc_work(4, 1), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(4, 2), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(4, 3), 0); - BOOST_CHECK_EQUAL(wd.step_max_work(4), 5); - BOOST_CHECK_EQUAL(wd.step_second_max_work(4), 0); - BOOST_CHECK_EQUAL(wd.step_max_work_processor_count[4], 1); + BOOST_CHECK_EQUAL(wd.StepProcWork(4, 0), 5); + BOOST_CHECK_EQUAL(wd.StepProcWork(4, 1), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(4, 2), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(4, 3), 0); + BOOST_CHECK_EQUAL(wd.StepMaxWork(4), 5); + BOOST_CHECK_EQUAL(wd.StepSecondMaxWork(4), 0); + BOOST_CHECK_EQUAL(wd.stepMaxWorkProcessorCount_[4], 1); } -BOOST_AUTO_TEST_CASE(work_datastructures_apply_move_test) { - auto &wd = active_schedule.work_datastructures; - using kl_move = kl_move_struct; +BOOST_AUTO_TEST_CASE(WorkDatastructuresApplyMoveTest) { + auto &wd = activeSchedule_.workDatastructures_; + using KlMove = KlMoveStruct; // Move within same superstep // Move node 0 (work 1) from proc 0 to proc 3 in step 0 - kl_move move1(0, 0.0, 0, 0, 3, 0); - wd.apply_move(move1, 1); // work_weight of node 0 is 1 + KlMove move1(0, 0.0, 0, 0, 3, 0); + wd.ApplyMove(move1, 1); // work_weight of node 0 is 1 // Before: {1,0,0,0}, After: {0,0,0,1} - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 0), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 1), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 2), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 3), 1); - BOOST_CHECK_EQUAL(wd.step_max_work(0), 1); - BOOST_CHECK_EQUAL(wd.step_second_max_work(0), 0); - BOOST_CHECK_EQUAL(wd.step_max_work_processor_count[0], 1); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 0), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 1), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 2), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 3), 1); + BOOST_CHECK_EQUAL(wd.StepMaxWork(0), 1); + BOOST_CHECK_EQUAL(wd.StepSecondMaxWork(0), 0); + BOOST_CHECK_EQUAL(wd.stepMaxWorkProcessorCount_[0], 1); // Move to 
different superstep // Move node 4 (work 5) from proc 0, step 4 to proc 1, step 0 - kl_move move2(4, 0.0, 0, 4, 1, 0); - wd.apply_move(move2, 5); // work_weight of node 4 is 5 + KlMove move2(4, 0.0, 0, 4, 1, 0); + wd.ApplyMove(move2, 5); // work_weight of node 4 is 5 // Step 0 state after move1: {0,0,0,1}. max=1 // After move2: {0,5,0,1}. max=5 - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 0), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 1), 5); - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 2), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(0, 3), 1); - BOOST_CHECK_EQUAL(wd.step_max_work(0), 5); - BOOST_CHECK_EQUAL(wd.step_second_max_work(0), 1); - BOOST_CHECK_EQUAL(wd.step_max_work_processor_count[0], 1); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 0), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 1), 5); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 2), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(0, 3), 1); + BOOST_CHECK_EQUAL(wd.StepMaxWork(0), 5); + BOOST_CHECK_EQUAL(wd.StepSecondMaxWork(0), 1); + BOOST_CHECK_EQUAL(wd.stepMaxWorkProcessorCount_[0], 1); // Step 4 state before move2: {5,0,0,0}. max=5 // After move2: {0,0,0,0}. max=0 - BOOST_CHECK_EQUAL(wd.step_proc_work(4, 0), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(4, 1), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(4, 2), 0); - BOOST_CHECK_EQUAL(wd.step_proc_work(4, 3), 0); - BOOST_CHECK_EQUAL(wd.step_max_work(4), 0); - BOOST_CHECK_EQUAL(wd.step_second_max_work(4), 0); - BOOST_CHECK_EQUAL(wd.step_max_work_processor_count[4], 3); // All 4 procs have work 0, so count is 3. + BOOST_CHECK_EQUAL(wd.StepProcWork(4, 0), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(4, 1), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(4, 2), 0); + BOOST_CHECK_EQUAL(wd.StepProcWork(4, 3), 0); + BOOST_CHECK_EQUAL(wd.StepMaxWork(4), 0); + BOOST_CHECK_EQUAL(wd.StepSecondMaxWork(4), 0); + BOOST_CHECK_EQUAL(wd.stepMaxWorkProcessorCount_[4], 3); // All 4 procs have work 0, so count is 3. 
} -BOOST_AUTO_TEST_CASE(active_schedule_initialization_test) { - BOOST_CHECK_EQUAL(active_schedule.num_steps(), 20); - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(0), 0); - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(0), 0); - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(19), 3); - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(19), 19); - BOOST_CHECK(active_schedule.is_feasible()); +BOOST_AUTO_TEST_CASE(ActiveScheduleInitializationTest) { + BOOST_CHECK_EQUAL(activeSchedule_.NumSteps(), 20); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedProcessor(0), 0); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(0), 0); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedProcessor(19), 3); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(19), 19); + BOOST_CHECK(activeSchedule_.IsFeasible()); } -BOOST_AUTO_TEST_CASE(active_schedule_apply_move_test) { - using kl_move = kl_move_struct; - using thread_data_t = thread_local_active_schedule_data; - thread_data_t thread_data; - thread_data.initialize_cost(0); +BOOST_AUTO_TEST_CASE(ActiveScheduleApplyMoveTest) { + using KlMove = KlMoveStruct; + using ThreadDataT = ThreadLocalActiveScheduleData; + ThreadDataT threadData; + threadData.InitializeCost(0); // Move node 1 (step 1) to step 0. This should create a violation with node 0 (step 0). // Edge 0 -> 1. 
- kl_move move(1, 0.0, 1, 1, 1, 0); - active_schedule.apply_move(move, thread_data); + KlMove move(1, 0.0, 1, 1, 1, 0); + activeSchedule_.ApplyMove(move, threadData); - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(1), 0); - BOOST_CHECK_EQUAL(active_schedule.getSetSchedule().step_processor_vertices[1][1].count(1), 0); - BOOST_CHECK_EQUAL(active_schedule.getSetSchedule().step_processor_vertices[0][1].count(1), 1); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(1), 0); + BOOST_CHECK_EQUAL(activeSchedule_.GetSetSchedule().stepProcessorVertices_[1][1].count(1), 0); + BOOST_CHECK_EQUAL(activeSchedule_.GetSetSchedule().stepProcessorVertices_[0][1].count(1), 1); - BOOST_CHECK(!thread_data.feasible); - BOOST_CHECK_EQUAL(thread_data.current_violations.size(), 1); - BOOST_CHECK_EQUAL(thread_data.new_violations.size(), 1); - BOOST_CHECK(thread_data.new_violations.count(0)); + BOOST_CHECK(!threadData.feasible_); + BOOST_CHECK_EQUAL(threadData.currentViolations_.size(), 1); + BOOST_CHECK_EQUAL(threadData.newViolations_.size(), 1); + BOOST_CHECK(threadData.newViolations_.count(0)); } -BOOST_AUTO_TEST_CASE(active_schedule_compute_violations_test) { - using thread_data_t = thread_local_active_schedule_data; - thread_data_t thread_data; +BOOST_AUTO_TEST_CASE(ActiveScheduleComputeViolationsTest) { + using ThreadDataT = ThreadLocalActiveScheduleData; + ThreadDataT threadData; // Manually create a violation - schedule.setAssignedSuperstep(1, 0); // node 1 is now in step 0 (was 1) - schedule.setAssignedSuperstep(0, 1); // node 0 is now in step 1 (was 0) + schedule_.SetAssignedSuperstep(1, 0); // node 1 is now in step 0 (was 1) + schedule_.SetAssignedSuperstep(0, 1); // node 0 is now in step 1 (was 0) // Now we have a violation for edge 0 -> 1, since step(0) > step(1) - active_schedule.initialize(schedule); + activeSchedule_.Initialize(schedule_); - active_schedule.compute_violations(thread_data); + activeSchedule_.ComputeViolations(threadData); - 
BOOST_CHECK(!thread_data.feasible); - BOOST_CHECK_EQUAL(thread_data.current_violations.size(), 1); + BOOST_CHECK(!threadData.feasible_); + BOOST_CHECK_EQUAL(threadData.currentViolations_.size(), 1); } -BOOST_AUTO_TEST_CASE(active_schedule_revert_moves_test) { - using kl_move = kl_move_struct; - using thread_data_t = thread_local_active_schedule_data; +BOOST_AUTO_TEST_CASE(ActiveScheduleRevertMovesTest) { + using KlMove = KlMoveStruct; + using ThreadDataT = ThreadLocalActiveScheduleData; - kl_active_schedule_t original_schedule; - original_schedule.initialize(schedule); + KlActiveScheduleT originalSchedule; + originalSchedule.Initialize(schedule_); - thread_data_t thread_data; - thread_data.initialize_cost(0); + ThreadDataT threadData; + threadData.InitializeCost(0); - kl_move move1(0, 0.0, 0, 0, 1, 0); - kl_move move2(1, 0.0, 1, 1, 2, 1); - active_schedule.apply_move(move1, thread_data); - active_schedule.apply_move(move2, thread_data); + KlMove move1(0, 0.0, 0, 0, 1, 0); + KlMove move2(1, 0.0, 1, 1, 2, 1); + activeSchedule_.ApplyMove(move1, threadData); + activeSchedule_.ApplyMove(move2, threadData); - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(0), 1); - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(1), 1); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedProcessor(0), 1); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(1), 1); - struct dummy_comm_ds { - void update_datastructure_after_move(const kl_move &, unsigned, unsigned) {} - } comm_ds; + struct DummyCommDs { + void UpdateDatastructureAfterMove(const KlMove &, unsigned, unsigned) {} + } commDs; // Revert both moves - active_schedule.revert_schedule_to_bound(0, 0.0, true, comm_ds, thread_data, 0, 4); + activeSchedule_.RevertScheduleToBound(0, 0.0, true, commDs, threadData, 0, 4); - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(0), original_schedule.assigned_processor(0)); - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(0), original_schedule.assigned_superstep(0)); - 
BOOST_CHECK_EQUAL(active_schedule.assigned_processor(1), original_schedule.assigned_processor(1)); - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(1), original_schedule.assigned_superstep(1)); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedProcessor(0), originalSchedule.AssignedProcessor(0)); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(0), originalSchedule.AssignedSuperstep(0)); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedProcessor(1), originalSchedule.AssignedProcessor(1)); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(1), originalSchedule.AssignedSuperstep(1)); } -BOOST_AUTO_TEST_CASE(active_schedule_revert_to_best_schedule_test) { - using kl_move = kl_move_struct; - using thread_data_t = thread_local_active_schedule_data; +BOOST_AUTO_TEST_CASE(ActiveScheduleRevertToBestScheduleTest) { + using KlMove = KlMoveStruct; + using ThreadDataT = ThreadLocalActiveScheduleData; - thread_data_t thread_data; - thread_data.initialize_cost(100); + ThreadDataT threadData; + threadData.InitializeCost(100); // Apply 3 moves - kl_move move1(0, 0.0, 0, 0, 1, 0); // node 0 from (p0,s0) to (p1,s0) - active_schedule.apply_move(move1, thread_data); - thread_data.update_cost(-10); // cost 90 + KlMove move1(0, 0.0, 0, 0, 1, 0); // node 0 from (p0,s0) to (p1,s0) + activeSchedule_.ApplyMove(move1, threadData); + threadData.UpdateCost(-10); // cost 90 - kl_move move2(1, 0.0, 1, 1, 2, 1); // node 1 from (p1,s1) to (p2,s1) - active_schedule.apply_move(move2, thread_data); - thread_data.update_cost(-10); // cost 80, best is here + KlMove move2(1, 0.0, 1, 1, 2, 1); // node 1 from (p1,s1) to (p2,s1) + activeSchedule_.ApplyMove(move2, threadData); + threadData.UpdateCost(-10); // cost 80, best is here - kl_move move3(2, 0.0, 2, 2, 3, 2); // node 2 from (p2,s2) to (p3,s2) - active_schedule.apply_move(move3, thread_data); - thread_data.update_cost(+5); // cost 85 + KlMove move3(2, 0.0, 2, 2, 3, 2); // node 2 from (p2,s2) to (p3,s2) + activeSchedule_.ApplyMove(move3, 
threadData); + threadData.UpdateCost(+5); // cost 85 - BOOST_CHECK_EQUAL(thread_data.best_schedule_idx, 2); - BOOST_CHECK_EQUAL(thread_data.applied_moves.size(), 3); + BOOST_CHECK_EQUAL(threadData.bestScheduleIdx_, 2); + BOOST_CHECK_EQUAL(threadData.appliedMoves_.size(), 3); - struct dummy_comm_ds { - void update_datastructure_after_move(const kl_move &, unsigned, unsigned) {} - } comm_ds; + struct DummyCommDs { + void UpdateDatastructureAfterMove(const KlMove &, unsigned, unsigned) {} + } commDs; - unsigned end_step = active_schedule.num_steps() - 1; + unsigned endStep = activeSchedule_.NumSteps() - 1; // Revert to best. start_move=0 means no step removal logic is triggered. - active_schedule.revert_to_best_schedule(0, 0, comm_ds, thread_data, 0, end_step); + activeSchedule_.RevertToBestSchedule(0, 0, commDs, threadData, 0, endStep); - BOOST_CHECK_EQUAL(thread_data.cost, 80.0); // Check cost is reverted to best - BOOST_CHECK_EQUAL(thread_data.applied_moves.size(), 0); - BOOST_CHECK_EQUAL(thread_data.best_schedule_idx, 0); // Reset for next iteration + BOOST_CHECK_EQUAL(threadData.cost_, 80.0); // Check cost is reverted to best + BOOST_CHECK_EQUAL(threadData.appliedMoves_.size(), 0); + BOOST_CHECK_EQUAL(threadData.bestScheduleIdx_, 0); // Reset for next iteration // Check schedule state is after move2 - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(0), 1); // from move1 - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(0), 0); - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(1), 2); // from move2 - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(1), 1); - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(2), 2); // Reverted, so original - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(2), 2); // Reverted, so original + BOOST_CHECK_EQUAL(activeSchedule_.AssignedProcessor(0), 1); // from move1 + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(0), 0); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedProcessor(1), 2); // from move2 + 
BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(1), 1); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedProcessor(2), 2); // Reverted, so original + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(2), 2); // Reverted, so original } -BOOST_AUTO_TEST_CASE(active_schedule_swap_empty_step_fwd_test) { +BOOST_AUTO_TEST_CASE(ActiveScheduleSwapEmptyStepFwdTest) { // Make step 1 empty by moving node 1 to step 0 - active_schedule.getVectorSchedule().setAssignedSuperstep(1, 0); - active_schedule.initialize(active_schedule.getVectorSchedule()); // re-init to update set_schedule and work_ds + activeSchedule_.GetVectorSchedule().SetAssignedSuperstep(1, 0); + activeSchedule_.Initialize(activeSchedule_.GetVectorSchedule()); // re-init to update set_schedule and work_ds - BOOST_CHECK_EQUAL(active_schedule.get_step_total_work(1), 0); + BOOST_CHECK_EQUAL(activeSchedule_.GetStepTotalWork(1), 0); // Swap empty step 1 forward to position 3 - active_schedule.swap_empty_step_fwd(1, 3); + activeSchedule_.SwapEmptyStepFwd(1, 3); // Node from original step 2 should be in step 1 - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(2), 1); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(2), 1); // Node from original step 3 should be in step 2 - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(3), 2); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(3), 2); // Step 3 should now be empty - BOOST_CHECK_EQUAL(active_schedule.get_step_total_work(3), 0); + BOOST_CHECK_EQUAL(activeSchedule_.GetStepTotalWork(3), 0); } -BOOST_AUTO_TEST_CASE(active_schedule_remove_empty_step_test) { +BOOST_AUTO_TEST_CASE(ActiveScheduleRemoveEmptyStepTest) { // Make step 1 empty by moving node 1 to step 0 - active_schedule.getVectorSchedule().setAssignedSuperstep(1, 0); - active_schedule.initialize(active_schedule.getVectorSchedule()); + activeSchedule_.GetVectorSchedule().SetAssignedSuperstep(1, 0); + activeSchedule_.Initialize(activeSchedule_.GetVectorSchedule()); - unsigned original_num_steps = 
active_schedule.num_steps(); - unsigned original_step_of_node_8 = active_schedule.assigned_superstep(8); // should be 2 + unsigned originalNumSteps = activeSchedule_.NumSteps(); + unsigned originalStepOfNode8 = activeSchedule_.AssignedSuperstep(8); // should be 2 - active_schedule.remove_empty_step(1); + activeSchedule_.RemoveEmptyStep(1); - BOOST_CHECK_EQUAL(active_schedule.num_steps(), original_num_steps - 1); + BOOST_CHECK_EQUAL(activeSchedule_.NumSteps(), originalNumSteps - 1); // Node 8 should be shifted back by one step - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(8), original_step_of_node_8 - 1); // 8 -> 7 + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(8), originalStepOfNode8 - 1); // 8 -> 7 // Node 3 (in step 3) should be shifted back by one step - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(3), 2); + BOOST_CHECK_EQUAL(activeSchedule_.AssignedSuperstep(3), 2); } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp index cb13a16d..908b0493 100644 --- a/tests/max_bsp_schedulers.cpp +++ b/tests/max_bsp_schedulers.cpp @@ -34,13 +34,13 @@ limitations under the License. 
using namespace osp; -std::vector test_architectures() { return {"data/machine_params/p3.arch"}; } +std::vector TestArchitectures() { return {"data/machine_params/p3.arch"}; } -template -void run_test(Scheduler *test_scheduler) { +template +void RunTest(Scheduler *testScheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); - std::vector filenames_graph = tiny_spaa_graphs(); - std::vector filenames_architectures = test_architectures(); + std::vector filenamesGraph = TinySpaaGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -50,41 +50,41 @@ void run_test(Scheduler *test_scheduler) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - for (auto &filename_machine : filenames_architectures) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - std::cout << std::endl << "Scheduler: " << test_scheduler->getScheduleName() << std::endl; - std::cout << "Graph: " << name_graph << std::endl; - std::cout << "Architecture: " << name_machine << std::endl; + std::cout << std::endl << "Scheduler: " << testScheduler->GetScheduleName() << std::endl; + std::cout << "Graph: " << nameGraph << std::endl; + std::cout << "Architecture: " << nameMachine 
<< std::endl; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), instance.GetComputationalDag()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - BspSchedule schedule(instance); - const auto result = test_scheduler->computeSchedule(schedule); + BspSchedule schedule(instance); + const auto result = testScheduler->ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); } } } -template -void run_test_max_bsp(MaxBspScheduler *test_scheduler) { - std::vector filenames_graph = tiny_spaa_graphs(); - std::vector filenames_architectures = test_architectures(); +template +void RunTestMaxBsp(MaxBspScheduler *testScheduler) { + std::vector filenamesGraph = TinySpaaGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Locate project root std::filesystem::path cwd = std::filesystem::current_path(); @@ -92,53 +92,53 @@ void run_test_max_bsp(MaxBspScheduler *test_scheduler) { cwd = cwd.parent_path(); } - for (auto &filename_graph : filenames_graph) { - for (auto &filename_machine : filenames_architectures) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); - name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::string name_machine = 
filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameGraph.find_last_of("/\\") + 1); + nameGraph = nameGraph.substr(0, nameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); std::cout << std::endl - << "Scheduler (MaxBsp): " << test_scheduler->getScheduleName() << std::endl - << "Graph: " << name_graph << std::endl - << "Architecture: " << name_machine << std::endl; + << "Scheduler (MaxBsp): " << testScheduler->GetScheduleName() << std::endl + << "Graph: " << nameGraph << std::endl + << "Architecture: " << nameMachine << std::endl; - computational_dag_edge_idx_vector_impl_def_int_t graph; - BspArchitecture arch; + ComputationalDagEdgeIdxVectorImplDefIntT graph; + BspArchitecture arch; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), graph); - bool status_architecture = file_reader::readBspArchitecture((cwd / filename_machine).string(), arch); + bool statusGraph = file_reader::ReadGraph((cwd / filenameGraph).string(), graph); + bool statusArchitecture = file_reader::ReadBspArchitecture((cwd / filenameMachine).string(), arch); - BOOST_REQUIRE_MESSAGE(status_graph, "Failed to read graph: " << filename_graph); - BOOST_REQUIRE_MESSAGE(status_architecture, "Failed to read architecture: " << filename_machine); + BOOST_REQUIRE_MESSAGE(statusGraph, "Failed to read graph: " << filenameGraph); + BOOST_REQUIRE_MESSAGE(statusArchitecture, "Failed to read architecture: " << filenameMachine); - BspInstance instance(graph, arch); + BspInstance instance(graph, arch); - MaxBspSchedule schedule(instance); + MaxBspSchedule schedule(instance); - const auto result = 
test_scheduler->computeSchedule(schedule); + const auto result = testScheduler->ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(result, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(result, ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); } } } -// Tests computeSchedule(BspSchedule&) → staleness = 1 -BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_test_vector_impl) { - GreedyVarianceSspScheduler test; - run_test(&test); +// Tests ComputeSchedule(BspSchedule&) → staleness = 1 +BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerTestVectorImpl) { + GreedyVarianceSspScheduler test; + RunTest(&test); } -// Tests computeSchedule(BspSchedule&) → staleness = 1 (different graph impl) -BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_test_edge_idx_impl) { - GreedyVarianceSspScheduler test; - run_test(&test); +// Tests ComputeSchedule(BspSchedule&) → staleness = 1 (different graph impl) +BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerTestEdgeIdxImpl) { + GreedyVarianceSspScheduler test; + RunTest(&test); } -// Tests computeSchedule(MaxBspSchedule&) → staleness = 2 -BOOST_AUTO_TEST_CASE(GreedyVarianceSspScheduler_MaxBspSchedule_large_test) { - GreedyVarianceSspScheduler test; - run_test_max_bsp(&test); +// Tests ComputeSchedule(MaxBspSchedule&) → staleness = 2 +BOOST_AUTO_TEST_CASE(GreedyVarianceSspSchedulerMaxBspScheduleLargeTest) { + GreedyVarianceSspScheduler test; + RunTestMaxBsp(&test); } diff --git a/tests/maxbsp_converter_and_hc.cpp b/tests/maxbsp_converter_and_hc.cpp index a36fa7c4..b4620c33 100644 --- a/tests/maxbsp_converter_and_hc.cpp +++ b/tests/maxbsp_converter_and_hc.cpp @@ -29,13 +29,13 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(maxbsp_scheduling) { - using graph = computational_dag_vector_impl_def_t; +BOOST_AUTO_TEST_CASE(MaxbspScheduling) { + using Graph = ComputationalDagVectorImplDefUnsignedT; - BspInstance instance; - instance.setNumberOfProcessors(4); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(3); + BspInstance instance; + instance.SetNumberOfProcessors(4); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(3); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -45,81 +45,81 @@ BOOST_AUTO_TEST_CASE(maxbsp_scheduling) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB( + (cwd / "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag").string(), instance.GetComputationalDag()); BOOST_CHECK(status); - GreedyBspScheduler greedy; - BspSchedule bsp_initial(instance); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, greedy.computeSchedule(bsp_initial)); - BOOST_CHECK(bsp_initial.satisfiesPrecedenceConstraints()); + GreedyBspScheduler greedy; + BspSchedule bspInitial(instance); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, greedy.ComputeSchedule(bspInitial)); + BOOST_CHECK(bspInitial.SatisfiesPrecedenceConstraints()); // PART I: from BspSchedule to MaxBspSchedule conversion - std::cout << "Original Bsp Cost: " << bsp_initial.computeCosts() << std::endl; - GreedyBspToMaxBspConverter converter; - MaxBspSchedule maxbsp = converter.Convert(bsp_initial); - BOOST_CHECK(maxbsp.satisfiesPrecedenceConstraints()); - auto cost_conversion = maxbsp.computeCosts(); - std::cout << "Cost after maxBsp conversion: " << cost_conversion << std::endl; + std::cout << "Original Bsp Cost: " << bspInitial.ComputeCosts() << std::endl; + GreedyBspToMaxBspConverter converter; + 
MaxBspSchedule maxbsp = converter.Convert(bspInitial); + BOOST_CHECK(maxbsp.SatisfiesPrecedenceConstraints()); + auto costConversion = maxbsp.ComputeCosts(); + std::cout << "Cost after maxBsp conversion: " << costConversion << std::endl; // hill climbing - HillClimbingScheduler HC; - HC.improveSchedule(maxbsp); - BOOST_CHECK(maxbsp.satisfiesPrecedenceConstraints()); - auto cost_hc = maxbsp.computeCosts(); - std::cout << "Cost after Hill Climbing: " << cost_hc << std::endl; - BOOST_CHECK(cost_hc <= cost_conversion); + HillClimbingScheduler hc; + hc.ImproveSchedule(maxbsp); + BOOST_CHECK(maxbsp.SatisfiesPrecedenceConstraints()); + auto costHc = maxbsp.ComputeCosts(); + std::cout << "Cost after Hill Climbing: " << costHc << std::endl; + BOOST_CHECK(costHc <= costConversion); // PART II: from BspScheduleCS to MaxBspScheduleCS conversion - BspScheduleCS bsp_initial_cs(bsp_initial); - BOOST_CHECK(bsp_initial_cs.hasValidCommSchedule()); - std::cout << "Original BspCS Cost: " << bsp_initial_cs.computeCosts() << std::endl; + BspScheduleCS bspInitialCs(bspInitial); + BOOST_CHECK(bspInitialCs.HasValidCommSchedule()); + std::cout << "Original BspCS Cost: " << bspInitialCs.ComputeCosts() << std::endl; - MaxBspScheduleCS maxbsp_cs = converter.Convert(bsp_initial_cs); - BOOST_CHECK(maxbsp_cs.satisfiesPrecedenceConstraints()); - BOOST_CHECK(maxbsp_cs.hasValidCommSchedule()); - auto cost_conversion_cs = maxbsp_cs.computeCosts(); - std::cout << "Cost after maxBsp(CS) conversion: " << cost_conversion_cs << std::endl; + MaxBspScheduleCS maxbspCs = converter.Convert(bspInitialCs); + BOOST_CHECK(maxbspCs.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(maxbspCs.HasValidCommSchedule()); + auto costConversionCs = maxbspCs.ComputeCosts(); + std::cout << "Cost after maxBsp(CS) conversion: " << costConversionCs << std::endl; // hill climbing for comm. 
schedule - HillClimbingForCommSteps HCcs; - HCcs.improveSchedule(maxbsp_cs); - BOOST_CHECK(maxbsp_cs.satisfiesPrecedenceConstraints()); - BOOST_CHECK(maxbsp_cs.hasValidCommSchedule()); - auto cost_hccs = maxbsp_cs.computeCosts(); - std::cout << "Cost after comm. sched. hill climbing: " << cost_hccs << std::endl; - BOOST_CHECK(cost_hccs <= cost_conversion_cs); + HillClimbingForCommSteps hCcs; + hCcs.ImproveSchedule(maxbspCs); + BOOST_CHECK(maxbspCs.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(maxbspCs.HasValidCommSchedule()); + auto costHccs = maxbspCs.ComputeCosts(); + std::cout << "Cost after comm. sched. hill climbing: " << costHccs << std::endl; + BOOST_CHECK(costHccs <= costConversionCs); // PART III: same for larger DAG - status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(), - instance.getComputationalDag()); + status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(), + instance.GetComputationalDag()); BOOST_CHECK(status); - instance.setSynchronisationCosts(7); - - BspSchedule bsp_initial_large(instance); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, greedy.computeSchedule(bsp_initial_large)); - - BspScheduleCS bsp_initial_large_cs(bsp_initial_large); - BOOST_CHECK(bsp_initial_large_cs.hasValidCommSchedule()); - std::cout << "Original Bsp Cost on large DAG: " << bsp_initial_large_cs.computeCosts() << std::endl; - - MaxBspScheduleCS maxbsp_cs_large = converter.Convert(bsp_initial_large_cs); - BOOST_CHECK(maxbsp_cs_large.satisfiesPrecedenceConstraints()); - BOOST_CHECK(maxbsp_cs_large.hasValidCommSchedule()); - auto cost_maxbsp_cs_large = maxbsp_cs_large.computeCosts(); - std::cout << "Cost after maxBsp conversion on large DAG: " << cost_maxbsp_cs_large << std::endl; - - HCcs.improveSchedule(maxbsp_cs_large); - BOOST_CHECK(maxbsp_cs_large.satisfiesPrecedenceConstraints()); - 
BOOST_CHECK(maxbsp_cs_large.hasValidCommSchedule()); - auto cost_hccs_large = maxbsp_cs_large.computeCosts(); - std::cout << "Cost after comm. sched. hill climbing on large DAG: " << cost_hccs_large << std::endl; - BOOST_CHECK(cost_hccs_large <= cost_maxbsp_cs_large); + instance.SetSynchronisationCosts(7); + + BspSchedule bspInitialLarge(instance); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, greedy.ComputeSchedule(bspInitialLarge)); + + BspScheduleCS bspInitialLargeCs(bspInitialLarge); + BOOST_CHECK(bspInitialLargeCs.HasValidCommSchedule()); + std::cout << "Original Bsp Cost on large DAG: " << bspInitialLargeCs.ComputeCosts() << std::endl; + + MaxBspScheduleCS maxbspCsLarge = converter.Convert(bspInitialLargeCs); + BOOST_CHECK(maxbspCsLarge.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(maxbspCsLarge.HasValidCommSchedule()); + auto costMaxbspCsLarge = maxbspCsLarge.ComputeCosts(); + std::cout << "Cost after maxBsp conversion on large DAG: " << costMaxbspCsLarge << std::endl; + + hCcs.ImproveSchedule(maxbspCsLarge); + BOOST_CHECK(maxbspCsLarge.SatisfiesPrecedenceConstraints()); + BOOST_CHECK(maxbspCsLarge.HasValidCommSchedule()); + auto costHccsLarge = maxbspCsLarge.ComputeCosts(); + std::cout << "Cost after comm. sched. hill climbing on large DAG: " << costHccsLarge << std::endl; + BOOST_CHECK(costHccsLarge <= costMaxbspCsLarge); } diff --git a/tests/merkle_hash_computer.cpp b/tests/merkle_hash_computer.cpp index d8f231a8..3a156c2c 100644 --- a/tests/merkle_hash_computer.cpp +++ b/tests/merkle_hash_computer.cpp @@ -27,23 +27,23 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) { - using graph_t = computational_dag_vector_impl_def_t; - graph_t graph; +BOOST_AUTO_TEST_CASE(BspScheduleRecompTest) { + using GraphT = ComputationalDagVectorImplDefUnsignedT; + GraphT graph; - const auto project_root = get_project_root(); - file_reader::readComputationalDagHyperdagFormatDB((project_root / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph); + const auto projectRoot = GetProjectRoot(); + file_reader::ReadComputationalDagHyperdagFormatDB((projectRoot / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph); - MerkleHashComputer>> m_hash(graph); + MerkleHashComputer>> mHash(graph); - BOOST_CHECK_EQUAL(m_hash.get_vertex_hashes().size(), graph.num_vertices()); + BOOST_CHECK_EQUAL(mHash.GetVertexHashes().size(), graph.NumVertices()); - for (const auto &v : source_vertices_view(graph)) { - BOOST_CHECK_EQUAL(m_hash.get_vertex_hash(v), 11); + for (const auto &v : SourceVerticesView(graph)) { + BOOST_CHECK_EQUAL(mHash.GetVertexHash(v), 11); } size_t num = 0; - for (const auto &pair : m_hash.get_orbits()) { + for (const auto &pair : mHash.GetOrbits()) { num += pair.second.size(); std::cout << "orbit " << pair.first << ": "; for (const auto &v : pair.second) { @@ -52,32 +52,31 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) { std::cout << std::endl; } - BOOST_CHECK_EQUAL(num, graph.num_vertices()); + BOOST_CHECK_EQUAL(num, graph.NumVertices()); - BOOST_CHECK_EQUAL(m_hash.get_vertex_hash(41), m_hash.get_vertex_hash(47)); - BOOST_CHECK_EQUAL(m_hash.get_vertex_hash(28), m_hash.get_vertex_hash(18)); - BOOST_CHECK_EQUAL(m_hash.get_vertex_hash(43), m_hash.get_vertex_hash(48)); - BOOST_CHECK_EQUAL(m_hash.get_vertex_hash(29), m_hash.get_vertex_hash(22)); - BOOST_CHECK(m_hash.get_vertex_hash(3) != m_hash.get_vertex_hash(12)); - BOOST_CHECK(m_hash.get_vertex_hash(53) != m_hash.get_vertex_hash(29)); + BOOST_CHECK_EQUAL(mHash.GetVertexHash(41), mHash.GetVertexHash(47)); + 
BOOST_CHECK_EQUAL(mHash.GetVertexHash(28), mHash.GetVertexHash(18)); + BOOST_CHECK_EQUAL(mHash.GetVertexHash(43), mHash.GetVertexHash(48)); + BOOST_CHECK_EQUAL(mHash.GetVertexHash(29), mHash.GetVertexHash(22)); + BOOST_CHECK(mHash.GetVertexHash(3) != mHash.GetVertexHash(12)); + BOOST_CHECK(mHash.GetVertexHash(53) != mHash.GetVertexHash(29)); } -BOOST_AUTO_TEST_CASE(MerkleHashComputer_test_fw_bw_precomp) { - using graph_t = computational_dag_vector_impl_def_t; - graph_t graph_test; +BOOST_AUTO_TEST_CASE(MerkleHashComputerTestFwBwPrecomp) { + using GraphT = ComputationalDagVectorImplDefUnsignedT; + GraphT graphTest; - const auto project_root = get_project_root(); - file_reader::readComputationalDagHyperdagFormatDB((project_root / "data/spaa/tiny/instance_bicgstab.hdag").string(), - graph_test); + const auto projectRoot = GetProjectRoot(); + file_reader::ReadComputationalDagHyperdagFormatDB((projectRoot / "data/spaa/tiny/instance_bicgstab.hdag").string(), graphTest); - std::vector precom_node_hashes(graph_test.num_vertices(), 5); + std::vector precomNodeHashes(graphTest.NumVertices(), 5); - MerkleHashComputer> m_hash(graph_test, graph_test, precom_node_hashes); + MerkleHashComputer> mHash(graphTest, graphTest, precomNodeHashes); - BOOST_CHECK_EQUAL(m_hash.get_vertex_hashes().size(), graph_test.num_vertices()); + BOOST_CHECK_EQUAL(mHash.GetVertexHashes().size(), graphTest.NumVertices()); size_t num = 0; - for (const auto &pair : m_hash.get_orbits()) { + for (const auto &pair : mHash.GetOrbits()) { num += pair.second.size(); std::cout << "orbit " << pair.first << ": "; for (const auto &v : pair.second) { @@ -86,134 +85,134 @@ BOOST_AUTO_TEST_CASE(MerkleHashComputer_test_fw_bw_precomp) { std::cout << std::endl; } - BOOST_CHECK_EQUAL(num, graph_test.num_vertices()); + BOOST_CHECK_EQUAL(num, graphTest.NumVertices()); } -using graphType = computational_dag_vector_impl_def_t; -using VertexType = vertex_idx_t; - 
-BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_IdenticalGraphsAreIsomorphic) { - graphType dag1; - const auto v1 = dag1.add_vertex(0, 10, 1); - const auto v2 = dag1.add_vertex(1, 20, 1); - const auto v3 = dag1.add_vertex(0, 30, 1); - dag1.add_edge(v1, v2); - dag1.add_edge(v2, v3); - - graphType dag2; - const auto vA = dag2.add_vertex(0, 10, 1); - const auto vB = dag2.add_vertex(1, 20, 1); - const auto vC = dag2.add_vertex(0, 30, 1); - dag2.add_edge(vA, vB); - dag2.add_edge(vB, vC); - - bool test = are_isomorphic_by_merkle_hash, true>(dag1, dag2); +using GraphType = ComputationalDagVectorImplDefUnsignedT; +using VertexType = VertexIdxT; + +BOOST_AUTO_TEST_CASE(MerkleIsomorphismTestIdenticalGraphsAreIsomorphic) { + GraphType dag1; + const auto v1 = dag1.AddVertex(0, 10, 1); + const auto v2 = dag1.AddVertex(1, 20, 1); + const auto v3 = dag1.AddVertex(0, 30, 1); + dag1.AddEdge(v1, v2); + dag1.AddEdge(v2, v3); + + GraphType dag2; + const auto vA = dag2.AddVertex(0, 10, 1); + const auto vB = dag2.AddVertex(1, 20, 1); + const auto vC = dag2.AddVertex(0, 30, 1); + dag2.AddEdge(vA, vB); + dag2.AddEdge(vB, vC); + + bool test = AreIsomorphicByMerkleHash, true>(dag1, dag2); BOOST_CHECK(test); - test = are_isomorphic_by_merkle_hash, false>(dag1, dag2); + test = AreIsomorphicByMerkleHash, false>(dag1, dag2); BOOST_CHECK(test); } // Test case 2: Graphs with different numbers of vertices should not be isomorphic. 
-BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_DifferentVertexCount) { - graphType dag1; - dag1.add_vertex(0, 10, 1); - dag1.add_vertex(1, 20, 1); +BOOST_AUTO_TEST_CASE(MerkleIsomorphismTestDifferentVertexCount) { + GraphType dag1; + dag1.AddVertex(0, 10, 1); + dag1.AddVertex(1, 20, 1); - graphType dag2; - dag2.add_vertex(0, 10, 1); + GraphType dag2; + dag2.AddVertex(0, 10, 1); - BOOST_CHECK_EQUAL(are_isomorphic_by_merkle_hash(dag1, dag2), false); + BOOST_CHECK_EQUAL(AreIsomorphicByMerkleHash(dag1, dag2), false); } // Test case 3: Graphs with the same size but different structures should not be isomorphic. -BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_SameSizeDifferentStructure) { - graphType dag1; // A -> B -> C - const auto v1_1 = dag1.add_vertex(0, 1, 1); - const auto v1_2 = dag1.add_vertex(0, 1, 1); - const auto v1_3 = dag1.add_vertex(0, 1, 1); - dag1.add_edge(v1_1, v1_2); - dag1.add_edge(v1_2, v1_3); - - graphType dag2; // A -> B, A -> C - const auto v2_1 = dag2.add_vertex(0, 1, 1); - const auto v2_2 = dag2.add_vertex(0, 1, 1); - const auto v2_3 = dag2.add_vertex(0, 1, 1); - dag2.add_edge(v2_1, v2_2); - dag2.add_edge(v2_1, v2_3); - - BOOST_CHECK_EQUAL(are_isomorphic_by_merkle_hash(dag1, dag2), false); +BOOST_AUTO_TEST_CASE(MerkleIsomorphismTestSameSizeDifferentStructure) { + GraphType dag1; // A -> B -> C + const auto v11 = dag1.AddVertex(0, 1, 1); + const auto v12 = dag1.AddVertex(0, 1, 1); + const auto v13 = dag1.AddVertex(0, 1, 1); + dag1.AddEdge(v11, v12); + dag1.AddEdge(v12, v13); + + GraphType dag2; // A -> B, A -> C + const auto v21 = dag2.AddVertex(0, 1, 1); + const auto v22 = dag2.AddVertex(0, 1, 1); + const auto v23 = dag2.AddVertex(0, 1, 1); + dag2.AddEdge(v21, v22); + dag2.AddEdge(v21, v23); + + BOOST_CHECK_EQUAL(AreIsomorphicByMerkleHash(dag1, dag2), false); } // Test case 4: Structurally identical graphs with different vertex labeling should be isomorphic. 
-BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_IsomorphicWithDifferentLabels) { - graphType dag1; - const auto v1_1 = dag1.add_vertex(0, 1, 1); // Source - const auto v1_2 = dag1.add_vertex(0, 1, 1); - const auto v1_3 = dag1.add_vertex(0, 1, 1); // Sink - dag1.add_edge(v1_1, v1_2); - dag1.add_edge(v1_2, v1_3); - - graphType dag2; +BOOST_AUTO_TEST_CASE(MerkleIsomorphismTestIsomorphicWithDifferentLabels) { + GraphType dag1; + const auto v11 = dag1.AddVertex(0, 1, 1); // Source + const auto v12 = dag1.AddVertex(0, 1, 1); + const auto v13 = dag1.AddVertex(0, 1, 1); // Sink + dag1.AddEdge(v11, v12); + dag1.AddEdge(v12, v13); + + GraphType dag2; // Same structure as dag1, but vertices are added in a different order. - const auto v2_3 = dag2.add_vertex(0, 1, 1); // Sink - const auto v2_1 = dag2.add_vertex(0, 1, 1); // Source - const auto v2_2 = dag2.add_vertex(0, 1, 1); - dag2.add_edge(v2_1, v2_2); - dag2.add_edge(v2_2, v2_3); + const auto v23 = dag2.AddVertex(0, 1, 1); // Sink + const auto v21 = dag2.AddVertex(0, 1, 1); // Source + const auto v22 = dag2.AddVertex(0, 1, 1); + dag2.AddEdge(v21, v22); + dag2.AddEdge(v22, v23); - BOOST_CHECK(are_isomorphic_by_merkle_hash(dag1, dag2)); + BOOST_CHECK(AreIsomorphicByMerkleHash(dag1, dag2)); } // Test case 5: A more complex example based on your provided DAG. 
-BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_ComplexIsomorphicGraphs) { - graphType dag1; +BOOST_AUTO_TEST_CASE(MerkleIsomorphismTestComplexIsomorphicGraphs) { + GraphType dag1; { - const auto v1 = dag1.add_vertex(2, 9, 2); - const auto v2 = dag1.add_vertex(3, 8, 4); - const auto v3 = dag1.add_vertex(4, 7, 3); - const auto v4 = dag1.add_vertex(5, 6, 2); - const auto v5 = dag1.add_vertex(6, 5, 6); - const auto v6 = dag1.add_vertex(7, 4, 2); - dag1.add_vertex(8, 3, 4); - const auto v8 = dag1.add_vertex(9, 2, 1); - dag1.add_edge(v1, v2); - dag1.add_edge(v1, v3); - dag1.add_edge(v1, v4); - dag1.add_edge(v1, v5); - dag1.add_edge(v1, v8); - dag1.add_edge(v2, v5); - dag1.add_edge(v2, v6); - dag1.add_edge(v2, v8); - dag1.add_edge(v3, v5); - dag1.add_edge(v3, v6); - dag1.add_edge(v5, v8); - dag1.add_edge(v4, v8); + const auto v1 = dag1.AddVertex(2, 9, 2); + const auto v2 = dag1.AddVertex(3, 8, 4); + const auto v3 = dag1.AddVertex(4, 7, 3); + const auto v4 = dag1.AddVertex(5, 6, 2); + const auto v5 = dag1.AddVertex(6, 5, 6); + const auto v6 = dag1.AddVertex(7, 4, 2); + dag1.AddVertex(8, 3, 4); + const auto v8 = dag1.AddVertex(9, 2, 1); + dag1.AddEdge(v1, v2); + dag1.AddEdge(v1, v3); + dag1.AddEdge(v1, v4); + dag1.AddEdge(v1, v5); + dag1.AddEdge(v1, v8); + dag1.AddEdge(v2, v5); + dag1.AddEdge(v2, v6); + dag1.AddEdge(v2, v8); + dag1.AddEdge(v3, v5); + dag1.AddEdge(v3, v6); + dag1.AddEdge(v5, v8); + dag1.AddEdge(v4, v8); } - graphType dag2; + GraphType dag2; { // Same structure, different vertex variable names and creation order. 
- const auto n8 = dag2.add_vertex(9, 2, 1); - dag2.add_vertex(8, 3, 4); - const auto n6 = dag2.add_vertex(7, 4, 2); - const auto n5 = dag2.add_vertex(6, 5, 6); - const auto n4 = dag2.add_vertex(5, 6, 2); - const auto n3 = dag2.add_vertex(4, 7, 3); - const auto n2 = dag2.add_vertex(3, 8, 4); - const auto n1 = dag2.add_vertex(2, 9, 2); - dag2.add_edge(n1, n2); - dag2.add_edge(n1, n3); - dag2.add_edge(n1, n4); - dag2.add_edge(n1, n5); - dag2.add_edge(n1, n8); - dag2.add_edge(n2, n5); - dag2.add_edge(n2, n6); - dag2.add_edge(n2, n8); - dag2.add_edge(n3, n5); - dag2.add_edge(n3, n6); - dag2.add_edge(n5, n8); - dag2.add_edge(n4, n8); + const auto n8 = dag2.AddVertex(9, 2, 1); + dag2.AddVertex(8, 3, 4); + const auto n6 = dag2.AddVertex(7, 4, 2); + const auto n5 = dag2.AddVertex(6, 5, 6); + const auto n4 = dag2.AddVertex(5, 6, 2); + const auto n3 = dag2.AddVertex(4, 7, 3); + const auto n2 = dag2.AddVertex(3, 8, 4); + const auto n1 = dag2.AddVertex(2, 9, 2); + dag2.AddEdge(n1, n2); + dag2.AddEdge(n1, n3); + dag2.AddEdge(n1, n4); + dag2.AddEdge(n1, n5); + dag2.AddEdge(n1, n8); + dag2.AddEdge(n2, n5); + dag2.AddEdge(n2, n6); + dag2.AddEdge(n2, n8); + dag2.AddEdge(n3, n5); + dag2.AddEdge(n3, n6); + dag2.AddEdge(n5, n8); + dag2.AddEdge(n4, n8); } - BOOST_CHECK(are_isomorphic_by_merkle_hash(dag1, dag2)); + BOOST_CHECK(AreIsomorphicByMerkleHash(dag1, dag2)); } diff --git a/tests/orbit_graph_processor.cpp b/tests/orbit_graph_processor.cpp index 6ac34228..64bc5abc 100644 --- a/tests/orbit_graph_processor.cpp +++ b/tests/orbit_graph_processor.cpp @@ -33,61 +33,61 @@ limitations under the License. 
#include "test_utils.hpp" using namespace osp; -using graph_t = computational_dag_vector_impl_def_t; +using GraphT = ComputationalDagVectorImplDefUnsignedT; -template -void check_partitioning(const Graph_t &dag, const OrbitGraphProcessor &processor) { - const auto &final_coarse_graph = processor.get_final_coarse_graph(); - const auto &final_groups = processor.get_final_groups(); +template +void CheckPartitioning(const GraphT &dag, const OrbitGraphProcessor &processor) { + const auto &finalCoarseGraph = processor.GetFinalCoarseGraph(); + const auto &finalGroups = processor.GetFinalGroups(); // Check that the final coarse graph is acyclic - BOOST_CHECK(is_acyclic(final_coarse_graph)); + BOOST_CHECK(IsAcyclic(finalCoarseGraph)); // Check that the final groups form a valid partition of the original DAG's vertices - std::vector vertex_counts(dag.num_vertices(), 0); - size_t total_vertices_in_groups = 0; - for (const auto &group : final_groups) { - for (const auto &subgraph : group.subgraphs) { - total_vertices_in_groups += subgraph.size(); + std::vector vertexCounts(dag.NumVertices(), 0); + size_t totalVerticesInGroups = 0; + for (const auto &group : finalGroups) { + for (const auto &subgraph : group.subgraphs_) { + totalVerticesInGroups += subgraph.size(); for (const auto &vertex : subgraph) { - BOOST_REQUIRE_LT(vertex, dag.num_vertices()); - vertex_counts[vertex]++; + BOOST_REQUIRE_LT(vertex, dag.NumVertices()); + vertexCounts[vertex]++; } } } - BOOST_CHECK_EQUAL(total_vertices_in_groups, dag.num_vertices()); - for (size_t i = 0; i < dag.num_vertices(); ++i) { - BOOST_CHECK_EQUAL(vertex_counts[i], 1); + BOOST_CHECK_EQUAL(totalVerticesInGroups, dag.NumVertices()); + for (size_t i = 0; i < dag.NumVertices(); ++i) { + BOOST_CHECK_EQUAL(vertexCounts[i], 1); } } // BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_SmokeTest) { // // The test reads a file, but the path is absolute, so we don't need the project root here. 
// graph_t dag; -// file_reader::readComputationalDagDotFormat("", dag); +// file_reader::ReadComputationalDagDotFormat("", dag); // OrbitGraphProcessor processor(2); // Using a symmetry threshold of 2 -// MerkleHashComputer, true> hasher(dag, dag); -// processor.discover_isomorphic_groups(dag, hasher); +// MerkleHashComputer, true> hasher(dag, dag); +// processor.DiscoverIsomorphicGroups(dag, hasher); -// const auto& coarse_graph = processor.get_coarse_graph(); -// const auto& final_coarse_graph = processor.get_final_coarse_graph(); -// const auto& final_groups = processor.get_final_groups(); +// const auto& coarse_graph = processor.GetCoarseGraph(); +// const auto& final_coarse_graph = processor.GetFinalCoarseGraph(); +// const auto& final_groups = processor.GetFinalGroups(); // const auto& final_contraction_map = processor.get_final_contraction_map(); // DotFileWriter writer; // // Color by initial orbits // writer.write_colored_graph("orbit_graph_orbits_colored.dot", dag, processor.get_contraction_map()); -// writer.write_graph("orbit_graph_coarse_graph.dot", coarse_graph); +// writer.WriteGraph("orbit_graph_coarse_graph.dot", coarse_graph); // // Color by final merged groups // writer.write_colored_graph("orbit_graph_groups_colored.dot", dag, final_contraction_map); // // Color by final subgraphs (each subgraph gets a unique color) -// std::vector subgraph_colors(dag.num_vertices()); +// std::vector subgraph_colors(dag.NumVertices()); // unsigned current_subgraph_color = 0; // for (const auto& group : final_groups) { -// for (const auto& subgraph : group.subgraphs) { +// for (const auto& subgraph : group.subgraphs_) { // for (const auto& vertex : subgraph) { // subgraph_colors[vertex] = current_subgraph_color; // } @@ -95,213 +95,213 @@ void check_partitioning(const Graph_t &dag, const OrbitGraphProcessor 1 // 2 -> 3 - dag.add_vertex(10, 1, 1); // 0 - dag.add_vertex(10, 1, 1); // 1 - dag.add_vertex(10, 1, 1); // 2 - dag.add_vertex(10, 1, 1); // 3 - 
dag.add_edge(0, 1); - dag.add_edge(2, 3); + dag.AddVertex(10, 1, 1); // 0 + dag.AddVertex(10, 1, 1); // 1 + dag.AddVertex(10, 1, 1); // 2 + dag.AddVertex(10, 1, 1); // 3 + dag.AddEdge(0, 1); + dag.AddEdge(2, 3); // Initial orbits: {0, 2} and {1, 3}. Coarse graph: 0 -> 1 // With threshold 2, these should be merged. - OrbitGraphProcessor processor; - MerkleHashComputer, true> hasher(dag, dag); - processor.discover_isomorphic_groups(dag, hasher); + OrbitGraphProcessor processor; + MerkleHashComputer, true> hasher(dag, dag); + processor.DiscoverIsomorphicGroups(dag, hasher); - const auto &final_coarse_graph = processor.get_final_coarse_graph(); - const auto &final_groups = processor.get_final_groups(); + const auto &finalCoarseGraph = processor.GetFinalCoarseGraph(); + const auto &finalGroups = processor.GetFinalGroups(); // Expect a single node in the final coarse graph - BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 1); - BOOST_CHECK_EQUAL(final_groups.size(), 1); + BOOST_CHECK_EQUAL(finalCoarseGraph.NumVertices(), 1); + BOOST_CHECK_EQUAL(finalGroups.size(), 1); // The single group should contain two subgraphs: {0,1} and {2,3} - BOOST_REQUIRE_EQUAL(final_groups[0].subgraphs.size(), 2); - std::set sg1(final_groups[0].subgraphs[0].begin(), final_groups[0].subgraphs[0].end()); - std::set sg2(final_groups[0].subgraphs[1].begin(), final_groups[0].subgraphs[1].end()); - std::set expected_sgA = {0, 1}; - std::set expected_sgB = {2, 3}; + BOOST_REQUIRE_EQUAL(finalGroups[0].subgraphs_.size(), 2); + std::set sg1(finalGroups[0].subgraphs_[0].begin(), finalGroups[0].subgraphs_[0].end()); + std::set sg2(finalGroups[0].subgraphs_[1].begin(), finalGroups[0].subgraphs_[1].end()); + std::set expectedSgA = {0, 1}; + std::set expectedSgB = {2, 3}; - BOOST_CHECK((sg1 == expected_sgA && sg2 == expected_sgB) || (sg1 == expected_sgB && sg2 == expected_sgA)); + BOOST_CHECK((sg1 == expectedSgA && sg2 == expectedSgB) || (sg1 == expectedSgB && sg2 == expectedSgA)); - 
check_partitioning(dag, processor); + CheckPartitioning(dag, processor); } -BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ForkJoinNoMerge) { - graph_t dag; +BOOST_AUTO_TEST_CASE(OrbitGraphProcessorForkJoinNoMerge) { + GraphT dag; // 0 -> {1, 2} -> 3. Nodes 1 and 2 are in the same orbit. - dag.add_vertex(10, 1, 1); // 0 - dag.add_vertex(20, 1, 1); // 1 - dag.add_vertex(20, 1, 1); // 2 - dag.add_vertex(30, 1, 1); // 3 - dag.add_edge(0, 1); - dag.add_edge(0, 2); - dag.add_edge(1, 3); - dag.add_edge(2, 3); + dag.AddVertex(10, 1, 1); // 0 + dag.AddVertex(20, 1, 1); // 1 + dag.AddVertex(20, 1, 1); // 2 + dag.AddVertex(30, 1, 1); // 3 + dag.AddEdge(0, 1); + dag.AddEdge(0, 2); + dag.AddEdge(1, 3); + dag.AddEdge(2, 3); // Initial orbits: {0}, {1,2}, {3}. Coarse graph: 0 -> 1 -> 2 // Merging 0 and 1 would result in a group of size 1 ({0,1,2}), which is not viable (threshold 2). // Merging 1 and 2 would also result in a group of size 1 ({1,2,3}), not viable. - OrbitGraphProcessor processor; - MerkleHashComputer, true> hasher(dag, dag); - processor.discover_isomorphic_groups(dag, hasher); + OrbitGraphProcessor processor; + MerkleHashComputer, true> hasher(dag, dag); + processor.DiscoverIsomorphicGroups(dag, hasher); - const auto &final_coarse_graph = processor.get_final_coarse_graph(); - const auto &final_groups = processor.get_final_groups(); + const auto &finalCoarseGraph = processor.GetFinalCoarseGraph(); + const auto &finalGroups = processor.GetFinalGroups(); // Expect no merges, so final graph is same as initial coarse graph. 
- BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 3); - BOOST_CHECK_EQUAL(final_groups.size(), 3); + BOOST_CHECK_EQUAL(finalCoarseGraph.NumVertices(), 3); + BOOST_CHECK_EQUAL(finalGroups.size(), 3); // Check group structures // Group 0: {{0}} // Group 1: {{1}, {2}} // Group 2: {{3}} - size_t group_of_1_count = 0; - size_t group_of_2_count = 0; - for (const auto &group : final_groups) { - if (group.subgraphs.size() == 1) { - group_of_1_count++; + size_t groupOf1Count = 0; + size_t groupOf2Count = 0; + for (const auto &group : finalGroups) { + if (group.subgraphs_.size() == 1) { + groupOf1Count++; } - if (group.subgraphs.size() == 2) { - group_of_2_count++; + if (group.subgraphs_.size() == 2) { + groupOf2Count++; } } - BOOST_CHECK_EQUAL(group_of_1_count, 2); - BOOST_CHECK_EQUAL(group_of_2_count, 1); + BOOST_CHECK_EQUAL(groupOf1Count, 2); + BOOST_CHECK_EQUAL(groupOf2Count, 1); - check_partitioning(dag, processor); + CheckPartitioning(dag, processor); } -BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_PartitionCheck_MediumGraph) { - const auto project_root = get_project_root(); - graph_t dag; - file_reader::readComputationalDagHyperdagFormatDB((project_root / "data/spaa/tiny/instance_bicgstab.hdag").string(), dag); +BOOST_AUTO_TEST_CASE(OrbitGraphProcessorPartitionCheckMediumGraph) { + const auto projectRoot = GetProjectRoot(); + GraphT dag; + file_reader::ReadComputationalDagHyperdagFormatDB((projectRoot / "data/spaa/tiny/instance_bicgstab.hdag").string(), dag); - BOOST_REQUIRE_GT(dag.num_vertices(), 0); + BOOST_REQUIRE_GT(dag.NumVertices(), 0); // Use a higher threshold to encourage more merging on this larger graph - OrbitGraphProcessor processor; - MerkleHashComputer, true> hasher(dag, dag); - processor.discover_isomorphic_groups(dag, hasher); + OrbitGraphProcessor processor; + MerkleHashComputer, true> hasher(dag, dag); + processor.DiscoverIsomorphicGroups(dag, hasher); // The main purpose of this test is to ensure the output is a valid partition. 
- check_partitioning(dag, processor); + CheckPartitioning(dag, processor); } -BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_MultiPipelineMerge) { +BOOST_AUTO_TEST_CASE(OrbitGraphProcessorMultiPipelineMerge) { // 5 parallel pipelines of 4 nodes each. // Initial orbits: 4 groups of 5 identical nodes. Coarse graph: 0->1->2->3 // With a threshold of 5, the entire graph should merge into a single group. - const auto dag = construct_multi_pipeline_dag(5, 4); - BOOST_REQUIRE_EQUAL(dag.num_vertices(), 20); + const auto dag = ConstructMultiPipelineDag(5, 4); + BOOST_REQUIRE_EQUAL(dag.NumVertices(), 20); - OrbitGraphProcessor processor; // Set threshold to match pipeline count - MerkleHashComputer, true> hasher(dag, dag); - processor.discover_isomorphic_groups(dag, hasher); + OrbitGraphProcessor processor; // Set threshold to match pipeline count + MerkleHashComputer, true> hasher(dag, dag); + processor.DiscoverIsomorphicGroups(dag, hasher); - const auto &final_coarse_graph = processor.get_final_coarse_graph(); - const auto &final_groups = processor.get_final_groups(); + const auto &finalCoarseGraph = processor.GetFinalCoarseGraph(); + const auto &finalGroups = processor.GetFinalGroups(); // Expect a single node in the final coarse graph - BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 1); - BOOST_CHECK_EQUAL(final_groups.size(), 1); + BOOST_CHECK_EQUAL(finalCoarseGraph.NumVertices(), 1); + BOOST_CHECK_EQUAL(finalGroups.size(), 1); // The single group should contain 5 subgraphs, each with 4 nodes. - BOOST_REQUIRE_EQUAL(final_groups[0].subgraphs.size(), 5); - BOOST_CHECK_EQUAL(final_groups[0].subgraphs[0].size(), 4); + BOOST_REQUIRE_EQUAL(finalGroups[0].subgraphs_.size(), 5); + BOOST_CHECK_EQUAL(finalGroups[0].subgraphs_[0].size(), 4); - check_partitioning(dag, processor); + CheckPartitioning(dag, processor); } -BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_LadderNoMerge) { +BOOST_AUTO_TEST_CASE(OrbitGraphProcessorLadderNoMerge) { // A ladder graph with 10 rungs (22 nodes). 
// The bwd_merkle_hash is more discerning and creates more than 2 initial orbits // due to the different structures at the start and end of the ladder. // The coarsening logic will merge some of these, but the core cyclic structure // prevents a full merge. The exact number of final nodes is non-trivial, // but it should be greater than 1. - const auto dag = construct_ladder_dag(10); - BOOST_REQUIRE_EQUAL(dag.num_vertices(), 22); + const auto dag = ConstructLadderDag(10); + BOOST_REQUIRE_EQUAL(dag.NumVertices(), 22); - OrbitGraphProcessor processor; - MerkleHashComputer, true> hasher(dag, dag); - processor.discover_isomorphic_groups(dag, hasher); + OrbitGraphProcessor processor; + MerkleHashComputer, true> hasher(dag, dag); + processor.DiscoverIsomorphicGroups(dag, hasher); - const auto &initial_coarse_graph = processor.get_coarse_graph(); - const auto &final_coarse_graph = processor.get_final_coarse_graph(); + const auto &initialCoarseGraph = processor.GetCoarseGraph(); + const auto &finalCoarseGraph = processor.GetFinalCoarseGraph(); // Expect no merges, so final graph is the same as the initial coarse graph. - BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), initial_coarse_graph.num_vertices()); - BOOST_CHECK_GT(final_coarse_graph.num_vertices(), 1); + BOOST_CHECK_EQUAL(finalCoarseGraph.NumVertices(), initialCoarseGraph.NumVertices()); + BOOST_CHECK_GT(finalCoarseGraph.NumVertices(), 1); - check_partitioning(dag, processor); + CheckPartitioning(dag, processor); } -BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_AsymmetricNoMerge) { +BOOST_AUTO_TEST_CASE(OrbitGraphProcessorAsymmetricNoMerge) { // A simple chain where every node is unique. // Since all groups are below the threshold, they will all be merged into one. 
- const auto dag = construct_asymmetric_dag(30); - BOOST_REQUIRE_EQUAL(dag.num_vertices(), 30); + const auto dag = ConstructAsymmetricDag(30); + BOOST_REQUIRE_EQUAL(dag.NumVertices(), 30); - OrbitGraphProcessor processor; - MerkleHashComputer, true> hasher(dag, dag); - processor.discover_isomorphic_groups(dag, hasher); + OrbitGraphProcessor processor; + MerkleHashComputer, true> hasher(dag, dag); + processor.DiscoverIsomorphicGroups(dag, hasher); - const auto &final_coarse_graph = processor.get_final_coarse_graph(); + const auto &finalCoarseGraph = processor.GetFinalCoarseGraph(); // Expect all nodes to be merged into a single coarse node. - BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 1); + BOOST_CHECK_EQUAL(finalCoarseGraph.NumVertices(), 1); - check_partitioning(dag, processor); + CheckPartitioning(dag, processor); } -BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_BinaryTreeNoMerge) { +BOOST_AUTO_TEST_CASE(OrbitGraphProcessorBinaryTreeNoMerge) { // A binary out-tree of height 4. // Initial orbits are one per level. Coarse graph is a simple chain: 0->1->2->3->4 (5 nodes). // The logic allows merging groups that are below the symmetry threshold. // However, the `critical_path_weight` check prevents merges that would increase the // longest path in the coarse graph. This results in the chain being partially, but not // fully, collapsed. The expected outcome is 2 final coarse nodes. 
- const auto dag = construct_binary_out_tree(4); - BOOST_REQUIRE_EQUAL(dag.num_vertices(), (1 << 5) - 1); + const auto dag = ConstructBinaryOutTree(4); + BOOST_REQUIRE_EQUAL(dag.NumVertices(), (1 << 5) - 1); - OrbitGraphProcessor processor; - MerkleHashComputer, true> hasher(dag, dag); - processor.discover_isomorphic_groups(dag, hasher); + OrbitGraphProcessor processor; + MerkleHashComputer, true> hasher(dag, dag); + processor.DiscoverIsomorphicGroups(dag, hasher); - const auto &final_coarse_graph = processor.get_final_coarse_graph(); + const auto &finalCoarseGraph = processor.GetFinalCoarseGraph(); - BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 3); + BOOST_CHECK_EQUAL(finalCoarseGraph.NumVertices(), 3); - check_partitioning(dag, processor); + CheckPartitioning(dag, processor); } -BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ButterflyMerge) { - const auto dag = construct_butterfly_dag(3); - BOOST_REQUIRE_EQUAL(dag.num_vertices(), (3 + 1) * 8); +BOOST_AUTO_TEST_CASE(OrbitGraphProcessorButterflyMerge) { + const auto dag = ConstructButterflyDag(3); + BOOST_REQUIRE_EQUAL(dag.NumVertices(), (3 + 1) * 8); - OrbitGraphProcessor processor; - MerkleHashComputer, true> hasher(dag, dag); - processor.discover_isomorphic_groups(dag, hasher); + OrbitGraphProcessor processor; + MerkleHashComputer, true> hasher(dag, dag); + processor.DiscoverIsomorphicGroups(dag, hasher); - const auto &final_coarse_graph = processor.get_final_coarse_graph(); - BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 4); + const auto &finalCoarseGraph = processor.GetFinalCoarseGraph(); + BOOST_CHECK_EQUAL(finalCoarseGraph.NumVertices(), 4); - check_partitioning(dag, processor); + CheckPartitioning(dag, processor); } diff --git a/tests/pebbling_schedule_class.cpp b/tests/pebbling_schedule_class.cpp index 097b1b7f..8124420c 100644 --- a/tests/pebbling_schedule_class.cpp +++ b/tests/pebbling_schedule_class.cpp @@ -32,7 +32,7 @@ limitations under the License. 
using namespace osp; -std::vector tiny_spaa_graphs() { +std::vector TinySpaaGraphs() { return {"data/spaa/tiny/instance_bicgstab.hdag", "data/spaa/tiny/instance_CG_N2_K2_nzP0d75.hdag", "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag", @@ -51,12 +51,12 @@ std::vector tiny_spaa_graphs() { "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag"}; } -std::vector test_architectures() { return {"data/machine_params/p3.arch"}; } +std::vector TestArchitectures() { return {"data/machine_params/p3.arch"}; } -template -void run_test(Scheduler *test_scheduler) { - std::vector filenames_graph = tiny_spaa_graphs(); - std::vector filenames_architectures = test_architectures(); +template +void RunTest(Scheduler *testScheduler) { + std::vector filenamesGraph = TinySpaaGraphs(); + std::vector filenamesArchitectures = TestArchitectures(); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -66,78 +66,76 @@ void run_test(Scheduler *test_scheduler) { std::cout << cwd << std::endl; } - for (auto &filename_graph : filenames_graph) { - for (auto &filename_machine : filenames_architectures) { - std::string name_graph - = filename_graph.substr(filename_machine.find_last_of("/\\") + 1, filename_graph.find_last_of(".")); - std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); - name_machine = name_machine.substr(0, name_machine.rfind(".")); + for (auto &filenameGraph : filenamesGraph) { + for (auto &filenameMachine : filenamesArchitectures) { + std::string nameGraph = filenameGraph.substr(filenameMachine.find_last_of("/\\") + 1, filenameGraph.find_last_of(".")); + std::string nameMachine = filenameMachine.substr(filenameMachine.find_last_of("/\\") + 1); + nameMachine = nameMachine.substr(0, nameMachine.rfind(".")); - std::cout << std::endl << "Graph: " << name_graph << std::endl; - std::cout << "Architecture: " << name_machine << std::endl; + std::cout << std::endl << "Graph: " << nameGraph << std::endl; + std::cout << 
"Architecture: " << nameMachine << std::endl; - BspInstance instance; + BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / filenameGraph).string(), + instance.GetComputationalDag()); - bool status_architecture - = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool statusArchitecture + = file_reader::ReadBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.GetArchitecture()); - if (!status_graph || !status_architecture) { + if (!statusGraph || !statusArchitecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } - BspSchedule bsp_schedule(instance); + BspSchedule bspSchedule(instance); - RETURN_STATUS result = test_scheduler->computeSchedule(bsp_schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); + ReturnStatus result = testScheduler->ComputeSchedule(bspSchedule); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); - std::vector > minimum_memory_required_vector - = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); - v_memw_t max_required - = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end()); - instance.getArchitecture().setMemoryBound(max_required); + std::vector > minimumMemoryRequiredVector + = PebblingSchedule::MinimumMemoryRequiredPerNodeType(instance); + VMemwT maxRequired = *std::max_element(minimumMemoryRequiredVector.begin(), minimumMemoryRequiredVector.end()); + instance.GetArchitecture().SetMemoryBound(maxRequired); - PebblingSchedule memSchedule1(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::LARGEST_ID); - BOOST_CHECK_EQUAL(&memSchedule1.getInstance(), &instance); - BOOST_CHECK(memSchedule1.isValid()); + PebblingSchedule memSchedule1(bspSchedule, 
PebblingSchedule::CacheEvictionStrategy::LARGEST_ID); + BOOST_CHECK_EQUAL(&memSchedule1.GetInstance(), &instance); + BOOST_CHECK(memSchedule1.IsValid()); - PebblingSchedule memSchedule3(bsp_schedule, - PebblingSchedule::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED); - BOOST_CHECK(memSchedule3.isValid()); + PebblingSchedule memSchedule3(bspSchedule, + PebblingSchedule::CacheEvictionStrategy::LEAST_RECENTLY_USED); + BOOST_CHECK(memSchedule3.IsValid()); - PebblingSchedule memSchedule5(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::FORESIGHT); - BOOST_CHECK(memSchedule5.isValid()); + PebblingSchedule memSchedule5(bspSchedule, PebblingSchedule::CacheEvictionStrategy::FORESIGHT); + BOOST_CHECK(memSchedule5.IsValid()); - instance.getArchitecture().setMemoryBound(2 * max_required); + instance.GetArchitecture().SetMemoryBound(2 * maxRequired); - PebblingSchedule memSchedule2(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::LARGEST_ID); - BOOST_CHECK(memSchedule2.isValid()); + PebblingSchedule memSchedule2(bspSchedule, PebblingSchedule::CacheEvictionStrategy::LARGEST_ID); + BOOST_CHECK(memSchedule2.IsValid()); - PebblingSchedule memSchedule4(bsp_schedule, - PebblingSchedule::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED); - BOOST_CHECK(memSchedule4.isValid()); + PebblingSchedule memSchedule4(bspSchedule, + PebblingSchedule::CacheEvictionStrategy::LEAST_RECENTLY_USED); + BOOST_CHECK(memSchedule4.IsValid()); - PebblingSchedule memSchedule6(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::FORESIGHT); - BOOST_CHECK(memSchedule6.isValid()); + PebblingSchedule memSchedule6(bspSchedule, PebblingSchedule::CacheEvictionStrategy::FORESIGHT); + BOOST_CHECK(memSchedule6.IsValid()); } } } -BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test) { - GreedyBspScheduler test; - run_test(&test); +BOOST_AUTO_TEST_CASE(GreedyBspSchedulerTest) { + GreedyBspScheduler test; + RunTest(&test); } -BOOST_AUTO_TEST_CASE(test_pebbling_schedule_writer) { - using graph = 
computational_dag_vector_impl_def_int_t; +BOOST_AUTO_TEST_CASE(TestPebblingScheduleWriter) { + using Graph = ComputationalDagVectorImplDefIntT; - BspInstance instance; - instance.setNumberOfProcessors(3); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + BspInstance instance; + instance.SetNumberOfProcessors(3); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -147,27 +145,26 @@ BOOST_AUTO_TEST_CASE(test_pebbling_schedule_writer) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), - instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), + instance.GetComputationalDag()); BOOST_CHECK(status); - BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54); - BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertex_types(), 1); + BOOST_CHECK_EQUAL(instance.GetComputationalDag().NumVertices(), 54); + BOOST_CHECK_EQUAL(instance.GetComputationalDag().NumVertexTypes(), 1); - BspSchedule bsp_schedule(instance); - GreedyBspScheduler scheduler; + BspSchedule bspSchedule(instance); + GreedyBspScheduler scheduler; - RETURN_STATUS result = scheduler.computeSchedule(bsp_schedule); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); + ReturnStatus result = scheduler.ComputeSchedule(bspSchedule); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); - std::vector > minimum_memory_required_vector - = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); - v_memw_t max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end()); - instance.getArchitecture().setMemoryBound(max_required + 3); + std::vector > minimumMemoryRequiredVector = 
PebblingSchedule::MinimumMemoryRequiredPerNodeType(instance); + VMemwT maxRequired = *std::max_element(minimumMemoryRequiredVector.begin(), minimumMemoryRequiredVector.end()); + instance.GetArchitecture().SetMemoryBound(maxRequired + 3); - PebblingSchedule memSchedule(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED); - BOOST_CHECK(memSchedule.isValid()); + PebblingSchedule memSchedule(bspSchedule, PebblingSchedule::CacheEvictionStrategy::LEAST_RECENTLY_USED); + BOOST_CHECK(memSchedule.IsValid()); std::cout << "Writing pebbling schedule" << std::endl; - file_writer::write_txt(std::cout, memSchedule); + file_writer::WriteTxt(std::cout, memSchedule); } diff --git a/tests/permutations.cpp b/tests/permutations.cpp index 05622968..cfb6a04a 100644 --- a/tests/permutations.cpp +++ b/tests/permutations.cpp @@ -26,7 +26,7 @@ limitations under the License. namespace osp { -BOOST_AUTO_TEST_CASE(In_Place_Permutation_random) { +BOOST_AUTO_TEST_CASE(InPlacePermutationRandom) { std::vector vec(20); std::iota(vec.begin(), vec.end(), 0); std::vector sol(vec); @@ -38,7 +38,7 @@ BOOST_AUTO_TEST_CASE(In_Place_Permutation_random) { std::shuffle(vec.begin(), vec.end(), gen); std::vector perm(vec); - permute_inplace(vec, perm); + PermuteInplace(vec, perm); for (std::size_t j = 0; j < sol.size(); ++j) { BOOST_CHECK_EQUAL(vec[j], sol[j]); BOOST_CHECK_EQUAL(perm[j], sol[j]); @@ -46,21 +46,21 @@ BOOST_AUTO_TEST_CASE(In_Place_Permutation_random) { } } -BOOST_AUTO_TEST_CASE(In_Place_Permutation_char) { +BOOST_AUTO_TEST_CASE(InPlacePermutationChar) { std::vector vec({'a', 'b', 'c', 'd', 'e', 'f', 'g'}); std::vector perm({4, 0, 1, 2, 3, 6, 5}); std::vector sol({'b', 'c', 'd', 'e', 'a', 'g', 'f'}); - std::vector perm_sol(perm.size()); - std::iota(perm_sol.begin(), perm_sol.end(), 0); + std::vector permSol(perm.size()); + std::iota(permSol.begin(), permSol.end(), 0); - permute_inplace(vec, perm); + PermuteInplace(vec, perm); for (std::size_t j = 0; j < sol.size(); ++j) 
{ BOOST_CHECK_EQUAL(vec[j], sol[j]); - BOOST_CHECK_EQUAL(perm[j], perm_sol[j]); + BOOST_CHECK_EQUAL(perm[j], permSol[j]); } } -BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_random) { +BOOST_AUTO_TEST_CASE(InPlaceInversePermutationRandom) { std::vector vec(20); std::iota(vec.begin(), vec.end(), 0); std::vector sol(vec); @@ -71,30 +71,30 @@ BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_random) { for (unsigned i = 0; i < 5U; ++i) { std::shuffle(vec.begin(), vec.end(), gen); - std::vector inv_perm(vec.size()); + std::vector invPerm(vec.size()); for (unsigned j = 0; j < vec.size(); ++j) { - inv_perm[vec[j]] = j; + invPerm[vec[j]] = j; } - inverse_permute_inplace(vec, inv_perm); + InversePermuteInplace(vec, invPerm); for (std::size_t j = 0; j < sol.size(); ++j) { BOOST_CHECK_EQUAL(vec[j], sol[j]); - BOOST_CHECK_EQUAL(inv_perm[j], sol[j]); + BOOST_CHECK_EQUAL(invPerm[j], sol[j]); } } } -BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_char) { +BOOST_AUTO_TEST_CASE(InPlaceInversePermutationChar) { std::vector vec({'a', 'b', 'c', 'd', 'e', 'f', 'g'}); std::vector perm({4, 0, 1, 2, 3, 6, 5}); std::vector sol({'e', 'a', 'b', 'c', 'd', 'g', 'f'}); - std::vector perm_sol(perm.size()); - std::iota(perm_sol.begin(), perm_sol.end(), 0); + std::vector permSol(perm.size()); + std::iota(permSol.begin(), permSol.end(), 0); - inverse_permute_inplace(vec, perm); + InversePermuteInplace(vec, perm); for (std::size_t j = 0; j < sol.size(); ++j) { BOOST_CHECK_EQUAL(vec[j], sol[j]); - BOOST_CHECK_EQUAL(perm[j], perm_sol[j]); + BOOST_CHECK_EQUAL(perm[j], permSol[j]); } } diff --git a/tests/random_graph_gen.cpp b/tests/random_graph_gen.cpp index 7a7fce52..ae6173dc 100644 --- a/tests/random_graph_gen.cpp +++ b/tests/random_graph_gen.cpp @@ -32,29 +32,29 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(Erdos_Renyi_graph_test) { - std::vector graph_sizes({100, 500, 500}); - std::vector graph_chances({10, 8, 20}); +BOOST_AUTO_TEST_CASE(ErdosRenyiGraphTest) { + std::vector graphSizes({100, 500, 500}); + std::vector graphChances({10, 8, 20}); - for (size_t i = 0; i < graph_sizes.size(); i++) { - computational_dag_vector_impl_def_int_t graph; - erdos_renyi_graph_gen(graph, graph_sizes[i], graph_chances[i]); + for (size_t i = 0; i < graphSizes.size(); i++) { + ComputationalDagVectorImplDefIntT graph; + ErdosRenyiGraphGen(graph, graphSizes[i], graphChances[i]); - BOOST_CHECK_EQUAL(graph.num_vertices(), graph_sizes[i]); - BOOST_CHECK_EQUAL(is_acyclic(graph), true); + BOOST_CHECK_EQUAL(graph.NumVertices(), graphSizes[i]); + BOOST_CHECK_EQUAL(IsAcyclic(graph), true); } } -BOOST_AUTO_TEST_CASE(near_diag_random_graph_test) { - std::vector graph_sizes({100, 500, 500}); - std::vector graph_bw({10, 20, 30}); - std::vector graph_prob({0.14, 0.02, 0.07}); +BOOST_AUTO_TEST_CASE(NearDiagRandomGraphTest) { + std::vector graphSizes({100, 500, 500}); + std::vector graphBw({10, 20, 30}); + std::vector graphProb({0.14, 0.02, 0.07}); - for (size_t i = 0; i < graph_sizes.size(); i++) { - computational_dag_vector_impl_def_int_t graph; - near_diag_random_graph(graph, graph_sizes[i], graph_bw[i], graph_prob[i]); + for (size_t i = 0; i < graphSizes.size(); i++) { + ComputationalDagVectorImplDefIntT graph; + NearDiagRandomGraph(graph, graphSizes[i], graphBw[i], graphProb[i]); - BOOST_CHECK_EQUAL(graph.num_vertices(), graph_sizes[i]); - BOOST_CHECK_EQUAL(is_acyclic(graph), true); + BOOST_CHECK_EQUAL(graph.NumVertices(), graphSizes[i]); + BOOST_CHECK_EQUAL(IsAcyclic(graph), true); } } diff --git a/tests/set_operations.cpp b/tests/set_operations.cpp index 73496e6e..69325aae 100644 --- a/tests/set_operations.cpp +++ b/tests/set_operations.cpp @@ -38,41 +38,41 @@ BOOST_AUTO_TEST_CASE(SetIntersection) { std::unordered_set i({3, 2}); std::unordered_set 
j({1}); - BOOST_CHECK(get_intersection(a, b) == c); - BOOST_CHECK(get_intersection(b, a) == c); - BOOST_CHECK(get_intersection(c, a) == c); - BOOST_CHECK(get_intersection(g, g) == g); - BOOST_CHECK(get_intersection(a, g) == g); - BOOST_CHECK(get_intersection(a, a) == g); - BOOST_CHECK(get_intersection(a, f) == i); - BOOST_CHECK(get_intersection(a, e) == e); - BOOST_CHECK(get_intersection(d, f) == j); + BOOST_CHECK(GetIntersection(a, b) == c); + BOOST_CHECK(GetIntersection(b, a) == c); + BOOST_CHECK(GetIntersection(c, a) == c); + BOOST_CHECK(GetIntersection(g, g) == g); + BOOST_CHECK(GetIntersection(a, g) == g); + BOOST_CHECK(GetIntersection(a, a) == g); + BOOST_CHECK(GetIntersection(a, f) == i); + BOOST_CHECK(GetIntersection(a, e) == e); + BOOST_CHECK(GetIntersection(d, f) == j); } BOOST_AUTO_TEST_CASE(SetIntersectionLarge) { - std::vector iota_0_to_10k(10'000); - std::iota(iota_0_to_10k.begin(), iota_0_to_10k.end(), 0); + std::vector iota0To10k(10'000); + std::iota(iota0To10k.begin(), iota0To10k.end(), 0); - std::vector iota_10k_to_20k(10'000); - std::iota(iota_10k_to_20k.begin(), iota_10k_to_20k.end(), 10'000); + std::vector iota10kTo20k(10'000); + std::iota(iota10kTo20k.begin(), iota10kTo20k.end(), 10'000); - std::unordered_set iota_0_to_10k_set(iota_0_to_10k.begin(), iota_0_to_10k.end()); + std::unordered_set iota0To10kSet(iota0To10k.begin(), iota0To10k.end()); { // Intersection of [0,10k] and [10k,20k] --> [] - std::unordered_set iota_10k_to_20k_set(iota_10k_to_20k.begin(), iota_10k_to_20k.end()); - BOOST_CHECK(get_intersection(iota_0_to_10k_set, iota_10k_to_20k_set).empty()); + std::unordered_set iota10kTo20kSet(iota10kTo20k.begin(), iota10kTo20k.end()); + BOOST_CHECK(GetIntersection(iota0To10kSet, iota10kTo20kSet).empty()); } { // Intersection of [0,10k] and [0k,10k] --> [0k,10k] - BOOST_CHECK(get_intersection(iota_0_to_10k_set, iota_0_to_10k_set) == iota_0_to_10k_set); + BOOST_CHECK(GetIntersection(iota0To10kSet, iota0To10kSet) == iota0To10kSet); } { // 
Intersection of [0,10k] and [5k,10k] --> [5k,10k] - std::vector iota_5k_to_10k(5'000); - std::iota(iota_5k_to_10k.begin(), iota_5k_to_10k.end(), 5'000); - std::unordered_set iota_5k_to_10k_set(iota_5k_to_10k.begin(), iota_5k_to_10k.end()); + std::vector iota5kTo10k(5'000); + std::iota(iota5kTo10k.begin(), iota5kTo10k.end(), 5'000); + std::unordered_set iota5kTo10kSet(iota5kTo10k.begin(), iota5kTo10k.end()); - BOOST_CHECK(get_intersection(iota_0_to_10k_set, iota_5k_to_10k_set) == iota_5k_to_10k_set); + BOOST_CHECK(GetIntersection(iota0To10kSet, iota5kTo10kSet) == iota5kTo10kSet); } } @@ -90,43 +90,43 @@ BOOST_AUTO_TEST_CASE(SetUnions) { std::unordered_set k({1, 2, 3, 6, 7}); std::unordered_set l({1, 2, 3, 5}); - BOOST_CHECK(get_union(a, b) == g); - BOOST_CHECK(get_union(b, a) == a); - BOOST_CHECK(get_union(c, a) == g); - BOOST_CHECK(get_union(g, g) == g); - BOOST_CHECK(get_union(a, g) == g); - BOOST_CHECK(get_union(a, a) == g); - BOOST_CHECK(get_union(a, f) == k); - BOOST_CHECK(get_union(a, e) == a); - BOOST_CHECK(get_union(d, f) == l); + BOOST_CHECK(GetUnion(a, b) == g); + BOOST_CHECK(GetUnion(b, a) == a); + BOOST_CHECK(GetUnion(c, a) == g); + BOOST_CHECK(GetUnion(g, g) == g); + BOOST_CHECK(GetUnion(a, g) == g); + BOOST_CHECK(GetUnion(a, a) == g); + BOOST_CHECK(GetUnion(a, f) == k); + BOOST_CHECK(GetUnion(a, e) == a); + BOOST_CHECK(GetUnion(d, f) == l); } BOOST_AUTO_TEST_CASE(SetUnionLarge) { - std::vector iota_0_to_10k(10'000); - std::iota(iota_0_to_10k.begin(), iota_0_to_10k.end(), 0); + std::vector iota0To10k(10'000); + std::iota(iota0To10k.begin(), iota0To10k.end(), 0); - std::vector iota_10k_to_20k(10'000); - std::iota(iota_10k_to_20k.begin(), iota_10k_to_20k.end(), 10'000); + std::vector iota10kTo20k(10'000); + std::iota(iota10kTo20k.begin(), iota10kTo20k.end(), 10'000); - std::unordered_set iota_0_to_10k_set(iota_0_to_10k.begin(), iota_0_to_10k.end()); + std::unordered_set iota0To10kSet(iota0To10k.begin(), iota0To10k.end()); { // Union of [0,10k] and 
[10k,20k] --> [0k,20k] - std::unordered_set iota_10k_to_20k_set(iota_10k_to_20k.begin(), iota_10k_to_20k.end()); - std::unordered_set expected_union(iota_0_to_10k.begin(), iota_0_to_10k.end()); - expected_union.insert(iota_10k_to_20k.begin(), iota_10k_to_20k.end()); - BOOST_CHECK(get_union(iota_0_to_10k_set, iota_10k_to_20k_set) == expected_union); + std::unordered_set iota10kTo20kSet(iota10kTo20k.begin(), iota10kTo20k.end()); + std::unordered_set expectedUnion(iota0To10k.begin(), iota0To10k.end()); + expectedUnion.insert(iota10kTo20k.begin(), iota10kTo20k.end()); + BOOST_CHECK(GetUnion(iota0To10kSet, iota10kTo20kSet) == expectedUnion); } { // Union of [0,10k] and [0k,10k] --> [0k,10k] - BOOST_CHECK(get_union(iota_0_to_10k_set, iota_0_to_10k_set) == iota_0_to_10k_set); + BOOST_CHECK(GetUnion(iota0To10kSet, iota0To10kSet) == iota0To10kSet); } { // Union of [0,10k] and [5k,15k] --> [0k,15k] - std::vector iota_5k_to_15k(10'000); - std::iota(iota_5k_to_15k.begin(), iota_5k_to_15k.end(), 5'000); - std::unordered_set iota_5k_to_15k_set(iota_5k_to_15k.begin(), iota_5k_to_15k.end()); - std::unordered_set expected_union(iota_0_to_10k.begin(), iota_0_to_10k.end()); - expected_union.insert(iota_5k_to_15k.begin(), iota_5k_to_15k.end()); - BOOST_CHECK(get_union(iota_0_to_10k_set, iota_5k_to_15k_set) == expected_union); + std::vector iota5kTo15k(10'000); + std::iota(iota5kTo15k.begin(), iota5kTo15k.end(), 5'000); + std::unordered_set iota5kTo15kSet(iota5kTo15k.begin(), iota5kTo15k.end()); + std::unordered_set expectedUnion(iota0To10k.begin(), iota0To10k.end()); + expectedUnion.insert(iota5kTo15k.begin(), iota5kTo15k.end()); + BOOST_CHECK(GetUnion(iota0To10kSet, iota5kTo15kSet) == expectedUnion); } } diff --git a/tests/sorts_and_arrangements.cpp b/tests/sorts_and_arrangements.cpp index 328f228e..a44f5e6f 100644 --- a/tests/sorts_and_arrangements.cpp +++ b/tests/sorts_and_arrangements.cpp @@ -26,23 +26,23 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(Sorts_and_Arrangements1) { +BOOST_AUTO_TEST_CASE(SortsAndArrangements1) { std::vector a({4, 7, 2, -2, 4}); - std::vector neg_test1({4, 7, 2, 8, 4}); - std::vector neg_test2({8, 2, 4, 4, 7}); + std::vector negTest1({4, 7, 2, 8, 4}); + std::vector negTest2({8, 2, 4, 4, 7}); std::vector b = a; - std::vector a_sort({-2, 2, 4, 4, 7}); - std::vector a_re1({3, 2, 0, 4, 1}); - std::vector a_re2({3, 2, 4, 0, 1}); + std::vector aSort({-2, 2, 4, 4, 7}); + std::vector aRe1({3, 2, 0, 4, 1}); + std::vector aRe2({3, 2, 4, 0, 1}); - std::vector re = sort_and_sorting_arrangement(a); - BOOST_CHECK(re == a_re1 || re == a_re2); - BOOST_CHECK(a == a_sort); + std::vector re = SortAndSortingArrangement(a); + BOOST_CHECK(re == aRe1 || re == aRe2); + BOOST_CHECK(a == aSort); - BOOST_CHECK(check_vector_is_rearrangement_of_0_to_N(re)); - BOOST_CHECK(check_vector_is_rearrangement_of_0_to_N(a_re1)); - BOOST_CHECK(!check_vector_is_rearrangement_of_0_to_N(neg_test1)); - BOOST_CHECK(!check_vector_is_rearrangement_of_0_to_N(neg_test2)); + BOOST_CHECK(CheckVectorIsRearrangementOf0ToN(re)); + BOOST_CHECK(CheckVectorIsRearrangementOf0ToN(aRe1)); + BOOST_CHECK(!CheckVectorIsRearrangementOf0ToN(negTest1)); + BOOST_CHECK(!CheckVectorIsRearrangementOf0ToN(negTest2)); std::cout << "b: "; for (auto &i : b) { @@ -50,7 +50,7 @@ BOOST_AUTO_TEST_CASE(Sorts_and_Arrangements1) { } std::cout << std::endl; - sort_like_arrangement(b, re); + SortLikeArrangement(b, re); std::cout << "re: "; for (auto &i : re) { @@ -73,52 +73,52 @@ BOOST_AUTO_TEST_CASE(Sorts_and_Arrangements1) { std::cout << std::endl; } -BOOST_AUTO_TEST_CASE(Sorts_and_Arrangements2) { +BOOST_AUTO_TEST_CASE(SortsAndArrangements2) { std::vector a({"aa", "z", "b", "trace", "racket"}); std::vector c({16, 901, 2, 8, 29}); std::vector b = c; - std::vector a_sort({"b", "trace", "aa", "racket", "z"}); - std::vector c_re({2, 3, 0, 4, 1}); + std::vector aSort({"b", "trace", "aa", "racket", "z"}); + std::vector cRe({2, 
3, 0, 4, 1}); - BOOST_CHECK(check_vector_is_rearrangement_of_0_to_N(c_re)); - BOOST_CHECK(!check_vector_is_rearrangement_of_0_to_N(c)); + BOOST_CHECK(CheckVectorIsRearrangementOf0ToN(cRe)); + BOOST_CHECK(!CheckVectorIsRearrangementOf0ToN(c)); - BOOST_CHECK(sorting_arrangement(c) == c_re); + BOOST_CHECK(SortingArrangement(c) == cRe); BOOST_CHECK(c == b); - sort_like(a, c); + SortLike(a, c); - BOOST_CHECK(a == a_sort); + BOOST_CHECK(a == aSort); BOOST_CHECK(c == b); } -BOOST_AUTO_TEST_CASE(Sorts_and_Arrangements3) { +BOOST_AUTO_TEST_CASE(SortsAndArrangements3) { std::vector id({0, 1, 2, 3, 4, 5, 6}); std::vector v = id; - std::vector perm_a({0, 2, 1, 3, 4, 5, 6}); - std::vector perm_b({0, 2, 1, 4, 5, 6, 3}); - std::vector perm_c({1, 2, 0, 3, 4, 5, 6}); + std::vector permA({0, 2, 1, 3, 4, 5, 6}); + std::vector permB({0, 2, 1, 4, 5, 6, 3}); + std::vector permC({1, 2, 0, 3, 4, 5, 6}); - sort_like(v, id); + SortLike(v, id); BOOST_CHECK(v == id); - sort_like(v, perm_a); - BOOST_CHECK(v == perm_a); - sort_like(v, perm_a); + SortLike(v, permA); + BOOST_CHECK(v == permA); + SortLike(v, permA); BOOST_CHECK(v == id); - sort_like(v, perm_b); - BOOST_CHECK(v != perm_b); - sort_like(v, perm_b); + SortLike(v, permB); + BOOST_CHECK(v != permB); + SortLike(v, permB); BOOST_CHECK(v != id); - sort_like(v, perm_b); - BOOST_CHECK(v == perm_b); - sort_like(v, perm_b); + SortLike(v, permB); + BOOST_CHECK(v == permB); + SortLike(v, permB); BOOST_CHECK(v == id); - sort_like(v, perm_c); - BOOST_CHECK(v != perm_c); - sort_like(v, perm_c); - BOOST_CHECK(v == perm_c); - sort_like(v, perm_c); + SortLike(v, permC); + BOOST_CHECK(v != permC); + SortLike(v, permC); + BOOST_CHECK(v == permC); + SortLike(v, permC); BOOST_CHECK(v == id); } diff --git a/tests/sparse_matrix_impl.cpp b/tests/sparse_matrix_impl.cpp index 8957fae7..0da63420 100644 --- a/tests/sparse_matrix_impl.cpp +++ b/tests/sparse_matrix_impl.cpp @@ -30,7 +30,7 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(test_sparse_matrix_adapter_1) { +BOOST_AUTO_TEST_CASE(TestSparseMatrixAdapter1) { /* ---0 @@ -57,8 +57,8 @@ BOOST_AUTO_TEST_CASE(test_sparse_matrix_adapter_1) { 6 | 0.0 10.0 11.0 0 0 12.0 0 */ - using SM_csr = Eigen::SparseMatrix; - using SM_csc = Eigen::SparseMatrix; + using SmCsr = Eigen::SparseMatrix; + using SmCsc = Eigen::SparseMatrix; using Triplet = Eigen::Triplet; const int size = 7; std::vector triplets; @@ -82,31 +82,31 @@ BOOST_AUTO_TEST_CASE(test_sparse_matrix_adapter_1) { triplets.emplace_back(6, 5, 12.0); // x6 ← x5 // Construct matrix - SM_csr L_csr(size, size); - L_csr.setFromTriplets(triplets.begin(), triplets.end()); + SmCsr lCsr(size, size); + lCsr.setFromTriplets(triplets.begin(), triplets.end()); SparseMatrixImp graph; - graph.setCSR(&L_csr); - SM_csc L_csc{}; - L_csc = L_csr; - graph.setCSC(&L_csc); - - BOOST_CHECK_EQUAL(graph.num_edges(), 11); - BOOST_CHECK_EQUAL(graph.num_vertices(), 7); - BOOST_CHECK_EQUAL(graph.in_degree(0), 0); - BOOST_CHECK_EQUAL(graph.in_degree(1), 1); - BOOST_CHECK_EQUAL(graph.in_degree(3), 2); - BOOST_CHECK_EQUAL(graph.in_degree(6), 3); - BOOST_CHECK_EQUAL(graph.out_degree(0), 4); - BOOST_CHECK_EQUAL(graph.out_degree(1), 2); - BOOST_CHECK_EQUAL(graph.out_degree(3), 1); - BOOST_CHECK_EQUAL(graph.out_degree(6), 0); - - using vertex_idx = int32_t; - - std::vector vertices{0, 1, 2, 3, 4, 5, 6}; - - std::vector> out_neighbors{ + graph.SetCsr(&lCsr); + SmCsc lCsc{}; + lCsc = lCsr; + graph.SetCsc(&lCsc); + + BOOST_CHECK_EQUAL(graph.NumEdges(), 11); + BOOST_CHECK_EQUAL(graph.NumVertices(), 7); + BOOST_CHECK_EQUAL(graph.InDegree(0), 0); + BOOST_CHECK_EQUAL(graph.InDegree(1), 1); + BOOST_CHECK_EQUAL(graph.InDegree(3), 2); + BOOST_CHECK_EQUAL(graph.InDegree(6), 3); + BOOST_CHECK_EQUAL(graph.OutDegree(0), 4); + BOOST_CHECK_EQUAL(graph.OutDegree(1), 2); + BOOST_CHECK_EQUAL(graph.OutDegree(3), 1); + BOOST_CHECK_EQUAL(graph.OutDegree(6), 0); + + using VertexIdx = int32_t; + + std::vector 
vertices{0, 1, 2, 3, 4, 5, 6}; + + std::vector> outNeighbors{ {1, 2, 3, 5}, {4, 6}, {3, 6}, @@ -116,7 +116,7 @@ BOOST_AUTO_TEST_CASE(test_sparse_matrix_adapter_1) { {} }; - std::vector> in_neighbors{ + std::vector> inNeighbors{ {}, {0}, {0}, @@ -128,38 +128,38 @@ BOOST_AUTO_TEST_CASE(test_sparse_matrix_adapter_1) { size_t idx = 0; - for (const long unsigned int &v : graph.vertices()) { + for (const long unsigned int &v : graph.Vertices()) { BOOST_CHECK_EQUAL(v, vertices[idx++]); size_t i = 0; const size_t vi = static_cast(v); - for (const auto &e : graph.children(v)) { - BOOST_CHECK_EQUAL(e, out_neighbors[vi][i++]); + for (const auto &e : graph.Children(v)) { + BOOST_CHECK_EQUAL(e, outNeighbors[vi][i++]); } i = 0; - for (const auto &e : graph.parents(v)) { - BOOST_CHECK_EQUAL(e, in_neighbors[vi][i++]); + for (const auto &e : graph.Parents(v)) { + BOOST_CHECK_EQUAL(e, inNeighbors[vi][i++]); } i = 0; - for (const auto &e : out_edges(v, graph)) { - BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[vi][i++]); + for (const auto &e : OutEdges(v, graph)) { + BOOST_CHECK_EQUAL(Target(e, graph), outNeighbors[vi][i++]); } i = 0; - for (const auto &e : in_edges(v, graph)) { - BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[vi][i++]); + for (const auto &e : InEdges(v, graph)) { + BOOST_CHECK_EQUAL(Source(e, graph), inNeighbors[vi][i++]); } - BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[vi].size()); - BOOST_CHECK_EQUAL(graph.out_degree(v), out_neighbors[vi].size()); + BOOST_CHECK_EQUAL(graph.InDegree(v), inNeighbors[vi].size()); + BOOST_CHECK_EQUAL(graph.OutDegree(v), outNeighbors[vi].size()); } unsigned count = 0; - for (const auto &e : edges(graph)) { - std::cout << e.source << " -> " << e.target << std::endl; + for (const auto &e : Edges(graph)) { + std::cout << e.source_ << " -> " << e.target_ << std::endl; count++; } BOOST_CHECK_EQUAL(count, 11); diff --git a/tests/sptrsv.cpp b/tests/sptrsv.cpp index 6bc49c93..59605ae8 100644 --- a/tests/sptrsv.cpp +++ 
b/tests/sptrsv.cpp @@ -39,7 +39,7 @@ limitations under the License. using namespace osp; -bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { +bool CompareVectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { std::cout << std::fixed; std::cout << std::setprecision(15); @@ -58,9 +58,9 @@ bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { return same; } -BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { - using SM_csr = Eigen::SparseMatrix; - using SM_csc = Eigen::SparseMatrix; +BOOST_AUTO_TEST_CASE(TestEigenSptrsv) { + using SmCsr = Eigen::SparseMatrix; + using SmCsc = Eigen::SparseMatrix; // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -73,179 +73,178 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { SparseMatrixImp graph; - SM_csr L_csr; - bool matrix_load_success = Eigen::loadMarket(L_csr, filename); - BOOST_CHECK(matrix_load_success); + SmCsr lCsr; + bool matrixLoadSuccess = Eigen::loadMarket(lCsr, filename); + BOOST_CHECK(matrixLoadSuccess); - if (!matrix_load_success) { + if (!matrixLoadSuccess) { std::cerr << "Failed to read matrix from " << filename << std::endl; return; } - std::cout << "Loaded matrix of size " << L_csr.rows() << " x " << L_csr.cols() << " with " << L_csr.nonZeros() - << " non-zeros.\n"; + std::cout << "Loaded matrix of size " << lCsr.rows() << " x " << lCsr.cols() << " with " << lCsr.nonZeros() << " non-zeros.\n"; - graph.setCSR(&L_csr); - SM_csc L_csc{}; - L_csc = L_csr; - graph.setCSC(&L_csc); + graph.SetCsr(&lCsr); + SmCsc lCsc{}; + lCsc = lCsr; + graph.SetCsc(&lCsc); BspArchitecture> architecture(16, 1, 500); BspInstance> instance(graph, architecture); GrowLocalAutoCores> scheduler; BspSchedule> schedule(instance); - auto result = scheduler.computeSchedule(schedule); + auto result = scheduler.ComputeSchedule(schedule); - BOOST_CHECK_EQUAL(result, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK_EQUAL(&schedule.getInstance(), &instance); - 
BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); + BOOST_CHECK_EQUAL(result, ReturnStatus::OSP_SUCCESS); + BOOST_CHECK_EQUAL(&schedule.GetInstance(), &instance); + BOOST_CHECK(schedule.SatisfiesPrecedenceConstraints()); - BspScheduleCS> schedule_cs(instance); - auto result_cs = scheduler.computeScheduleCS(schedule_cs); + BspScheduleCS> scheduleCs(instance); + auto resultCs = scheduler.ComputeScheduleCS(scheduleCs); /* - for (const auto &node : instance.vertices()) { + for (const auto &node : instance.Vertices()) { std::cout << "Vertex " << node << " children:" << std::endl; - for (const auto &target : instance.getComputationalDag().children(node)) { + for (const auto &target : instance.GetComputationalDag().Children(node)) { std::cout << "target:" << target << std::endl; } std::cout << std::endl; } */ - BOOST_CHECK_EQUAL(result_cs, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK(schedule_cs.hasValidCommSchedule()); + BOOST_CHECK_EQUAL(resultCs, ReturnStatus::OSP_SUCCESS); + BOOST_CHECK(scheduleCs.HasValidCommSchedule()); - // std::cout << "Scheduling Costs:" << schedule_cs.computeCosts() << std::endl; + // std::cout << "Scheduling Costs:" << schedule_cs.ComputeCosts() << std::endl; // std::cout << "lazy com Costs:" <(); - L_b_ref.setOnes(); // Initialize RHS vector with all ones - L_x_ref.setZero(); - L_x_ref = L_view.solve(L_b_ref); + Eigen::VectorXd lBRef, lXRef; // Declare vectors + auto n = lCsc.cols(); // Get the number of columns (assuming square matrix) + lXRef.resize(n); // Resize solution vector + lBRef.resize(n); // Resize RHS vector + auto lView = lCsc.triangularView(); + lBRef.setOnes(); // Initialize RHS vector with all ones + lXRef.setZero(); + lXRef = lView.solve(lBRef); // OSP no permutation setup Sptrsv sim{instance}; - sim.setup_csr_no_permutation(schedule_cs); + sim.SetupCsrNoPermutation(scheduleCs); // osp no permutation L_solve - auto L_x_osp = L_x_ref; - auto L_b_osp = L_b_ref; - L_b_osp.setOnes(); + auto lXOsp = lXRef; + auto lBOsp = lBRef; 
+ lBOsp.setOnes(); // L_x_osp.setZero(); - sim.x = &L_x_osp[0]; - sim.b = &L_b_osp[0]; - sim.lsolve_no_permutation(); - BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); + sim.x_ = &lXOsp[0]; + sim.b_ = &lBOsp[0]; + sim.LsolveNoPermutation(); + BOOST_CHECK(CompareVectors(lXRef, lXOsp)); // Comparisson with osp serial L solve // Eigen - L_b_ref.setOnes(); - L_x_ref.setZero(); - L_x_ref = L_view.solve(L_b_ref); + lBRef.setOnes(); + lXRef.setZero(); + lXRef = lView.solve(lBRef); // OSP - L_b_osp.setOnes(); + lBOsp.setOnes(); // L_x_osp.setZero(); - sim.lsolve_serial(); - BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); + sim.LsolveSerial(); + BOOST_CHECK(CompareVectors(lXRef, lXOsp)); // INPLACE case eigen L solve vs osp L solve // Eigen - L_b_ref.setConstant(0.1); - L_x_ref.setConstant(0.1); - L_x_ref = L_view.solve(L_b_ref); + lBRef.setConstant(0.1); + lXRef.setConstant(0.1); + lXRef = lView.solve(lBRef); // OSP - L_x_osp.setConstant(0.1); - L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values - sim.lsolve_no_permutation_in_place(); - BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); + lXOsp.setConstant(0.1); + lBOsp.setZero(); // this will not be used as x will take the values that already has instead of the b values + sim.LsolveNoPermutationInPlace(); + BOOST_CHECK(CompareVectors(lXRef, lXOsp)); // Comparisson with osp serial in place L solve // Eigen - L_b_ref.setConstant(0.1); - L_x_ref.setConstant(0.1); - L_x_ref = L_view.solve(L_b_ref); + lBRef.setConstant(0.1); + lXRef.setConstant(0.1); + lXRef = lView.solve(lBRef); // OSP - L_x_osp.setConstant(0.1); - L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values - sim.lsolve_serial_in_place(); - BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); + lXOsp.setConstant(0.1); + lBOsp.setZero(); // this will not be used as x will take the values that already has instead of the b values + sim.LsolveSerialInPlace(); + 
BOOST_CHECK(CompareVectors(lXRef, lXOsp)); // Upper Solve - SM_csr U_csr = L_csc.transpose(); - SM_csc U_csc = U_csr; // Convert to column-major - Eigen::VectorXd U_b_ref(n), U_x_ref(n); - Eigen::VectorXd U_b_osp(n), U_x_osp(n); + SmCsr uCsr = lCsc.transpose(); + SmCsc uCsc = uCsr; // Convert to column-major + Eigen::VectorXd uBRef(n), uXRef(n); + Eigen::VectorXd uBOsp(n), uXOsp(n); // Eigen reference U solve - U_b_ref.setOnes(); - U_x_ref.setZero(); - auto U_view = U_csc.triangularView(); - U_x_ref = U_view.solve(U_b_ref); + uBRef.setOnes(); + uXRef.setZero(); + auto uView = uCsc.triangularView(); + uXRef = uView.solve(uBRef); // OSP U solve - U_b_osp.setOnes(); - U_x_osp.setZero(); - sim.x = &U_x_osp[0]; - sim.b = &U_b_osp[0]; - sim.usolve_no_permutation(); - BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp)); + uBOsp.setOnes(); + uXOsp.setZero(); + sim.x_ = &uXOsp[0]; + sim.b_ = &uBOsp[0]; + sim.UsolveNoPermutation(); + BOOST_CHECK(CompareVectors(uXRef, uXOsp)); // Comparisson with osp serial U solve // Eigen - U_b_ref.setOnes(); - U_x_ref.setZero(); - U_x_ref = U_view.solve(U_b_ref); + uBRef.setOnes(); + uXRef.setZero(); + uXRef = uView.solve(uBRef); // OSP - U_b_osp.setOnes(); - U_x_osp.setZero(); - sim.usolve_serial(); - BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp)); + uBOsp.setOnes(); + uXOsp.setZero(); + sim.UsolveSerial(); + BOOST_CHECK(CompareVectors(uXRef, uXOsp)); // INPLACE case eigen U solve vs osp U solve // Eigen - U_b_ref.setConstant(0.1); - U_x_ref.setConstant(0.1); - U_x_ref = U_view.solve(U_b_ref); + uBRef.setConstant(0.1); + uXRef.setConstant(0.1); + uXRef = uView.solve(uBRef); // OSP - U_x_osp.setConstant(0.1); - U_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values - sim.usolve_no_permutation_in_place(); - BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp)); + uXOsp.setConstant(0.1); + uBOsp.setZero(); // this will not be used as x will take the values that already has instead of the b values + 
sim.UsolveNoPermutationInPlace(); + BOOST_CHECK(CompareVectors(uXRef, uXOsp)); // Comparisson with osp serial in place U solve // Eigen - U_b_ref.setConstant(0.1); - U_x_ref.setConstant(0.1); - U_x_ref = U_view.solve(U_b_ref); + uBRef.setConstant(0.1); + uXRef.setConstant(0.1); + uXRef = uView.solve(uBRef); // OSP - U_x_osp.setConstant(0.1); - U_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values - sim.usolve_serial_in_place(); - BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp)); + uXOsp.setConstant(0.1); + uBOsp.setZero(); // this will not be used as x will take the values that already has instead of the b values + sim.UsolveSerialInPlace(); + BOOST_CHECK(CompareVectors(uXRef, uXOsp)); // Lsolve in-place With PERMUTATION - std::vector perm = schedule_node_permuter_basic(schedule_cs, LOOP_PROCESSORS); - sim.setup_csr_with_permutation(schedule_cs, perm); + std::vector perm = ScheduleNodePermuterBasic(scheduleCs, LOOP_PROCESSORS); + sim.SetupCsrWithPermutation(scheduleCs, perm); // Comparisson with osp serial in place L solve // Eigen - L_b_ref.setConstant(0.1); - L_x_ref.setConstant(0.1); - L_x_ref = L_view.solve(L_b_ref); + lBRef.setConstant(0.1); + lXRef.setConstant(0.1); + lXRef = lView.solve(lBRef); // OSP - L_x_osp.setConstant(0.1); - L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values - sim.x = &L_x_osp[0]; - sim.b = &L_b_osp[0]; + lXOsp.setConstant(0.1); + lBOsp.setZero(); // this will not be used as x will take the values that already has instead of the b values + sim.x_ = &lXOsp[0]; + sim.b_ = &lBOsp[0]; // sim.permute_x_vector(perm); - sim.lsolve_with_permutation_in_place(); + sim.LsolveWithPermutationInPlace(); - sim.permute_x_vector(perm); - BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); + sim.PermuteXVector(perm); + BOOST_CHECK(CompareVectors(lXRef, lXOsp)); } #endif diff --git a/tests/stepbystep_coarsen_and_multilevel.cpp 
b/tests/stepbystep_coarsen_and_multilevel.cpp index 270893e9..56bd7a50 100644 --- a/tests/stepbystep_coarsen_and_multilevel.cpp +++ b/tests/stepbystep_coarsen_and_multilevel.cpp @@ -33,9 +33,9 @@ limitations under the License. using namespace osp; -BOOST_AUTO_TEST_CASE(StepByStepCoarser_test) { - using graph = boost_graph_uint_t; - StepByStepCoarser test; +BOOST_AUTO_TEST_CASE(StepByStepCoarserTest) { + using Graph = BoostGraphUintT; + StepByStepCoarser test; // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -45,37 +45,37 @@ BOOST_AUTO_TEST_CASE(StepByStepCoarser_test) { std::cout << cwd << std::endl; } - graph DAG; + Graph dag; - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag").string(), DAG); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB( + (cwd / "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag").string(), dag); BOOST_CHECK(status); - StepByStepCoarser coarser; + StepByStepCoarser coarser; - coarser.setTargetNumberOfNodes(static_cast(DAG.num_vertices()) / 2); + coarser.SetTargetNumberOfNodes(static_cast(dag.NumVertices()) / 2); - graph coarsened_dag1, coarsened_dag2; - std::vector>> old_vertex_ids; - std::vector> new_vertex_id; + Graph coarsenedDag1, coarsenedDag2; + std::vector>> oldVertexIds; + std::vector> newVertexId; - coarser.coarsenDag(DAG, coarsened_dag1, new_vertex_id); - old_vertex_ids = coarser_util::invert_vertex_contraction_map(new_vertex_id); + coarser.CoarsenDag(dag, coarsenedDag1, newVertexId); + oldVertexIds = coarser_util::InvertVertexContractionMap(newVertexId); - coarser.setTargetNumberOfNodes(static_cast(DAG.num_vertices()) * 2 / 3); - coarser.coarsenForPebbling(DAG, coarsened_dag2, new_vertex_id); - old_vertex_ids = coarser_util::invert_vertex_contraction_map(new_vertex_id); + coarser.SetTargetNumberOfNodes(static_cast(dag.NumVertices()) * 2 / 3); + coarser.CoarsenForPebbling(dag, coarsenedDag2, 
newVertexId); + oldVertexIds = coarser_util::InvertVertexContractionMap(newVertexId); } -BOOST_AUTO_TEST_CASE(Multilevel_test) { - using graph = boost_graph_uint_t; - StepByStepCoarser test; +BOOST_AUTO_TEST_CASE(MultilevelTest) { + using Graph = BoostGraphUintT; + StepByStepCoarser test; - BspInstance instance; - instance.setNumberOfProcessors(2); - instance.setCommunicationCosts(3); - instance.setSynchronisationCosts(5); + BspInstance instance; + instance.SetNumberOfProcessors(2); + instance.SetCommunicationCosts(3); + instance.SetSynchronisationCosts(5); // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); @@ -85,25 +85,25 @@ BOOST_AUTO_TEST_CASE(Multilevel_test) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(), - instance.getComputationalDag()); + bool status = file_reader::ReadComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(), + instance.GetComputationalDag()); BOOST_CHECK(status); - MultiLevelHillClimbingScheduler multi1, multi2; - BspSchedule schedule1(instance), schedule2(instance); + MultiLevelHillClimbingScheduler multi1, multi2; + BspSchedule schedule1(instance), schedule2(instance); - multi1.setContractionRate(0.3); - multi1.useLinearRefinementSteps(5); + multi1.SetContractionRate(0.3); + multi1.UseLinearRefinementSteps(5); - auto result = multi1.computeSchedule(schedule1); - BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule1.satisfiesPrecedenceConstraints()); + auto result = multi1.ComputeSchedule(schedule1); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule1.SatisfiesPrecedenceConstraints()); - multi2.setContractionRate(0.3); - multi2.useExponentialRefinementPoints(1.2); + multi2.SetContractionRate(0.3); + multi2.UseExponentialRefinementPoints(1.2); - result = multi2.computeSchedule(schedule2); - 
BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - BOOST_CHECK(schedule2.satisfiesPrecedenceConstraints()); + result = multi2.ComputeSchedule(schedule2); + BOOST_CHECK_EQUAL(ReturnStatus::OSP_SUCCESS, result); + BOOST_CHECK(schedule2.SatisfiesPrecedenceConstraints()); } diff --git a/tests/strongly_connected_components.cpp b/tests/strongly_connected_components.cpp index 65e294e8..b3cef5c6 100644 --- a/tests/strongly_connected_components.cpp +++ b/tests/strongly_connected_components.cpp @@ -28,150 +28,150 @@ limitations under the License. // Helper function to compare SCC results. template -void check_sccs_equal(const std::vector> &result, const std::vector> &expected) { - auto to_set_of_sets = [](const std::vector> &vec_of_vecs) { - std::set> set_of_sets; - for (const auto &inner_vec : vec_of_vecs) { - set_of_sets.insert(std::set(inner_vec.begin(), inner_vec.end())); +void CheckSccsEqual(const std::vector> &result, const std::vector> &expected) { + auto toSetOfSets = [](const std::vector> &vecOfVecs) { + std::set> setOfSets; + for (const auto &innerVec : vecOfVecs) { + setOfSets.insert(std::set(innerVec.begin(), innerVec.end())); } - return set_of_sets; + return setOfSets; }; - auto result_set = to_set_of_sets(result); - auto expected_set = to_set_of_sets(expected); + auto resultSet = toSetOfSets(result); + auto expectedSet = toSetOfSets(expected); - BOOST_CHECK(result_set == expected_set); + BOOST_CHECK(resultSet == expectedSet); } -using graph = osp::computational_dag_edge_idx_vector_impl_def_int_t; -using VertexType = graph::vertex_idx; +using Graph = osp::ComputationalDagEdgeIdxVectorImplDefIntT; +using VertexType = Graph::VertexIdx; -BOOST_AUTO_TEST_SUITE(StronglyConnectedComponentsTestSuite) +BOOST_AUTO_TEST_SUITE(strongly_connected_components_test_suite) BOOST_AUTO_TEST_CASE(EmptyGraphTest) { - graph g; - auto sccs = osp::strongly_connected_components(g); + Graph g; + auto sccs = osp::StronglyConnectedComponents(g); BOOST_CHECK(sccs.empty()); } 
BOOST_AUTO_TEST_CASE(NoEdgesTest) { - graph g; - g.add_vertex(1, 1, 1); - g.add_vertex(1, 1, 1); - g.add_vertex(1, 1, 1); + Graph g; + g.AddVertex(1, 1, 1); + g.AddVertex(1, 1, 1); + g.AddVertex(1, 1, 1); - auto sccs = osp::strongly_connected_components(g); + auto sccs = osp::StronglyConnectedComponents(g); std::vector> expected = {{0}, {1}, {2}}; - check_sccs_equal(sccs, expected); + CheckSccsEqual(sccs, expected); } BOOST_AUTO_TEST_CASE(LineGraphTest) { - graph g; - g.add_vertex(1, 1, 1); - g.add_vertex(1, 1, 1); - g.add_vertex(1, 1, 1); - g.add_vertex(1, 1, 1); - g.add_edge(0, 1); - g.add_edge(1, 2); - g.add_edge(2, 3); - - auto sccs = osp::strongly_connected_components(g); + Graph g; + g.AddVertex(1, 1, 1); + g.AddVertex(1, 1, 1); + g.AddVertex(1, 1, 1); + g.AddVertex(1, 1, 1); + g.AddEdge(0, 1); + g.AddEdge(1, 2); + g.AddEdge(2, 3); + + auto sccs = osp::StronglyConnectedComponents(g); std::vector> expected = {{0}, {1}, {2}, {3}}; - check_sccs_equal(sccs, expected); + CheckSccsEqual(sccs, expected); } BOOST_AUTO_TEST_CASE(SimpleCycleTest) { - graph g; - g.add_vertex(1, 1, 1); - g.add_vertex(1, 1, 1); - g.add_vertex(1, 1, 1); - g.add_edge(0, 1); - g.add_edge(1, 2); - g.add_edge(2, 0); - - auto sccs = osp::strongly_connected_components(g); + Graph g; + g.AddVertex(1, 1, 1); + g.AddVertex(1, 1, 1); + g.AddVertex(1, 1, 1); + g.AddEdge(0, 1); + g.AddEdge(1, 2); + g.AddEdge(2, 0); + + auto sccs = osp::StronglyConnectedComponents(g); std::vector> expected = { {0, 1, 2} }; - check_sccs_equal(sccs, expected); + CheckSccsEqual(sccs, expected); } BOOST_AUTO_TEST_CASE(FullGraphIsSCCTest) { - graph g; - g.add_vertex(1, 1, 1); - g.add_vertex(1, 1, 1); - g.add_vertex(1, 1, 1); - g.add_edge(0, 1); - g.add_edge(1, 0); - g.add_edge(1, 2); - g.add_edge(2, 1); - g.add_edge(0, 2); - g.add_edge(2, 0); - - auto sccs = osp::strongly_connected_components(g); + Graph g; + g.AddVertex(1, 1, 1); + g.AddVertex(1, 1, 1); + g.AddVertex(1, 1, 1); + g.AddEdge(0, 1); + g.AddEdge(1, 0); + 
g.AddEdge(1, 2); + g.AddEdge(2, 1); + g.AddEdge(0, 2); + g.AddEdge(2, 0); + + auto sccs = osp::StronglyConnectedComponents(g); std::vector> expected = { {0, 1, 2} }; - check_sccs_equal(sccs, expected); + CheckSccsEqual(sccs, expected); } BOOST_AUTO_TEST_CASE(MultipleSCCsTest) { - graph g; + Graph g; for (int i = 0; i < 8; ++i) { - g.add_vertex(1, 1, 1); + g.AddVertex(1, 1, 1); } - g.add_edge(0, 1); - g.add_edge(1, 2); - g.add_edge(2, 0); // SCC {0,1,2} - g.add_edge(3, 4); - g.add_edge(4, 3); // SCC {3,4} - g.add_edge(5, 6); - g.add_edge(6, 5); // SCC {5,6} + g.AddEdge(0, 1); + g.AddEdge(1, 2); + g.AddEdge(2, 0); // SCC {0,1,2} + g.AddEdge(3, 4); + g.AddEdge(4, 3); // SCC {3,4} + g.AddEdge(5, 6); + g.AddEdge(6, 5); // SCC {5,6} // SCC {7} - g.add_edge(2, 3); - g.add_edge(3, 5); - g.add_edge(4, 6); - g.add_edge(5, 7); + g.AddEdge(2, 3); + g.AddEdge(3, 5); + g.AddEdge(4, 6); + g.AddEdge(5, 7); - auto sccs = osp::strongly_connected_components(g); + auto sccs = osp::StronglyConnectedComponents(g); std::vector> expected = { {0, 1, 2}, {3, 4}, {5, 6}, {7} }; - check_sccs_equal(sccs, expected); + CheckSccsEqual(sccs, expected); } BOOST_AUTO_TEST_CASE(ComplexGraphFromPaperTest) { - graph g; + Graph g; for (int i = 0; i < 8; ++i) { - g.add_vertex(1, 1, 1); + g.AddVertex(1, 1, 1); } - g.add_edge(0, 1); - g.add_edge(1, 2); - g.add_edge(1, 4); - g.add_edge(1, 5); - g.add_edge(2, 3); - g.add_edge(2, 6); - g.add_edge(3, 2); - g.add_edge(3, 7); - g.add_edge(4, 0); - g.add_edge(4, 5); - g.add_edge(5, 6); - g.add_edge(6, 5); - g.add_edge(7, 3); - g.add_edge(7, 6); - - auto sccs = osp::strongly_connected_components(g); + g.AddEdge(0, 1); + g.AddEdge(1, 2); + g.AddEdge(1, 4); + g.AddEdge(1, 5); + g.AddEdge(2, 3); + g.AddEdge(2, 6); + g.AddEdge(3, 2); + g.AddEdge(3, 7); + g.AddEdge(4, 0); + g.AddEdge(4, 5); + g.AddEdge(5, 6); + g.AddEdge(6, 5); + g.AddEdge(7, 3); + g.AddEdge(7, 6); + + auto sccs = osp::StronglyConnectedComponents(g); std::vector> expected = { {0, 1, 4}, {2, 3, 7}, {5, 
6} }; - check_sccs_equal(sccs, expected); + CheckSccsEqual(sccs, expected); } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/subgraph.cpp b/tests/subgraph.cpp index 3e1d5931..eb2ff31e 100644 --- a/tests/subgraph.cpp +++ b/tests/subgraph.cpp @@ -39,21 +39,21 @@ BOOST_AUTO_TEST_CASE(SubGraphCompactSparseGraph) { {3, 7}, {1, 2} }); - Compact_Sparse_Graph graph(11, edges); - Compact_Sparse_Graph subGraph; + CompactSparseGraph graph(11, edges); + CompactSparseGraph subGraph; unsigned cntr = 0; - for (const auto &vert : graph.vertices()) { - graph.set_vertex_work_weight(vert, cntr++); - graph.set_vertex_comm_weight(vert, cntr++); - graph.set_vertex_mem_weight(vert, cntr++); - graph.set_vertex_type(vert, cntr++); + for (const auto &vert : graph.Vertices()) { + graph.SetVertexWorkWeight(vert, cntr++); + graph.SetVertexCommWeight(vert, cntr++); + graph.SetVertexMemWeight(vert, cntr++); + graph.SetVertexType(vert, cntr++); } - const std::vector>> selectVert({2, 3, 10, 6, 7}); - const auto vertCorrespondence = create_induced_subgraph_map(graph, subGraph, selectVert); - BOOST_CHECK_EQUAL(subGraph.num_vertices(), selectVert.size()); - BOOST_CHECK_EQUAL(subGraph.num_edges(), 4); + const std::vector>> selectVert({2, 3, 10, 6, 7}); + const auto vertCorrespondence = CreateInducedSubgraphMap(graph, subGraph, selectVert); + BOOST_CHECK_EQUAL(subGraph.NumVertices(), selectVert.size()); + BOOST_CHECK_EQUAL(subGraph.NumEdges(), 4); for (const auto &vert : selectVert) { BOOST_CHECK_LT(vertCorrespondence.at(vert), selectVert.size()); @@ -66,18 +66,18 @@ BOOST_AUTO_TEST_CASE(SubGraphCompactSparseGraph) { } for (const auto &vert : selectVert) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), subGraph.vertex_work_weight(vertCorrespondence.at(vert))); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), subGraph.vertex_comm_weight(vertCorrespondence.at(vert))); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), subGraph.vertex_mem_weight(vertCorrespondence.at(vert))); - 
BOOST_CHECK_EQUAL(graph.vertex_type(vert), subGraph.vertex_type(vertCorrespondence.at(vert))); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), subGraph.VertexWorkWeight(vertCorrespondence.at(vert))); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), subGraph.VertexCommWeight(vertCorrespondence.at(vert))); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(vert), subGraph.VertexMemWeight(vertCorrespondence.at(vert))); + BOOST_CHECK_EQUAL(graph.VertexType(vert), subGraph.VertexType(vertCorrespondence.at(vert))); } } BOOST_AUTO_TEST_CASE(SubGraphDagVectorImpl) { - using v_impl = cdag_vertex_impl; + using VImpl = CDagVertexImpl; - computational_dag_vector_impl graph; - computational_dag_vector_impl subGraph; + ComputationalDagVectorImpl graph; + ComputationalDagVectorImpl subGraph; const std::size_t numVert = 11; const std::vector> edges({ @@ -96,17 +96,17 @@ BOOST_AUTO_TEST_CASE(SubGraphDagVectorImpl) { unsigned cntr = 0; for (std::size_t i = 0U; i < numVert; ++i) { - graph.add_vertex(cntr, cntr + 1U, cntr + 2U, cntr + 3U); + graph.AddVertex(cntr, cntr + 1U, cntr + 2U, cntr + 3U); cntr += 4U; } for (const auto &[src, tgt] : edges) { - graph.add_edge(src, tgt); + graph.AddEdge(src, tgt); } - const std::vector>> selectVert({2, 3, 10, 6, 7}); - const auto vertCorrespondence = create_induced_subgraph_map(graph, subGraph, selectVert); - BOOST_CHECK_EQUAL(subGraph.num_vertices(), selectVert.size()); - BOOST_CHECK_EQUAL(subGraph.num_edges(), 4); + const std::vector>> selectVert({2, 3, 10, 6, 7}); + const auto vertCorrespondence = CreateInducedSubgraphMap(graph, subGraph, selectVert); + BOOST_CHECK_EQUAL(subGraph.NumVertices(), selectVert.size()); + BOOST_CHECK_EQUAL(subGraph.NumEdges(), 4); for (const auto &vert : selectVert) { BOOST_CHECK_LT(vertCorrespondence.at(vert), selectVert.size()); @@ -119,9 +119,9 @@ BOOST_AUTO_TEST_CASE(SubGraphDagVectorImpl) { } for (const auto &vert : selectVert) { - BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 
subGraph.vertex_work_weight(vertCorrespondence.at(vert))); - BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), subGraph.vertex_comm_weight(vertCorrespondence.at(vert))); - BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), subGraph.vertex_mem_weight(vertCorrespondence.at(vert))); - BOOST_CHECK_EQUAL(graph.vertex_type(vert), subGraph.vertex_type(vertCorrespondence.at(vert))); + BOOST_CHECK_EQUAL(graph.VertexWorkWeight(vert), subGraph.VertexWorkWeight(vertCorrespondence.at(vert))); + BOOST_CHECK_EQUAL(graph.VertexCommWeight(vert), subGraph.VertexCommWeight(vertCorrespondence.at(vert))); + BOOST_CHECK_EQUAL(graph.VertexMemWeight(vert), subGraph.VertexMemWeight(vertCorrespondence.at(vert))); + BOOST_CHECK_EQUAL(graph.VertexType(vert), subGraph.VertexType(vertCorrespondence.at(vert))); } } diff --git a/tests/test_graphs.hpp b/tests/test_graphs.hpp index fd31c74a..df2fe22f 100644 --- a/tests/test_graphs.hpp +++ b/tests/test_graphs.hpp @@ -25,7 +25,7 @@ limitations under the License. namespace osp { -std::vector tiny_spaa_graphs() { +std::vector TinySpaaGraphs() { return {"data/spaa/tiny/instance_bicgstab.hdag", "data/spaa/tiny/instance_CG_N2_K2_nzP0d75.hdag", "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag", @@ -44,14 +44,14 @@ std::vector tiny_spaa_graphs() { "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag"}; } -std::vector large_spaa_graphs() { +std::vector LargeSpaaGraphs() { return {"data/spaa/large/instance_exp_N50_K12_nzP0d15.hdag", "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag", "data/spaa/large/instance_kNN_N45_K15_nzP0d16.hdag", "data/spaa/large/instance_spmv_N120_nzP0d18.hdag"}; } -std::vector test_graphs() { +std::vector TestGraphs() { return {"data/spaa/tiny/instance_k-means.hdag", "data/spaa/tiny/instance_bicgstab.hdag", "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag"}; @@ -66,29 +66,29 @@ std::vector test_graphs() { * * Nodes at the same stage in different pipelines are identical (same work weight). 
* - * @tparam Graph_t The graph type to construct, must be a constructable computational DAG. + * @tparam GraphT The graph type to construct, must be a constructable computational DAG. * @param num_pipelines The number of parallel pipelines. * @param pipeline_len The length of each pipeline. - * @return A Graph_t object representing the DAG. + * @return A GraphT object representing the DAG. */ -template -inline Graph_t construct_multi_pipeline_dag(unsigned num_pipelines, unsigned pipeline_len) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); - Graph_t dag; - if (num_pipelines == 0 || pipeline_len == 0) { +template +inline GraphT ConstructMultiPipelineDag(unsigned numPipelines, unsigned pipelineLen) { + static_assert(isConstructableCdagV, "GraphT must be a constructable computational DAG"); + GraphT dag; + if (numPipelines == 0 || pipelineLen == 0) { return dag; } - for (unsigned i = 0; i < num_pipelines; ++i) { - for (unsigned j = 0; j < pipeline_len; ++j) { + for (unsigned i = 0; i < numPipelines; ++i) { + for (unsigned j = 0; j < pipelineLen; ++j) { // Nodes at the same stage 'j' have the same work weight - dag.add_vertex(10 * (j + 1), 1, 1); + dag.AddVertex(10 * (j + 1), 1, 1); } } - for (unsigned i = 0; i < num_pipelines; ++i) { - for (unsigned j = 0; j < pipeline_len - 1; ++j) { - dag.add_edge(i * pipeline_len + j, i * pipeline_len + j + 1); + for (unsigned i = 0; i < numPipelines; ++i) { + for (unsigned j = 0; j < pipelineLen - 1; ++j) { + dag.AddEdge(i * pipelineLen + j, i * pipelineLen + j + 1); } } return dag; @@ -100,32 +100,32 @@ inline Graph_t construct_multi_pipeline_dag(unsigned num_pipelines, unsigned pip * Each rung is a complete bipartite graph K(2,2) connecting to the next rung. * All "left" side nodes are identical, and all "right" side nodes are identical. * - * @tparam Graph_t The graph type to construct. + * @tparam GraphT The graph type to construct. 
* @param num_rungs The number of rungs in the ladder. - * @return A Graph_t object representing the DAG. + * @return A GraphT object representing the DAG. */ -template -inline Graph_t construct_ladder_dag(unsigned num_rungs) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); - Graph_t dag; - if (num_rungs == 0) { +template +inline GraphT ConstructLadderDag(unsigned numRungs) { + static_assert(isConstructableCdagV, "GraphT must be a constructable computational DAG"); + GraphT dag; + if (numRungs == 0) { return dag; } - for (unsigned i = 0; i < num_rungs + 1; ++i) { - dag.add_vertex(10, 1, 1); // Left side node - dag.add_vertex(20, 1, 1); // Right side node + for (unsigned i = 0; i < numRungs + 1; ++i) { + dag.AddVertex(10, 1, 1); // Left side node + dag.AddVertex(20, 1, 1); // Right side node } - for (unsigned i = 0; i < num_rungs; ++i) { + for (unsigned i = 0; i < numRungs; ++i) { auto u1 = 2 * i; auto v1 = 2 * i + 1; auto u2 = 2 * (i + 1); auto v2 = 2 * (i + 1) + 1; - dag.add_edge(u1, u2); - dag.add_edge(u1, v2); - dag.add_edge(v1, u2); - dag.add_edge(v1, v2); + dag.AddEdge(u1, u2); + dag.AddEdge(u1, v2); + dag.AddEdge(v1, u2); + dag.AddEdge(v1, v2); } return dag; } @@ -136,18 +136,18 @@ inline Graph_t construct_ladder_dag(unsigned num_rungs) { * Creates a simple chain where each node has a unique work weight, * ensuring no two nodes will be in the same initial orbit. * - * @tparam Graph_t The graph type to construct. + * @tparam GraphT The graph type to construct. * @param num_nodes The number of nodes in the chain. - * @return A Graph_t object representing the DAG. + * @return A GraphT object representing the DAG. 
*/ -template -inline Graph_t construct_asymmetric_dag(unsigned num_nodes) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); - Graph_t dag; - for (unsigned i = 0; i < num_nodes; ++i) { - dag.add_vertex(10 * (i + 1), 1, 1); +template +inline GraphT ConstructAsymmetricDag(unsigned numNodes) { + static_assert(isConstructableCdagV, "GraphT must be a constructable computational DAG"); + GraphT dag; + for (unsigned i = 0; i < numNodes; ++i) { + dag.AddVertex(10 * (i + 1), 1, 1); if (i > 0) { - dag.add_edge(i - 1, i); + dag.AddEdge(i - 1, i); } } return dag; @@ -155,82 +155,82 @@ inline Graph_t construct_asymmetric_dag(unsigned num_nodes) { /** * @brief Constructs a complete binary tree that fans out from a single source. - * @tparam Graph_t The graph type to construct. + * @tparam GraphT The graph type to construct. * @param height The height of the tree. A height of 0 is a single node. Total nodes: 2^(height+1) - 1. - * @return A Graph_t object representing the out-tree. + * @return A GraphT object representing the out-tree. 
*/ -template -inline Graph_t construct_binary_out_tree(unsigned height) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); - Graph_t dag; - unsigned num_nodes = (1U << (height + 1)) - 1; - if (num_nodes == 0) { +template +inline GraphT ConstructBinaryOutTree(unsigned height) { + static_assert(isConstructableCdagV, "GraphT must be a constructable computational DAG"); + GraphT dag; + unsigned numNodes = (1U << (height + 1)) - 1; + if (numNodes == 0) { return dag; } - for (unsigned i = 0; i < num_nodes; ++i) { - dag.add_vertex(10, 1, 1); + for (unsigned i = 0; i < numNodes; ++i) { + dag.AddVertex(10, 1, 1); } - for (unsigned i = 0; i < num_nodes / 2; ++i) { - dag.add_edge(i, 2 * i + 1); - dag.add_edge(i, 2 * i + 2); + for (unsigned i = 0; i < numNodes / 2; ++i) { + dag.AddEdge(i, 2 * i + 1); + dag.AddEdge(i, 2 * i + 2); } return dag; } /** * @brief Constructs a complete binary tree that fans into a single sink (root). - * @tparam Graph_t The graph type to construct. + * @tparam GraphT The graph type to construct. * @param height The height of the tree. A height of 0 is a single node. Total nodes: 2^(height+1) - 1. - * @return A Graph_t object representing the in-tree. + * @return A GraphT object representing the in-tree. 
*/ -template -inline Graph_t construct_binary_in_tree(unsigned height) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); - Graph_t dag; - unsigned num_nodes = (1U << (height + 1)) - 1; - if (num_nodes == 0) { +template +inline GraphT ConstructBinaryInTree(unsigned height) { + static_assert(isConstructableCdagV, "GraphT must be a constructable computational DAG"); + GraphT dag; + unsigned numNodes = (1U << (height + 1)) - 1; + if (numNodes == 0) { return dag; } - for (unsigned i = 0; i < num_nodes; ++i) { - dag.add_vertex(10, 1, 1); + for (unsigned i = 0; i < numNodes; ++i) { + dag.AddVertex(10, 1, 1); } - for (unsigned i = 0; i < num_nodes / 2; ++i) { - dag.add_edge(2 * i + 1, i); - dag.add_edge(2 * i + 2, i); + for (unsigned i = 0; i < numNodes / 2; ++i) { + dag.AddEdge(2 * i + 1, i); + dag.AddEdge(2 * i + 2, i); } return dag; } /** * @brief Constructs a 2D grid graph. - * @tparam Graph_t The graph type to construct. + * @tparam GraphT The graph type to construct. * @param rows The number of rows in the grid. * @param cols The number of columns in the grid. - * @return A Graph_t object representing the grid. + * @return A GraphT object representing the grid. 
*/ -template -inline Graph_t construct_grid_dag(unsigned rows, unsigned cols) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); - Graph_t dag; +template +inline GraphT ConstructGridDag(unsigned rows, unsigned cols) { + static_assert(isConstructableCdagV, "GraphT must be a constructable computational DAG"); + GraphT dag; if (rows == 0 || cols == 0) { return dag; } for (unsigned i = 0; i < rows * cols; ++i) { - dag.add_vertex(10, 1, 1); + dag.AddVertex(10, 1, 1); } for (unsigned r = 0; r < rows; ++r) { for (unsigned c = 0; c < cols; ++c) { if (r + 1 < rows) { - dag.add_edge(r * cols + c, (r + 1) * cols + c); + dag.AddEdge(r * cols + c, (r + 1) * cols + c); } if (c + 1 < cols) { - dag.add_edge(r * cols + c, r * cols + (c + 1)); + dag.AddEdge(r * cols + c, r * cols + (c + 1)); } } } @@ -239,30 +239,30 @@ inline Graph_t construct_grid_dag(unsigned rows, unsigned cols) { /** * @brief Constructs a butterfly graph, similar to FFT communication patterns. - * @tparam Graph_t The graph type to construct. + * @tparam GraphT The graph type to construct. * @param stages The number of stages (log2 of the number of inputs). Total nodes: (stages+1) * 2^stages. - * @return A Graph_t object representing the butterfly graph. + * @return A GraphT object representing the butterfly graph. 
*/ -template -inline Graph_t construct_butterfly_dag(unsigned stages) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); - Graph_t dag; +template +inline GraphT ConstructButterflyDag(unsigned stages) { + static_assert(isConstructableCdagV, "GraphT must be a constructable computational DAG"); + GraphT dag; if (stages == 0) { return dag; } - unsigned N = 1U << stages; - for (unsigned i = 0; i < (stages + 1) * N; ++i) { - dag.add_vertex(10, 1, 1); + unsigned n = 1U << stages; + for (unsigned i = 0; i < (stages + 1) * n; ++i) { + dag.AddVertex(10, 1, 1); } for (unsigned s = 0; s < stages; ++s) { - for (unsigned i = 0; i < N; ++i) { - unsigned current_node = s * N + i; - unsigned next_node_straight = (s + 1) * N + i; - unsigned next_node_cross = (s + 1) * N + (i ^ (1U << (stages - 1 - s))); - dag.add_edge(current_node, next_node_straight); - dag.add_edge(current_node, next_node_cross); + for (unsigned i = 0; i < n; ++i) { + unsigned currentNode = s * n + i; + unsigned nextNodeStraight = (s + 1) * n + i; + unsigned nextNodeCross = (s + 1) * n + (i ^ (1U << (stages - 1 - s))); + dag.AddEdge(currentNode, nextNodeStraight); + dag.AddEdge(currentNode, nextNodeCross); } } return dag; diff --git a/tests/test_utils.hpp b/tests/test_utils.hpp index c496af97..06a621ba 100644 --- a/tests/test_utils.hpp +++ b/tests/test_utils.hpp @@ -18,7 +18,7 @@ limitations under the License. #include -inline std::filesystem::path get_project_root() { +inline std::filesystem::path GetProjectRoot() { std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { cwd = cwd.parent_path(); diff --git a/tests/transitive_reduction.cpp b/tests/transitive_reduction.cpp index 272b3807..edb881d2 100644 --- a/tests/transitive_reduction.cpp +++ b/tests/transitive_reduction.cpp @@ -26,47 +26,47 @@ limitations under the License. 
#include "test_graphs.hpp" using namespace osp; -using graph_t = computational_dag_vector_impl_def_t; +using graph_t = ComputationalDagVectorImplDefUnsignedT; -BOOST_AUTO_TEST_SUITE(TransitiveReduction) +BOOST_AUTO_TEST_SUITE(transitive_reduction) // Test with a simple chain graph that has a transitive edge BOOST_AUTO_TEST_CASE(SimpleTransitiveEdge) { graph_t dag; - dag.add_vertex(1, 1, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 1, 1); // 2 - dag.add_edge(0, 1); - dag.add_edge(1, 2); - dag.add_edge(0, 2); // Transitive edge + dag.AddVertex(1, 1, 1); // 0 + dag.AddVertex(1, 1, 1); // 1 + dag.AddVertex(1, 1, 1); // 2 + dag.AddEdge(0, 1); + dag.AddEdge(1, 2); + dag.AddEdge(0, 2); // Transitive edge - BOOST_REQUIRE_EQUAL(dag.num_vertices(), 3); - BOOST_REQUIRE_EQUAL(dag.num_edges(), 3); + BOOST_REQUIRE_EQUAL(dag.NumVertices(), 3); + BOOST_REQUIRE_EQUAL(dag.NumEdges(), 3); - graph_t reduced_sparse, reduced_dense; + graph_t reduced_sparse, reduced_dense; transitive_reduction_sparse(dag, reduced_sparse); transitive_reduction_dense(dag, reduced_dense); - BOOST_CHECK_EQUAL(reduced_sparse.num_vertices(), 3); - BOOST_CHECK_EQUAL(reduced_sparse.num_edges(), 2); - BOOST_CHECK_EQUAL(reduced_dense.num_vertices(), 3); - BOOST_CHECK_EQUAL(reduced_dense.num_edges(), 2); + BOOST_CHECK_EQUAL(reduced_sparse.NumVertices(), 3); + BOOST_CHECK_EQUAL(reduced_sparse.NumEdges(), 2); + BOOST_CHECK_EQUAL(reduced_dense.NumVertices(), 3); + BOOST_CHECK_EQUAL(reduced_dense.NumEdges(), 2); BOOST_CHECK(checkOrderedIsomorphism(reduced_sparse, reduced_dense)); } // Test with a graph that has no transitive edges BOOST_AUTO_TEST_CASE(NoTransitiveEdges) { - const auto dag = construct_ladder_dag(3); // A ladder graph has no transitive edges - BOOST_REQUIRE_EQUAL(dag.num_vertices(), 8); - BOOST_REQUIRE_EQUAL(dag.num_edges(), 11); + const auto dag = ConstructLadderDag(3); // A ladder graph has no transitive edges + BOOST_REQUIRE_EQUAL(dag.NumVertices(), 8); + 
BOOST_REQUIRE_EQUAL(dag.NumEdges(), 11); - graph_t reduced_sparse, reduced_dense; + graph_t reduced_sparse, reduced_dense; transitive_reduction_sparse(dag, reduced_sparse); transitive_reduction_dense(dag, reduced_dense); - BOOST_CHECK_EQUAL(reduced_sparse.num_edges(), dag.num_edges()); - BOOST_CHECK_EQUAL(reduced_dense.num_edges(), dag.num_edges()); + BOOST_CHECK_EQUAL(reduced_sparse.NumEdges(), dag.NumEdges()); + BOOST_CHECK_EQUAL(reduced_dense.NumEdges(), dag.NumEdges()); BOOST_CHECK(checkOrderedIsomorphism(reduced_sparse, reduced_dense)); } @@ -79,32 +79,32 @@ BOOST_AUTO_TEST_CASE(ComplexGraph) { // 2 -> 3 // 3 -> 4 // 0 -> 4 (transitive) - dag.add_vertex(1, 1, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 1, 1); // 2 - dag.add_vertex(1, 1, 1); // 3 - dag.add_vertex(1, 1, 1); // 4 - - dag.add_edge(0, 1); - dag.add_edge(0, 2); - dag.add_edge(1, 3); - dag.add_edge(2, 3); - dag.add_edge(3, 4); + dag.AddVertex(1, 1, 1); // 0 + dag.AddVertex(1, 1, 1); // 1 + dag.AddVertex(1, 1, 1); // 2 + dag.AddVertex(1, 1, 1); // 3 + dag.AddVertex(1, 1, 1); // 4 + + dag.AddEdge(0, 1); + dag.AddEdge(0, 2); + dag.AddEdge(1, 3); + dag.AddEdge(2, 3); + dag.AddEdge(3, 4); // Add transitive edges - dag.add_edge(0, 3); // transitive via 0->1->3 or 0->2->3 - dag.add_edge(0, 4); // transitive via 0->...->3->4 + dag.AddEdge(0, 3); // transitive via 0->1->3 or 0->2->3 + dag.AddEdge(0, 4); // transitive via 0->...->3->4 - BOOST_REQUIRE_EQUAL(dag.num_vertices(), 5); - BOOST_REQUIRE_EQUAL(dag.num_edges(), 7); + BOOST_REQUIRE_EQUAL(dag.NumVertices(), 5); + BOOST_REQUIRE_EQUAL(dag.NumEdges(), 7); - graph_t reduced_sparse, reduced_dense; + graph_t reduced_sparse, reduced_dense; transitive_reduction_sparse(dag, reduced_sparse); transitive_reduction_dense(dag, reduced_dense); - BOOST_CHECK_EQUAL(reduced_sparse.num_vertices(), 5); - BOOST_CHECK_EQUAL(reduced_sparse.num_edges(), 5); - BOOST_CHECK_EQUAL(reduced_dense.num_vertices(), 5); - BOOST_CHECK_EQUAL(reduced_dense.num_edges(), 5); + 
BOOST_CHECK_EQUAL(reduced_sparse.NumVertices(), 5); + BOOST_CHECK_EQUAL(reduced_sparse.NumEdges(), 5); + BOOST_CHECK_EQUAL(reduced_dense.NumVertices(), 5); + BOOST_CHECK_EQUAL(reduced_dense.NumEdges(), 5); BOOST_CHECK(checkOrderedIsomorphism(reduced_sparse, reduced_dense)); } diff --git a/tests/trimmed_group_scheduler.cpp b/tests/trimmed_group_scheduler.cpp index 63ba9ed9..9e597846 100644 --- a/tests/trimmed_group_scheduler.cpp +++ b/tests/trimmed_group_scheduler.cpp @@ -27,187 +27,187 @@ limitations under the License. using namespace osp; -using graph_t = computational_dag_vector_impl_def_t; +using GraphT = ComputationalDagVectorImplDefUnsignedT; // Mock SubScheduler for TrimmedGroupScheduler tests -template -class MockSubScheduler : public Scheduler { +template +class MockSubScheduler : public Scheduler { public: // This mock scheduler assigns all nodes to local processor 0 and superstep 0. // This simplifies verification of the TrimmedGroupScheduler's mapping logic. - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - for (vertex_idx_t v = 0; v < schedule.getInstance().getComputationalDag().num_vertices(); ++v) { - schedule.setAssignedProcessor(v, 0); - schedule.setAssignedSuperstep(v, 0); + ReturnStatus ComputeSchedule(BspSchedule &schedule) override { + for (VertexIdxT v = 0; v < schedule.GetInstance().GetComputationalDag().NumVertices(); ++v) { + schedule.SetAssignedProcessor(v, 0); + schedule.SetAssignedSuperstep(v, 0); } - schedule.setNumberOfSupersteps(1); - return RETURN_STATUS::OSP_SUCCESS; + schedule.SetNumberOfSupersteps(1); + return ReturnStatus::OSP_SUCCESS; } - std::string getScheduleName() const override { return "MockSubScheduler"; } + std::string GetScheduleName() const override { return "MockSubScheduler"; } }; struct TrimmedGroupSchedulerFixture { - graph_t dag; - BspArchitecture arch; - BspInstance instance; - MockSubScheduler mock_sub_scheduler; + GraphT dag_; + BspArchitecture arch_; + BspInstance instance_; + MockSubScheduler 
mockSubScheduler_; - TrimmedGroupSchedulerFixture() : instance(dag, arch) { + TrimmedGroupSchedulerFixture() : instance_(dag_, arch_) { // Default architecture: 1 processor type, 100 memory bound - arch.setCommunicationCosts(1); - arch.setSynchronisationCosts(1); - instance.setAllOnesCompatibilityMatrix(); // All node types compatible with all processor types + arch_.SetCommunicationCosts(1); + arch_.SetSynchronisationCosts(1); + instance_.SetAllOnesCompatibilityMatrix(); // All node types compatible with all processor types } }; -BOOST_FIXTURE_TEST_SUITE(TrimmedGroupSchedulerTestSuite, TrimmedGroupSchedulerFixture) +BOOST_FIXTURE_TEST_SUITE(trimmed_group_scheduler_test_suite, TrimmedGroupSchedulerFixture) BOOST_AUTO_TEST_CASE(EmptyGraphTest) { // Graph is empty by default - arch.setNumberOfProcessors(4); - instance.getArchitecture() = arch; + arch_.SetNumberOfProcessors(4); + instance_.GetArchitecture() = arch_; - TrimmedGroupScheduler scheduler(mock_sub_scheduler, 1); - BspSchedule schedule(instance); + TrimmedGroupScheduler scheduler(mockSubScheduler_, 1); + BspSchedule schedule(instance_); - auto status = scheduler.computeSchedule(schedule); - BOOST_CHECK_EQUAL(status, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 0); + auto status = scheduler.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(status, ReturnStatus::OSP_SUCCESS); + BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 0); } BOOST_AUTO_TEST_CASE(SingleComponentSingleProcessorTypeTest) { // Graph: 0-1-2 (single component) - dag.add_vertex(1, 1, 1, 0); // 0 - dag.add_vertex(1, 1, 1, 0); // 1 - dag.add_vertex(1, 1, 1, 0); // 2 - dag.add_edge(0, 1); - dag.add_edge(1, 2); - instance.getComputationalDag() = dag; + dag_.AddVertex(1, 1, 1, 0); // 0 + dag_.AddVertex(1, 1, 1, 0); // 1 + dag_.AddVertex(1, 1, 1, 0); // 2 + dag_.AddEdge(0, 1); + dag_.AddEdge(1, 2); + instance_.GetComputationalDag() = dag_; // Architecture: 4 processors of type 0 - arch.setProcessorsWithTypes({0, 0, 0, 
0}); - instance.getArchitecture() = arch; + arch_.SetProcessorsWithTypes({0, 0, 0, 0}); + instance_.GetArchitecture() = arch_; // min_non_zero_procs_ = 1 (all 4 processors assigned to this single component group) - TrimmedGroupScheduler scheduler(mock_sub_scheduler, 1); - BspSchedule schedule(instance); + TrimmedGroupScheduler scheduler(mockSubScheduler_, 1); + BspSchedule schedule(instance_); - auto status = scheduler.computeSchedule(schedule); - BOOST_CHECK_EQUAL(status, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 1); + auto status = scheduler.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(status, ReturnStatus::OSP_SUCCESS); + BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 1); // MockSubScheduler assigns to local proc 0. // TrimmedGroupScheduler should map this to global proc 0. - BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(2), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(2), 0); } BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeEvenDistributionTest) { // Graph: 0-1 (component 0), 2-3 (component 1) - dag.add_vertex(1, 1, 1, 0); // 0 - dag.add_vertex(1, 1, 1, 0); // 1 - dag.add_vertex(1, 1, 1, 0); // 2 - dag.add_vertex(1, 1, 1, 0); // 3 - dag.add_edge(0, 1); - dag.add_edge(2, 3); - instance.getComputationalDag() = dag; + dag_.AddVertex(1, 1, 1, 0); // 0 + dag_.AddVertex(1, 1, 1, 0); // 1 + dag_.AddVertex(1, 1, 1, 0); // 2 + dag_.AddVertex(1, 1, 1, 0); // 3 + 
dag_.AddEdge(0, 1); + dag_.AddEdge(2, 3); + instance_.GetComputationalDag() = dag_; // Architecture: 4 processors of type 0 - arch.setProcessorsWithTypes({0, 0, 0, 0}); - instance.getArchitecture() = arch; + arch_.SetProcessorsWithTypes({0, 0, 0, 0}); + instance_.GetArchitecture() = arch_; // min_non_zero_procs_ = 2 (2 component groups, each gets 2 processors) - TrimmedGroupScheduler scheduler(mock_sub_scheduler, 2); - BspSchedule schedule(instance); + TrimmedGroupScheduler scheduler(mockSubScheduler_, 2); + BspSchedule schedule(instance_); - auto status = scheduler.computeSchedule(schedule); - BOOST_CHECK_EQUAL(status, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 1); + auto status = scheduler.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(status, ReturnStatus::OSP_SUCCESS); + BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 1); // Component 0 (vertices 0,1) assigned to global processors 0,1. Mock scheduler uses local 0. // Global proc for group 0: offset 0 + local 0 = 0. - BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(1), 0); // Component 1 (vertices 2,3) assigned to global processors 2,3. Mock scheduler uses local 0. // Global proc for group 1: offset 2 + local 0 = 2. 
- BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 2); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(3), 2); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(2), 2); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(3), 2); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(3), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(2), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(3), 0); } BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeUnevenDistributionTest) { // Graph: 0 (component 0), 1 (component 1), 2 (component 2) - all isolated - dag.add_vertex(1, 1, 1, 0); // 0 - dag.add_vertex(1, 1, 1, 0); // 1 - dag.add_vertex(1, 1, 1, 0); // 2 - instance.getComputationalDag() = dag; + dag_.AddVertex(1, 1, 1, 0); // 0 + dag_.AddVertex(1, 1, 1, 0); // 1 + dag_.AddVertex(1, 1, 1, 0); // 2 + instance_.GetComputationalDag() = dag_; // Architecture: 6 processors of type 0 - arch.setProcessorsWithTypes({0, 0, 0, 0, 0, 0}); - instance.getArchitecture() = arch; + arch_.SetProcessorsWithTypes({0, 0, 0, 0, 0, 0}); + instance_.GetArchitecture() = arch_; // min_non_zero_procs_ = 2 (3 components, 2 groups) // base_count = 3 / 2 = 1, remainder = 3 % 2 = 1 // Group 0 gets 2 components (0, 1) // Group 1 gets 1 component (2) // sub_proc_counts for type 0: 6 / 2 = 3 - TrimmedGroupScheduler scheduler(mock_sub_scheduler, 2); - BspSchedule schedule(instance); + TrimmedGroupScheduler scheduler(mockSubScheduler_, 2); + BspSchedule schedule(instance_); - auto status = scheduler.computeSchedule(schedule); - BOOST_CHECK_EQUAL(status, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 1); + auto status = scheduler.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(status, ReturnStatus::OSP_SUCCESS); 
+ BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 1); // Group 0 (components 0, 1) maps to global procs 0,1,2. Mock scheduler uses local 0. // Global proc for group 0: offset 0 + local 0 = 0. - BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(1), 0); // Group 1 (component 2) maps to global procs 3,4,5. Mock scheduler uses local 0. // Global proc for group 1: offset 3 + local 0 = 3. - BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 3); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(2), 3); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(2), 0); } BOOST_AUTO_TEST_CASE(MultipleComponentsHeterogeneousArchitectureTest) { // Graph: 0 (type 0), 1 (type 1) - isolated nodes - dag.add_vertex(1, 1, 1, 0); // 0 (component 0, type 0) - dag.add_vertex(1, 1, 1, 1); // 1 (component 1, type 1) - instance.getComputationalDag() = dag; + dag_.AddVertex(1, 1, 1, 0); // 0 (component 0, type 0) + dag_.AddVertex(1, 1, 1, 1); // 1 (component 1, type 1) + instance_.GetComputationalDag() = dag_; // Architecture: 2 processors of type 0 (global 0,1), 2 processors of type 1 (global 2,3) - arch.setProcessorsWithTypes({0, 0, 1, 1}); - instance.getArchitecture() = arch; - instance.setDiagonalCompatibilityMatrix(2); // Node type 0 compatible with proc type 0, etc. + arch_.SetProcessorsWithTypes({0, 0, 1, 1}); + instance_.GetArchitecture() = arch_; + instance_.SetDiagonalCompatibilityMatrix(2); // Node type 0 compatible with proc type 0, etc. 
// min_non_zero_procs_ = 2 (2 components, 2 groups) // sub_proc_counts for type 0: 2 / 2 = 1 // sub_proc_counts for type 1: 2 / 2 = 1 - TrimmedGroupScheduler scheduler(mock_sub_scheduler, 2); - BspSchedule schedule(instance); + TrimmedGroupScheduler scheduler(mockSubScheduler_, 2); + BspSchedule schedule(instance_); - auto status = scheduler.computeSchedule(schedule); - BOOST_CHECK_EQUAL(status, RETURN_STATUS::OSP_SUCCESS); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 1); + auto status = scheduler.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(status, ReturnStatus::OSP_SUCCESS); + BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 1); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 1); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(1), 1); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(1), 0); } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/uf_structures.cpp b/tests/uf_structures.cpp index f19bc9be..fcc993e4 100644 --- a/tests/uf_structures.cpp +++ b/tests/uf_structures.cpp @@ -26,372 +26,362 @@ limitations under the License. 
using namespace osp; -BOOST_AUTO_TEST_CASE(Union_find_structure1) { +BOOST_AUTO_TEST_CASE(UnionFindStructure1) { std::vector names({"a", "b", "c", "d", "e", "f"}); - Union_Find_Universe test_universe(names); + UnionFindUniverse testUniverse(names); for (auto &name : names) { - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name(name), name); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName(name), name); } - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 6); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 6); - BOOST_CHECK_THROW(test_universe.add_object("a"), std::runtime_error); - BOOST_CHECK_THROW(test_universe.add_object("e"), std::runtime_error); + BOOST_CHECK_THROW(testUniverse.AddObject("a"), std::runtime_error); + BOOST_CHECK_THROW(testUniverse.AddObject("e"), std::runtime_error); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 6); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 6); - test_universe.join_by_name("a", "b"); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("b")); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 5); + testUniverse.JoinByName("a", "b"); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("b")); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 5); - test_universe.join_by_name("b", "c"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 4); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("b")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("c")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("c")); + testUniverse.JoinByName("b", "c"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 4); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), 
testUniverse.FindOriginByName("b")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("c")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("b"), testUniverse.FindOriginByName("c")); - test_universe.join_by_name("d", "b"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 3); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("d")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("d"), test_universe.find_origin_by_name("b")); + testUniverse.JoinByName("d", "b"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 3); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("d")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("d"), testUniverse.FindOriginByName("b")); - test_universe.join_by_name("a", "c"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 3); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("c")); + testUniverse.JoinByName("a", "c"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 3); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("c")); - test_universe.join_by_name("a", "d"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 3); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("d")); + testUniverse.JoinByName("a", "d"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 3); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("d")); - test_universe.join_by_name("e", "f"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 2); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("e"), test_universe.find_origin_by_name("f")); - BOOST_CHECK_NE(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("f")); + 
testUniverse.JoinByName("e", "f"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 2); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("e"), testUniverse.FindOriginByName("f")); + BOOST_CHECK_NE(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("f")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("b")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("c")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("c"), test_universe.find_origin_by_name("d")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("d")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("c"), test_universe.find_origin_by_name("b")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("b")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("b"), testUniverse.FindOriginByName("c")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("c"), testUniverse.FindOriginByName("d")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("d")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("c"), testUniverse.FindOriginByName("b")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("e"), test_universe.find_origin_by_name("f")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("e"), testUniverse.FindOriginByName("f")); - BOOST_CHECK_NE(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("f")); + BOOST_CHECK_NE(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("f")); } -BOOST_AUTO_TEST_CASE(Union_find_structure2) { +BOOST_AUTO_TEST_CASE(UnionFindStructure2) { std::vector names({"a", "b", "c", "d", "e", "f", "g", "h", "i"}); - Union_Find_Universe test_universe; + UnionFindUniverse testUniverse; for (auto &name : names) { - test_universe.add_object(name); + testUniverse.AddObject(name); } for (auto &name : names) { - 
BOOST_CHECK_EQUAL(test_universe.find_origin_by_name(name), name); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName(name), name); } - BOOST_CHECK_THROW(test_universe.add_object("c"), std::runtime_error); - BOOST_CHECK_THROW(test_universe.add_object("i"), std::runtime_error); + BOOST_CHECK_THROW(testUniverse.AddObject("c"), std::runtime_error); + BOOST_CHECK_THROW(testUniverse.AddObject("i"), std::runtime_error); for (auto &name : names) { - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name(name), name); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName(name), name); } - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 9); - - test_universe.join_by_name("a", "b"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 8); - test_universe.join_by_name("b", "c"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 7); - test_universe.join_by_name("c", "d"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 6); - test_universe.join_by_name("d", "e"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 5); - test_universe.join_by_name("e", "f"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 4); - - test_universe.join_by_name("c", "f"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 4); - - test_universe.join_by_name("g", "h"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 3); - test_universe.join_by_name("h", "i"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 2); - - test_universe.join_by_name("b", "h"); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 1); - - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("b")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("c")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("c"), 
test_universe.find_origin_by_name("d")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("h")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("c"), test_universe.find_origin_by_name("i")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("f"), test_universe.find_origin_by_name("g")); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 9); + + testUniverse.JoinByName("a", "b"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 8); + testUniverse.JoinByName("b", "c"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 7); + testUniverse.JoinByName("c", "d"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 6); + testUniverse.JoinByName("d", "e"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 5); + testUniverse.JoinByName("e", "f"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 4); + + testUniverse.JoinByName("c", "f"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 4); + + testUniverse.JoinByName("g", "h"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 3); + testUniverse.JoinByName("h", "i"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 2); + + testUniverse.JoinByName("b", "h"); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 1); + + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("b")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("b"), testUniverse.FindOriginByName("c")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("c"), testUniverse.FindOriginByName("d")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("h")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("c"), testUniverse.FindOriginByName("i")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("f"), testUniverse.FindOriginByName("g")); } 
-BOOST_AUTO_TEST_CASE(Union_find_weight_structure) { +BOOST_AUTO_TEST_CASE(UnionFindWeightStructure) { std::vector names({"a", "b", "c", "d", "e", "f"}); std::vector weights({1, 2, 1, 3, 1, 1}); - Union_Find_Universe test_universe(names, weights, weights); + UnionFindUniverse testUniverse(names, weights, weights); for (size_t i = 0; i < names.size(); i++) { - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name(names[i]), names[i]); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name(names[i]), weights[i]); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name(names[i]), weights[i]); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName(names[i]), names[i]); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName(names[i]), weights[i]); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName(names[i]), weights[i]); } - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 6); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 6); - BOOST_CHECK_THROW(test_universe.add_object("a"), std::runtime_error); - BOOST_CHECK_THROW(test_universe.add_object("e"), std::runtime_error); + BOOST_CHECK_THROW(testUniverse.AddObject("a"), std::runtime_error); + BOOST_CHECK_THROW(testUniverse.AddObject("e"), std::runtime_error); - test_universe.join_by_name("a", "b"); - test_universe.join_by_name("b", "c"); - test_universe.join_by_name("d", "b"); - test_universe.join_by_name("a", "c"); - test_universe.join_by_name("a", "d"); + testUniverse.JoinByName("a", "b"); + testUniverse.JoinByName("b", "c"); + testUniverse.JoinByName("d", "b"); + testUniverse.JoinByName("a", "c"); + testUniverse.JoinByName("a", "d"); - test_universe.join_by_name("e", "f"); + testUniverse.JoinByName("e", "f"); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("b")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("c")); - 
BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("c"), test_universe.find_origin_by_name("d")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("d")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("c"), test_universe.find_origin_by_name("b")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("b")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("b"), testUniverse.FindOriginByName("c")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("c"), testUniverse.FindOriginByName("d")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("d")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("c"), testUniverse.FindOriginByName("b")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("e"), test_universe.find_origin_by_name("f")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("e"), testUniverse.FindOriginByName("f")); - BOOST_CHECK_NE(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("f")); + BOOST_CHECK_NE(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("f")); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 7); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("b"), 7); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("e"), 2); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 7); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("b"), 7); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("e"), 2); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 7); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("e"), 2); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("b"), 7); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 7); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("e"), 2); + 
BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("b"), 7); - std::vector, unsigned>> components_n_weights - = test_universe.get_connected_components_and_weights(); - unsigned total_comp_weights = 0; - unsigned total_elements = 0; - for (auto &[comp, wt] : components_n_weights) { - total_comp_weights += wt; - total_elements += static_cast(comp.size()); + std::vector, unsigned>> componentsNWeights = testUniverse.GetConnectedComponentsAndWeights(); + unsigned totalCompWeights = 0; + unsigned totalElements = 0; + for (auto &[comp, wt] : componentsNWeights) { + totalCompWeights += wt; + totalElements += static_cast(comp.size()); for (auto &name : comp) { - BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; })); + BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string otherName) { return name == otherName; })); } } - std::vector, unsigned, unsigned>> components_n_weights_n_memory - = test_universe.get_connected_components_weights_and_memories(); - unsigned total_comp_weights_2 = 0; - unsigned total_comp_memory = 0; - unsigned total_elements_2 = 0; - for (const auto &[comp, wt, mem] : components_n_weights_n_memory) { - total_comp_weights_2 += wt; - total_comp_memory += mem; - total_elements_2 += static_cast(comp.size()); + std::vector, unsigned, unsigned>> componentsNWeightsNMemory + = testUniverse.GetConnectedComponentsWeightsAndMemories(); + unsigned totalCompWeights2 = 0; + unsigned totalCompMemory = 0; + unsigned totalElements2 = 0; + for (const auto &[comp, wt, mem] : componentsNWeightsNMemory) { + totalCompWeights2 += wt; + totalCompMemory += mem; + totalElements2 += static_cast(comp.size()); for (auto &name : comp) { - BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; })); + BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string otherName) { return name == otherName; })); } } - unsigned 
total_weight = 0; + unsigned totalWeight = 0; for (auto &wt : weights) { - total_weight += wt; + totalWeight += wt; } - BOOST_CHECK_EQUAL(total_elements, names.size()); - BOOST_CHECK_EQUAL(total_elements_2, names.size()); - BOOST_CHECK_EQUAL(total_weight, total_comp_weights); - BOOST_CHECK_EQUAL(total_weight, total_comp_weights_2); - BOOST_CHECK_EQUAL(total_weight, total_comp_memory); + BOOST_CHECK_EQUAL(totalElements, names.size()); + BOOST_CHECK_EQUAL(totalElements2, names.size()); + BOOST_CHECK_EQUAL(totalWeight, totalCompWeights); + BOOST_CHECK_EQUAL(totalWeight, totalCompWeights2); + BOOST_CHECK_EQUAL(totalWeight, totalCompMemory); for (auto &name : names) { - BOOST_CHECK(std::any_of(components_n_weights.cbegin(), - components_n_weights.cend(), - [name](std::pair, unsigned> comp_pair) { - return std::any_of(comp_pair.first.cbegin(), - comp_pair.first.cend(), - [name](std::string other_name) { return name == other_name; }); - })); + BOOST_CHECK(std::any_of( + componentsNWeights.cbegin(), componentsNWeights.cend(), [name](std::pair, unsigned> compPair) { + return std::any_of( + compPair.first.cbegin(), compPair.first.cend(), [name](std::string otherName) { return name == otherName; }); + })); } } -BOOST_AUTO_TEST_CASE(Union_find_structure_weight_comp_count) { +BOOST_AUTO_TEST_CASE(UnionFindStructureWeightCompCount) { std::vector names({"a", "b", "c", "d", "e", "f"}); std::vector weights({1, 2, 1, 3, 1, 1}); - Union_Find_Universe test_universe; + UnionFindUniverse testUniverse; for (size_t i = 0; i < names.size(); i++) { - test_universe.add_object(names[i], weights[i], weights[i]); + testUniverse.AddObject(names[i], weights[i], weights[i]); } for (size_t i = 0; i < names.size(); i++) { - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name(names[i]), names[i]); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name(names[i]), weights[i]); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name(names[i]), weights[i]); + 
BOOST_CHECK_EQUAL(testUniverse.FindOriginByName(names[i]), names[i]); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName(names[i]), weights[i]); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName(names[i]), weights[i]); } - test_universe.join_by_name("a", "b"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 3); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 3); - test_universe.join_by_name("a", "b"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 3); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 3); - test_universe.join_by_name("b", "a"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 3); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 3); - - test_universe.join_by_name("a", "c"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("c"), 4); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("c"), 4); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("b")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("c")); - - test_universe.join_by_name("d", "e"); - test_universe.join_by_name("e", "f"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("f"), 5); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("f"), 5); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("b")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("e"), test_universe.find_origin_by_name("f")); - BOOST_CHECK_NE(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("e")); - BOOST_CHECK_NE(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("d")); - - std::vector, unsigned>> comp_n_weights - = test_universe.get_connected_components_and_weights(); - BOOST_CHECK(comp_n_weights.size() == 2); - 
BOOST_CHECK(comp_n_weights.size() == test_universe.get_number_of_connected_components()); - BOOST_CHECK(comp_n_weights[0].first.size() == 3); - BOOST_CHECK(comp_n_weights[1].first.size() == 3); - BOOST_CHECK((comp_n_weights[0].second == 4 && comp_n_weights[1].second == 5) - || (comp_n_weights[0].second == 5 && comp_n_weights[1].second == 4)); - - std::vector, unsigned, unsigned>> comp_n_weight_n_memory - = test_universe.get_connected_components_weights_and_memories(); - BOOST_CHECK(comp_n_weight_n_memory.size() == 2); - BOOST_CHECK(comp_n_weight_n_memory.size() == test_universe.get_number_of_connected_components()); - BOOST_CHECK(std::get<0>(comp_n_weight_n_memory[0]).size() == 3); - BOOST_CHECK(std::get<0>(comp_n_weight_n_memory[1]).size() == 3); - BOOST_CHECK((std::get<1>(comp_n_weight_n_memory[0]) == 4 && std::get<1>(comp_n_weight_n_memory[1]) == 5) - || (std::get<1>(comp_n_weight_n_memory[0]) == 5 && std::get<1>(comp_n_weight_n_memory[1]) == 4)); - BOOST_CHECK((std::get<2>(comp_n_weight_n_memory[0]) == 4 && std::get<2>(comp_n_weight_n_memory[1]) == 5) - || (std::get<2>(comp_n_weight_n_memory[0]) == 5 && std::get<2>(comp_n_weight_n_memory[1]) == 4)); - - std::vector, unsigned>> components_n_weights - = test_universe.get_connected_components_and_weights(); - unsigned total_comp_weights = 0; - unsigned total_elements = 0; - for (auto &[comp, wt] : components_n_weights) { - total_comp_weights += wt; - total_elements += static_cast(comp.size()); + testUniverse.JoinByName("a", "b"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 3); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 3); + testUniverse.JoinByName("a", "b"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 3); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 3); + testUniverse.JoinByName("b", "a"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 3); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 3); 
+ + testUniverse.JoinByName("a", "c"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("c"), 4); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("c"), 4); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("b")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("b"), testUniverse.FindOriginByName("c")); + + testUniverse.JoinByName("d", "e"); + testUniverse.JoinByName("e", "f"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("f"), 5); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("f"), 5); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("b")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("e"), testUniverse.FindOriginByName("f")); + BOOST_CHECK_NE(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("e")); + BOOST_CHECK_NE(testUniverse.FindOriginByName("b"), testUniverse.FindOriginByName("d")); + + std::vector, unsigned>> compNWeights = testUniverse.GetConnectedComponentsAndWeights(); + BOOST_CHECK(compNWeights.size() == 2); + BOOST_CHECK(compNWeights.size() == testUniverse.GetNumberOfConnectedComponents()); + BOOST_CHECK(compNWeights[0].first.size() == 3); + BOOST_CHECK(compNWeights[1].first.size() == 3); + BOOST_CHECK((compNWeights[0].second == 4 && compNWeights[1].second == 5) + || (compNWeights[0].second == 5 && compNWeights[1].second == 4)); + + std::vector, unsigned, unsigned>> compNWeightNMemory + = testUniverse.GetConnectedComponentsWeightsAndMemories(); + BOOST_CHECK(compNWeightNMemory.size() == 2); + BOOST_CHECK(compNWeightNMemory.size() == testUniverse.GetNumberOfConnectedComponents()); + BOOST_CHECK(std::get<0>(compNWeightNMemory[0]).size() == 3); + BOOST_CHECK(std::get<0>(compNWeightNMemory[1]).size() == 3); + BOOST_CHECK((std::get<1>(compNWeightNMemory[0]) == 4 && std::get<1>(compNWeightNMemory[1]) == 5) + || (std::get<1>(compNWeightNMemory[0]) == 5 && std::get<1>(compNWeightNMemory[1]) == 4)); + 
BOOST_CHECK((std::get<2>(compNWeightNMemory[0]) == 4 && std::get<2>(compNWeightNMemory[1]) == 5) + || (std::get<2>(compNWeightNMemory[0]) == 5 && std::get<2>(compNWeightNMemory[1]) == 4)); + + std::vector, unsigned>> componentsNWeights = testUniverse.GetConnectedComponentsAndWeights(); + unsigned totalCompWeights = 0; + unsigned totalElements = 0; + for (auto &[comp, wt] : componentsNWeights) { + totalCompWeights += wt; + totalElements += static_cast(comp.size()); for (auto &name : comp) { - BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; })); + BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string otherName) { return name == otherName; })); } } - unsigned total_weight = 0; + unsigned totalWeight = 0; for (auto &wt : weights) { - total_weight += wt; + totalWeight += wt; } - BOOST_CHECK_EQUAL(total_elements, names.size()); - BOOST_CHECK_EQUAL(total_weight, total_comp_weights); + BOOST_CHECK_EQUAL(totalElements, names.size()); + BOOST_CHECK_EQUAL(totalWeight, totalCompWeights); for (auto &name : names) { - BOOST_CHECK(std::any_of(components_n_weights.cbegin(), - components_n_weights.cend(), - [name](std::pair, unsigned> comp_pair) { - return std::any_of(comp_pair.first.cbegin(), - comp_pair.first.cend(), - [name](std::string other_name) { return name == other_name; }); - })); + BOOST_CHECK(std::any_of( + componentsNWeights.cbegin(), componentsNWeights.cend(), [name](std::pair, unsigned> compPair) { + return std::any_of( + compPair.first.cbegin(), compPair.first.cend(), [name](std::string otherName) { return name == otherName; }); + })); } } -BOOST_AUTO_TEST_CASE(Union_find_structure_weight_chains_comp_count) { +BOOST_AUTO_TEST_CASE(UnionFindStructureWeightChainsCompCount) { std::vector names({"a", "b", "c", "d", "e", "f", "g", "h", "i"}); std::vector weights({1, 1, 1, 1, 1, 1, 1, 1, 1}); - Union_Find_Universe test_universe(names, weights, weights); - - test_universe.join_by_name("a", 
"b"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 2); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 2); - test_universe.join_by_name("b", "c"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 3); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 3); - test_universe.join_by_name("c", "d"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("c"), 4); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("c"), 4); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("e"), 1); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("e"), 1); - test_universe.join_by_name("d", "e"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("e"), 5); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("e"), 5); - test_universe.join_by_name("a", "b"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 5); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 5); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("e"), 5); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("e"), 5); - test_universe.join_by_name("e", "f"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 6); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 6); - - test_universe.join_by_name("c", "f"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 6); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 6); - BOOST_CHECK_EQUAL(test_universe.get_number_of_connected_components(), 4); - - test_universe.join_by_name("g", "h"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("g"), 2); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("g"), 2); - - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("i"), "i"); - 
BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("i"), 1); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("i"), 1); - - test_universe.join_by_name("h", "i"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("i"), 3); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("i"), 3); - - test_universe.join_by_name("b", "h"); - BOOST_CHECK_EQUAL(test_universe.get_weight_of_component_by_name("a"), 9); - BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("a"), 9); - - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("b")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("d"), test_universe.find_origin_by_name("i")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("e"), test_universe.find_origin_by_name("h")); - BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("i")); - - std::vector, unsigned>> components_n_weights - = test_universe.get_connected_components_and_weights(); - unsigned total_comp_weights = 0; - unsigned total_elements = 0; - for (auto &[comp, wt] : components_n_weights) { - total_comp_weights += wt; - total_elements += static_cast(comp.size()); + UnionFindUniverse testUniverse(names, weights, weights); + + testUniverse.JoinByName("a", "b"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 2); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 2); + testUniverse.JoinByName("b", "c"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 3); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 3); + testUniverse.JoinByName("c", "d"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("c"), 4); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("c"), 4); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("e"), 1); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("e"), 1); + 
testUniverse.JoinByName("d", "e"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("e"), 5); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("e"), 5); + testUniverse.JoinByName("a", "b"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 5); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 5); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("e"), 5); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("e"), 5); + testUniverse.JoinByName("e", "f"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 6); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 6); + + testUniverse.JoinByName("c", "f"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 6); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 6); + BOOST_CHECK_EQUAL(testUniverse.GetNumberOfConnectedComponents(), 4); + + testUniverse.JoinByName("g", "h"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("g"), 2); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("g"), 2); + + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("i"), "i"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("i"), 1); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("i"), 1); + + testUniverse.JoinByName("h", "i"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("i"), 3); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("i"), 3); + + testUniverse.JoinByName("b", "h"); + BOOST_CHECK_EQUAL(testUniverse.GetWeightOfComponentByName("a"), 9); + BOOST_CHECK_EQUAL(testUniverse.GetMemoryOfComponentByName("a"), 9); + + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("a"), testUniverse.FindOriginByName("b")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("d"), testUniverse.FindOriginByName("i")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("e"), testUniverse.FindOriginByName("h")); + BOOST_CHECK_EQUAL(testUniverse.FindOriginByName("b"), 
testUniverse.FindOriginByName("i")); + + std::vector, unsigned>> componentsNWeights = testUniverse.GetConnectedComponentsAndWeights(); + unsigned totalCompWeights = 0; + unsigned totalElements = 0; + for (auto &[comp, wt] : componentsNWeights) { + totalCompWeights += wt; + totalElements += static_cast(comp.size()); for (auto &name : comp) { - BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; })); + BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string otherName) { return name == otherName; })); } } - unsigned total_weight = 0; + unsigned totalWeight = 0; for (auto &wt : weights) { - total_weight += wt; + totalWeight += wt; } - BOOST_CHECK_EQUAL(total_elements, names.size()); - BOOST_CHECK_EQUAL(total_weight, total_comp_weights); + BOOST_CHECK_EQUAL(totalElements, names.size()); + BOOST_CHECK_EQUAL(totalWeight, totalCompWeights); for (auto &name : names) { - BOOST_CHECK(std::any_of(components_n_weights.cbegin(), - components_n_weights.cend(), - [name](std::pair, unsigned> comp_pair) { - return std::any_of(comp_pair.first.cbegin(), - comp_pair.first.cend(), - [name](std::string other_name) { return name == other_name; }); - })); + BOOST_CHECK(std::any_of( + componentsNWeights.cbegin(), componentsNWeights.cend(), [name](std::pair, unsigned> compPair) { + return std::any_of( + compPair.first.cbegin(), compPair.first.cend(), [name](std::string otherName) { return name == otherName; }); + })); } } diff --git a/tests/wavefront_component_divider.cpp b/tests/wavefront_component_divider.cpp index 37cd28fa..3ed0594f 100644 --- a/tests/wavefront_component_divider.cpp +++ b/tests/wavefront_component_divider.cpp @@ -31,28 +31,28 @@ BOOST_AUTO_TEST_CASE(VarianceSplitterTest) { // Test case 1: Clear split point std::vector seq1 = {1, 1, 1, 1, 10, 10, 10, 10}; - std::vector splits1 = splitter.split(seq1); + std::vector splits1 = splitter.Split(seq1); std::vector expected1 = {4}; 
BOOST_CHECK_EQUAL_COLLECTIONS(splits1.begin(), splits1.end(), expected1.begin(), expected1.end()); // Test case 2: No split needed (low variance) std::vector seq2 = {1, 1.1, 1.2, 1.1, 1.3}; - std::vector splits2 = splitter.split(seq2); + std::vector splits2 = splitter.Split(seq2); BOOST_CHECK(splits2.empty()); // Test case 3: Empty sequence std::vector seq3 = {}; - std::vector splits3 = splitter.split(seq3); + std::vector splits3 = splitter.Split(seq3); BOOST_CHECK(splits3.empty()); // Test case 4: Single element sequence std::vector seq4 = {100.0}; - std::vector splits4 = splitter.split(seq4); + std::vector splits4 = splitter.Split(seq4); BOOST_CHECK(splits4.empty()); // Test case 5: Multiple splits std::vector seq5 = {1, 1, 1, 20, 20, 20, 1, 1, 1}; - std::vector splits5 = splitter.split(seq5); + std::vector splits5 = splitter.Split(seq5); std::vector expected5 = {3, 6}; BOOST_CHECK_EQUAL_COLLECTIONS(splits5.begin(), splits5.end(), expected5.begin(), expected5.end()); } @@ -62,29 +62,29 @@ BOOST_AUTO_TEST_CASE(LargestStepSplitterTest) { // Test case 1: Clear step std::vector seq1 = {1, 2, 3, 10, 11, 12}; - std::vector splits1 = splitter.split(seq1); + std::vector splits1 = splitter.Split(seq1); std::vector expected1 = {3}; BOOST_CHECK_EQUAL_COLLECTIONS(splits1.begin(), splits1.end(), expected1.begin(), expected1.end()); // Test case 2: No significant step std::vector seq2 = {1, 2, 3, 4, 5, 6}; - std::vector splits2 = splitter.split(seq2); + std::vector splits2 = splitter.Split(seq2); BOOST_CHECK(splits2.empty()); // Test case 3: Decreasing sequence std::vector seq3 = {12, 11, 10, 3, 2, 1}; - std::vector splits3 = splitter.split(seq3); + std::vector splits3 = splitter.Split(seq3); std::vector expected3 = {3}; BOOST_CHECK_EQUAL_COLLECTIONS(splits3.begin(), splits3.end(), expected3.begin(), expected3.end()); // Test case 4: Sequence too short std::vector seq4 = {1, 10}; - std::vector splits4 = splitter.split(seq4); + std::vector splits4 = splitter.Split(seq4); 
BOOST_CHECK(splits4.empty()); // Test case 5: Multiple large steps std::vector seq5 = {0, 1, 10, 11, 20, 21}; - std::vector splits5 = splitter.split(seq5); + std::vector splits5 = splitter.Split(seq5); std::vector expected5 = {2, 4}; BOOST_CHECK_EQUAL_COLLECTIONS(splits5.begin(), splits5.end(), expected5.begin(), expected5.end()); } @@ -94,59 +94,59 @@ BOOST_AUTO_TEST_CASE(ThresholdScanSplitterTest) { // Test case 1: Significant drop std::vector seq1 = {20, 18, 16, 9, 8, 7}; - std::vector splits1 = splitter.split(seq1); + std::vector splits1 = splitter.Split(seq1); std::vector expected1 = {3}; BOOST_CHECK_EQUAL_COLLECTIONS(splits1.begin(), splits1.end(), expected1.begin(), expected1.end()); // Test case 2: Crossing absolute threshold (rising) std::vector seq2 = {5, 7, 9, 11, 13}; - std::vector splits2 = splitter.split(seq2); + std::vector splits2 = splitter.Split(seq2); std::vector expected2 = {3}; BOOST_CHECK_EQUAL_COLLECTIONS(splits2.begin(), splits2.end(), expected2.begin(), expected2.end()); // Test case 3: Crossing absolute threshold (dropping) std::vector seq3 = {15, 12, 11, 9, 8}; - std::vector splits3 = splitter.split(seq3); + std::vector splits3 = splitter.Split(seq3); std::vector expected3 = {3}; BOOST_CHECK_EQUAL_COLLECTIONS(splits3.begin(), splits3.end(), expected3.begin(), expected3.end()); // Test case 4: No splits std::vector seq4 = {1, 2, 3, 4, 5}; - std::vector splits4 = splitter.split(seq4); + std::vector splits4 = splitter.Split(seq4); BOOST_CHECK(splits4.empty()); // Test case 5: Empty sequence std::vector seq5 = {}; - std::vector splits5 = splitter.split(seq5); + std::vector splits5 = splitter.Split(seq5); BOOST_CHECK(splits5.empty()); } -using graph = osp::computational_dag_edge_idx_vector_impl_def_int_t; -using VertexType = graph::vertex_idx; +using Graph = osp::ComputationalDagEdgeIdxVectorImplDefIntT; +using VertexType = Graph::VertexIdx; BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) { - graph dag; - const auto v1 = dag.add_vertex(2, 1, 
9); - const auto v2 = dag.add_vertex(3, 1, 8); - const auto v3 = dag.add_vertex(4, 1, 7); - const auto v4 = dag.add_vertex(5, 1, 6); - const auto v5 = dag.add_vertex(6, 1, 5); - const auto v6 = dag.add_vertex(7, 1, 4); - const auto v7 = dag.add_vertex(8, 1, 3); // Note: v7 is not connected in the example - const auto v8 = dag.add_vertex(9, 1, 2); - - dag.add_edge(v1, v2); - dag.add_edge(v1, v3); - dag.add_edge(v1, v4); - dag.add_edge(v2, v5); - dag.add_edge(v2, v6); - dag.add_edge(v3, v5); - dag.add_edge(v3, v6); - dag.add_edge(v5, v8); - dag.add_edge(v4, v8); + Graph dag; + const auto v1 = dag.AddVertex(2, 1, 9); + const auto v2 = dag.AddVertex(3, 1, 8); + const auto v3 = dag.AddVertex(4, 1, 7); + const auto v4 = dag.AddVertex(5, 1, 6); + const auto v5 = dag.AddVertex(6, 1, 5); + const auto v6 = dag.AddVertex(7, 1, 4); + const auto v7 = dag.AddVertex(8, 1, 3); // Note: v7 is not connected in the example + const auto v8 = dag.AddVertex(9, 1, 2); + + dag.AddEdge(v1, v2); + dag.AddEdge(v1, v3); + dag.AddEdge(v1, v4); + dag.AddEdge(v2, v5); + dag.AddEdge(v2, v6); + dag.AddEdge(v3, v5); + dag.AddEdge(v3, v6); + dag.AddEdge(v5, v8); + dag.AddEdge(v4, v8); // Manually defined level sets for this DAG - const std::vector> level_sets = { + const std::vector> levelSets = { {v1}, // Level 0 {v2, v3, v4}, // Level 1 {v5, v6}, // Level 2 @@ -154,80 +154,80 @@ BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) { {v7} // Level 4 (isolated vertex) }; - osp::WavefrontStatisticsCollector collector(dag, level_sets); + osp::WavefrontStatisticsCollector collector(dag, levelSets); // --- Test Forward Pass --- - auto forward_stats = collector.compute_forward(); - BOOST_REQUIRE_EQUAL(forward_stats.size(), 5); + auto forwardStats = collector.ComputeForward(); + BOOST_REQUIRE_EQUAL(forwardStats.size(), 5); // Level 0 - BOOST_CHECK_EQUAL(forward_stats[0].connected_components_vertices.size(), 1); - BOOST_CHECK_EQUAL(forward_stats[0].connected_components_weights[0], 2); - 
BOOST_CHECK_EQUAL(forward_stats[0].connected_components_memories[0], 9); + BOOST_CHECK_EQUAL(forwardStats[0].connectedComponentsVertices_.size(), 1); + BOOST_CHECK_EQUAL(forwardStats[0].connectedComponentsWeights_[0], 2); + BOOST_CHECK_EQUAL(forwardStats[0].connectedComponentsMemories_[0], 9); // Level 1 - BOOST_CHECK_EQUAL(forward_stats[1].connected_components_vertices.size(), 1); - BOOST_CHECK_EQUAL(forward_stats[1].connected_components_weights[0], 2 + 3 + 4 + 5); // v1,v2,v3,v4 - BOOST_CHECK_EQUAL(forward_stats[1].connected_components_memories[0], 9 + 8 + 7 + 6); + BOOST_CHECK_EQUAL(forwardStats[1].connectedComponentsVertices_.size(), 1); + BOOST_CHECK_EQUAL(forwardStats[1].connectedComponentsWeights_[0], 2 + 3 + 4 + 5); // v1,v2,v3,v4 + BOOST_CHECK_EQUAL(forwardStats[1].connectedComponentsMemories_[0], 9 + 8 + 7 + 6); // Level 2 - BOOST_CHECK_EQUAL(forward_stats[2].connected_components_vertices.size(), 1); - BOOST_CHECK_EQUAL(forward_stats[2].connected_components_weights[0], 14 + 6 + 7); // v1-v6 - BOOST_CHECK_EQUAL(forward_stats[2].connected_components_memories[0], 30 + 5 + 4); + BOOST_CHECK_EQUAL(forwardStats[2].connectedComponentsVertices_.size(), 1); + BOOST_CHECK_EQUAL(forwardStats[2].connectedComponentsWeights_[0], 14 + 6 + 7); // v1-v6 + BOOST_CHECK_EQUAL(forwardStats[2].connectedComponentsMemories_[0], 30 + 5 + 4); // Level 3 - BOOST_CHECK_EQUAL(forward_stats[3].connected_components_vertices.size(), 1); - BOOST_CHECK_EQUAL(forward_stats[3].connected_components_weights[0], 27 + 9); // v1-v6, v8 - BOOST_CHECK_EQUAL(forward_stats[3].connected_components_memories[0], 39 + 2); + BOOST_CHECK_EQUAL(forwardStats[3].connectedComponentsVertices_.size(), 1); + BOOST_CHECK_EQUAL(forwardStats[3].connectedComponentsWeights_[0], 27 + 9); // v1-v6, v8 + BOOST_CHECK_EQUAL(forwardStats[3].connectedComponentsMemories_[0], 39 + 2); // Level 4 (isolated vertex shows up as a new component) - BOOST_CHECK_EQUAL(forward_stats[4].connected_components_vertices.size(), 2); + 
BOOST_CHECK_EQUAL(forwardStats[4].connectedComponentsVertices_.size(), 2); // --- Test Backward Pass --- - auto backward_stats = collector.compute_backward(); - BOOST_REQUIRE_EQUAL(backward_stats.size(), 5); + auto backwardStats = collector.ComputeBackward(); + BOOST_REQUIRE_EQUAL(backwardStats.size(), 5); // Level 4 - BOOST_CHECK_EQUAL(backward_stats[4].connected_components_vertices.size(), 1); - BOOST_CHECK_EQUAL(backward_stats[4].connected_components_weights[0], 8); // v7 - BOOST_CHECK_EQUAL(backward_stats[4].connected_components_memories[0], 3); + BOOST_CHECK_EQUAL(backwardStats[4].connectedComponentsVertices_.size(), 1); + BOOST_CHECK_EQUAL(backwardStats[4].connectedComponentsWeights_[0], 8); // v7 + BOOST_CHECK_EQUAL(backwardStats[4].connectedComponentsMemories_[0], 3); // Level 3 - BOOST_CHECK_EQUAL(backward_stats[3].connected_components_vertices.size(), 2); // {v8}, {v7} + BOOST_CHECK_EQUAL(backwardStats[3].connectedComponentsVertices_.size(), 2); // {v8}, {v7} // Level 2 - BOOST_CHECK_EQUAL(backward_stats[2].connected_components_vertices.size(), 3); // {v5,v8}, {v6}, {v7} + BOOST_CHECK_EQUAL(backwardStats[2].connectedComponentsVertices_.size(), 3); // {v5,v8}, {v6}, {v7} // Level 1 - BOOST_CHECK_EQUAL(backward_stats[1].connected_components_vertices.size(), 2); // {v2,v3,v4,v5,v6,v8}, {v7} + BOOST_CHECK_EQUAL(backwardStats[1].connectedComponentsVertices_.size(), 2); // {v2,v3,v4,v5,v6,v8}, {v7} // Level 0 - BOOST_CHECK_EQUAL(backward_stats[0].connected_components_vertices.size(), 2); // {v1-v6,v8}, {v7} + BOOST_CHECK_EQUAL(backwardStats[0].connectedComponentsVertices_.size(), 2); // {v1-v6,v8}, {v7} } BOOST_AUTO_TEST_CASE(SequenceGenerationTest) { // --- Test Setup --- - graph dag; - const auto v1 = dag.add_vertex(2, 1, 9); - const auto v2 = dag.add_vertex(3, 1, 8); - const auto v3 = dag.add_vertex(4, 1, 7); - const auto v4 = dag.add_vertex(5, 1, 6); - const auto v5 = dag.add_vertex(6, 1, 5); - const auto v6 = dag.add_vertex(7, 1, 4); - const auto v7 = 
dag.add_vertex(8, 1, 3); // Isolated vertex - const auto v8 = dag.add_vertex(9, 1, 2); - - dag.add_edge(v1, v2); - dag.add_edge(v1, v3); - dag.add_edge(v1, v4); - dag.add_edge(v2, v5); - dag.add_edge(v2, v6); - dag.add_edge(v3, v5); - dag.add_edge(v3, v6); - dag.add_edge(v5, v8); - dag.add_edge(v4, v8); - - const std::vector> level_sets = { + Graph dag; + const auto v1 = dag.AddVertex(2, 1, 9); + const auto v2 = dag.AddVertex(3, 1, 8); + const auto v3 = dag.AddVertex(4, 1, 7); + const auto v4 = dag.AddVertex(5, 1, 6); + const auto v5 = dag.AddVertex(6, 1, 5); + const auto v6 = dag.AddVertex(7, 1, 4); + const auto v7 = dag.AddVertex(8, 1, 3); // Isolated vertex + const auto v8 = dag.AddVertex(9, 1, 2); + + dag.AddEdge(v1, v2); + dag.AddEdge(v1, v3); + dag.AddEdge(v1, v4); + dag.AddEdge(v2, v5); + dag.AddEdge(v2, v6); + dag.AddEdge(v3, v5); + dag.AddEdge(v3, v6); + dag.AddEdge(v5, v8); + dag.AddEdge(v4, v8); + + const std::vector> levelSets = { {v1}, {v2, v3, v4}, {v5, v6}, @@ -235,16 +235,15 @@ BOOST_AUTO_TEST_CASE(SequenceGenerationTest) { {v7} }; - osp::SequenceGenerator generator(dag, level_sets); + osp::SequenceGenerator generator(dag, levelSets); // --- Test Component Count --- - auto component_seq = generator.generate(osp::SequenceMetric::COMPONENT_COUNT); - std::vector expected_components = {1.0, 1.0, 1.0, 1.0, 2.0}; - BOOST_CHECK_EQUAL_COLLECTIONS( - component_seq.begin(), component_seq.end(), expected_components.begin(), expected_components.end()); + auto componentSeq = generator.Generate(osp::SequenceMetric::COMPONENT_COUNT); + std::vector expectedComponents = {1.0, 1.0, 1.0, 1.0, 2.0}; + BOOST_CHECK_EQUAL_COLLECTIONS(componentSeq.begin(), componentSeq.end(), expectedComponents.begin(), expectedComponents.end()); // --- Test Available Parallelism --- - auto parallelism_seq = generator.generate(osp::SequenceMetric::AVAILABLE_PARALLELISM); + auto parallelismSeq = generator.Generate(osp::SequenceMetric::AVAILABLE_PARALLELISM); // Manual calculation for 
expected values: // L0: 2 / 1 = 2 @@ -252,17 +251,17 @@ BOOST_AUTO_TEST_CASE(SequenceGenerationTest) { // L2: (14 + 6+7) / 3 = 27 / 3 = 9 // L3: (27 + 9) / 4 = 36 / 4 = 9 // L4: (36 + 8) / 5 = 44 / 5 = 8.8 - std::vector expected_parallelism = {2.0, 7.0, 9.0, 9.0, 8.8}; + std::vector expectedParallelism = {2.0, 7.0, 9.0, 9.0, 8.8}; - BOOST_REQUIRE_EQUAL(parallelism_seq.size(), expected_parallelism.size()); - for (size_t i = 0; i < parallelism_seq.size(); ++i) { - BOOST_CHECK_CLOSE(parallelism_seq[i], expected_parallelism[i], 1e-9); + BOOST_REQUIRE_EQUAL(parallelismSeq.size(), expectedParallelism.size()); + for (size_t i = 0; i < parallelismSeq.size(); ++i) { + BOOST_CHECK_CLOSE(parallelismSeq[i], expectedParallelism[i], 1e-9); } } struct TestFixture { - graph dag; - std::vector vertices; + Graph dag_; + std::vector vertices_; TestFixture() { // --- Test Setup --- @@ -272,37 +271,37 @@ struct TestFixture { // Level 1: {v2, v3, v4} // Level 2: {v5, v6} // Level 3: {v8} - const auto v1 = dag.add_vertex(2, 1, 9); - const auto v2 = dag.add_vertex(3, 1, 8); - const auto v3 = dag.add_vertex(4, 1, 7); - const auto v4 = dag.add_vertex(5, 1, 6); - const auto v5 = dag.add_vertex(6, 1, 5); - const auto v6 = dag.add_vertex(7, 1, 4); - const auto v7 = dag.add_vertex(8, 1, 3); // Isolated vertex - const auto v8 = dag.add_vertex(9, 1, 2); - - vertices = {v1, v2, v3, v4, v5, v6, v7, v8}; - - dag.add_edge(v1, v2); - dag.add_edge(v1, v3); - dag.add_edge(v1, v4); - dag.add_edge(v2, v5); - dag.add_edge(v2, v6); - dag.add_edge(v3, v5); - dag.add_edge(v3, v6); - dag.add_edge(v5, v8); - dag.add_edge(v4, v8); + const auto v1 = dag_.AddVertex(2, 1, 9); + const auto v2 = dag_.AddVertex(3, 1, 8); + const auto v3 = dag_.AddVertex(4, 1, 7); + const auto v4 = dag_.AddVertex(5, 1, 6); + const auto v5 = dag_.AddVertex(6, 1, 5); + const auto v6 = dag_.AddVertex(7, 1, 4); + const auto v7 = dag_.AddVertex(8, 1, 3); // Isolated vertex + const auto v8 = dag_.AddVertex(9, 1, 2); + + vertices_ = {v1, v2, 
v3, v4, v5, v6, v7, v8}; + + dag_.AddEdge(v1, v2); + dag_.AddEdge(v1, v3); + dag_.AddEdge(v1, v4); + dag_.AddEdge(v2, v5); + dag_.AddEdge(v2, v6); + dag_.AddEdge(v3, v5); + dag_.AddEdge(v3, v6); + dag_.AddEdge(v5, v8); + dag_.AddEdge(v4, v8); } }; -BOOST_FIXTURE_TEST_SUITE(ScanWavefrontDividerTestSuite, TestFixture) +BOOST_FIXTURE_TEST_SUITE(scan_wavefront_divider_test_suite, TestFixture) BOOST_AUTO_TEST_CASE(LargestStepDivisionTest) { - osp::ScanWavefrontDivider divider; - divider.set_metric(osp::SequenceMetric::AVAILABLE_PARALLELISM); - divider.use_largest_step_splitter(0.9, 1); + osp::ScanWavefrontDivider divider; + divider.SetMetric(osp::SequenceMetric::AVAILABLE_PARALLELISM); + divider.UseLargestStepSplitter(0.9, 1); - auto sections = divider.divide(dag); + auto sections = divider.Divide(dag_); // Expecting a cut after level 0. This results in 2 sections. BOOST_REQUIRE_EQUAL(sections.size(), 2); @@ -316,11 +315,11 @@ BOOST_AUTO_TEST_CASE(LargestStepDivisionTest) { } BOOST_AUTO_TEST_CASE(ThresholdScanDivisionTest) { - osp::ScanWavefrontDivider divider; - divider.set_metric(osp::SequenceMetric::AVAILABLE_PARALLELISM); - divider.use_threshold_scan_splitter(2.0, 11.5); + osp::ScanWavefrontDivider divider; + divider.SetMetric(osp::SequenceMetric::AVAILABLE_PARALLELISM); + divider.UseThresholdScanSplitter(2.0, 11.5); - auto sections = divider.divide(dag); + auto sections = divider.Divide(dag_); // A cut is expected when the sequence crosses 11.5 (at level 2) and crosses back (at level 3) // The splitter should return cuts at levels 2 and 3. 
@@ -336,11 +335,11 @@ BOOST_AUTO_TEST_CASE(ThresholdScanDivisionTest) { } BOOST_AUTO_TEST_CASE(NoCutDivisionTest) { - osp::ScanWavefrontDivider divider; - divider.set_metric(osp::SequenceMetric::COMPONENT_COUNT); - divider.use_largest_step_splitter(2.0, 2); + osp::ScanWavefrontDivider divider; + divider.SetMetric(osp::SequenceMetric::COMPONENT_COUNT); + divider.UseLargestStepSplitter(2.0, 2); - auto sections = divider.divide(dag); + auto sections = divider.Divide(dag_); // Expecting a single section containing all components BOOST_REQUIRE_EQUAL(sections.size(), 1); @@ -348,74 +347,74 @@ BOOST_AUTO_TEST_CASE(NoCutDivisionTest) { } BOOST_AUTO_TEST_CASE(EmptyGraphTest) { - osp::ScanWavefrontDivider divider; - graph empty_dag; - auto sections = divider.divide(empty_dag); + osp::ScanWavefrontDivider divider; + Graph emptyDag; + auto sections = divider.Divide(emptyDag); BOOST_CHECK(sections.empty()); } BOOST_AUTO_TEST_SUITE_END() -struct TestFixture_2 { - graph dag; - std::vector vertices; +struct TestFixture2 { + Graph dag_; + std::vector vertices_; - TestFixture_2() { + TestFixture2() { // --- Test Setup --- // This graph is designed to have a component count sequence of {2, 2, 2, 1} // to properly test the recursive divider's splitting logic. 
// Levels: {v1,v2}, {v3,v4}, {v5,v6}, {v7} - const auto v1 = dag.add_vertex(1, 1, 1); - const auto v2 = dag.add_vertex(1, 1, 1); - const auto v3 = dag.add_vertex(1, 1, 1); - const auto v4 = dag.add_vertex(1, 1, 1); - const auto v5 = dag.add_vertex(1, 1, 1); - const auto v6 = dag.add_vertex(1, 1, 1); - const auto v7 = dag.add_vertex(1, 1, 1); - - vertices = {v1, v2, v3, v4, v5, v6, v7}; - - dag.add_edge(v1, v3); - dag.add_edge(v2, v4); - dag.add_edge(v3, v5); - dag.add_edge(v4, v6); - dag.add_edge(v5, v7); - dag.add_edge(v6, v7); + const auto v1 = dag_.AddVertex(1, 1, 1); + const auto v2 = dag_.AddVertex(1, 1, 1); + const auto v3 = dag_.AddVertex(1, 1, 1); + const auto v4 = dag_.AddVertex(1, 1, 1); + const auto v5 = dag_.AddVertex(1, 1, 1); + const auto v6 = dag_.AddVertex(1, 1, 1); + const auto v7 = dag_.AddVertex(1, 1, 1); + + vertices_ = {v1, v2, v3, v4, v5, v6, v7}; + + dag_.AddEdge(v1, v3); + dag_.AddEdge(v2, v4); + dag_.AddEdge(v3, v5); + dag_.AddEdge(v4, v6); + dag_.AddEdge(v5, v7); + dag_.AddEdge(v6, v7); } }; -BOOST_AUTO_TEST_SUITE(RecursiveWavefrontDividerTestSuite) +BOOST_AUTO_TEST_SUITE(recursive_wavefront_divider_test_suite) // --- Test Fixture 1: A simple DAG that merges from 2 components to 1 --- -struct TestFixture_SimpleMerge { - graph dag; +struct TestFixtureSimpleMerge { + Graph dag_; - TestFixture_SimpleMerge() { + TestFixtureSimpleMerge() { // This graph is designed to have a component count sequence of {2, 2, 2, 1} // Levels: {v0,v1}, {v2,v3}, {v4,v5}, {v6} - const auto v0 = dag.add_vertex(1, 1, 1); - const auto v1 = dag.add_vertex(1, 1, 1); - const auto v2 = dag.add_vertex(1, 1, 1); - const auto v3 = dag.add_vertex(1, 1, 1); - const auto v4 = dag.add_vertex(1, 1, 1); - const auto v5 = dag.add_vertex(1, 1, 1); - const auto v6 = dag.add_vertex(1, 1, 1); - - dag.add_edge(v0, v2); - dag.add_edge(v1, v3); - dag.add_edge(v2, v4); - dag.add_edge(v3, v5); - dag.add_edge(v4, v6); - dag.add_edge(v5, v6); + const auto v0 = dag_.AddVertex(1, 1, 1); + 
const auto v1 = dag_.AddVertex(1, 1, 1); + const auto v2 = dag_.AddVertex(1, 1, 1); + const auto v3 = dag_.AddVertex(1, 1, 1); + const auto v4 = dag_.AddVertex(1, 1, 1); + const auto v5 = dag_.AddVertex(1, 1, 1); + const auto v6 = dag_.AddVertex(1, 1, 1); + + dag_.AddEdge(v0, v2); + dag_.AddEdge(v1, v3); + dag_.AddEdge(v2, v4); + dag_.AddEdge(v3, v5); + dag_.AddEdge(v4, v6); + dag_.AddEdge(v5, v6); } }; -BOOST_FIXTURE_TEST_SUITE(SimpleMergeTests, TestFixture_SimpleMerge) +BOOST_FIXTURE_TEST_SUITE(simple_merge_tests, TestFixtureSimpleMerge) BOOST_AUTO_TEST_CASE(BasicRecursionTest) { - osp::RecursiveWavefrontDivider divider; - divider.use_largest_step_splitter(0.5, 1); - auto sections = divider.divide(dag); + osp::RecursiveWavefrontDivider divider; + divider.UseLargestStepSplitter(0.5, 1); + auto sections = divider.Divide(dag_); // Expecting a cut after level 2, where component count drops from 2 to 1. // This results in 2 sections: {levels 0,1,2} and {level 3}. @@ -431,9 +430,9 @@ BOOST_AUTO_TEST_CASE(BasicRecursionTest) { BOOST_AUTO_TEST_CASE(NoCutHighThresholdTest) { // A high threshold should prevent any cuts. - osp::RecursiveWavefrontDivider divider; - divider.use_largest_step_splitter(2.0, 2); - auto sections = divider.divide(dag); + osp::RecursiveWavefrontDivider divider; + divider.UseLargestStepSplitter(2.0, 2); + auto sections = divider.Divide(dag_); // Expecting a single section containing all components, which merge into one. BOOST_REQUIRE_EQUAL(sections.size(), 1); @@ -442,9 +441,9 @@ BOOST_AUTO_TEST_CASE(NoCutHighThresholdTest) { BOOST_AUTO_TEST_CASE(MinSubsequenceLengthTest) { // The graph has 4 wavefronts. A min_subseq_len of 5 should prevent division. 
- osp::RecursiveWavefrontDivider divider; - divider.use_largest_step_splitter(0.5, 5); - auto sections = divider.divide(dag); + osp::RecursiveWavefrontDivider divider; + divider.UseLargestStepSplitter(0.5, 5); + auto sections = divider.Divide(dag_); BOOST_REQUIRE_EQUAL(sections.size(), 1); BOOST_REQUIRE_EQUAL(sections[0].size(), 1); @@ -452,63 +451,63 @@ BOOST_AUTO_TEST_CASE(MinSubsequenceLengthTest) { BOOST_AUTO_TEST_CASE(MaxDepthTest) { // Setting max_depth to 0 should prevent any recursion. - osp::RecursiveWavefrontDivider divider; - divider.use_largest_step_splitter(0.5, 2).set_max_depth(0); - auto sections = divider.divide(dag); + osp::RecursiveWavefrontDivider divider; + divider.UseLargestStepSplitter(0.5, 2).SetMaxDepth(0); + auto sections = divider.Divide(dag_); BOOST_REQUIRE_EQUAL(sections.size(), 1); BOOST_REQUIRE_EQUAL(sections[0].size(), 1); } BOOST_AUTO_TEST_CASE(EmptyGraphTest) { - osp::RecursiveWavefrontDivider divider; - graph empty_dag; - auto sections = divider.divide(empty_dag); + osp::RecursiveWavefrontDivider divider; + Graph emptyDag; + auto sections = divider.Divide(emptyDag); BOOST_CHECK(sections.empty()); } BOOST_AUTO_TEST_SUITE_END() // --- Test Fixture 2: A DAG with multiple merge points for deeper recursion --- -struct TestFixture_MultiMerge { - graph dag; +struct TestFixtureMultiMerge { + Graph dag_; - TestFixture_MultiMerge() { + TestFixtureMultiMerge() { // Sequence: {4, 4, 2, 2, 1, 1}. Two significant drops. 
// L0: 4 comp -> L2: 2 comp (drop of 2) // L2: 2 comp -> L4: 1 comp (drop of 1) - const auto v_l0_1 = dag.add_vertex(1, 1, 1), v_l0_2 = dag.add_vertex(1, 1, 1), v_l0_3 = dag.add_vertex(1, 1, 1), - v_l0_4 = dag.add_vertex(1, 1, 1); - const auto v_l1_1 = dag.add_vertex(1, 1, 1), v_l1_2 = dag.add_vertex(1, 1, 1), v_l1_3 = dag.add_vertex(1, 1, 1), - v_l1_4 = dag.add_vertex(1, 1, 1); - const auto v_l2_1 = dag.add_vertex(1, 1, 1), v_l2_2 = dag.add_vertex(1, 1, 1); - const auto v_l3_1 = dag.add_vertex(1, 1, 1), v_l3_2 = dag.add_vertex(1, 1, 1); - const auto v_l4_1 = dag.add_vertex(1, 1, 1); - const auto v_l5_1 = dag.add_vertex(1, 1, 1); - - dag.add_edge(v_l0_1, v_l1_1); - dag.add_edge(v_l0_2, v_l1_2); - dag.add_edge(v_l0_3, v_l1_3); - dag.add_edge(v_l0_4, v_l1_4); - dag.add_edge(v_l1_1, v_l2_1); - dag.add_edge(v_l1_2, v_l2_1); - dag.add_edge(v_l1_3, v_l2_2); - dag.add_edge(v_l1_4, v_l2_2); - dag.add_edge(v_l2_1, v_l3_1); - dag.add_edge(v_l2_2, v_l3_2); - dag.add_edge(v_l3_1, v_l4_1); - dag.add_edge(v_l3_2, v_l4_1); - dag.add_edge(v_l4_1, v_l5_1); + const auto vL01 = dag_.AddVertex(1, 1, 1), vL02 = dag_.AddVertex(1, 1, 1), vL03 = dag_.AddVertex(1, 1, 1), + vL04 = dag_.AddVertex(1, 1, 1); + const auto vL11 = dag_.AddVertex(1, 1, 1), vL12 = dag_.AddVertex(1, 1, 1), vL13 = dag_.AddVertex(1, 1, 1), + vL14 = dag_.AddVertex(1, 1, 1); + const auto vL21 = dag_.AddVertex(1, 1, 1), vL22 = dag_.AddVertex(1, 1, 1); + const auto vL31 = dag_.AddVertex(1, 1, 1), vL32 = dag_.AddVertex(1, 1, 1); + const auto vL41 = dag_.AddVertex(1, 1, 1); + const auto vL51 = dag_.AddVertex(1, 1, 1); + + dag_.AddEdge(vL01, vL11); + dag_.AddEdge(vL02, vL12); + dag_.AddEdge(vL03, vL13); + dag_.AddEdge(vL04, vL14); + dag_.AddEdge(vL11, vL21); + dag_.AddEdge(vL12, vL21); + dag_.AddEdge(vL13, vL22); + dag_.AddEdge(vL14, vL22); + dag_.AddEdge(vL21, vL31); + dag_.AddEdge(vL22, vL32); + dag_.AddEdge(vL31, vL41); + dag_.AddEdge(vL32, vL41); + dag_.AddEdge(vL41, vL51); } }; -BOOST_FIXTURE_TEST_SUITE(MultiMergeTests, 
TestFixture_MultiMerge) +BOOST_FIXTURE_TEST_SUITE(multi_merge_tests, TestFixtureMultiMerge) BOOST_AUTO_TEST_CASE(MultipleRecursionTest) { - osp::RecursiveWavefrontDivider divider; + osp::RecursiveWavefrontDivider divider; // Threshold is 0.5. First cut is for drop of 2.0 (4->2). Second is for drop of 1.0 (2->1). - divider.use_largest_step_splitter(0.5, 2); - auto sections = divider.divide(dag); + divider.UseLargestStepSplitter(0.5, 2); + auto sections = divider.Divide(dag_); // Expect 3 sections: // 1. Levels 0-1 (before first major cut) @@ -527,11 +526,11 @@ BOOST_AUTO_TEST_CASE(MultipleRecursionTest) { BOOST_AUTO_TEST_CASE(VarianceSplitterTest) { // This test uses the same multi-merge graph but with the variance splitter. // The sequence {4,4,2,2,1,1} has high variance and should be split. - osp::RecursiveWavefrontDivider divider; + osp::RecursiveWavefrontDivider divider; // var_mult of 0.99 ensures any reduction is accepted. // var_threshold of 0.1 ensures we start splitting. - divider.use_variance_splitter(0.99, 0.1, 2); - auto sections = divider.divide(dag); + divider.UseVarianceSplitter(0.99, 0.1, 2); + auto sections = divider.Divide(dag_); // The variance splitter should also identify the two main merge points. BOOST_REQUIRE_EQUAL(sections.size(), 3); diff --git a/tests/wavefront_divider.cpp b/tests/wavefront_divider.cpp index 40b48783..1557a457 100644 --- a/tests/wavefront_divider.cpp +++ b/tests/wavefront_divider.cpp @@ -22,8 +22,6 @@ limitations under the License. 
#include "osp/auxiliary/io/dot_graph_file_reader.hpp" #include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" #include "osp/dag_divider/WavefrontComponentScheduler.hpp" #include "osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp" #include "osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp" @@ -32,86 +30,86 @@ limitations under the License. using namespace osp; -std::vector test_graphs_dot() { return {"data/dot/smpl_dot_graph_1.dot"}; } +std::vector TestGraphsDot() { return {"data/dot/smpl_dot_graph_1.dot"}; } -std::vector tiny_spaa_graphs() { +std::vector TinySpaaGraphs() { return {"data/spaa/tiny/instance_bicgstab.hdag", "data/spaa/tiny/instance_CG_N2_K2_nzP0d75.hdag", "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag", "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag"}; } -template -bool check_vertex_maps(const std::vector>>> &maps, const Graph_t &dag) { - std::unordered_set> all_vertices; +template +bool CheckVertexMaps(const std::vector>>> &maps, const GraphT &dag) { + std::unordered_set> allVertices; for (const auto &step : maps) { for (const auto &subgraph : step) { for (const auto &vertex : subgraph) { - all_vertices.insert(vertex); + allVertices.insert(vertex); } } } - return all_vertices.size() == dag.num_vertices(); + return allVertices.size() == dag.NumVertices(); } -BOOST_AUTO_TEST_CASE(wavefront_component_divider) { - std::vector filenames_graph = test_graphs_dot(); +BOOST_AUTO_TEST_CASE(WavefrontComponentDivider) { + std::vector filenamesGraph = TestGraphsDot(); - const auto project_root = get_project_root(); + const auto projectRoot = GetProjectRoot(); - using graph_t = computational_dag_edge_idx_vector_impl_def_t; + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; - for (auto &filename_graph : filenames_graph) { - BspInstance 
instance; - auto &graph = instance.getComputationalDag(); + for (auto &filenameGraph : filenamesGraph) { + BspInstance instance; + auto &graph = instance.GetComputationalDag(); - auto status_graph = file_reader::readComputationalDagDotFormat((project_root / filename_graph).string(), graph); + auto statusGraph = file_reader::ReadComputationalDagDotFormat((projectRoot / filenameGraph).string(), graph); - if (!status_graph) { + if (!statusGraph) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } else { - std::cout << "File read:" << filename_graph << std::endl; + std::cout << "File read:" << filenameGraph << std::endl; } - ScanWavefrontDivider wavefront; - auto maps = wavefront.divide(graph); + ScanWavefrontDivider wavefront; + auto maps = wavefront.Divide(graph); if (!maps.empty()) { - BOOST_CHECK(check_vertex_maps(maps, graph)); + BOOST_CHECK(CheckVertexMaps(maps, graph)); } } } -BOOST_AUTO_TEST_CASE(wavefront_component_parallelism_divider) { - std::vector filenames_graph = tiny_spaa_graphs(); +BOOST_AUTO_TEST_CASE(WavefrontComponentParallelismDivider) { + std::vector filenamesGraph = TinySpaaGraphs(); - const auto project_root = get_project_root(); + const auto projectRoot = GetProjectRoot(); - using graph_t = computational_dag_edge_idx_vector_impl_def_t; + using GraphT = ComputationalDagEdgeIdxVectorImplDefT; - for (auto &filename_graph : filenames_graph) { - BspInstance instance; - auto &graph = instance.getComputationalDag(); + for (auto &filenameGraph : filenamesGraph) { + BspInstance instance; + auto &graph = instance.GetComputationalDag(); - auto status_graph = file_reader::readComputationalDagHyperdagFormatDB((project_root / filename_graph).string(), graph); + auto statusGraph = file_reader::ReadComputationalDagHyperdagFormatDB((projectRoot / filenameGraph).string(), graph); - if (!status_graph) { + if (!statusGraph) { std::cout << "Reading files failed." 
<< std::endl; BOOST_CHECK(false); } else { - std::cout << "File read:" << filename_graph << std::endl; + std::cout << "File read:" << filenameGraph << std::endl; } - ScanWavefrontDivider wavefront; - wavefront.set_metric(SequenceMetric::AVAILABLE_PARALLELISM); - wavefront.use_variance_splitter(1.0, 1.0, 1); + ScanWavefrontDivider wavefront; + wavefront.SetMetric(SequenceMetric::AVAILABLE_PARALLELISM); + wavefront.UseVarianceSplitter(1.0, 1.0, 1); - auto maps = wavefront.divide(graph); + auto maps = wavefront.Divide(graph); if (!maps.empty()) { - BOOST_CHECK(check_vertex_maps(maps, graph)); + BOOST_CHECK(CheckVertexMaps(maps, graph)); } } } diff --git a/tests/wavefront_scheduler.cpp b/tests/wavefront_scheduler.cpp index 3fb2b203..e7e145b1 100644 --- a/tests/wavefront_scheduler.cpp +++ b/tests/wavefront_scheduler.cpp @@ -24,242 +24,242 @@ limitations under the License. #include "osp/dag_divider/WavefrontComponentScheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -using graph_t = osp::computational_dag_edge_idx_vector_impl_def_t; +using GraphT = osp::ComputationalDagEdgeIdxVectorImplDefT; -template -class ConcreteWavefrontScheduler : public osp::AbstractWavefrontScheduler { +template +class ConcreteWavefrontScheduler : public osp::AbstractWavefrontScheduler { public: - ConcreteWavefrontScheduler(osp::IDagDivider &div, osp::Scheduler &sched) - : osp::AbstractWavefrontScheduler(div, sched) {} + ConcreteWavefrontScheduler(osp::IDagDivider &div, osp::Scheduler &sched) + : osp::AbstractWavefrontScheduler(div, sched) {} // Expose the protected method for testing with the new signature - bool test_distributeProcessors(unsigned total_processors, - const std::vector &work_weights, - std::vector &allocation) const { - return this->distributeProcessors(total_processors, work_weights, allocation); + bool TestDistributeProcessors(unsigned totalProcessors, + const std::vector &workWeights, + std::vector &allocation) const { 
+ return this->DistributeProcessors(totalProcessors, workWeights, allocation); } // Dummy implementation for the pure virtual method - osp::RETURN_STATUS computeSchedule(osp::BspSchedule &) override { return osp::RETURN_STATUS::OSP_SUCCESS; } + osp::ReturnStatus ComputeSchedule(osp::BspSchedule &) override { return osp::ReturnStatus::OSP_SUCCESS; } - std::string getScheduleName() const override { return "ConcreteScheduler"; } + std::string GetScheduleName() const override { return "ConcreteScheduler"; } }; // Mock dependencies for the test -struct MockDivider : public osp::IDagDivider { - std::vector>> divide(const graph_t &) override { return {}; } +struct MockDivider : public osp::IDagDivider { + std::vector>> Divide(const GraphT &) override { return {}; } }; -struct MockScheduler : public osp::Scheduler { - osp::RETURN_STATUS computeSchedule(osp::BspSchedule &) override { return osp::RETURN_STATUS::OSP_SUCCESS; } +struct MockScheduler : public osp::Scheduler { + osp::ReturnStatus ComputeSchedule(osp::BspSchedule &) override { return osp::ReturnStatus::OSP_SUCCESS; } - std::string getScheduleName() const override { return "Mock"; } + std::string GetScheduleName() const override { return "Mock"; } }; -BOOST_AUTO_TEST_SUITE(AbstractWavefrontSchedulerTestSuite) +BOOST_AUTO_TEST_SUITE(abstract_wavefront_scheduler_test_suite) BOOST_AUTO_TEST_CASE(DistributeProcessorsTest) { - MockDivider mock_divider; - MockScheduler mock_scheduler; - ConcreteWavefrontScheduler scheduler(mock_divider, mock_scheduler); + MockDivider mockDivider; + MockScheduler mockScheduler; + ConcreteWavefrontScheduler scheduler(mockDivider, mockScheduler); std::vector allocation; - bool starvation_hit; + bool starvationHit; // Test 1: Proportional distribution with anti-starvation (Abundance) std::vector work1 = {100.0, 200.0, 700.0}; - starvation_hit = scheduler.test_distributeProcessors(10, work1, allocation); + starvationHit = scheduler.TestDistributeProcessors(10, work1, allocation); std::vector 
expected1 = {1, 2, 7}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected1.begin(), expected1.end()); - BOOST_CHECK(!starvation_hit); + BOOST_CHECK(!starvationHit); // Test 2: Proportional with remainders and anti-starvation (Abundance) std::vector work2 = {10.0, 10.0, 10.0, 70.0}; - starvation_hit = scheduler.test_distributeProcessors(10, work2, allocation); + starvationHit = scheduler.TestDistributeProcessors(10, work2, allocation); std::vector expected2 = {1, 1, 1, 7}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected2.begin(), expected2.end()); - BOOST_CHECK(!starvation_hit); + BOOST_CHECK(!starvationHit); // Test 3: Scarcity case (fewer processors than components) std::vector work3 = {50.0, 100.0, 20.0, 80.0}; - starvation_hit = scheduler.test_distributeProcessors(2, work3, allocation); + starvationHit = scheduler.TestDistributeProcessors(2, work3, allocation); std::vector expected3 = {0, 1, 0, 1}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected3.begin(), expected3.end()); - BOOST_CHECK(starvation_hit); + BOOST_CHECK(starvationHit); // Test 4: More processors than components, with remainders (Abundance) std::vector work4 = {10, 90}; - starvation_hit = scheduler.test_distributeProcessors(12, work4, allocation); + starvationHit = scheduler.TestDistributeProcessors(12, work4, allocation); std::vector expected4 = {1, 11}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected4.begin(), expected4.end()); - BOOST_CHECK(!starvation_hit); + BOOST_CHECK(!starvationHit); // Test 5: Edge case - zero processors std::vector work5 = {100.0, 200.0}; - starvation_hit = scheduler.test_distributeProcessors(0, work5, allocation); + starvationHit = scheduler.TestDistributeProcessors(0, work5, allocation); std::vector expected5 = {0, 0}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected5.begin(), expected5.end()); - BOOST_CHECK(!starvation_hit); + 
BOOST_CHECK(!starvationHit); // Test 6: Edge case - zero work std::vector work6 = {0.0, 0.0, 0.0}; - starvation_hit = scheduler.test_distributeProcessors(10, work6, allocation); + starvationHit = scheduler.TestDistributeProcessors(10, work6, allocation); std::vector expected6 = {0, 0, 0}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected6.begin(), expected6.end()); - BOOST_CHECK(!starvation_hit); + BOOST_CHECK(!starvationHit); // Test 7: Inactive components (work is zero) std::vector work7 = {100.0, 0.0, 300.0, 0.0}; - starvation_hit = scheduler.test_distributeProcessors(8, work7, allocation); + starvationHit = scheduler.TestDistributeProcessors(8, work7, allocation); std::vector expected7 = {2, 0, 6, 0}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected7.begin(), expected7.end()); - BOOST_CHECK(!starvation_hit); + BOOST_CHECK(!starvationHit); // Test 8: Scarcity with equal work std::vector work8 = {100.0, 100.0, 100.0, 100.0}; - starvation_hit = scheduler.test_distributeProcessors(3, work8, allocation); + starvationHit = scheduler.TestDistributeProcessors(3, work8, allocation); // Expect processors to be given to the first components due to stable sort std::vector expected8 = {0, 1, 1, 1}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected8.begin(), expected8.end()); - BOOST_CHECK(starvation_hit); + BOOST_CHECK(starvationHit); // Test 9: Scarcity with one dominant component std::vector work9 = {10.0, 10.0, 1000.0}; - starvation_hit = scheduler.test_distributeProcessors(2, work9, allocation); + starvationHit = scheduler.TestDistributeProcessors(2, work9, allocation); // Both processors should go to the largest component std::vector expected9 = {0, 1, 1}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected9.begin(), expected9.end()); - BOOST_CHECK(starvation_hit); + BOOST_CHECK(starvationHit); // Test 10: Scarcity with a single processor std::vector work10 = 
{10.0, 50.0, 20.0}; - starvation_hit = scheduler.test_distributeProcessors(1, work10, allocation); + starvationHit = scheduler.TestDistributeProcessors(1, work10, allocation); // The single processor should go to the component with the most work std::vector expected10 = {0, 1, 0}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected10.begin(), expected10.end()); - BOOST_CHECK(starvation_hit); + BOOST_CHECK(starvationHit); } BOOST_AUTO_TEST_SUITE_END() // Mock implementations for dependencies -using graph_t = osp::computational_dag_edge_idx_vector_impl_def_t; -using VertexType = graph_t::vertex_idx; +using GraphT = osp::ComputationalDagEdgeIdxVectorImplDefT; +using VertexType = GraphT::VertexIdx; // A mock divider that returns a predictable set of sections. -struct MockDivider_2 : public osp::IDagDivider { - std::vector>> sections_to_return; +struct MockDivider2 : public osp::IDagDivider { + std::vector>> sectionsToReturn_; - std::vector>> divide(const graph_t &) override { return sections_to_return; } + std::vector>> Divide(const GraphT &) override { return sectionsToReturn_; } }; // A mock sub-scheduler that returns a simple, predictable schedule. 
-struct MockSubScheduler : public osp::Scheduler { - osp::RETURN_STATUS computeSchedule(osp::BspSchedule &schedule) override { +struct MockSubScheduler : public osp::Scheduler { + osp::ReturnStatus ComputeSchedule(osp::BspSchedule &schedule) override { // Assign all tasks to the first processor in a single superstep - for (VertexType v = 0; v < schedule.getInstance().getComputationalDag().num_vertices(); ++v) { - schedule.setAssignedProcessor(v, 0); - schedule.setAssignedSuperstep(v, 0); + for (VertexType v = 0; v < schedule.GetInstance().GetComputationalDag().NumVertices(); ++v) { + schedule.SetAssignedProcessor(v, 0); + schedule.SetAssignedSuperstep(v, 0); } - schedule.setNumberOfSupersteps(1); - return osp::RETURN_STATUS::OSP_SUCCESS; + schedule.SetNumberOfSupersteps(1); + return osp::ReturnStatus::OSP_SUCCESS; } - std::string getScheduleName() const override { return "MockSubScheduler"; } + std::string GetScheduleName() const override { return "MockSubScheduler"; } }; struct TestFixture { - graph_t dag; - osp::BspArchitecture arch; - MockDivider_2 mock_divider; - MockSubScheduler mock_sub_scheduler; + GraphT dag_; + osp::BspArchitecture arch_; + MockDivider2 mockDivider_; + MockSubScheduler mockSubScheduler_; TestFixture() { // A simple DAG: v0 -> v1, v2 -> v3 // Two components that will be in the same wavefront set. 
- dag.add_vertex(10, 1, 1); // v0 - dag.add_vertex(20, 1, 1); // v1 - dag.add_vertex(30, 1, 1); // v2 - dag.add_vertex(40, 1, 1); // v3 - dag.add_edge(0, 1); - dag.add_edge(2, 3); + dag_.AddVertex(10, 1, 1); // v0 + dag_.AddVertex(20, 1, 1); // v1 + dag_.AddVertex(30, 1, 1); // v2 + dag_.AddVertex(40, 1, 1); // v3 + dag_.AddEdge(0, 1); + dag_.AddEdge(2, 3); // An architecture with 10 processors of one type - arch.setNumberOfProcessors(10); + arch_.SetNumberOfProcessors(10); } }; -BOOST_FIXTURE_TEST_SUITE(WavefrontComponentSchedulerTestSuite, TestFixture) +BOOST_FIXTURE_TEST_SUITE(wavefront_component_scheduler_test_suite, TestFixture) BOOST_AUTO_TEST_CASE(BasicSchedulingTest) { // Setup the mock divider to return one section with our two components - mock_divider.sections_to_return = {{{0, 1}}, {{2, 3}}}; + mockDivider_.sectionsToReturn_ = {{{0, 1}}, {{2, 3}}}; - osp::WavefrontComponentScheduler scheduler(mock_divider, mock_sub_scheduler); - osp::BspInstance instance(dag, arch); - osp::BspSchedule schedule(instance); + osp::WavefrontComponentScheduler scheduler(mockDivider_, mockSubScheduler_); + osp::BspInstance instance(dag_, arch_); + osp::BspSchedule schedule(instance); - auto status = scheduler.computeSchedule(schedule); - BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::OSP_SUCCESS); + auto status = scheduler.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(status, osp::ReturnStatus::OSP_SUCCESS); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(3), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(2), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(3), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0); - 
BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 1); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(3), 1); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(2), 1); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(3), 1); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 2); + BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 2); } BOOST_AUTO_TEST_CASE(MultipleSectionsTest) { // Setup the mock divider to return two separate sections - mock_divider.sections_to_return = { + mockDivider_.sectionsToReturn_ = { {{0}, {1}}, {{2}, {3}} }; - osp::WavefrontComponentScheduler scheduler(mock_divider, mock_sub_scheduler); - osp::BspInstance instance(dag, arch); - osp::BspSchedule schedule(instance); + osp::WavefrontComponentScheduler scheduler(mockDivider_, mockSubScheduler_); + osp::BspInstance instance(dag_, arch_); + osp::BspSchedule schedule(instance); - auto status = scheduler.computeSchedule(schedule); - BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::OSP_SUCCESS); + auto status = scheduler.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(status, osp::ReturnStatus::OSP_SUCCESS); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 3); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(1), 3); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(0), 0); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(1), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(3), 4); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 1); - BOOST_CHECK_EQUAL(schedule.assignedSuperstep(3), 1); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(2), 0); + BOOST_CHECK_EQUAL(schedule.AssignedProcessor(3), 4); + 
BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(2), 1); + BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(3), 1); - BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 2); + BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 2); } BOOST_AUTO_TEST_CASE(StarvationReturnsErrorTest) { // Use an architecture with only 1 processor - osp::BspArchitecture scarce_arch; - scarce_arch.setNumberOfProcessors(1); + osp::BspArchitecture scarceArch; + scarceArch.SetNumberOfProcessors(1); // Setup the mock divider to return one section with two components - mock_divider.sections_to_return = { + mockDivider_.sectionsToReturn_ = { {{0}, {1}}, {{2, 3}} }; - osp::WavefrontComponentScheduler scheduler(mock_divider, mock_sub_scheduler); - osp::BspInstance instance(dag, scarce_arch); - osp::BspSchedule schedule(instance); + osp::WavefrontComponentScheduler scheduler(mockDivider_, mockSubScheduler_); + osp::BspInstance instance(dag_, scarceArch); + osp::BspSchedule schedule(instance); // With 2 components and only 1 processor, the starvation case should be hit. 
- auto status = scheduler.computeSchedule(schedule); - BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::ERROR); + auto status = scheduler.ComputeSchedule(schedule); + BOOST_CHECK_EQUAL(status, osp::ReturnStatus::ERROR); } BOOST_AUTO_TEST_SUITE_END() @@ -272,10 +272,10 @@ BOOST_AUTO_TEST_SUITE_END() // TestFixture_2() { // // A DAG with two isomorphic components {0,1} and {2,3}, and one unique one {4,5} -// dag.add_vertex(10, 1, 1); dag.add_vertex(20, 1, 1); // v0, v1 -// dag.add_vertex(10, 1, 1); dag.add_vertex(20, 1, 1); // v2, v3 -// dag.add_vertex(50, 1, 1); dag.add_vertex(50, 1, 1); // v4, v5 -// dag.add_edge(0, 1); dag.add_edge(2, 3); dag.add_edge(4, 5); +// dag.AddVertex(10, 1, 1); dag.AddVertex(20, 1, 1); // v0, v1 +// dag.AddVertex(10, 1, 1); dag.AddVertex(20, 1, 1); // v2, v3 +// dag.AddVertex(50, 1, 1); dag.AddVertex(50, 1, 1); // v4, v5 +// dag.AddEdge(0, 1); dag.AddEdge(2, 3); dag.AddEdge(4, 5); // } // }; @@ -283,62 +283,62 @@ BOOST_AUTO_TEST_SUITE_END() // BOOST_AUTO_TEST_CASE(AbundanceSchedulingTest) { -// arch.setNumberOfProcessors(6); +// arch.SetNumberOfProcessors(6); // mock_divider.sections_to_return = {{{0, 1}, {2, 3}, {4, 5}}}; // osp::IsomorphicWavefrontComponentScheduler scheduler(mock_divider, mock_sub_scheduler); // osp::BspInstance instance(dag, arch); // osp::BspSchedule schedule(instance); -// auto status = scheduler.computeSchedule(schedule); -// BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::OSP_SUCCESS); +// auto status = scheduler.ComputeSchedule(schedule); +// BOOST_CHECK_EQUAL(status, osp::ReturnStatus::OSP_SUCCESS); // // Member 1 of iso group {0,1} gets 1 proc (global proc 0) -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 0); -// BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); +// BOOST_CHECK_EQUAL(schedule.AssignedProcessor(0), 0); +// BOOST_CHECK_EQUAL(schedule.AssignedProcessor(1), 0); +// BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(0), 0); // // Member 2 of iso 
group {2,3} gets 1 proc (global proc 1) -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 1); -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(3), 1); -// BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 0); +// BOOST_CHECK_EQUAL(schedule.AssignedProcessor(2), 1); +// BOOST_CHECK_EQUAL(schedule.AssignedProcessor(3), 1); +// BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(2), 0); // // Unique group {4,5} gets 4 procs (global procs 2,3,4,5), sub-schedule uses first one. -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(4), 2); -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(5), 2); -// BOOST_CHECK_EQUAL(schedule.assignedSuperstep(4), 0); +// BOOST_CHECK_EQUAL(schedule.AssignedProcessor(4), 2); +// BOOST_CHECK_EQUAL(schedule.AssignedProcessor(5), 2); +// BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(4), 0); -// BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 1); +// BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 1); // } // BOOST_AUTO_TEST_CASE(IndivisibleScarcitySchedulingTest) { // // 2 isomorphic components, 1 unique. 3 processors available. 
-// arch.setNumberOfProcessors(3); +// arch.SetNumberOfProcessors(3); // mock_divider.sections_to_return = {{{0, 1}, {2, 3}, {4, 5}}}; // osp::IsomorphicWavefrontComponentScheduler scheduler(mock_divider, mock_sub_scheduler); // osp::BspInstance instance(dag, arch); // osp::BspSchedule schedule(instance); -// auto status = scheduler.computeSchedule(schedule); -// BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::OSP_SUCCESS); +// auto status = scheduler.ComputeSchedule(schedule); +// BOOST_CHECK_EQUAL(status, osp::ReturnStatus::OSP_SUCCESS); -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); -// BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 0); -// BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 1); // Sequential +// BOOST_CHECK_EQUAL(schedule.AssignedProcessor(0), 0); +// BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(0), 0); +// BOOST_CHECK_EQUAL(schedule.AssignedProcessor(2), 0); +// BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(2), 1); // Sequential // // Unique group scheduled on its 2 processors (global procs 1, 2) -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(4), 1); -// BOOST_CHECK_EQUAL(schedule.assignedSuperstep(4), 0); +// BOOST_CHECK_EQUAL(schedule.AssignedProcessor(4), 1); +// BOOST_CHECK_EQUAL(schedule.AssignedSuperstep(4), 0); -// BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 2); +// BOOST_CHECK_EQUAL(schedule.NumberOfSupersteps(), 2); // } // BOOST_AUTO_TEST_CASE(StarvationReturnsErrorTest) { // // IsomorphismGroups will find 2 groups: {{0,1}, {2,3}} and {{4,5}}. // // With only 1 processor, this is a starvation scenario. 
-// arch.setNumberOfProcessors(1); +// arch.SetNumberOfProcessors(1); // mock_divider.sections_to_return = {{{0, 1}, {2, 3}, {4, 5}}}; // osp::IsomorphicWavefrontComponentScheduler scheduler(mock_divider, mock_sub_scheduler); @@ -346,8 +346,8 @@ BOOST_AUTO_TEST_SUITE_END() // osp::BspSchedule schedule(instance); // // With 2 active groups and only 1 processor, starvation is hit. -// auto status = scheduler.computeSchedule(schedule); -// BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::ERROR); +// auto status = scheduler.ComputeSchedule(schedule); +// BOOST_CHECK_EQUAL(status, osp::ReturnStatus::ERROR); // } // BOOST_AUTO_TEST_SUITE_END() diff --git a/third/SankeyPlots/SankeyPlots_version.jl/src/SankeyPlots_version.jl b/third/SankeyPlots/SankeyPlots_version.jl/src/SankeyPlots_version.jl index 13ff617f..3b4f9773 100644 --- a/third/SankeyPlots/SankeyPlots_version.jl/src/SankeyPlots_version.jl +++ b/third/SankeyPlots/SankeyPlots_version.jl/src/SankeyPlots_version.jl @@ -58,7 +58,7 @@ In addition to [Plots.jl attributes](http://docs.juliaplots.org/latest/attribute size --> (bsp_position_force.second * 200, bsp_position_force.first * 200) # Change the size here if not everything is printed end - vw = vertex_weight.(Ref(g), vertices(g)) + vw = vertex_weight.(Ref(g), Vertices(g)) m = maximum(vw) if node_widths !== nothing @@ -72,7 +72,7 @@ In addition to [Plots.jl attributes](http://docs.juliaplots.org/latest/attribute y = make_compact(x, y, vw / m) end - src_offsets = get_src_offsets(g, perm) ./ (m / (2*max_height)) + src_offsets = get_src_offsets(g, perm) ./ (m / (2*max_height)) dst_offsets = get_dst_offsets(g, perm) ./ (m / (2*max_height)) if label_position ∉ (:inside, :left, :right, :top, :bottom, :node, :legend) @@ -240,7 +240,7 @@ function sankey_graph(src::Vector, dst::Vector, w) # Parse src and dst to match all ids in unique_nodes parser_dict = Dict(unique_nodes[id]=>id for id = 1:length(unique_nodes)) - + src = [parser_dict[src_val] for src_val in src] dst = 
[parser_dict[dst_val] for dst_val in dst]