diff --git a/.clang-format b/.clang-format index 93fcdef5..19c0ad9c 100644 --- a/.clang-format +++ b/.clang-format @@ -1,136 +1,336 @@ --- Language: Cpp -# BasedOnStyle: LLVM AccessModifierOffset: -2 AlignAfterOpenBracket: Align -AlignConsecutiveMacros: false -AlignConsecutiveAssignments: false -AlignConsecutiveDeclarations: false -AlignEscapedNewlines: Right -AlignOperands: true -AlignTrailingComments: true +AlignArrayOfStructures: Right +AlignConsecutiveAssignments: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveBitFields: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveDeclarations: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: true + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveMacros: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveShortCaseStatements: + Enabled: true + AcrossEmptyLines: true + AcrossComments: true + AlignCaseArrows: true + AlignCaseColons: false +AlignConsecutiveTableGenBreakingDAGArgColons: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: true +AlignConsecutiveTableGenCondOperatorColons: + Enabled: false + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: false +AlignConsecutiveTableGenDefinitionColons: + Enabled: true + AcrossEmptyLines: false + AcrossComments: false + AlignCompound: false + AlignFunctionDeclarations: false + AlignFunctionPointers: false + PadOperators: false +AlignEscapedNewlines: LeftWithLastLine +AlignOperands: Align +AlignTrailingComments: + Kind: Always + OverEmptyLines: 0 AllowAllArgumentsOnNextLine: true -AllowAllConstructorInitializersOnNextLine: true AllowAllParametersOfDeclarationOnNextLine: true -AllowShortBlocksOnASingleLine: Never +AllowBreakBeforeNoexceptSpecifier: Never +AllowShortBlocksOnASingleLine: Empty +AllowShortCaseExpressionOnASingleLine: true AllowShortCaseLabelsOnASingleLine: false +AllowShortCompoundRequirementOnASingleLine: true +AllowShortEnumsOnASingleLine: true AllowShortFunctionsOnASingleLine: All +AllowShortIfStatementsOnASingleLine: WithoutElse AllowShortLambdasOnASingleLine: All -AllowShortIfStatementsOnASingleLine: Never -AllowShortLoopsOnASingleLine: false +AllowShortLoopsOnASingleLine: true +AllowShortNamespacesOnASingleLine: false AlwaysBreakAfterDefinitionReturnType: None -AlwaysBreakAfterReturnType: None AlwaysBreakBeforeMultilineStrings: false -AlwaysBreakTemplateDeclarations: true -BinPackArguments: true -BinPackParameters: true +AttributeMacros: + - __capability +BinPackArguments: false +#BinPackLongBracedList: true +BinPackParameters: OnePerLine +BitFieldColonSpacing: Both BraceWrapping: AfterCaseLabel: false AfterClass: false - AfterControlStatement: false + AfterControlStatement: Never AfterEnum: false + AfterExternBlock: false AfterFunction: false AfterNamespace: false AfterObjCDeclaration: false AfterStruct: false AfterUnion: false - AfterExternBlock: false BeforeCatch: false BeforeElse: false + BeforeLambdaBody: false + BeforeWhile: false IndentBraces: false - SplitEmptyFunction: true - SplitEmptyRecord: true + SplitEmptyFunction: false + SplitEmptyRecord: false SplitEmptyNamespace: true -BreakBeforeBinaryOperators: None -BreakBeforeBraces: Attach -BreakBeforeInheritanceComma: false -BreakInheritanceList: BeforeColon -BreakBeforeTernaryOperators: true -BreakConstructorInitializersBeforeComma: false -BreakConstructorInitializers: BeforeColon +BreakAdjacentStringLiterals: true +BreakAfterAttributes: Leave BreakAfterJavaFieldAnnotations: false +#BreakAfterOpenBracketBracedList: true +#BreakAfterOpenBracketFunction: true +#BreakAfterOpenBracketIf: true +#BreakAfterOpenBracketLoop: true +#BreakAfterOpenBracketSwitch: true +BreakAfterReturnType: ExceptShortType +BreakArrays: false +BreakBeforeBinaryOperators: All +BreakBeforeBraces: Custom +#BreakBeforeCloseBracketBracedList: true +#BreakBeforeCloseBracketFunction: true +#BreakBeforeCloseBracketIf: true +#BreakBeforeCloseBracketLoop: true +#BreakBeforeCloseBracketSwitch: true +BreakBeforeConceptDeclarations: Always +BreakBeforeInlineASMColon: OnlyMultiline +#BreakBeforeTemplateCloser: true +#BreakBeforeTernaryOperators: false +#BreakBinaryOperations: RespectPrecedence +#BreakConstructorInitializers: AfterColon +#BreakFunctionDefinitionParameters: false +#BreakInheritanceList: AfterColon BreakStringLiterals: true -ColumnLimit: 0 +BreakTemplateDeclarations: Yes +ColumnLimit: 130 CommentPragmas: '^ IWYU pragma:' -CompactNamespaces: true -ConstructorInitializerAllOnOneLineOrOnePerLine: false +CompactNamespaces: false ConstructorInitializerIndentWidth: 4 ContinuationIndentWidth: 4 Cpp11BracedListStyle: true -DeriveLineEnding: true DerivePointerAlignment: false DisableFormat: false +EmptyLineAfterAccessModifier: Never +EmptyLineBeforeAccessModifier: LogicalBlock ExperimentalAutoDetectBinPacking: false FixNamespaceComments: true ForEachMacros: - foreach - Q_FOREACH - BOOST_FOREACH -IncludeBlocks: Preserve +IfMacros: + - KJ_IF_MAYBE +IncludeBlocks: Regroup IncludeCategories: - - Regex: '^"(llvm|llvm-c|clang|clang-c)/' + - Regex: '^' Priority: 2 SortPriority: 0 - - Regex: '^(<|"(gtest|gmock|isl|json)/)' - Priority: 3 + CaseSensitive: false + - Regex: '^<.*\.h>' + Priority: 1 + SortPriority: 0 + CaseSensitive: false + - Regex: '^<.*' + Priority: 2 SortPriority: 0 + CaseSensitive: false - Regex: '.*' - Priority: 1 + Priority: 3 SortPriority: 0 -IncludeIsMainRegex: '(Test)?$' + CaseSensitive: false +IncludeIsMainRegex: '([-_](test|unittest))?$' IncludeIsMainSourceRegex: '' -IndentCaseLabels: false +IndentAccessModifiers: false +IndentCaseBlocks: false +IndentCaseLabels: true +IndentExportBlock: true +IndentExternBlock: Indent IndentGotoLabels: true -IndentPPDirectives: None +IndentPPDirectives: AfterHash +IndentRequiresClause: true IndentWidth: 4 IndentWrappedFunctionNames: false +InsertBraces: true +InsertNewlineAtEOF: true +InsertTrailingCommas: Wrapped +IntegerLiteralSeparator: + Binary: 0 + BinaryMinDigits: 0 + Decimal: 0 + DecimalMinDigits: 0 + Hex: 0 + HexMinDigits: 0 JavaScriptQuotes: Leave JavaScriptWrapImports: true -KeepEmptyLinesAtTheStartOfBlocks: true +KeepEmptyLines: + AtEndOfFile: false + AtStartOfBlock: false + AtStartOfFile: true +KeepFormFeed: false +LambdaBodyIndentation: Signature +LineEnding: DeriveLF MacroBlockBegin: '' MacroBlockEnd: '' +MainIncludeChar: Quote MaxEmptyLinesToKeep: 1 NamespaceIndentation: None -ObjCBinPackProtocolList: Auto +ObjCBinPackProtocolList: Never ObjCBlockIndentWidth: 4 +ObjCBreakBeforeNestedBlockParam: true ObjCSpaceAfterProperty: false ObjCSpaceBeforeProtocolList: true -PenaltyBreakAssignment: 2 -PenaltyBreakBeforeFirstCallParameter: 19 +PackConstructorInitializers: NextLine +PenaltyBreakAssignment: 10 +PenaltyBreakBeforeFirstCallParameter: 1 +PenaltyBreakBeforeMemberAccess: 150 PenaltyBreakComment: 300 -PenaltyBreakFirstLessLess: 120 -PenaltyBreakString: 1000 -PenaltyBreakTemplateDeclaration: 10 -PenaltyExcessCharacter: 1000000 -PenaltyReturnTypeOnItsOwnLine: 60 +PenaltyBreakFirstLessLess: 50 +PenaltyBreakOpenParenthesis: 0 +PenaltyBreakScopeResolution: 500 +PenaltyBreakString: 400 +PenaltyBreakTemplateDeclaration: 5 +PenaltyExcessCharacter: 15 +PenaltyIndentedWhitespace: 0 +PenaltyReturnTypeOnItsOwnLine: 400 PointerAlignment: Right -ReflowComments: true -SortIncludes: true -SortUsingDeclarations: true -SpaceAfterCStyleCast: false +PPIndentWidth: -1 +QualifierAlignment: Leave +RawStringFormats: + - Language: Cpp + Delimiters: + - cc + - CC + - cpp + - Cpp + - CPP + - 'c++' + - 'C++' + CanonicalDelimiter: '' + BasedOnStyle: google + - Language: TextProto + Delimiters: + - pb + - PB + - proto + - PROTO + EnclosingFunctions: + - EqualsProto + - EquivToProto + - PARSE_PARTIAL_TEXT_PROTO + - PARSE_TEST_PROTO + - PARSE_TEXT_PROTO + - ParseTextOrDie + - ParseTextProtoOrDie + - ParseTestProto + - ParsePartialTestProto + CanonicalDelimiter: pb + BasedOnStyle: google +ReferenceAlignment: Pointer +ReflowComments: Always +RemoveBracesLLVM: false +RemoveEmptyLinesInUnwrappedLines: true +RemoveParentheses: Leave +RemoveSemicolon: false +RequiresClausePosition: OwnLine +RequiresExpressionIndentation: OuterScope +SeparateDefinitionBlocks: Always +ShortNamespaceLines: 0 +SkipMacroDefinitionBody: false +SortIncludes: CaseSensitive +SortJavaStaticImport: Before +SortUsingDeclarations: Lexicographic +SpaceAfterCStyleCast: true SpaceAfterLogicalNot: false -SpaceAfterTemplateKeyword: false +#SpaceAfterOperatorKeyword: false +SpaceAfterTemplateKeyword: true +SpaceAroundPointerQualifiers: Default SpaceBeforeAssignmentOperators: true +SpaceBeforeCaseColon: false SpaceBeforeCpp11BracedList: false SpaceBeforeCtorInitializerColon: true SpaceBeforeInheritanceColon: true -SpaceBeforeParens: ControlStatements +SpaceBeforeJsonColon: false +SpaceBeforeParens: Custom +SpaceBeforeParensOptions: + AfterControlStatements: true + AfterForeachMacros: true + AfterFunctionDefinitionName: false + AfterFunctionDeclarationName: false + AfterIfMacros: true + #AfterNot: true + AfterOverloadedOperator: false + AfterPlacementOperator: true + AfterRequiresInClause: true + AfterRequiresInExpression: true + BeforeNonEmptyParentheses: false SpaceBeforeRangeBasedForLoopColon: true -SpaceInEmptyBlock: false -SpaceInEmptyParentheses: false -SpacesBeforeTrailingComments: 1 -SpacesInAngles: false -SpacesInConditionalStatement: false -SpacesInContainerLiterals: false -SpacesInCStyleCastParentheses: false -SpacesInParentheses: false -SpacesInSquareBrackets: false SpaceBeforeSquareBrackets: false -Standard: Latest +#SpaceInEmptyBlock: true +#SpaceInEmptyBraces: Block +SpacesBeforeTrailingComments: 4 +SpacesInAngles: Never +SpacesInContainerLiterals: true +SpacesInLineCommentPrefix: + Minimum: 1 + Maximum: -1 +SpacesInParens: Custom +SpacesInParensOptions: + ExceptDoubleParentheses: true + InConditionalStatements: false + InCStyleCasts: false + InEmptyParentheses: false + Other: false +SpacesInSquareBrackets: false +Standard: Auto +StatementAttributeLikeMacros: + - Q_EMIT StatementMacros: - Q_UNUSED - QT_REQUIRE_VERSION TabWidth: 8 -UseCRLF: false +TableGenBreakInsideDAGArg: DontBreak UseTab: Never +VerilogBreakBetweenInstancePorts: true +WhitespaceSensitiveMacros: + - BOOST_PP_STRINGIZE + - CF_SWIFT_NAME + - NS_SWIFT_NAME + - PP_STRINGIZE + - STRINGIZE +WrapNamespaceBodyWithEmptyLines: Always ... diff --git a/.clang-tidy b/.clang-tidy new file mode 100644 index 00000000..7299225d --- /dev/null +++ b/.clang-tidy @@ -0,0 +1,24 @@ +Checks: '-*,readability-identifier-naming' + +CheckOptions: + # 1. Member Variables: camelBack with a trailing underscore (e.g., memberVariableTest_) + - key: readability-identifier-naming.MemberCase + value: camelBack + - key: readability-identifier-naming.MemberSuffix + value: _ + + # 2. Functions: CamelCase (e.g., CalculateTotal) + - key: readability-identifier-naming.FunctionCase + value: CamelCase + + # 3. Local Variables/Parameters: camelBack (e.g., totalValue) + - key: readability-identifier-naming.VariableCase + value: camelBack + - key: readability-identifier-naming.ParameterCase + value: camelBack + + # 4. Classes/Structs: CamelCase (e.g., MyClass) + - key: readability-identifier-naming.ClassCase + value: CamelCase + - key: readability-identifier-naming.StructCase + value: CamelCase \ No newline at end of file diff --git a/apps/bsp_test_suite.cpp b/apps/bsp_test_suite.cpp index c42f67fd..f31c1972 100644 --- a/apps/bsp_test_suite.cpp +++ b/apps/bsp_test_suite.cpp @@ -26,7 +26,6 @@ limitations under the License. using graph_t = osp::computational_dag_edge_idx_vector_impl_def_int_t; int main(int argc, char *argv[]) { - osp::BspScheduleRecompTestSuiteRunner runner; return runner.run(argc, argv); diff --git a/apps/coarser_plotter.cpp b/apps/coarser_plotter.cpp index d9093b97..93cfae2c 100644 --- a/apps/coarser_plotter.cpp +++ b/apps/coarser_plotter.cpp @@ -35,9 +35,7 @@ int main(int argc, char *argv[]) { } std::string graph_file = argv[1]; - std::string graph_name = graph_file.substr(graph_file.rfind("/") + 1, - graph_file.rfind(".") - graph_file.rfind("/") - 1); - + std::string graph_name = graph_file.substr(graph_file.rfind("/") + 1, graph_file.rfind(".") - graph_file.rfind("/") - 1); Graph_t graph; bool status = file_reader::readGraph(graph_file, graph); @@ -46,8 +44,7 @@ int main(int argc, char *argv[]) { return 1; } - - SarkarParams::MulParameters< v_workw_t > params; + SarkarParams::MulParameters> params; params.commCostVec = std::vector>({1, 2, 5, 10, 20, 50, 100, 200, 500, 1000}); params.max_num_iteration_without_changes = 3; params.leniency = 0.005; @@ -63,7 +60,7 @@ int main(int argc, char *argv[]) { Graph_t graph_copy = graph; bool ignore_vertex_types = false; - + if (ignore_vertex_types) { for (const auto &vert : graph_copy.vertices()) { graph_copy.set_vertex_type(vert, 0); @@ -74,7 +71,7 @@ int main(int argc, char *argv[]) { std::vector colours(contraction_map.size()); for (std::size_t i = 0; i < contraction_map.size(); ++i) { - colours[i] = static_cast( contraction_map[i] ); + colours[i] = static_cast(contraction_map[i]); } std::ofstream out_dot(argv[2]); @@ -86,7 +83,7 @@ int main(int argc, char *argv[]) { DotFileWriter writer; writer.write_colored_graph(out_dot, graph, colours); - if (argc >=4 ) { + if (argc >= 4) { std::ofstream coarse_out_dot(argv[3]); if (!coarse_out_dot.is_open()) { std::cout << "Unable to write/open output file.\n"; @@ -100,4 +97,4 @@ int main(int argc, char *argv[]) { } return 0; -} \ No newline at end of file +} diff --git a/apps/graph_analyser.cpp b/apps/graph_analyser.cpp index 25106519..48e4f3ee 100644 --- a/apps/graph_analyser.cpp +++ b/apps/graph_analyser.cpp @@ -23,11 +23,11 @@ limitations under the License. #include #include +#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp" +#include "osp/auxiliary/io/general_file_reader.hpp" #include "osp/auxiliary/misc.hpp" #include "osp/graph_algorithms/directed_graph_path_util.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp" -#include "osp/auxiliary/io/general_file_reader.hpp" using namespace osp; @@ -151,22 +151,24 @@ int main(int argc, char *argv[]) { << std::endl; for (const auto &dirEntry : std::filesystem::recursive_directory_iterator(graph_dir)) { - if (std::filesystem::is_directory(dirEntry)) + if (std::filesystem::is_directory(dirEntry)) { continue; + } std::cout << "Processing: " << dirEntry << std::endl; std::string path_str = dirEntry.path(); - + ComputationalDag graph; bool status = file_reader::readGraph(dirEntry.path(), graph); if (!status) { std::cout << "Failed to read graph\n"; return 1; - } + } - if (!status) + if (!status) { continue; + } std::string graph_name = path_str.substr(path_str.rfind("/") + 1); graph_name = graph_name.substr(0, graph_name.rfind(".")); @@ -177,4 +179,4 @@ int main(int argc, char *argv[]) { } return 0; -} \ No newline at end of file +} diff --git a/apps/graph_converter.cpp b/apps/graph_converter.cpp index 6c0d50db..3ffedd4f 100644 --- a/apps/graph_converter.cpp +++ b/apps/graph_converter.cpp @@ -33,23 +33,19 @@ void print_usage(const char *prog_name) { std::cerr << "Graph Format Converter" << std::endl; std::cerr << "----------------------" << std::endl; std::cerr << "This tool converts a directed graph from one file format to another. The desired output" << std::endl; - std::cerr << "format is determined by the file extension of the output file." << std::endl - << std::endl; + std::cerr << "format is determined by the file extension of the output file." << std::endl << std::endl; std::cerr << "Usage: " << prog_name << " " << std::endl << std::endl; std::cerr << "Arguments:" << std::endl; - std::cerr << " Path to the input graph file." << std::endl - << std::endl; + std::cerr << " Path to the input graph file." << std::endl << std::endl; std::cerr << " Path for the output graph file. Special values of '.dot' or '.hdag' can be" << std::endl; std::cerr << " used to automatically generate the output filename by replacing the input" << std::endl; std::cerr << " file's extension with the specified one." << std::endl; std::cerr << std::endl; std::cerr << "Supported Formats:" << std::endl; std::cerr << " Input (by extension): .hdag, .mtx, .dot" << std::endl; - std::cerr << " Output (by extension): .hdag, .dot" << std::endl - << std::endl; + std::cerr << " Output (by extension): .hdag, .dot" << std::endl << std::endl; std::cerr << "The .hdag format is the HyperdagDB format. A detailed description can be found at:" << std::endl; - std::cerr << "https://github.com/Algebraic-Programming/HyperDAG_DB" << std::endl - << std::endl; + std::cerr << "https://github.com/Algebraic-Programming/HyperDAG_DB" << std::endl << std::endl; std::cerr << "Examples:" << std::endl; std::cerr << " " << prog_name << " my_graph.mtx my_graph.hdag" << std::endl; std::cerr << " " << prog_name << " my_graph.hdag my_graph.dot" << std::endl; @@ -98,8 +94,8 @@ int main(int argc, char *argv[]) { return 1; } - std::cout << "Successfully read graph with " << graph.num_vertices() << " vertices and " << graph.num_edges() - << " edges." << std::endl; + std::cout << "Successfully read graph with " << graph.num_vertices() << " vertices and " << graph.num_edges() << " edges." + << std::endl; std::filesystem::path output_path(output_filename); std::string output_ext = output_path.extension().string(); @@ -118,4 +114,4 @@ int main(int argc, char *argv[]) { std::cout << "Successfully wrote graph to " << output_filename << std::endl; return 0; -} \ No newline at end of file +} diff --git a/apps/graph_generator/gen_Erdos-Renyi_graph.cpp b/apps/graph_generator/gen_Erdos-Renyi_graph.cpp index 5a429624..7c816302 100644 --- a/apps/graph_generator/gen_Erdos-Renyi_graph.cpp +++ b/apps/graph_generator/gen_Erdos-Renyi_graph.cpp @@ -16,15 +16,15 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ -#include "osp/auxiliary/misc.hpp" -#include "osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" - #include #include #include #include +#include "osp/auxiliary/misc.hpp" +#include "osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" + using namespace osp; using ComputationalDag = computational_dag_vector_impl_def_int_t; @@ -32,8 +32,7 @@ using VertexType = vertex_idx_t; int main(int argc, char *argv[]) { if (argc < 3) { - std::cerr << "Usage: " << argv[0] - << " (optional:) \n" + std::cerr << "Usage: " << argv[0] << " (optional:) \n" << std::endl; return 1; } @@ -97,19 +96,18 @@ int main(int argc, char *argv[]) { std::ofstream graph_write; graph_write.open(graph_name); graph_write << header; - graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " " + - std::to_string(graph.num_edges() + graph.num_vertices()) + "\n"; + graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " " + + std::to_string(graph.num_edges() + graph.num_vertices()) + "\n"; for (VertexType i = 0; i < num_vert; i++) { double val = (1 - 2 * randInt(2)) * std::exp(unif_log(re)); graph_write << std::to_string(i + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) + "\n"; for (const auto &chld : graph.children(i)) { val = unif(re); - graph_write << std::to_string(chld + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) + - "\n"; + graph_write << std::to_string(chld + 1) + " " + std::to_string(i + 1) + " " + std::to_string(val) + "\n"; } } graph_write.close(); } return 0; -} \ No newline at end of file +} diff --git a/apps/graph_generator/gen_near_diag_random_graph.cpp b/apps/graph_generator/gen_near_diag_random_graph.cpp index 30e4fb2e..ede87c5c 100644 --- a/apps/graph_generator/gen_near_diag_random_graph.cpp +++ b/apps/graph_generator/gen_near_diag_random_graph.cpp @@ -16,15 +16,15 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ -#include "osp/auxiliary/misc.hpp" -#include "osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" - #include #include #include #include +#include "osp/auxiliary/misc.hpp" +#include "osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" + using namespace osp; using ComputationalDag = computational_dag_vector_impl_def_int_t; @@ -32,8 +32,7 @@ using VertexType = vertex_idx_t; int main(int argc, char *argv[]) { if (argc < 4) { - std::cerr << "Usage: " << argv[0] - << " (optional:) \n" + std::cerr << "Usage: " << argv[0] << " (optional:) \n" << std::endl; return 1; } @@ -103,19 +102,18 @@ int main(int argc, char *argv[]) { std::ofstream graph_write; graph_write.open(graph_name); graph_write << header; - graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " " + - std::to_string(graph.num_edges() + graph.num_vertices()) + "\n"; + graph_write << std::to_string(graph.num_vertices()) + " " + std::to_string(graph.num_vertices()) + " " + + std::to_string(graph.num_edges() + graph.num_vertices()) + "\n"; for (VertexType j = 0; j < num_vert; j++) { double val = (1 - 2 * randInt(2)) * std::exp(unif_log(re)); graph_write << std::to_string(j + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) + "\n"; for (const auto &chld : graph.children(j)) { val = unif(re); - graph_write << std::to_string(chld + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) + - "\n"; + graph_write << std::to_string(chld + 1) + " " + std::to_string(j + 1) + " " + std::to_string(val) + "\n"; } } graph_write.close(); } return 0; -} \ No newline at end of file +} diff --git a/apps/graph_generator/post_incomplete_cholesky.cpp b/apps/graph_generator/post_incomplete_cholesky.cpp index ccc4f0d2..757b569d 100644 --- a/apps/graph_generator/post_incomplete_cholesky.cpp +++ b/apps/graph_generator/post_incomplete_cholesky.cpp @@ -15,16 +15,15 @@ limitations under the License. @author Christos Matzoros, Toni Boehnlein, Pal Andras Papp, Raphael S. Steiner */ +#include +#include +#include +#include +#include #include #include -#include - -#include -#include #include -#include -#include -#include +#include int main(int argc, char *argv[]) { if (argc < 2) { @@ -38,21 +37,22 @@ int main(int argc, char *argv[]) { name_graph = name_graph.substr(0, name_graph.find_last_of(".")); std::cout << "Graph: " << name_graph << std::endl; - - using SM_csc = Eigen::SparseMatrix; // Compressed Sparse Column format - using SM_csr = Eigen::SparseMatrix; // Compressed Sparse Row format - SM_csc L_csc; // Initialize a sparse matrix in CSC format + using SM_csc = Eigen::SparseMatrix; // Compressed Sparse Column format + using SM_csr = Eigen::SparseMatrix; // Compressed Sparse Row format + + SM_csc L_csc; // Initialize a sparse matrix in CSC format Eigen::loadMarket(L_csc, filename_graph); - SM_csr L_csr = L_csc; // Reformat the sparse matrix from CSC to CSR format + SM_csr L_csr = L_csc; // Reformat the sparse matrix from CSC to CSR format Eigen::IncompleteCholesky> ichol(L_csc); SM_csc LChol_csc = ichol.matrixL(); - Eigen::saveMarket(LChol_csc, filename_graph.substr(0, filename_graph.find_last_of(".")) + "_postChol.mtx", Eigen::UpLoType::Symmetric); + Eigen::saveMarket( + LChol_csc, filename_graph.substr(0, filename_graph.find_last_of(".")) + "_postChol.mtx", Eigen::UpLoType::Symmetric); return 0; -} \ No newline at end of file +} diff --git a/apps/ilp_bsp_scheduler.cpp b/apps/ilp_bsp_scheduler.cpp index d0c44e37..90fe30f7 100644 --- a/apps/ilp_bsp_scheduler.cpp +++ b/apps/ilp_bsp_scheduler.cpp @@ -23,14 +23,14 @@ limitations under the License. #include #include -#include "osp/auxiliary/misc.hpp" -#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp" -#include "osp/graph_algorithms/directed_graph_path_util.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/auxiliary/io/arch_file_reader.hpp" #include "osp/auxiliary/io/bsp_schedule_file_writer.hpp" #include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/auxiliary/misc.hpp" +#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp" +#include "osp/graph_algorithms/directed_graph_path_util.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" using namespace osp; @@ -38,8 +38,7 @@ using ComputationalDag = computational_dag_edge_idx_vector_impl_def_int_t; int main(int argc, char *argv[]) { if (argc < 4) { - std::cerr << "Usage: " << argv[0] << " " - << std::endl; + std::cerr << "Usage: " << argv[0] << " " << std::endl; return 1; } @@ -78,7 +77,6 @@ int main(int argc, char *argv[]) { // instance.getArchitecture().setProcessorsWithTypes({0,0,1,1,1,1}); if (!status_graph || !status_arch) { - std::cout << "Reading files failed." << std::endl; return 1; } @@ -90,23 +88,20 @@ int main(int argc, char *argv[]) { CoptFullScheduler scheduler; scheduler.setMaxNumberOfSupersteps(steps); - - if (recomp) { + if (recomp) { BspScheduleRecomp schedule(instance); auto status_schedule = scheduler.computeScheduleRecomp(schedule); if (status_schedule == RETURN_STATUS::OSP_SUCCESS || status_schedule == RETURN_STATUS::BEST_FOUND) { - DotFileWriter dot_writer; - dot_writer.write_schedule_recomp(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" + - scheduler.getScheduleName() + "_recomp_schedule.dot", + dot_writer.write_schedule_recomp(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" + + scheduler.getScheduleName() + "_recomp_schedule.dot", schedule); - dot_writer.write_schedule_recomp_duplicate(name_graph + "_" + name_machine + "_maxS_" + - std::to_string(steps) + "_" + scheduler.getScheduleName() + - "_duplicate_recomp_schedule.dot", + dot_writer.write_schedule_recomp_duplicate(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" + + scheduler.getScheduleName() + "_duplicate_recomp_schedule.dot", schedule); std::cout << "Recomp Schedule computed with costs: " << schedule.computeCosts() << std::endl; @@ -117,16 +112,14 @@ int main(int argc, char *argv[]) { } } else { - BspSchedule schedule(instance); auto status_schedule = scheduler.computeSchedule(schedule); if (status_schedule == RETURN_STATUS::OSP_SUCCESS || status_schedule == RETURN_STATUS::BEST_FOUND) { - DotFileWriter dot_writer; - dot_writer.write_schedule(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" + - scheduler.getScheduleName() + "_schedule.dot", + dot_writer.write_schedule(name_graph + "_" + name_machine + "_maxS_" + std::to_string(steps) + "_" + + scheduler.getScheduleName() + "_schedule.dot", schedule); std::cout << "Schedule computed with costs: " << schedule.computeCosts() << std::endl; diff --git a/apps/ilp_hypergraph_partitioner.cpp b/apps/ilp_hypergraph_partitioner.cpp index fd184358..78b02a47 100644 --- a/apps/ilp_hypergraph_partitioner.cpp +++ b/apps/ilp_hypergraph_partitioner.cpp @@ -23,18 +23,17 @@ limitations under the License. #include #include +#include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" +#include "osp/auxiliary/io/mtx_hypergraph_file_reader.hpp" +#include "osp/auxiliary/io/partitioning_file_writer.hpp" #include "osp/auxiliary/misc.hpp" #include "osp/graph_algorithms/directed_graph_path_util.hpp" -#include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/partitioning/model/hypergraph_utility.hpp" #include "osp/partitioning/partitioners/generic_FM.hpp" #include "osp/partitioning/partitioners/partitioning_ILP.hpp" #include "osp/partitioning/partitioners/partitioning_ILP_replication.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "osp/auxiliary/io/mtx_hypergraph_file_reader.hpp" -#include "osp/auxiliary/io/partitioning_file_writer.hpp" - using namespace osp; @@ -43,8 +42,7 @@ using hypergraph = Hypergraph_def_t; int main(int argc, char *argv[]) { if (argc < 4) { - std::cerr << "Usage: " << argv[0] << " " - << std::endl; + std::cerr << "Usage: " << argv[0] << " " << std::endl; return 1; } @@ -83,12 +81,13 @@ int main(int argc, char *argv[]) { PartitioningProblem instance; - bool file_status = true; + bool file_status = true; if (file_ending == "hdag") { graph dag; file_status = file_reader::readComputationalDagHyperdagFormatDB(filename_hgraph, dag); - if(file_status) + if (file_status) { instance.getHypergraph() = convert_from_cdag_as_hyperdag(dag); + } } else if (file_ending == "mtx") { file_status = file_reader::readHypergraphMartixMarketFormat(filename_hgraph, instance.getHypergraph()); } else { @@ -96,7 +95,6 @@ int main(int argc, char *argv[]) { return 1; } if (!file_status) { - std::cout << "Reading input file failed." << std::endl; return 1; } @@ -106,55 +104,65 @@ int main(int argc, char *argv[]) { Partitioning initial_partition(instance); GenericFM fm; - for(size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) + for (size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) { initial_partition.setAssignedPartition(node, static_cast(node % static_cast(nr_parts))); - if(nr_parts == 2) + } + if (nr_parts == 2) { fm.ImprovePartitioning(initial_partition); - if(nr_parts == 4 || nr_parts == 8 || nr_parts == 16 || nr_parts == 32) + } + if (nr_parts == 4 || nr_parts == 8 || nr_parts == 16 || nr_parts == 32) { fm.RecursiveFM(initial_partition); + } if (replicate > 0) { - PartitioningWithReplication partition(instance); HypergraphPartitioningILPWithReplication partitioner; - for(size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) + for (size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) { partition.setAssignedPartitions(node, {initial_partition.assignedPartition(node)}); - if(partition.satisfiesBalanceConstraint()) + } + if (partition.satisfiesBalanceConstraint()) { partitioner.setUseInitialSolution(true); + } partitioner.setTimeLimitSeconds(600); - if(replicate == 2) - partitioner.setReplicationModel(HypergraphPartitioningILPWithReplication::REPLICATION_MODEL_IN_ILP::GENERAL); + if (replicate == 2) { + partitioner.setReplicationModel( + HypergraphPartitioningILPWithReplication::REPLICATION_MODEL_IN_ILP::GENERAL); + } auto solve_status = partitioner.computePartitioning(partition); if (solve_status == RETURN_STATUS::OSP_SUCCESS || solve_status == RETURN_STATUS::BEST_FOUND) { - file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) + - "_ILP_rep" + std::to_string(replicate) + ".txt", partition); - std::cout << "Partitioning (with replicaiton) computed with costs: " << partition.computeConnectivityCost() << std::endl; + file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) + "_ILP_rep" + + std::to_string(replicate) + ".txt", + partition); + std::cout << "Partitioning (with replicaiton) computed with costs: " << partition.computeConnectivityCost() + << std::endl; } else { std::cout << "Computing partition failed." << std::endl; return 1; } } else { - Partitioning partition(instance); HypergraphPartitioningILP partitioner; - for(size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) + for (size_t node = 0; node < instance.getHypergraph().num_vertices(); ++node) { partition.setAssignedPartition(node, initial_partition.assignedPartition(node)); - if(partition.satisfiesBalanceConstraint()) + } + if (partition.satisfiesBalanceConstraint()) { partitioner.setUseInitialSolution(true); + } partitioner.setTimeLimitSeconds(600); auto solve_status = partitioner.computePartitioning(partition); if (solve_status == RETURN_STATUS::OSP_SUCCESS || solve_status == RETURN_STATUS::BEST_FOUND) { - file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) + - "_ILP_rep" + std::to_string(replicate) + ".txt", partition); + file_writer::write_txt(name_hgraph + "_" + std::to_string(nr_parts) + "_" + std::to_string(imbalance) + "_ILP_rep" + + std::to_string(replicate) + ".txt", + partition); std::cout << "Partitioning computed with costs: " << partition.computeConnectivityCost() << std::endl; } else { std::cout << "Computing partition failed." << std::endl; diff --git a/apps/osp.cpp b/apps/osp.cpp index 7ea2b0de..7c66224b 100644 --- a/apps/osp.cpp +++ b/apps/osp.cpp @@ -16,7 +16,6 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ -#include "boost/log/utility/setup.hpp" #include #include #include @@ -26,13 +25,14 @@ limitations under the License. #include #include -#include "osp/auxiliary/misc.hpp" -#include "osp/bsp/model/BspSchedule.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "boost/log/utility/setup.hpp" #include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/general_file_reader.hpp" #include "osp/auxiliary/io/bsp_schedule_file_writer.hpp" +#include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/auxiliary/misc.hpp" +#include "osp/bsp/model/BspSchedule.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "test_suite_runner/ConfigParser.hpp" #include "test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp" @@ -45,7 +45,6 @@ std::filesystem::path getExecutablePath() { return std::filesystem::canonical("/ // invoked upon program call int main(int argc, char *argv[]) { - ConfigParser parser(getExecutablePath().remove_filename().string() += "osp_config.json"); try { @@ -56,17 +55,16 @@ int main(int argc, char *argv[]) { } for (auto &instance : parser.instances) { - BspInstance bsp_instance; std::string filename_graph = instance.second.get_child("graphFile").get_value(); - std::string name_graph = filename_graph.substr(filename_graph.rfind("/") + 1, - filename_graph.rfind(".") - filename_graph.rfind("/") - 1); + std::string name_graph + = filename_graph.substr(filename_graph.rfind("/") + 1, filename_graph.rfind(".") - filename_graph.rfind("/") - 1); std::string filename_machine = instance.second.get_child("machineParamsFile").get_value(); - std::string name_machine = filename_machine.substr( - filename_machine.rfind("/") + 1, filename_machine.rfind(".") - filename_machine.rfind("/") - 1); + std::string name_machine = filename_machine.substr(filename_machine.rfind("/") + 1, + filename_machine.rfind(".") - filename_machine.rfind("/") - 1); bool status_architecture = file_reader::readBspArchitecture(filename_machine, bsp_instance.getArchitecture()); @@ -75,7 +73,7 @@ int main(int argc, char *argv[]) { continue; } - bool status_graph = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); + bool status_graph = file_reader::readGraph(filename_graph, bsp_instance.getComputationalDag()); if (!status_graph) { std::cerr << "Reading graph files " + filename_graph << " failed." << std::endl; continue; @@ -93,7 +91,6 @@ int main(int argc, char *argv[]) { size_t algorithm_counter = 0; for (auto &algorithm : parser.scheduler) { - schedulers_name[algorithm_counter] = algorithm.second.get_child("name").get_value(); const auto start_time = std::chrono::high_resolution_clock::now(); @@ -105,48 +102,45 @@ int main(int argc, char *argv[]) { return_status = run_bsp_scheduler(parser, algorithm.second, schedule); } catch (...) { schedulers_failed[algorithm_counter] = true; - std::cerr << "Error during execution of Scheduler " + - algorithm.second.get_child("name").get_value() + "." + std::cerr << "Error during execution of Scheduler " + algorithm.second.get_child("name").get_value() + + "." << std::endl; continue; } const auto finish_time = std::chrono::high_resolution_clock::now(); - schedulers_compute_time[algorithm_counter] = - std::chrono::duration_cast(finish_time - start_time).count(); + schedulers_compute_time[algorithm_counter] + = std::chrono::duration_cast(finish_time - start_time).count(); if (return_status != RETURN_STATUS::OSP_SUCCESS && return_status != RETURN_STATUS::BEST_FOUND) { - schedulers_failed[algorithm_counter] = true; if (return_status == RETURN_STATUS::ERROR) { - std::cerr << "Error while computing schedule " + - algorithm.second.get_child("name").get_value() + "." + std::cerr << "Error while computing schedule " + algorithm.second.get_child("name").get_value() + + "." << std::endl; } else if (return_status == RETURN_STATUS::TIMEOUT) { - std::cerr << "Timeout while computing schedule " + - algorithm.second.get_child("name").get_value() + "." + std::cerr << "Timeout while computing schedule " + algorithm.second.get_child("name").get_value() + + "." << std::endl; } else { - std::cerr << "Unknown return status while computing schedule " + - algorithm.second.get_child("name").get_value() + "." + std::cerr << "Unknown return status while computing schedule " + + algorithm.second.get_child("name").get_value() + "." << std::endl; } } else { - schedulers_costs[algorithm_counter] = BspScheduleCS(schedule).computeCosts(); schedulers_work_costs[algorithm_counter] = schedule.computeWorkCosts(); schedulers_supersteps[algorithm_counter] = schedule.numberOfSupersteps(); if (parser.global_params.get_child("outputSchedule").get_value()) { try { - - file_writer::write_txt(name_graph + "_" + name_machine + "_" + - algorithm.second.get_child("name").get_value() + - "_schedule.txt", schedule); + file_writer::write_txt(name_graph + "_" + name_machine + "_" + + algorithm.second.get_child("name").get_value() + "_schedule.txt", + schedule); } catch (std::exception &e) { - std::cerr << "Writing schedule file for " + name_graph + ", " + name_machine + ", " + - schedulers_name[algorithm_counter] + " has failed." + std::cerr << "Writing schedule file for " + name_graph + ", " + name_machine + ", " + + schedulers_name[algorithm_counter] + " has failed." << std::endl; std::cerr << e.what() << std::endl; } @@ -154,12 +148,13 @@ int main(int argc, char *argv[]) { if (parser.global_params.get_child("outputSankeySchedule").get_value()) { try { - file_writer::write_sankey(name_graph + "_" + name_machine + "_" + - algorithm.second.get_child("name").get_value() + - "_sankey.sankey", BspScheduleCS(schedule)); + file_writer::write_sankey(name_graph + "_" + name_machine + "_" + + algorithm.second.get_child("name").get_value() + + "_sankey.sankey", + BspScheduleCS(schedule)); } catch (std::exception &e) { - std::cerr << "Writing sankey file for " + name_graph + ", " + name_machine + ", " + - schedulers_name[algorithm_counter] + " has failed." + std::cerr << "Writing sankey file for " + name_graph + ", " + name_machine + ", " + + schedulers_name[algorithm_counter] + " has failed." << std::endl; std::cerr << e.what() << std::endl; } @@ -167,16 +162,15 @@ int main(int argc, char *argv[]) { if (parser.global_params.get_child("outputDotSchedule").get_value()) { try { - DotFileWriter sched_writer; - sched_writer.write_schedule(name_graph + "_" + name_machine + "_" + - algorithm.second.get_child("name").get_value() + - "_schedule.dot", + sched_writer.write_schedule(name_graph + "_" + name_machine + "_" + + algorithm.second.get_child("name").get_value() + + "_schedule.dot", schedule); } catch (std::exception &e) { - std::cerr << "Writing dot file for " + name_graph + ", " + name_machine + ", " + - schedulers_name[algorithm_counter] + " has failed." + std::cerr << "Writing dot file for " + name_graph + ", " + name_machine + ", " + + schedulers_name[algorithm_counter] + " has failed." << std::endl; std::cerr << e.what() << std::endl; } @@ -188,8 +182,9 @@ int main(int argc, char *argv[]) { int tw = 1, ww = 1, cw = 1, nsw = 1, ct = 1; for (size_t i = 0; i < parser.scheduler.size(); i++) { - if (schedulers_failed[i]) + if (schedulers_failed[i]) { continue; + } tw = std::max(tw, 1 + int(std::log10(schedulers_costs[i]))); ww = std::max(ww, 1 + int(std::log10(schedulers_work_costs[i]))); cw = std::max(cw, 1 + int(std::log10(schedulers_costs[i] - schedulers_work_costs[i]))); @@ -200,8 +195,8 @@ int main(int argc, char *argv[]) { std::vector ordering = sorting_arrangement(schedulers_costs); std::cout << std::endl << name_graph << " - " << name_machine << std::endl; - std::cout << "Number of Vertices: " + std::to_string(bsp_instance.getComputationalDag().num_vertices()) + - " Number of Edges: " + std::to_string(bsp_instance.getComputationalDag().num_edges()) + std::cout << "Number of Vertices: " + std::to_string(bsp_instance.getComputationalDag().num_vertices()) + + " Number of Edges: " + std::to_string(bsp_instance.getComputationalDag().num_edges()) << std::endl; for (size_t j = 0; j < parser.scheduler.size(); j++) { size_t i = j; @@ -213,8 +208,7 @@ int main(int argc, char *argv[]) { } else { std::cout << "total costs: " << std::right << std::setw(tw) << schedulers_costs[i] << " work costs: " << std::right << std::setw(ww) << schedulers_work_costs[i] - << " comm costs: " << std::right << std::setw(cw) - << schedulers_costs[i] - schedulers_work_costs[i] + << " comm costs: " << std::right << std::setw(cw) << schedulers_costs[i] - schedulers_work_costs[i] << " number of supersteps: " << std::right << std::setw(nsw) << schedulers_supersteps[i] << " compute time: " << std::right << std::setw(ct) << schedulers_compute_time[i] << "ms" << " scheduler: " << schedulers_name[i] << std::endl; diff --git a/apps/osp_turnus.cpp b/apps/osp_turnus.cpp index c8ba01f2..ea5f114d 100644 --- a/apps/osp_turnus.cpp +++ b/apps/osp_turnus.cpp @@ -20,12 +20,12 @@ limitations under the License. #include #include -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/auxiliary/io/general_file_reader.hpp" #include "osp/auxiliary/io/bsp_schedule_file_writer.hpp" +#include "osp/auxiliary/io/general_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" using namespace osp; @@ -34,7 +34,6 @@ using mem_constr = persistent_transient_memory_constraint; // invoked upon program call int main(int argc, char *argv[]) { - if (argc != 5) { std::cout << "Usage: " << argv[0] << " " << std::endl; std::cout << "Available algorithms: bsp, etf, variance" << std::endl; @@ -63,25 +62,21 @@ int main(int argc, char *argv[]) { return 1; } - boost::algorithm::to_lower(algorithm_name); // modifies str + boost::algorithm::to_lower(algorithm_name); // modifies str BspSchedule bsp_schedule(bsp_instance); Scheduler *scheduler = nullptr; if (algorithm_name == "bsp") { - float max_percent_idle_processors = 0.2f; bool increase_parallelism_in_new_superstep = true; - scheduler = new GreedyBspScheduler( - max_percent_idle_processors, increase_parallelism_in_new_superstep); + scheduler = new GreedyBspScheduler(max_percent_idle_processors, increase_parallelism_in_new_superstep); } else if (algorithm_name == "etf") { - scheduler = new EtfScheduler(BL_EST); } else if (algorithm_name == "variance") { - const double max_percent_idle_processors = 0.0; const bool increase_parallelism_in_new_superstep = true; const double variance_power = 6.0; @@ -91,10 +86,15 @@ int main(int argc, char *argv[]) { const float bound_component_weight_percent = 4.0f; const float slack = 0.0f; - scheduler = new LightEdgeVariancePartitioner( - max_percent_idle_processors, variance_power, heavy_is_x_times_median, min_percent_components_retained, - bound_component_weight_percent, increase_parallelism_in_new_superstep, - max_priority_difference_percent, slack); + scheduler = new LightEdgeVariancePartitioner( + max_percent_idle_processors, + variance_power, + heavy_is_x_times_median, + min_percent_components_retained, + bound_component_weight_percent, + increase_parallelism_in_new_superstep, + max_priority_difference_percent, + slack); } else { std::cout << "Unknown algorithm: " << algorithm_name << std::endl; diff --git a/apps/sptrsv_test_suite.cpp b/apps/sptrsv_test_suite.cpp index c065e32b..47643a9b 100644 --- a/apps/sptrsv_test_suite.cpp +++ b/apps/sptrsv_test_suite.cpp @@ -19,6 +19,7 @@ limitations under the License. #include #include #include + #include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" #include "test_suite_runner/BspScheduleTestSuiteRunner.hpp" @@ -27,11 +28,10 @@ limitations under the License. using graph_t = osp::sparse_matrix_graph_int32_t; int main(int argc, char *argv[]) { - osp::BspScheduleTestSuiteRunner runner; return runner.run(argc, argv); return 0; } -#endif \ No newline at end of file +#endif diff --git a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp index f023f937..01d07714 100644 --- a/apps/test_suite_runner/AbstractTestSuiteRunner.hpp +++ b/apps/test_suite_runner/AbstractTestSuiteRunner.hpp @@ -18,6 +18,8 @@ limitations under the License. #pragma once +#include +#include #include #include #include @@ -34,23 +36,21 @@ limitations under the License. #include "osp/auxiliary/io/general_file_reader.hpp" #include "osp/auxiliary/return_status.hpp" #include "osp/bsp/model/BspInstance.hpp" -#include -#include // #define EIGEN_FOUND 1 #ifdef EIGEN_FOUND -#include -#include +# include +# include -#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" +# include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" #endif namespace osp { namespace pt = boost::property_tree; -template +template class AbstractTestSuiteRunner { protected: std::string executable_dir; @@ -60,8 +60,7 @@ class AbstractTestSuiteRunner { std::vector all_csv_headers; std::vector>> active_stats_modules; - std::string graph_dir_path, machine_dir_path, output_target_object_dir_path, log_file_path, - statistics_output_file_path; + std::string graph_dir_path, machine_dir_path, output_target_object_dir_path, log_file_path, statistics_output_file_path; bool write_target_object_to_file = false; unsigned time_limit_seconds = 0; @@ -71,35 +70,38 @@ class AbstractTestSuiteRunner { try { executable_dir = getExecutablePath().remove_filename().string(); time_limit_seconds = parser.global_params.get_child("timeLimit").get_value(); - write_target_object_to_file = - parser.global_params.get_child("outputSchedule").get_value_optional().value_or(false); + write_target_object_to_file + = parser.global_params.get_child("outputSchedule").get_value_optional().value_or(false); graph_dir_path = parser.global_params.get_child("graphDirectory").get_value(); - if (graph_dir_path.substr(0, 1) != "/") + if (graph_dir_path.substr(0, 1) != "/") { graph_dir_path = executable_dir + graph_dir_path; + } machine_dir_path = parser.global_params.get_child("archDirectory").get_value(); - if (machine_dir_path.substr(0, 1) != "/") + if (machine_dir_path.substr(0, 1) != "/") { machine_dir_path = executable_dir + machine_dir_path; + } if (write_target_object_to_file) { - output_target_object_dir_path = parser.global_params.get_child("scheduleDirectory") - .get_value(); - if (output_target_object_dir_path.substr(0, 1) != "/") + output_target_object_dir_path = parser.global_params.get_child("scheduleDirectory").get_value(); + if (output_target_object_dir_path.substr(0, 1) != "/") { output_target_object_dir_path = executable_dir + output_target_object_dir_path; + } if (!output_target_object_dir_path.empty() && !std::filesystem::exists(output_target_object_dir_path)) { std::filesystem::create_directories(output_target_object_dir_path); } } log_file_path = parser.global_params.get_child("outputLogFile").get_value(); - if (log_file_path.substr(0, 1) != "/") + if (log_file_path.substr(0, 1) != "/") { log_file_path = executable_dir + log_file_path; + } - statistics_output_file_path = - parser.global_params.get_child("outputStatsFile").get_value(); - if (statistics_output_file_path.substr(0, 1) != "/") + statistics_output_file_path = parser.global_params.get_child("outputStatsFile").get_value(); + if (statistics_output_file_path.substr(0, 1) != "/") { statistics_output_file_path = executable_dir + statistics_output_file_path; + } return true; } catch (const std::exception &e) { @@ -130,8 +132,7 @@ class AbstractTestSuiteRunner { } } - all_csv_headers.insert(all_csv_headers.end(), unique_module_metric_headers.begin(), - unique_module_metric_headers.end()); + all_csv_headers.insert(all_csv_headers.end(), unique_module_metric_headers.begin(), unique_module_metric_headers.end()); std::filesystem::path stats_p(statistics_output_file_path); if (stats_p.has_parent_path() && !std::filesystem::exists(stats_p.parent_path())) { @@ -155,10 +156,8 @@ class AbstractTestSuiteRunner { stats_out_stream.open(statistics_output_file_path, std::ios_base::app); if (!stats_out_stream.is_open()) { - log_stream << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path - << std::endl; - std::cerr << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path - << std::endl; + log_stream << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path << std::endl; + std::cerr << "CRITICAL ERROR: Could not open statistics output file: " << statistics_output_file_path << std::endl; } else if (!file_exists_and_has_header) { for (size_t i = 0; i < all_csv_headers.size(); ++i) { stats_out_stream << all_csv_headers[i] << (i == all_csv_headers.size() - 1 ? "" : ","); @@ -168,28 +167,30 @@ class AbstractTestSuiteRunner { } } - virtual RETURN_STATUS compute_target_object_impl(const BspInstance &instance, std::unique_ptr &target_object, + virtual RETURN_STATUS compute_target_object_impl(const BspInstance &instance, + std::unique_ptr &target_object, const pt::ptree &algo_config, - long long &computation_time_ms) = 0; + long long &computation_time_ms) + = 0; virtual void create_and_register_statistic_modules(const std::string &module_name) = 0; - virtual void write_target_object_hook(const TargetObjectType &, const std::string &, const std::string &, - const std::string &) { - } // default in case TargetObjectType cannot be written to file + virtual void write_target_object_hook(const TargetObjectType &, const std::string &, const std::string &, const std::string &) { + } // default in case TargetObjectType cannot be written to file public: AbstractTestSuiteRunner() {} virtual ~AbstractTestSuiteRunner() { - if (log_stream.is_open()) + if (log_stream.is_open()) { log_stream.close(); - if (stats_out_stream.is_open()) + } + if (stats_out_stream.is_open()) { stats_out_stream.close(); + } } int run(int argc, char *argv[]) { - try { parser.parse_args(argc, argv); } catch (const std::exception &e) { @@ -197,8 +198,9 @@ class AbstractTestSuiteRunner { return 1; } - if (!parse_common_config()) + if (!parse_common_config()) { return 1; + } setup_log_file(); @@ -229,8 +231,9 @@ class AbstractTestSuiteRunner { } std::string filename_machine = machine_entry.path().string(); std::string name_machine = filename_machine.substr(filename_machine.rfind('/') + 1); - if (name_machine.rfind('.') != std::string::npos) + if (name_machine.rfind('.') != std::string::npos) { name_machine = name_machine.substr(0, name_machine.rfind('.')); + } BspArchitecture arch; if (!file_reader::readBspArchitecture(filename_machine, arch)) { @@ -246,16 +249,18 @@ class AbstractTestSuiteRunner { } std::string filename_graph = graph_entry.path().string(); std::string name_graph = filename_graph.substr(filename_graph.rfind('/') + 1); - if (name_graph.rfind('.') != std::string::npos) + if (name_graph.rfind('.') != std::string::npos) { name_graph = name_graph.substr(0, name_graph.rfind('.')); + } log_stream << "Start Graph: " + filename_graph + "\n"; BspInstance bsp_instance; bsp_instance.getArchitecture() = arch; bool graph_status = false; std::string ext; - if (filename_graph.rfind('.') != std::string::npos) + if (filename_graph.rfind('.') != std::string::npos) { ext = filename_graph.substr(filename_graph.rfind('.') + 1); + } #ifdef EIGEN_FOUND @@ -268,7 +273,8 @@ class AbstractTestSuiteRunner { SM_csc_int32 L_csc_int32{}; SM_csc_int64 L_csc_int64{}; - if constexpr (std::is_same_v || std::is_same_v) { + if constexpr (std::is_same_v + || std::is_same_v) { if (ext != "mtx") { log_stream << "Error: Only .mtx file is accepted for SpTRSV" << std::endl; return 0; @@ -285,7 +291,6 @@ class AbstractTestSuiteRunner { L_csc_int32 = L_csr_int32; bsp_instance.getComputationalDag().setCSC(&L_csc_int32); } else { - graph_status = Eigen::loadMarket(L_csr_int64, filename_graph); if (!graph_status) { std::cerr << "Failed to read matrix from " << filename_graph << std::endl; @@ -317,13 +322,15 @@ class AbstractTestSuiteRunner { long long computation_time_ms; std::unique_ptr target_object; - RETURN_STATUS exec_status = compute_target_object_impl(bsp_instance, target_object, algo_config, computation_time_ms); + RETURN_STATUS exec_status + = compute_target_object_impl(bsp_instance, target_object, algo_config, computation_time_ms); if (exec_status != RETURN_STATUS::OSP_SUCCESS && exec_status != RETURN_STATUS::BEST_FOUND) { - if (exec_status == RETURN_STATUS::ERROR) + if (exec_status == RETURN_STATUS::ERROR) { log_stream << "Error computing with " << current_algo_name << "." << std::endl; - else if (exec_status == RETURN_STATUS::TIMEOUT) + } else if (exec_status == RETURN_STATUS::TIMEOUT) { log_stream << "Scheduler " << current_algo_name << " timed out." << std::endl; + } continue; } @@ -331,8 +338,8 @@ class AbstractTestSuiteRunner { try { write_target_object_hook(*target_object, name_graph, name_machine, current_algo_name); } catch (const std::exception &e) { - log_stream << "Writing target object file for " << name_graph << ", " << name_machine - << ", " << current_algo_name << " has failed: " << e.what() << std::endl; + log_stream << "Writing target object file for " << name_graph << ", " << name_machine << ", " + << current_algo_name << " has failed: " << e.what() << std::endl; } } @@ -361,4 +368,4 @@ class AbstractTestSuiteRunner { } }; -} // namespace osp +} // namespace osp diff --git a/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp b/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp index 16afc890..dd161798 100644 --- a/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp +++ b/apps/test_suite_runner/BspScheduleRecompTestSuiteRunner.hpp @@ -25,29 +25,28 @@ limitations under the License. #include "StatsModules/GraphStatsModule.hpp" #include "StringToScheduler/run_bsp_recomp_scheduler.hpp" #include "StringToScheduler/run_bsp_scheduler.hpp" +#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/BspScheduleRecomp.hpp" #include "osp/bsp/model/IBspScheduleEval.hpp" -#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp" namespace osp { -template -class BspScheduleRecompTestSuiteRunner - : public AbstractTestSuiteRunner, concrete_graph_t> { +template +class BspScheduleRecompTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t> { private: bool use_memory_constraint_for_bsp; protected: - RETURN_STATUS compute_target_object_impl(const BspInstance &instance, std::unique_ptr>& schedule, const pt::ptree &algo_config, + RETURN_STATUS compute_target_object_impl(const BspInstance &instance, + std::unique_ptr> &schedule, + const pt::ptree &algo_config, long long &computation_time_ms) override { - std::string algo_name = algo_config.get_child("id").get_value(); const std::set scheduler_names = get_available_bsp_scheduler_names(); const std::set scheduler_recomp_names = get_available_bsp_recomp_scheduler_names(); if (scheduler_names.find(algo_name) != scheduler_names.end()) { - auto bsp_schedule = std::make_unique>(instance); const auto start_time = std::chrono::high_resolution_clock::now(); @@ -55,15 +54,13 @@ class BspScheduleRecompTestSuiteRunner RETURN_STATUS status = run_bsp_scheduler(this->parser, algo_config, *bsp_schedule); const auto finish_time = std::chrono::high_resolution_clock::now(); - computation_time_ms = - std::chrono::duration_cast(finish_time - start_time).count(); + computation_time_ms = std::chrono::duration_cast(finish_time - start_time).count(); schedule = std::move(bsp_schedule); return status; } else if (scheduler_recomp_names.find(algo_name) != scheduler_recomp_names.end()) { - auto bsp_recomp_schedule = std::make_unique>(instance); const auto start_time = std::chrono::high_resolution_clock::now(); @@ -71,14 +68,12 @@ class BspScheduleRecompTestSuiteRunner RETURN_STATUS status = run_bsp_recomp_scheduler(this->parser, algo_config, *bsp_recomp_schedule); const auto finish_time = std::chrono::high_resolution_clock::now(); - computation_time_ms = - std::chrono::duration_cast(finish_time - start_time).count(); + computation_time_ms = std::chrono::duration_cast(finish_time - start_time).count(); schedule = std::move(bsp_recomp_schedule); return status; } else { - std::cerr << "No matching category found for algorithm" << std::endl; return RETURN_STATUS::ERROR; } @@ -86,11 +81,9 @@ class BspScheduleRecompTestSuiteRunner void create_and_register_statistic_modules(const std::string &module_name) override { if (module_name == "BasicBspStats") { - this->active_stats_modules.push_back( - std::make_unique>>()); + this->active_stats_modules.push_back(std::make_unique>>()); } else if (module_name == "GraphStats") { - this->active_stats_modules.push_back( - std::make_unique>>()); + this->active_stats_modules.push_back(std::make_unique>>()); } } @@ -107,4 +100,4 @@ class BspScheduleRecompTestSuiteRunner BspScheduleRecompTestSuiteRunner() : AbstractTestSuiteRunner, concrete_graph_t>() {} }; -} // namespace osp +} // namespace osp diff --git a/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp b/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp index ba9bac6e..d1338e4c 100644 --- a/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp +++ b/apps/test_suite_runner/BspScheduleTestSuiteRunner.hpp @@ -19,25 +19,24 @@ limitations under the License. #pragma once #include "AbstractTestSuiteRunner.hpp" -#include "osp/bsp/model/BspSchedule.hpp" -#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp" -#include "StringToScheduler/run_bsp_scheduler.hpp" #include "StatsModules/BasicBspStatsModule.hpp" #include "StatsModules/BspCommStatsModule.hpp" #include "StatsModules/BspSptrsvStatsModule.hpp" #include "StatsModules/GraphStatsModule.hpp" +#include "StringToScheduler/run_bsp_scheduler.hpp" +#include "osp/auxiliary/io/bsp_schedule_file_writer.hpp" +#include "osp/bsp/model/BspSchedule.hpp" namespace osp { -template +template class BspScheduleTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t> { private: - protected: - RETURN_STATUS compute_target_object_impl(const BspInstance &instance, std::unique_ptr>& schedule, - const pt::ptree &algo_config, - long long &computation_time_ms) override { - + RETURN_STATUS compute_target_object_impl(const BspInstance &instance, + std::unique_ptr> &schedule, + const pt::ptree &algo_config, + long long &computation_time_ms) override { schedule = std::make_unique>(instance); const auto start_time = std::chrono::high_resolution_clock::now(); @@ -59,13 +58,14 @@ class BspScheduleTestSuiteRunner : public AbstractTestSuiteRunneractive_stats_modules.push_back(std::make_unique>>(NO_PERMUTE)); } else if (module_name == "BspSptrsvPermLoopProcessorsStats") { - this->active_stats_modules.push_back(std::make_unique>>(LOOP_PROCESSORS)); + this->active_stats_modules.push_back( + std::make_unique>>(LOOP_PROCESSORS)); } else if (module_name == "BspSptrsvPermSnakeProcessorsStats") { - this->active_stats_modules.push_back(std::make_unique>>(SNAKE_PROCESSORS)); + this->active_stats_modules.push_back( + std::make_unique>>(SNAKE_PROCESSORS)); #endif } else if (module_name == "GraphStats") { - this->active_stats_modules.push_back( - std::make_unique>>()); + this->active_stats_modules.push_back(std::make_unique>>()); } } @@ -78,8 +78,7 @@ class BspScheduleTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t>() {} + BspScheduleTestSuiteRunner() : AbstractTestSuiteRunner, concrete_graph_t>() {} }; -} // namespace osp +} // namespace osp diff --git a/apps/test_suite_runner/ConfigParser.hpp b/apps/test_suite_runner/ConfigParser.hpp index f362e424..1750ff9f 100644 --- a/apps/test_suite_runner/ConfigParser.hpp +++ b/apps/test_suite_runner/ConfigParser.hpp @@ -66,7 +66,6 @@ struct ConfigParser { } void add_algorithm(std::string name) { - bool algorithm_found = false; std::string algorithm_identifier = name; @@ -75,7 +74,6 @@ struct ConfigParser { } for (auto &algorithm : scheduler_config) { - std::string alg_name = algorithm.second.get_child("name").get_value(); if (alg_name == algorithm_identifier) { @@ -90,21 +88,17 @@ struct ConfigParser { } void parse_config_file(std::string filename) { - pt::ptree loadPtreeRoot; pt::read_json(filename, loadPtreeRoot); global_params = loadPtreeRoot.get_child("globalParameters"); - + try { instances = loadPtreeRoot.get_child("inputInstances"); - } catch (const pt::ptree_bad_path &e) { - - } + } catch (const pt::ptree_bad_path &e) {} pt::ptree scheduler_config_parse = loadPtreeRoot.get_child("algorithms"); for (auto &algorithm : scheduler_config_parse) { - if (algorithm.second.get_child("run").get_value()) { scheduler.push_back(algorithm); } @@ -113,17 +107,15 @@ struct ConfigParser { public: ConfigParser() = default; + ConfigParser(std::string main_config_file_) : main_config_file(main_config_file_), has_config_file(true) {} void parse_args(const int argc, const char *const argv[]) { - if (has_config_file) { - if (argc < 3) { usage(); throw std::invalid_argument("Parameter error: not enough parameters specified.\n"); } else if (std::string(argv[1]) == "--config") { - std::string config_file = argv[2]; if (config_file.empty() || config_file.substr(config_file.size() - 5) != ".json") { throw std::invalid_argument("Parameter error: config file ending is not \".json\".\n"); @@ -140,10 +132,19 @@ struct ConfigParser { throw std::invalid_argument("Parameter error: config file does not specify global parameters!\n"); } } else { - - const std::set parameters_requiring_value( - {"--config", "--inputDag", "--g", "-inputDag", "-g", "--timeLimit", "--t", "-timeLimit", "-t", - "--inputMachine", "--m", "-inputMachine", "-m"}); + const std::set parameters_requiring_value({"--config", + "--inputDag", + "--g", + "-inputDag", + "-g", + "--timeLimit", + "--t", + "-timeLimit", + "-t", + "--inputMachine", + "--m", + "-inputMachine", + "-m"}); pt::ptree loadPtreeRoot; pt::read_json(main_config_file, loadPtreeRoot); @@ -159,8 +160,8 @@ struct ConfigParser { for (int i = 1; i < argc; ++i) { // Check parameters that require an argument afterwards if (parameters_requiring_value.count(argv[i]) == 1 && i + 1 >= argc) { - throw std::invalid_argument("Parameter error: no parameter value after the \"" + - std::string(argv[i]) + "\" option.\n"); + throw std::invalid_argument("Parameter error: no parameter value after the \"" + std::string(argv[i]) + + "\" option.\n"); } std::string flag = argv[i]; @@ -169,30 +170,30 @@ struct ConfigParser { usage(); throw std::invalid_argument("Parameter error: usage \"" + std::string(argv[i]) + "\".\n"); - } else if (std::string(flag) == "--timelimit" || std::string(flag) == "--t" || - std::string(flag) == "-t" || std::string(flag) == "-timelimit") { + } else if (std::string(flag) == "--timelimit" || std::string(flag) == "--t" || std::string(flag) == "-t" + || std::string(flag) == "-timelimit") { global_params.put("timeLimit", std::stoi(argv[++i])); - } else if (std::string(flag) == "--sankey" || std::string(flag) == "--s" || - std::string(flag) == "-s" || std::string(flag) == "-sankey") { + } else if (std::string(flag) == "--sankey" || std::string(flag) == "--s" || std::string(flag) == "-s" + || std::string(flag) == "-sankey") { global_params.put("outputSankeySchedule", true); - } else if (std::string(flag) == "--dot" || std::string(flag) == "--d" || - std::string(flag) == "-d" || std::string(flag) == "-dot") { + } else if (std::string(flag) == "--dot" || std::string(flag) == "--d" || std::string(flag) == "-d" + || std::string(flag) == "-dot") { global_params.put("outputDotSchedule", true); - } else if (std::string(flag) == "--inputDag" || std::string(flag) == "--g" || - std::string(flag) == "-inputDag" || std::string(flag) == "-g") { + } else if (std::string(flag) == "--inputDag" || std::string(flag) == "--g" || std::string(flag) == "-inputDag" + || std::string(flag) == "-g") { instance.put("graphFile", argv[++i]); graph_specified = true; - } else if (std::string(flag) == "--inputMachine" || std::string(flag) == "--m" || - std::string(flag) == "-inputMachine" || std::string(flag) == "-m") { + } else if (std::string(flag) == "--inputMachine" || std::string(flag) == "--m" + || std::string(flag) == "-inputMachine" || std::string(flag) == "-m") { instance.put("machineParamsFile", argv[++i]); machine_specified = true; - } else if (std::string(flag) == "--output" || std::string(flag) == "--o" || - std::string(flag) == "-output" || std::string(flag) == "-o") { + } else if (std::string(flag) == "--output" || std::string(flag) == "--o" || std::string(flag) == "-output" + || std::string(flag) == "-o") { global_params.put("outputSchedule", true); } else { add_algorithm(flag); @@ -210,16 +211,13 @@ struct ConfigParser { instances.push_back(std::make_pair("", instance)); } } else { - if (argc < 3 || std::string(argv[1]) != "--config") { - std::cout << "Usage: read config file: \n" << " --config *.json \t\tSpecify config .json file.\n"; throw std::invalid_argument("Parameter error: not enough parameters specified.\n"); } else { - std::string config_file = argv[2]; if (config_file.empty() || config_file.substr(config_file.size() - 5) != ".json") { throw std::invalid_argument("Parameter error: config file ending is not \".json\".\n"); diff --git a/apps/test_suite_runner/PebblingTestSuiteRunner.hpp b/apps/test_suite_runner/PebblingTestSuiteRunner.hpp index abbfe998..2cbcfc5d 100644 --- a/apps/test_suite_runner/PebblingTestSuiteRunner.hpp +++ b/apps/test_suite_runner/PebblingTestSuiteRunner.hpp @@ -19,13 +19,13 @@ limitations under the License. #pragma once #include "AbstractTestSuiteRunner.hpp" -#include "osp/pebbling/PebblingSchedule.hpp" -#include "StringToScheduler/run_pebbler.hpp" #include "StatsModules/IStatsModule.hpp" +#include "StringToScheduler/run_pebbler.hpp" +#include "osp/pebbling/PebblingSchedule.hpp" namespace osp { -template +template class BasicPebblingStatsModule : public IStatisticModule> { public: private: @@ -44,17 +44,18 @@ class BasicPebblingStatsModule : public IStatisticModule +template class PebblingTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t> { private: bool use_memory_constraint; protected: - RETURN_STATUS compute_target_object_impl(const BspInstance &instance, std::unique_ptr>& schedule, const pt::ptree &algo_config, + RETURN_STATUS compute_target_object_impl(const BspInstance &instance, + std::unique_ptr> &schedule, + const pt::ptree &algo_config, long long &computation_time_ms) override { - schedule = std::make_unique>(instance); - + const auto start_time = std::chrono::high_resolution_clock::now(); RETURN_STATUS status = run_pebbler(this->parser, algo_config, *schedule); @@ -84,4 +85,4 @@ class PebblingTestSuiteRunner : public AbstractTestSuiteRunner, concrete_graph_t>() {} }; -} // namespace osp +} // namespace osp diff --git a/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp b/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp index 59d6d457..21229567 100644 --- a/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/BasicBspStatsModule.hpp @@ -18,43 +18,36 @@ limitations under the License. #pragma once +#include #include #include -#include #include "IStatsModule.hpp" #include "osp/bsp/model/IBspScheduleEval.hpp" -#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" // For graph_t +#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" // For graph_t namespace osp { -template -class BasicBspStatsModule : public IStatisticModule { -public: - -private: - const std::vector metric_headers = { - "BspCost", "WorkCost", "CommCost", "Supersteps" - }; +template +class BasicBspStatsModule : public IStatisticModule { + public: + private: + const std::vector metric_headers = {"BspCost", "WorkCost", "CommCost", "Supersteps"}; -public: - - std::vector get_metric_headers() const override { - return metric_headers; - } + public: + std::vector get_metric_headers() const override { return metric_headers; } - std::map record_statistics( - const TargetObjectType& schedule, - std::ofstream& /*log_stream*/) const override { + std::map record_statistics(const TargetObjectType &schedule, + std::ofstream & /*log_stream*/) const override { std::map stats; const auto bsp_cost = schedule.computeCosts(); const auto work_cost = schedule.computeWorkCosts(); stats["BspCost"] = std::to_string(bsp_cost); stats["WorkCost"] = std::to_string(work_cost); - stats["CommCost"] = std::to_string(bsp_cost - work_cost); + stats["CommCost"] = std::to_string(bsp_cost - work_cost); stats["Supersteps"] = std::to_string(schedule.numberOfSupersteps()); return stats; } }; -} // namespace osp +} // namespace osp diff --git a/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp b/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp index 7f1066ee..83f6f1b9 100644 --- a/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/BspCommStatsModule.hpp @@ -18,32 +18,29 @@ limitations under the License. #pragma once +#include +#include +#include + #include "IStatsModule.hpp" -#include "osp/bsp/model/BspSchedule.hpp" // Still needed +#include "osp/bsp/model/BspSchedule.hpp" // Still needed #include "osp/bsp/model/cost/BufferedSendingCost.hpp" #include "osp/bsp/model/cost/TotalCommunicationCost.hpp" #include "osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp" -#include -#include -#include namespace osp { -template +template class BspCommStatsModule : public IStatisticModule> { public: private: - const std::vector metric_headers = { - "TotalCommCost", "TotalLambdaCommCost", "BufferedSendingCosts"}; + const std::vector metric_headers = {"TotalCommCost", "TotalLambdaCommCost", "BufferedSendingCosts"}; public: - std::vector get_metric_headers() const override { - return metric_headers; - } + std::vector get_metric_headers() const override { return metric_headers; } - std::map record_statistics( - const BspSchedule &schedule, - std::ofstream & /*log_stream*/) const override { + std::map record_statistics(const BspSchedule &schedule, + std::ofstream & /*log_stream*/) const override { std::map stats; stats["TotalCommCost"] = std::to_string(TotalCommunicationCost()(schedule)); stats["TotalLambdaCommCost"] = std::to_string(TotalLambdaCommunicationCost()(schedule)); @@ -52,4 +49,4 @@ class BspCommStatsModule : public IStatisticModule> { } }; -} // namespace osp +} // namespace osp diff --git a/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp b/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp index 8839ba39..e2b650d2 100644 --- a/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/BspSptrsvStatsModule.hpp @@ -19,33 +19,38 @@ limitations under the License. #pragma once #ifdef EIGEN_FOUND -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "IStatsModule.hpp" -#include "osp/bsp/model/BspSchedule.hpp" -#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" // For graph_t -#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" -#include "osp/auxiliary/sptrsv_simulator/sptrsv.hpp" -#include "osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp" +# include + +# include +# include +# include +# include +# include +# include +# include +# include +# include + +# include "IStatsModule.hpp" +# include "osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp" +# include "osp/auxiliary/sptrsv_simulator/sptrsv.hpp" +# include "osp/bsp/model/BspSchedule.hpp" +# include "osp/graph_implementations/boost_graphs/boost_graph.hpp" // For graph_t +# include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" namespace osp { // Turn permutation mode into a human-readable prefix used in metric names -inline const char* mode_tag(SCHEDULE_NODE_PERMUTATION_MODES m) { +inline const char *mode_tag(SCHEDULE_NODE_PERMUTATION_MODES m) { switch (m) { - case NO_PERMUTE: return "NoPermute_"; - case LOOP_PROCESSORS: return "LoopProc_"; - case SNAKE_PROCESSORS: return "SnakeProc_"; - default: return "Unknown_"; + case NO_PERMUTE: + return "NoPermute_"; + case LOOP_PROCESSORS: + return "LoopProc_"; + case SNAKE_PROCESSORS: + return "SnakeProc_"; + default: + return "Unknown_"; } } @@ -56,9 +61,9 @@ bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { assert(v1.size() == v2.size()); bool same = true; const double epsilon = 1e-10; - for (long long int i=0; i < v1.size(); ++i){ - //std::cout << "Ind: " << i << ": | " << v1[i] << " - " << v2[i] << " | = " << abs(v1[i]-v2[i]) << "\n"; - if( std::abs(v1[i] - v2[i]) / (std::abs(v1[i]) + std::abs(v2[i]) + epsilon) > epsilon ){ + for (long long int i = 0; i < v1.size(); ++i) { + // std::cout << "Ind: " << i << ": | " << v1[i] << " - " << v2[i] << " | = " << abs(v1[i]-v2[i]) << "\n"; + if (std::abs(v1[i] - v2[i]) / (std::abs(v1[i]) + std::abs(v2[i]) + epsilon) > epsilon) { std::cout << "We have differences in the matrix in position: " << i << std::endl; std::cout << v1[i] << " , " << v2[i] << std::endl; same = false; @@ -68,49 +73,40 @@ bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { return same; } -template +template class BspSptrsvStatsModule : public IStatisticModule { -public: - explicit BspSptrsvStatsModule(SCHEDULE_NODE_PERMUTATION_MODES _mode = NO_PERMUTE) - : mode(_mode) {} + public: + explicit BspSptrsvStatsModule(SCHEDULE_NODE_PERMUTATION_MODES _mode = NO_PERMUTE) : mode(_mode) {} - std::vector get_metric_headers() const override { const std::string prefix = mode_tag(mode); - return { - prefix + "SpTrSV_Runtime_Geomean(ns)", - prefix + "SpTrSV_Runtime_Stddev", - prefix + "SpTrSV_Runtime_Q25(ns)", - prefix + "SpTrSV_Runtime_Q75(ns)" - }; + return {prefix + "SpTrSV_Runtime_Geomean(ns)", + prefix + "SpTrSV_Runtime_Stddev", + prefix + "SpTrSV_Runtime_Q25(ns)", + prefix + "SpTrSV_Runtime_Q75(ns)"}; } - std::map record_statistics( - const TargetObjectType& schedule, - std::ofstream&) const override { + std::map record_statistics(const TargetObjectType &schedule, std::ofstream &) const override { std::map stats; - if constexpr ( - std::is_same_v>> || - std::is_same_v>> - ) { - using index_t = std::conditional_t< - std::is_same_v>>, - int32_t, int64_t>; + if constexpr (std::is_same_v>> + || std::is_same_v>>) { + using index_t + = std::conditional_t>>, int32_t, int64_t>; auto instance = schedule.getInstance(); Sptrsv sim{instance}; std::vector perm; - if (mode == NO_PERMUTE){ + if (mode == NO_PERMUTE) { sim.setup_csr_no_permutation(schedule); } else if (mode == LOOP_PROCESSORS) { perm = schedule_node_permuter_basic(schedule, LOOP_PROCESSORS); - sim.setup_csr_with_permutation (schedule, perm); + sim.setup_csr_with_permutation(schedule, perm); } else if (mode == SNAKE_PROCESSORS) { perm = schedule_node_permuter_basic(schedule, SNAKE_PROCESSORS); - sim.setup_csr_with_permutation (schedule, perm); + sim.setup_csr_with_permutation(schedule, perm); } else { std::cout << "Wrong type of permutation provided" << std::endl; } @@ -132,13 +128,13 @@ class BspSptrsvStatsModule : public IStatisticModule { L_x_osp.setZero(); sim.x = &L_x_osp[0]; sim.b = &L_b_osp[0]; - std::chrono::_V2::system_clock::time_point start,end; - - if (mode == NO_PERMUTE){ + std::chrono::_V2::system_clock::time_point start, end; + + if (mode == NO_PERMUTE) { start = std::chrono::high_resolution_clock::now(); sim.lsolve_no_permutation(); end = std::chrono::high_resolution_clock::now(); - } else{ + } else { start = std::chrono::high_resolution_clock::now(); sim.lsolve_with_permutation(); end = std::chrono::high_resolution_clock::now(); @@ -149,17 +145,17 @@ class BspSptrsvStatsModule : public IStatisticModule { } // Geometric mean (requires conversion to double) - double total_log = std::accumulate(times_ns.begin(), times_ns.end(), 0.0, - [](double sum, long long val) { return sum + std::log(static_cast(val)); }); + double total_log = std::accumulate(times_ns.begin(), times_ns.end(), 0.0, [](double sum, long long val) { + return sum + std::log(static_cast(val)); + }); long long geom_mean = static_cast(std::exp(total_log / runs)); // Standard deviation double mean = std::accumulate(times_ns.begin(), times_ns.end(), 0.0) / runs; - double sq_sum = std::accumulate(times_ns.begin(), times_ns.end(), 0.0, - [mean](double acc, long long val) { - double diff = static_cast(val) - mean; - return acc + diff * diff; - }); + double sq_sum = std::accumulate(times_ns.begin(), times_ns.end(), 0.0, [mean](double acc, long long val) { + double diff = static_cast(val) - mean; + return acc + diff * diff; + }); long long stddev = static_cast(std::sqrt(sq_sum / runs)); // Quartiles @@ -168,25 +164,23 @@ class BspSptrsvStatsModule : public IStatisticModule { long long q75 = times_ns[3 * runs / 4]; auto to_str = [](long long value) { - return std::to_string(value); // no decimal points + return std::to_string(value); // no decimal points }; // Permute back if needed if (mode != NO_PERMUTE) { sim.permute_x_vector(perm); - } - + } if (!compare_vectors(L_x_ref, L_x_osp)) { std::cout << "Output is not equal" << std::endl; } - const std::string prefix = mode_tag(mode); stats[prefix + "SpTrSV_Runtime_Geomean(ns)"] = to_str(geom_mean); - stats[prefix + "SpTrSV_Runtime_Stddev"] = to_str(stddev); - stats[prefix + "SpTrSV_Runtime_Q25(ns)"] = to_str(q25); - stats[prefix + "SpTrSV_Runtime_Q75(ns)"] = to_str(q75); + stats[prefix + "SpTrSV_Runtime_Stddev"] = to_str(stddev); + stats[prefix + "SpTrSV_Runtime_Q25(ns)"] = to_str(q25); + stats[prefix + "SpTrSV_Runtime_Q75(ns)"] = to_str(q75); } else { std::cout << "Simulation is not available without the SparseMatrix type" << std::endl; @@ -195,11 +189,11 @@ class BspSptrsvStatsModule : public IStatisticModule { return stats; } - private: + private: SCHEDULE_NODE_PERMUTATION_MODES mode; - static constexpr int runs = 100; // number of runs for benchmarking + static constexpr int runs = 100; // number of runs for benchmarking }; -} // namespace osp +} // namespace osp -#endif \ No newline at end of file +#endif diff --git a/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp b/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp index 6ed263de..09a3953c 100644 --- a/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/GraphStatsModule.hpp @@ -18,42 +18,37 @@ limitations under the License. #pragma once -#include "IStatsModule.hpp" -#include "osp/bsp/model/IBspScheduleEval.hpp" -#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" // For graph_t +#include #include #include -#include - -namespace osp { -template -class GraphStatsModule : public IStatisticModule { -public: +#include "IStatsModule.hpp" +#include "osp/bsp/model/IBspScheduleEval.hpp" +#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" // For graph_t -private: - const std::vector metric_headers = { - "num_vertices", "num_edges", "avg_wavefront_size" - }; +namespace osp { -public: +template +class GraphStatsModule : public IStatisticModule { + public: + private: + const std::vector metric_headers = {"num_vertices", "num_edges", "avg_wavefront_size"}; - std::vector get_metric_headers() const override { - return metric_headers; - } + public: + std::vector get_metric_headers() const override { return metric_headers; } - std::map record_statistics( - const TargetObjectType& schedule, - std::ofstream& /*log_stream*/) const override { + std::map record_statistics(const TargetObjectType &schedule, + std::ofstream & /*log_stream*/) const override { std::map stats; const auto &graph = schedule.getInstance().getComputationalDag(); stats["num_vertices"] = std::to_string(graph.num_vertices()); stats["num_edges"] = std::to_string(graph.num_edges()); - stats["avg_wavefront_size"] = std::to_string(static_cast(graph.num_vertices()) / static_cast(longestPath(graph))); + stats["avg_wavefront_size"] + = std::to_string(static_cast(graph.num_vertices()) / static_cast(longestPath(graph))); return stats; } }; -} // namespace osp +} // namespace osp diff --git a/apps/test_suite_runner/StatsModules/IStatsModule.hpp b/apps/test_suite_runner/StatsModules/IStatsModule.hpp index dfef9049..46544af7 100644 --- a/apps/test_suite_runner/StatsModules/IStatsModule.hpp +++ b/apps/test_suite_runner/StatsModules/IStatsModule.hpp @@ -20,18 +20,19 @@ limitations under the License. #include #include +#include #include #include - #include + // #include "osp/bsp/model/BspSchedule.hpp" // TargetObject will be passed, no specific include here // Forward declarations to avoid circular dependencies -namespace osp { // Ensure this is within the osp namespace +namespace osp { // Ensure this is within the osp namespace namespace pt = boost::property_tree; -template -class IStatisticModule { // Changed from Graph_t_ to TargetObjectType +template +class IStatisticModule { // Changed from Graph_t_ to TargetObjectType public: virtual ~IStatisticModule() = default; @@ -40,9 +41,9 @@ class IStatisticModule { // Changed from Graph_t_ to TargetObjectType // Called for each generated target_object. // Returns a map of {header_name: value_string}. - virtual std::map record_statistics( - const TargetObjectType &target_object, // Changed parameter - std::ofstream &log_stream) const = 0; + virtual std::map record_statistics(const TargetObjectType &target_object, // Changed parameter + std::ofstream &log_stream) const + = 0; }; -} // namespace osp +} // namespace osp diff --git a/apps/test_suite_runner/StringToScheduler/get_coarser.hpp b/apps/test_suite_runner/StringToScheduler/get_coarser.hpp index 7c438833..4020a08e 100644 --- a/apps/test_suite_runner/StringToScheduler/get_coarser.hpp +++ b/apps/test_suite_runner/StringToScheduler/get_coarser.hpp @@ -28,35 +28,33 @@ limitations under the License. #include "../ConfigParser.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" -#include "osp/coarser/Coarser.hpp" -#include "osp/coarser/funnel/FunnelBfs.hpp" #include "osp/coarser/BspScheduleCoarser.hpp" -#include "osp/coarser/hdagg/hdagg_coarser.hpp" +#include "osp/coarser/Coarser.hpp" #include "osp/coarser/MultilevelCoarser.hpp" #include "osp/coarser/Sarkar/Sarkar.hpp" #include "osp/coarser/Sarkar/SarkarMul.hpp" -#include "osp/coarser/top_order/top_order_coarser.hpp" -#include "osp/graph_algorithms/cuthill_mckee.hpp" #include "osp/coarser/SquashA/SquashA.hpp" #include "osp/coarser/SquashA/SquashAMul.hpp" +#include "osp/coarser/funnel/FunnelBfs.hpp" +#include "osp/coarser/hdagg/hdagg_coarser.hpp" +#include "osp/coarser/top_order/top_order_coarser.hpp" +#include "osp/graph_algorithms/cuthill_mckee.hpp" namespace osp { -template -std::unique_ptr> -get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coarser_algorithm) { - +template +std::unique_ptr> get_coarser_by_name(const ConfigParser &, + const boost::property_tree::ptree &coarser_algorithm) { const std::string coarser_name = coarser_algorithm.get_child("name").get_value(); if (coarser_name == "funnel") { typename FunnelBfs::FunnelBfs_parameters funnel_parameters; if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { const auto ¶ms_pt = params_opt.get(); - funnel_parameters.funnel_incoming = - params_pt.get_optional("funnel_incoming").value_or(funnel_parameters.funnel_incoming); - funnel_parameters.use_approx_transitive_reduction = - params_pt.get_optional("use_approx_transitive_reduction") - .value_or(funnel_parameters.use_approx_transitive_reduction); + funnel_parameters.funnel_incoming + = params_pt.get_optional("funnel_incoming").value_or(funnel_parameters.funnel_incoming); + funnel_parameters.use_approx_transitive_reduction = params_pt.get_optional("use_approx_transitive_reduction") + .value_or(funnel_parameters.use_approx_transitive_reduction); } return std::make_unique>(funnel_parameters); @@ -68,11 +66,10 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa .value_or(std::numeric_limits>::max())); coarser->set_memory_threshold(params_pt.get_optional>("max_memory_weight") .value_or(std::numeric_limits>::max())); - coarser->set_communication_threshold( - params_pt.get_optional>("max_communication_weight") - .value_or(std::numeric_limits>::max())); - coarser->set_super_node_size_threshold(params_pt.get_optional("max_super_node_size") - .value_or(std::numeric_limits::max())); + coarser->set_communication_threshold(params_pt.get_optional>("max_communication_weight") + .value_or(std::numeric_limits>::max())); + coarser->set_super_node_size_threshold( + params_pt.get_optional("max_super_node_size").value_or(std::numeric_limits::max())); } return coarser; @@ -89,15 +86,11 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa .value_or(std::numeric_limits>::max())); coarser_ptr->set_memory_threshold(params_pt.get_optional>("memory_threshold") .value_or(std::numeric_limits>::max())); - coarser_ptr->set_communication_threshold( - params_pt.get_optional>("communication_threshold") - .value_or(std::numeric_limits>::max())); + coarser_ptr->set_communication_threshold(params_pt.get_optional>("communication_threshold") + .value_or(std::numeric_limits>::max())); coarser_ptr->set_super_node_size_threshold( - params_pt.get_optional("super_node_size_threshold") - .value_or(10)); - coarser_ptr->set_node_dist_threshold( - params_pt.get_optional("node_dist_threshold").value_or(10)); - + params_pt.get_optional("super_node_size_threshold").value_or(10)); + coarser_ptr->set_node_dist_threshold(params_pt.get_optional("node_dist_threshold").value_or(10)); } }; @@ -126,18 +119,16 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa set_params(coarser); return coarser; } else if (top_order_strategy == "cuthill_mckee_wavefront") { - auto coarser = - std::make_unique>(); + auto coarser = std::make_unique>(); set_params(coarser); return coarser; } else if (top_order_strategy == "cuthill_mckee_undirected") { - auto coarser = - std::make_unique>(); + auto coarser = std::make_unique>(); set_params(coarser); return coarser; } else { - std::cerr << "Warning: Unknown top_order strategy '" << top_order_strategy - << "'. Falling back to default (bfs)." << std::endl; + std::cerr << "Warning: Unknown top_order strategy '" << top_order_strategy << "'. Falling back to default (bfs)." + << std::endl; auto coarser = std::make_unique>(); set_params(coarser); return coarser; @@ -149,25 +140,40 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa const auto ¶ms_pt = params_opt.get(); params.commCost = params_pt.get_optional>("commCost").value_or(params.commCost); params.maxWeight = params_pt.get_optional>("maxWeight").value_or(params.maxWeight); - params.smallWeightThreshold = params_pt.get_optional>("smallWeightThreshold").value_or(params.smallWeightThreshold); + params.smallWeightThreshold + = params_pt.get_optional>("smallWeightThreshold").value_or(params.smallWeightThreshold); params.useTopPoset = params_pt.get_optional("useTopPoset").value_or(params.useTopPoset); params.geomDecay = params_pt.get_optional("geomDecay").value_or(params.geomDecay); params.leniency = params_pt.get_optional("leniency").value_or(params.leniency); if (auto mode_str_opt = params_pt.get_optional("mode")) { const std::string &mode_str = mode_str_opt.get(); - if (mode_str == "LINES") params.mode = SarkarParams::Mode::LINES; - else if (mode_str == "FAN_IN_FULL") params.mode = SarkarParams::Mode::FAN_IN_FULL; - else if (mode_str == "FAN_IN_PARTIAL") params.mode = SarkarParams::Mode::FAN_IN_PARTIAL; - else if (mode_str == "FAN_OUT_FULL") params.mode = SarkarParams::Mode::FAN_OUT_FULL; - else if (mode_str == "FAN_OUT_PARTIAL") params.mode = SarkarParams::Mode::FAN_OUT_PARTIAL; - else if (mode_str == "LEVEL_EVEN") params.mode = SarkarParams::Mode::LEVEL_EVEN; - else if (mode_str == "LEVEL_ODD") params.mode = SarkarParams::Mode::LEVEL_ODD; - else if (mode_str == "FAN_IN_BUFFER") params.mode = SarkarParams::Mode::FAN_IN_BUFFER; - else if (mode_str == "FAN_OUT_BUFFER") params.mode = SarkarParams::Mode::FAN_OUT_BUFFER; - else if (mode_str == "HOMOGENEOUS_BUFFER") params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER; - else throw std::invalid_argument("Invalid Sarkar mode: " + mode_str - + "!\nChoose from: LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER, HOMOGENEOUS_BUFFER."); + if (mode_str == "LINES") { + params.mode = SarkarParams::Mode::LINES; + } else if (mode_str == "FAN_IN_FULL") { + params.mode = SarkarParams::Mode::FAN_IN_FULL; + } else if (mode_str == "FAN_IN_PARTIAL") { + params.mode = SarkarParams::Mode::FAN_IN_PARTIAL; + } else if (mode_str == "FAN_OUT_FULL") { + params.mode = SarkarParams::Mode::FAN_OUT_FULL; + } else if (mode_str == "FAN_OUT_PARTIAL") { + params.mode = SarkarParams::Mode::FAN_OUT_PARTIAL; + } else if (mode_str == "LEVEL_EVEN") { + params.mode = SarkarParams::Mode::LEVEL_EVEN; + } else if (mode_str == "LEVEL_ODD") { + params.mode = SarkarParams::Mode::LEVEL_ODD; + } else if (mode_str == "FAN_IN_BUFFER") { + params.mode = SarkarParams::Mode::FAN_IN_BUFFER; + } else if (mode_str == "FAN_OUT_BUFFER") { + params.mode = SarkarParams::Mode::FAN_OUT_BUFFER; + } else if (mode_str == "HOMOGENEOUS_BUFFER") { + params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER; + } else { + throw std::invalid_argument( + "Invalid Sarkar mode: " + mode_str + + "!\nChoose from: LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, " + "LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER, HOMOGENEOUS_BUFFER."); + } } } return std::make_unique>(params); @@ -177,14 +183,18 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa auto coarser = std::make_unique>(params); if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { const auto ¶ms_pt = params_opt.get(); - params.use_structured_poset = - params_pt.get_optional("use_structured_poset").value_or(params.use_structured_poset); + params.use_structured_poset + = params_pt.get_optional("use_structured_poset").value_or(params.use_structured_poset); params.use_top_poset = params_pt.get_optional("use_top_poset").value_or(params.use_top_poset); if (auto mode_str_opt = params_pt.get_optional("mode")) { - if (mode_str_opt.get() == "EDGE_WEIGHT") params.mode = SquashAParams::Mode::EDGE_WEIGHT; - else if (mode_str_opt.get() == "TRIANGLES") params.mode = SquashAParams::Mode::TRIANGLES; - else throw std::invalid_argument("Invalid Squash mode: " + mode_str_opt.get() - + "!\nChoose from: EDGE_WEIGHT, TRIANGLES."); + if (mode_str_opt.get() == "EDGE_WEIGHT") { + params.mode = SquashAParams::Mode::EDGE_WEIGHT; + } else if (mode_str_opt.get() == "TRIANGLES") { + params.mode = SquashAParams::Mode::TRIANGLES; + } else { + throw std::invalid_argument("Invalid Squash mode: " + mode_str_opt.get() + + "!\nChoose from: EDGE_WEIGHT, TRIANGLES."); + } } } coarser->setParams(params); @@ -198,9 +208,9 @@ get_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coa throw std::invalid_argument("Invalid coarser name: " + coarser_name); } -template -std::unique_ptr> -get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree::ptree &coarser_algorithm) { +template +std::unique_ptr> get_multilevel_coarser_by_name( + const ConfigParser &, const boost::property_tree::ptree &coarser_algorithm) { const std::string coarser_name = coarser_algorithm.get_child("name").get_value(); if (coarser_name == "Sarkar") { @@ -219,23 +229,28 @@ get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree: } std::sort(ml_params.commCostVec.begin(), ml_params.commCostVec.end()); } - ml_params.maxWeight = - params_pt.get_optional>("maxWeight").value_or(ml_params.maxWeight); - ml_params.smallWeightThreshold = - params_pt.get_optional>("smallWeightThreshold").value_or(ml_params.smallWeightThreshold); - ml_params.max_num_iteration_without_changes = - params_pt.get_optional("max_num_iteration_without_changes") - .value_or(ml_params.max_num_iteration_without_changes); + ml_params.maxWeight = params_pt.get_optional>("maxWeight").value_or(ml_params.maxWeight); + ml_params.smallWeightThreshold + = params_pt.get_optional>("smallWeightThreshold").value_or(ml_params.smallWeightThreshold); + ml_params.max_num_iteration_without_changes = params_pt.get_optional("max_num_iteration_without_changes") + .value_or(ml_params.max_num_iteration_without_changes); if (auto mode_str_opt = params_pt.get_optional("buffer_merge_mode")) { const std::string &mode_str = mode_str_opt.get(); - if (mode_str == "OFF") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::OFF; - else if (mode_str == "FAN_IN") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_IN; - else if (mode_str == "FAN_OUT") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_OUT; - else if (mode_str == "HOMOGENEOUS") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::HOMOGENEOUS; - else if (mode_str == "FULL") ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FULL; - else throw std::invalid_argument("Invalid Sarkar Buffer Merge mode: " + mode_str - + "!\nChoose from: OFF, FAN_IN, FAN_OUT, HOMOGENEOUS, FULL."); + if (mode_str == "OFF") { + ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::OFF; + } else if (mode_str == "FAN_IN") { + ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_IN; + } else if (mode_str == "FAN_OUT") { + ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FAN_OUT; + } else if (mode_str == "HOMOGENEOUS") { + ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::HOMOGENEOUS; + } else if (mode_str == "FULL") { + ml_params.buffer_merge_mode = SarkarParams::BufferMergeMode::FULL; + } else { + throw std::invalid_argument("Invalid Sarkar Buffer Merge mode: " + mode_str + + "!\nChoose from: OFF, FAN_IN, FAN_OUT, HOMOGENEOUS, FULL."); + } } } @@ -248,18 +263,16 @@ get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree: if (auto params_opt = coarser_algorithm.get_child_optional("parameters")) { const auto ¶ms_pt = params_opt.get(); - params.geom_decay_num_nodes = - params_pt.get_optional("geom_decay_num_nodes").value_or(params.geom_decay_num_nodes); + params.geom_decay_num_nodes + = params_pt.get_optional("geom_decay_num_nodes").value_or(params.geom_decay_num_nodes); params.poisson_par = params_pt.get_optional("poisson_par").value_or(params.poisson_par); params.noise = params_pt.get_optional("noise").value_or(params.noise); - params.num_rep_without_node_decrease = - params_pt.get_optional("num_rep_without_node_decrease") - .value_or(params.num_rep_without_node_decrease); - params.temperature_multiplier = - params_pt.get_optional("temperature_multiplier").value_or(params.temperature_multiplier); - params.number_of_temperature_increases = - params_pt.get_optional("number_of_temperature_increases") - .value_or(params.number_of_temperature_increases); + params.num_rep_without_node_decrease + = params_pt.get_optional("num_rep_without_node_decrease").value_or(params.num_rep_without_node_decrease); + params.temperature_multiplier + = params_pt.get_optional("temperature_multiplier").value_or(params.temperature_multiplier); + params.number_of_temperature_increases = params_pt.get_optional("number_of_temperature_increases") + .value_or(params.number_of_temperature_increases); if (auto mode_str_opt = params_pt.get_optional("mode")) { if (mode_str_opt.get() == "EDGE_WEIGHT") { @@ -268,7 +281,7 @@ get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree: params.mode = SquashAParams::Mode::TRIANGLES; } else { throw std::invalid_argument("Invalid Squash mode: " + mode_str_opt.get() - + "!\nChoose from: EDGE_WEIGHT, TRIANGLES."); + + "!\nChoose from: EDGE_WEIGHT, TRIANGLES."); } } @@ -282,4 +295,4 @@ get_multilevel_coarser_by_name(const ConfigParser &, const boost::property_tree: throw std::invalid_argument("Invalid multilevel coarser name: " + coarser_name); } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp b/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp index 369c9fa6..3ce4f75a 100644 --- a/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp +++ b/apps/test_suite_runner/StringToScheduler/run_bsp_recomp_scheduler.hpp @@ -25,46 +25,44 @@ limitations under the License. #include #include -#include "osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "../ConfigParser.hpp" #include "osp/bsp/model/BspScheduleRecomp.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" #include "run_bsp_scheduler.hpp" - namespace osp { const std::set get_available_bsp_recomp_scheduler_names() { return {"GreedyRecomputer"}; } -template -RETURN_STATUS run_bsp_recomp_scheduler(const ConfigParser &parser, const boost::property_tree::ptree &algorithm, - BspScheduleRecomp &schedule) { - - //const unsigned timeLimit = parser.global_params.get_child("timeLimit").get_value(); - // const bool use_memory_constraint = parser.global_params.get_child("use_memory_constraints").get_value(); +template +RETURN_STATUS run_bsp_recomp_scheduler(const ConfigParser &parser, + const boost::property_tree::ptree &algorithm, + BspScheduleRecomp &schedule) { + // const unsigned timeLimit = parser.global_params.get_child("timeLimit").get_value(); + // const bool use_memory_constraint = parser.global_params.get_child("use_memory_constraints").get_value(); std::cout << "Running algorithm: " << algorithm.get_child("name").get_value() << std::endl; if (algorithm.get_child("name").get_value() == "GreedyRecomputer") { - BspSchedule bsp_schedule(schedule.getInstance()); RETURN_STATUS status = run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), bsp_schedule); BspScheduleCS initial_schedule(std::move(bsp_schedule)); - if (status == RETURN_STATUS::ERROR) + if (status == RETURN_STATUS::ERROR) { return RETURN_STATUS::ERROR; + } GreedyRecomputer scheduler; return scheduler.computeRecompSchedule(initial_schedule, schedule); } else { - throw std::invalid_argument("Parameter error: Unknown algorithm.\n"); } } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp index 08209efd..996faa08 100644 --- a/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp +++ b/apps/test_suite_runner/StringToScheduler/run_bsp_scheduler.hpp @@ -50,18 +50,30 @@ limitations under the License. #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" #ifdef COPT -#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp" +# include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp" // #include "osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp" #endif namespace osp { const std::set get_available_bsp_scheduler_names() { - return {"Serial", "GreedyBsp", "GrowLocal", "BspLocking", "Cilk", "Etf", "GreedyRandom", - "GreedyChildren", "Variance", "MultiHC", "LocalSearch", "Coarser", "FullILP", "MultiLevel"}; + return {"Serial", + "GreedyBsp", + "GrowLocal", + "BspLocking", + "Cilk", + "Etf", + "GreedyRandom", + "GreedyChildren", + "Variance", + "MultiHC", + "LocalSearch", + "Coarser", + "FullILP", + "MultiLevel"}; } -template +template std::unique_ptr> get_bsp_improver_by_name(const ConfigParser &, const boost::property_tree::ptree &algorithm) { const std::string improver_name = algorithm.get_child("name").get_value(); @@ -77,10 +89,9 @@ std::unique_ptr> get_bsp_improver_by_name(const Co throw std::invalid_argument("Invalid improver name: " + improver_name); } -template +template std::unique_ptr> get_base_bsp_scheduler_by_name(const ConfigParser &parser, const boost::property_tree::ptree &algorithm) { - const std::string id = algorithm.get_child("id").get_value(); if (id == "Serial") { @@ -88,42 +99,38 @@ std::unique_ptr> get_base_bsp_scheduler_by_name(const ConfigP return scheduler; } else if (id == "GreedyBsp") { - float max_percent_idle_processors = - algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value(); - bool increase_parallelism_in_new_superstep = - algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value(); - auto scheduler = std::make_unique>(max_percent_idle_processors, - increase_parallelism_in_new_superstep); + float max_percent_idle_processors + = algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value(); + bool increase_parallelism_in_new_superstep + = algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value(); + auto scheduler + = std::make_unique>(max_percent_idle_processors, increase_parallelism_in_new_superstep); return scheduler; } else if (id == "GrowLocal") { GrowLocalAutoCores_Params> params; params.minSuperstepSize = algorithm.get_child("parameters").get_child("minSuperstepSize").get_value(); - params.syncCostMultiplierMinSuperstepWeight = algorithm.get_child("parameters") - .get_child("syncCostMultiplierMinSuperstepWeight") - .get_value>(); - params.syncCostMultiplierParallelCheck = algorithm.get_child("parameters") - .get_child("syncCostMultiplierParallelCheck") - .get_value>(); + params.syncCostMultiplierMinSuperstepWeight + = algorithm.get_child("parameters").get_child("syncCostMultiplierMinSuperstepWeight").get_value>(); + params.syncCostMultiplierParallelCheck + = algorithm.get_child("parameters").get_child("syncCostMultiplierParallelCheck").get_value>(); return std::make_unique>(params); } else if (id == "BspLocking") { - float max_percent_idle_processors = - algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value(); - bool increase_parallelism_in_new_superstep = - algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value(); - auto scheduler = - std::make_unique>(max_percent_idle_processors, increase_parallelism_in_new_superstep); + float max_percent_idle_processors + = algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value(); + bool increase_parallelism_in_new_superstep + = algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value(); + auto scheduler = std::make_unique>(max_percent_idle_processors, increase_parallelism_in_new_superstep); return scheduler; } else if (id == "Cilk") { auto scheduler = std::make_unique>(); - algorithm.get_child("parameters").get_child("mode").get_value() == "SJF" - ? scheduler->setMode(CilkMode::SJF) - : scheduler->setMode(CilkMode::CILK); + algorithm.get_child("parameters").get_child("mode").get_value() == "SJF" ? scheduler->setMode(CilkMode::SJF) + : scheduler->setMode(CilkMode::CILK); return scheduler; } else if (id == "Etf") { @@ -142,12 +149,12 @@ std::unique_ptr> get_base_bsp_scheduler_by_name(const ConfigP return scheduler; } else if (id == "Variance") { - float max_percent_idle_processors = - algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value(); - bool increase_parallelism_in_new_superstep = - algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value(); - auto scheduler = std::make_unique>(max_percent_idle_processors, - increase_parallelism_in_new_superstep); + float max_percent_idle_processors + = algorithm.get_child("parameters").get_child("max_percent_idle_processors").get_value(); + bool increase_parallelism_in_new_superstep + = algorithm.get_child("parameters").get_child("increase_parallelism_in_new_superstep").get_value(); + auto scheduler + = std::make_unique>(max_percent_idle_processors, increase_parallelism_in_new_superstep); return scheduler; } @@ -160,8 +167,7 @@ std::unique_ptr> get_base_bsp_scheduler_by_name(const ConfigP unsigned step = algorithm.get_child("parameters").get_child("hill_climbing_steps").get_value(); scheduler->setNumberOfHcSteps(step); - const double contraction_rate = - algorithm.get_child("parameters").get_child("contraction_rate").get_value(); + const double contraction_rate = algorithm.get_child("parameters").get_child("contraction_rate").get_value(); scheduler->setContractionRate(contraction_rate); scheduler->useLinearRefinementSteps(20U); scheduler->setMinTargetNrOfNodes(100U); @@ -172,29 +178,28 @@ std::unique_ptr> get_base_bsp_scheduler_by_name(const ConfigP throw std::invalid_argument("Invalid base scheduler name: " + id); } -template -RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::property_tree::ptree &algorithm, +template +RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, + const boost::property_tree::ptree &algorithm, BspSchedule &schedule) { - - using vertex_type_t_or_default = - std::conditional_t, v_type_t, unsigned>; - using edge_commw_t_or_default = - std::conditional_t, e_commw_t, v_commw_t>; - using boost_graph_t = boost_graph, v_commw_t, v_memw_t, - vertex_type_t_or_default, edge_commw_t_or_default>; + using vertex_type_t_or_default + = std::conditional_t, v_type_t, unsigned>; + using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; + using boost_graph_t + = boost_graph, v_commw_t, v_memw_t, vertex_type_t_or_default, edge_commw_t_or_default>; const std::string id = algorithm.get_child("id").get_value(); std::cout << "Running algorithm: " << id << std::endl; if (id == "LocalSearch") { - RETURN_STATUS status = - run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule); - if (status == RETURN_STATUS::ERROR) + RETURN_STATUS status = run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule); + if (status == RETURN_STATUS::ERROR) { return RETURN_STATUS::ERROR; + } - std::unique_ptr> improver = - get_bsp_improver_by_name(parser, algorithm.get_child("parameters").get_child("improver")); + std::unique_ptr> improver + = get_bsp_improver_by_name(parser, algorithm.get_child("parameters").get_child("improver")); return improver->improveSchedule(schedule); #ifdef COPT } else if (id == "FullILP") { @@ -207,8 +212,8 @@ RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::propert // initial solution if (algorithm.get_child("parameters").get_child("use_initial_solution").get_value()) { - std::string init_sched = - algorithm.get_child("parameters").get_child("initial_solution_scheduler").get_value(); + std::string init_sched + = algorithm.get_child("parameters").get_child("initial_solution_scheduler").get_value(); if (init_sched == "FullILP") { throw std::invalid_argument("Parameter error: Initial solution cannot be FullILP.\n"); } @@ -228,48 +233,49 @@ RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::propert // intermediate solutions if (algorithm.get_child("parameters").get_child("write_intermediate_solutions").get_value()) { scheduler.enableWriteIntermediateSol( - algorithm.get_child("parameters") - .get_child("intermediate_solutions_directory") - .get_value(), + algorithm.get_child("parameters").get_child("intermediate_solutions_directory").get_value(), algorithm.get_child("parameters").get_child("intermediate_solutions_prefix").get_value()); } return scheduler.computeScheduleWithTimeLimit(schedule, timeLimit); #endif } else if (id == "Coarser") { - std::unique_ptr> coarser = - get_coarser_by_name(parser, algorithm.get_child("parameters").get_child("coarser")); + std::unique_ptr> coarser + = get_coarser_by_name(parser, algorithm.get_child("parameters").get_child("coarser")); const auto &instance = schedule.getInstance(); BspInstance instance_coarse; std::vector> reverse_vertex_map; - bool status = coarser->coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), - reverse_vertex_map); - if (!status) + bool status + = coarser->coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), reverse_vertex_map); + if (!status) { return RETURN_STATUS::ERROR; + } instance_coarse.getArchitecture() = instance.getArchitecture(); instance_coarse.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); BspSchedule schedule_coarse(instance_coarse); - const auto status_coarse = - run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule_coarse); - if (status_coarse != RETURN_STATUS::OSP_SUCCESS and status_coarse != RETURN_STATUS::BEST_FOUND) + const auto status_coarse + = run_bsp_scheduler(parser, algorithm.get_child("parameters").get_child("scheduler"), schedule_coarse); + if (status_coarse != RETURN_STATUS::OSP_SUCCESS and status_coarse != RETURN_STATUS::BEST_FOUND) { return status_coarse; + } status = coarser_util::pull_back_schedule(schedule_coarse, reverse_vertex_map, schedule); - if (!status) + if (!status) { return RETURN_STATUS::ERROR; + } return RETURN_STATUS::OSP_SUCCESS; } else if (id == "MultiLevel") { - std::unique_ptr> ml_coarser = - get_multilevel_coarser_by_name( - parser, algorithm.get_child("parameters").get_child("coarser")); - std::unique_ptr> improver = - get_bsp_improver_by_name(parser, algorithm.get_child("parameters").get_child("improver")); - std::unique_ptr> scheduler = get_base_bsp_scheduler_by_name( - parser, algorithm.get_child("parameters").get_child("scheduler")); + std::unique_ptr> ml_coarser + = get_multilevel_coarser_by_name(parser, + algorithm.get_child("parameters").get_child("coarser")); + std::unique_ptr> improver + = get_bsp_improver_by_name(parser, algorithm.get_child("parameters").get_child("improver")); + std::unique_ptr> scheduler + = get_base_bsp_scheduler_by_name(parser, algorithm.get_child("parameters").get_child("scheduler")); MultilevelCoarseAndSchedule coarse_and_schedule(*scheduler, *improver, *ml_coarser); return coarse_and_schedule.computeSchedule(schedule); @@ -279,4 +285,4 @@ RETURN_STATUS run_bsp_scheduler(const ConfigParser &parser, const boost::propert } } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp b/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp index 3dd05318..a03bcc67 100644 --- a/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp +++ b/apps/test_suite_runner/StringToScheduler/run_pebbler.hpp @@ -32,21 +32,19 @@ namespace osp { const std::set get_available_pebbler_names() { return {"Pebbler"}; } -template -RETURN_STATUS run_pebbler(const ConfigParser &parser, const boost::property_tree::ptree &algorithm, +template +RETURN_STATUS run_pebbler(const ConfigParser &parser, + const boost::property_tree::ptree &algorithm, PebblingSchedule &schedule) { - // const unsigned timeLimit = parser.global_params.get_child("timeLimit").get_value(); // const bool use_memory_constraint = parser.global_params.get_child("use_memory_constraints").get_value(); std::cout << "Running algorithm: " << algorithm.get_child("name").get_value() << std::endl; if (algorithm.get_child("name").get_value() == "Pebbler") { - } else { - throw std::invalid_argument("Parameter error: Unknown algorithm.\n"); } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/Balanced_Coin_Flips.hpp b/include/osp/auxiliary/Balanced_Coin_Flips.hpp index 7443719b..44676800 100644 --- a/include/osp/auxiliary/Balanced_Coin_Flips.hpp +++ b/include/osp/auxiliary/Balanced_Coin_Flips.hpp @@ -126,8 +126,7 @@ class Biased_Random_with_side_bias : public BalancedCoinFlips { throw std::runtime_error("true_bias is too large!"); } - unsigned die_size = - (side_ratio.first + side_ratio.second) * genuine_random_size + static_cast(abs_true_bias); + unsigned die_size = (side_ratio.first + side_ratio.second) * genuine_random_size + static_cast(abs_true_bias); if (die_size > static_cast(std::numeric_limits::max())) { throw std::runtime_error("die_size is too large!"); @@ -164,4 +163,4 @@ class Biased_Random_with_side_bias : public BalancedCoinFlips { const std::pair side_ratio; }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/datastructures/bit_mask.hpp b/include/osp/auxiliary/datastructures/bit_mask.hpp index 225e16b8..a60c44b9 100644 --- a/include/osp/auxiliary/datastructures/bit_mask.hpp +++ b/include/osp/auxiliary/datastructures/bit_mask.hpp @@ -25,9 +25,9 @@ namespace osp { struct BitMask { std::vector mask; - BitMask(std::size_t n) : mask(std::vector(n, false)) { }; + BitMask(std::size_t n) : mask(std::vector(n, false)) {}; - BitMask& operator++() { + BitMask &operator++() { for (std::size_t i = 0; i < mask.size(); ++i) { if (mask[i]) { mask[i] = false; @@ -46,4 +46,4 @@ struct BitMask { }; }; -} // end namespace osp \ No newline at end of file +} // end namespace osp diff --git a/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp b/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp index 2f73f6e0..212c28ab 100644 --- a/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp +++ b/include/osp/auxiliary/datastructures/heaps/DaryHeap.hpp @@ -39,7 +39,8 @@ namespace osp { * @tparam D The number of children for each node (the 'd' in d-ary). Must be >= 2. * @tparam Compare The comparison function object type. */ -template class DaryHeap { +template +class DaryHeap { static_assert(D >= 2, "D-ary heap must have at least 2 children per node."); private: @@ -169,8 +170,9 @@ template class } void sift_up(size_t index) { - if (index == 0) + if (index == 0) { return; + } Key key_to_sift = std::move(heap[index]); const Value &value_to_sift = node_info.at(key_to_sift).value; @@ -238,4 +240,4 @@ using MaxIndexedHeap = IndexedHeap>; template using MinIndexedHeap = IndexedHeap>; -} // namespace osp +} // namespace osp diff --git a/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp b/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp index 39bb79f4..fe505ab1 100644 --- a/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp +++ b/include/osp/auxiliary/datastructures/heaps/PairingHeap.hpp @@ -18,22 +18,23 @@ limitations under the License. #pragma once -#include #include +#include #include #include #include namespace osp { -template class PairingHeap { +template +class PairingHeap { private: struct Node { Key key; Value value; - Node *child = nullptr; // Leftmost child - Node *next_sibling = nullptr; // Sibling to the right - Node *prev_or_parent = nullptr; // If leftmost child, parent; otherwise, left sibling. + Node *child = nullptr; // Leftmost child + Node *next_sibling = nullptr; // Sibling to the right + Node *prev_or_parent = nullptr; // If leftmost child, parent; otherwise, left sibling. }; Node *root = nullptr; @@ -43,10 +44,12 @@ template class PairingHeap { // Melds two heaps together. Node *meld(Node *heap1, Node *heap2) { - if (!heap1) + if (!heap1) { return heap2; - if (!heap2) + } + if (!heap2) { return heap1; + } if (comp(heap2->value, heap1->value)) { std::swap(heap1, heap2); @@ -108,9 +111,9 @@ template class PairingHeap { return; } - if (node->prev_or_parent->child == node) { // is leftmost child + if (node->prev_or_parent->child == node) { // is leftmost child node->prev_or_parent->child = node->next_sibling; - } else { // is not leftmost child + } else { // is not leftmost child node->prev_or_parent->next_sibling = node->next_sibling; } if (node->next_sibling) { @@ -122,18 +125,19 @@ template class PairingHeap { public: PairingHeap() = default; + ~PairingHeap() { clear(); } - - PairingHeap(const PairingHeap& other) : num_elements(other.num_elements), comp(other.comp) { + + PairingHeap(const PairingHeap &other) : num_elements(other.num_elements), comp(other.comp) { root = nullptr; if (!other.root) { return; } - std::unordered_map old_to_new; - std::vector q; + std::unordered_map old_to_new; + std::vector q; q.reserve(other.num_elements); - + // Create root root = new Node{other.root->key, other.root->value}; node_map[root->key] = root; @@ -141,15 +145,15 @@ template class PairingHeap { q.push_back(other.root); size_t head = 0; - while(head < q.size()) { - const Node* old_parent = q[head++]; - Node* new_parent = old_to_new[old_parent]; + while (head < q.size()) { + const Node *old_parent = q[head++]; + Node *new_parent = old_to_new[old_parent]; if (old_parent->child) { - const Node* old_child = old_parent->child; - + const Node *old_child = old_parent->child; + // First child - Node* new_child = new Node{old_child->key, old_child->value}; + Node *new_child = new Node{old_child->key, old_child->value}; new_parent->child = new_child; new_child->prev_or_parent = new_parent; node_map[new_child->key] = new_child; @@ -157,11 +161,11 @@ template class PairingHeap { q.push_back(old_child); // Siblings - Node* prev_new_sibling = new_child; - while(old_child->next_sibling) { + Node *prev_new_sibling = new_child; + while (old_child->next_sibling) { old_child = old_child->next_sibling; new_child = new Node{old_child->key, old_child->value}; - + prev_new_sibling->next_sibling = new_child; new_child->prev_or_parent = prev_new_sibling; @@ -175,7 +179,7 @@ template class PairingHeap { } } - PairingHeap& operator=(const PairingHeap& other) { + PairingHeap &operator=(const PairingHeap &other) { if (this != &other) { PairingHeap temp(other); std::swap(root, temp.root); @@ -205,7 +209,7 @@ template class PairingHeap { const auto pair = node_map.emplace(key, new_node); const bool &success = pair.second; if (!success) { - delete new_node; // Avoid memory leak if key already exists + delete new_node; // Avoid memory leak if key already exists throw std::invalid_argument("Key already exists in the heap."); } root = meld(root, new_node); @@ -248,13 +252,13 @@ template class PairingHeap { Node *node = it->second; const Value old_value = node->value; - if (comp(new_value, old_value)) { // Decrease key + if (comp(new_value, old_value)) { // Decrease key node->value = new_value; if (node != root) { cut(node); root = meld(root, node); } - } else if (comp(old_value, new_value)) { // Increase key + } else if (comp(old_value, new_value)) { // Increase key node->value = new_value; if (node != root) { cut(node); @@ -354,13 +358,13 @@ template class PairingHeap { top_keys.reserve(limit); } - const Value& top_value = root->value; - std::vector q; + const Value &top_value = root->value; + std::vector q; q.push_back(root); size_t head = 0; while (head < q.size()) { - const Node* current = q[head++]; + const Node *current = q[head++]; if (comp(top_value, current->value)) { continue; @@ -371,7 +375,7 @@ template class PairingHeap { return top_keys; } - Node* child = current->child; + Node *child = current->child; while (child) { q.push_back(child); child = child->next_sibling; @@ -387,4 +391,4 @@ using MaxPairingHeap = PairingHeap>; template using MinPairingHeap = PairingHeap>; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/datastructures/union_find.hpp b/include/osp/auxiliary/datastructures/union_find.hpp index 559d5b6b..f955eddc 100644 --- a/include/osp/auxiliary/datastructures/union_find.hpp +++ b/include/osp/auxiliary/datastructures/union_find.hpp @@ -29,9 +29,9 @@ limitations under the License. namespace osp { /// @brief Structure to execute a union-find algorithm -template +template struct union_find_object { - const T name; // unique identifier + const T name; // unique identifier index_t parent_index; unsigned rank; workw_t weight; @@ -43,11 +43,11 @@ struct union_find_object { } union_find_object(const union_find_object &other) = default; - union_find_object& operator=(const union_find_object &other) = default; + union_find_object &operator=(const union_find_object &other) = default; }; /// @brief Class to execute a union-find algorithm -template +template class Union_Find_Universe { private: std::vector> universe; @@ -91,7 +91,6 @@ class Union_Find_Universe { index_t get_index_from_name(const T &name) const { return names_to_indices.at(name); } public: - void reset() { universe.clear(); names_to_indices.clear(); @@ -108,9 +107,7 @@ class Union_Find_Universe { /// @brief Joins two components /// @param name of object to join /// @param other_name of object to join - void join_by_name(const T &name, const T &other_name) { - join(names_to_indices.at(name), names_to_indices.at(other_name)); - } + void join_by_name(const T &name, const T &other_name) { join(names_to_indices.at(name), names_to_indices.at(other_name)); } /// @brief Retrieves the current number of connected components std::size_t get_number_of_connected_components() const { return component_indices.size(); } @@ -140,8 +137,7 @@ class Union_Find_Universe { std::vector> component_names_weights_and_memory; component_names_weights_and_memory.reserve(component_indices.size()); for (auto &indx : component_indices) { - component_names_weights_and_memory.emplace_back( - {universe[indx].name, universe[indx].weight, universe[indx].memory}); + component_names_weights_and_memory.emplace_back({universe[indx].name, universe[indx].weight, universe[indx].memory}); } return component_names_weights_and_memory; } @@ -342,8 +338,7 @@ class Union_Find_Universe { /// @param names of objects /// @param weights of objects /// @param memories of objects - void add_object(const std::vector &names, const std::vector &weights, - const std::vector &memories) { + void add_object(const std::vector &names, const std::vector &weights, const std::vector &memories) { if (names.size() != weights.size()) { throw std::runtime_error("Vectors of names and weights must be of equal length."); } @@ -379,25 +374,23 @@ class Union_Find_Universe { /// @brief Initiates a union-find structure /// @param names of objects /// @param weights of objects - explicit Union_Find_Universe(const std::vector &names, const std::vector &weights) { - add_object(names, weights); - } + explicit Union_Find_Universe(const std::vector &names, const std::vector &weights) { add_object(names, weights); } /// @brief Initiates a union-find structure /// @param names of objects /// @param weights of objects - explicit Union_Find_Universe(const std::vector &names, const std::vector &weights, + explicit Union_Find_Universe(const std::vector &names, + const std::vector &weights, const std::vector &memories) { add_object(names, weights, memories); } Union_Find_Universe(const Union_Find_Universe &other) = default; - Union_Find_Universe& operator=(const Union_Find_Universe &other) = default; + Union_Find_Universe &operator=(const Union_Find_Universe &other) = default; }; -template -using union_find_universe_t = Union_Find_Universe, vertex_idx_t, v_workw_t, - v_memw_t>; - +template +using union_find_universe_t + = Union_Find_Universe, vertex_idx_t, v_workw_t, v_memw_t>; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/hash_util.hpp b/include/osp/auxiliary/hash_util.hpp index 80699cbc..825e5ba5 100644 --- a/include/osp/auxiliary/hash_util.hpp +++ b/include/osp/auxiliary/hash_util.hpp @@ -22,29 +22,32 @@ limitations under the License. namespace osp { -template +template struct uniform_node_hash_func { using result_type = std::size_t; - result_type operator()(const VertexType& ) { return defautlVal; } -}; + result_type operator()(const VertexType &) { return defautlVal; } +}; -template +template struct vector_node_hash_func { - const std::vector& node_hashes_; - vector_node_hash_func(const std::vector& node_hashes) : node_hashes_(node_hashes) {} + const std::vector &node_hashes_; + + vector_node_hash_func(const std::vector &node_hashes) : node_hashes_(node_hashes) {} + using result_type = std::size_t; - result_type operator()(const VertexType& v) const { return node_hashes_[v]; } + + result_type operator()(const VertexType &v) const { return node_hashes_[v]; } }; -template +template void hash_combine(std::size_t &seed, const T &v) { std::hash hasher; seed ^= hasher(v) + 0x9e3779b9 + (seed << 6) + (seed >> 2); } struct pair_hash { - template + template std::size_t operator()(const std::pair &p) const { std::size_t h1 = std::hash{}(p.first); @@ -57,4 +60,4 @@ struct pair_hash { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/io/DotFileWriter.hpp b/include/osp/auxiliary/io/DotFileWriter.hpp index 907fcb06..ef75c39f 100644 --- a/include/osp/auxiliary/io/DotFileWriter.hpp +++ b/include/osp/auxiliary/io/DotFileWriter.hpp @@ -18,19 +18,19 @@ limitations under the License. #pragma once +#include +#include + #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/BspScheduleRecomp.hpp" #include "osp/concepts/computational_dag_concept.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include -#include - namespace osp { class DotFileWriter { private: - template + template struct EdgeWriter_DOT { const Graph_t &graph; @@ -44,9 +44,8 @@ class DotFileWriter { } }; - template + template struct VertexWriterSchedule_DOT { - const BspSchedule &schedule; VertexWriterSchedule_DOT(const BspSchedule &schedule_) : schedule(schedule_) {} @@ -58,20 +57,17 @@ class DotFileWriter { << "mem_weight=\"" << schedule.getInstance().getComputationalDag().vertex_mem_weight(i) << "\";"; if constexpr (has_typed_vertices_v) { - out << "type=\"" << schedule.getInstance().getComputationalDag().vertex_type(i) << "\";"; } - out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\"" - << schedule.assignedSuperstep(i) << "\";"; + out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\"" << schedule.assignedSuperstep(i) << "\";"; out << "]"; } }; - template + template struct VertexWriterScheduleRecomp_DOT { - const BspScheduleRecomp &schedule; VertexWriterScheduleRecomp_DOT(const BspScheduleRecomp &schedule_) : schedule(schedule_) {} @@ -83,7 +79,6 @@ class DotFileWriter { << "mem_weight=\"" << schedule.getInstance().getComputationalDag().vertex_mem_weight(i) << "\";"; if constexpr (has_typed_vertices_v) { - out << "type=\"" << schedule.getInstance().getComputationalDag().vertex_type(i) << "\";"; } @@ -101,9 +96,7 @@ class DotFileWriter { bool found = false; for (const auto &[key, val] : schedule.getCommunicationSchedule()) { - if (std::get<0>(key) == i) { - if (!found) { out << "cs=\"["; found = true; @@ -123,7 +116,7 @@ class DotFileWriter { } }; - template + template struct VertexWriterDuplicateRecompSchedule_DOT { const Graph_t &graph; const std::vector name; @@ -136,21 +129,18 @@ class DotFileWriter { std::vector &node_to_superstep_) : graph(graph_), name(name_), node_to_proc(node_to_proc_), node_to_superstep(node_to_superstep_) {} - template + template void operator()(std::ostream &out, const VertexOrEdge &i) const { - out << i << " [" << "label=\"" << name[i] << "\";" << "work_weight=\"" - << graph.vertex_work_weight(i) << "\";" << "comm_weight=\"" - << graph.vertex_comm_weight(i) << "\";" << "mem_weight=\"" - << graph.vertex_mem_weight(i) << "\";" << "proc=\"" - << node_to_proc[i] << "\";" << "superstep=\"" << node_to_superstep[i] << "\";"; + out << i << " [" << "label=\"" << name[i] << "\";" << "work_weight=\"" << graph.vertex_work_weight(i) << "\";" + << "comm_weight=\"" << graph.vertex_comm_weight(i) << "\";" << "mem_weight=\"" << graph.vertex_mem_weight(i) + << "\";" << "proc=\"" << node_to_proc[i] << "\";" << "superstep=\"" << node_to_superstep[i] << "\";"; out << "]"; } }; - template + template struct VertexWriterScheduleCS_DOT { - const BspScheduleCS &schedule; VertexWriterScheduleCS_DOT(const BspScheduleCS &schedule_) : schedule(schedule_) {} @@ -162,19 +152,15 @@ class DotFileWriter { << "mem_weight=\"" << schedule.getInstance().getComputationalDag().vertex_mem_weight(i) << "\";"; if constexpr (has_typed_vertices_v) { - out << "type=\"" << schedule.getInstance().getComputationalDag().vertex_type(i) << "\";"; } - out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\"" - << schedule.assignedSuperstep(i) << "\";"; + out << "proc=\"" << schedule.assignedProcessor(i) << "\";" << "superstep=\"" << schedule.assignedSuperstep(i) << "\";"; bool found = false; for (const auto &[key, val] : schedule.getCommunicationSchedule()) { - if (std::get<0>(key) == i) { - if (!found) { out << "cs=\"["; found = true; @@ -194,10 +180,8 @@ class DotFileWriter { } }; - - template + template struct VertexWriterGraph_DOT { - const Graph_t &graph; VertexWriterGraph_DOT(const Graph_t &graph_) : graph(graph_) {} @@ -209,7 +193,6 @@ class DotFileWriter { << "mem_weight=\"" << graph.vertex_mem_weight(i) << "\";"; if constexpr (has_typed_vertices_v) { - out << "type=\"" << graph.vertex_type(i) << "\";"; } @@ -217,60 +200,62 @@ class DotFileWriter { } }; - template + template struct ColoredVertexWriterGraph_DOT { - const Graph_t &graph; const color_container_t &colors; std::vector color_strings; std::vector shape_strings; ColoredVertexWriterGraph_DOT(const Graph_t &graph_, const color_container_t &colors_) : graph(graph_), colors(colors_) { - - color_strings = { - "lightcoral", "palegreen", "lightblue", "gold", "orchid", "sandybrown", "aquamarine", "burlywood", - "hotpink", "yellowgreen", "skyblue", "khaki", "violet", "salmon", "turquoise", "tan", - "deeppink", "chartreuse", "deepskyblue", "lemonchiffon", "magenta", "orangered", "cyan", "wheat", - "mediumvioletred", "limegreen", "dodgerblue", "lightyellow", "darkviolet", "tomato", "paleturquoise", "bisque", - "crimson", "lime", "steelblue", "papayawhip", "purple", "darkorange", "cadetblue", "peachpuff", - "indianred", "springgreen", "powderblue", "cornsilk", "mediumorchid", "chocolate", "darkturquoise", "navajowhite", - "firebrick", "seagreen", "royalblue", "lightgoldenrodyellow", "darkmagenta", "coral", "teal", "moccasin", - "maroon", "forestgreen", "blue", "yellow", "darkorchid", "red", "green", "navy", - "darkred", "darkgreen", "mediumblue", "ivory", "indigo", "orange", "darkcyan", "antiquewhite" - }; - - shape_strings = { - "oval", "rect", "hexagon", "parallelogram" - }; + color_strings = {"lightcoral", "palegreen", "lightblue", "gold", + "orchid", "sandybrown", "aquamarine", "burlywood", + "hotpink", "yellowgreen", "skyblue", "khaki", + "violet", "salmon", "turquoise", "tan", + "deeppink", "chartreuse", "deepskyblue", "lemonchiffon", + "magenta", "orangered", "cyan", "wheat", + "mediumvioletred", "limegreen", "dodgerblue", "lightyellow", + "darkviolet", "tomato", "paleturquoise", "bisque", + "crimson", "lime", "steelblue", "papayawhip", + "purple", "darkorange", "cadetblue", "peachpuff", + "indianred", "springgreen", "powderblue", "cornsilk", + "mediumorchid", "chocolate", "darkturquoise", "navajowhite", + "firebrick", "seagreen", "royalblue", "lightgoldenrodyellow", + "darkmagenta", "coral", "teal", "moccasin", + "maroon", "forestgreen", "blue", "yellow", + "darkorchid", "red", "green", "navy", + "darkred", "darkgreen", "mediumblue", "ivory", + "indigo", "orange", "darkcyan", "antiquewhite"}; + + shape_strings = {"oval", "rect", "hexagon", "parallelogram"}; } void operator()(std::ostream &out, const vertex_idx_t &i) const { - if (i >= static_cast>(colors.size())) { - // Fallback for safety: print without color if colors vector is mismatched or palette is empty. - out << i << " ["; + // Fallback for safety: print without color if colors vector is mismatched or palette is empty. + out << i << " ["; } else { - // Use modulo operator to cycle through the fixed palette if there are more color - // groups than available colors. - const std::string& color = color_strings[colors[i] % color_strings.size()]; - out << i << " [style=filled;fillcolor=" << color << ";"; + // Use modulo operator to cycle through the fixed palette if there are more color + // groups than available colors. + const std::string &color = color_strings[colors[i] % color_strings.size()]; + out << i << " [style=filled;fillcolor=" << color << ";"; } - + out << "work_weight=\"" << graph.vertex_work_weight(i) << "\";" << "comm_weight=\"" << graph.vertex_comm_weight(i) << "\";" << "mem_weight=\"" << graph.vertex_mem_weight(i) << "\";"; if constexpr (has_typed_vertices_v) { - out << "type=\"" << graph.vertex_type(i) << "\";shape=\"" << shape_strings[graph.vertex_type(i) % shape_strings.size()] << "\";"; + out << "type=\"" << graph.vertex_type(i) << "\";shape=\"" + << shape_strings[graph.vertex_type(i) % shape_strings.size()] << "\";"; } out << "]"; } }; - template + template void write_graph_structure(std::ostream &os, const Graph_t &graph, const vertex_writer_t &vertex_writer) const { - os << "digraph G {\n"; for (const auto &v : graph.vertices()) { vertex_writer(os, v); @@ -286,7 +271,6 @@ class DotFileWriter { } } else { - for (const auto &v : graph.vertices()) { for (const auto &child : graph.children(v)) { os << v << "->" << child << "\n"; @@ -314,11 +298,9 @@ class DotFileWriter { * * @param os The output stream to write the DOT representation of the computational DAG. */ - template + template void write_schedule(std::ostream &os, const BspSchedule &schedule) const { - - write_graph_structure(os, schedule.getInstance().getComputationalDag(), - VertexWriterSchedule_DOT(schedule)); + write_graph_structure(os, schedule.getInstance().getComputationalDag(), VertexWriterSchedule_DOT(schedule)); } /** @@ -335,41 +317,36 @@ class DotFileWriter { * * @param filename The name of the file to write the DOT representation of the computational DAG. */ - template + template void write_schedule(const std::string &filename, const BspSchedule &schedule) const { std::ofstream os(filename); write_schedule(os, schedule); } - template + template void write_schedule_cs(std::ostream &os, const BspScheduleCS &schedule) const { - - write_graph_structure(os, schedule.getInstance().getComputationalDag(), - VertexWriterScheduleCS_DOT(schedule)); + write_graph_structure(os, schedule.getInstance().getComputationalDag(), VertexWriterScheduleCS_DOT(schedule)); } - template + template void write_schedule_cs(const std::string &filename, const BspScheduleCS &schedule) const { std::ofstream os(filename); write_schedule_cs(os, schedule); } - template + template void write_schedule_recomp(std::ostream &os, const BspScheduleRecomp &schedule) const { - - write_graph_structure(os, schedule.getInstance().getComputationalDag(), - VertexWriterScheduleRecomp_DOT(schedule)); + write_graph_structure(os, schedule.getInstance().getComputationalDag(), VertexWriterScheduleRecomp_DOT(schedule)); } - template + template void write_schedule_recomp(const std::string &filename, const BspScheduleRecomp &schedule) const { std::ofstream os(filename); write_schedule_recomp(os, schedule); } - template + template void write_schedule_recomp_duplicate(std::ostream &os, const BspScheduleRecomp &schedule) const { - const auto &g = schedule.getInstance().getComputationalDag(); using VertexType = vertex_idx_t; @@ -380,11 +357,12 @@ class DotFileWriter { std::unordered_map> vertex_to_idx; - using vertex_type_t_or_default = std::conditional_t, v_type_t, unsigned>; + using vertex_type_t_or_default + = std::conditional_t, v_type_t, unsigned>; using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; - using cdag_vertex_impl_t = cdag_vertex_impl, v_workw_t, v_commw_t, - v_memw_t, vertex_type_t_or_default>; + using cdag_vertex_impl_t + = cdag_vertex_impl, v_workw_t, v_commw_t, v_memw_t, vertex_type_t_or_default>; using cdag_edge_impl_t = cdag_edge_impl; using graph_t = computational_dag_edge_idx_vector_impl; @@ -394,11 +372,9 @@ class DotFileWriter { size_t idx_new = 0; for (const auto &node : g.vertices()) { - if (schedule.assignments(node).size() == 1) { - - g2.add_vertex(g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node), - g.vertex_type(node)); + g2.add_vertex( + g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node), g.vertex_type(node)); names[idx_new] = std::to_string(node); node_to_proc[idx_new] = schedule.assignments(node)[0].first; @@ -408,12 +384,10 @@ class DotFileWriter { idx_new++; } else { - std::vector idxs; for (unsigned i = 0; i < schedule.assignments(node).size(); ++i) { - - g2.add_vertex(g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node), - g.vertex_type(node)); + g2.add_vertex( + g.vertex_work_weight(node), g.vertex_comm_weight(node), g.vertex_mem_weight(node), g.vertex_type(node)); names[idx_new] = std::to_string(node).append("_").append(std::to_string(i)); node_to_proc[idx_new] = schedule.assignments(node)[i].first; @@ -426,34 +400,25 @@ class DotFileWriter { } for (const auto &[key, val] : vertex_to_idx) { - if (val.size() == 1) { - for (const auto &target : g.children(key)) { - for (const auto &new_node_target : vertex_to_idx[target]) { g2.add_edge(val[0], new_node_target); } } } else { - std::unordered_set assigned_processors; for (const auto &assignment : schedule.assignments(key)) { - assigned_processors.insert(assignment.first); } for (unsigned i = 0; i < val.size(); i++) { - for (const auto &target : g.children(key)) { - for (size_t j = 0; j < vertex_to_idx[target].size(); j++) { - - if (assigned_processors.find(node_to_proc[vertex_to_idx[target][j]]) == - assigned_processors.end() || - node_to_proc[val[i]] == node_to_proc[vertex_to_idx[target][j]]) { + if (assigned_processors.find(node_to_proc[vertex_to_idx[target][j]]) == assigned_processors.end() + || node_to_proc[val[i]] == node_to_proc[vertex_to_idx[target][j]]) { g2.add_edge(val[i], vertex_to_idx[target][j]); } } @@ -462,45 +427,39 @@ class DotFileWriter { } } - write_graph_structure( - os, g2, VertexWriterDuplicateRecompSchedule_DOT(g2, names, node_to_proc, node_to_superstep)); + write_graph_structure(os, g2, VertexWriterDuplicateRecompSchedule_DOT(g2, names, node_to_proc, node_to_superstep)); } - template - void write_schedule_recomp_duplicate(const std::string &filename, - const BspScheduleRecomp &schedule) const { + template + void write_schedule_recomp_duplicate(const std::string &filename, const BspScheduleRecomp &schedule) const { std::ofstream os(filename); write_schedule_recomp_duplicate(os, schedule); } - template - void write_colored_graph(std::ostream &os, const Graph_t &graph, const color_container_t & colors) const { - + template + void write_colored_graph(std::ostream &os, const Graph_t &graph, const color_container_t &colors) const { static_assert(is_computational_dag_v, "Graph_t must be a computational DAG"); write_graph_structure(os, graph, ColoredVertexWriterGraph_DOT(graph, colors)); } - template - void write_colored_graph(const std::string &filename, const Graph_t &graph, const color_container_t & colors) const { - + template + void write_colored_graph(const std::string &filename, const Graph_t &graph, const color_container_t &colors) const { static_assert(is_computational_dag_v, "Graph_t must be a computational DAG"); std::ofstream os(filename); write_colored_graph(os, graph, colors); } - template + template void write_graph(std::ostream &os, const Graph_t &graph) const { - static_assert(is_computational_dag_v, "Graph_t must be a computational DAG"); write_graph_structure(os, graph, VertexWriterGraph_DOT(graph)); } - template + template void write_graph(const std::string &filename, const Graph_t &graph) const { - static_assert(is_computational_dag_v, "Graph_t must be a computational DAG"); std::ofstream os(filename); @@ -508,4 +467,4 @@ class DotFileWriter { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/io/arch_file_reader.hpp b/include/osp/auxiliary/io/arch_file_reader.hpp index 71b0f006..68a269c2 100644 --- a/include/osp/auxiliary/io/arch_file_reader.hpp +++ b/include/osp/auxiliary/io/arch_file_reader.hpp @@ -18,21 +18,24 @@ limitations under the License. #pragma once -#include "osp/bsp/model/BspArchitecture.hpp" #include #include #include -namespace osp { namespace file_reader { +#include "osp/bsp/model/BspArchitecture.hpp" + +namespace osp { +namespace file_reader { -template +template bool readBspArchitecture(std::ifstream &infile, BspArchitecture &architecture) { std::string line; // Skip comment lines while (std::getline(infile, line)) { - if (!line.empty() && line[0] != '%') + if (!line.empty() && line[0] != '%') { break; + } } // Parse architecture parameters @@ -49,7 +52,7 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit // Try to read optional mem_type and M if (!(iss >> mem_type >> M)) { - mem_type = -1; // Memory info not present + mem_type = -1; // Memory info not present } architecture.setNumberOfProcessors(p); @@ -59,24 +62,24 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit if (0 <= mem_type && mem_type <= 3) { using memw_t = v_memw_t; switch (mem_type) { - case 0: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::NONE); - break; - case 1: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL); - architecture.setMemoryBound(static_cast(M)); - break; - case 2: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::GLOBAL); - architecture.setMemoryBound(static_cast(M)); - break; - case 3: - architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT); - architecture.setMemoryBound(static_cast(M)); - break; - default: - std::cerr << "Invalid memory type.\n"; - return false; + case 0: + architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::NONE); + break; + case 1: + architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL); + architecture.setMemoryBound(static_cast(M)); + break; + case 2: + architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::GLOBAL); + architecture.setMemoryBound(static_cast(M)); + break; + case 3: + architecture.setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT); + architecture.setMemoryBound(static_cast(M)); + break; + default: + std::cerr << "Invalid memory type.\n"; + return false; } } else if (mem_type == -1) { std::cout << "No memory type specified. Assuming \"NONE\".\n"; @@ -131,7 +134,7 @@ bool readBspArchitecture(std::ifstream &infile, BspArchitecture &archit return true; } -template +template bool readBspArchitecture(const std::string &filename, BspArchitecture &architecture) { std::ifstream infile(filename); if (!infile.is_open()) { @@ -142,4 +145,5 @@ bool readBspArchitecture(const std::string &filename, BspArchitecture & return readBspArchitecture(infile, architecture); } -}} // namespace osp::file_reader +} // namespace file_reader +} // namespace osp diff --git a/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp b/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp index 11273e49..41062016 100644 --- a/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp +++ b/include/osp/auxiliary/io/bsp_schedule_file_writer.hpp @@ -18,36 +18,35 @@ limitations under the License. #pragma once -#include "osp/bsp/model/BspSchedule.hpp" -#include "osp/bsp/model/BspScheduleCS.hpp" #include #include -namespace osp { namespace file_writer { +#include "osp/bsp/model/BspSchedule.hpp" +#include "osp/bsp/model/BspScheduleCS.hpp" + +namespace osp { +namespace file_writer { -template +template void write_txt(std::ostream &os, const BspSchedule &schedule) { - os << "%% BspSchedule for " << schedule.getInstance().numberOfProcessors() << " processors and " << schedule.numberOfSupersteps() << " supersteps." << std::endl; os << schedule.getInstance().numberOfVertices() << " " << schedule.getInstance().numberOfProcessors() << " " << schedule.numberOfSupersteps() << std::endl; for (const auto &vertex : schedule.getInstance().getComputationalDag().vertices()) { - os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex) - << std::endl; + os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex) << std::endl; } } -template +template void write_txt(const std::string &filename, const BspSchedule &schedule) { std::ofstream os(filename); write_txt(os, schedule); } -template +template void write_txt(std::ostream &os, const BspScheduleCS &schedule) { - os << "%% BspSchedule for " << schedule.getInstance().numberOfProcessors() << " processors and " << schedule.numberOfSupersteps() << " supersteps." << std::endl; os << schedule.getInstance().numberOfVertices() << " " << schedule.getInstance().numberOfProcessors() << " " @@ -61,14 +60,12 @@ void write_txt(std::ostream &os, const BspScheduleCS &schedule) { os << std::endl; for (const auto &vertex : schedule.getInstance().getComputationalDag().vertices()) { - os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex) - << std::endl; + os << vertex << " " << schedule.assignedProcessor(vertex) << " " << schedule.assignedSuperstep(vertex) << std::endl; } if (schedule.getCommunicationSchedule().empty()) { os << "%% No communication schedule available." << std::endl; } else { - os << "%% Communication schedule available." << std::endl; for (const auto &[key, val] : schedule.getCommunicationSchedule()) { @@ -77,33 +74,33 @@ void write_txt(std::ostream &os, const BspScheduleCS &schedule) { } } -template +template void write_txt(const std::string &filename, const BspScheduleCS &schedule) { std::ofstream os(filename); write_txt(os, schedule); } -template +template void write_sankey(std::ostream &os, const BspScheduleCS &schedule) { // Computing workloads std::vector>> proc_workloads( schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors(), 0)); for (size_t node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - proc_workloads[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] += - schedule.getInstance().getComputationalDag().vertex_work_weight(node); + proc_workloads[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] + += schedule.getInstance().getComputationalDag().vertex_work_weight(node); } // Computing communicationloads std::vector>>> commloads( - schedule.numberOfSupersteps() - 1, - std::vector>>( - schedule.getInstance().numberOfProcessors(), - std::vector>(schedule.getInstance().numberOfProcessors(), 0))); + schedule.numberOfSupersteps() - 1, + std::vector>>( + schedule.getInstance().numberOfProcessors(), + std::vector>(schedule.getInstance().numberOfProcessors(), 0))); for (const auto &[comm_triple, sstep] : schedule.getCommunicationSchedule()) { - commloads[sstep][std::get<1>(comm_triple)][std::get<2>(comm_triple)] += - schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(comm_triple)); + commloads[sstep][std::get<1>(comm_triple)][std::get<2>(comm_triple)] + += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(comm_triple)); } os << "BspSchedule: Number of Processors, Number of Supersteps" << std::endl; @@ -132,10 +129,11 @@ void write_sankey(std::ostream &os, const BspScheduleCS &schedule) { } } -template +template void write_sankey(const std::string &filename, const BspScheduleCS &schedule) { std::ofstream os(filename); write_sankey(os, schedule); } -}} // namespace osp::file_writer \ No newline at end of file +} // namespace file_writer +} // namespace osp diff --git a/include/osp/auxiliary/io/dot_graph_file_reader.hpp b/include/osp/auxiliary/io/dot_graph_file_reader.hpp index a6117dcf..2b4b7b58 100644 --- a/include/osp/auxiliary/io/dot_graph_file_reader.hpp +++ b/include/osp/auxiliary/io/dot_graph_file_reader.hpp @@ -18,17 +18,17 @@ limitations under the License. #pragma once +#include #include #include -#include -#include +#include #include +#include #include -#include -#include +#include -#include "osp/concepts/constructable_computational_dag_concept.hpp" #include "osp/auxiliary/io/filepath_checker.hpp" +#include "osp/concepts/constructable_computational_dag_concept.hpp" namespace osp { namespace file_reader { @@ -44,7 +44,9 @@ std::vector split(const std::string &s, char delimiter) { } std::string removeLeadingAndTrailingQuotes(const std::string &str) { - if (str.empty()) return str; + if (str.empty()) { + return str; + } std::size_t start = 0; std::size_t end = str.length(); @@ -60,12 +62,16 @@ std::string removeLeadingAndTrailingQuotes(const std::string &str) { return str.substr(start, end - start); } -template -void parseDotNode(const std::string& line, Graph_t& G) { +template +void parseDotNode(const std::string &line, Graph_t &G) { std::size_t pos = line.find('['); - if (pos == std::string::npos) return; + if (pos == std::string::npos) { + return; + } std::size_t end_pos = line.find(']'); - if (end_pos == std::string::npos) return; + if (end_pos == std::string::npos) { + return; + } std::string properties = line.substr(pos + 1, end_pos - pos - 1); std::vector keyValuePairs = split(properties, ';'); @@ -77,14 +83,18 @@ void parseDotNode(const std::string& line, Graph_t& G) { for (const std::string &keyValuePair : keyValuePairs) { std::vector keyValue = split(keyValuePair, '='); - if (keyValue.size() != 2) continue; + if (keyValue.size() != 2) { + continue; + } std::string key = keyValue[0]; // trim leading/trailing whitespace from key key.erase(0, key.find_first_not_of(" \t\n\r\f\v")); key.erase(key.find_last_not_of(" \t\n\r\f\v") + 1); - if (key.empty()) continue; + if (key.empty()) { + continue; + } std::string value = removeLeadingAndTrailingQuotes(keyValue[1]); @@ -110,12 +120,14 @@ void parseDotNode(const std::string& line, Graph_t& G) { } } -template -void parseDotEdge(const std::string& line, Graph_t& G) { +template +void parseDotEdge(const std::string &line, Graph_t &G) { using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; std::size_t arrow_pos = line.find("->"); - if (arrow_pos == std::string::npos) return; + if (arrow_pos == std::string::npos) { + return; + } std::string source_str = line.substr(0, arrow_pos); source_str.erase(source_str.find_last_not_of(" \t\n\r\f\v") + 1); @@ -144,14 +156,18 @@ void parseDotEdge(const std::string& line, Graph_t& G) { std::string properties = line.substr(bracket_pos + 1, end_bracket_pos - bracket_pos - 1); std::vector keyValuePairs = split(properties, ';'); - for (const auto& keyValuePair : keyValuePairs) { + for (const auto &keyValuePair : keyValuePairs) { std::vector keyValue = split(keyValuePair, '='); - if (keyValue.size() != 2) continue; + if (keyValue.size() != 2) { + continue; + } std::string key = keyValue[0]; key.erase(0, key.find_first_not_of(" \t\n\r\f\v")); key.erase(key.find_last_not_of(" \t\n\r\f\v") + 1); - if (key.empty()) continue; + if (key.empty()) { + continue; + } std::string value = removeLeadingAndTrailingQuotes(keyValue[1]); @@ -171,8 +187,8 @@ void parseDotEdge(const std::string& line, Graph_t& G) { } } -template -bool readComputationalDagDotFormat(std::ifstream& infile, Graph_t& graph) { +template +bool readComputationalDagDotFormat(std::ifstream &infile, Graph_t &graph) { std::string line; while (std::getline(infile, line)) { if (line.length() > MAX_LINE_LENGTH) { @@ -182,7 +198,9 @@ bool readComputationalDagDotFormat(std::ifstream& infile, Graph_t& graph) { line.erase(0, line.find_first_not_of(" \t\n\r\f\v")); - if (line.empty() || line.rfind("digraph", 0) == 0 || line.rfind("}", 0) == 0) continue; + if (line.empty() || line.rfind("digraph", 0) == 0 || line.rfind("}", 0) == 0) { + continue; + } if (line.find("->") != std::string::npos) { // This is an edge @@ -196,8 +214,8 @@ bool readComputationalDagDotFormat(std::ifstream& infile, Graph_t& graph) { return true; } -template -bool readComputationalDagDotFormat(const std::string& filename, Graph_t& graph) { +template +bool readComputationalDagDotFormat(const std::string &filename, Graph_t &graph) { if (std::filesystem::path(filename).extension() != ".dot") { std::cerr << "Error: Only .dot files are accepted.\n"; return false; @@ -217,4 +235,5 @@ bool readComputationalDagDotFormat(const std::string& filename, Graph_t& graph) return readComputationalDagDotFormat(infile, graph); } -}} // namespace osp::file_reader \ No newline at end of file +} // namespace file_reader +} // namespace osp diff --git a/include/osp/auxiliary/io/filepath_checker.hpp b/include/osp/auxiliary/io/filepath_checker.hpp index e4014a2b..f3ac03a4 100644 --- a/include/osp/auxiliary/io/filepath_checker.hpp +++ b/include/osp/auxiliary/io/filepath_checker.hpp @@ -18,30 +18,37 @@ limitations under the License. #pragma once +#include #include #include +#include #include #include #include -#include -#include namespace osp { namespace file_reader { - -constexpr std::size_t MAX_LINE_LENGTH = 1 << 14; // 16 KB + +constexpr std::size_t MAX_LINE_LENGTH = 1 << 14; // 16 KB // Path safety to avoid symlink, traversal or malicious file types -inline bool isPathSafe(const std::string& path) { +inline bool isPathSafe(const std::string &path) { try { std::filesystem::path resolved = std::filesystem::weakly_canonical(path); - if (std::filesystem::is_symlink(resolved)) return false; - if (!std::filesystem::is_regular_file(resolved)) return false; - if (resolved.string().find('\0') != std::string::npos) return false; + if (std::filesystem::is_symlink(resolved)) { + return false; + } + if (!std::filesystem::is_regular_file(resolved)) { + return false; + } + if (resolved.string().find('\0') != std::string::npos) { + return false; + } return true; } catch (...) { return false; } } -}} // namespace osp::file_reader \ No newline at end of file +} // namespace file_reader +} // namespace osp diff --git a/include/osp/auxiliary/io/general_file_reader.hpp b/include/osp/auxiliary/io/general_file_reader.hpp index 8ed5ad4e..e05e5277 100644 --- a/include/osp/auxiliary/io/general_file_reader.hpp +++ b/include/osp/auxiliary/io/general_file_reader.hpp @@ -26,8 +26,8 @@ limitations under the License. namespace osp { namespace file_reader { -template -bool readGraph(const std::string& filename, Graph_t& graph) { +template +bool readGraph(const std::string &filename, Graph_t &graph) { if (!isPathSafe(filename)) { std::cerr << "Error: Unsafe file path (possible traversal or invalid type).\n"; return false; @@ -50,12 +50,12 @@ bool readGraph(const std::string& filename, Graph_t& graph) { } else if (file_ending == "dot") { status = file_reader::readComputationalDagDotFormat(infile, graph); } else { - std::cout << "Unknown file ending: ." << file_ending - << " ...assuming hyperDag format." << std::endl; + std::cout << "Unknown file ending: ." << file_ending << " ...assuming hyperDag format." << std::endl; status = file_reader::readComputationalDagHyperdagFormatDB(infile, graph); } return status; } -}} // namespace osp::file_reader \ No newline at end of file +} // namespace file_reader +} // namespace osp diff --git a/include/osp/auxiliary/io/hdag_graph_file_reader.hpp b/include/osp/auxiliary/io/hdag_graph_file_reader.hpp index a91481a7..b96c86ea 100644 --- a/include/osp/auxiliary/io/hdag_graph_file_reader.hpp +++ b/include/osp/auxiliary/io/hdag_graph_file_reader.hpp @@ -18,24 +18,24 @@ limitations under the License. #pragma once +#include #include #include +#include #include #include #include -#include -#include -#include "osp/concepts/computational_dag_concept.hpp" -#include "osp/graph_algorithms/directed_graph_util.hpp" #include "osp/auxiliary/io/filepath_checker.hpp" +#include "osp/concepts/computational_dag_concept.hpp" #include "osp/concepts/constructable_computational_dag_concept.hpp" +#include "osp/graph_algorithms/directed_graph_util.hpp" namespace osp { namespace file_reader { -template -bool readComputationalDagHyperdagFormat(std::ifstream& infile, Graph_t& graph) { +template +bool readComputationalDagHyperdagFormat(std::ifstream &infile, Graph_t &graph) { std::string line; // Skip comment lines starting with '%' @@ -84,8 +84,7 @@ bool readComputationalDagHyperdagFormat(std::ifstream& infile, Graph_t& graph) { if (edgeSource[edgeIdx] == -1) { edgeSource[edgeIdx] = node; } else { - graph.add_edge(static_cast>(edgeSource[edgeIdx]), - static_cast>(node)); + graph.add_edge(static_cast>(edgeSource[edgeIdx]), static_cast>(node)); } } @@ -120,7 +119,7 @@ bool readComputationalDagHyperdagFormat(std::ifstream& infile, Graph_t& graph) { } } */ - + if (!is_acyclic(graph)) { std::cerr << "Error: DAG is not acyclic.\n"; return false; @@ -129,8 +128,8 @@ bool readComputationalDagHyperdagFormat(std::ifstream& infile, Graph_t& graph) { return true; } -template -bool readComputationalDagHyperdagFormat(const std::string& filename, Graph_t& graph) { +template +bool readComputationalDagHyperdagFormat(const std::string &filename, Graph_t &graph) { if (!isPathSafe(filename)) { std::cerr << "Error: Unsafe file path (possible traversal or invalid type).\n"; return false; @@ -145,9 +144,8 @@ bool readComputationalDagHyperdagFormat(const std::string& filename, Graph_t& gr return readComputationalDagHyperdagFormat(infile, graph); } - -template -bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph) { +template +bool readComputationalDagHyperdagFormatDB(std::ifstream &infile, Graph_t &graph) { std::string line; // Skip comment lines @@ -182,7 +180,7 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph) std::cerr << "Warning: Could not read hyperedge ID for hyperedge " << i << ".\n"; continue; } - edgeStream >> comm_weight >> mem_weight; // optional + edgeStream >> comm_weight >> mem_weight; // optional if (hEdge < 0 || hEdge >= hEdges) { std::cerr << "Error: Hyperedge ID " << hEdge << " is out of range (0 to " << hEdges - 1 << ").\n"; @@ -219,7 +217,7 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph) if constexpr (has_typed_vertices_v) { graph.set_vertex_type(static_cast>(node), static_cast>(type)); - } + } } // Resize(N); @@ -254,16 +252,14 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph) graph.set_vertex_mem_weight(static_cast>(node), hyperedge_mem_weights[edgeIdx]); } else { if constexpr (is_modifiable_cdag_comm_edge_v) { - auto edge = graph.add_edge(static_cast>(edgeSource[edgeIdx]), - static_cast>(nodeIdx)); + static_cast>(nodeIdx)); - graph.set_edge_comm_weight(edge.first, - static_cast>(hyperedge_comm_weights[edgeIdx])); + graph.set_edge_comm_weight(edge.first, static_cast>(hyperedge_comm_weights[edgeIdx])); } else { graph.add_edge(static_cast>(edgeSource[edgeIdx]), - static_cast>(nodeIdx)); + static_cast>(nodeIdx)); } } } @@ -276,8 +272,8 @@ bool readComputationalDagHyperdagFormatDB(std::ifstream& infile, Graph_t& graph) return true; } -template -bool readComputationalDagHyperdagFormatDB(const std::string& filename, Graph_t& graph) { +template +bool readComputationalDagHyperdagFormatDB(const std::string &filename, Graph_t &graph) { // Optional: limit file extension for safety if (std::filesystem::path(filename).extension() != ".hdag") { std::cerr << "Error: Only .hdag files are accepted.\n"; @@ -298,4 +294,5 @@ bool readComputationalDagHyperdagFormatDB(const std::string& filename, Graph_t& return readComputationalDagHyperdagFormatDB(infile, graph); } -}} // namespace osp::file_reader \ No newline at end of file +} // namespace file_reader +} // namespace osp diff --git a/include/osp/auxiliary/io/hdag_graph_file_writer.hpp b/include/osp/auxiliary/io/hdag_graph_file_writer.hpp index e065801c..be0638ac 100644 --- a/include/osp/auxiliary/io/hdag_graph_file_writer.hpp +++ b/include/osp/auxiliary/io/hdag_graph_file_writer.hpp @@ -60,14 +60,18 @@ void writeComputationalDagHyperdagFormatDB(std::ostream &os, const Graph_t &grap os << num_hyperedges << " " << num_vertices << " " << num_pins << "\n"; // Hyperedges - if (write_comment_lines) os << "%% Hyperedges: ID comm_weight mem_weight\n"; + if (write_comment_lines) { + os << "%% Hyperedges: ID comm_weight mem_weight\n"; + } for (unsigned i = 0; i < num_hyperedges; ++i) { const auto u = hyperedge_idx_to_node[i]; os << i << " " << graph.vertex_comm_weight(u) << " " << graph.vertex_mem_weight(u) << "\n"; } // Vertices - if (write_comment_lines) os << "%% Vertices: ID work_weight type\n"; + if (write_comment_lines) { + os << "%% Vertices: ID work_weight type\n"; + } for (const auto &u : graph.vertices()) { os << u << " " << graph.vertex_work_weight(u); if constexpr (has_typed_vertices_v) { @@ -79,12 +83,14 @@ void writeComputationalDagHyperdagFormatDB(std::ostream &os, const Graph_t &grap } // Pins - if (write_comment_lines) os << "%% Pins: HyperedgeID NodeID\n"; + if (write_comment_lines) { + os << "%% Pins: HyperedgeID NodeID\n"; + } for (unsigned i = 0; i < num_hyperedges; ++i) { const auto u = hyperedge_idx_to_node[i]; - os << i << " " << u << "\n"; // Source pin + os << i << " " << u << "\n"; // Source pin for (const auto &v : graph.children(u)) { - os << i << " " << v << "\n"; // Target pins + os << i << " " << v << "\n"; // Target pins } } } @@ -98,7 +104,9 @@ void writeComputationalDagHyperdagFormatDB(std::ostream &os, const Graph_t &grap * @return true if writing was successful, false otherwise. */ template -bool writeComputationalDagHyperdagFormatDB(const std::string &filename, const Graph_t &graph, const bool write_comment_lines = false) { +bool writeComputationalDagHyperdagFormatDB(const std::string &filename, + const Graph_t &graph, + const bool write_comment_lines = false) { std::ofstream os(filename); if (!os.is_open()) { std::cerr << "Error: Failed to open file for writing: " << filename << "\n"; @@ -108,5 +116,5 @@ bool writeComputationalDagHyperdagFormatDB(const std::string &filename, const Gr return true; } -} // namespace file_writer -} // namespace osp \ No newline at end of file +} // namespace file_writer +} // namespace osp diff --git a/include/osp/auxiliary/io/mtx_graph_file_reader.hpp b/include/osp/auxiliary/io/mtx_graph_file_reader.hpp index 45382094..5a98721e 100644 --- a/include/osp/auxiliary/io/mtx_graph_file_reader.hpp +++ b/include/osp/auxiliary/io/mtx_graph_file_reader.hpp @@ -18,29 +18,31 @@ limitations under the License. #pragma once +#include #include #include +#include #include #include #include -#include -#include -#include "osp/concepts/computational_dag_concept.hpp" #include "osp/auxiliary/io/filepath_checker.hpp" +#include "osp/concepts/computational_dag_concept.hpp" namespace osp { namespace file_reader { -template -bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& graph) { +template +bool readComputationalDagMartixMarketFormat(std::ifstream &infile, Graph_t &graph) { using vertex_t = vertex_idx_t; std::string line; // Skip comments or empty lines (robustly) while (std::getline(infile, line)) { - if (line.empty() || line[0] == '%') continue; + if (line.empty() || line[0] == '%') { + continue; + } // Null byte check if (line.find('\0') != std::string::npos) { @@ -52,7 +54,7 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap std::cerr << "Error: Line too long, possible malformed or malicious file.\n"; return false; } - break; // We found the actual header line + break; // We found the actual header line } if (infile.eof()) { @@ -63,8 +65,7 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap int M_row = 0, M_col = 0, nEntries = 0; std::istringstream header_stream(line); - if (!(header_stream >> M_row >> M_col >> nEntries) || - M_row <= 0 || M_col <= 0 || M_row != M_col) { + if (!(header_stream >> M_row >> M_col >> nEntries) || M_row <= 0 || M_col <= 0 || M_row != M_col) { std::cerr << "Error: Invalid header or non-square matrix.\n"; return false; } @@ -84,7 +85,9 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap int entries_read = 0; while (entries_read < nEntries && std::getline(infile, line)) { - if (line.empty() || line[0] == '%') continue; + if (line.empty() || line[0] == '%') { + continue; + } if (line.size() > MAX_LINE_LENGTH) { std::cerr << "Error: Line too long.\n"; return false; @@ -99,7 +102,8 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap return false; } - row -= 1; col -= 1; // Convert to 0-based + row -= 1; + col -= 1; // Convert to 0-based if (row < 0 || col < 0 || row >= M_row || col >= M_col) { std::cerr << "Error: Matrix entry out of bounds.\n"; @@ -145,8 +149,8 @@ bool readComputationalDagMartixMarketFormat(std::ifstream& infile, Graph_t& grap return true; } -template -bool readComputationalDagMartixMarketFormat(const std::string& filename, Graph_t& graph) { +template +bool readComputationalDagMartixMarketFormat(const std::string &filename, Graph_t &graph) { // Ensure the file is .mtx format if (std::filesystem::path(filename).extension() != ".mtx") { std::cerr << "Error: Only .mtx files are accepted.\n"; @@ -177,13 +181,10 @@ bool readComputationalDagMartixMarketFormat(const std::string& filename, Graph_t return readComputationalDagMartixMarketFormat(infile, graph); } - // bool readProblem(const std::string &filename, DAG &G, BSPproblem ¶ms, bool NoNUMA = true); // std::pair readBspInstance(const std::string &filename); - - // std::pair // readComputationalDagMartixMarketFormat(const std::string &filename, // std::unordered_map, double, pair_hash> &mtx); @@ -206,7 +207,6 @@ bool readComputationalDagMartixMarketFormat(const std::string& filename, Graph_t // std::pair readBspArchitecture(std::ifstream &infile); - // std::pair readBspSchdeuleTxtFormat(const BspInstance &instance, const std::string &filename); // std::pair readBspSchdeuleTxtFormat(const BspInstance &instance, std::ifstream &infile); @@ -243,6 +243,6 @@ bool readComputationalDagMartixMarketFormat(const std::string& filename, Graph_t // */ // std::pair extractBspScheduleRecomp(std::ifstream &infile, const BspInstance &instance); -} // namespace FileReader +} // namespace file_reader -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp b/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp index 7d0bbe8f..d7f64c9b 100644 --- a/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp +++ b/include/osp/auxiliary/io/mtx_hypergraph_file_reader.hpp @@ -18,29 +18,30 @@ limitations under the License. #pragma once +#include #include #include +#include #include #include #include -#include -#include -#include "osp/partitioning/model/hypergraph.hpp" #include "osp/auxiliary/io/filepath_checker.hpp" +#include "osp/partitioning/model/hypergraph.hpp" namespace osp { namespace file_reader { // reads a matrix into Hypergraph format, where nonzeros are vertices, and rows/columns are hyperedges -template -bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph& hgraph) { - +template +bool readHypergraphMartixMarketFormat(std::ifstream &infile, Hypergraph &hgraph) { std::string line; // Skip comments or empty lines (robustly) while (std::getline(infile, line)) { - if (line.empty() || line[0] == '%') continue; + if (line.empty() || line[0] == '%') { + continue; + } // Null byte check if (line.find('\0') != std::string::npos) { @@ -52,7 +53,7 @@ bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph> M_row >> M_col >> nEntries) || - M_row <= 0 || M_col <= 0) { + if (!(header_stream >> M_row >> M_col >> nEntries) || M_row <= 0 || M_col <= 0) { std::cerr << "Error: Invalid header.\n"; return false; } @@ -82,7 +82,9 @@ bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph MAX_LINE_LENGTH) { std::cerr << "Error: Line too long.\n"; return false; @@ -97,7 +99,8 @@ bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph= M_row || col >= M_col) { std::cerr << "Error: Matrix entry out of bounds.\n"; @@ -127,19 +130,24 @@ bool readHypergraphMartixMarketFormat(std::ifstream& infile, Hypergraph(M_row); ++row) - if(!row_hyperedges[row].empty()) + for (index_type row = 0; row < static_cast(M_row); ++row) { + if (!row_hyperedges[row].empty()) { hgraph.add_hyperedge(row_hyperedges[row]); + } + } - for(index_type col = 0; col < static_cast(M_col); ++col) - if(!column_hyperedges[col].empty()) + for (index_type col = 0; col < static_cast(M_col); ++col) { + if (!column_hyperedges[col].empty()) { hgraph.add_hyperedge(column_hyperedges[col]); + } + } return true; } -template -bool readHypergraphMartixMarketFormat(const std::string& filename, Hypergraph& hgraph) { +template +bool readHypergraphMartixMarketFormat(const std::string &filename, + Hypergraph &hgraph) { // Ensure the file is .mtx format if (std::filesystem::path(filename).extension() != ".mtx") { std::cerr << "Error: Only .mtx files are accepted.\n"; @@ -170,6 +178,6 @@ bool readHypergraphMartixMarketFormat(const std::string& filename, Hypergraph #include -namespace osp { namespace file_writer { +#include "osp/partitioning/model/partitioning.hpp" +#include "osp/partitioning/model/partitioning_replication.hpp" + +namespace osp { +namespace file_writer { -template +template void write_txt(std::ostream &os, const Partitioning &partition) { - using index_type = typename hypergraph_t::vertex_idx; os << "%% Partitioning for " << partition.getInstance().getNumberOfPartitions() << " parts." << std::endl; - for(index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node) + for (index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node) { os << node << " " << partition.assignedPartition(node) << std::endl; + } } -template +template void write_txt(const std::string &filename, const Partitioning &partition) { std::ofstream os(filename); write_txt(os, partition); } -template +template void write_txt(std::ostream &os, const PartitioningWithReplication &partition) { - using index_type = typename hypergraph_t::vertex_idx; os << "%% Partitioning for " << partition.getInstance().getNumberOfPartitions() << " parts with replication." << std::endl; - for(index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node) - { + for (index_type node = 0; node < partition.getInstance().getHypergraph().num_vertices(); ++node) { os << node; - for(unsigned part : partition.assignedPartitions(node)) + for (unsigned part : partition.assignedPartitions(node)) { os << " " << part; + } os << std::endl; } } -template +template void write_txt(const std::string &filename, const PartitioningWithReplication &partition) { std::ofstream os(filename); write_txt(os, partition); } -}} // namespace osp::file_writer \ No newline at end of file +} // namespace file_writer +} // namespace osp diff --git a/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp b/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp index 217c591a..e3849668 100644 --- a/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp +++ b/include/osp/auxiliary/io/pebbling_schedule_file_writer.hpp @@ -18,58 +18,57 @@ limitations under the License. #pragma once -#include "osp/pebbling/PebblingSchedule.hpp" #include #include -namespace osp { namespace file_writer { +#include "osp/pebbling/PebblingSchedule.hpp" + +namespace osp { +namespace file_writer { -template +template void write_txt(std::ostream &os, const PebblingSchedule &schedule) { - using vertex_idx = vertex_idx_t; os << "%% PebblingSchedule for " << schedule.getInstance().numberOfProcessors() << " processors and " << schedule.numberOfSupersteps() << " supersteps." << std::endl; - for(unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) - { - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { - const auto& computeSteps = schedule.GetComputeStepsForProcSuperstep(proc, step); - for(const auto& computeStep : computeSteps) - { + for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) { + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { + const auto &computeSteps = schedule.GetComputeStepsForProcSuperstep(proc, step); + for (const auto &computeStep : computeSteps) { os << "Compute " << computeStep.node << " on proc " << proc << " in superstep " << step << std::endl; - for(vertex_idx to_evict : computeStep.nodes_evicted_after) + for (vertex_idx to_evict : computeStep.nodes_evicted_after) { os << "Evict " << to_evict << " from proc " << proc << " in superstep " << step << std::endl; + } } } - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { - const std::vector& nodesSentUp = schedule.GetNodesSentUp(proc, step); - for(vertex_idx node : nodesSentUp) + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { + const std::vector &nodesSentUp = schedule.GetNodesSentUp(proc, step); + for (vertex_idx node : nodesSentUp) { os << "Send up " << node << " from proc " << proc << " in superstep " << step << std::endl; + } } - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { - const std::vector& nodesEvictedInComm = schedule.GetNodesEvictedInComm(proc, step); - for(vertex_idx node : nodesEvictedInComm) + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { + const std::vector &nodesEvictedInComm = schedule.GetNodesEvictedInComm(proc, step); + for (vertex_idx node : nodesEvictedInComm) { os << "Evict " << node << " from proc " << proc << " in superstep " << step << std::endl; + } } - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { - const std::vector& nodesSentDown = schedule.GetNodesSentDown(proc, step); - for(vertex_idx node : nodesSentDown) + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { + const std::vector &nodesSentDown = schedule.GetNodesSentDown(proc, step); + for (vertex_idx node : nodesSentDown) { os << "Send down " << node << " to proc " << proc << " in superstep " << step << std::endl; + } } } - } -template +template void write_txt(const std::string &filename, const PebblingSchedule &schedule) { std::ofstream os(filename); write_txt(os, schedule); } -}} // namespace osp::file_writer \ No newline at end of file +} // namespace file_writer +} // namespace osp diff --git a/include/osp/auxiliary/math/divisors.hpp b/include/osp/auxiliary/math/divisors.hpp index 235f0129..fe268506 100644 --- a/include/osp/auxiliary/math/divisors.hpp +++ b/include/osp/auxiliary/math/divisors.hpp @@ -25,7 +25,7 @@ limitations under the License. namespace osp { -template +template integral_type intSqrtFloor(integral_type num) { static_assert(std::is_integral_v); assert(num > 0); @@ -48,8 +48,7 @@ integral_type intSqrtFloor(integral_type num) { return sqrt; } - -template +template std::vector divisorsList(integral_type num) { static_assert(std::is_integral_v); assert(num > 0); @@ -62,11 +61,13 @@ std::vector divisorsList(integral_type num) { divs.emplace_back(div); } } - for (std::size_t indx = divs.back() * divs.back() == num ? divs.size() - 2U : divs.size() - 1U; indx != std::numeric_limits::max(); --indx) { + for (std::size_t indx = divs.back() * divs.back() == num ? divs.size() - 2U : divs.size() - 1U; + indx != std::numeric_limits::max(); + --indx) { divs.emplace_back(num / divs[indx]); } return divs; } -} // end namespace osp +} // end namespace osp diff --git a/include/osp/auxiliary/math/math_helper.hpp b/include/osp/auxiliary/math/math_helper.hpp index 93ca721c..b5227d43 100644 --- a/include/osp/auxiliary/math/math_helper.hpp +++ b/include/osp/auxiliary/math/math_helper.hpp @@ -21,15 +21,15 @@ limitations under the License. namespace osp { -template +template float_type log_sum_exp(float_type lhs, float_type rhs) { static_assert(std::is_floating_point_v); const float_type max = std::max(lhs, rhs); - + float_type result = max; - result += std::log2( std::exp2(lhs - max) + std::exp2(rhs - max) ); + result += std::log2(std::exp2(lhs - max) + std::exp2(rhs - max)); return result; } -} // end namespace osp +} // end namespace osp diff --git a/include/osp/auxiliary/misc.hpp b/include/osp/auxiliary/misc.hpp index 56a95955..1b269f27 100644 --- a/include/osp/auxiliary/misc.hpp +++ b/include/osp/auxiliary/misc.hpp @@ -23,10 +23,10 @@ limitations under the License. #include #include #include +#include #include #include #include -#include #include #include #include @@ -37,32 +37,32 @@ namespace osp { // unbiased random int generator inline int randInt(int lim) { int rnd = std::rand(); - while (rnd >= RAND_MAX - RAND_MAX % lim) + while (rnd >= RAND_MAX - RAND_MAX % lim) { rnd = std::rand(); + } return rnd % lim; } // pair of integers -template +template struct Pair { int a, b; explicit Pair(const T1 a_ = T1(), const T2 b_ = T2()) : a(a_), b(b_) {} - template + template bool operator<(const Pair &other) const { return (a < other.a || (a == other.a && b < other.b)); } - std::ostream &operator<<(std::ostream &os) const { - return os << ("(" + std::to_string(a) + ", " + std::to_string(b) + ")"); - } + std::ostream &operator<<(std::ostream &os) const { return os << ("(" + std::to_string(a) + ", " + std::to_string(b) + ")"); } }; + using intPair = Pair; // triple of integers -template +template struct Triple { T1 a; T2 b; @@ -74,20 +74,22 @@ struct Triple { return os << "(" << std::to_string(a) << ", " << std::to_string(b) << ", " << std::to_string(c) << ")"; } }; + using intTriple = Triple; inline bool isDisjoint(std::vector &intervals) { - sort(intervals.begin(), intervals.end()); - for (size_t i = 0; i + 1 < intervals.size(); ++i) - if (intervals[i].b > intervals[i + 1].a) + for (size_t i = 0; i + 1 < intervals.size(); ++i) { + if (intervals[i].b > intervals[i + 1].a) { return false; + } + } return true; } // computes power of an integer -template +template constexpr T intpow(T base, unsigned exp) { static_assert(std::is_integral::value); @@ -118,26 +120,25 @@ struct contractionEdge { } }; - - // List of initializaton methods available -static const std::vector possibleModes{"random", "SJF", "cilk", "BSPg", "ETF", - "BL-EST", "ETF-NUMA", "BL-EST-NUMA", "Layers"}; +static const std::vector possibleModes{ + "random", "SJF", "cilk", "BSPg", "ETF", "BL-EST", "ETF-NUMA", "BL-EST-NUMA", "Layers"}; // modify problem filename by adding substring at the right place inline std::string editFilename(const std::string &filename, const std::string &toInsert) { auto pos = filename.find("_coarse"); - if (pos == std::string::npos) + if (pos == std::string::npos) { pos = filename.find("_instance"); - if (pos == std::string::npos) + } + if (pos == std::string::npos) { return toInsert + filename; + } return filename.substr(0, pos) + toInsert + filename.substr(pos, filename.length() - pos); } - // unordered set intersection -template +template std::unordered_set get_intersection(const std::unordered_set &a, const std::unordered_set &b) { std::vector result; const auto &larger = a.size() > b.size() ? a : b; @@ -151,7 +152,7 @@ std::unordered_set get_intersection(const std::unordered_set &a, const std } // unordered set union -template +template std::unordered_set get_union(const std::unordered_set &a, const std::unordered_set &b) { std::unordered_set larger = a.size() > b.size() ? a : b; std::unordered_set smaller = a.size() <= b.size() ? a : b; @@ -162,7 +163,7 @@ std::unordered_set get_union(const std::unordered_set &a, const std::unord } // zip two vectors of equal length -template +template std::vector> zip(const std::vector &a, const std::vector &b) { assert(a.size() == b.size()); @@ -175,7 +176,7 @@ std::vector> zip(const std::vector &a, const std::vector & return result; } -template +template void unzip(std::vector> &zipped, std::vector &a, std::vector &b) { a.resize(zipped.size()); b.resize(zipped.size()); @@ -186,7 +187,7 @@ void unzip(std::vector> &zipped, std::vector &a, std::vector< } } -template +template std::vector sort_and_sorting_arrangement(std::vector &a) { std::vector rearrangement; rearrangement.resize(a.size()); @@ -200,7 +201,7 @@ std::vector sort_and_sorting_arrangement(std::vector &a) { return rearrangement; } -template +template std::vector sorting_arrangement(const std::vector &a, bool increasing = true) { std::vector rearrangement; rearrangement.resize(a.size()); @@ -235,7 +236,7 @@ inline bool check_vector_is_rearrangement_of_0_to_N(const std::vector &a } // sorts a vector like the arrangement -template +template void sort_like_arrangement(std::vector &a, const std::vector &arrangement) { assert(a.size() == arrangement.size()); assert(check_vector_is_rearrangement_of_0_to_N(arrangement)); @@ -254,13 +255,13 @@ void sort_like_arrangement(std::vector &a, const std::vector &arrange prev_j = j; j = arrangement[j]; } - a[prev_j] = i_val; // j == i + a[prev_j] = i_val; // j == i moved[prev_j] = true; } } // sorts vector according to values in second vector w/o changing second vector -template +template void sort_like(std::vector &a, const std::vector &b) { assert(a.size() == b.size()); @@ -276,7 +277,7 @@ void sort_like(std::vector &a, const std::vector &b) { * @param ordered_set * @return T KeyType of SetType */ -template +template T Get_Median(SetType ordered_set) { assert(ordered_set.size() != 0); typename SetType::iterator it = ordered_set.begin(); @@ -299,7 +300,7 @@ T Get_Median(SetType ordered_set) { * @param ordered_set * @return T KeyType of SetType */ -template +template T Get_Lower_Median(SetType ordered_set) { assert(ordered_set.size() != 0); typename SetType::iterator it = ordered_set.begin(); @@ -316,7 +317,7 @@ T Get_Lower_Median(SetType ordered_set) { * @param ordered_set * @return T KeyType of SetType */ -template +template T Get_upper_third_percentile(SetType ordered_set) { assert(ordered_set.size() != 0); typename SetType::iterator it = ordered_set.begin(); @@ -333,7 +334,7 @@ T Get_upper_third_percentile(SetType ordered_set) { * @param ordered_set * @return T KeyType of SetType */ -template +template T Get_lower_third_percentile(SetType ordered_set) { assert(ordered_set.size() != 0); typename SetType::iterator it = ordered_set.begin(); @@ -342,4 +343,4 @@ T Get_lower_third_percentile(SetType ordered_set) { return *it; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/permute.hpp b/include/osp/auxiliary/permute.hpp index 5676a4f2..31ae7857 100644 --- a/include/osp/auxiliary/permute.hpp +++ b/include/osp/auxiliary/permute.hpp @@ -18,29 +18,36 @@ limitations under the License. #pragma once +#include #include #include #include namespace osp { -template +template void permute_inplace(std::vector &vec, std::vector &perm) { static_assert(std::is_integral_v); static_assert(std::is_unsigned_v); assert(vec.size() == perm.size()); - assert([&]() -> bool{ + assert([&]() -> bool { std::vector found(perm.size(), false); for (const Ind &val : perm) { - if (val < 0) return false; - if (val >= perm.size()) return false; - if (found[val]) return false; + if (val < 0) { + return false; + } + if (val >= perm.size()) { + return false; + } + if (found[val]) { + return false; + } found[val] = true; } return true; }()); - assert(reinterpret_cast(&vec) != reinterpret_cast(&perm)); + assert(reinterpret_cast(&vec) != reinterpret_cast(&perm)); for (Ind i = 0; i < perm.size(); ++i) { while (perm[i] != i) { @@ -50,26 +57,31 @@ void permute_inplace(std::vector &vec, std::vector &perm) { } } -template +template void inverse_permute_inplace(std::vector &vec, std::vector &perm) { static_assert(std::is_integral_v); static_assert(std::is_unsigned_v); assert(vec.size() == perm.size()); - assert([&]() -> bool{ + assert([&]() -> bool { std::vector found(perm.size(), false); for (const Ind &val : perm) { - if (val < 0) return false; - if (val >= perm.size()) return false; - if (found[val]) return false; + if (val < 0) { + return false; + } + if (val >= perm.size()) { + return false; + } + if (found[val]) { + return false; + } found[val] = true; } return true; }()); - assert(reinterpret_cast(&vec) != reinterpret_cast(&perm)); + assert(reinterpret_cast(&vec) != reinterpret_cast(&perm)); for (Ind i = 0; i < perm.size(); ++i) { - Ind j = i; while (i != perm[i]) { std::swap(vec[j], vec[perm[i]]); @@ -79,4 +91,4 @@ void inverse_permute_inplace(std::vector &vec, std::vector &perm) { } } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp b/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp index 36291b1a..692415c3 100644 --- a/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp +++ b/include/osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp @@ -18,10 +18,10 @@ limitations under the License. #pragma once +#include #include -#include #include -#include +#include #include "osp/concepts/constructable_computational_dag_concept.hpp" @@ -34,9 +34,8 @@ namespace osp { * @param chance chance/num_vertices is the probability of edge inclusion * @return DAG */ -template -void erdos_renyi_graph_gen(Graph_t& dag_out, vertex_idx_t num_vertices, double chance) { - +template +void erdos_renyi_graph_gen(Graph_t &dag_out, vertex_idx_t num_vertices, double chance) { static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG type"); dag_out = Graph_t(num_vertices); @@ -45,10 +44,8 @@ void erdos_renyi_graph_gen(Graph_t& dag_out, vertex_idx_t num_vertices, std::mt19937 gen(rd()); for (const auto &v : dag_out.vertices()) { - const auto one = static_cast>(1); - std::binomial_distribution> bino_dist(num_vertices - one - v, - chance / double(num_vertices)); + std::binomial_distribution> bino_dist(num_vertices - one - v, chance / double(num_vertices)); auto out_edges_num = bino_dist(gen); std::unordered_set> out_edges; @@ -56,8 +53,9 @@ void erdos_renyi_graph_gen(Graph_t& dag_out, vertex_idx_t num_vertices, std::uniform_int_distribution> dist(0, num_vertices - one - v); vertex_idx_t edge = v + one + dist(gen); - if (out_edges.find(edge) != out_edges.cend()) + if (out_edges.find(edge) != out_edges.cend()) { continue; + } out_edges.emplace(edge); } @@ -68,4 +66,4 @@ void erdos_renyi_graph_gen(Graph_t& dag_out, vertex_idx_t num_vertices, } } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp b/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp index dabb04fe..67728ad0 100644 --- a/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp +++ b/include/osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp @@ -34,9 +34,8 @@ namespace osp { * @param prob probability of an edge immediately off the diagonal to be included * @return DAG */ -template +template void near_diag_random_graph(Graph_t &dag_out, vertex_idx_t num_vertices, double bandwidth, double prob) { - static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG type"); dag_out = Graph_t(num_vertices); @@ -45,8 +44,8 @@ void near_diag_random_graph(Graph_t &dag_out, vertex_idx_t num_vertices std::mt19937 gen(rd()); for (vertex_idx_t v = 1; v < num_vertices; ++v) { - - std::binomial_distribution> bino_dist(vertex_idx_t(num_vertices - v), prob * std::exp( 1.0 - static_cast(v) / bandwidth) ); + std::binomial_distribution> bino_dist(vertex_idx_t(num_vertices - v), + prob * std::exp(1.0 - static_cast(v) / bandwidth)); vertex_idx_t off_diag_edges_num = bino_dist(gen); std::vector> range(num_vertices - v, 0); @@ -61,4 +60,4 @@ void near_diag_random_graph(Graph_t &dag_out, vertex_idx_t num_vertices } } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/return_status.hpp b/include/osp/auxiliary/return_status.hpp index e5f0b870..3aa23889 100644 --- a/include/osp/auxiliary/return_status.hpp +++ b/include/osp/auxiliary/return_status.hpp @@ -22,10 +22,7 @@ limitations under the License. namespace osp { -enum class RETURN_STATUS { OSP_SUCCESS, - BEST_FOUND, - TIMEOUT, - ERROR }; +enum class RETURN_STATUS { OSP_SUCCESS, BEST_FOUND, TIMEOUT, ERROR }; /** * @brief Converts the enum to a string literal. @@ -33,24 +30,22 @@ enum class RETURN_STATUS { OSP_SUCCESS, */ inline const char *to_string(const RETURN_STATUS status) { switch (status) { - case RETURN_STATUS::OSP_SUCCESS: - return "SUCCESS"; - case RETURN_STATUS::BEST_FOUND: - return "BEST FOUND"; - case RETURN_STATUS::TIMEOUT: - return "TIMEOUT"; - case RETURN_STATUS::ERROR: - return "ERROR"; - default: - return "UNKNOWN"; + case RETURN_STATUS::OSP_SUCCESS: + return "SUCCESS"; + case RETURN_STATUS::BEST_FOUND: + return "BEST FOUND"; + case RETURN_STATUS::TIMEOUT: + return "TIMEOUT"; + case RETURN_STATUS::ERROR: + return "ERROR"; + default: + return "UNKNOWN"; } } /** * @brief Stream operator overload using the helper function. */ -inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) { - return os << to_string(status); -} +inline std::ostream &operator<<(std::ostream &os, RETURN_STATUS status) { return os << to_string(status); } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp b/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp index 556d4b64..2594267f 100644 --- a/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp +++ b/include/osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp @@ -13,16 +13,15 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Christos Matzoros, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Christos Matzoros, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once - -#include #include -#include #include +#include +#include #include "osp/bsp/model/BspSchedule.hpp" @@ -32,46 +31,47 @@ enum SCHEDULE_NODE_PERMUTATION_MODES { LOOP_PROCESSORS, SNAKE_PROCESSORS, PROCES /** * @brief Computes a permutation to improve locality of a schedule, looping through processors - * + * * @param sched BSP Schedule * @param mode ordering of processors * @return std::vector vec[prev_node_name] = new_node_name(location) */ -template -std::vector schedule_node_permuter_basic(const BspSchedule& sched, const SCHEDULE_NODE_PERMUTATION_MODES mode = LOOP_PROCESSORS) { -// superstep, processor, nodes - std::vector>> allocation(sched.numberOfSupersteps(), - std::vector>(sched.getInstance().numberOfProcessors(), - std::vector({}))); +template +std::vector schedule_node_permuter_basic(const BspSchedule &sched, + const SCHEDULE_NODE_PERMUTATION_MODES mode = LOOP_PROCESSORS) { + // superstep, processor, nodes + std::vector>> allocation( + sched.numberOfSupersteps(), + std::vector>(sched.getInstance().numberOfProcessors(), std::vector({}))); for (size_t node = 0; node < sched.getInstance().numberOfVertices(); node++) { - allocation[ sched.assignedSuperstep(node) ][ sched.assignedProcessor(node) ].emplace_back(node); + allocation[sched.assignedSuperstep(node)][sched.assignedProcessor(node)].emplace_back(node); } // reordering and allocating into permutation std::vector permutation(sched.getInstance().numberOfVertices()); - if(mode == LOOP_PROCESSORS || mode == SNAKE_PROCESSORS) { + if (mode == LOOP_PROCESSORS || mode == SNAKE_PROCESSORS) { bool forward = true; size_t counter = 0; for (auto step_it = allocation.begin(); step_it != allocation.cend(); step_it++) { if (forward) { for (auto proc_it = step_it->begin(); proc_it != step_it->cend(); proc_it++) { - //topological_sort_for_data_locality_interior_basic(*proc_it, sched); - for (const auto& node : *proc_it) { + // topological_sort_for_data_locality_interior_basic(*proc_it, sched); + for (const auto &node : *proc_it) { permutation[node] = counter; counter++; } } } else { for (auto proc_it = step_it->rbegin(); proc_it != step_it->crend(); proc_it++) { - //topological_sort_for_data_locality_interior_basic(*proc_it, sched); - for (const auto& node : *proc_it) { + // topological_sort_for_data_locality_interior_basic(*proc_it, sched); + for (const auto &node : *proc_it) { permutation[node] = counter; counter++; } } } - + if (mode == SNAKE_PROCESSORS) { forward = !forward; } @@ -83,4 +83,4 @@ std::vector schedule_node_permuter_basic(const BspSchedule& sch return permutation; } -} +} // namespace osp diff --git a/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp b/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp index d7f7e77f..86f4e0f3 100644 --- a/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp +++ b/include/osp/auxiliary/sptrsv_simulator/sptrsv.hpp @@ -20,23 +20,24 @@ limitations under the License. #ifdef EIGEN_FOUND -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "osp/bsp/model/BspInstance.hpp" -#include "osp/bsp/model/BspSchedule.hpp" -#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" +# include + +# include +# include +# include +# include +# include +# include +# include +# include + +# include "osp/bsp/model/BspInstance.hpp" +# include "osp/bsp/model/BspSchedule.hpp" +# include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" namespace osp { -template +template class Sptrsv { using uVertType = typename SparseMatrixImp::vertex_idx; @@ -74,88 +75,88 @@ class Sptrsv { void setup_csr_no_permutation(const BspSchedule> &schedule) { vector_step_processor_vertices = std::vector>>( - schedule.numberOfSupersteps(), - std::vector>(schedule.getInstance().numberOfProcessors())); + schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors())); vector_step_processor_vertices_u = std::vector>>( - schedule.numberOfSupersteps(), - std::vector>(schedule.getInstance().numberOfProcessors())); + schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors())); bounds_array_l = std::vector>>( - schedule.numberOfSupersteps(), - std::vector>(schedule.getInstance().numberOfProcessors())); + schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors())); bounds_array_u = std::vector>>( - schedule.numberOfSupersteps(), - std::vector>(schedule.getInstance().numberOfProcessors())); + schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors())); num_supersteps = schedule.numberOfSupersteps(); size_t number_of_vertices = instance->getComputationalDag().num_vertices(); -#pragma omp parallel num_threads(2) +# pragma omp parallel num_threads(2) { int id = omp_get_thread_num(); switch (id) { - case 0: { - for (size_t node = 0; node < number_of_vertices; ++node) { - vector_step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].push_back(static_cast(node)); - } + case 0: { + for (size_t node = 0; node < number_of_vertices; ++node) { + vector_step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].push_back( + static_cast(node)); + } - for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) { - for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if (!vector_step_processor_vertices[step][proc].empty()) { - eigen_idx_type start = vector_step_processor_vertices[step][proc][0]; - eigen_idx_type prev = vector_step_processor_vertices[step][proc][0]; - - for (size_t i = 1; i < vector_step_processor_vertices[step][proc].size(); ++i) { - if (vector_step_processor_vertices[step][proc][i] != prev + 1) { - bounds_array_l[step][proc].push_back(start); - bounds_array_l[step][proc].push_back(prev); - start = vector_step_processor_vertices[step][proc][i]; + for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) { + for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (!vector_step_processor_vertices[step][proc].empty()) { + eigen_idx_type start = vector_step_processor_vertices[step][proc][0]; + eigen_idx_type prev = vector_step_processor_vertices[step][proc][0]; + + for (size_t i = 1; i < vector_step_processor_vertices[step][proc].size(); ++i) { + if (vector_step_processor_vertices[step][proc][i] != prev + 1) { + bounds_array_l[step][proc].push_back(start); + bounds_array_l[step][proc].push_back(prev); + start = vector_step_processor_vertices[step][proc][i]; + } + prev = vector_step_processor_vertices[step][proc][i]; } - prev = vector_step_processor_vertices[step][proc][i]; - } - bounds_array_l[step][proc].push_back(start); - bounds_array_l[step][proc].push_back(prev); + bounds_array_l[step][proc].push_back(start); + bounds_array_l[step][proc].push_back(prev); + } } } - } - break; - } - case 1: { - size_t node = number_of_vertices; - do { - node--; - vector_step_processor_vertices_u[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].push_back(static_cast(node)); - } while (node > 0); - - for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) { - for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) { - if (!vector_step_processor_vertices_u[step][proc].empty()) { - eigen_idx_type start_u = static_cast(vector_step_processor_vertices_u[step][proc][0]); - eigen_idx_type prev_u = static_cast(vector_step_processor_vertices_u[step][proc][0]); - - for (size_t i = 1; i < vector_step_processor_vertices_u[step][proc].size(); ++i) { - if (static_cast(vector_step_processor_vertices_u[step][proc][i]) != prev_u - 1) { - bounds_array_u[step][proc].push_back(start_u); - bounds_array_u[step][proc].push_back(prev_u); - start_u = static_cast(vector_step_processor_vertices_u[step][proc][i]); + break; + } + case 1: { + size_t node = number_of_vertices; + do { + node--; + vector_step_processor_vertices_u[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] + .push_back(static_cast(node)); + } while (node > 0); + + for (unsigned int step = 0; step < schedule.numberOfSupersteps(); ++step) { + for (unsigned int proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (!vector_step_processor_vertices_u[step][proc].empty()) { + eigen_idx_type start_u + = static_cast(vector_step_processor_vertices_u[step][proc][0]); + eigen_idx_type prev_u + = static_cast(vector_step_processor_vertices_u[step][proc][0]); + + for (size_t i = 1; i < vector_step_processor_vertices_u[step][proc].size(); ++i) { + if (static_cast(vector_step_processor_vertices_u[step][proc][i]) != prev_u - 1) { + bounds_array_u[step][proc].push_back(start_u); + bounds_array_u[step][proc].push_back(prev_u); + start_u = static_cast(vector_step_processor_vertices_u[step][proc][i]); + } + prev_u = static_cast(vector_step_processor_vertices_u[step][proc][i]); } - prev_u = static_cast(vector_step_processor_vertices_u[step][proc][i]); - } - bounds_array_u[step][proc].push_back(start_u); - bounds_array_u[step][proc].push_back(prev_u); + bounds_array_u[step][proc].push_back(start_u); + bounds_array_u[step][proc].push_back(prev_u); + } } } - } - break; - } - default: { - std::cout << "Unexpected Behaviour" << std::endl; - } + break; + } + default: { + std::cout << "Unexpected Behaviour" << std::endl; + } } } } @@ -177,8 +178,8 @@ class Sptrsv { row_ptr.clear(); row_ptr.reserve(instance->numberOfVertices() + 1); - step_proc_ptr = - std::vector>(num_supersteps, std::vector(instance->numberOfProcessors(), 0)); + step_proc_ptr + = std::vector>(num_supersteps, std::vector(instance->numberOfProcessors(), 0)); step_proc_num = schedule.numAssignedNodesPerSuperstepProcessor(); @@ -188,12 +189,8 @@ class Sptrsv { step_proc_ptr[current_step][current_processor] = 0; for (const uVertType &node : perm_inv) { - if (schedule.assignedProcessor(node) != current_processor || schedule.assignedSuperstep(node) != current_step) { - - while (schedule.assignedProcessor(node) != current_processor || - schedule.assignedSuperstep(node) != current_step) { - + while (schedule.assignedProcessor(node) != current_processor || schedule.assignedSuperstep(node) != current_step) { if (current_processor < instance->numberOfProcessors() - 1) { current_processor++; } else { @@ -218,8 +215,9 @@ class Sptrsv { unsigned found = 0; const auto *outer = instance->getComputationalDag().getCSR()->outerIndexPtr(); - for (uVertType par_ind = static_cast(outer[node]); par_ind < static_cast(outer[node + 1] - 1); ++par_ind) { - + for (uVertType par_ind = static_cast(outer[node]); + par_ind < static_cast(outer[node + 1] - 1); + ++par_ind) { if (static_cast(instance->getComputationalDag().getCSR()->innerIndexPtr()[par_ind]) == perm_inv[par]) { val.push_back(instance->getComputationalDag().getCSR()->valuePtr()[par_ind]); found++; @@ -229,7 +227,9 @@ class Sptrsv { } col_idx.push_back(perm[node]); - val.push_back(instance->getComputationalDag().getCSR()->valuePtr()[instance->getComputationalDag().getCSR()->outerIndexPtr()[node + 1] - 1]); + val.push_back(instance->getComputationalDag() + .getCSR() + ->valuePtr()[instance->getComputationalDag().getCSR()->outerIndexPtr()[node + 1] - 1]); } row_ptr.push_back(col_idx.size()); @@ -239,10 +239,14 @@ class Sptrsv { eigen_idx_type number_of_vertices = static_cast(instance->numberOfVertices()); for (eigen_idx_type i = 0; i < number_of_vertices; ++i) { x[i] = b[i]; - for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i]; j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1; ++j) { - x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j] * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]]; + for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i]; + j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1; + ++j) { + x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j] + * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]]; } - x[i] /= (*(instance->getComputationalDag().getCSR())).valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1]; + x[i] /= (*(instance->getComputationalDag().getCSR())) + .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1]; } } @@ -253,15 +257,19 @@ class Sptrsv { do { i--; x[i] = b[i]; - for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1; j < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1]; ++j) { - x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j] * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]]; + for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1; + j < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1]; + ++j) { + x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j] + * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]]; } - x[i] /= (*(instance->getComputationalDag().getCSC())).valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]]; + x[i] /= (*(instance->getComputationalDag().getCSC())) + .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]]; } while (i != 0); } void lsolve_no_permutation_in_place() { -#pragma omp parallel num_threads(instance->numberOfProcessors()) +# pragma omp parallel num_threads(instance->numberOfProcessors()) { const size_t proc = static_cast(omp_get_thread_num()); for (unsigned step = 0; step < num_supersteps; ++step) { @@ -272,19 +280,23 @@ class Sptrsv { const eigen_idx_type upper_b = bounds_array_l[step][proc][index + 1]; for (eigen_idx_type node = lower_b; node <= upper_b; ++node) { - for (eigen_idx_type i = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node]; i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1; ++i) { - x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i] * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]]; + for (eigen_idx_type i = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node]; + i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1; + ++i) { + x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i] + * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]]; } - x[node] /= (*(instance->getComputationalDag().getCSR())).valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1]; + x[node] /= (*(instance->getComputationalDag().getCSR())) + .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1]; } } -#pragma omp barrier +# pragma omp barrier } } } void usolve_no_permutation_in_place() { -#pragma omp parallel num_threads(instance->numberOfProcessors()) +# pragma omp parallel num_threads(instance->numberOfProcessors()) { // Process each superstep starting from the last one (opposite of lsolve) const size_t proc = static_cast(omp_get_thread_num()); @@ -298,19 +310,23 @@ class Sptrsv { do { node--; - for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1; i < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1]; ++i) { - x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i] * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]]; + for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1; + i < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1]; + ++i) { + x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i] + * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]]; } - x[node] /= (*(instance->getComputationalDag().getCSC())).valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]]; + x[node] /= (*(instance->getComputationalDag().getCSC())) + .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]]; } while (node != lower_b); } -#pragma omp barrier +# pragma omp barrier } while (step != 0); } } void lsolve_no_permutation() { -#pragma omp parallel num_threads(instance->numberOfProcessors()) +# pragma omp parallel num_threads(instance->numberOfProcessors()) { const size_t proc = static_cast(omp_get_thread_num()); for (unsigned step = 0; step < num_supersteps; ++step) { @@ -322,19 +338,23 @@ class Sptrsv { for (eigen_idx_type node = lower_b; node <= upper_b; ++node) { x[node] = b[node]; - for (eigen_idx_type i = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node]; i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1; ++i) { - x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i] * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]]; + for (eigen_idx_type i = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node]; + i < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1; + ++i) { + x[node] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[i] + * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[i]]; } - x[node] /= (*(instance->getComputationalDag().getCSR())).valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1]; + x[node] /= (*(instance->getComputationalDag().getCSR())) + .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[node + 1] - 1]; } } -#pragma omp barrier +# pragma omp barrier } } } void usolve_no_permutation() { -#pragma omp parallel num_threads(instance->numberOfProcessors()) +# pragma omp parallel num_threads(instance->numberOfProcessors()) { // Process each superstep starting from the last one (opposite of lsolve) const size_t proc = static_cast(omp_get_thread_num()); @@ -349,13 +369,17 @@ class Sptrsv { do { node--; x[node] = b[node]; - for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1; i < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1]; ++i) { - x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i] * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]]; + for (eigen_idx_type i = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node] + 1; + i < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node + 1]; + ++i) { + x[node] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[i] + * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[i]]; } - x[node] /= (*(instance->getComputationalDag().getCSC())).valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]]; + x[node] /= (*(instance->getComputationalDag().getCSC())) + .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[node]]; } while (node != lower_b); } -#pragma omp barrier +# pragma omp barrier } while (step != 0); } } @@ -363,10 +387,14 @@ class Sptrsv { void lsolve_serial_in_place() { eigen_idx_type number_of_vertices = static_cast(instance->numberOfVertices()); for (eigen_idx_type i = 0; i < number_of_vertices; ++i) { - for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i]; j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1; ++j) { - x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j] * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]]; + for (eigen_idx_type j = (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i]; + j < (*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1; + ++j) { + x[i] -= (*(instance->getComputationalDag().getCSR())).valuePtr()[j] + * x[(*(instance->getComputationalDag().getCSR())).innerIndexPtr()[j]]; } - x[i] /= (*(instance->getComputationalDag().getCSR())).valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1]; + x[i] /= (*(instance->getComputationalDag().getCSR())) + .valuePtr()[(*(instance->getComputationalDag().getCSR())).outerIndexPtr()[i + 1] - 1]; } } @@ -375,22 +403,24 @@ class Sptrsv { eigen_idx_type i = number_of_vertices; do { i--; - for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1; j < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1]; ++j) { - x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j] * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]]; + for (eigen_idx_type j = (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i] + 1; + j < (*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i + 1]; + ++j) { + x[i] -= (*(instance->getComputationalDag().getCSC())).valuePtr()[j] + * x[(*(instance->getComputationalDag().getCSC())).innerIndexPtr()[j]]; } - x[i] /= (*(instance->getComputationalDag().getCSC())).valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]]; + x[i] /= (*(instance->getComputationalDag().getCSC())) + .valuePtr()[(*(instance->getComputationalDag().getCSC())).outerIndexPtr()[i]]; } while (i != 0); } void lsolve_with_permutation_in_place() { -#pragma omp parallel num_threads(instance->numberOfProcessors()) +# pragma omp parallel num_threads(instance->numberOfProcessors()) { for (unsigned step = 0; step < num_supersteps; step++) { - const size_t proc = static_cast(omp_get_thread_num()); const uVertType upper_limit = step_proc_ptr[step][proc] + step_proc_num[step][proc]; for (uVertType _row_idx = step_proc_ptr[step][proc]; _row_idx < upper_limit; _row_idx++) { - for (uVertType i = row_ptr[_row_idx]; i < row_ptr[_row_idx + 1] - 1; i++) { x[_row_idx] -= val[i] * x[col_idx[i]]; } @@ -398,16 +428,15 @@ class Sptrsv { x[_row_idx] /= val[row_ptr[_row_idx + 1] - 1]; } -#pragma omp barrier +# pragma omp barrier } } } void lsolve_with_permutation() { -#pragma omp parallel num_threads(instance->numberOfProcessors()) +# pragma omp parallel num_threads(instance->numberOfProcessors()) { for (unsigned step = 0; step < num_supersteps; step++) { - const size_t proc = static_cast(omp_get_thread_num()); const uVertType upper_limit = step_proc_ptr[step][proc] + step_proc_num[step][proc]; for (uVertType _row_idx = step_proc_ptr[step][proc]; _row_idx < upper_limit; _row_idx++) { @@ -419,7 +448,7 @@ class Sptrsv { x[_row_idx] /= val[row_ptr[_row_idx + 1] - 1]; } -#pragma omp barrier +# pragma omp barrier } } } @@ -451,13 +480,11 @@ class Sptrsv { } } - std::size_t get_number_of_vertices() { - return instance->numberOfVertices(); - } + std::size_t get_number_of_vertices() { return instance->numberOfVertices(); } virtual ~Sptrsv() = default; }; -} // namespace osp +} // namespace osp -#endif \ No newline at end of file +#endif diff --git a/include/osp/bsp/model/BspArchitecture.hpp b/include/osp/bsp/model/BspArchitecture.hpp index 52240fa2..535f0d98 100644 --- a/include/osp/bsp/model/BspArchitecture.hpp +++ b/include/osp/bsp/model/BspArchitecture.hpp @@ -37,13 +37,15 @@ namespace osp { /** * @enum MEMORY_CONSTRAINT_TYPE * @brief Enumerates the different types of memory constraints. - * Memory bounds are set per processor and apply to aggregated memory weights of nodes according to the different types of memory constraints. + * Memory bounds are set per processor and apply to aggregated memory weights of nodes according to the different types of memory + * constraints. */ enum class MEMORY_CONSTRAINT_TYPE { - NONE, /** No memory constraints. */ - LOCAL, /** The memory bounds apply to the sum of memory weights of nodes assigned to the same processor and superstep. */ - GLOBAL, /** The memory bounds apply to the sum of memory weights of the nodes assigned to the same processor. */ - PERSISTENT_AND_TRANSIENT, /** Memory bounds apply to the sum of memory weights of nodes assigned to the same processor plus the maximum communication weight of a node assigned to a processor. */ + NONE, /** No memory constraints. */ + LOCAL, /** The memory bounds apply to the sum of memory weights of nodes assigned to the same processor and superstep. */ + GLOBAL, /** The memory bounds apply to the sum of memory weights of the nodes assigned to the same processor. */ + PERSISTENT_AND_TRANSIENT, /** Memory bounds apply to the sum of memory weights of nodes assigned to the same processor plus + the maximum communication weight of a node assigned to a processor. */ LOCAL_IN_OUT, /** Memory constraints are local in-out. Experimental. */ LOCAL_INC_EDGES, /** Memory constraints are local incident edges. Experimental. */ LOCAL_SOURCES_INC_EDGES /** Memory constraints are local source incident edges. Experimental. */ @@ -55,31 +57,29 @@ enum class MEMORY_CONSTRAINT_TYPE { */ inline const char *to_string(MEMORY_CONSTRAINT_TYPE type) { switch (type) { - case MEMORY_CONSTRAINT_TYPE::NONE: - return "NONE"; - case MEMORY_CONSTRAINT_TYPE::LOCAL: - return "LOCAL"; - case MEMORY_CONSTRAINT_TYPE::GLOBAL: - return "GLOBAL"; - case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT: - return "PERSISTENT_AND_TRANSIENT"; - case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT: - return "LOCAL_IN_OUT"; - case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES: - return "LOCAL_INC_EDGES"; - case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES: - return "LOCAL_SOURCES_INC_EDGES"; - default: - return "UNKNOWN"; + case MEMORY_CONSTRAINT_TYPE::NONE: + return "NONE"; + case MEMORY_CONSTRAINT_TYPE::LOCAL: + return "LOCAL"; + case MEMORY_CONSTRAINT_TYPE::GLOBAL: + return "GLOBAL"; + case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT: + return "PERSISTENT_AND_TRANSIENT"; + case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT: + return "LOCAL_IN_OUT"; + case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES: + return "LOCAL_INC_EDGES"; + case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES: + return "LOCAL_SOURCES_INC_EDGES"; + default: + return "UNKNOWN"; } } /** * @brief Stream operator overload using the helper function. */ -inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { - return os << to_string(type); -} +inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { return os << to_string(type); } /** * @class BspArchitecture @@ -110,7 +110,7 @@ inline std::ostream &operator<<(std::ostream &os, MEMORY_CONSTRAINT_TYPE type) { * Each processor has a memory bound. The `MEMORY_CONSTRAINT_TYPE` determines how these bounds are applied * (e.g., local per superstep, global per processor). */ -template +template class BspArchitecture { static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); @@ -133,7 +133,8 @@ class BspArchitecture { /** @brief Flag to indicate whether the architecture is NUMA , i.e., whether the send costs are different for different pairs of processors. */ bool isNuma_; - /** @brief The architecture allows to specify processor types. Processor types are used to express compatabilities, which can be specified in the BspInstance, regarding node types. */ + /** @brief The architecture allows to specify processor types. Processor types are used to express compatabilities, which can + * be specified in the BspInstance, regarding node types. */ std::vector processorTypes_; /** @brief A flattened p x p matrix of send costs. Access via index [i * numberOfProcessors_ + j]. */ @@ -148,16 +149,19 @@ class BspArchitecture { } bool AreSendCostsNuma() { - if (numberOfProcessors_ == 1U) + if (numberOfProcessors_ == 1U) { return false; + } const v_commw_t val = sendCosts_[1U]; for (unsigned p1 = 0U; p1 < numberOfProcessors_; p1++) { for (unsigned p2 = 0U; p2 < numberOfProcessors_; p2++) { - if (p1 == p2) + if (p1 == p2) { continue; - if (sendCosts_[FlatIndex(p1, p2)] != val) + } + if (sendCosts_[FlatIndex(p1, p2)] != val) { return true; + } } } return false; @@ -193,13 +197,20 @@ class BspArchitecture { * @param CommunicationCost The communication cost between processors. Default: 1. * @param SynchronisationCost The synchronization cost between processors. Default: 2. * @param MemoryBound The memory bound for each processor (default: 100). - * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal entries are forced to zero. Default: empty (uniform costs). - */ - BspArchitecture(const unsigned NumberOfProcessors = 2U, const v_commw_t CommunicationCost = 1U, const v_commw_t SynchronisationCost = 2U, - const v_memw_t MemoryBound = 100U, const std::vector>> &SendCosts = {}) - : numberOfProcessors_(NumberOfProcessors), numberOfProcessorTypes_(1U), communicationCosts_(CommunicationCost), + * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal + * entries are forced to zero. Default: empty (uniform costs). + */ + BspArchitecture(const unsigned NumberOfProcessors = 2U, + const v_commw_t CommunicationCost = 1U, + const v_commw_t SynchronisationCost = 2U, + const v_memw_t MemoryBound = 100U, + const std::vector>> &SendCosts = {}) + : numberOfProcessors_(NumberOfProcessors), + numberOfProcessorTypes_(1U), + communicationCosts_(CommunicationCost), synchronisationCosts_(SynchronisationCost), - memoryBound_(NumberOfProcessors, MemoryBound), isNuma_(false), + memoryBound_(NumberOfProcessors, MemoryBound), + isNuma_(false), processorTypes_(NumberOfProcessors, 0U) { if (NumberOfProcessors == 0U) { throw std::runtime_error("BspArchitecture: Number of processors must be greater than 0."); @@ -211,8 +222,9 @@ class BspArchitecture { if (NumberOfProcessors != SendCosts.size()) { throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); } - if (std::any_of(SendCosts.begin(), SendCosts.end(), - [NumberOfProcessors](const auto &thing) { return thing.size() != NumberOfProcessors; })) { + if (std::any_of(SendCosts.begin(), SendCosts.end(), [NumberOfProcessors](const auto &thing) { + return thing.size() != NumberOfProcessors; + })) { throw std::invalid_argument("sendCosts_ needs to be a processors x processors matrix.\n"); } @@ -238,11 +250,15 @@ class BspArchitecture { * @tparam Graph_t_other The graph type of the other BspArchitecture. * @param other The other BspArchitecture object. */ - template + template BspArchitecture(const BspArchitecture &other) - : numberOfProcessors_(other.numberOfProcessors()), numberOfProcessorTypes_(other.getNumberOfProcessorTypes()), - communicationCosts_(other.communicationCosts()), synchronisationCosts_(other.synchronisationCosts()), - memoryBound_(other.memoryBound()), isNuma_(other.isNumaArchitecture()), processorTypes_(other.processorTypes()), + : numberOfProcessors_(other.numberOfProcessors()), + numberOfProcessorTypes_(other.getNumberOfProcessorTypes()), + communicationCosts_(other.communicationCosts()), + synchronisationCosts_(other.synchronisationCosts()), + memoryBound_(other.memoryBound()), + isNuma_(other.isNumaArchitecture()), + processorTypes_(other.processorTypes()), sendCosts_(other.sendCostsVector()) { static_assert(std::is_same_v, v_memw_t>, "BspArchitecture: Graph_t and Graph_t_other have the same memory weight type."); @@ -260,9 +276,12 @@ class BspArchitecture { * @param NumberOfProcessors The number of processors. Must be greater than 0. * @param CommunicationCost The communication cost. * @param SynchronisationCost The synchronization cost. - * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal entries are forced to zero. + * @param SendCosts The matrix of send costs between processors. Needs to be a processors x processors matrix. Diagonal + * entries are forced to zero. */ - BspArchitecture(const unsigned NumberOfProcessors, const v_commw_t CommunicationCost, const v_commw_t SynchronisationCost, + BspArchitecture(const unsigned NumberOfProcessors, + const v_commw_t CommunicationCost, + const v_commw_t SynchronisationCost, const std::vector>> &SendCosts) : BspArchitecture(NumberOfProcessors, CommunicationCost, SynchronisationCost, 100U, SendCosts) {} @@ -296,8 +315,7 @@ class BspArchitecture { unsigned maxPos = 1; constexpr unsigned two = 2; - for (; intpow(two, maxPos + 1) <= numberOfProcessors_ - 1; ++maxPos) { - } + for (; intpow(two, maxPos + 1) <= numberOfProcessors_ - 1; ++maxPos) {} for (unsigned i = 0U; i < numberOfProcessors_; ++i) { for (unsigned j = i + 1U; j < numberOfProcessors_; ++j) { @@ -357,8 +375,9 @@ class BspArchitecture { * @throws std::invalid_argument if the processor indices are out of bounds. */ void SetSendCosts(const unsigned p1, const unsigned p2, const v_commw_t cost) { - if (p1 >= numberOfProcessors_ || p2 >= numberOfProcessors_) // Fixed condition: p2 >= number_processors + if (p1 >= numberOfProcessors_ || p2 >= numberOfProcessors_) { // Fixed condition: p2 >= number_processors throw std::invalid_argument("Invalid Argument: Processor index out of bounds."); + } if (p1 != p2) { sendCosts_.at(FlatIndex(p1, p2)) = cost; @@ -370,9 +389,7 @@ class BspArchitecture { * @brief Sets the memory bound for all processors. * @param MemoryBound The new memory bound for all processors. */ - void setMemoryBound(const v_memw_t MemoryBound) { - memoryBound_.assign(numberOfProcessors_, MemoryBound); - } + void setMemoryBound(const v_memw_t MemoryBound) { memoryBound_.assign(numberOfProcessors_, MemoryBound); } /** * @brief Sets the memory bound for all processors using a vector. @@ -434,8 +451,8 @@ class BspArchitecture { } /** - * @brief Sets the number of processors and their types. Number of processors is set to the size of the processor types vector. - * Resets send costs to uniform (1). Resets memory bound to 100 for all processors. + * @brief Sets the number of processors and their types. Number of processors is set to the size of the processor types + * vector. Resets send costs to uniform (1). Resets memory bound to 100 for all processors. * @param processorTypes The types of the respective processors. */ void setProcessorsWithTypes(const std::vector> &processorTypes) { @@ -509,7 +526,9 @@ class BspArchitecture { * @brief Returns the maximum memory bound over all processors. * @return The maximum memory bound. */ - [[nodiscard]] v_memw_t maxMemoryBound() const { return *(std::max_element(memoryBound_.begin(), memoryBound_.end())); } + [[nodiscard]] v_memw_t maxMemoryBound() const { + return *(std::max_element(memoryBound_.begin(), memoryBound_.end())); + } /** * @brief Returns the maximum memory bound over all processors of a specific type. @@ -546,11 +565,13 @@ class BspArchitecture { [[nodiscard]] v_commw_t synchronisationCosts() const { return synchronisationCosts_; } /** - * @brief Returns a the send costs matrix. Internally the matrix is stored as a flattened matrix. The allocates, computes and returns the matrix on the fly. + * @brief Returns a the send costs matrix. Internally the matrix is stored as a flattened matrix. The allocates, computes and + * returns the matrix on the fly. * @return The send costs matrix. */ [[nodiscard]] std::vector>> sendCost() const { - std::vector>> matrix(numberOfProcessors_, std::vector>(numberOfProcessors_)); + std::vector>> matrix(numberOfProcessors_, + std::vector>(numberOfProcessors_)); for (unsigned i = 0; i < numberOfProcessors_; ++i) { for (unsigned j = 0; j < numberOfProcessors_; ++j) { matrix[i][j] = sendCosts_[FlatIndex(i, j)]; @@ -591,7 +612,9 @@ class BspArchitecture { * @param p2 The index of the second processor. * @return The send costs between the two processors. */ - [[nodiscard]] v_commw_t sendCosts(const unsigned p1, const unsigned p2) const { return sendCosts_[FlatIndex(p1, p2)]; } + [[nodiscard]] v_commw_t sendCosts(const unsigned p1, const unsigned p2) const { + return sendCosts_[FlatIndex(p1, p2)]; + } /** * @brief Returns the type of a specific processor. Does not perform bounds checking. @@ -628,9 +651,8 @@ class BspArchitecture { */ void print(std::ostream &os) const { os << "Architecture info: number of processors: " << numberOfProcessors_ - << ", Number of processor types: " << numberOfProcessorTypes_ - << ", Communication costs: " << communicationCosts_ << ", Synchronization costs: " << synchronisationCosts_ - << "\n"; + << ", Number of processor types: " << numberOfProcessorTypes_ << ", Communication costs: " << communicationCosts_ + << ", Synchronization costs: " << synchronisationCosts_ << "\n"; os << std::setw(17) << " Processor: "; for (unsigned i = 0U; i < numberOfProcessors_; i++) { os << std::right << std::setw(5) << i << " "; @@ -651,9 +673,10 @@ class BspArchitecture { [[nodiscard]] unsigned getNumberOfProcessorTypes() const { return numberOfProcessorTypes_; }; [[nodiscard]] MEMORY_CONSTRAINT_TYPE getMemoryConstraintType() const { return memoryConstraintType_; } + void setMemoryConstraintType(const MEMORY_CONSTRAINT_TYPE memoryConstraintType) { memoryConstraintType_ = memoryConstraintType; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/model/BspInstance.hpp b/include/osp/bsp/model/BspInstance.hpp index 72f16e1e..34c17b98 100644 --- a/include/osp/bsp/model/BspInstance.hpp +++ b/include/osp/bsp/model/BspInstance.hpp @@ -57,7 +57,7 @@ namespace osp { * * @tparam Graph_t The type of the computational DAG, which must satisfy the `is_computational_dag` concept. */ -template +template class BspInstance { static_assert(is_computational_dag_v, "BspInstance can only be used with computational DAGs."); @@ -91,7 +91,8 @@ class BspInstance { * @brief The type of the vectex types in the computational DAG. * If the DAG does not support vertex types, this is `unsigned`. */ - using vertex_type_t_or_default = std::conditional_t, v_type_t, unsigned>; + using vertex_type_t_or_default + = std::conditional_t, v_type_t, unsigned>; using processor_type_t = unsigned; public: @@ -107,7 +108,8 @@ class BspInstance { * @param cdag The computational DAG for the instance. * @param architecture The BSP architecture for the instance. */ - BspInstance(const Graph_t &cdag_, const BspArchitecture &architecture_, + BspInstance(const Graph_t &cdag_, + const BspArchitecture &architecture_, std::vector> nodeProcessorCompatibility_ = std::vector>({{true}})) : cdag(cdag_), architecture(architecture_), nodeProcessorCompatibility(nodeProcessorCompatibility_) {} @@ -118,15 +120,16 @@ class BspInstance { * @param cdag The computational DAG for the instance. * @param architecture The BSP architecture for the instance. */ - BspInstance(Graph_t &&cdag_, BspArchitecture &&architecture_, + BspInstance(Graph_t &&cdag_, + BspArchitecture &&architecture_, std::vector> nodeProcessorCompatibility_ = std::vector>({{true}})) - : cdag(std::move(cdag_)), architecture(std::move(architecture_)), nodeProcessorCompatibility(nodeProcessorCompatibility_) { - } + : cdag(std::move(cdag_)), + architecture(std::move(architecture_)), + nodeProcessorCompatibility(nodeProcessorCompatibility_) {} - template + template explicit BspInstance(const BspInstance &other) - : architecture(other.getArchitecture()), - nodeProcessorCompatibility(other.getNodeProcessorCompatibilityMatrix()) { + : architecture(other.getArchitecture()), nodeProcessorCompatibility(other.getNodeProcessorCompatibilityMatrix()) { constructComputationalDag(other.getComputationalDag(), cdag); } @@ -142,6 +145,7 @@ class BspInstance { * The move operator may be used to transfer ownership of the architecture. */ [[nodiscard]] const BspArchitecture &getArchitecture() const { return architecture; } + [[nodiscard]] BspArchitecture &getArchitecture() { return architecture; } /** @@ -150,6 +154,7 @@ class BspInstance { * The move operator may be used to transfer ownership of the DAG. */ [[nodiscard]] const Graph_t &getComputationalDag() const { return cdag; } + [[nodiscard]] Graph_t &getComputationalDag() { return cdag; } /** @@ -202,9 +207,7 @@ class BspInstance { /** * @brief Returns the flattened send costs vector. */ - [[nodiscard]] const std::vector> &sendCostsVector() const { - return architecture.sendCostsVector(); - } + [[nodiscard]] const std::vector> &sendCostsVector() const { return architecture.sendCostsVector(); } /** * @brief Returns the communication costs of the BSP architecture. @@ -288,7 +291,9 @@ class BspInstance { /** * @brief Returns the node type - processor type compatibility matrix. */ - [[nodiscard]] const std::vector> &getProcessorCompatibilityMatrix() const { return nodeProcessorCompatibility; } + [[nodiscard]] const std::vector> &getProcessorCompatibilityMatrix() const { + return nodeProcessorCompatibility; + } /** * @brief Sets the compatibility matrix to be diagonal. This implies that node type `i` is only compatible with processor type `i`. @@ -296,15 +301,17 @@ class BspInstance { */ void setDiagonalCompatibilityMatrix(const vertex_type_t_or_default number_of_types) { nodeProcessorCompatibility.assign(number_of_types, std::vector(number_of_types, false)); - for (vertex_type_t_or_default i = 0; i < number_of_types; ++i) + for (vertex_type_t_or_default i = 0; i < number_of_types; ++i) { nodeProcessorCompatibility[i][i] = true; + } } /** * @brief Sets the compatibility matrix to all ones. This implies that all node types are compatible with all processor types. */ void setAllOnesCompatibilityMatrix() { - nodeProcessorCompatibility.assign(cdag.num_vertex_types(), std::vector(architecture.getNumberOfProcessorTypes(), true)); + nodeProcessorCompatibility.assign(cdag.num_vertex_types(), + std::vector(architecture.getNumberOfProcessorTypes(), true)); } /** @@ -314,8 +321,8 @@ class BspInstance { [[nodiscard]] bool CheckMemoryConstraintsFeasibility() const { std::vector> max_memory_per_proc_type(architecture.getNumberOfProcessorTypes(), 0); for (unsigned proc = 0U; proc < architecture.numberOfProcessors(); proc++) { - max_memory_per_proc_type[architecture.processorType(proc)] = - std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); + max_memory_per_proc_type[architecture.processorType(proc)] + = std::max(max_memory_per_proc_type[architecture.processorType(proc)], architecture.memoryBound(proc)); } for (vertex_type_t_or_default vertType = 0U; vertType < cdag.num_vertex_types(); vertType++) { @@ -325,13 +332,15 @@ class BspInstance { for (processor_type_t proc_type = 0U; proc_type < architecture.getNumberOfProcessorTypes(); proc_type++) { if (isCompatibleType(vertType, proc_type)) { fits = fits | (max_memory_of_type <= max_memory_per_proc_type[proc_type]); - if (fits) + if (fits) { break; + } } } - if (!fits) + if (!fits) { return false; + } } return true; @@ -346,13 +355,16 @@ class BspInstance { processor_type_t numberOfProcTypes = architecture.getNumberOfProcessorTypes(); std::vector> compatibleProcTypes(numberOfNodeTypes); - for (vertex_type_t_or_default nodeType = 0U; nodeType < numberOfNodeTypes; ++nodeType) - for (processor_type_t processorType = 0U; processorType < numberOfProcTypes; ++processorType) - if (isCompatibleType(nodeType, processorType)) + for (vertex_type_t_or_default nodeType = 0U; nodeType < numberOfNodeTypes; ++nodeType) { + for (processor_type_t processorType = 0U; processorType < numberOfProcTypes; ++processorType) { + if (isCompatibleType(nodeType, processorType)) { compatibleProcTypes[nodeType].push_back(processorType); + } + } + } return compatibleProcTypes; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/model/BspSchedule.hpp b/include/osp/bsp/model/BspSchedule.hpp index 9e5a5d52..23fe804e 100644 --- a/include/osp/bsp/model/BspSchedule.hpp +++ b/include/osp/bsp/model/BspSchedule.hpp @@ -45,18 +45,19 @@ namespace osp { * This class is templated on `Graph_t`, which must satisfy the `computational_dag_concept`. * Moreover, the work and communication weights of the nodes must be of the same type in order to properly compute the cost. * - * It interacts closely with `BspInstance` to access problem-specific data and constraints. In fact, a `BspSchedule` object is tied to a `BspInstance` object. + * It interacts closely with `BspInstance` to access problem-specific data and constraints. In fact, a `BspSchedule` object is + * tied to a `BspInstance` object. * * @tparam Graph_t The type of the computational DAG, which must satisfy `is_computational_dag_v`. * @see BspInstance * @see IBspSchedule * @see IBspScheduleEval */ -template +template class BspSchedule : public IBspSchedule, public IBspScheduleEval { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, "BspSchedule requires work and comm. weights to have the same type."); + static_assert(std::is_same_v, v_commw_t>, + "BspSchedule requires work and comm. weights to have the same type."); protected: using vertex_idx = vertex_idx_t; @@ -77,7 +78,8 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &inst) - : instance(&inst), number_of_supersteps(1), + : instance(&inst), + number_of_supersteps(1), node_to_processor_assignment(std::vector(inst.numberOfVertices(), 0)), node_to_superstep_assignment(std::vector(inst.numberOfVertices(), 0)) {} @@ -89,9 +91,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &inst, const std::vector &processor_assignment_, + BspSchedule(const BspInstance &inst, + const std::vector &processor_assignment_, const std::vector &superstep_assignment_) - : instance(&inst), node_to_processor_assignment(processor_assignment_), + : instance(&inst), + node_to_processor_assignment(processor_assignment_), node_to_superstep_assignment(superstep_assignment_) { updateNumberOfSupersteps(); } @@ -102,7 +106,8 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &schedule) - : instance(&schedule.getInstance()), number_of_supersteps(schedule.numberOfSupersteps()), + : instance(&schedule.getInstance()), + number_of_supersteps(schedule.numberOfSupersteps()), node_to_processor_assignment(schedule.getInstance().numberOfVertices()), node_to_superstep_assignment(schedule.getInstance().numberOfVertices()) { for (const auto &v : schedule.getInstance().getComputationalDag().vertices()) { @@ -117,7 +122,8 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &schedule) - : instance(schedule.instance), number_of_supersteps(schedule.number_of_supersteps), + : instance(schedule.instance), + number_of_supersteps(schedule.number_of_supersteps), node_to_processor_assignment(schedule.node_to_processor_assignment), node_to_superstep_assignment(schedule.node_to_superstep_assignment) {} @@ -143,7 +149,8 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &&schedule) noexcept - : instance(schedule.instance), number_of_supersteps(schedule.number_of_supersteps), + : instance(schedule.instance), + number_of_supersteps(schedule.number_of_supersteps), node_to_processor_assignment(std::move(schedule.node_to_processor_assignment)), node_to_superstep_assignment(std::move(schedule.node_to_superstep_assignment)) {} @@ -170,9 +177,10 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval + template BspSchedule(const BspInstance &instance_, const BspSchedule &schedule) - : instance(&instance_), number_of_supersteps(schedule.numberOfSupersteps()), + : instance(&instance_), + number_of_supersteps(schedule.numberOfSupersteps()), node_to_processor_assignment(schedule.assignedProcessors()), node_to_superstep_assignment(schedule.assignedSupersteps()) {} @@ -229,6 +237,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &assignedSupersteps() const { return node_to_superstep_assignment; } + [[nodiscard]] std::vector &assignedSupersteps() { return node_to_superstep_assignment; } /** @@ -237,12 +246,13 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval &assignedProcessors() const { return node_to_processor_assignment; } + [[nodiscard]] std::vector &assignedProcessors() { return node_to_processor_assignment; } /** * @brief Returns the staleness of the schedule. - * The staleness determines the minimum number of supersteps that must elapse between the assignment of a node to a processor and the assignment of one of its neighbors to a different processor. - * The staleness for the BspSchedule is always 1. + * The staleness determines the minimum number of supersteps that must elapse between the assignment of a node to a processor + * and the assignment of one of its neighbors to a different processor. The staleness for the BspSchedule is always 1. * * @return The staleness of the schedule. */ @@ -304,8 +314,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval(instance->numberOfVertices())) { node_to_superstep_assignment = std::move(vec); } else { - throw std::invalid_argument( - "Invalid Argument while assigning supersteps: size does not match number of nodes."); + throw std::invalid_argument("Invalid Argument while assigning supersteps: size does not match number of nodes."); } updateNumberOfSupersteps(); @@ -334,8 +342,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval(instance->numberOfVertices())) { node_to_processor_assignment = vec; } else { - throw std::invalid_argument( - "Invalid Argument while assigning processors: size does not match number of nodes."); + throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } @@ -348,8 +355,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval(instance->numberOfVertices())) { node_to_processor_assignment = std::move(vec); } else { - throw std::invalid_argument( - "Invalid Argument while assigning processors: size does not match number of nodes."); + throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } @@ -377,7 +383,9 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEval>(node_to_processor_assignment.size()) != instance->numberOfVertices() || - static_cast>(node_to_superstep_assignment.size()) != instance->numberOfVertices()) { + if (static_cast>(node_to_processor_assignment.size()) != instance->numberOfVertices() + || static_cast>(node_to_superstep_assignment.size()) != instance->numberOfVertices()) { return false; } @@ -403,7 +411,8 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalgetComputationalDag().children(v)) { - const unsigned different_processors = (node_to_processor_assignment[v] == node_to_processor_assignment[target]) ? 0u : getStaleness(); + const unsigned different_processors + = (node_to_processor_assignment[v] == node_to_processor_assignment[target]) ? 0u : getStaleness(); if (node_to_superstep_assignment[v] + different_processors > node_to_superstep_assignment[target]) { return false; } @@ -442,32 +451,30 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalgetArchitecture().getMemoryConstraintType()) { + case MEMORY_CONSTRAINT_TYPE::LOCAL: + return satisfiesLocalMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::LOCAL: - return satisfiesLocalMemoryConstraints(); - - case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT: - return satisfiesPersistentAndTransientMemoryConstraints(); + case MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT: + return satisfiesPersistentAndTransientMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::GLOBAL: - return satisfiesGlobalMemoryConstraints(); + case MEMORY_CONSTRAINT_TYPE::GLOBAL: + return satisfiesGlobalMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT: - return satisfiesLocalInOutMemoryConstraints(); + case MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT: + return satisfiesLocalInOutMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES: - return satisfiesLocalIncEdgesMemoryConstraints(); + case MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES: + return satisfiesLocalIncEdgesMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES: - return satisfiesLocalSourcesIncEdgesMemoryConstraints(); + case MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES: + return satisfiesLocalSourcesIncEdgesMemoryConstraints(); - case MEMORY_CONSTRAINT_TYPE::NONE: - return true; + case MEMORY_CONSTRAINT_TYPE::NONE: + return true; - default: - throw std::invalid_argument("Unknown memory constraint type."); + default: + throw std::invalid_argument("Unknown memory constraint type."); } } @@ -496,7 +503,8 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval> getAssignedNodeVector(const unsigned processor, const unsigned superstep) const { + [[nodiscard]] std::vector> getAssignedNodeVector(const unsigned processor, + const unsigned superstep) const { std::vector> vec; for (const auto &node : instance->vertices()) { @@ -513,9 +521,7 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval, public IBspScheduleEvalvertices()) { for (const auto &child : instance->getComputationalDag().children(node)) { if (node_to_processor_assignment[node] != node_to_processor_assignment[child]) { - for (unsigned offset = 1; offset <= getStaleness(); ++offset) + for (unsigned offset = 1; offset <= getStaleness(); ++offset) { comm_phase_empty[node_to_superstep_assignment[child] - offset] = false; + } } } } @@ -583,8 +590,9 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalvertices()) { node_to_superstep_assignment[node] = new_step_index[node_to_superstep_assignment[node]]; @@ -606,7 +614,6 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { - v_memw_t memory = 0; for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { memory += instance->getComputationalDag().vertex_mem_weight(node); @@ -636,11 +643,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalvertices()) { const unsigned proc = node_to_processor_assignment[node]; current_proc_persistent_memory[proc] += instance->getComputationalDag().vertex_mem_weight(node); - current_proc_transient_memory[proc] = std::max( - current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(node)); + current_proc_transient_memory[proc] + = std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(node)); - if (current_proc_persistent_memory[proc] + current_proc_transient_memory[proc] > - instance->getArchitecture().memoryBound(proc)) { + if (current_proc_persistent_memory[proc] + current_proc_transient_memory[proc] + > instance->getArchitecture().memoryBound(proc)) { return false; } } @@ -676,13 +683,11 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEvalnumberOfProcessors(); proc++) { v_memw_t memory = 0; for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { - memory += instance->getComputationalDag().vertex_mem_weight(node) + - instance->getComputationalDag().vertex_comm_weight(node); + memory += instance->getComputationalDag().vertex_mem_weight(node) + + instance->getComputationalDag().vertex_comm_weight(node); for (const auto &parent : instance->getComputationalDag().parents(node)) { - - if (node_to_processor_assignment[parent] == proc && - node_to_superstep_assignment[parent] == step) { + if (node_to_processor_assignment[parent] == proc && node_to_superstep_assignment[parent] == step) { memory -= instance->getComputationalDag().vertex_comm_weight(parent); } } @@ -760,4 +765,4 @@ class BspSchedule : public IBspSchedule, public IBspScheduleEval +template class BspScheduleCS : public BspSchedule { - static_assert(is_computational_dag_v, "BspScheduleCS can only be used with computational DAGs."); public: @@ -63,14 +62,15 @@ class BspScheduleCS : public BspSchedule { std::map commSchedule; protected: - void compute_cs_communication_costs_helper(std::vector>> &rec, std::vector>> &send) const { + void compute_cs_communication_costs_helper(std::vector>> &rec, + std::vector>> &send) const { for (auto const &[key, val] : commSchedule) { - send[std::get<1>(key)][val] += - BspSchedule::instance->sendCosts(std::get<1>(key), std::get<2>(key)) * - BspSchedule::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); - rec[std::get<2>(key)][val] += - BspSchedule::instance->sendCosts(std::get<1>(key), std::get<2>(key)) * - BspSchedule::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); + send[std::get<1>(key)][val] + += BspSchedule::instance->sendCosts(std::get<1>(key), std::get<2>(key)) + * BspSchedule::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); + rec[std::get<2>(key)][val] + += BspSchedule::instance->sendCosts(std::get<1>(key), std::get<2>(key)) + * BspSchedule::instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); } } @@ -92,7 +92,8 @@ class BspScheduleCS : public BspSchedule { * @param processor_assignment_ The processor assignment for the nodes. * @param superstep_assignment_ The superstep assignment for the nodes. */ - BspScheduleCS(const BspInstance &inst, const std::vector &processor_assignment_, + BspScheduleCS(const BspInstance &inst, + const std::vector &processor_assignment_, const std::vector &superstep_assignment_) : BspSchedule(inst, processor_assignment_, superstep_assignment_) {} @@ -105,7 +106,8 @@ class BspScheduleCS : public BspSchedule { * @param superstep_assignment_ The superstep assignment for the nodes. * @param comm_ The communication schedule for the nodes. */ - BspScheduleCS(const BspInstance &inst, const std::vector &processor_assignment_, + BspScheduleCS(const BspInstance &inst, + const std::vector &processor_assignment_, const std::vector &superstep_assignment_, const std::map &comm_) : BspSchedule(inst, processor_assignment_, superstep_assignment_), commSchedule(comm_) {} @@ -136,27 +138,28 @@ class BspScheduleCS : public BspSchedule { virtual ~BspScheduleCS() = default; inline const std::map &getCommunicationSchedule() const { return commSchedule; } + inline std::map &getCommunicationSchedule() { return commSchedule; } inline bool hasValidCommSchedule() const { return checkCommScheduleValidity(commSchedule); } void addCommunicationScheduleEntry(KeyTriple key, unsigned step) { + if (step >= BspSchedule::number_of_supersteps) { + throw std::invalid_argument("Invalid Argument while adding communication schedule entry: step out of range."); + } - if (step >= BspSchedule::number_of_supersteps) - throw std::invalid_argument( - "Invalid Argument while adding communication schedule entry: step out of range."); - - if (std::get<0>(key) >= BspSchedule::instance->numberOfVertices()) - throw std::invalid_argument( - "Invalid Argument while adding communication schedule entry: node out of range."); + if (std::get<0>(key) >= BspSchedule::instance->numberOfVertices()) { + throw std::invalid_argument("Invalid Argument while adding communication schedule entry: node out of range."); + } - if (std::get<1>(key) >= BspSchedule::instance->numberOfProcessors()) + if (std::get<1>(key) >= BspSchedule::instance->numberOfProcessors()) { throw std::invalid_argument( "Invalid Argument while adding communication schedule entry: from processor out of range."); + } - if (std::get<2>(key) >= BspSchedule::instance->numberOfProcessors()) - throw std::invalid_argument( - "Invalid Argument while adding communication schedule entry: to processor out of range."); + if (std::get<2>(key) >= BspSchedule::instance->numberOfProcessors()) { + throw std::invalid_argument("Invalid Argument while adding communication schedule entry: to processor out of range."); + } commSchedule[key] = step; } @@ -179,7 +182,6 @@ class BspScheduleCS : public BspSchedule { * @param cs The communication schedule to set. */ void setCommunicationSchedule(const std::map &cs) { - if (checkCommScheduleValidity(cs)) { commSchedule = cs; } else { @@ -188,37 +190,38 @@ class BspScheduleCS : public BspSchedule { } bool checkCommScheduleValidity(const std::map &cs) const { - - std::vector> first_at = std::vector>( - BspSchedule::instance->numberOfVertices(), - std::vector(BspSchedule::instance->numberOfProcessors(), - BspSchedule::number_of_supersteps)); + std::vector> first_at + = std::vector>(BspSchedule::instance->numberOfVertices(), + std::vector(BspSchedule::instance->numberOfProcessors(), + BspSchedule::number_of_supersteps)); for (const auto &node : BspSchedule::instance->vertices()) { - first_at[node][BspSchedule::node_to_processor_assignment[node]] = - BspSchedule::node_to_superstep_assignment[node]; + first_at[node][BspSchedule::node_to_processor_assignment[node]] + = BspSchedule::node_to_superstep_assignment[node]; } for (auto const &[key, val] : cs) { - - if (val >= BspSchedule::number_of_supersteps) + if (val >= BspSchedule::number_of_supersteps) { return false; + } - if (std::get<0>(key) >= BspSchedule::instance->numberOfVertices()) + if (std::get<0>(key) >= BspSchedule::instance->numberOfVertices()) { return false; + } - if (std::get<1>(key) >= BspSchedule::instance->numberOfProcessors()) + if (std::get<1>(key) >= BspSchedule::instance->numberOfProcessors()) { return false; + } - if (std::get<2>(key) >= BspSchedule::instance->numberOfProcessors()) + if (std::get<2>(key) >= BspSchedule::instance->numberOfProcessors()) { return false; + } - first_at[std::get<0>(key)][std::get<2>(key)] = - std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + this->getStaleness()); + first_at[std::get<0>(key)][std::get<2>(key)] + = std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + this->getStaleness()); } for (auto const &[key, val] : cs) { - if (val < first_at[std::get<0>(key)][std::get<1>(key)]) { return false; } @@ -226,11 +229,10 @@ class BspScheduleCS : public BspSchedule { for (const auto &v : BspSchedule::instance->getComputationalDag().vertices()) { for (const auto &target : BspSchedule::instance->getComputationalDag().children(v)) { - - if (BspSchedule::node_to_processor_assignment[v] != - BspSchedule::node_to_processor_assignment[target]) { - if (first_at[v][BspSchedule::node_to_processor_assignment[target]] > - BspSchedule::node_to_superstep_assignment[target]) { + if (BspSchedule::node_to_processor_assignment[v] + != BspSchedule::node_to_processor_assignment[target]) { + if (first_at[v][BspSchedule::node_to_processor_assignment[target]] + > BspSchedule::node_to_superstep_assignment[target]) { return false; } } @@ -241,9 +243,10 @@ class BspScheduleCS : public BspSchedule { } v_commw_t compute_cs_communication_costs() const { - - std::vector>> rec(this->instance->numberOfProcessors(), std::vector>(this->number_of_supersteps, 0)); - std::vector>> send(this->instance->numberOfProcessors(), std::vector>(this->number_of_supersteps, 0)); + std::vector>> rec(this->instance->numberOfProcessors(), + std::vector>(this->number_of_supersteps, 0)); + std::vector>> send(this->instance->numberOfProcessors(), + std::vector>(this->number_of_supersteps, 0)); compute_cs_communication_costs_helper(rec, send); const std::vector> max_comm_per_step = cost_helpers::compute_max_comm_per_step(*this, rec, send); @@ -266,8 +269,8 @@ class BspScheduleCS : public BspSchedule { void setAutoCommunicationSchedule() { std::map best_comm_schedule; - v_workw_t best_comm_cost = - std::numeric_limits>::max(); // computeCosts retunrs v_workw_t + v_workw_t best_comm_cost + = std::numeric_limits>::max(); // computeCosts retunrs v_workw_t if (hasValidCommSchedule()) { v_workw_t costs_com = BspSchedule::computeCosts(); @@ -306,9 +309,10 @@ class BspScheduleCS : public BspSchedule { void setImprovedLazyCommunicationSchedule() { commSchedule.clear(); - if (BspSchedule::instance->getComputationalDag().num_vertices() <= 1 || - BspSchedule::number_of_supersteps <= 1) + if (BspSchedule::instance->getComputationalDag().num_vertices() <= 1 + || BspSchedule::number_of_supersteps <= 1) { return; + } std::vector>>> step_proc_node_list( BspSchedule::number_of_supersteps, @@ -327,17 +331,18 @@ class BspScheduleCS : public BspSchedule { // The data structure stores for each processor a set of tuples representing required sends. // Each tuple is (communication_cost, source_node, destination_processor). - std::vector, vertex_idx_t, unsigned>, std::greater<>>> require_sending(BspSchedule::instance->numberOfProcessors()); + std::vector, vertex_idx_t, unsigned>, std::greater<>>> require_sending( + BspSchedule::instance->numberOfProcessors()); for (unsigned proc = 0; proc < BspSchedule::instance->numberOfProcessors(); proc++) { for (const auto &node : step_proc_node_list[0][proc]) { - for (const auto &target : BspSchedule::instance->getComputationalDag().children(node)) { if (proc != BspSchedule::assignedProcessor(target)) { - require_sending[proc].insert( - {BspSchedule::instance->getComputationalDag().vertex_comm_weight(node) * BspSchedule::instance->getArchitecture().sendCosts(proc, BspSchedule::node_to_processor_assignment[target]), - node, - BspSchedule::node_to_processor_assignment[target]}); + require_sending[proc].insert({BspSchedule::instance->getComputationalDag().vertex_comm_weight(node) + * BspSchedule::instance->getArchitecture().sendCosts( + proc, BspSchedule::node_to_processor_assignment[target]), + node, + BspSchedule::node_to_processor_assignment[target]}); } } } @@ -351,18 +356,16 @@ class BspScheduleCS : public BspSchedule { for (unsigned proc = 0; proc < BspSchedule::instance->numberOfProcessors(); proc++) { for (const auto &node : step_proc_node_list[step][proc]) { for (const auto &source : BspSchedule::instance->getComputationalDag().parents(node)) { - if (!node_to_proc_been_sent[source][proc]) { assert(BspSchedule::node_to_superstep_assignment[source] < step + 1 - this->getStaleness()); commSchedule.emplace( - std::make_tuple(source, BspSchedule::node_to_processor_assignment[source], - proc), + std::make_tuple(source, BspSchedule::node_to_processor_assignment[source], proc), step - this->getStaleness()); node_to_proc_been_sent[source][proc] = true; - v_commw_t comm_cost = - BspSchedule::instance->getComputationalDag().vertex_comm_weight(source) * - BspSchedule::instance->getArchitecture().sendCosts( - BspSchedule::node_to_processor_assignment[source], proc); + v_commw_t comm_cost + = BspSchedule::instance->getComputationalDag().vertex_comm_weight(source) + * BspSchedule::instance->getArchitecture().sendCosts( + BspSchedule::node_to_processor_assignment[source], proc); require_sending[BspSchedule::node_to_processor_assignment[source]].erase( {comm_cost, source, proc}); send_cost[BspSchedule::node_to_processor_assignment[source]] += comm_cost; @@ -382,15 +385,14 @@ class BspScheduleCS : public BspSchedule { // extra sends // TODO: permute the order of processors for (size_t proc = 0; proc < BspSchedule::instance->numberOfProcessors(); proc++) { - if (require_sending[proc].empty() || - std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] > - max_comm_cost) + if (require_sending[proc].empty() + || std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] > max_comm_cost) { continue; + } auto iter = require_sending[proc].begin(); while (iter != require_sending[proc].end()) { const auto &[comm_cost, node_to_send, dest_proc] = *iter; - if (comm_cost + send_cost[proc] > max_comm_cost || - comm_cost + receive_cost[dest_proc] > max_comm_cost) { + if (comm_cost + send_cost[proc] > max_comm_cost || comm_cost + receive_cost[dest_proc] > max_comm_cost) { iter++; } else { commSchedule.emplace(std::make_tuple(node_to_send, proc, dest_proc), step - this->getStaleness()); @@ -398,10 +400,10 @@ class BspScheduleCS : public BspSchedule { send_cost[proc] += comm_cost; receive_cost[dest_proc] += comm_cost; iter = require_sending[proc].erase(iter); - if (require_sending[proc].empty() || - std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] > - max_comm_cost) - break; // Exit if no more sends can possibly fit. + if (require_sending[proc].empty() + || std::get<0>(*require_sending[proc].rbegin()) + send_cost[proc] > max_comm_cost) { + break; // Exit if no more sends can possibly fit. + } } } } @@ -409,15 +411,16 @@ class BspScheduleCS : public BspSchedule { // updating require_sending for (unsigned proc = 0; proc < BspSchedule::instance->numberOfProcessors(); proc++) { for (const auto &node : step_proc_node_list[step][proc]) { - - for (const auto &target : BspSchedule::instance->getComputationalDag().children(node)) + for (const auto &target : BspSchedule::instance->getComputationalDag().children(node)) { if (proc != BspSchedule::assignedProcessor(target)) { require_sending[proc].insert( - {BspSchedule::instance->getComputationalDag().vertex_comm_weight(node) * - BspSchedule::instance->getArchitecture().sendCosts( + {BspSchedule::instance->getComputationalDag().vertex_comm_weight(node) + * BspSchedule::instance->getArchitecture().sendCosts( proc, BspSchedule::node_to_processor_assignment[target]), - node, BspSchedule::node_to_processor_assignment[target]}); + node, + BspSchedule::node_to_processor_assignment[target]}); } + } } } } @@ -428,53 +431,59 @@ class BspScheduleCS : public BspSchedule { for (const auto &source : BspSchedule::instance->getComputationalDag().vertices()) { for (const auto &target : BspSchedule::instance->getComputationalDag().children(source)) { - - if (BspSchedule::node_to_processor_assignment[source] != - BspSchedule::node_to_processor_assignment[target]) { - - const auto tmp = std::make_tuple(source, BspSchedule::node_to_processor_assignment[source], + if (BspSchedule::node_to_processor_assignment[source] + != BspSchedule::node_to_processor_assignment[target]) { + const auto tmp = std::make_tuple(source, + BspSchedule::node_to_processor_assignment[source], BspSchedule::node_to_processor_assignment[target]); if (commSchedule.find(tmp) == commSchedule.end()) { commSchedule[tmp] = BspSchedule::node_to_superstep_assignment[target] - this->getStaleness(); } else { - commSchedule[tmp] = - std::min(BspSchedule::node_to_superstep_assignment[target] - this->getStaleness(), commSchedule[tmp]); + commSchedule[tmp] = std::min( + BspSchedule::node_to_superstep_assignment[target] - this->getStaleness(), commSchedule[tmp]); } } } } } + void setEagerCommunicationSchedule() { commSchedule.clear(); for (const auto &source : BspSchedule::instance->getComputationalDag().vertices()) { for (const auto &target : BspSchedule::instance->getComputationalDag().children(source)) { - - if (BspSchedule::node_to_processor_assignment[source] != - BspSchedule::node_to_processor_assignment[target]) { - - commSchedule[std::make_tuple(source, BspSchedule::node_to_processor_assignment[source], - BspSchedule::node_to_processor_assignment[target])] = - BspSchedule::node_to_superstep_assignment[source]; + if (BspSchedule::node_to_processor_assignment[source] + != BspSchedule::node_to_processor_assignment[target]) { + commSchedule[std::make_tuple(source, + BspSchedule::node_to_processor_assignment[source], + BspSchedule::node_to_processor_assignment[target])] + = BspSchedule::node_to_superstep_assignment[source]; } } } } virtual void shrinkByMergingSupersteps() override { - std::vector superstep_latest_dependency(this->number_of_supersteps, 0); std::vector> first_at = getFirstPresence(); - for (auto const &[key, val] : commSchedule) - if (this->assignedProcessor(std::get<0>(key)) != std::get<1>(key)) - superstep_latest_dependency[val] = std::max(superstep_latest_dependency[val], first_at[std::get<0>(key)][std::get<1>(key)]); + for (auto const &[key, val] : commSchedule) { + if (this->assignedProcessor(std::get<0>(key)) != std::get<1>(key)) { + superstep_latest_dependency[val] + = std::max(superstep_latest_dependency[val], first_at[std::get<0>(key)][std::get<1>(key)]); + } + } - for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) - for (const auto &child : BspSchedule::instance->getComputationalDag().children(node)) - if (this->assignedProcessor(node) != this->assignedProcessor(child)) - superstep_latest_dependency[this->assignedSuperstep(child)] = std::max(superstep_latest_dependency[this->assignedSuperstep(child)], first_at[node][this->assignedProcessor(child)]); + for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) { + for (const auto &child : BspSchedule::instance->getComputationalDag().children(node)) { + if (this->assignedProcessor(node) != this->assignedProcessor(child)) { + superstep_latest_dependency[this->assignedSuperstep(child)] + = std::max(superstep_latest_dependency[this->assignedSuperstep(child)], + first_at[node][this->assignedProcessor(child)]); + } + } + } std::vector merge_with_previous(this->number_of_supersteps, false); for (unsigned step = this->number_of_supersteps - 1; step < this->number_of_supersteps; --step) { @@ -491,89 +500,105 @@ class BspScheduleCS : public BspSchedule { std::vector new_step_index(this->number_of_supersteps); unsigned current_index = std::numeric_limits::max(); for (unsigned step = 0; step < this->number_of_supersteps; ++step) { - if (!merge_with_previous[step]) + if (!merge_with_previous[step]) { current_index++; + } new_step_index[step] = current_index; } - for (const auto &node : this->instance->vertices()) + for (const auto &node : this->instance->vertices()) { this->node_to_superstep_assignment[node] = new_step_index[this->node_to_superstep_assignment[node]]; - for (auto &[key, val] : commSchedule) + } + for (auto &[key, val] : commSchedule) { val = new_step_index[val]; + } this->setNumberOfSupersteps(current_index + 1); } // for each vertex v and processor p, find the first superstep where v is present on p by the end of the compute phase std::vector> getFirstPresence() const { + std::vector> first_at( + BspSchedule::instance->numberOfVertices(), + std::vector(BspSchedule::instance->numberOfProcessors(), std::numeric_limits::max())); - std::vector> first_at(BspSchedule::instance->numberOfVertices(), - std::vector(BspSchedule::instance->numberOfProcessors(), std::numeric_limits::max())); - - for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) + for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) { first_at[node][this->assignedProcessor(node)] = this->assignedSuperstep(node); + } - for (auto const &[key, val] : commSchedule) - first_at[std::get<0>(key)][std::get<2>(key)] = - std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + 1); // TODO: replace by staleness after merge + for (auto const &[key, val] : commSchedule) { + first_at[std::get<0>(key)][std::get<2>(key)] + = std::min(first_at[std::get<0>(key)][std::get<2>(key)], val + 1); // TODO: replace by staleness after merge + } return first_at; } // remove unneeded comm. schedule entries - these can happen in ILPs, partial ILPs, etc. void cleanCommSchedule() { - // data that is already present before it arrives - std::vector>> arrives_at(BspSchedule::instance->numberOfVertices(), - std::vector>(BspSchedule::instance->numberOfProcessors())); - for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) + std::vector>> arrives_at( + BspSchedule::instance->numberOfVertices(), + std::vector>(BspSchedule::instance->numberOfProcessors())); + for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) { arrives_at[node][this->assignedProcessor(node)].insert(this->assignedSuperstep(node)); + } - for (auto const &[key, val] : commSchedule) + for (auto const &[key, val] : commSchedule) { arrives_at[std::get<0>(key)][std::get<2>(key)].insert(val); + } std::vector toErase; for (auto const &[key, val] : commSchedule) { auto itr = arrives_at[std::get<0>(key)][std::get<2>(key)].begin(); - if (*itr < val) + if (*itr < val) { toErase.push_back(key); - else if (*itr == val && ++itr != arrives_at[std::get<0>(key)][std::get<2>(key)].end() && *itr == val) { + } else if (*itr == val && ++itr != arrives_at[std::get<0>(key)][std::get<2>(key)].end() && *itr == val) { toErase.push_back(key); arrives_at[std::get<0>(key)][std::get<2>(key)].erase(itr); } } - for (const KeyTriple &key : toErase) + for (const KeyTriple &key : toErase) { commSchedule.erase(key); + } // data that is not used after being sent - std::vector>> used_at(BspSchedule::instance->numberOfVertices(), - std::vector>(BspSchedule::instance->numberOfProcessors())); - for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) - for (const auto &child : BspSchedule::instance->getComputationalDag().children(node)) + std::vector>> used_at( + BspSchedule::instance->numberOfVertices(), + std::vector>(BspSchedule::instance->numberOfProcessors())); + for (const auto &node : BspSchedule::instance->getComputationalDag().vertices()) { + for (const auto &child : BspSchedule::instance->getComputationalDag().children(node)) { used_at[node][this->assignedProcessor(child)].insert(this->assignedSuperstep(child)); + } + } - for (auto const &[key, val] : commSchedule) + for (auto const &[key, val] : commSchedule) { used_at[std::get<0>(key)][std::get<1>(key)].insert(val); + } // (need to visit cs entries in reverse superstep order here) std::vector> entries(this->number_of_supersteps); - for (auto const &[key, val] : commSchedule) + for (auto const &[key, val] : commSchedule) { entries[val].push_back(key); + } toErase.clear(); - for (unsigned step = this->number_of_supersteps - 1; step < this->number_of_supersteps; --step) - for (const KeyTriple &key : entries[step]) - if (used_at[std::get<0>(key)][std::get<2>(key)].empty() || - *used_at[std::get<0>(key)][std::get<2>(key)].rbegin() <= step) { + for (unsigned step = this->number_of_supersteps - 1; step < this->number_of_supersteps; --step) { + for (const KeyTriple &key : entries[step]) { + if (used_at[std::get<0>(key)][std::get<2>(key)].empty() + || *used_at[std::get<0>(key)][std::get<2>(key)].rbegin() <= step) { toErase.push_back(key); auto itr = used_at[std::get<0>(key)][std::get<1>(key)].find(step); used_at[std::get<0>(key)][std::get<1>(key)].erase(itr); } + } + } - for (const KeyTriple &key : toErase) + for (const KeyTriple &key : toErase) { commSchedule.erase(key); + } } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/model/BspScheduleRecomp.hpp b/include/osp/bsp/model/BspScheduleRecomp.hpp index 8e8a9cc2..7f3f233c 100644 --- a/include/osp/bsp/model/BspScheduleRecomp.hpp +++ b/include/osp/bsp/model/BspScheduleRecomp.hpp @@ -18,16 +18,14 @@ limitations under the License. #pragma once - #include "IBspScheduleEval.hpp" #include "osp/bsp/model/BspScheduleCS.hpp" #include "osp/concepts/computational_dag_concept.hpp" namespace osp { -template +template class BspScheduleRecomp : public IBspScheduleEval { - public: using vertex_idx = vertex_idx_t; using cost_type = v_workw_t; @@ -35,10 +33,10 @@ class BspScheduleRecomp : public IBspScheduleEval { using KeyTriple = std::tuple, unsigned int, unsigned int>; static_assert(is_computational_dag_v, "BspScheduleRecomp can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t >, "BspScheduleRecomp requires work and comm. weights to have the same type."); + static_assert(std::is_same_v, v_commw_t>, + "BspScheduleRecomp requires work and comm. weights to have the same type."); private: - const BspInstance *instance; unsigned int number_of_supersteps = 0; @@ -48,13 +46,14 @@ class BspScheduleRecomp : public IBspScheduleEval { std::map commSchedule; public: - BspScheduleRecomp() = default; - BspScheduleRecomp(const BspInstance &inst) : instance(&inst) - {node_to_processor_and_supertep_assignment.resize(inst.numberOfVertices());} + BspScheduleRecomp(const BspInstance &inst) : instance(&inst) { + node_to_processor_and_supertep_assignment.resize(inst.numberOfVertices()); + } BspScheduleRecomp(const BspScheduleCS &schedule); + BspScheduleRecomp(const BspSchedule &schedule) : BspScheduleRecomp(BspScheduleCS(schedule)) {} virtual ~BspScheduleRecomp() = default; @@ -67,17 +66,17 @@ class BspScheduleRecomp : public IBspScheduleEval { * @return The number of supersteps in the schedule. */ virtual unsigned numberOfSupersteps() const override { return number_of_supersteps; } + void setNumberOfSupersteps(unsigned number_of_supersteps_) { number_of_supersteps = number_of_supersteps_; } - std::vector>& assignments(vertex_idx node) { + std::vector> &assignments(vertex_idx node) { return node_to_processor_and_supertep_assignment[node]; } - const std::vector>& assignments(vertex_idx node) const { + const std::vector> &assignments(vertex_idx node) const { return node_to_processor_and_supertep_assignment[node]; } - /** * @brief Sets the communication schedule for the schedule. * @@ -126,85 +125,87 @@ class BspScheduleRecomp : public IBspScheduleEval { vertex_idx getTotalAssignments() const; - void mergeSupersteps(); - + void mergeSupersteps(); }; -template -BspScheduleRecomp::BspScheduleRecomp(const BspScheduleCS &schedule) : instance(&schedule.getInstance()) -{ +template +BspScheduleRecomp::BspScheduleRecomp(const BspScheduleCS &schedule) : instance(&schedule.getInstance()) { node_to_processor_and_supertep_assignment.clear(); node_to_processor_and_supertep_assignment.resize(instance->numberOfVertices()); number_of_supersteps = schedule.numberOfSupersteps(); - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - node_to_processor_and_supertep_assignment[node].emplace_back(schedule.assignedProcessor(node), schedule.assignedSuperstep(node)); + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + node_to_processor_and_supertep_assignment[node].emplace_back(schedule.assignedProcessor(node), + schedule.assignedSuperstep(node)); + } commSchedule = schedule.getCommunicationSchedule(); } -template -void BspScheduleRecomp::addCommunicationScheduleEntry(unsigned node, unsigned from_proc, unsigned to_proc, - unsigned step) { +template +void BspScheduleRecomp::addCommunicationScheduleEntry(unsigned node, unsigned from_proc, unsigned to_proc, unsigned step) { addCommunicationScheduleEntry(std::make_tuple(node, from_proc, to_proc), step); } -template +template void BspScheduleRecomp::addCommunicationScheduleEntry(KeyTriple key, unsigned step) { - - if (step >= number_of_supersteps) + if (step >= number_of_supersteps) { throw std::invalid_argument("Invalid Argument while adding communication schedule entry: step out of range."); + } - if (std::get<0>(key) >= instance->numberOfVertices()) + if (std::get<0>(key) >= instance->numberOfVertices()) { throw std::invalid_argument("Invalid Argument while adding communication schedule entry: node out of range."); + } - if (std::get<1>(key) >= instance->numberOfProcessors()) - throw std::invalid_argument( - "Invalid Argument while adding communication schedule entry: from processor out of range."); + if (std::get<1>(key) >= instance->numberOfProcessors()) { + throw std::invalid_argument("Invalid Argument while adding communication schedule entry: from processor out of range."); + } - if (std::get<2>(key) >= instance->numberOfProcessors()) - throw std::invalid_argument( - "Invalid Argument while adding communication schedule entry: to processor out of range."); + if (std::get<2>(key) >= instance->numberOfProcessors()) { + throw std::invalid_argument("Invalid Argument while adding communication schedule entry: to processor out of range."); + } commSchedule[key] = step; } -template +template bool BspScheduleRecomp::satisfiesConstraints() const { - // find first availability - - std::vector > node_first_available_on_proc(instance->numberOfVertices(), - std::vector(instance->numberOfProcessors(), std::numeric_limits::max())); - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - for(const std::pair& compute_step : node_to_processor_and_supertep_assignment[node]) - node_first_available_on_proc[node][compute_step.first] = - std::min(node_first_available_on_proc[node][compute_step.first], compute_step.second); + std::vector> node_first_available_on_proc( + instance->numberOfVertices(), std::vector(instance->numberOfProcessors(), std::numeric_limits::max())); - for (auto const &[key, val] : commSchedule) { + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + for (const std::pair &compute_step : node_to_processor_and_supertep_assignment[node]) { + node_first_available_on_proc[node][compute_step.first] + = std::min(node_first_available_on_proc[node][compute_step.first], compute_step.second); + } + } - const vertex_idx& node = std::get<0>(key); - const unsigned& to_proc = std::get<2>(key); + for (auto const &[key, val] : commSchedule) { + const vertex_idx &node = std::get<0>(key); + const unsigned &to_proc = std::get<2>(key); node_first_available_on_proc[node][to_proc] = std::min(node_first_available_on_proc[node][to_proc], val + 1); } // check validity - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - for(vertex_idx pred : instance->getComputationalDag().parents(node)) - for(const std::pair& compute_step : node_to_processor_and_supertep_assignment[node]) - if(node_first_available_on_proc[pred][compute_step.first] > compute_step.second){ + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + for (vertex_idx pred : instance->getComputationalDag().parents(node)) { + for (const std::pair &compute_step : node_to_processor_and_supertep_assignment[node]) { + if (node_first_available_on_proc[pred][compute_step.first] > compute_step.second) { // std::cout << "Not a valid schedule: parent " << pred << " of node "<< node << //" not yet available on processor " << compute_step.first << " in superstep "<< compute_step.second <<"." << std::endl; return false; } + } + } + } for (auto const &[key, val] : commSchedule) { - - const vertex_idx& node = std::get<0>(key); - const unsigned& from_proc = std::get<1>(key); + const vertex_idx &node = std::get<0>(key); + const unsigned &from_proc = std::get<1>(key); if (node_first_available_on_proc[node][from_proc] > val) { // std::cout << "Not a valid schedule: node " << node << " not yet available for sending from processor " @@ -212,33 +213,29 @@ bool BspScheduleRecomp::satisfiesConstraints() const { return false; } } - + return true; } -template +template v_workw_t BspScheduleRecomp::computeWorkCosts() const { - assert(satisfiesConstraints()); std::vector> step_proc_work(number_of_supersteps, - std::vector(instance->numberOfProcessors(), 0)); + std::vector(instance->numberOfProcessors(), 0)); for (vertex_idx node = 0; node < instance->numberOfVertices(); node++) { - - for (const std::pair& processor_superstep : node_to_processor_and_supertep_assignment[node]) { - step_proc_work[processor_superstep.second][processor_superstep.first] += - instance->getComputationalDag().vertex_work_weight(node); + for (const std::pair &processor_superstep : node_to_processor_and_supertep_assignment[node]) { + step_proc_work[processor_superstep.second][processor_superstep.first] + += instance->getComputationalDag().vertex_work_weight(node); } } cost_type total_costs = 0; for (unsigned step = 0; step < number_of_supersteps; step++) { - cost_type max_work = 0; for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - if (max_work < step_proc_work[step][proc]) { max_work = step_proc_work[step][proc]; } @@ -250,35 +247,31 @@ v_workw_t BspScheduleRecomp::computeWorkCosts() const { return total_costs; } -template +template v_workw_t BspScheduleRecomp::computeCosts() const { - assert(satisfiesConstraints()); - std::vector> rec(number_of_supersteps, - std::vector(instance->numberOfProcessors(), 0)); - std::vector> send(number_of_supersteps, - std::vector(instance->numberOfProcessors(), 0)); + std::vector> rec(number_of_supersteps, std::vector(instance->numberOfProcessors(), 0)); + std::vector> send(number_of_supersteps, std::vector(instance->numberOfProcessors(), 0)); for (auto const &[key, val] : commSchedule) { - - send[val][std::get<1>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key)) * - instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); - rec[val][std::get<2>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key)) * - instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); + send[val][std::get<1>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key)) + * instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); + rec[val][std::get<2>(key)] += instance->sendCosts(std::get<1>(key), std::get<2>(key)) + * instance->getComputationalDag().vertex_comm_weight(std::get<0>(key)); } cost_type total_costs = 0; for (unsigned step = 0; step < number_of_supersteps; step++) { - cost_type max_comm = 0; for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - if (max_comm < send[step][proc]) + if (max_comm < send[step][proc]) { max_comm = send[step][proc]; - if (max_comm < rec[step][proc]) + } + if (max_comm < rec[step][proc]) { max_comm = rec[step][proc]; - + } } if (max_comm > 0) { @@ -289,10 +282,9 @@ v_workw_t BspScheduleRecomp::computeCosts() const { total_costs += computeWorkCosts(); return total_costs; - } -template +template vertex_idx_t BspScheduleRecomp::getTotalAssignments() const { vertex_idx total = 0; for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { @@ -301,28 +293,28 @@ vertex_idx_t BspScheduleRecomp::getTotalAssignments() const { return total; } -template -void BspScheduleRecomp::mergeSupersteps() -{ +template +void BspScheduleRecomp::mergeSupersteps() { std::vector new_step_idx(number_of_supersteps); std::vector comm_phase_empty(number_of_supersteps, true); - for (auto const &[key, val] : commSchedule) + for (auto const &[key, val] : commSchedule) { comm_phase_empty[val] = false; + } unsigned current_step_idx = 0; - for(unsigned step = 0; step < number_of_supersteps; ++step) - { + for (unsigned step = 0; step < number_of_supersteps; ++step) { new_step_idx[step] = current_step_idx; - if(!comm_phase_empty[step] || step == number_of_supersteps - 1) + if (!comm_phase_empty[step] || step == number_of_supersteps - 1) { ++current_step_idx; + } } - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - { + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { std::vector> new_assignment; - for(const std::pair& entry : node_to_processor_and_supertep_assignment[node]) + for (const std::pair &entry : node_to_processor_and_supertep_assignment[node]) { new_assignment.emplace_back(entry.first, new_step_idx[entry.second]); - node_to_processor_and_supertep_assignment[node] = new_assignment; + } + node_to_processor_and_supertep_assignment[node] = new_assignment; } for (auto &key_step_pair : commSchedule) { auto &step = key_step_pair.second; @@ -332,4 +324,4 @@ void BspScheduleRecomp::mergeSupersteps() number_of_supersteps = current_step_idx; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/model/IBspSchedule.hpp b/include/osp/bsp/model/IBspSchedule.hpp index 9840c56f..0a4a3d7e 100644 --- a/include/osp/bsp/model/IBspSchedule.hpp +++ b/include/osp/bsp/model/IBspSchedule.hpp @@ -24,9 +24,8 @@ namespace osp { /// @class IBspSchedule /// @brief Interface for a BSP (Bulk Synchronous Parallel) schedule. -template +template class IBspSchedule { - using vertex_idx = vertex_idx_t; public: @@ -62,4 +61,4 @@ class IBspSchedule { virtual unsigned numberOfSupersteps() const = 0; }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/model/IBspScheduleEval.hpp b/include/osp/bsp/model/IBspScheduleEval.hpp index 55a4290b..6e0f7a51 100644 --- a/include/osp/bsp/model/IBspScheduleEval.hpp +++ b/include/osp/bsp/model/IBspScheduleEval.hpp @@ -24,9 +24,8 @@ namespace osp { /// @class IBspSchedule /// @brief Interface for a BSP (Bulk Synchronous Parallel) schedule. -template +template class IBspScheduleEval { - using vertex_idx = vertex_idx_t; public: @@ -37,7 +36,6 @@ class IBspScheduleEval { virtual v_workw_t computeWorkCosts() const = 0; virtual unsigned numberOfSupersteps() const = 0; virtual const BspInstance &getInstance() const = 0; - }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/model/MaxBspSchedule.hpp b/include/osp/bsp/model/MaxBspSchedule.hpp index e56c99d6..d35024d2 100644 --- a/include/osp/bsp/model/MaxBspSchedule.hpp +++ b/include/osp/bsp/model/MaxBspSchedule.hpp @@ -38,11 +38,11 @@ namespace osp { * * @see BspInstance */ -template +template class MaxBspSchedule : public BspSchedule { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, "BspSchedule requires work and comm. weights to have the same type."); + static_assert(std::is_same_v, v_commw_t>, + "BspSchedule requires work and comm. weights to have the same type."); protected: using vertex_idx = vertex_idx_t; @@ -65,8 +65,10 @@ class MaxBspSchedule : public BspSchedule { * @param processor_assignment_ The processor assignment for the nodes. * @param superstep_assignment_ The superstep assignment for the nodes. */ - MaxBspSchedule(const BspInstance &inst, const std::vector &processor_assignment_, - const std::vector &superstep_assignment_) : BspSchedule(inst, processor_assignment_, superstep_assignment_) {} + MaxBspSchedule(const BspInstance &inst, + const std::vector &processor_assignment_, + const std::vector &superstep_assignment_) + : BspSchedule(inst, processor_assignment_, superstep_assignment_) {} MaxBspSchedule(const IBspSchedule &schedule) : BspSchedule(schedule) {} @@ -80,8 +82,9 @@ class MaxBspSchedule : public BspSchedule { MaxBspSchedule &operator=(MaxBspSchedule &&schedule) noexcept = default; - template - MaxBspSchedule(const BspInstance &instance_, const MaxBspSchedule &schedule) : BspSchedule(instance_, schedule) {} + template + MaxBspSchedule(const BspInstance &instance_, const MaxBspSchedule &schedule) + : BspSchedule(instance_, schedule) {} /** * @brief Destructor for the BspSchedule class. @@ -89,9 +92,10 @@ class MaxBspSchedule : public BspSchedule { virtual ~MaxBspSchedule() = default; virtual v_workw_t computeCosts() const override { - - std::vector>> rec(this->instance->numberOfProcessors(), std::vector>(this->number_of_supersteps, 0)); - std::vector>> send(this->instance->numberOfProcessors(), std::vector>(this->number_of_supersteps, 0)); + std::vector>> rec(this->instance->numberOfProcessors(), + std::vector>(this->number_of_supersteps, 0)); + std::vector>> send(this->instance->numberOfProcessors(), + std::vector>(this->number_of_supersteps, 0)); compute_lazy_communication_costs(*this, rec, send); const std::vector> max_comm_per_step = cost_helpers::compute_max_comm_per_step(*this, rec, send); @@ -99,7 +103,8 @@ class MaxBspSchedule : public BspSchedule { v_workw_t costs = 0U; for (unsigned step = 0U; step < this->number_of_supersteps; step++) { - const v_commw_t step_comm_cost = (step == 0U) ? static_cast>(0) : max_comm_per_step[step - 1U]; + const v_commw_t step_comm_cost = (step == 0U) ? static_cast>(0) + : max_comm_per_step[step - 1U]; costs += std::max(step_comm_cost, max_work_per_step[step]); if (step_comm_cost > static_cast>(0)) { @@ -112,4 +117,4 @@ class MaxBspSchedule : public BspSchedule { unsigned virtual getStaleness() const override { return 2; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/model/MaxBspScheduleCS.hpp b/include/osp/bsp/model/MaxBspScheduleCS.hpp index 79b49b33..7027ea53 100644 --- a/include/osp/bsp/model/MaxBspScheduleCS.hpp +++ b/include/osp/bsp/model/MaxBspScheduleCS.hpp @@ -31,11 +31,11 @@ limitations under the License. namespace osp { -template +template class MaxBspScheduleCS : public BspScheduleCS { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, "BspSchedule requires work and comm. weights to have the same type."); + static_assert(std::is_same_v, v_commw_t>, + "BspSchedule requires work and comm. weights to have the same type."); protected: using vertex_idx = vertex_idx_t; @@ -58,10 +58,13 @@ class MaxBspScheduleCS : public BspScheduleCS { * @param processor_assignment_ The processor assignment for the nodes. * @param superstep_assignment_ The superstep assignment for the nodes. */ - MaxBspScheduleCS(const BspInstance &inst, const std::vector &processor_assignment_, const std::vector &superstep_assignment_) + MaxBspScheduleCS(const BspInstance &inst, + const std::vector &processor_assignment_, + const std::vector &superstep_assignment_) : BspScheduleCS(inst, processor_assignment_, superstep_assignment_) {} MaxBspScheduleCS(const BspScheduleCS &schedule) : BspScheduleCS(schedule) {} + MaxBspScheduleCS(BspScheduleCS &&schedule) : BspScheduleCS(std::move(schedule)) {} MaxBspScheduleCS(const MaxBspSchedule &schedule) : BspScheduleCS(schedule) { @@ -78,7 +81,7 @@ class MaxBspScheduleCS : public BspScheduleCS { MaxBspScheduleCS &operator=(const MaxBspScheduleCS &schedule) = default; MaxBspScheduleCS &operator=(MaxBspScheduleCS &&schedule) = default; - template + template MaxBspScheduleCS(const BspInstance &instance_, const MaxBspScheduleCS &schedule) : BspScheduleCS(instance_, schedule) {} @@ -88,7 +91,6 @@ class MaxBspScheduleCS : public BspScheduleCS { virtual ~MaxBspScheduleCS() = default; virtual v_workw_t computeCosts() const override { - std::vector>> rec(this->getInstance().numberOfProcessors(), std::vector>(this->number_of_supersteps, 0)); @@ -113,4 +115,5 @@ class MaxBspScheduleCS : public BspScheduleCS { unsigned virtual getStaleness() const override { return 2; } }; -} // namespace osp \ No newline at end of file + +} // namespace osp diff --git a/include/osp/bsp/model/cost/BufferedSendingCost.hpp b/include/osp/bsp/model/cost/BufferedSendingCost.hpp index f8b61f91..747174d9 100644 --- a/include/osp/bsp/model/cost/BufferedSendingCost.hpp +++ b/include/osp/bsp/model/cost/BufferedSendingCost.hpp @@ -18,20 +18,20 @@ limitations under the License. #pragma once -#include "osp/bsp/model/cost/CostModelHelpers.hpp" -#include "osp/concepts/computational_dag_concept.hpp" #include #include +#include "osp/bsp/model/cost/CostModelHelpers.hpp" +#include "osp/concepts/computational_dag_concept.hpp" + namespace osp { /** * @struct BufferedSendingCost * @brief Implements the buffered sending cost model. */ -template +template struct BufferedSendingCost { - using cost_type = v_commw_t; cost_type operator()(const BspSchedule &schedule) const { @@ -41,26 +41,29 @@ struct BufferedSendingCost { const auto &node_to_superstep_assignment = schedule.assignedSupersteps(); const auto staleness = schedule.getStaleness(); - std::vector>> rec(instance.numberOfProcessors(), std::vector>(number_of_supersteps, 0)); - std::vector>> send(instance.numberOfProcessors(), std::vector>(number_of_supersteps, 0)); + std::vector>> rec(instance.numberOfProcessors(), + std::vector>(number_of_supersteps, 0)); + std::vector>> send(instance.numberOfProcessors(), + std::vector>(number_of_supersteps, 0)); for (vertex_idx_t node = 0; node < instance.numberOfVertices(); node++) { - std::vector step_needed(instance.numberOfProcessors(), number_of_supersteps); for (const auto &target : instance.getComputationalDag().children(node)) { - if (node_to_processor_assignment[node] != node_to_processor_assignment[target]) { - step_needed[node_to_processor_assignment[target]] = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]); + step_needed[node_to_processor_assignment[target]] + = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]); } } for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - if (step_needed[proc] < number_of_supersteps) { - send[node_to_processor_assignment[node]][node_to_superstep_assignment[node]] += instance.sendCosts(node_to_processor_assignment[node], proc) * instance.getComputationalDag().vertex_comm_weight(node); + send[node_to_processor_assignment[node]][node_to_superstep_assignment[node]] + += instance.sendCosts(node_to_processor_assignment[node], proc) + * instance.getComputationalDag().vertex_comm_weight(node); if (step_needed[proc] >= staleness) { - rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) * instance.getComputationalDag().vertex_comm_weight(node); + rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) + * instance.getComputationalDag().vertex_comm_weight(node); } } } @@ -81,4 +84,4 @@ struct BufferedSendingCost { } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/model/cost/CostModelHelpers.hpp b/include/osp/bsp/model/cost/CostModelHelpers.hpp index b1d449b4..fe9b269f 100644 --- a/include/osp/bsp/model/cost/CostModelHelpers.hpp +++ b/include/osp/bsp/model/cost/CostModelHelpers.hpp @@ -18,59 +18,58 @@ limitations under the License. #pragma once -#include "osp/bsp/model/BspInstance.hpp" #include #include +#include "osp/bsp/model/BspInstance.hpp" + namespace osp { -template +template class BspSchedule; namespace cost_helpers { -template -std::vector> compute_max_comm_per_step( - const BspInstance &instance, - unsigned number_of_supersteps, - const std::vector>> &rec, - const std::vector>> &send) { - +template +std::vector> compute_max_comm_per_step(const BspInstance &instance, + unsigned number_of_supersteps, + const std::vector>> &rec, + const std::vector>> &send) { std::vector> max_comm_per_step(number_of_supersteps, 0); for (unsigned step = 0; step < number_of_supersteps; step++) { v_commw_t max_send = 0; v_commw_t max_rec = 0; for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - if (max_send < send[proc][step]) + if (max_send < send[proc][step]) { max_send = send[proc][step]; - if (max_rec < rec[proc][step]) + } + if (max_rec < rec[proc][step]) { max_rec = rec[proc][step]; + } } max_comm_per_step[step] = std::max(max_send, max_rec) * instance.communicationCosts(); } return max_comm_per_step; } -template -std::vector> compute_max_comm_per_step( - const BspSchedule &schedule, - const std::vector>> &rec, - const std::vector>> &send) { +template +std::vector> compute_max_comm_per_step(const BspSchedule &schedule, + const std::vector>> &rec, + const std::vector>> &send) { return compute_max_comm_per_step(schedule.getInstance(), schedule.numberOfSupersteps(), rec, send); } -template -std::vector> compute_max_work_per_step( - const BspInstance &instance, - unsigned number_of_supersteps, - const std::vector &node_to_processor_assignment, - const std::vector &node_to_superstep_assignment) { +template +std::vector> compute_max_work_per_step(const BspInstance &instance, + unsigned number_of_supersteps, + const std::vector &node_to_processor_assignment, + const std::vector &node_to_superstep_assignment) { std::vector>> work = std::vector>>( number_of_supersteps, std::vector>(instance.numberOfProcessors(), 0)); for (const auto &node : instance.vertices()) { - work[node_to_superstep_assignment[node]][node_to_processor_assignment[node]] += - instance.getComputationalDag().vertex_work_weight(node); + work[node_to_superstep_assignment[node]][node_to_processor_assignment[node]] + += instance.getComputationalDag().vertex_work_weight(node); } std::vector> max_work_per_step(number_of_supersteps, 0); @@ -88,30 +87,28 @@ std::vector> compute_max_work_per_step( return max_work_per_step; } -template -std::vector> compute_max_work_per_step( - const BspSchedule &schedule) { - return compute_max_work_per_step(schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps()); +template +std::vector> compute_max_work_per_step(const BspSchedule &schedule) { + return compute_max_work_per_step( + schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps()); } -template -v_workw_t compute_work_costs( - const BspInstance &instance, - unsigned number_of_supersteps, - const std::vector &node_to_processor_assignment, - const std::vector &node_to_superstep_assignment) { - - std::vector> max_work_per_step = compute_max_work_per_step(instance, number_of_supersteps, node_to_processor_assignment, node_to_superstep_assignment); +template +v_workw_t compute_work_costs(const BspInstance &instance, + unsigned number_of_supersteps, + const std::vector &node_to_processor_assignment, + const std::vector &node_to_superstep_assignment) { + std::vector> max_work_per_step + = compute_max_work_per_step(instance, number_of_supersteps, node_to_processor_assignment, node_to_superstep_assignment); return std::accumulate(max_work_per_step.begin(), max_work_per_step.end(), static_cast>(0)); } -template -v_workw_t compute_work_costs( - const BspSchedule &schedule) { - - return compute_work_costs(schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps()); +template +v_workw_t compute_work_costs(const BspSchedule &schedule) { + return compute_work_costs( + schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps()); } -} // namespace cost_helpers -} // namespace osp +} // namespace cost_helpers +} // namespace osp diff --git a/include/osp/bsp/model/cost/LazyCommunicationCost.hpp b/include/osp/bsp/model/cost/LazyCommunicationCost.hpp index 64338481..a0497174 100644 --- a/include/osp/bsp/model/cost/LazyCommunicationCost.hpp +++ b/include/osp/bsp/model/cost/LazyCommunicationCost.hpp @@ -18,65 +18,72 @@ limitations under the License. #pragma once -#include "osp/bsp/model/cost/CostModelHelpers.hpp" -#include "osp/concepts/computational_dag_concept.hpp" #include #include +#include "osp/bsp/model/cost/CostModelHelpers.hpp" +#include "osp/concepts/computational_dag_concept.hpp" + namespace osp { -template -void compute_lazy_communication_costs( - const BspInstance &instance, - unsigned number_of_supersteps, - const std::vector &node_to_processor_assignment, - const std::vector &node_to_superstep_assignment, - const unsigned staleness, - std::vector>> &rec, - std::vector>> &send) { +template +void compute_lazy_communication_costs(const BspInstance &instance, + unsigned number_of_supersteps, + const std::vector &node_to_processor_assignment, + const std::vector &node_to_superstep_assignment, + const unsigned staleness, + std::vector>> &rec, + std::vector>> &send) { for (const auto &node : instance.vertices()) { - std::vector step_needed(instance.numberOfProcessors(), number_of_supersteps); for (const auto &target : instance.getComputationalDag().children(node)) { - if (node_to_processor_assignment[node] != node_to_processor_assignment[target]) { - step_needed[node_to_processor_assignment[target]] = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]); + step_needed[node_to_processor_assignment[target]] + = std::min(step_needed[node_to_processor_assignment[target]], node_to_superstep_assignment[target]); } } for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { - if (step_needed[proc] < number_of_supersteps) { - send[node_to_processor_assignment[node]][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) * instance.getComputationalDag().vertex_comm_weight(node); - rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) * instance.getComputationalDag().vertex_comm_weight(node); + send[node_to_processor_assignment[node]][step_needed[proc] - staleness] + += instance.sendCosts(node_to_processor_assignment[node], proc) + * instance.getComputationalDag().vertex_comm_weight(node); + rec[proc][step_needed[proc] - staleness] += instance.sendCosts(node_to_processor_assignment[node], proc) + * instance.getComputationalDag().vertex_comm_weight(node); } } } } -template -void compute_lazy_communication_costs( - const BspSchedule &schedule, - std::vector>> &rec, - std::vector>> &send) { - compute_lazy_communication_costs(schedule.getInstance(), schedule.numberOfSupersteps(), schedule.assignedProcessors(), schedule.assignedSupersteps(), schedule.getStaleness(), rec, send); +template +void compute_lazy_communication_costs(const BspSchedule &schedule, + std::vector>> &rec, + std::vector>> &send) { + compute_lazy_communication_costs(schedule.getInstance(), + schedule.numberOfSupersteps(), + schedule.assignedProcessors(), + schedule.assignedSupersteps(), + schedule.getStaleness(), + rec, + send); } /** * @struct LazyCommunicationCost * @brief Implements the lazy communication cost model. */ -template +template struct LazyCommunicationCost { - using cost_type = v_workw_t; cost_type operator()(const BspSchedule &schedule) const { const auto &number_of_processors = schedule.getInstance().numberOfProcessors(); const auto &number_of_supersteps = schedule.numberOfSupersteps(); - std::vector>> rec(number_of_processors, std::vector>(number_of_supersteps, 0)); - std::vector>> send(number_of_processors, std::vector>(number_of_supersteps, 0)); + std::vector>> rec(number_of_processors, + std::vector>(number_of_supersteps, 0)); + std::vector>> send(number_of_processors, + std::vector>(number_of_supersteps, 0)); compute_lazy_communication_costs(schedule, rec, send); const auto max_comm_per_step = cost_helpers::compute_max_comm_per_step(schedule, rec, send); @@ -95,4 +102,4 @@ struct LazyCommunicationCost { } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/model/cost/TotalCommunicationCost.hpp b/include/osp/bsp/model/cost/TotalCommunicationCost.hpp index 3182f3c5..af97e5c8 100644 --- a/include/osp/bsp/model/cost/TotalCommunicationCost.hpp +++ b/include/osp/bsp/model/cost/TotalCommunicationCost.hpp @@ -27,13 +27,11 @@ namespace osp { * @struct TotalCommunicationCost * @brief Implements the total communication cost model. */ -template +template struct TotalCommunicationCost { - using cost_type = double; cost_type operator()(const BspSchedule &schedule) const { - const auto &instance = schedule.getInstance(); const auto &node_to_processor_assignment = schedule.assignedProcessors(); @@ -41,22 +39,24 @@ struct TotalCommunicationCost { for (const auto &v : instance.vertices()) { for (const auto &target : instance.getComputationalDag().children(v)) { - if (node_to_processor_assignment[v] != node_to_processor_assignment[target]) { - total_communication += instance.sendCosts(node_to_processor_assignment[v], node_to_processor_assignment[target]) * instance.getComputationalDag().vertex_comm_weight(v); + total_communication += instance.sendCosts(node_to_processor_assignment[v], node_to_processor_assignment[target]) + * instance.getComputationalDag().vertex_comm_weight(v); } } } - auto comm_cost = total_communication * static_cast(instance.communicationCosts()) / static_cast(instance.numberOfProcessors()); + auto comm_cost = total_communication * static_cast(instance.communicationCosts()) + / static_cast(instance.numberOfProcessors()); const unsigned number_of_supersteps = schedule.numberOfSupersteps(); auto work_cost = cost_helpers::compute_work_costs(schedule); - auto sync_cost = static_cast>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0) * instance.synchronisationCosts(); + auto sync_cost = static_cast>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0) + * instance.synchronisationCosts(); return comm_cost + work_cost + sync_cost; } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp b/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp index acab210f..27641937 100644 --- a/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp +++ b/include/osp/bsp/model/cost/TotalLambdaCommunicationCost.hpp @@ -18,9 +18,10 @@ limitations under the License. #pragma once +#include + #include "osp/bsp/model/cost/CostModelHelpers.hpp" #include "osp/concepts/computational_dag_concept.hpp" -#include namespace osp { @@ -28,9 +29,8 @@ namespace osp { * @struct TotalLambdaCommunicationCost * @brief Implements the total lambda communication cost model. */ -template +template struct TotalLambdaCommunicationCost { - using cost_type = double; cost_type operator()(const BspSchedule &schedule) const { @@ -41,8 +41,9 @@ struct TotalLambdaCommunicationCost { const double comm_multiplier = 1.0 / instance.numberOfProcessors(); for (const auto &v : instance.vertices()) { - if (instance.getComputationalDag().out_degree(v) == 0) + if (instance.getComputationalDag().out_degree(v) == 0) { continue; + } std::unordered_set target_procs; for (const auto &target : instance.getComputationalDag().children(v)) { @@ -61,10 +62,11 @@ struct TotalLambdaCommunicationCost { auto comm_cost = comm_costs * comm_multiplier * static_cast(instance.communicationCosts()); auto work_cost = cost_helpers::compute_work_costs(schedule); - auto sync_cost = static_cast>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0) * instance.synchronisationCosts(); + auto sync_cost = static_cast>(number_of_supersteps > 1 ? number_of_supersteps - 1 : 0) + * instance.synchronisationCosts(); return comm_cost + static_cast(work_cost) + static_cast(sync_cost); } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp index c4d8df30..1cc65621 100644 --- a/include/osp/bsp/model/util/CompatibleProcessorRange.hpp +++ b/include/osp/bsp/model/util/CompatibleProcessorRange.hpp @@ -18,9 +18,10 @@ limitations under the License. #pragma once -#include "osp/bsp/model/BspInstance.hpp" #include +#include "osp/bsp/model/BspInstance.hpp" + namespace osp { /** @@ -31,9 +32,8 @@ namespace osp { * * @tparam Graph_t The type of the computational DAG. */ -template +template class CompatibleProcessorRange { - std::vector> typeProcessorIdx; const BspInstance *instance = nullptr; @@ -48,9 +48,7 @@ class CompatibleProcessorRange { * * @param inst The BspInstance. */ - CompatibleProcessorRange(const BspInstance &inst) { - initialize(inst); - } + CompatibleProcessorRange(const BspInstance &inst) { initialize(inst); } /** * @brief Initializes the CompatibleProcessorRange with a BspInstance. @@ -64,9 +62,11 @@ class CompatibleProcessorRange { typeProcessorIdx.resize(inst.getComputationalDag().num_vertex_types()); for (v_type_t v_type = 0; v_type < inst.getComputationalDag().num_vertex_types(); v_type++) { - for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++) - if (inst.isCompatibleType(v_type, inst.processorType(proc))) + for (unsigned proc = 0; proc < inst.numberOfProcessors(); proc++) { + if (inst.isCompatibleType(v_type, inst.processorType(proc))) { typeProcessorIdx[v_type].push_back(proc); + } + } } } } @@ -98,4 +98,4 @@ class CompatibleProcessorRange { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/model/util/SetSchedule.hpp b/include/osp/bsp/model/util/SetSchedule.hpp index 61946fae..2fce50d8 100644 --- a/include/osp/bsp/model/util/SetSchedule.hpp +++ b/include/osp/bsp/model/util/SetSchedule.hpp @@ -37,9 +37,8 @@ namespace osp { * * @note This class assumes that the `BspInstance` and `ICommunicationScheduler` classes are defined and accessible. */ -template +template class SetSchedule : public IBspSchedule { - static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); private: @@ -56,14 +55,12 @@ class SetSchedule : public IBspSchedule { SetSchedule(const BspInstance &inst, unsigned num_supersteps) : instance(&inst), number_of_supersteps(num_supersteps) { - step_processor_vertices = std::vector>>( num_supersteps, std::vector>(inst.numberOfProcessors())); } SetSchedule(const IBspSchedule &schedule) : instance(&schedule.getInstance()), number_of_supersteps(schedule.numberOfSupersteps()) { - step_processor_vertices = std::vector>>( schedule.numberOfSupersteps(), std::vector>(schedule.getInstance().numberOfProcessors())); @@ -85,12 +82,9 @@ class SetSchedule : public IBspSchedule { unsigned numberOfSupersteps() const override { return number_of_supersteps; } void setAssignedSuperstep(vertex_idx node, unsigned superstep) override { - unsigned assigned_processor = 0; for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (unsigned step = 0; step < number_of_supersteps; step++) { - if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) { assigned_processor = proc; step_processor_vertices[step][proc].erase(node); @@ -102,12 +96,9 @@ class SetSchedule : public IBspSchedule { } void setAssignedProcessor(vertex_idx node, unsigned processor) override { - unsigned assigned_step = 0; for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (unsigned step = 0; step < number_of_supersteps; step++) { - if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) { assigned_step = step; step_processor_vertices[step][proc].erase(node); @@ -122,13 +113,11 @@ class SetSchedule : public IBspSchedule { /// @param node /// @return the assigned superstep unsigned assignedSuperstep(vertex_idx node) const override { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (unsigned step = 0; step < number_of_supersteps; step++) { - - if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) + if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) { return step; + } } } @@ -139,13 +128,11 @@ class SetSchedule : public IBspSchedule { /// @param node /// @return the assigned processor unsigned assignedProcessor(vertex_idx node) const override { - for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - for (unsigned step = 0; step < number_of_supersteps; step++) { - - if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) + if (step_processor_vertices[step][proc].find(node) != step_processor_vertices[step][proc].end()) { return proc; + } } } @@ -153,39 +140,34 @@ class SetSchedule : public IBspSchedule { } void mergeSupersteps(unsigned start_step, unsigned end_step) { - unsigned step = start_step + 1; for (; step <= end_step; step++) { - for (unsigned proc = 0; proc < getInstance().numberOfProcessors(); proc++) { - step_processor_vertices[start_step][proc].merge(step_processor_vertices[step][proc]); } } for (; step < number_of_supersteps; step++) { - for (unsigned proc = 0; proc < getInstance().numberOfProcessors(); proc++) { - - step_processor_vertices[step - (end_step - start_step)][proc] = - std::move(step_processor_vertices[step][proc]); + step_processor_vertices[step - (end_step - start_step)][proc] = std::move(step_processor_vertices[step][proc]); } } } }; - -template -static void printSetScheduleWorkMemNodesGrid(std::ostream &os, const SetSchedule &set_schedule, bool print_detailed_node_assignment = false) { +template +static void printSetScheduleWorkMemNodesGrid(std::ostream &os, + const SetSchedule &set_schedule, + bool print_detailed_node_assignment = false) { const auto &instance = set_schedule.getInstance(); const unsigned num_processors = instance.numberOfProcessors(); const unsigned num_supersteps = set_schedule.numberOfSupersteps(); // Data structures to store aggregated work, memory, and nodes - std::vector>> total_work_per_cell( - num_processors, std::vector>(num_supersteps, 0.0)); - std::vector>> total_memory_per_cell( - num_processors, std::vector>(num_supersteps, 0.0)); + std::vector>> total_work_per_cell(num_processors, + std::vector>(num_supersteps, 0.0)); + std::vector>> total_memory_per_cell(num_processors, + std::vector>(num_supersteps, 0.0)); std::vector>>> nodes_per_cell( num_processors, std::vector>>(num_supersteps)); @@ -225,21 +207,20 @@ static void printSetScheduleWorkMemNodesGrid(std::ostream &os, const SetSchedule os << std::left << std::setw(cell_width) << ("P " + std::to_string(p)); for (unsigned s = 0; s < num_supersteps; ++s) { std::stringstream cell_content; - cell_content << "W:" << std::fixed << std::setprecision(0) << total_work_per_cell[p][s] - << " M:" << std::fixed << std::setprecision(0) << total_memory_per_cell[p][s] - << " N:" << nodes_per_cell[p][s].size(); // Add node count + cell_content << "W:" << std::fixed << std::setprecision(0) << total_work_per_cell[p][s] << " M:" << std::fixed + << std::setprecision(0) << total_memory_per_cell[p][s] + << " N:" << nodes_per_cell[p][s].size(); // Add node count os << std::left << std::setw(cell_width) << cell_content.str(); } os << "\n"; } - - if (print_detailed_node_assignment) { - os << "\n"; // Add a newline for separation between grid and detailed list + if (print_detailed_node_assignment) { + os << "\n"; // Add a newline for separation between grid and detailed list // Print detailed node lists below the grid os << "Detailed Node Assignments:\n"; - os << std::string(30, '=') << "\n"; // Separator + os << std::string(30, '=') << "\n"; // Separator for (unsigned p = 0; p < num_processors; ++p) { for (unsigned s = 0; s < num_supersteps; ++s) { if (!nodes_per_cell[p][s].empty()) { @@ -254,8 +235,8 @@ static void printSetScheduleWorkMemNodesGrid(std::ostream &os, const SetSchedule } } } - os << std::string(30, '=') << "\n"; // Separator + os << std::string(30, '=') << "\n"; // Separator } } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/model/util/VectorSchedule.hpp b/include/osp/bsp/model/util/VectorSchedule.hpp index ea856c1b..3c0ae212 100644 --- a/include/osp/bsp/model/util/VectorSchedule.hpp +++ b/include/osp/bsp/model/util/VectorSchedule.hpp @@ -18,17 +18,16 @@ limitations under the License. #pragma once +#include + #include "osp/bsp/model/IBspSchedule.hpp" #include "osp/concepts/computational_dag_concept.hpp" -#include namespace osp { -template +template class VectorSchedule : public IBspSchedule { - - static_assert(is_computational_dag_v, - "BspSchedule can only be used with computational DAGs."); + static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); private: const BspInstance *instance; @@ -39,7 +38,6 @@ class VectorSchedule : public IBspSchedule { std::vector node_to_processor_assignment; std::vector node_to_superstep_assignment; - /** * @brief Default constructor for VectorSchedule. */ @@ -52,21 +50,20 @@ class VectorSchedule : public IBspSchedule { VectorSchedule(const IBspSchedule &schedule) : instance(&schedule.getInstance()), number_of_supersteps(schedule.numberOfSupersteps()) { - - node_to_processor_assignment = - std::vector(schedule.getInstance().numberOfVertices(), instance->numberOfProcessors()); - node_to_superstep_assignment = - std::vector(schedule.getInstance().numberOfVertices(), schedule.numberOfSupersteps()); + node_to_processor_assignment + = std::vector(schedule.getInstance().numberOfVertices(), instance->numberOfProcessors()); + node_to_superstep_assignment + = std::vector(schedule.getInstance().numberOfVertices(), schedule.numberOfSupersteps()); for (vertex_idx_t i = 0; i < schedule.getInstance().numberOfVertices(); i++) { - node_to_processor_assignment[i] = schedule.assignedProcessor(i); node_to_superstep_assignment[i] = schedule.assignedSuperstep(i); } } VectorSchedule(const VectorSchedule &other) - : instance(other.instance), number_of_supersteps(other.number_of_supersteps), + : instance(other.instance), + number_of_supersteps(other.number_of_supersteps), node_to_processor_assignment(other.node_to_processor_assignment), node_to_superstep_assignment(other.node_to_superstep_assignment) {} @@ -74,8 +71,7 @@ class VectorSchedule : public IBspSchedule { if (this != &other) { instance = &other.getInstance(); number_of_supersteps = other.numberOfSupersteps(); - node_to_processor_assignment = - std::vector(instance->numberOfVertices(), instance->numberOfProcessors()); + node_to_processor_assignment = std::vector(instance->numberOfVertices(), instance->numberOfProcessors()); node_to_superstep_assignment = std::vector(instance->numberOfVertices(), number_of_supersteps); for (vertex_idx_t i = 0; i < instance->numberOfVertices(); i++) { @@ -97,7 +93,8 @@ class VectorSchedule : public IBspSchedule { } VectorSchedule(VectorSchedule &&other) noexcept - : instance(other.instance), number_of_supersteps(other.number_of_supersteps), + : instance(other.instance), + number_of_supersteps(other.number_of_supersteps), node_to_processor_assignment(std::move(other.node_to_processor_assignment)), node_to_superstep_assignment(std::move(other.node_to_superstep_assignment)) {} @@ -114,27 +111,22 @@ class VectorSchedule : public IBspSchedule { void setAssignedSuperstep(vertex_idx_t vertex, unsigned superstep) override { node_to_superstep_assignment[vertex] = superstep; }; + void setAssignedProcessor(vertex_idx_t vertex, unsigned processor) override { node_to_processor_assignment[vertex] = processor; }; unsigned numberOfSupersteps() const override { return number_of_supersteps; } - unsigned assignedSuperstep(vertex_idx_t vertex) const override { - return node_to_superstep_assignment[vertex]; - } - unsigned assignedProcessor(vertex_idx_t vertex) const override { - return node_to_processor_assignment[vertex]; - } + unsigned assignedSuperstep(vertex_idx_t vertex) const override { return node_to_superstep_assignment[vertex]; } - void mergeSupersteps(unsigned start_step, unsigned end_step) { + unsigned assignedProcessor(vertex_idx_t vertex) const override { return node_to_processor_assignment[vertex]; } + void mergeSupersteps(unsigned start_step, unsigned end_step) { number_of_supersteps = 0; for (const auto &vertex : getInstance().vertices()) { - if (node_to_superstep_assignment[vertex] > start_step && node_to_superstep_assignment[vertex] <= end_step) { - node_to_superstep_assignment[vertex] = start_step; } else if (node_to_superstep_assignment[vertex] > end_step) { node_to_superstep_assignment[vertex] -= end_step - start_step; @@ -147,11 +139,9 @@ class VectorSchedule : public IBspSchedule { } void insertSupersteps(const unsigned step_before, const unsigned num_new_steps) { - number_of_supersteps += num_new_steps; for (const auto &vertex : getInstance().vertices()) { - if (node_to_superstep_assignment[vertex] > step_before) { node_to_superstep_assignment[vertex] += num_new_steps; } @@ -159,4 +149,4 @@ class VectorSchedule : public IBspSchedule { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp index 2e23c22e..ec98b977 100644 --- a/include/osp/bsp/scheduler/CoarseAndSchedule.hpp +++ b/include/osp/bsp/scheduler/CoarseAndSchedule.hpp @@ -24,9 +24,8 @@ limitations under the License. namespace osp { -template +template class CoarseAndSchedule : public Scheduler { - private: Coarser &coarser; Scheduler &scheduler; @@ -35,18 +34,18 @@ class CoarseAndSchedule : public Scheduler { CoarseAndSchedule(Coarser &coarser_, Scheduler &scheduler_) : coarser(coarser_), scheduler(scheduler_) {} - std::string getScheduleName() const override { return "Coarse(" + coarser.getCoarserName() + ")AndSchedule(" + scheduler.getScheduleName() + ")"; } + std::string getScheduleName() const override { + return "Coarse(" + coarser.getCoarserName() + ")AndSchedule(" + scheduler.getScheduleName() + ")"; + } RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); BspInstance instance_coarse; std::vector> reverse_vertex_map; - bool status = coarser.coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), - reverse_vertex_map); + bool status = coarser.coarsenDag(instance.getComputationalDag(), instance_coarse.getComputationalDag(), reverse_vertex_map); if (!status) { return RETURN_STATUS::ERROR; @@ -69,4 +68,4 @@ class CoarseAndSchedule : public Scheduler { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp b/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp index 949c933b..e722989b 100644 --- a/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp +++ b/include/osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp @@ -13,27 +13,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/coarser/StepByStep/StepByStepCoarser.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" +#include "osp/coarser/StepByStep/StepByStepCoarser.hpp" -namespace osp{ +namespace osp { -template +template class MultiLevelHillClimbingScheduler : public Scheduler { - using vertex_idx = vertex_idx_t; - using vertex_type_t_or_default = std::conditional_t, v_type_t, unsigned>; + using vertex_type_t_or_default + = std::conditional_t, v_type_t, unsigned>; using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; - private: - - typename StepByStepCoarser::COARSENING_STRATEGY coarsening_strategy = StepByStepCoarser::COARSENING_STRATEGY::EDGE_BY_EDGE; + private: + typename StepByStepCoarser::COARSENING_STRATEGY coarsening_strategy + = StepByStepCoarser::COARSENING_STRATEGY::EDGE_BY_EDGE; unsigned number_hc_steps; unsigned target_nr_of_nodes = 0; unsigned min_target_nr_of_nodes_ = 1U; @@ -47,57 +47,62 @@ class MultiLevelHillClimbingScheduler : public Scheduler { std::deque refinement_points; - BspSchedule Refine(const BspInstance& instance, const StepByStepCoarser& coarser, - const BspSchedule &coarse_schedule) const; + BspSchedule Refine(const BspInstance &instance, + const StepByStepCoarser &coarser, + const BspSchedule &coarse_schedule) const; - BspSchedule ComputeUncontractedSchedule(const StepByStepCoarser& coarser, - const BspInstance& full_instance, - const BspSchedule &coarse_schedule, vertex_idx index_until) const; + BspSchedule ComputeUncontractedSchedule(const StepByStepCoarser &coarser, + const BspInstance &full_instance, + const BspSchedule &coarse_schedule, + vertex_idx index_until) const; void setLinearRefinementPoints(vertex_idx OriginalNrOfNodes, unsigned stepSize); void setExponentialRefinementPoints(vertex_idx OriginalNrOfNodes, double stepRatio); void set_parameter(const size_t num_vertices) { - target_nr_of_nodes = std::max(min_target_nr_of_nodes_, static_cast(static_cast(num_vertices) * contraction_rate_)); + target_nr_of_nodes + = std::max(min_target_nr_of_nodes_, static_cast(static_cast(num_vertices) * contraction_rate_)); target_nr_of_nodes = std::min(target_nr_of_nodes, static_cast(num_vertices)); - if(use_linear_refinement_) { + if (use_linear_refinement_) { setLinearRefinementPoints(num_vertices, linear_refinement_step_size_); - } else if (use_exponential_refinement_) { + } else if (use_exponential_refinement_) { setExponentialRefinementPoints(num_vertices, exponential_refinement_step_ratio_); } } public: - virtual ~MultiLevelHillClimbingScheduler() = default; virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override; virtual std::string getScheduleName() const override { return "MultiLevelHillClimbing"; } - void setCoarseningStrategy(typename StepByStepCoarser::COARSENING_STRATEGY strategy_){ coarsening_strategy = strategy_;} - void setContractionRate(double rate_){ contraction_rate_ = rate_;} + void setCoarseningStrategy(typename StepByStepCoarser::COARSENING_STRATEGY strategy_) { + coarsening_strategy = strategy_; + } + + void setContractionRate(double rate_) { contraction_rate_ = rate_; } + void setNumberOfHcSteps(unsigned steps_) { number_hc_steps = steps_; } + void setMinTargetNrOfNodes(unsigned min_target_nr_of_nodes) { min_target_nr_of_nodes_ = min_target_nr_of_nodes; } - void useLinearRefinementSteps(unsigned steps) { + void useLinearRefinementSteps(unsigned steps) { use_linear_refinement_ = true; use_exponential_refinement_ = false; linear_refinement_step_size_ = steps; } - void useExponentialRefinementPoints(double ratio) { + void useExponentialRefinementPoints(double ratio) { use_exponential_refinement_ = true; use_linear_refinement_ = false; exponential_refinement_step_ratio_ = ratio; } - }; -template +template RETURN_STATUS MultiLevelHillClimbingScheduler::computeSchedule(BspSchedule &schedule) { - StepByStepCoarser coarser; Graph_t coarseDAG; std::vector new_vertex_id; @@ -116,10 +121,12 @@ RETURN_STATUS MultiLevelHillClimbingScheduler::computeSchedule(BspSched HillClimbingScheduler coarse_hc; coarse_hc.improveSchedule(coarse_schedule); - if(refinement_points.empty()) + if (refinement_points.empty()) { setExponentialRefinementPoints(num_verices, 1.1); - while(!refinement_points.empty() && refinement_points.front() <= coarseDAG.num_vertices()) + } + while (!refinement_points.empty() && refinement_points.front() <= coarseDAG.num_vertices()) { refinement_points.pop_front(); + } schedule = Refine(schedule.getInstance(), coarser, coarse_schedule); @@ -127,13 +134,14 @@ RETURN_STATUS MultiLevelHillClimbingScheduler::computeSchedule(BspSched } // run refinement: uncoarsify the DAG in small batches, and apply some steps of hill climbing after each iteration -template -BspSchedule MultiLevelHillClimbingScheduler::Refine(const BspInstance& full_instance, const StepByStepCoarser& coarser, const BspSchedule &coarse_schedule) const { - - BspSchedule schedule_on_full_graph = ComputeUncontractedSchedule(coarser, full_instance, coarse_schedule, coarser.getContractionHistory().size()); - - for (vertex_idx next_size : refinement_points) - { +template +BspSchedule MultiLevelHillClimbingScheduler::Refine(const BspInstance &full_instance, + const StepByStepCoarser &coarser, + const BspSchedule &coarse_schedule) const { + BspSchedule schedule_on_full_graph + = ComputeUncontractedSchedule(coarser, full_instance, coarse_schedule, coarser.getContractionHistory().size()); + + for (vertex_idx next_size : refinement_points) { const vertex_idx contract_steps = coarser.getOriginalDag().num_vertices() - next_size; std::vector new_ids = coarser.GetIntermediateIDs(contract_steps); Graph_t dag = coarser.Contract(new_ids); @@ -158,52 +166,55 @@ BspSchedule MultiLevelHillClimbingScheduler::Refine(const BspI } // given an original DAG G, a schedule on the coarsified G and the contraction steps, project the coarse schedule to the entire G -template -BspSchedule MultiLevelHillClimbingScheduler::ComputeUncontractedSchedule(const StepByStepCoarser& coarser, - const BspInstance& full_instance, - const BspSchedule &coarse_schedule, vertex_idx index_until) const { - +template +BspSchedule MultiLevelHillClimbingScheduler::ComputeUncontractedSchedule( + const StepByStepCoarser &coarser, + const BspInstance &full_instance, + const BspSchedule &coarse_schedule, + vertex_idx index_until) const { std::vector new_ids = coarser.GetIntermediateIDs(index_until); BspSchedule schedule(full_instance); - for (vertex_idx node = 0; node < full_instance.numberOfVertices(); ++node) - { + for (vertex_idx node = 0; node < full_instance.numberOfVertices(); ++node) { schedule.setAssignedProcessor(node, coarse_schedule.assignedProcessor(new_ids[node])); schedule.setAssignedSuperstep(node, coarse_schedule.assignedSuperstep(new_ids[node])); } return schedule; } -template -void MultiLevelHillClimbingScheduler::setLinearRefinementPoints(vertex_idx OriginalNrOfNodes, unsigned stepSize) -{ +template +void MultiLevelHillClimbingScheduler::setLinearRefinementPoints(vertex_idx OriginalNrOfNodes, unsigned stepSize) { refinement_points.clear(); - if(stepSize<5) + if (stepSize < 5) { stepSize = 5; + } - for (vertex_idx nextN = target_nr_of_nodes + stepSize; nextN < OriginalNrOfNodes; nextN += stepSize) + for (vertex_idx nextN = target_nr_of_nodes + stepSize; nextN < OriginalNrOfNodes; nextN += stepSize) { refinement_points.push_back(nextN); + } - if (!refinement_points.empty()) + if (!refinement_points.empty()) { refinement_points.pop_back(); + } refinement_points.push_back(OriginalNrOfNodes); } -template -void MultiLevelHillClimbingScheduler::setExponentialRefinementPoints(vertex_idx OriginalNrOfNodes, double stepRatio) -{ +template +void MultiLevelHillClimbingScheduler::setExponentialRefinementPoints(vertex_idx OriginalNrOfNodes, double stepRatio) { refinement_points.clear(); - if(stepRatio<1.01) + if (stepRatio < 1.01) { stepRatio = 1.01; + } - for (vertex_idx nextN = std::max(static_cast(std::round(target_nr_of_nodes * stepRatio)), target_nr_of_nodes+5); - nextN < OriginalNrOfNodes; - nextN = std::max(static_cast(std::round(static_cast(nextN) * stepRatio)), refinement_points.back()+5)) + for (vertex_idx nextN = std::max(static_cast(std::round(target_nr_of_nodes * stepRatio)), target_nr_of_nodes + 5); + nextN < OriginalNrOfNodes; + nextN + = std::max(static_cast(std::round(static_cast(nextN) * stepRatio)), refinement_points.back() + 5)) { refinement_points.push_back(nextN); + } refinement_points.push_back(OriginalNrOfNodes); } - -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp b/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp index 6446800e..08a52aa0 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp @@ -1,635 +1,632 @@ -/* -Copyright 2024 Huawei Technologies Co., Ltd. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. - -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner -*/ - -#pragma once - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "MemoryConstraintModules.hpp" -#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp" -#include "osp/auxiliary/misc.hpp" -#include "osp/bsp/scheduler/Scheduler.hpp" -#include "osp/graph_algorithms/directed_graph_top_sort.hpp" - -namespace osp { - -/** - * @brief The GreedyBspLocking class represents a scheduler that uses a greedy algorithm to compute schedules for - * BspInstance. - * - * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods. - * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. - * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. - */ - -template -class BspLocking : public Scheduler { - - static_assert(is_computational_dag_v, "BspLocking can only be used with computational DAGs."); - - private: - using VertexType = vertex_idx_t; - - constexpr static bool use_memory_constraint = - is_memory_constraint_v or is_memory_constraint_schedule_v; - - static_assert(not use_memory_constraint or std::is_same_v, - "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); - - MemoryConstraint_t memory_constraint; - - using Priority = std::tuple; - - struct PriorityCompare { - bool operator()(const Priority &a, const Priority &b) const { - if (std::get<0>(a) != std::get<0>(b)) { - return std::get<0>(a) > std::get<0>(b); // Higher score is better - } - if (std::get<1>(a) != std::get<1>(b)) { - return std::get<1>(a) > std::get<1>(b); // Higher secondary_score is better - } - return std::get<2>(a) < std::get<2>(b); // Smaller node index is better for tie-breaking - } - }; - - using MaxHeap = PairingHeap; - - std::vector max_proc_score_heap; - std::vector max_all_proc_score_heap; - - static std::vector> get_longest_path(const Graph_t &graph) { - - std::vector> longest_path(graph.num_vertices(), 0); - - const std::vector top_order = GetTopOrder(graph); - - for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) { - longest_path[*r_iter] = graph.vertex_work_weight(*r_iter); - if (graph.out_degree(*r_iter) > 0) { - v_workw_t max = 0; - for (const auto &child : graph.children(*r_iter)) { - if (max <= longest_path[child]) - max = longest_path[child]; - } - longest_path[*r_iter] += max; - } - } - - return longest_path; - } - - std::deque locked_set; - std::vector locked; - int lock_penalty = 1; - std::vector ready_phase; - - std::vector default_value; - - double max_percent_idle_processors; - bool increase_parallelism_in_new_superstep; - - int computeScore(VertexType node, unsigned proc, const BspInstance &instance) { - - int score = 0; - for (const auto &succ : instance.getComputationalDag().children(node)) { - if (locked[succ] < instance.numberOfProcessors() && locked[succ] != proc) - score -= lock_penalty; - } - - return score + default_value[node]; - }; - - bool check_mem_feasibility(const BspInstance &instance, const std::set &allReady, - const std::vector> &procReady) const { - - if constexpr (use_memory_constraint) { - - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (!procReady[i].empty()) { - - VertexType top_node = max_proc_score_heap[i].top(); - - if (memory_constraint.can_add(top_node, i)) { - return true; - } - } - } - - if (!allReady.empty()) - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - - VertexType top_node = max_all_proc_score_heap[i].top(); - - if (memory_constraint.can_add(top_node, i)) { - return true; - } - } - - return false; - } - } - - return true; - } - - bool Choose(const BspInstance &instance, std::set &allReady, - std::vector> &procReady, const std::vector &procFree, VertexType &node, - unsigned &p, const bool endSupStep, const v_workw_t remaining_time) { - - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - - if (procFree[proc] && !procReady[proc].empty()) { - - // select node - VertexType top_node = max_proc_score_heap[proc].top(); - - // filling up - bool procready_empty = false; - while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) { - procReady[proc].erase(top_node); - ready_phase[top_node] = std::numeric_limits::max(); - max_proc_score_heap[proc].pop(); - if (!procReady[proc].empty()) { - top_node = max_proc_score_heap[proc].top(); - } else { - procready_empty = true; - break; - } - } - if (procready_empty) { - continue; - } - - node = top_node; - p = proc; - } - } - - if (p < instance.numberOfProcessors()) - return true; - - Priority best_priority = {std::numeric_limits::min(), 0, 0}; - bool found_node = false; - - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - if (!procFree[proc] or max_all_proc_score_heap[proc].is_empty()) - continue; - - VertexType top_node = max_all_proc_score_heap[proc].top(); - - // filling up - bool all_procready_empty = false; - while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) { - allReady.erase(top_node); - for (unsigned proc_del = 0; proc_del < instance.numberOfProcessors(); proc_del++) { - if (proc_del == proc || !instance.isCompatible(top_node, proc_del)) - continue; - max_all_proc_score_heap[proc_del].erase(top_node); - } - max_all_proc_score_heap[proc].pop(); - ready_phase[top_node] = std::numeric_limits::max(); - if (!max_all_proc_score_heap[proc].is_empty()) { - top_node = max_all_proc_score_heap[proc].top(); - } else { - all_procready_empty = true; - break; - } - } - if (all_procready_empty) - continue; - - Priority top_priority = max_all_proc_score_heap[proc].get_value(top_node); - if (!found_node || PriorityCompare{}(top_priority, best_priority)) { - - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(top_node, proc)) { - best_priority = top_priority; - node = top_node; - p = proc; - found_node = true; - } - - } else { - - best_priority = top_priority; - node = top_node; - p = proc; - found_node = true; - } - } - } - return (found_node && std::get<0>(best_priority) > -3); - } - - bool CanChooseNode(const BspInstance &instance, const std::vector> &procReady, - const std::vector &procFree) const { - - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) - if (procFree[i] && !procReady[i].empty()) - return true; - - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) - if (procFree[i] && !max_all_proc_score_heap[i].is_empty()) - return true; - - return false; - } - - unsigned get_nr_parallelizable_nodes(const BspInstance &instance, - const std::vector &nr_ready_nodes_per_type, - const std::vector &nr_procs_per_type) const { - unsigned nr_nodes = 0; - - std::vector ready_nodes_per_type = nr_ready_nodes_per_type; - std::vector procs_per_type = nr_procs_per_type; - for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) - for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) - if (instance.isCompatibleType(node_type, proc_type)) { - unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]); - nr_nodes += matched; - ready_nodes_per_type[node_type] -= matched; - procs_per_type[proc_type] -= matched; - } - - return nr_nodes; - } - - public: - /** - * @brief Default constructor for GreedyBspLocking. - */ - BspLocking(float max_percent_idle_processors_ = 0.4f, bool increase_parallelism_in_new_superstep_ = true) - : max_percent_idle_processors(max_percent_idle_processors_), - increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {} - - /** - * @brief Default destructor for GreedyBspLocking. - */ - virtual ~BspLocking() = default; - - /** - * @brief Compute a schedule for the given BspInstance. - * - * This method computes a schedule for the given BspInstance using a greedy algorithm. - * - * @param instance The BspInstance object representing the instance to compute the schedule for. - * @return A pair containing the return status and the computed BspSchedule. - */ - virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - - const auto &instance = schedule.getInstance(); - - for (const auto &v : instance.getComputationalDag().vertices()) { - schedule.setAssignedProcessor(v, std::numeric_limits::max()); - } - - unsigned supstepIdx = 0; - - if constexpr (is_memory_constraint_v) { - memory_constraint.initialize(instance); - } else if constexpr (is_memory_constraint_schedule_v) { - memory_constraint.initialize(schedule, supstepIdx); - } - - const auto &N = instance.numberOfVertices(); - const unsigned ¶ms_p = instance.numberOfProcessors(); - const auto &G = instance.getComputationalDag(); - - const std::vector> path_length = get_longest_path(G); - v_workw_t max_path = 1; - for (const auto &i : instance.vertices()) - if (path_length[i] > max_path) - max_path = path_length[i]; - - default_value.clear(); - default_value.resize(N, 0); - for (const auto &i : instance.vertices()) { - //assert(path_length[i] * 20 / max_path <= std::numeric_limits::max()); - default_value[i] = static_cast(path_length[i] * static_cast>(20) / max_path); - } - - max_proc_score_heap = std::vector(params_p); - max_all_proc_score_heap = std::vector(params_p); - - locked_set.clear(); - locked.clear(); - locked.resize(N, std::numeric_limits::max()); - - std::set ready; - ready_phase.clear(); - ready_phase.resize(N, std::numeric_limits::max()); - - std::vector> procReady(params_p); - std::set allReady; - - std::vector nrPredecDone(N, 0); - std::vector procFree(params_p, true); - unsigned free = params_p; - - std::vector nr_ready_nodes_per_type(G.num_vertex_types(), 0); - std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < params_p; ++proc) - ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; - - std::set, VertexType>> finishTimes; - finishTimes.emplace(0, std::numeric_limits::max()); - - for (const auto &v : source_vertices_view(G)) { - ready.insert(v); - allReady.insert(v); - ++nr_ready_nodes_per_type[G.vertex_type(v)]; - ready_phase[v] = params_p; - - for (unsigned proc = 0; proc < params_p; ++proc) { - if (instance.isCompatible(v, proc)) { - Priority priority = {default_value[v], static_cast(G.out_degree(v)), v}; - max_all_proc_score_heap[proc].push(v, priority); - } - } - } - - bool endSupStep = false; - - while (!ready.empty() || !finishTimes.empty()) { - - if (finishTimes.empty() && endSupStep) { - for (unsigned proc = 0; proc < params_p; ++proc) { - procReady[proc].clear(); - max_proc_score_heap[proc].clear(); - - if constexpr (use_memory_constraint) { - memory_constraint.reset(proc); - } - } - - allReady = ready; - - for (const auto &node : locked_set) - locked[node] = std::numeric_limits::max(); - locked_set.clear(); - - for (unsigned proc = 0; proc < params_p; ++proc) { - max_all_proc_score_heap[proc].clear(); - } - - for (const auto &v : ready) { - ready_phase[v] = params_p; - for (unsigned proc = 0; proc < params_p; ++proc) { - - if (!instance.isCompatible(v, proc)) - continue; - - int score = computeScore(v, proc, instance); - Priority priority = {score, static_cast(G.out_degree(v)), v}; - max_all_proc_score_heap[proc].push(v, priority); - } - } - - ++supstepIdx; - - endSupStep = false; - - finishTimes.emplace(0, std::numeric_limits::max()); - } - - const v_workw_t time = finishTimes.begin()->first; - const v_workw_t max_finish_time = finishTimes.rbegin()->first; - - // Find new ready jobs - while (!finishTimes.empty() && finishTimes.begin()->first == time) { - - const VertexType node = finishTimes.begin()->second; - finishTimes.erase(finishTimes.begin()); - - if (node != std::numeric_limits::max()) { - for (const auto &succ : G.children(node)) { - - ++nrPredecDone[succ]; - if (nrPredecDone[succ] == G.in_degree(succ)) { - ready.insert(succ); - ++nr_ready_nodes_per_type[G.vertex_type(succ)]; - - bool canAdd = true; - for (const auto &pred : G.parents(succ)) { - - if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) && - schedule.assignedSuperstep(pred) == supstepIdx) { - canAdd = false; - break; - } - } - - if constexpr (use_memory_constraint) { - - if (canAdd) { - if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) - canAdd = false; - } - } - - if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) - canAdd = false; - - if (canAdd) { - procReady[schedule.assignedProcessor(node)].insert(succ); - ready_phase[succ] = schedule.assignedProcessor(node); - - int score = computeScore(succ, schedule.assignedProcessor(node), instance); - Priority priority = {score, static_cast(G.out_degree(succ)), succ}; - - max_proc_score_heap[schedule.assignedProcessor(node)].push(succ, priority); - } - } - } - procFree[schedule.assignedProcessor(node)] = true; - ++free; - } - } - - // Assign new jobs to processors - if (!CanChooseNode(instance, procReady, procFree)) { - endSupStep = true; - } - - while (CanChooseNode(instance, procReady, procFree)) { - - VertexType nextNode = std::numeric_limits::max(); - unsigned nextProc = instance.numberOfProcessors(); - Choose(instance, allReady, procReady, procFree, nextNode, nextProc, endSupStep, max_finish_time - time); - - if (nextNode == std::numeric_limits::max() || nextProc == instance.numberOfProcessors()) { - endSupStep = true; - break; - } - - if (ready_phase[nextNode] < params_p) { - - procReady[nextProc].erase(nextNode); - - max_proc_score_heap[nextProc].erase(nextNode); - - } else { - - allReady.erase(nextNode); - - for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - if (instance.isCompatible(nextNode, proc) && max_all_proc_score_heap[proc].contains(nextNode)) { - max_all_proc_score_heap[proc].erase(nextNode); - } - } - } - - ready.erase(nextNode); - --nr_ready_nodes_per_type[G.vertex_type(nextNode)]; - schedule.setAssignedProcessor(nextNode, nextProc); - schedule.setAssignedSuperstep(nextNode, supstepIdx); - - ready_phase[nextNode] = std::numeric_limits::max(); - - if constexpr (use_memory_constraint) { - memory_constraint.add(nextNode, nextProc); - - std::vector toErase; - for (const auto &node : procReady[nextProc]) { - if (not memory_constraint.can_add(node, nextProc)) { - toErase.push_back(node); - } - } - - for (const auto &node : toErase) { - procReady[nextProc].erase(node); - max_proc_score_heap[nextProc].erase(node); - ready_phase[node] = std::numeric_limits::max(); - } - } - - finishTimes.emplace(time + G.vertex_work_weight(nextNode), nextNode); - procFree[nextProc] = false; - --free; - - // update auxiliary structures - - for (const auto &succ : G.children(nextNode)) { - - if (locked[succ] < params_p && locked[succ] != nextProc) { - for (const auto &parent : G.parents(succ)) { - if (ready_phase[parent] < std::numeric_limits::max() && - ready_phase[parent] < params_p && ready_phase[parent] != locked[succ]) { - Priority p = max_proc_score_heap[ready_phase[parent]].get_value(parent); - std::get<0>(p) += lock_penalty; - max_proc_score_heap[ready_phase[parent]].update(parent, p); - } - if (ready_phase[parent] == params_p) { - for (unsigned proc = 0; proc < params_p; ++proc) { - if (proc == locked[succ] || !instance.isCompatible(parent, proc)) - continue; - - if (max_all_proc_score_heap[proc].contains(parent)) - { - Priority p = max_all_proc_score_heap[proc].get_value(parent); - std::get<0>(p) += lock_penalty; - max_all_proc_score_heap[proc].update(parent, p); - } - } - } - } - locked[succ] = params_p; - } else if (locked[succ] == std::numeric_limits::max()) { - locked_set.push_back(succ); - locked[succ] = nextProc; - - for (const auto &parent : G.parents(succ)) { - if (ready_phase[parent] < std::numeric_limits::max() && - ready_phase[parent] < params_p && ready_phase[parent] != nextProc) { - Priority p = max_proc_score_heap[ready_phase[parent]].get_value(parent); - std::get<0>(p) -= lock_penalty; - max_proc_score_heap[ready_phase[parent]].update(parent, p); - } - if (ready_phase[parent] == params_p) { - for (unsigned proc = 0; proc < params_p; ++proc) { - if (proc == nextProc || !instance.isCompatible(parent, proc)) - continue; - - if (max_all_proc_score_heap[proc].contains(parent)) - { - Priority p = max_all_proc_score_heap[proc].get_value(parent); - std::get<0>(p) -= lock_penalty; - max_all_proc_score_heap[proc].update(parent, p); - } - } - } - } - } - } - } - - if constexpr (use_memory_constraint) { - - if (not check_mem_feasibility(instance, allReady, procReady)) { - - return RETURN_STATUS::ERROR; - } - } - - if (free > params_p * max_percent_idle_processors && - ((!increase_parallelism_in_new_superstep) || - get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) >= - std::min(std::min(params_p, static_cast(1.2 * (params_p - free))), - params_p - free + (static_cast(0.5 * free))))) { - endSupStep = true; - } - } - - - - assert(schedule.satisfiesPrecedenceConstraints()); - - return RETURN_STATUS::OSP_SUCCESS; - } - - /** - * @brief Get the name of the schedule. - * - * This method returns the name of the schedule, which is "BspGreedy" in this case. - * - * @return The name of the schedule. - */ - virtual std::string getScheduleName() const override { - - if (use_memory_constraint) { - return "BspGreedyLockingMemory"; - } else { - return "BspGreedyLocking"; - } - } - - void set_max_percent_idle_processors(float max_percent_idle_processors_) { - max_percent_idle_processors = max_percent_idle_processors_; - } -}; - -} // namespace osp \ No newline at end of file +/* +Copyright 2024 Huawei Technologies Co., Ltd. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +*/ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "MemoryConstraintModules.hpp" +#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp" +#include "osp/auxiliary/misc.hpp" +#include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/graph_algorithms/directed_graph_top_sort.hpp" + +namespace osp { + +/** + * @brief The GreedyBspLocking class represents a scheduler that uses a greedy algorithm to compute schedules for + * BspInstance. + * + * This class inherits from the Scheduler class and implements the computeSchedule() and getScheduleName() methods. + * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. + * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. + */ + +template +class BspLocking : public Scheduler { + static_assert(is_computational_dag_v, "BspLocking can only be used with computational DAGs."); + + private: + using VertexType = vertex_idx_t; + + constexpr static bool use_memory_constraint = is_memory_constraint_v + or is_memory_constraint_schedule_v; + + static_assert(not use_memory_constraint or std::is_same_v, + "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); + + MemoryConstraint_t memory_constraint; + + using Priority = std::tuple; + + struct PriorityCompare { + bool operator()(const Priority &a, const Priority &b) const { + if (std::get<0>(a) != std::get<0>(b)) { + return std::get<0>(a) > std::get<0>(b); // Higher score is better + } + if (std::get<1>(a) != std::get<1>(b)) { + return std::get<1>(a) > std::get<1>(b); // Higher secondary_score is better + } + return std::get<2>(a) < std::get<2>(b); // Smaller node index is better for tie-breaking + } + }; + + using MaxHeap = PairingHeap; + + std::vector max_proc_score_heap; + std::vector max_all_proc_score_heap; + + static std::vector> get_longest_path(const Graph_t &graph) { + std::vector> longest_path(graph.num_vertices(), 0); + + const std::vector top_order = GetTopOrder(graph); + + for (auto r_iter = top_order.rbegin(); r_iter != top_order.crend(); r_iter++) { + longest_path[*r_iter] = graph.vertex_work_weight(*r_iter); + if (graph.out_degree(*r_iter) > 0) { + v_workw_t max = 0; + for (const auto &child : graph.children(*r_iter)) { + if (max <= longest_path[child]) { + max = longest_path[child]; + } + } + longest_path[*r_iter] += max; + } + } + + return longest_path; + } + + std::deque locked_set; + std::vector locked; + int lock_penalty = 1; + std::vector ready_phase; + + std::vector default_value; + + double max_percent_idle_processors; + bool increase_parallelism_in_new_superstep; + + int computeScore(VertexType node, unsigned proc, const BspInstance &instance) { + int score = 0; + for (const auto &succ : instance.getComputationalDag().children(node)) { + if (locked[succ] < instance.numberOfProcessors() && locked[succ] != proc) { + score -= lock_penalty; + } + } + + return score + default_value[node]; + }; + + bool check_mem_feasibility(const BspInstance &instance, + const std::set &allReady, + const std::vector> &procReady) const { + if constexpr (use_memory_constraint) { + if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (!procReady[i].empty()) { + VertexType top_node = max_proc_score_heap[i].top(); + + if (memory_constraint.can_add(top_node, i)) { + return true; + } + } + } + + if (!allReady.empty()) { + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + VertexType top_node = max_all_proc_score_heap[i].top(); + + if (memory_constraint.can_add(top_node, i)) { + return true; + } + } + } + + return false; + } + } + + return true; + } + + bool Choose(const BspInstance &instance, + std::set &allReady, + std::vector> &procReady, + const std::vector &procFree, + VertexType &node, + unsigned &p, + const bool endSupStep, + const v_workw_t remaining_time) { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + if (procFree[proc] && !procReady[proc].empty()) { + // select node + VertexType top_node = max_proc_score_heap[proc].top(); + + // filling up + bool procready_empty = false; + while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) { + procReady[proc].erase(top_node); + ready_phase[top_node] = std::numeric_limits::max(); + max_proc_score_heap[proc].pop(); + if (!procReady[proc].empty()) { + top_node = max_proc_score_heap[proc].top(); + } else { + procready_empty = true; + break; + } + } + if (procready_empty) { + continue; + } + + node = top_node; + p = proc; + } + } + + if (p < instance.numberOfProcessors()) { + return true; + } + + Priority best_priority = {std::numeric_limits::min(), 0, 0}; + bool found_node = false; + + for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + if (!procFree[proc] or max_all_proc_score_heap[proc].is_empty()) { + continue; + } + + VertexType top_node = max_all_proc_score_heap[proc].top(); + + // filling up + bool all_procready_empty = false; + while (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(top_node))) { + allReady.erase(top_node); + for (unsigned proc_del = 0; proc_del < instance.numberOfProcessors(); proc_del++) { + if (proc_del == proc || !instance.isCompatible(top_node, proc_del)) { + continue; + } + max_all_proc_score_heap[proc_del].erase(top_node); + } + max_all_proc_score_heap[proc].pop(); + ready_phase[top_node] = std::numeric_limits::max(); + if (!max_all_proc_score_heap[proc].is_empty()) { + top_node = max_all_proc_score_heap[proc].top(); + } else { + all_procready_empty = true; + break; + } + } + if (all_procready_empty) { + continue; + } + + Priority top_priority = max_all_proc_score_heap[proc].get_value(top_node); + if (!found_node || PriorityCompare{}(top_priority, best_priority)) { + if constexpr (use_memory_constraint) { + if (memory_constraint.can_add(top_node, proc)) { + best_priority = top_priority; + node = top_node; + p = proc; + found_node = true; + } + + } else { + best_priority = top_priority; + node = top_node; + p = proc; + found_node = true; + } + } + } + return (found_node && std::get<0>(best_priority) > -3); + } + + bool CanChooseNode(const BspInstance &instance, + const std::vector> &procReady, + const std::vector &procFree) const { + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i] && !procReady[i].empty()) { + return true; + } + } + + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i] && !max_all_proc_score_heap[i].is_empty()) { + return true; + } + } + + return false; + } + + unsigned get_nr_parallelizable_nodes(const BspInstance &instance, + const std::vector &nr_ready_nodes_per_type, + const std::vector &nr_procs_per_type) const { + unsigned nr_nodes = 0; + + std::vector ready_nodes_per_type = nr_ready_nodes_per_type; + std::vector procs_per_type = nr_procs_per_type; + for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) { + for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { + if (instance.isCompatibleType(node_type, proc_type)) { + unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]); + nr_nodes += matched; + ready_nodes_per_type[node_type] -= matched; + procs_per_type[proc_type] -= matched; + } + } + } + + return nr_nodes; + } + + public: + /** + * @brief Default constructor for GreedyBspLocking. + */ + BspLocking(float max_percent_idle_processors_ = 0.4f, bool increase_parallelism_in_new_superstep_ = true) + : max_percent_idle_processors(max_percent_idle_processors_), + increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {} + + /** + * @brief Default destructor for GreedyBspLocking. + */ + virtual ~BspLocking() = default; + + /** + * @brief Compute a schedule for the given BspInstance. + * + * This method computes a schedule for the given BspInstance using a greedy algorithm. + * + * @param instance The BspInstance object representing the instance to compute the schedule for. + * @return A pair containing the return status and the computed BspSchedule. + */ + virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { + const auto &instance = schedule.getInstance(); + + for (const auto &v : instance.getComputationalDag().vertices()) { + schedule.setAssignedProcessor(v, std::numeric_limits::max()); + } + + unsigned supstepIdx = 0; + + if constexpr (is_memory_constraint_v) { + memory_constraint.initialize(instance); + } else if constexpr (is_memory_constraint_schedule_v) { + memory_constraint.initialize(schedule, supstepIdx); + } + + const auto &N = instance.numberOfVertices(); + const unsigned ¶ms_p = instance.numberOfProcessors(); + const auto &G = instance.getComputationalDag(); + + const std::vector> path_length = get_longest_path(G); + v_workw_t max_path = 1; + for (const auto &i : instance.vertices()) { + if (path_length[i] > max_path) { + max_path = path_length[i]; + } + } + + default_value.clear(); + default_value.resize(N, 0); + for (const auto &i : instance.vertices()) { + // assert(path_length[i] * 20 / max_path <= std::numeric_limits::max()); + default_value[i] = static_cast(path_length[i] * static_cast>(20) / max_path); + } + + max_proc_score_heap = std::vector(params_p); + max_all_proc_score_heap = std::vector(params_p); + + locked_set.clear(); + locked.clear(); + locked.resize(N, std::numeric_limits::max()); + + std::set ready; + ready_phase.clear(); + ready_phase.resize(N, std::numeric_limits::max()); + + std::vector> procReady(params_p); + std::set allReady; + + std::vector nrPredecDone(N, 0); + std::vector procFree(params_p, true); + unsigned free = params_p; + + std::vector nr_ready_nodes_per_type(G.num_vertex_types(), 0); + std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); + for (unsigned proc = 0; proc < params_p; ++proc) { + ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; + } + + std::set, VertexType>> finishTimes; + finishTimes.emplace(0, std::numeric_limits::max()); + + for (const auto &v : source_vertices_view(G)) { + ready.insert(v); + allReady.insert(v); + ++nr_ready_nodes_per_type[G.vertex_type(v)]; + ready_phase[v] = params_p; + + for (unsigned proc = 0; proc < params_p; ++proc) { + if (instance.isCompatible(v, proc)) { + Priority priority = {default_value[v], static_cast(G.out_degree(v)), v}; + max_all_proc_score_heap[proc].push(v, priority); + } + } + } + + bool endSupStep = false; + + while (!ready.empty() || !finishTimes.empty()) { + if (finishTimes.empty() && endSupStep) { + for (unsigned proc = 0; proc < params_p; ++proc) { + procReady[proc].clear(); + max_proc_score_heap[proc].clear(); + + if constexpr (use_memory_constraint) { + memory_constraint.reset(proc); + } + } + + allReady = ready; + + for (const auto &node : locked_set) { + locked[node] = std::numeric_limits::max(); + } + locked_set.clear(); + + for (unsigned proc = 0; proc < params_p; ++proc) { + max_all_proc_score_heap[proc].clear(); + } + + for (const auto &v : ready) { + ready_phase[v] = params_p; + for (unsigned proc = 0; proc < params_p; ++proc) { + if (!instance.isCompatible(v, proc)) { + continue; + } + + int score = computeScore(v, proc, instance); + Priority priority = {score, static_cast(G.out_degree(v)), v}; + max_all_proc_score_heap[proc].push(v, priority); + } + } + + ++supstepIdx; + + endSupStep = false; + + finishTimes.emplace(0, std::numeric_limits::max()); + } + + const v_workw_t time = finishTimes.begin()->first; + const v_workw_t max_finish_time = finishTimes.rbegin()->first; + + // Find new ready jobs + while (!finishTimes.empty() && finishTimes.begin()->first == time) { + const VertexType node = finishTimes.begin()->second; + finishTimes.erase(finishTimes.begin()); + + if (node != std::numeric_limits::max()) { + for (const auto &succ : G.children(node)) { + ++nrPredecDone[succ]; + if (nrPredecDone[succ] == G.in_degree(succ)) { + ready.insert(succ); + ++nr_ready_nodes_per_type[G.vertex_type(succ)]; + + bool canAdd = true; + for (const auto &pred : G.parents(succ)) { + if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) + && schedule.assignedSuperstep(pred) == supstepIdx) { + canAdd = false; + break; + } + } + + if constexpr (use_memory_constraint) { + if (canAdd) { + if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) { + canAdd = false; + } + } + } + + if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) { + canAdd = false; + } + + if (canAdd) { + procReady[schedule.assignedProcessor(node)].insert(succ); + ready_phase[succ] = schedule.assignedProcessor(node); + + int score = computeScore(succ, schedule.assignedProcessor(node), instance); + Priority priority = {score, static_cast(G.out_degree(succ)), succ}; + + max_proc_score_heap[schedule.assignedProcessor(node)].push(succ, priority); + } + } + } + procFree[schedule.assignedProcessor(node)] = true; + ++free; + } + } + + // Assign new jobs to processors + if (!CanChooseNode(instance, procReady, procFree)) { + endSupStep = true; + } + + while (CanChooseNode(instance, procReady, procFree)) { + VertexType nextNode = std::numeric_limits::max(); + unsigned nextProc = instance.numberOfProcessors(); + Choose(instance, allReady, procReady, procFree, nextNode, nextProc, endSupStep, max_finish_time - time); + + if (nextNode == std::numeric_limits::max() || nextProc == instance.numberOfProcessors()) { + endSupStep = true; + break; + } + + if (ready_phase[nextNode] < params_p) { + procReady[nextProc].erase(nextNode); + + max_proc_score_heap[nextProc].erase(nextNode); + + } else { + allReady.erase(nextNode); + + for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + if (instance.isCompatible(nextNode, proc) && max_all_proc_score_heap[proc].contains(nextNode)) { + max_all_proc_score_heap[proc].erase(nextNode); + } + } + } + + ready.erase(nextNode); + --nr_ready_nodes_per_type[G.vertex_type(nextNode)]; + schedule.setAssignedProcessor(nextNode, nextProc); + schedule.setAssignedSuperstep(nextNode, supstepIdx); + + ready_phase[nextNode] = std::numeric_limits::max(); + + if constexpr (use_memory_constraint) { + memory_constraint.add(nextNode, nextProc); + + std::vector toErase; + for (const auto &node : procReady[nextProc]) { + if (not memory_constraint.can_add(node, nextProc)) { + toErase.push_back(node); + } + } + + for (const auto &node : toErase) { + procReady[nextProc].erase(node); + max_proc_score_heap[nextProc].erase(node); + ready_phase[node] = std::numeric_limits::max(); + } + } + + finishTimes.emplace(time + G.vertex_work_weight(nextNode), nextNode); + procFree[nextProc] = false; + --free; + + // update auxiliary structures + + for (const auto &succ : G.children(nextNode)) { + if (locked[succ] < params_p && locked[succ] != nextProc) { + for (const auto &parent : G.parents(succ)) { + if (ready_phase[parent] < std::numeric_limits::max() && ready_phase[parent] < params_p + && ready_phase[parent] != locked[succ]) { + Priority p = max_proc_score_heap[ready_phase[parent]].get_value(parent); + std::get<0>(p) += lock_penalty; + max_proc_score_heap[ready_phase[parent]].update(parent, p); + } + if (ready_phase[parent] == params_p) { + for (unsigned proc = 0; proc < params_p; ++proc) { + if (proc == locked[succ] || !instance.isCompatible(parent, proc)) { + continue; + } + + if (max_all_proc_score_heap[proc].contains(parent)) { + Priority p = max_all_proc_score_heap[proc].get_value(parent); + std::get<0>(p) += lock_penalty; + max_all_proc_score_heap[proc].update(parent, p); + } + } + } + } + locked[succ] = params_p; + } else if (locked[succ] == std::numeric_limits::max()) { + locked_set.push_back(succ); + locked[succ] = nextProc; + + for (const auto &parent : G.parents(succ)) { + if (ready_phase[parent] < std::numeric_limits::max() && ready_phase[parent] < params_p + && ready_phase[parent] != nextProc) { + Priority p = max_proc_score_heap[ready_phase[parent]].get_value(parent); + std::get<0>(p) -= lock_penalty; + max_proc_score_heap[ready_phase[parent]].update(parent, p); + } + if (ready_phase[parent] == params_p) { + for (unsigned proc = 0; proc < params_p; ++proc) { + if (proc == nextProc || !instance.isCompatible(parent, proc)) { + continue; + } + + if (max_all_proc_score_heap[proc].contains(parent)) { + Priority p = max_all_proc_score_heap[proc].get_value(parent); + std::get<0>(p) -= lock_penalty; + max_all_proc_score_heap[proc].update(parent, p); + } + } + } + } + } + } + } + + if constexpr (use_memory_constraint) { + if (not check_mem_feasibility(instance, allReady, procReady)) { + return RETURN_STATUS::ERROR; + } + } + + if (free > params_p * max_percent_idle_processors + && ((!increase_parallelism_in_new_superstep) + || get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) + >= std::min(std::min(params_p, static_cast(1.2 * (params_p - free))), + params_p - free + (static_cast(0.5 * free))))) { + endSupStep = true; + } + } + + assert(schedule.satisfiesPrecedenceConstraints()); + + return RETURN_STATUS::OSP_SUCCESS; + } + + /** + * @brief Get the name of the schedule. + * + * This method returns the name of the schedule, which is "BspGreedy" in this case. + * + * @return The name of the schedule. + */ + virtual std::string getScheduleName() const override { + if (use_memory_constraint) { + return "BspGreedyLockingMemory"; + } else { + return "BspGreedyLocking"; + } + } + + void set_max_percent_idle_processors(float max_percent_idle_processors_) { + max_percent_idle_processors = max_percent_idle_processors_; + } +}; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp b/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp index 363f5c85..7294e974 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp @@ -25,11 +25,11 @@ limitations under the License. namespace osp { -template +template class GreedyBspToMaxBspConverter { - static_assert(is_computational_dag_v, "GreedyBspToMaxBspConverter can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t >, "GreedyBspToMaxBspConverter requires work and comm. weights to have the same type."); + static_assert(std::is_same_v, v_commw_t>, + "GreedyBspToMaxBspConverter requires work and comm. weights to have the same type."); protected: using vertex_idx = vertex_idx_t; @@ -39,73 +39,68 @@ class GreedyBspToMaxBspConverter { double latency_coefficient = 1.25; double decay_factor = 0.5; - std::vector>>> createSuperstepLists(const BspScheduleCS& schedule, std::vector& priorities) const; + std::vector>>> createSuperstepLists(const BspScheduleCS &schedule, + std::vector &priorities) const; public: - - MaxBspSchedule Convert(const BspSchedule& schedule) const; - MaxBspScheduleCS Convert(const BspScheduleCS& schedule) const; - + MaxBspSchedule Convert(const BspSchedule &schedule) const; + MaxBspScheduleCS Convert(const BspScheduleCS &schedule) const; }; -template -MaxBspSchedule GreedyBspToMaxBspConverter::Convert(const BspSchedule& schedule) const -{ +template +MaxBspSchedule GreedyBspToMaxBspConverter::Convert(const BspSchedule &schedule) const { BspScheduleCS schedule_cs(schedule); return Convert(schedule_cs); } -template -MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const BspScheduleCS& schedule) const -{ - const Graph_t& dag = schedule.getInstance().getComputationalDag(); +template +MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const BspScheduleCS &schedule) const { + const Graph_t &dag = schedule.getInstance().getComputationalDag(); // Initialize data structures std::vector priorities; std::vector>> proc_list = createSuperstepLists(schedule, priorities); std::vector> work_remaining_proc_superstep(schedule.getInstance().numberOfProcessors(), - std::vector(schedule.numberOfSupersteps(), 0)); + std::vector(schedule.numberOfSupersteps(), 0)); std::vector nodes_remaining_superstep(schedule.numberOfSupersteps(), 0); MaxBspScheduleCS schedule_max(schedule.getInstance()); - for (vertex_idx node = 0; node < schedule.getInstance().numberOfVertices(); node++) - { - work_remaining_proc_superstep[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)] += dag.vertex_work_weight(node); + for (vertex_idx node = 0; node < schedule.getInstance().numberOfVertices(); node++) { + work_remaining_proc_superstep[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)] + += dag.vertex_work_weight(node); ++nodes_remaining_superstep[schedule.assignedSuperstep(node)]; schedule_max.setAssignedProcessor(node, schedule.assignedProcessor(node)); } - std::vector> send_comm_remaining_proc_superstep(schedule.getInstance().numberOfProcessors(), - std::vector(schedule.numberOfSupersteps(), 0)); - std::vector> rec_comm_remaining_proc_superstep(schedule.getInstance().numberOfProcessors(), - std::vector(schedule.numberOfSupersteps(), 0)); + std::vector> send_comm_remaining_proc_superstep( + schedule.getInstance().numberOfProcessors(), std::vector(schedule.numberOfSupersteps(), 0)); + std::vector> rec_comm_remaining_proc_superstep( + schedule.getInstance().numberOfProcessors(), std::vector(schedule.numberOfSupersteps(), 0)); std::vector>> free_comm_steps_for_superstep(schedule.numberOfSupersteps()); - std::vector>> dependent_comm_steps_for_node(schedule.getInstance().numberOfVertices()); - for (auto const &[key, val] : schedule.getCommunicationSchedule()) - { - if(schedule.assignedSuperstep(std::get<0>(key)) == val) - { + std::vector>> dependent_comm_steps_for_node( + schedule.getInstance().numberOfVertices()); + for (auto const &[key, val] : schedule.getCommunicationSchedule()) { + if (schedule.assignedSuperstep(std::get<0>(key)) == val) { dependent_comm_steps_for_node[std::get<0>(key)].emplace_back(key, val); - cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(key)) * - schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key)); + cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(key)) + * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key)); send_comm_remaining_proc_superstep[std::get<1>(key)][val] += comm_cost; rec_comm_remaining_proc_superstep[std::get<2>(key)][val] += comm_cost; + } else { + free_comm_steps_for_superstep[val].emplace(key, val); } - else - free_comm_steps_for_superstep[val].emplace(key,val); } // Iterate through supersteps unsigned current_step = 0; - for(unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) - { + for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) { std::vector work_done_on_proc(schedule.getInstance().numberOfProcessors(), 0); cost_type max_work_done = 0; std::vector> newly_freed_comm_steps; std::vector send_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0), - rec_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0); + rec_sum_of_newly_free_on_proc(schedule.getInstance().numberOfProcessors(), 0); std::vector> comm_in_current_step; @@ -113,23 +108,22 @@ MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const Bsp rec_on_proc(schedule.getInstance().numberOfProcessors(), 0); bool empty_superstep = (nodes_remaining_superstep[step] == 0); - while(nodes_remaining_superstep[step] > 0) - { + while (nodes_remaining_superstep[step] > 0) { // I. Select the next node (from any proc) with highest priority unsigned chosen_proc = schedule.getInstance().numberOfProcessors(); double best_prio = std::numeric_limits::max(); - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { - if(!proc_list[proc][step].empty() && (chosen_proc == schedule.getInstance().numberOfProcessors() || - priorities[proc_list[proc][step].front()] < best_prio)) - { + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { + if (!proc_list[proc][step].empty() + && (chosen_proc == schedule.getInstance().numberOfProcessors() + || priorities[proc_list[proc][step].front()] < best_prio)) { chosen_proc = proc; best_prio = priorities[proc_list[proc][step].front()]; } } - if(chosen_proc == schedule.getInstance().numberOfProcessors()) + if (chosen_proc == schedule.getInstance().numberOfProcessors()) { break; + } vertex_idx chosen_node = proc_list[chosen_proc][step].front(); proc_list[chosen_proc][step].pop_front(); @@ -138,34 +132,33 @@ MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const Bsp max_work_done = std::max(max_work_done, work_done_on_proc[chosen_proc]); schedule_max.setAssignedSuperstep(chosen_node, current_step); --nodes_remaining_superstep[step]; - for(const std::pair& entry : dependent_comm_steps_for_node[chosen_node]) - { + for (const std::pair &entry : dependent_comm_steps_for_node[chosen_node]) { newly_freed_comm_steps.push_back(entry); - cost_type comm_cost = dag.vertex_comm_weight(chosen_node) * - schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)); + cost_type comm_cost + = dag.vertex_comm_weight(chosen_node) + * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)); send_sum_of_newly_free_on_proc[std::get<1>(entry.first)] += comm_cost; rec_sum_of_newly_free_on_proc[std::get<2>(entry.first)] += comm_cost; } // II. Add nodes on all other processors if this doesn't increase work cost - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { - if(proc == chosen_proc) + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { + if (proc == chosen_proc) { continue; - while(!proc_list[proc][step].empty() && work_done_on_proc[proc] + - dag.vertex_work_weight(proc_list[proc][step].front()) <= max_work_done) - { + } + while (!proc_list[proc][step].empty() + && work_done_on_proc[proc] + dag.vertex_work_weight(proc_list[proc][step].front()) <= max_work_done) { vertex_idx node = proc_list[proc][step].front(); proc_list[proc][step].pop_front(); work_done_on_proc[proc] += dag.vertex_work_weight(node); work_remaining_proc_superstep[proc][step] -= dag.vertex_work_weight(node); schedule_max.setAssignedSuperstep(node, current_step); --nodes_remaining_superstep[step]; - for(const std::pair& entry : dependent_comm_steps_for_node[node]) - { + for (const std::pair &entry : dependent_comm_steps_for_node[node]) { newly_freed_comm_steps.push_back(entry); - cost_type comm_cost = dag.vertex_comm_weight(node) * - schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)); + cost_type comm_cost = dag.vertex_comm_weight(node) + * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), + std::get<2>(entry.first)); send_sum_of_newly_free_on_proc[std::get<1>(entry.first)] += comm_cost; rec_sum_of_newly_free_on_proc[std::get<2>(entry.first)] += comm_cost; } @@ -173,47 +166,49 @@ MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const Bsp } // III. Add communication steps that are already available - for(auto itr = free_comm_steps_for_superstep[step].begin(); itr != free_comm_steps_for_superstep[step].end();) - { - if(send_on_proc[std::get<1>(itr->first)] < max_work_done && - rec_on_proc[std::get<2>(itr->first)] < max_work_done) - { - cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(itr->first)) * - schedule.getInstance().getArchitecture().sendCosts(std::get<1>(itr->first), std::get<2>(itr->first)) * - schedule.getInstance().getArchitecture().communicationCosts(); + for (auto itr = free_comm_steps_for_superstep[step].begin(); itr != free_comm_steps_for_superstep[step].end();) { + if (send_on_proc[std::get<1>(itr->first)] < max_work_done && rec_on_proc[std::get<2>(itr->first)] < max_work_done) { + cost_type comm_cost + = dag.vertex_comm_weight(std::get<0>(itr->first)) + * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(itr->first), std::get<2>(itr->first)) + * schedule.getInstance().getArchitecture().communicationCosts(); send_on_proc[std::get<1>(itr->first)] += comm_cost; rec_on_proc[std::get<2>(itr->first)] += comm_cost; - if(current_step - 1 >= schedule_max.numberOfSupersteps()) + if (current_step - 1 >= schedule_max.numberOfSupersteps()) { schedule_max.setNumberOfSupersteps(current_step); + } schedule_max.addCommunicationScheduleEntry(itr->first, current_step - 1); comm_in_current_step.emplace_back(*itr); free_comm_steps_for_superstep[step].erase(itr++); - } - else + } else { ++itr; + } } // IV. Decide whether to split superstep here - if(!free_comm_steps_for_superstep[step].empty() || nodes_remaining_superstep[step] == 0) + if (!free_comm_steps_for_superstep[step].empty() || nodes_remaining_superstep[step] == 0) { continue; + } cost_type max_work_remaining = 0, max_comm_remaining = 0, comm_after_reduction = 0; - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { max_work_remaining = std::max(max_work_remaining, work_remaining_proc_superstep[proc][step]); max_comm_remaining = std::max(max_comm_remaining, send_comm_remaining_proc_superstep[proc][step]); max_comm_remaining = std::max(max_comm_remaining, rec_comm_remaining_proc_superstep[proc][step]); - comm_after_reduction = std::max(comm_after_reduction, send_comm_remaining_proc_superstep[proc][step] - send_sum_of_newly_free_on_proc[proc]); - comm_after_reduction = std::max(comm_after_reduction, rec_comm_remaining_proc_superstep[proc][step] - rec_sum_of_newly_free_on_proc[proc]); + comm_after_reduction = std::max( + comm_after_reduction, send_comm_remaining_proc_superstep[proc][step] - send_sum_of_newly_free_on_proc[proc]); + comm_after_reduction = std::max( + comm_after_reduction, rec_comm_remaining_proc_superstep[proc][step] - rec_sum_of_newly_free_on_proc[proc]); } - cost_type comm_reduction = (max_comm_remaining - comm_after_reduction) * schedule.getInstance().getArchitecture().communicationCosts(); + cost_type comm_reduction + = (max_comm_remaining - comm_after_reduction) * schedule.getInstance().getArchitecture().communicationCosts(); cost_type gain = std::min(comm_reduction, max_work_remaining); - if(gain > 0 && static_cast(gain) >= static_cast(schedule.getInstance().getArchitecture().synchronisationCosts()) * latency_coefficient) - { + if (gain > 0 + && static_cast(gain) >= static_cast(schedule.getInstance().getArchitecture().synchronisationCosts()) + * latency_coefficient) { // Split superstep - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { work_done_on_proc[proc] = 0; send_on_proc[proc] = 0; rec_on_proc[proc] = 0; @@ -221,12 +216,12 @@ MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const Bsp rec_sum_of_newly_free_on_proc[proc] = 0; } max_work_done = 0; - for(const std::pair& entry : newly_freed_comm_steps) - { + for (const std::pair &entry : newly_freed_comm_steps) { free_comm_steps_for_superstep[step].insert(entry); - cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(entry.first)) * - schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)); + cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(entry.first)) + * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), + std::get<2>(entry.first)); send_comm_remaining_proc_superstep[std::get<1>(entry.first)][step] -= comm_cost; rec_comm_remaining_proc_superstep[std::get<2>(entry.first)][step] -= comm_cost; } @@ -236,19 +231,21 @@ MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const Bsp } } - if(!empty_superstep) + if (!empty_superstep) { ++current_step; + } - for(const std::pair& entry : newly_freed_comm_steps) + for (const std::pair &entry : newly_freed_comm_steps) { free_comm_steps_for_superstep[step].insert(entry); + } - if(free_comm_steps_for_superstep[step].empty()) + if (free_comm_steps_for_superstep[step].empty()) { continue; + } // Handle the remaining communication steps: creating a new superstep afterwards with no work cost_type max_comm_current = 0; - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { max_comm_current = std::max(max_comm_current, send_on_proc[proc]); max_comm_current = std::max(max_comm_current, rec_on_proc[proc]); } @@ -258,114 +255,118 @@ MaxBspScheduleCS GreedyBspToMaxBspConverter::Convert(const Bsp rec_on_proc.resize(schedule.getInstance().numberOfProcessors(), 0); std::set> late_arriving_nodes; - for(const std::pair& entry : free_comm_steps_for_superstep[step]) - { + for (const std::pair &entry : free_comm_steps_for_superstep[step]) { schedule_max.addCommunicationScheduleEntry(entry.first, current_step - 1); - cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(entry.first)) * - schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) * - schedule.getInstance().getArchitecture().communicationCosts(); + cost_type comm_cost + = dag.vertex_comm_weight(std::get<0>(entry.first)) + * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) + * schedule.getInstance().getArchitecture().communicationCosts(); send_on_proc[std::get<1>(entry.first)] += comm_cost; rec_on_proc[std::get<2>(entry.first)] += comm_cost; late_arriving_nodes.emplace(std::get<0>(entry.first), std::get<2>(entry.first)); } - // Edge case - check if it is worth moving all communications from the current superstep to the next one instead (thus saving a sync cost) - // (for this we need to compute the h-relation-max in the current superstep, the next superstep, and also their union) + // Edge case - check if it is worth moving all communications from the current superstep to the next one instead (thus + // saving a sync cost) (for this we need to compute the h-relation-max in the current superstep, the next superstep, and + // also their union) cost_type max_comm_after = 0; - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { max_comm_after = std::max(max_comm_after, send_on_proc[proc]); max_comm_after = std::max(max_comm_after, rec_on_proc[proc]); } - for(const std::pair& entry : comm_in_current_step) - { - cost_type comm_cost = dag.vertex_comm_weight(std::get<0>(entry.first)) * - schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) * - schedule.getInstance().getArchitecture().communicationCosts(); + for (const std::pair &entry : comm_in_current_step) { + cost_type comm_cost + = dag.vertex_comm_weight(std::get<0>(entry.first)) + * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(entry.first), std::get<2>(entry.first)) + * schedule.getInstance().getArchitecture().communicationCosts(); send_on_proc[std::get<1>(entry.first)] += comm_cost; rec_on_proc[std::get<2>(entry.first)] += comm_cost; } cost_type max_comm_together = 0; - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { max_comm_together = std::max(max_comm_together, send_on_proc[proc]); max_comm_together = std::max(max_comm_together, rec_on_proc[proc]); } cost_type work_limit = max_comm_after; - if(max_comm_together + max_work_done <= max_comm_after + std::max(max_work_done, max_comm_current) + schedule.getInstance().getArchitecture().synchronisationCosts()) - { + if (max_comm_together + max_work_done <= max_comm_after + std::max(max_work_done, max_comm_current) + + schedule.getInstance().getArchitecture().synchronisationCosts()) { work_limit = max_comm_together; - for(const std::pair& entry : comm_in_current_step) - { - if(current_step - 1 >= schedule_max.numberOfSupersteps()) + for (const std::pair &entry : comm_in_current_step) { + if (current_step - 1 >= schedule_max.numberOfSupersteps()) { schedule_max.setNumberOfSupersteps(current_step); + } schedule_max.addCommunicationScheduleEntry(entry.first, current_step - 1); late_arriving_nodes.emplace(std::get<0>(entry.first), std::get<2>(entry.first)); } } // Bring computation steps into the extra superstep from the next superstep, if possible,a s long as it does not increase cost - if(step == schedule.numberOfSupersteps() - 1) + if (step == schedule.numberOfSupersteps() - 1) { continue; + } - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { cost_type work_so_far = 0; std::set brought_forward; - for(vertex_idx node : proc_list[proc][step+1]) - { - if(work_so_far + dag.vertex_work_weight(node) > work_limit) + for (vertex_idx node : proc_list[proc][step + 1]) { + if (work_so_far + dag.vertex_work_weight(node) > work_limit) { continue; + } bool has_dependency = false; - for (const vertex_idx &parent : dag.parents(node)) - { - if(schedule.assignedProcessor(node) != schedule.assignedProcessor(parent) && - late_arriving_nodes.find(std::make_pair(parent, proc)) != late_arriving_nodes.end()) - has_dependency = true; + for (const vertex_idx &parent : dag.parents(node)) { + if (schedule.assignedProcessor(node) != schedule.assignedProcessor(parent) + && late_arriving_nodes.find(std::make_pair(parent, proc)) != late_arriving_nodes.end()) { + has_dependency = true; + } - if(schedule.assignedProcessor(node) == schedule.assignedProcessor(parent) && - schedule.assignedSuperstep(parent) == step + 1 && - brought_forward.find(parent) == brought_forward.end()) - has_dependency = true; + if (schedule.assignedProcessor(node) == schedule.assignedProcessor(parent) + && schedule.assignedSuperstep(parent) == step + 1 + && brought_forward.find(parent) == brought_forward.end()) { + has_dependency = true; + } } - if(has_dependency) + if (has_dependency) { continue; + } brought_forward.insert(node); work_so_far += dag.vertex_work_weight(node); schedule_max.setAssignedSuperstep(node, current_step); - work_remaining_proc_superstep[proc][step+1] -= dag.vertex_work_weight(node); - --nodes_remaining_superstep[step+1]; + work_remaining_proc_superstep[proc][step + 1] -= dag.vertex_work_weight(node); + --nodes_remaining_superstep[step + 1]; - for(const std::pair& entry : dependent_comm_steps_for_node[node]) - free_comm_steps_for_superstep[step+1].insert(entry); + for (const std::pair &entry : dependent_comm_steps_for_node[node]) { + free_comm_steps_for_superstep[step + 1].insert(entry); + } } std::deque remaining; - for(vertex_idx node : proc_list[proc][step+1]) - if(brought_forward.find(node) == brought_forward.end()) + for (vertex_idx node : proc_list[proc][step + 1]) { + if (brought_forward.find(node) == brought_forward.end()) { remaining.push_back(node); + } + } - proc_list[proc][step+1] = remaining; + proc_list[proc][step + 1] = remaining; } ++current_step; + } - } - - return schedule_max; + return schedule_max; } -// Auxiliary function: creates a separate vectors for each proc-supstep combination, collecting the nodes in a priority-based topological order -template -std::vector>>> GreedyBspToMaxBspConverter::createSuperstepLists(const BspScheduleCS& schedule, std::vector& priorities) const -{ - const Graph_t& dag = schedule.getInstance().getComputationalDag(); +// Auxiliary function: creates a separate vectors for each proc-supstep combination, collecting the nodes in a priority-based +// topological order +template +std::vector>>> GreedyBspToMaxBspConverter::createSuperstepLists( + const BspScheduleCS &schedule, std::vector &priorities) const { + const Graph_t &dag = schedule.getInstance().getComputationalDag(); std::vector top_order = GetTopOrder(dag); priorities.clear(); priorities.resize(dag.num_vertices()); @@ -373,54 +374,58 @@ std::vector>>> GreedyBspToMaxBspCon // compute for each node the amount of dependent send cost in the same superstep std::vector comm_dependency(dag.num_vertices(), 0); - for (auto const &[key, val] : schedule.getCommunicationSchedule()) - if(schedule.assignedSuperstep(std::get<0>(key)) == val) - comm_dependency[std::get<0>(key)] += dag.vertex_comm_weight(std::get<0>(key)) * - schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key)); + for (auto const &[key, val] : schedule.getCommunicationSchedule()) { + if (schedule.assignedSuperstep(std::get<0>(key)) == val) { + comm_dependency[std::get<0>(key)] + += dag.vertex_comm_weight(std::get<0>(key)) + * schedule.getInstance().getArchitecture().sendCosts(std::get<1>(key), std::get<2>(key)); + } + } // assign priority to nodes - based on their own work/comm ratio, and that of its successors in the same proc/supstep - for (auto itr = top_order.rbegin(); itr != top_order.rend(); ++itr) - { + for (auto itr = top_order.rbegin(); itr != top_order.rend(); ++itr) { vertex_idx node = *itr; double base = static_cast(dag.vertex_work_weight(node)); - if(comm_dependency[node] > 0) + if (comm_dependency[node] > 0) { base /= static_cast(2 * comm_dependency[node]); + } double successors = 0; unsigned num_children = 0; - for (const vertex_idx &child : dag.children(node)) - if(schedule.assignedProcessor(node) == schedule.assignedProcessor(child) && - schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child)) - { + for (const vertex_idx &child : dag.children(node)) { + if (schedule.assignedProcessor(node) == schedule.assignedProcessor(child) + && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child)) { ++num_children; successors += priorities[child]; ++local_in_degree[child]; } - if(num_children > 0) + } + if (num_children > 0) { successors = successors * decay_factor / static_cast(num_children); + } priorities[node] = base + successors; } // create lists for each processor-superstep pair, in a topological order, sorted by priority - std::vector>> superstep_lists(schedule.getInstance().numberOfProcessors(), - std::vector>(schedule.numberOfSupersteps())); + std::vector>> superstep_lists( + schedule.getInstance().numberOfProcessors(), std::vector>(schedule.numberOfSupersteps())); - std::set > free; - for (vertex_idx node = 0; node < schedule.getInstance().numberOfVertices(); node++) - if(local_in_degree[node]==0) + std::set> free; + for (vertex_idx node = 0; node < schedule.getInstance().numberOfVertices(); node++) { + if (local_in_degree[node] == 0) { free.emplace(priorities[node], node); - while(!free.empty()) - { + } + } + while (!free.empty()) { vertex_idx node = free.begin()->second; free.erase(free.begin()); superstep_lists[schedule.assignedProcessor(node)][schedule.assignedSuperstep(node)].push_back(node); - for (const vertex_idx &child : dag.children(node)) - { - if(schedule.assignedProcessor(node) == schedule.assignedProcessor(child) && - schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child)) - { - if(--local_in_degree[child] == 0) + for (const vertex_idx &child : dag.children(node)) { + if (schedule.assignedProcessor(node) == schedule.assignedProcessor(child) + && schedule.assignedSuperstep(node) == schedule.assignedSuperstep(child)) { + if (--local_in_degree[child] == 0) { free.emplace(priorities[child], child); + } } } } @@ -428,4 +433,4 @@ std::vector>>> GreedyBspToMaxBspCon return superstep_lists; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp index d55d9c82..9aeab9cc 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp @@ -41,9 +41,8 @@ enum CilkMode { CILK, SJF }; * a greedy scheduling algorithm for Cilk-based BSP (Bulk Synchronous Parallel) systems. The scheduler * selects the next node and processor to execute a task based on a greedy strategy. */ -template +template class CilkScheduler : public Scheduler { - static_assert(is_computational_dag_v, "CilkScheduler can only be used with computational DAGs."); private: @@ -61,42 +60,51 @@ class CilkScheduler : public Scheduler { std::mt19937 gen; - void Choose(const BspInstance &instance, std::vector>> &procQueue, - const std::set> &readyNodes, const std::vector &procFree, - vertex_idx_t &node, unsigned &p) { + void Choose(const BspInstance &instance, + std::vector>> &procQueue, + const std::set> &readyNodes, + const std::vector &procFree, + vertex_idx_t &node, + unsigned &p) { if (mode == SJF) { - node = *readyNodes.begin(); - for (auto &r : readyNodes) - if (instance.getComputationalDag().vertex_work_weight(r) < - instance.getComputationalDag().vertex_work_weight(node)) + for (auto &r : readyNodes) { + if (instance.getComputationalDag().vertex_work_weight(r) < instance.getComputationalDag().vertex_work_weight(node)) { node = r; + } + } p = 0; - for (; p < instance.numberOfProcessors(); ++p) - if (procFree[p]) + for (; p < instance.numberOfProcessors(); ++p) { + if (procFree[p]) { break; + } + } } else if (mode == CILK) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { if (procFree[i] && !procQueue[i].empty()) { p = i; node = procQueue[i].back(); procQueue[i].pop_back(); return; } + } // Time to steal - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { if (procFree[i]) { p = i; break; } + } std::vector canStealFrom; - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) - if (!procQueue[i].empty()) + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (!procQueue[i].empty()) { canStealFrom.push_back(i); + } + } if (canStealFrom.empty()) { node = std::numeric_limits>::max(); @@ -139,7 +147,6 @@ class CilkScheduler : public Scheduler { * @return A pair containing the return status and the computed BSP schedule. */ virtual RETURN_STATUS computeSchedule(BspSchedule &bsp_schedule) override { - // if constexpr (use_memory_constraint) { // memory_constraint.initialize(instance); // } @@ -166,8 +173,9 @@ class CilkScheduler : public Scheduler { for (const auto &v : source_vertices_view(instance.getComputationalDag())) { ready.insert(v); - if (mode == CILK) + if (mode == CILK) { procQueue[0].push_front(v); + } } while (!finishTimes.empty()) { @@ -179,15 +187,13 @@ class CilkScheduler : public Scheduler { finishTimes.erase(finishTimes.begin()); const vertex_idx_t &node = currentPair.second; if (node != std::numeric_limits>::max()) { - for (const auto &succ : instance.getComputationalDag().children(node)) { - ++nrPredecDone[succ]; if (nrPredecDone[succ] == instance.getComputationalDag().in_degree(succ)) { - ready.insert(succ); - if (mode == CILK) + if (mode == CILK) { procQueue[schedule.proc[node]].push_back(succ); + } } } procFree[schedule.proc[node]] = true; @@ -197,7 +203,6 @@ class CilkScheduler : public Scheduler { // Assign new jobs to processors while (nrProcFree > 0 && !ready.empty()) { - unsigned nextProc = instance.numberOfProcessors(); vertex_idx_t nextNode = std::numeric_limits>::max(); @@ -214,8 +219,9 @@ class CilkScheduler : public Scheduler { finishTimes.insert({time + instance.getComputationalDag().vertex_work_weight(nextNode), nextNode}); procFree[nextProc] = false; - if (nrProcFree > 0) + if (nrProcFree > 0) { --nrProcFree; + } greedyProcLists[nextProc].push_back(nextNode); } @@ -252,19 +258,18 @@ class CilkScheduler : public Scheduler { * @return The name of the schedule. */ virtual std::string getScheduleName() const override { - switch (mode) { - case CILK: - return "CilkGreedy"; - break; + case CILK: + return "CilkGreedy"; + break; - case SJF: - return "SJFGreedy"; + case SJF: + return "SJFGreedy"; - default: - return "UnknownModeGreedy"; + default: + return "UnknownModeGreedy"; } } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp b/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp index 6f9d1e94..eceac14e 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/ClassicSchedule.hpp @@ -32,7 +32,7 @@ namespace osp { * * This class stores the processor and time information for a schedule. */ -template +template class CSchedule { private: using vertex_idx = vertex_idx_t; @@ -47,8 +47,7 @@ class CSchedule { * @param size The size of the schedule. */ CSchedule(std::size_t size) - : proc(std::vector(size, std::numeric_limits::max())), time(std::vector(size, 0)) { - } + : proc(std::vector(size, std::numeric_limits::max())), time(std::vector(size, 0)) {} /** * @brief Converts the CSchedule object to a BspSchedule object. @@ -60,9 +59,9 @@ class CSchedule { void convertToBspSchedule(const BspInstance &instance, const std::vector> &procAssignmentLists, BspSchedule &bsp_schedule) { - - for (const auto &v : instance.vertices()) + for (const auto &v : instance.vertices()) { bsp_schedule.setAssignedProcessor(v, proc[v]); + } const vertex_idx N = instance.numberOfVertices(); const unsigned P = instance.numberOfProcessors(); @@ -72,8 +71,9 @@ class CSchedule { std::vector done(P), limit(P); - for (unsigned j = 0; j < P; ++j) + for (unsigned j = 0; j < P; ++j) { done[j] = procAssignmentLists[j].cbegin(); + } while (totalNodesDone < N) { // create next superstep @@ -84,31 +84,34 @@ class CSchedule { bool cut = false; for (const auto &source : instance.getComputationalDag().parents(node)) { - if (!processed[source] && proc[source] != proc[node]) + if (!processed[source] && proc[source] != proc[node]) { cut = true; + } } - if (cut) + if (cut) { break; + } } - if (limit[j] != procAssignmentLists[j].end() && time[*limit[j]] < timeLimit) + if (limit[j] != procAssignmentLists[j].end() && time[*limit[j]] < timeLimit) { timeLimit = time[*limit[j]]; + } } - for (unsigned j = 0; j < P; ++j) - for (; done[j] != limit[j] && (time[*done[j]] < timeLimit || - (time[*done[j]] == timeLimit && - instance.getComputationalDag().vertex_work_weight(*done[j]) == 0)); + for (unsigned j = 0; j < P; ++j) { + for (; done[j] != limit[j] + && (time[*done[j]] < timeLimit + || (time[*done[j]] == timeLimit && instance.getComputationalDag().vertex_work_weight(*done[j]) == 0)); ++done[j]) { processed[*done[j]] = true; bsp_schedule.setAssignedSuperstep(*done[j], superStepIdx); ++totalNodesDone; } + } ++superStepIdx; } - } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp index fa05ebfd..78a573c5 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp @@ -47,28 +47,25 @@ enum EtfMode { ETF, BL_EST }; * each processor. The algorithm selects the task with the earliest EST and assigns it to the processor with the * earliest available start time. The process is repeated until all tasks are scheduled. */ -template +template class EtfScheduler : public Scheduler { - static_assert(is_computational_dag_v, "EtfScheduler can only be used with computational DAGs."); static_assert(std::is_convertible_v, v_workw_t>, "EtfScheduler requires that work and communication weights are convertible."); - static_assert(not has_edge_weights_v || - std::is_convertible_v, v_workw_t>, + static_assert(not has_edge_weights_v || std::is_convertible_v, v_workw_t>, "EtfScheduler requires that work and communication weights are convertible."); private: using tv_pair = std::pair, vertex_idx_t>; - EtfMode mode; // The mode of the scheduler (ETF or BL_EST) - bool use_numa; // Flag indicating whether to use NUMA-aware scheduling + EtfMode mode; // The mode of the scheduler (ETF or BL_EST) + bool use_numa; // Flag indicating whether to use NUMA-aware scheduling constexpr static bool use_memory_constraint = is_memory_constraint_v; - static_assert(not use_memory_constraint || - std::is_same_v>, + static_assert(not use_memory_constraint || std::is_same_v>, "EtfScheduler implements only persistent_transient_memory_constraint."); MemoryConstraint_t memory_constraint; @@ -81,7 +78,6 @@ class EtfScheduler : public Scheduler { * @return A vector containing the bottom level of each task. */ std::vector> ComputeBottomLevel(const BspInstance &instance) const { - std::vector> BL(instance.numberOfVertices(), 0); const std::vector> topOrder = GetTopOrder(instance.getComputationalDag()); @@ -93,11 +89,9 @@ class EtfScheduler : public Scheduler { v_workw_t maxval = 0; if constexpr (has_edge_weights_v) { - for (const auto &out_edge : out_edges(node, instance.getComputationalDag())) { - - const v_workw_t tmp_val = BL[target(out_edge, instance.getComputationalDag())] + - instance.getComputationalDag().edge_comm_weight(out_edge); + const v_workw_t tmp_val = BL[target(out_edge, instance.getComputationalDag())] + + instance.getComputationalDag().edge_comm_weight(out_edge); if (tmp_val > maxval) { maxval = tmp_val; @@ -105,11 +99,8 @@ class EtfScheduler : public Scheduler { } } else { - for (const auto &child : instance.getComputationalDag().children(node)) { - - const v_workw_t tmp_val = - BL[child] + instance.getComputationalDag().vertex_comm_weight(child); + const v_workw_t tmp_val = BL[child] + instance.getComputationalDag().vertex_comm_weight(child); if (tmp_val > maxval) { maxval = tmp_val; @@ -123,20 +114,16 @@ class EtfScheduler : public Scheduler { } bool check_mem_feasibility(const BspInstance &instance, const std::set &ready) const { - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - if (ready.empty()) { return true; } for (const auto &node_pair : ready) { for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - const auto node = node_pair.second; if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(node, i)) { return true; } @@ -163,11 +150,13 @@ class EtfScheduler : public Scheduler { * @param avg_ The average execution time of the tasks. * @return The earliest start time (EST) for the task on the processor. */ - v_workw_t GetESTforProc(const BspInstance &instance, CSchedule &schedule, - vertex_idx_t node, unsigned proc, - const v_workw_t procAvailableFrom, std::vector> &send, + v_workw_t GetESTforProc(const BspInstance &instance, + CSchedule &schedule, + vertex_idx_t node, + unsigned proc, + const v_workw_t procAvailableFrom, + std::vector> &send, std::vector> &rec) const { - std::vector predec; for (const auto &pred : instance.getComputationalDag().parents(node)) { predec.emplace_back(schedule.time[pred] + instance.getComputationalDag().vertex_work_weight(pred), pred); @@ -177,22 +166,19 @@ class EtfScheduler : public Scheduler { v_workw_t EST = procAvailableFrom; for (const auto &next : predec) { - v_workw_t t = - schedule.time[next.second] + instance.getComputationalDag().vertex_work_weight(next.second); + v_workw_t t = schedule.time[next.second] + instance.getComputationalDag().vertex_work_weight(next.second); if (schedule.proc[next.second] != proc) { t = std::max(t, send[schedule.proc[next.second]]); t = std::max(t, rec[proc]); if constexpr (has_edge_weights_v) { - t += instance.getComputationalDag().edge_comm_weight( - edge_desc(next.second, node, instance.getComputationalDag()).first) * - instance.sendCosts(schedule.proc[next.second], proc); + edge_desc(next.second, node, instance.getComputationalDag()).first) + * instance.sendCosts(schedule.proc[next.second], proc); } else { - - t += instance.getComputationalDag().vertex_comm_weight(next.second) * - instance.sendCosts(schedule.proc[next.second], proc); + t += instance.getComputationalDag().vertex_comm_weight(next.second) + * instance.sendCosts(schedule.proc[next.second], proc); } send[schedule.proc[next.second]] = t; @@ -215,20 +201,19 @@ class EtfScheduler : public Scheduler { * @param avg_ The average execution time of the tasks. * @return A triple containing the best EST, the node index, and the processor index. */ - tv_pair GetBestESTforNodes(const BspInstance &instance, CSchedule &schedule, + tv_pair GetBestESTforNodes(const BspInstance &instance, + CSchedule &schedule, const std::vector> &nodeList, const std::vector> &procAvailableFrom, - std::vector> &send, std::vector> &rec, + std::vector> &send, + std::vector> &rec, unsigned &bestProc) const { - v_workw_t bestEST = std::numeric_limits>::max(); vertex_idx_t bestNode = 0; std::vector> bestSend, bestRec; - for (const auto &node : nodeList) + for (const auto &node : nodeList) { for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) { - if constexpr (use_memory_constraint) { - if (not memory_constraint.can_add(node, j)) { continue; } @@ -236,8 +221,7 @@ class EtfScheduler : public Scheduler { std::vector> newSend = send; std::vector> newRec = rec; - v_workw_t EST = - GetESTforProc(instance, schedule, node, j, procAvailableFrom[j], newSend, newRec); + v_workw_t EST = GetESTforProc(instance, schedule, node, j, procAvailableFrom[j], newSend, newRec); if (EST < bestEST) { bestEST = EST; bestProc = j; @@ -246,6 +230,7 @@ class EtfScheduler : public Scheduler { bestRec = newRec; } } + } send = bestSend; rec = bestRec; @@ -273,7 +258,6 @@ class EtfScheduler : public Scheduler { * @return A pair containing the return status and the computed BspSchedule object. */ virtual RETURN_STATUS computeSchedule(BspSchedule &bsp_schedule) override { - const auto &instance = bsp_schedule.getInstance(); if constexpr (use_memory_constraint) { @@ -286,14 +270,15 @@ class EtfScheduler : public Scheduler { std::vector> predecProcessed(instance.numberOfVertices(), 0); - std::vector> finishTimes(instance.numberOfProcessors(), 0), - send(instance.numberOfProcessors(), 0), rec(instance.numberOfProcessors(), 0); + std::vector> finishTimes(instance.numberOfProcessors(), 0), send(instance.numberOfProcessors(), 0), + rec(instance.numberOfProcessors(), 0); std::vector> BL; - if (mode == BL_EST) + if (mode == BL_EST) { BL = ComputeBottomLevel(instance); - else + } else { BL = std::vector>(instance.numberOfVertices(), 0); + } std::set ready; @@ -302,12 +287,10 @@ class EtfScheduler : public Scheduler { } while (!ready.empty()) { - tv_pair best_tv(0, 0); unsigned best_proc = 0; if (mode == BL_EST) { - std::vector> nodeList{ready.begin()->second}; ready.erase(ready.begin()); best_tv = GetBestESTforNodes(instance, schedule, nodeList, finishTimes, send, rec, best_proc); @@ -315,8 +298,9 @@ class EtfScheduler : public Scheduler { if (mode == ETF) { std::vector> nodeList; - for (const auto &next : ready) + for (const auto &next : ready) { nodeList.push_back(next.second); + } best_tv = GetBestESTforNodes(instance, schedule, nodeList, finishTimes, send, rec, best_proc); ready.erase(tv_pair({0, best_tv.second})); } @@ -334,12 +318,12 @@ class EtfScheduler : public Scheduler { for (const auto &succ : instance.getComputationalDag().children(node)) { ++predecProcessed[succ]; - if (predecProcessed[succ] == instance.getComputationalDag().in_degree(succ)) + if (predecProcessed[succ] == instance.getComputationalDag().in_degree(succ)) { ready.insert({BL[succ], succ}); + } } if constexpr (use_memory_constraint) { - if (not check_mem_feasibility(instance, ready)) { return RETURN_STATUS::ERROR; } @@ -386,16 +370,16 @@ class EtfScheduler : public Scheduler { */ virtual std::string getScheduleName() const override { switch (mode) { - case ETF: - return "ETFGreedy"; + case ETF: + return "ETFGreedy"; - case BL_EST: - return "BL-ESTGreedy"; + case BL_EST: + return "BL-ESTGreedy"; - default: - return "UnknownModeGreedy"; + default: + return "UnknownModeGreedy"; } } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp index be196e28..c67389aa 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp @@ -31,6 +31,7 @@ limitations under the License. #include "osp/graph_algorithms/directed_graph_util.hpp" namespace osp { + /** * @brief The GreedyBspScheduler class represents a scheduler that uses a greedy algorithm to compute schedules for * BspInstance. @@ -39,16 +40,15 @@ namespace osp { * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. */ -template +template class GreedyBspScheduler : public Scheduler { - static_assert(is_computational_dag_v, "GreedyBspScheduler can only be used with computational DAGs."); private: using VertexType = vertex_idx_t; - constexpr static bool use_memory_constraint = - is_memory_constraint_v or is_memory_constraint_schedule_v; + constexpr static bool use_memory_constraint = is_memory_constraint_v + or is_memory_constraint_schedule_v; static_assert(not use_memory_constraint or std::is_same_v, "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); @@ -56,17 +56,15 @@ class GreedyBspScheduler : public Scheduler { MemoryConstraint_t memory_constraint; struct heap_node { - VertexType node; double score; heap_node() : node(0), score(0) {} + heap_node(VertexType node_arg, double score_arg) : node(node_arg), score(score_arg) {} - bool operator<(heap_node const &rhs) const { - return (score < rhs.score) || (score <= rhs.score and node < rhs.node); - } + bool operator<(heap_node const &rhs) const { return (score < rhs.score) || (score <= rhs.score and node < rhs.node); } }; std::vector> max_proc_score_heap; @@ -80,29 +78,29 @@ class GreedyBspScheduler : public Scheduler { float max_percent_idle_processors; bool increase_parallelism_in_new_superstep; - double computeScore(VertexType node, unsigned proc, const std::vector> &procInHyperedge, + double computeScore(VertexType node, + unsigned proc, + const std::vector> &procInHyperedge, const BspInstance &instance) const { - double score = 0; for (const auto &pred : instance.getComputationalDag().parents(node)) { - if (procInHyperedge[pred][proc]) { - score += static_cast(instance.getComputationalDag().vertex_comm_weight(pred)) / - static_cast(instance.getComputationalDag().out_degree(pred)); + score += static_cast(instance.getComputationalDag().vertex_comm_weight(pred)) + / static_cast(instance.getComputationalDag().out_degree(pred)); } } return score; } - void Choose(const BspInstance &instance, const std::vector> &procReady, - const std::vector &procFree, VertexType &node, unsigned &p) const { - + void Choose(const BspInstance &instance, + const std::vector> &procReady, + const std::vector &procFree, + VertexType &node, + unsigned &p) const { double max_score = -1.0; for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - if (procFree[proc] && !procReady[proc].empty()) { - // select node heap_node top_node = max_proc_score_heap[proc].top(); @@ -116,24 +114,21 @@ class GreedyBspScheduler : public Scheduler { } for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { - if (!procFree[proc] or max_all_proc_score_heap[proc].empty()) + if (!procFree[proc] or max_all_proc_score_heap[proc].empty()) { continue; + } heap_node top_node = max_all_proc_score_heap[proc].top(); if (top_node.score > max_score) { - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(top_node.node, proc)) { - max_score = top_node.score; node = top_node.node; p = proc; } } else { - max_score = top_node.score; node = top_node.node; p = proc; @@ -142,32 +137,36 @@ class GreedyBspScheduler : public Scheduler { } }; - bool CanChooseNode(const BspInstance &instance, const std::set &allReady, - const std::vector> &procReady, const std::vector &procFree) const { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) - if (procFree[i] && !procReady[i].empty()) + bool CanChooseNode(const BspInstance &instance, + const std::set &allReady, + const std::vector> &procReady, + const std::vector &procFree) const { + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i] && !procReady[i].empty()) { return true; + } + } - if (!allReady.empty()) - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) - if (procFree[i]) + if (!allReady.empty()) { + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i]) { return true; + } + } + } return false; }; - bool check_mem_feasibility(const BspInstance &instance, const std::set &allReady, + bool check_mem_feasibility(const BspInstance &instance, + const std::set &allReady, const std::vector> &procReady) const { - if constexpr (use_memory_constraint) { - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - unsigned num_empty_proc = 0; for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { if (!procReady[i].empty()) { - const heap_node &top_node = max_proc_score_heap[i].top(); // todo check if this is correct @@ -183,9 +182,8 @@ class GreedyBspScheduler : public Scheduler { return true; } - if (!allReady.empty()) + if (!allReady.empty()) { for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - const heap_node &top_node = max_all_proc_score_heap[i].top(); // todo check if this is correct @@ -193,6 +191,7 @@ class GreedyBspScheduler : public Scheduler { return true; } } + } return false; } @@ -207,14 +206,16 @@ class GreedyBspScheduler : public Scheduler { std::vector ready_nodes_per_type = nr_ready_nodes_per_type; std::vector procs_per_type = nr_procs_per_type; - for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) - for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) + for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) { + for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { if (instance.isCompatibleType(node_type, proc_type)) { unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]); nr_nodes += matched; ready_nodes_per_type[node_type] -= matched; procs_per_type[proc_type] -= matched; } + } + } return nr_nodes; } @@ -241,7 +242,6 @@ class GreedyBspScheduler : public Scheduler { * @return A pair containing the return status and the computed BspSchedule. */ RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); for (const auto &v : instance.getComputationalDag().vertices()) { @@ -268,8 +268,7 @@ class GreedyBspScheduler : public Scheduler { std::set ready; - std::vector> procInHyperedge = - std::vector>(N, std::vector(params_p, false)); + std::vector> procInHyperedge = std::vector>(N, std::vector(params_p, false)); std::vector> procReady(params_p); std::set allReady; @@ -280,8 +279,9 @@ class GreedyBspScheduler : public Scheduler { std::vector nr_ready_nodes_per_type(G.num_vertex_types(), 0); std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < params_p; ++proc) + for (unsigned proc = 0; proc < params_p; ++proc) { ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; + } std::set, VertexType>> finishTimes; finishTimes.emplace(0, std::numeric_limits::max()); @@ -301,7 +301,6 @@ class GreedyBspScheduler : public Scheduler { bool endSupStep = false; while (!ready.empty() || !finishTimes.empty()) { - if (finishTimes.empty() && endSupStep) { for (unsigned proc = 0; proc < params_p; ++proc) { procReady[proc].clear(); @@ -322,9 +321,9 @@ class GreedyBspScheduler : public Scheduler { for (const auto &v : ready) { for (unsigned proc = 0; proc < params_p; ++proc) { - - if (!instance.isCompatible(v, proc)) + if (!instance.isCompatible(v, proc)) { continue; + } double score = computeScore(v, proc, procInHyperedge, instance); heap_node new_node(v, score); @@ -343,13 +342,11 @@ class GreedyBspScheduler : public Scheduler { // Find new ready jobs while (!finishTimes.empty() && finishTimes.begin()->first == time) { - const VertexType node = finishTimes.begin()->second; finishTimes.erase(finishTimes.begin()); if (node != std::numeric_limits::max()) { for (const auto &succ : G.children(node)) { - ++nrPredecDone[succ]; if (nrPredecDone[succ] == G.in_degree(succ)) { ready.insert(succ); @@ -357,34 +354,33 @@ class GreedyBspScheduler : public Scheduler { bool canAdd = true; for (const auto &pred : G.parents(succ)) { - - if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) && - schedule.assignedSuperstep(pred) == supstepIdx) { + if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) + && schedule.assignedSuperstep(pred) == supstepIdx) { canAdd = false; break; } } if constexpr (use_memory_constraint) { - if (canAdd) { - if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) + if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) { canAdd = false; + } } } - if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) + if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) { canAdd = false; + } if (canAdd) { procReady[schedule.assignedProcessor(node)].insert(succ); - double score = - computeScore(succ, schedule.assignedProcessor(node), procInHyperedge, instance); + double score = computeScore(succ, schedule.assignedProcessor(node), procInHyperedge, instance); heap_node new_node(succ, score); - node_proc_heap_handles[schedule.assignedProcessor(node)][succ] = - max_proc_score_heap[schedule.assignedProcessor(node)].push(new_node); + node_proc_heap_handles[schedule.assignedProcessor(node)][succ] + = max_proc_score_heap[schedule.assignedProcessor(node)].push(new_node); } } } @@ -393,8 +389,9 @@ class GreedyBspScheduler : public Scheduler { } } - if (endSupStep) + if (endSupStep) { continue; + } // Assign new jobs to processors if (!CanChooseNode(instance, allReady, procReady, procFree)) { @@ -402,7 +399,6 @@ class GreedyBspScheduler : public Scheduler { } while (CanChooseNode(instance, allReady, procReady, procFree)) { - VertexType nextNode = std::numeric_limits::max(); unsigned nextProc = instance.numberOfProcessors(); Choose(instance, procReady, procFree, nextNode, nextProc); @@ -413,14 +409,12 @@ class GreedyBspScheduler : public Scheduler { } if (procReady[nextProc].find(nextNode) != procReady[nextProc].end()) { - procReady[nextProc].erase(nextNode); max_proc_score_heap[nextProc].erase(node_proc_heap_handles[nextProc][nextNode]); node_proc_heap_handles[nextProc].erase(nextNode); } else { - allReady.erase(nextNode); for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { @@ -461,7 +455,6 @@ class GreedyBspScheduler : public Scheduler { procInHyperedge[nextNode][nextProc] = true; for (const auto &pred : G.parents(nextNode)) { - if (procInHyperedge[pred][nextProc]) { continue; } @@ -469,21 +462,17 @@ class GreedyBspScheduler : public Scheduler { procInHyperedge[pred][nextProc] = true; for (const auto &child : G.children(pred)) { - if (child != nextNode && procReady[nextProc].find(child) != procReady[nextProc].end()) { - - (*node_proc_heap_handles[nextProc][child]).score += - static_cast(instance.getComputationalDag().vertex_comm_weight(pred)) / - static_cast(instance.getComputationalDag().out_degree(pred)); + (*node_proc_heap_handles[nextProc][child]).score + += static_cast(instance.getComputationalDag().vertex_comm_weight(pred)) + / static_cast(instance.getComputationalDag().out_degree(pred)); max_proc_score_heap[nextProc].update(node_proc_heap_handles[nextProc][child]); } - if (child != nextNode && allReady.find(child) != allReady.end() && - instance.isCompatible(child, nextProc)) { - - (*node_all_proc_heap_handles[nextProc][child]).score += - static_cast(instance.getComputationalDag().vertex_comm_weight(pred)) / - static_cast(instance.getComputationalDag().out_degree(pred)); + if (child != nextNode && allReady.find(child) != allReady.end() && instance.isCompatible(child, nextProc)) { + (*node_all_proc_heap_handles[nextProc][child]).score + += static_cast(instance.getComputationalDag().vertex_comm_weight(pred)) + / static_cast(instance.getComputationalDag().out_degree(pred)); max_all_proc_score_heap[nextProc].update(node_all_proc_heap_handles[nextProc][child]); } } @@ -491,18 +480,16 @@ class GreedyBspScheduler : public Scheduler { } if constexpr (use_memory_constraint) { - if (not check_mem_feasibility(instance, allReady, procReady)) { - return RETURN_STATUS::ERROR; } } - if (free > static_cast(static_cast(params_p) * max_percent_idle_processors) && - ((!increase_parallelism_in_new_superstep) || - get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) >= - std::min(std::min(params_p, static_cast(1.2 * (params_p - free))), - params_p - free + (static_cast(0.5 * free))))) { + if (free > static_cast(static_cast(params_p) * max_percent_idle_processors) + && ((!increase_parallelism_in_new_superstep) + || get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) + >= std::min(std::min(params_p, static_cast(1.2 * (params_p - free))), + params_p - free + (static_cast(0.5 * free))))) { endSupStep = true; } } @@ -522,4 +509,4 @@ class GreedyBspScheduler : public Scheduler { std::string getScheduleName() const override { return "BspGreedy"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp index 9b3880b9..57b043dc 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp @@ -28,17 +28,15 @@ limitations under the License. namespace osp { -template +template class GreedyChildren : public Scheduler { private: bool ensure_enough_sources; public: - GreedyChildren(bool ensure_enough_sources_ = true) : Scheduler(), ensure_enough_sources(ensure_enough_sources_) {}; RETURN_STATUS computeSchedule(BspSchedule &sched) override { - using VertexType = vertex_idx_t; const auto &instance = sched.getInstance(); @@ -87,8 +85,9 @@ class GreedyChildren : public Scheduler { } } - if (failed_to_allocate) + if (failed_to_allocate) { continue; + } sched.setAssignedSuperstep(node, superstep_counter); if (processor_set) { @@ -123,8 +122,9 @@ class GreedyChildren : public Scheduler { node_added = true; break; } - if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors()) + if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors()) { break; + } } superstep_counter++; @@ -133,9 +133,7 @@ class GreedyChildren : public Scheduler { return RETURN_STATUS::OSP_SUCCESS; } - std::string getScheduleName() const override { - return ensure_enough_sources ? "GreedyChildrenS" : "GreedyChildren"; - } + std::string getScheduleName() const override { return ensure_enough_sources ? "GreedyChildrenS" : "GreedyChildren"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp index 890f779c..9aff8fb3 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp @@ -18,11 +18,12 @@ limitations under the License. #pragma once +#include +#include + #include "osp/bsp/model/cost/LazyCommunicationCost.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/bsp/scheduler/Serial.hpp" -#include -#include namespace osp { @@ -38,9 +39,8 @@ namespace osp { * @tparam Graph_t The graph type representing the computational DAG. * @tparam CostModel The cost model functor to evaluate schedules. Defaults to LazyCommunicationCost. */ -template> +template > class GreedyMetaScheduler : public Scheduler { - Serial serial_scheduler_; std::vector *> schedulers_; @@ -58,13 +58,16 @@ class GreedyMetaScheduler : public Scheduler { ~GreedyMetaScheduler() override = default; void addSerialScheduler() { schedulers_.push_back(&serial_scheduler_); } + void addScheduler(Scheduler &s) { schedulers_.push_back(&s); } + void resetScheduler() { schedulers_.clear(); } RETURN_STATUS computeSchedule(BspSchedule &schedule) override { if (schedule.getInstance().getArchitecture().numberOfProcessors() == 1) { - if constexpr (verbose) + if constexpr (verbose) { std::cout << "Using serial scheduler for P=1." << std::endl; + } serial_scheduler_.computeSchedule(schedule); return RETURN_STATUS::OSP_SUCCESS; } @@ -76,14 +79,17 @@ class GreedyMetaScheduler : public Scheduler { scheduler->computeSchedule(current_schedule); const v_workw_t schedule_cost = CostModel()(current_schedule); - if constexpr (verbose) - std::cout << "Executed scheduler " << scheduler->getScheduleName() << ", costs: " << schedule_cost << ", nr. supersteps: " << current_schedule.numberOfSupersteps() << std::endl; + if constexpr (verbose) { + std::cout << "Executed scheduler " << scheduler->getScheduleName() << ", costs: " << schedule_cost + << ", nr. supersteps: " << current_schedule.numberOfSupersteps() << std::endl; + } if (schedule_cost < best_schedule_cost) { best_schedule_cost = schedule_cost; schedule = current_schedule; - if constexpr (verbose) + if constexpr (verbose) { std::cout << "New best schedule!" << std::endl; + } } } @@ -93,4 +99,4 @@ class GreedyMetaScheduler : public Scheduler { std::string getScheduleName() const override { return "GreedyMetaScheduler"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp index bb88ad78..5f943110 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp @@ -21,198 +21,202 @@ limitations under the License. #include "osp/bsp/model/BspScheduleRecomp.hpp" namespace osp { + /** * @brief The GreedyReccomputer class applies a greedy algorithm to remove some of the communciation steps in * a BspSchedule by recomputation steps if this decreases the cost. */ -template +template class GreedyRecomputer { - static_assert(is_computational_dag_v, "GreedyRecomputer can only be used with computational DAGs."); - -private: + + private: using vertex_idx = vertex_idx_t; using cost_type = v_workw_t; using KeyTriple = std::tuple, unsigned int, unsigned int>; - static_assert(std::is_same_v, v_commw_t>, "GreedyRecomputer requires work and comm. weights to have the same type."); - - -public: + static_assert(std::is_same_v, v_commw_t>, + "GreedyRecomputer requires work and comm. weights to have the same type."); + public: /** * @brief Default destructor for GreedyRecomputer. */ virtual ~GreedyRecomputer() = default; - RETURN_STATUS computeRecompSchedule(BspScheduleCS &initial_schedule, BspScheduleRecomp& out_schedule) const; - + RETURN_STATUS computeRecompSchedule(BspScheduleCS &initial_schedule, BspScheduleRecomp &out_schedule) const; }; -template -RETURN_STATUS GreedyRecomputer::computeRecompSchedule(BspScheduleCS &initial_schedule, BspScheduleRecomp& out_schedule) const -{ - const vertex_idx& N = initial_schedule.getInstance().numberOfVertices(); - const unsigned& P = initial_schedule.getInstance().numberOfProcessors(); - const unsigned& S = initial_schedule.numberOfSupersteps(); - const Graph_t& G = initial_schedule.getInstance().getComputationalDag(); +template +RETURN_STATUS GreedyRecomputer::computeRecompSchedule(BspScheduleCS &initial_schedule, + BspScheduleRecomp &out_schedule) const { + const vertex_idx &N = initial_schedule.getInstance().numberOfVertices(); + const unsigned &P = initial_schedule.getInstance().numberOfProcessors(); + const unsigned &S = initial_schedule.numberOfSupersteps(); + const Graph_t &G = initial_schedule.getInstance().getComputationalDag(); out_schedule = BspScheduleRecomp(initial_schedule.getInstance()); out_schedule.setNumberOfSupersteps(initial_schedule.numberOfSupersteps()); // Initialize required data structures - std::vector> work_cost(P, std::vector(S, 0)), - send_cost(P, std::vector(S, 0)), - rec_cost(P, std::vector(S, 0)); + std::vector> work_cost(P, std::vector(S, 0)), send_cost(P, std::vector(S, 0)), + rec_cost(P, std::vector(S, 0)); std::vector> first_computable(N, std::vector(P, 0U)), - first_present(N, std::vector(P, std::numeric_limits::max())); - - std::vector > > needed_on_proc(N, std::vector >(P, {S})); - + first_present(N, std::vector(P, std::numeric_limits::max())); + + std::vector>> needed_on_proc(N, std::vector>(P, {S})); + std::vector max_work(S, 0), max_comm(S, 0); - std::vector > comm_steps(S); + std::vector> comm_steps(S); - for(vertex_idx node = 0; node < N; ++node) - { - const unsigned& proc = initial_schedule.assignedProcessor(node); - const unsigned& step = initial_schedule.assignedSuperstep(node); + for (vertex_idx node = 0; node < N; ++node) { + const unsigned &proc = initial_schedule.assignedProcessor(node); + const unsigned &step = initial_schedule.assignedSuperstep(node); - work_cost[proc][step] += G.vertex_work_weight(node); - first_present[node][proc] = std::min(first_present[node][proc], step); - for(vertex_idx pred : G.parents(node)) - needed_on_proc[pred][proc].insert(step); - - out_schedule.assignments(node).emplace_back(proc, step); + work_cost[proc][step] += G.vertex_work_weight(node); + first_present[node][proc] = std::min(first_present[node][proc], step); + for (vertex_idx pred : G.parents(node)) { + needed_on_proc[pred][proc].insert(step); + } + + out_schedule.assignments(node).emplace_back(proc, step); } - for(const std::pair item : initial_schedule.getCommunicationSchedule()) - { - const vertex_idx& node = std::get<0>(item.first); - const unsigned& from_proc = std::get<1>(item.first); - const unsigned& to_proc = std::get<2>(item.first); - const unsigned& step = item.second; - send_cost[from_proc][step] += G.vertex_comm_weight(node) * - initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc); - rec_cost[to_proc][step] += G.vertex_comm_weight(node) * - initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc); - - comm_steps[step].emplace(item.first); - needed_on_proc[node][from_proc].insert(step); - first_present[node][to_proc] = std::min(first_present[node][to_proc], step+1); + for (const std::pair item : initial_schedule.getCommunicationSchedule()) { + const vertex_idx &node = std::get<0>(item.first); + const unsigned &from_proc = std::get<1>(item.first); + const unsigned &to_proc = std::get<2>(item.first); + const unsigned &step = item.second; + send_cost[from_proc][step] += G.vertex_comm_weight(node) + * initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc); + rec_cost[to_proc][step] += G.vertex_comm_weight(node) + * initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc); + + comm_steps[step].emplace(item.first); + needed_on_proc[node][from_proc].insert(step); + first_present[node][to_proc] = std::min(first_present[node][to_proc], step + 1); + } + for (unsigned step = 0; step < S; ++step) { + for (unsigned proc = 0; proc < P; ++proc) { + max_work[step] = std::max(max_work[step], work_cost[proc][step]); + max_comm[step] = std::max(max_comm[step], send_cost[proc][step]); + max_comm[step] = std::max(max_comm[step], rec_cost[proc][step]); + } } - for(unsigned step = 0; step < S; ++step) - for(unsigned proc = 0; proc < P; ++proc) - { - max_work[step] =std::max(max_work[step], work_cost[proc][step]); - max_comm[step] =std::max(max_comm[step], send_cost[proc][step]); - max_comm[step] =std::max(max_comm[step], rec_cost[proc][step]); - } - - for(vertex_idx node = 0; node < N; ++node) - for(const vertex_idx& pred : G.parents(node)) - for(unsigned proc = 0; proc < P; ++proc) - first_computable[node][proc] = std::max(first_computable[node][proc], first_present[pred][proc]); - + + for (vertex_idx node = 0; node < N; ++node) { + for (const vertex_idx &pred : G.parents(node)) { + for (unsigned proc = 0; proc < P; ++proc) { + first_computable[node][proc] = std::max(first_computable[node][proc], first_present[pred][proc]); + } + } + } + // Find improvement steps bool still_improved = true; - while(still_improved) - { - still_improved = false; - - for(unsigned step = 0; step < S; ++step) - { - std::vector to_erase; - for(const KeyTriple& entry : comm_steps[step]) - { - const vertex_idx& node = std::get<0>(entry); - const unsigned& from_proc = std::get<1>(entry); - const unsigned& to_proc = std::get<2>(entry); - - // check how much comm cost we save by removing comm schedule entry - cost_type comm_induced = G.vertex_comm_weight(node) * - initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc); - - cost_type new_max_comm = 0; - for(unsigned proc = 0; proc < P; ++proc) - { - if(proc == from_proc) - new_max_comm = std::max(new_max_comm, send_cost[proc][step]-comm_induced); - else - new_max_comm = std::max(new_max_comm, send_cost[proc][step]); - if(proc == to_proc) - new_max_comm = std::max(new_max_comm, rec_cost[proc][step]-comm_induced); - else - new_max_comm = std::max(new_max_comm, rec_cost[proc][step]); - } - if(new_max_comm == max_comm[step]) - continue; - - if(!initial_schedule.getInstance().isCompatible(node, to_proc)) - continue; - - cost_type decrease = max_comm[step] - new_max_comm; - if(max_comm[step] > 0 && new_max_comm == 0) - decrease += initial_schedule.getInstance().getArchitecture().synchronisationCosts(); - - // check how much it would increase the work cost instead - unsigned best_step = S; - cost_type smallest_increase = std::numeric_limits::max(); - for(unsigned comp_step = first_computable[node][to_proc]; comp_step <= *needed_on_proc[node][to_proc].begin(); ++comp_step) - { - cost_type increase = work_cost[to_proc][comp_step] + G.vertex_work_weight(node) > max_work[comp_step] ? - work_cost[to_proc][comp_step] + G.vertex_work_weight(node) - max_work[comp_step] : 0 ; - - if(increase < smallest_increase) - { - best_step = comp_step; - smallest_increase = increase; + while (still_improved) { + still_improved = false; + + for (unsigned step = 0; step < S; ++step) { + std::vector to_erase; + for (const KeyTriple &entry : comm_steps[step]) { + const vertex_idx &node = std::get<0>(entry); + const unsigned &from_proc = std::get<1>(entry); + const unsigned &to_proc = std::get<2>(entry); + + // check how much comm cost we save by removing comm schedule entry + cost_type comm_induced = G.vertex_comm_weight(node) + * initial_schedule.getInstance().getArchitecture().communicationCosts(from_proc, to_proc); + + cost_type new_max_comm = 0; + for (unsigned proc = 0; proc < P; ++proc) { + if (proc == from_proc) { + new_max_comm = std::max(new_max_comm, send_cost[proc][step] - comm_induced); + } else { + new_max_comm = std::max(new_max_comm, send_cost[proc][step]); + } + if (proc == to_proc) { + new_max_comm = std::max(new_max_comm, rec_cost[proc][step] - comm_induced); + } else { + new_max_comm = std::max(new_max_comm, rec_cost[proc][step]); + } + } + if (new_max_comm == max_comm[step]) { + continue; + } + + if (!initial_schedule.getInstance().isCompatible(node, to_proc)) { + continue; + } + + cost_type decrease = max_comm[step] - new_max_comm; + if (max_comm[step] > 0 && new_max_comm == 0) { + decrease += initial_schedule.getInstance().getArchitecture().synchronisationCosts(); + } + + // check how much it would increase the work cost instead + unsigned best_step = S; + cost_type smallest_increase = std::numeric_limits::max(); + for (unsigned comp_step = first_computable[node][to_proc]; comp_step <= *needed_on_proc[node][to_proc].begin(); + ++comp_step) { + cost_type increase = work_cost[to_proc][comp_step] + G.vertex_work_weight(node) > max_work[comp_step] + ? work_cost[to_proc][comp_step] + G.vertex_work_weight(node) - max_work[comp_step] + : 0; + + if (increase < smallest_increase) { + best_step = comp_step; + smallest_increase = increase; + } + } + + // check if this modification is beneficial + if (best_step == S || smallest_increase > decrease) { + continue; + } + + // execute the modification + to_erase.emplace_back(entry); + out_schedule.assignments(node).emplace_back(to_proc, best_step); + + send_cost[from_proc][step] -= comm_induced; + rec_cost[to_proc][step] -= comm_induced; + max_comm[step] = new_max_comm; + + work_cost[to_proc][best_step] += G.vertex_work_weight(node); + max_work[best_step] += smallest_increase; + + // update movability bounds + for (const vertex_idx &pred : G.parents(node)) { + needed_on_proc[pred][to_proc].insert(best_step); + } + + needed_on_proc[node][from_proc].erase(needed_on_proc[node][from_proc].lower_bound(step)); + + first_present[node][to_proc] = best_step; + for (const vertex_idx &succ : G.children(node)) { + for (const vertex_idx &pred : G.parents(node)) { + first_computable[succ][to_proc] = std::max(first_computable[succ][to_proc], first_present[pred][to_proc]); + } + } + + still_improved = true; + } + for (const KeyTriple &entry : to_erase) { + comm_steps[step].erase(entry); } - } - - // check if this modification is beneficial - if(best_step == S || smallest_increase > decrease) - continue; - - // execute the modification - to_erase.emplace_back(entry); - out_schedule.assignments(node).emplace_back(to_proc, best_step); - - send_cost[from_proc][step] -= comm_induced; - rec_cost[to_proc][step] -= comm_induced; - max_comm[step] = new_max_comm; - - work_cost[to_proc][best_step] += G.vertex_work_weight(node); - max_work[best_step] += smallest_increase; - - // update movability bounds - for(const vertex_idx& pred : G.parents(node)) - needed_on_proc[pred][to_proc].insert(best_step); - - needed_on_proc[node][from_proc].erase(needed_on_proc[node][from_proc].lower_bound(step)); - - first_present[node][to_proc] = best_step; - for(const vertex_idx& succ : G.children(node)) - { - for(const vertex_idx& pred : G.parents(node)) - first_computable[succ][to_proc] = std::max(first_computable[succ][to_proc], first_present[pred][to_proc]); - } - - still_improved = true; - } - for(const KeyTriple& entry : to_erase) - comm_steps[step].erase(entry); - } } - for(unsigned step = 0; step < S; ++step) - for(const KeyTriple& entry : comm_steps[step]) - out_schedule.getCommunicationSchedule().emplace(entry, step); + for (unsigned step = 0; step < S; ++step) { + for (const KeyTriple &entry : comm_steps[step]) { + out_schedule.getCommunicationSchedule().emplace(entry, step); + } + } out_schedule.mergeSupersteps(); return RETURN_STATUS::OSP_SUCCESS; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp index 7a6c454d..857e4e02 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp @@ -42,16 +42,15 @@ namespace osp { * * It computes schedules for BspInstance using variance-based priorities. */ -template +template class GreedyVarianceSspScheduler : public MaxBspScheduler { - static_assert(is_computational_dag_v, "GreedyVarianceSspScheduler can only be used with computational DAGs."); private: using VertexType = vertex_idx_t; - constexpr static bool use_memory_constraint = - is_memory_constraint_v or is_memory_constraint_schedule_v; + constexpr static bool use_memory_constraint = is_memory_constraint_v + or is_memory_constraint_schedule_v; static_assert(not use_memory_constraint or std::is_same_v, "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); @@ -75,28 +74,19 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { } temp = std::log(temp) / 2 + max_priority; - double node_weight = std::log( - static_cast( - std::max( - graph.vertex_work_weight(*r_iter), - static_cast>(1) - ) - ) - ); + double node_weight + = std::log(static_cast(std::max(graph.vertex_work_weight(*r_iter), static_cast>(1)))); double larger_val = node_weight > temp ? node_weight : temp; - work_variance[*r_iter] = - std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; + work_variance[*r_iter] = std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; } return work_variance; } - std::vector>> - procTypesCompatibleWithNodeType_omit_procType(const BspInstance &instance) const { - - const std::vector> procTypesCompatibleWithNodeType = - instance.getProcTypesCompatibleWithNodeType(); + std::vector>> procTypesCompatibleWithNodeType_omit_procType( + const BspInstance &instance) const { + const std::vector> procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); std::vector>> procTypesCompatibleWithNodeType_skip( instance.getArchitecture().getNumberOfProcessorTypes(), @@ -104,8 +94,9 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) { for (unsigned nodeType = 0; nodeType < instance.getComputationalDag().num_vertex_types(); nodeType++) { for (unsigned otherProcType : procTypesCompatibleWithNodeType[nodeType]) { - if (procType == otherProcType) + if (procType == otherProcType) { continue; + } procTypesCompatibleWithNodeType_skip[procType][nodeType].emplace_back(otherProcType); } } @@ -124,38 +115,42 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { const std::vector, VarianceCompare>> &allReady, const std::vector, VarianceCompare>> &procReady, const std::vector &procFree) const { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) - if (procFree[i] && !procReady[i].empty()) + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i] && !procReady[i].empty()) { return true; + } + } - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) - if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) { return true; + } + } return false; } void Choose(const BspInstance &instance, - const std::vector &work_variance, - std::vector, VarianceCompare>> &allReady, - std::vector, VarianceCompare>> &procReady, - const std::vector &procFree, - VertexType &node, unsigned &p, - const bool endSupStep, - const v_workw_t remaining_time, - const std::vector>> &procTypesCompatibleWithNodeType_skip_proctype) const - { + const std::vector &work_variance, + std::vector, VarianceCompare>> &allReady, + std::vector, VarianceCompare>> &procReady, + const std::vector &procFree, + VertexType &node, + unsigned &p, + const bool endSupStep, + const v_workw_t remaining_time, + const std::vector>> &procTypesCompatibleWithNodeType_skip_proctype) const { double maxScore = -1; bool found_allocation = false; for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - if (!procFree[i] || procReady[i].empty()) + if (!procFree[i] || procReady[i].empty()) { continue; + } auto it = procReady[i].begin(); while (it != procReady[i].end()) { - if (endSupStep && - (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { + if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { it = procReady[i].erase(it); continue; } @@ -172,15 +167,14 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { found_allocation = true; if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) { - const auto &compatibleTypes = - procTypesCompatibleWithNodeType_skip_proctype[procType] - [instance.getComputationalDag().vertex_type(node)]; + const auto &compatibleTypes + = procTypesCompatibleWithNodeType_skip_proctype[procType] + [instance.getComputationalDag().vertex_type(node)]; for (unsigned otherType : compatibleTypes) { for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) { - if (j != i && - instance.getArchitecture().processorType(j) == otherType && - j < procReady.size()) { + if (j != i && instance.getArchitecture().processorType(j) == otherType + && j < procReady.size()) { procReady[j].erase(std::make_pair(node, work_variance[node])); } } @@ -195,15 +189,13 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { found_allocation = true; if (procType < procTypesCompatibleWithNodeType_skip_proctype.size()) { - const auto &compatibleTypes = - procTypesCompatibleWithNodeType_skip_proctype[procType] - [instance.getComputationalDag().vertex_type(node)]; + const auto &compatibleTypes + = procTypesCompatibleWithNodeType_skip_proctype[procType] + [instance.getComputationalDag().vertex_type(node)]; for (unsigned otherType : compatibleTypes) { for (unsigned j = 0; j < instance.numberOfProcessors(); ++j) { - if (j != i && - instance.getArchitecture().processorType(j) == otherType && - j < procReady.size()) { + if (j != i && instance.getArchitecture().processorType(j) == otherType && j < procReady.size()) { procReady[j].erase(std::make_pair(node, work_variance[node])); } } @@ -218,20 +210,21 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { } } - if (found_allocation) + if (found_allocation) { return; + } for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { const unsigned procType = instance.getArchitecture().processorType(i); - if (!procFree[i] || procType >= allReady.size() || allReady[procType].empty()) + if (!procFree[i] || procType >= allReady.size() || allReady[procType].empty()) { continue; + } auto &readyList = allReady[procType]; auto it = readyList.begin(); while (it != readyList.end()) { - if (endSupStep && - (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { + if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(it->first))) { it = readyList.erase(it); continue; } @@ -244,13 +237,14 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { node = it->first; p = i; - const auto &compatibleTypes = - procTypesCompatibleWithNodeType_skip_proctype[procType] - [instance.getComputationalDag().vertex_type(node)]; + const auto &compatibleTypes + = procTypesCompatibleWithNodeType_skip_proctype[procType] + [instance.getComputationalDag().vertex_type(node)]; for (unsigned otherType : compatibleTypes) { - if (otherType < allReady.size()) + if (otherType < allReady.size()) { allReady[otherType].erase(std::make_pair(node, work_variance[node])); + } } return; @@ -259,15 +253,16 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { node = it->first; p = i; - const auto &compatibleTypes = - procTypesCompatibleWithNodeType_skip_proctype[procType] - [instance.getComputationalDag().vertex_type(node)]; + const auto &compatibleTypes + = procTypesCompatibleWithNodeType_skip_proctype[procType] + [instance.getComputationalDag().vertex_type(node)]; for (unsigned otherType : compatibleTypes) { - if (otherType < allReady.size()) + if (otherType < allReady.size()) { allReady[otherType].erase(std::make_pair(node, work_variance[node])); + } } - + return; } } @@ -276,18 +271,13 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { } }; - - bool check_mem_feasibility( - const BspInstance &instance, - const std::vector, VarianceCompare>> &allReady, - const std::vector, VarianceCompare>> &procReady) const - { + bool check_mem_feasibility(const BspInstance &instance, + const std::vector, VarianceCompare>> &allReady, + const std::vector, VarianceCompare>> &procReady) const { if constexpr (use_memory_constraint) { - if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) - { + if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { if (!procReady[i].empty()) { - const std::pair &node_pair = *procReady[i].begin(); VertexType top_node = node_pair.first; @@ -298,12 +288,11 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { } for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - - if (allReady[instance.getArchitecture().processorType(i)].empty()) + if (allReady[instance.getArchitecture().processorType(i)].empty()) { continue; + } - const std::pair &node_pair = - *allReady[instance.getArchitecture().processorType(i)].begin(); + const std::pair &node_pair = *allReady[instance.getArchitecture().processorType(i)].begin(); VertexType top_node = node_pair.first; if (memory_constraint.can_add(top_node, i)) { @@ -318,14 +307,12 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { return true; } - unsigned get_nr_parallelizable_nodes( - const BspInstance &instance, - const unsigned &stale, - const std::vector &nr_old_ready_nodes_per_type, - const std::vector &nr_ready_nodes_per_type, - const std::vector, VarianceCompare>> &procReady, - const std::vector &nr_procs_per_type) const - { + unsigned get_nr_parallelizable_nodes(const BspInstance &instance, + const unsigned &stale, + const std::vector &nr_old_ready_nodes_per_type, + const std::vector &nr_ready_nodes_per_type, + const std::vector, VarianceCompare>> &procReady, + const std::vector &nr_procs_per_type) const { unsigned nr_nodes = 0; unsigned num_proc_types = instance.getArchitecture().getNumberOfProcessorTypes(); @@ -348,8 +335,7 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { for (unsigned proc_type = 0; proc_type < num_proc_types; ++proc_type) { for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { if (instance.isCompatibleType(node_type, proc_type)) { - unsigned matched = std::min(ready_nodes_per_type[node_type], - procs_per_type[proc_type]); + unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]); nr_nodes += matched; ready_nodes_per_type[node_type] -= matched; procs_per_type[proc_type] -= matched; @@ -360,18 +346,17 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { return nr_nodes; } - public: - + public: /** - * @brief Default constructor for GreedyVarianceSspScheduler. - */ + * @brief Default constructor for GreedyVarianceSspScheduler. + */ GreedyVarianceSspScheduler(float max_percent_idle_processors_ = 0.2f, bool increase_parallelism_in_new_superstep_ = true) : max_percent_idle_processors(max_percent_idle_processors_), increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_) {} /** - * @brief Default destructor for GreedyVarianceSspScheduler. - */ + * @brief Default destructor for GreedyVarianceSspScheduler. + */ virtual ~GreedyVarianceSspScheduler() = default; RETURN_STATUS computeSspSchedule(BspSchedule &schedule, unsigned stale) { @@ -398,12 +383,11 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { instance.getArchitecture().getNumberOfProcessorTypes()); const auto procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); - const std::vector>> procTypesCompatibleWithNodeType_skip_proctype = - procTypesCompatibleWithNodeType_omit_procType(instance); + const std::vector>> procTypesCompatibleWithNodeType_skip_proctype + = procTypesCompatibleWithNodeType_omit_procType(instance); std::vector nr_old_ready_nodes_per_type(G.num_vertex_types(), 0); - std::vector> nr_ready_stale_nodes_per_type( - stale, std::vector(G.num_vertex_types(), 0)); + std::vector> nr_ready_stale_nodes_per_type(stale, std::vector(G.num_vertex_types(), 0)); std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); for (auto proc = 0u; proc < P; ++proc) { ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; @@ -428,8 +412,10 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { std::set, VertexType>> finishTimes; finishTimes.emplace(0, std::numeric_limits::max()); - std::vector number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - std::vector limit_of_number_of_allocated_allReady_tasks_in_superstep(instance.getArchitecture().getNumberOfProcessorTypes(), 0); + std::vector number_of_allocated_allReady_tasks_in_superstep( + instance.getArchitecture().getNumberOfProcessorTypes(), 0); + std::vector limit_of_number_of_allocated_allReady_tasks_in_superstep( + instance.getArchitecture().getNumberOfProcessorTypes(), 0); bool endSupStep = true; bool begin_outer_while = true; @@ -437,17 +423,21 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { unsigned successive_empty_supersteps = 0u; auto nonempty_ready = [&]() { - return std::any_of(ready.cbegin(), ready.cend(), - [](const std::set, VarianceCompare>& ready_set) { return !ready_set.empty(); }); + return std::any_of( + ready.cbegin(), ready.cend(), [](const std::set, VarianceCompare> &ready_set) { + return !ready_set.empty(); + }); }; while (!old_ready.empty() || nonempty_ready() || !finishTimes.empty()) { if (finishTimes.empty() && endSupStep) { able_to_schedule_in_step = false; - number_of_allocated_allReady_tasks_in_superstep = std::vector(instance.getArchitecture().getNumberOfProcessorTypes(), 0); + number_of_allocated_allReady_tasks_in_superstep + = std::vector(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for (unsigned i = 0; i < P; ++i) + for (unsigned i = 0; i < P; ++i) { procReady[supstepIdx % stale][i].clear(); + } if (!begin_outer_while) { supstepIdx++; @@ -455,8 +445,9 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { begin_outer_while = false; } - for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); ++procType) + for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); ++procType) { allReady[procType].clear(); + } old_ready.insert(ready[supstepIdx % stale].begin(), ready[supstepIdx % stale].end()); ready[supstepIdx % stale].clear(); @@ -474,17 +465,19 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { if constexpr (use_memory_constraint) { if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::LOCAL) { - for (unsigned proc = 0; proc < P; proc++) + for (unsigned proc = 0; proc < P; proc++) { memory_constraint.reset(proc); + } } } for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) { unsigned equal_split = (static_cast(allReady[procType].size()) + stale - 1) / stale; unsigned at_least_for_long_step = 3 * nr_procs_per_type[procType]; - limit_of_number_of_allocated_allReady_tasks_in_superstep[procType] = std::max(at_least_for_long_step, equal_split); + limit_of_number_of_allocated_allReady_tasks_in_superstep[procType] + = std::max(at_least_for_long_step, equal_split); } - + endSupStep = false; finishTimes.emplace(0, std::numeric_limits::max()); } @@ -500,14 +493,14 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { if (node != std::numeric_limits::max()) { const unsigned proc_of_node = schedule.assignedProcessor(node); - for (const auto& succ : G.children(node)) { + for (const auto &succ : G.children(node)) { nrPredecRemain[succ]--; if (nrPredecRemain[succ] == 0) { ready[supstepIdx % stale].emplace(succ, work_variances[succ]); nr_ready_stale_nodes_per_type[supstepIdx % stale][G.vertex_type(succ)]++; unsigned earliest_add = supstepIdx; - for (const auto& pred : G.parents(succ)) { + for (const auto &pred : G.parents(succ)) { if (schedule.assignedProcessor(pred) != proc_of_node) { earliest_add = std::max(earliest_add, stale + schedule.assignedSuperstep(pred)); } @@ -521,13 +514,11 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { memory_ok = memory_constraint.can_add(succ, proc_of_node); } } - for (unsigned step_to_add = earliest_add; - step_to_add < supstepIdx + stale; ++step_to_add) { + for (unsigned step_to_add = earliest_add; step_to_add < supstepIdx + stale; ++step_to_add) { if ((step_to_add == supstepIdx) && !memory_ok) { - continue; + continue; } - procReady[step_to_add % stale][proc_of_node].emplace( - succ, work_variances[succ]); + procReady[step_to_add % stale][proc_of_node].emplace(succ, work_variances[succ]); } } } @@ -547,29 +538,37 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { VertexType nextNode = std::numeric_limits::max(); unsigned nextProc = P; - Choose( instance, work_variances, allReady, - procReady[supstepIdx % stale], procFree, - nextNode, nextProc, endSupStep, max_finish_time - time, procTypesCompatibleWithNodeType_skip_proctype); + Choose(instance, + work_variances, + allReady, + procReady[supstepIdx % stale], + procFree, + nextNode, + nextProc, + endSupStep, + max_finish_time - time, + procTypesCompatibleWithNodeType_skip_proctype); if (nextNode == std::numeric_limits::max() || nextProc == P) { endSupStep = true; break; } - if (procReady[supstepIdx % stale][nextProc].find(std::make_pair(nextNode, work_variances[nextNode])) != - procReady[supstepIdx % stale][nextProc].end()) { + if (procReady[supstepIdx % stale][nextProc].find(std::make_pair(nextNode, work_variances[nextNode])) + != procReady[supstepIdx % stale][nextProc].end()) { for (size_t i = 0; i < stale; i++) { procReady[i][nextProc].erase(std::make_pair(nextNode, work_variances[nextNode])); } } else { - for(unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(nextNode)]) { + for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(nextNode)]) { allReady[procType].erase(std::make_pair(nextNode, work_variances[nextNode])); } nr_old_ready_nodes_per_type[G.vertex_type(nextNode)]--; const unsigned nextProcType = instance.getArchitecture().processorType(nextProc); number_of_allocated_allReady_tasks_in_superstep[nextProcType]++; - - if (number_of_allocated_allReady_tasks_in_superstep[nextProcType] >= limit_of_number_of_allocated_allReady_tasks_in_superstep[nextProcType]) { + + if (number_of_allocated_allReady_tasks_in_superstep[nextProcType] + >= limit_of_number_of_allocated_allReady_tasks_in_superstep[nextProcType]) { allReady[nextProcType].clear(); } } @@ -603,38 +602,35 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { --free; } - if (able_to_schedule_in_step) + if (able_to_schedule_in_step) { successive_empty_supersteps = 0; - else if (++successive_empty_supersteps > 100 + stale) + } else if (++successive_empty_supersteps > 100 + stale) { return RETURN_STATUS::ERROR; + } - if (free > (P * max_percent_idle_processors) && - ((!increase_parallelism_in_new_superstep) || - get_nr_parallelizable_nodes( - instance, stale, nr_old_ready_nodes_per_type, - nr_ready_stale_nodes_per_type[(supstepIdx + 1) % stale], - procReady[(supstepIdx + 1) % stale], - nr_procs_per_type) >= std::min( - std::min(P, static_cast(1.2 * (P - free))), - P - free + static_cast(0.5 * free)))) - { + if (free > (P * max_percent_idle_processors) + && ((!increase_parallelism_in_new_superstep) + || get_nr_parallelizable_nodes(instance, + stale, + nr_old_ready_nodes_per_type, + nr_ready_stale_nodes_per_type[(supstepIdx + 1) % stale], + procReady[(supstepIdx + 1) % stale], + nr_procs_per_type) + >= std::min(std::min(P, static_cast(1.2 * (P - free))), + P - free + static_cast(0.5 * free)))) { endSupStep = true; } } assert(schedule.satisfiesPrecedenceConstraints()); - //schedule.setAutoCommunicationSchedule(); + // schedule.setAutoCommunicationSchedule(); return RETURN_STATUS::OSP_SUCCESS; } - RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - return computeSspSchedule(schedule, 1U); - } + RETURN_STATUS computeSchedule(BspSchedule &schedule) override { return computeSspSchedule(schedule, 1U); } - RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) override { - return computeSspSchedule(schedule, 2U); - } + RETURN_STATUS computeSchedule(MaxBspSchedule &schedule) override { return computeSspSchedule(schedule, 2U); } std::string getScheduleName() const override { if constexpr (use_memory_constraint) { @@ -643,7 +639,6 @@ class GreedyVarianceSspScheduler : public MaxBspScheduler { return "GreedyVarianceSsp"; } } - }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp index 241aef58..981b6dfa 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp @@ -21,10 +21,10 @@ limitations under the License. #include #include #include -#include #include -#include +#include #include +#include #include #include "MemoryConstraintModules.hpp" @@ -34,7 +34,7 @@ limitations under the License. namespace osp { -template +template struct GrowLocalAutoCores_Params { unsigned minSuperstepSize = 20; weight_t syncCostMultiplierMinSuperstepWeight = 1; @@ -50,31 +50,30 @@ struct GrowLocalAutoCores_Params { * The getScheduleName() method returns the name of the schedule, which is "GreedyBspGrowLocalAutoCores" in this * case. */ -template +template class GrowLocalAutoCores : public Scheduler { - private: GrowLocalAutoCores_Params> params; - constexpr static bool use_memory_constraint = - is_memory_constraint_v or is_memory_constraint_schedule_v; + constexpr static bool use_memory_constraint = is_memory_constraint_v + or is_memory_constraint_schedule_v; static_assert(not use_memory_constraint or std::is_same_v, "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); - static_assert(not use_memory_constraint or not (std::is_same_v> or std::is_same_v>), - "MemoryConstraint_t must not be persistent_transient_memory_constraint or global_memory_constraint. Not supported in GrowLocalAutoCores."); - + static_assert(not use_memory_constraint + or not(std::is_same_v> + or std::is_same_v>), + "MemoryConstraint_t must not be persistent_transient_memory_constraint or global_memory_constraint. Not " + "supported in GrowLocalAutoCores."); MemoryConstraint_t local_memory_constraint; - public: /** * @brief Default constructor for GreedyBspGrowLocalAutoCores. */ - GrowLocalAutoCores( - GrowLocalAutoCores_Params> params_ = GrowLocalAutoCores_Params>()) + GrowLocalAutoCores(GrowLocalAutoCores_Params> params_ = GrowLocalAutoCores_Params>()) : params(params_) {} /** @@ -91,7 +90,6 @@ class GrowLocalAutoCores : public Scheduler { * @return A pair containing the return status and the computed BspSchedule. */ virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - using vertex_idx = typename Graph_t::vertex_idx; const auto &instance = schedule.getInstance(); @@ -115,7 +113,7 @@ class GrowLocalAutoCores : public Scheduler { const unsigned P = instance.numberOfProcessors(); const auto &G = instance.getComputationalDag(); - std::unordered_set ready; + std::unordered_set ready; std::vector allReady; std::vector> procReady(P); @@ -136,16 +134,13 @@ class GrowLocalAutoCores : public Scheduler { std::vector new_ready; std::vector best_new_ready; - const v_workw_t minWeightParallelCheck = - params.syncCostMultiplierParallelCheck * instance.synchronisationCosts(); - const v_workw_t minSuperstepWeight = - params.syncCostMultiplierMinSuperstepWeight * instance.synchronisationCosts(); + const v_workw_t minWeightParallelCheck = params.syncCostMultiplierParallelCheck * instance.synchronisationCosts(); + const v_workw_t minSuperstepWeight = params.syncCostMultiplierMinSuperstepWeight * instance.synchronisationCosts(); double desiredParallelism = static_cast(P); vertex_idx total_assigned = 0; while (total_assigned < N) { - unsigned limit = params.minSuperstepSize; double best_score = 0; double best_parallelism = 0; @@ -153,7 +148,6 @@ class GrowLocalAutoCores : public Scheduler { bool continueSuperstepAttempts = true; while (continueSuperstepAttempts) { - for (unsigned p = 0; p < P; p++) { new_assignments[p].clear(); procReady[p].clear(); @@ -295,8 +289,8 @@ class GrowLocalAutoCores : public Scheduler { bool accept_step = false; - double score = static_cast(total_weight_assigned) / - static_cast(weight_limit + instance.synchronisationCosts()); + double score = static_cast(total_weight_assigned) + / static_cast(weight_limit + instance.synchronisationCosts()); double parallelism = 0; if (weight_limit > 0) { parallelism = static_cast(total_weight_assigned) / static_cast(weight_limit); @@ -377,8 +371,8 @@ class GrowLocalAutoCores : public Scheduler { } } - desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism) + - (0.1 * static_cast(P)); // weights should sum up to one + desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism) + + (0.1 * static_cast(P)); // weights should sum up to one ++supstep; } @@ -398,4 +392,4 @@ class GrowLocalAutoCores : public Scheduler { virtual std::string getScheduleName() const override { return "GrowLocalAutoCores"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp index b5b4ea95..5569195d 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCoresParallel.hpp @@ -18,6 +18,8 @@ limitations under the License. #pragma once +#include + #include #include #include @@ -25,12 +27,10 @@ limitations under the License. #include #include -#include - // #define TIME_THREADS_GROW_LOCAL_PARALLEL #ifdef TIME_THREADS_GROW_LOCAL_PARALLEL -#include -#include +# include +# include #endif #include "osp/auxiliary/misc.hpp" @@ -41,14 +41,14 @@ namespace osp { static constexpr unsigned CacheLineSize = 64; -template +template struct GrowLocalAutoCoresParallel_Params { vert_t minSuperstepSize = 20; weight_t syncCostMultiplierMinSuperstepWeight = 1; weight_t syncCostMultiplierParallelCheck = 4; - unsigned numThreads = 0; // 0 for auto - unsigned maxNumThreads = UINT_MAX; // used when auto num threads + unsigned numThreads = 0; // 0 for auto + unsigned maxNumThreads = UINT_MAX; // used when auto num threads }; /** @@ -60,7 +60,7 @@ struct GrowLocalAutoCoresParallel_Params { * The getScheduleName() method returns the name of the schedule, which is "GrowLocalAutoCoresParallel" in this * case. */ -template +template class GrowLocalAutoCoresParallel : public Scheduler { static_assert(is_directed_graph_v); static_assert(has_vertex_weights_v); @@ -74,8 +74,8 @@ class GrowLocalAutoCoresParallel : public Scheduler { /** * @brief Default constructor for GrowLocalAutoCoresParallel. */ - GrowLocalAutoCoresParallel( - GrowLocalAutoCoresParallel_Params, v_workw_t> params_ = GrowLocalAutoCoresParallel_Params, v_workw_t>()) + GrowLocalAutoCoresParallel(GrowLocalAutoCoresParallel_Params, v_workw_t> params_ + = GrowLocalAutoCoresParallel_Params, v_workw_t>()) : params(params_) {} /** @@ -83,8 +83,12 @@ class GrowLocalAutoCoresParallel : public Scheduler { */ virtual ~GrowLocalAutoCoresParallel() = default; - void computePartialSchedule(BspSchedule &schedule, const std::vector &topOrder, const std::vector &posInTopOrder, const VertexType startNode, const VertexType endNode, unsigned &supstep) const { - + void computePartialSchedule(BspSchedule &schedule, + const std::vector &topOrder, + const std::vector &posInTopOrder, + const VertexType startNode, + const VertexType endNode, + unsigned &supstep) const { #ifdef TIME_THREADS_GROW_LOCAL_PARALLEL double startTime = omp_get_wtime(); #endif @@ -255,8 +259,9 @@ class GrowLocalAutoCoresParallel : public Scheduler { } else if (readyIter != ready.end()) { chosen_node = *readyIter; readyIter++; - } else + } else { break; + } new_assignments[proc].push_back(chosen_node); schedule.setAssignedProcessor(chosen_node, proc); @@ -310,13 +315,15 @@ class GrowLocalAutoCoresParallel : public Scheduler { bool accept_step = false; - double score = static_cast(total_weight_assigned) / static_cast(weight_limit + instance.synchronisationCosts()); + double score = static_cast(total_weight_assigned) + / static_cast(weight_limit + instance.synchronisationCosts()); double parallelism = 0; if (weight_limit > 0) { parallelism = static_cast(total_weight_assigned) / static_cast(weight_limit); } - if (score > 0.97 * best_score) { // It is possible to make this less strict, i.e. score > 0.98 * best_score. The purpose of this would be to encourage larger supersteps. + if (score > 0.97 * best_score) { // It is possible to make this less strict, i.e. score > 0.98 * best_score. + // The purpose of this would be to encourage larger supersteps. best_score = std::max(best_score, score); best_parallelism = parallelism; accept_step = true; @@ -457,7 +464,8 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } - desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism) + (0.1 * static_cast(P)); // weights should sum up to one + desiredParallelism = (0.3 * desiredParallelism) + (0.6 * best_parallelism) + + (0.1 * static_cast(P)); // weights should sum up to one ++supstep; } @@ -468,17 +476,26 @@ class GrowLocalAutoCoresParallel : public Scheduler { if (omp_get_thread_num() < 10) { padd = " "; } - std::string outputString = "Thread: " + padd + std::to_string(omp_get_thread_num()) + "\t Time: " + std::to_string(endTime - startTime) + "\n"; + std::string outputString + = "Thread: " + padd + std::to_string(omp_get_thread_num()) + "\t Time: " + std::to_string(endTime - startTime) + "\n"; std::cout << outputString; #endif } - void incrementScheduleSupersteps(BspSchedule &schedule, const VertexType startNode, const VertexType endNode, const unsigned incr) const { + void incrementScheduleSupersteps(BspSchedule &schedule, + const VertexType startNode, + const VertexType endNode, + const unsigned incr) const { for (VertexType node = startNode; node < endNode; node++) { schedule.setAssignedSuperstepNoUpdateNumSuperstep(node, schedule.assignedSuperstep(node) + incr); } } - void incrementScheduleSupersteps_TopOrder(BspSchedule &schedule, const std::vector &topOrder, const VertexType startIndex, const VertexType endIndex, const unsigned incr) const { + + void incrementScheduleSupersteps_TopOrder(BspSchedule &schedule, + const std::vector &topOrder, + const VertexType startIndex, + const VertexType endIndex, + const unsigned incr) const { for (VertexType index = startIndex; index < endIndex; index++) { const VertexType node = topOrder[index]; schedule.setAssignedSuperstepNoUpdateNumSuperstep(node, schedule.assignedSuperstep(node) + incr); @@ -486,7 +503,6 @@ class GrowLocalAutoCoresParallel : public Scheduler { } RETURN_STATUS computeScheduleParallel(BspSchedule &schedule, unsigned int numThreads) const { - const BspInstance &instance = schedule.getInstance(); const Graph_t &graph = instance.getComputationalDag(); @@ -524,11 +540,13 @@ class GrowLocalAutoCoresParallel : public Scheduler { } } -#pragma omp parallel num_threads(numThreads) default(none) shared(schedule, topOrder, posInTopOrder, superstepsThread, supstepIncr, numThreads, startNodes, incr) +#pragma omp parallel num_threads(numThreads) default(none) \ + shared(schedule, topOrder, posInTopOrder, superstepsThread, supstepIncr, numThreads, startNodes, incr) { #pragma omp for schedule(static, 1) for (unsigned thr = 0; thr < numThreads; thr++) { - computePartialSchedule(schedule, topOrder, posInTopOrder, startNodes[thr], startNodes[thr + 1], superstepsThread[thr * UnsignedPadding]); + computePartialSchedule( + schedule, topOrder, posInTopOrder, startNodes[thr], startNodes[thr + 1], superstepsThread[thr * UnsignedPadding]); } #pragma omp master @@ -547,7 +565,8 @@ class GrowLocalAutoCoresParallel : public Scheduler { if constexpr (has_vertices_in_top_order_v) { incrementScheduleSupersteps(schedule, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]); } else { - incrementScheduleSupersteps_TopOrder(schedule, topOrder, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]); + incrementScheduleSupersteps_TopOrder( + schedule, topOrder, startNodes[thr], startNodes[thr + 1], supstepIncr[thr]); } } } @@ -566,11 +585,11 @@ class GrowLocalAutoCoresParallel : public Scheduler { * @return A pair containing the return status and the computed BspSchedule. */ virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - unsigned numThreads = params.numThreads; if (numThreads == 0) { // numThreads = static_cast(std::sqrt( static_cast((schedule.getInstance().numberOfVertices() / 1000000)))) + 1; - numThreads = static_cast(std::log2(static_cast((schedule.getInstance().numberOfVertices() / 1000)))) + 1; + numThreads + = static_cast(std::log2(static_cast((schedule.getInstance().numberOfVertices() / 1000)))) + 1; } numThreads = std::min(numThreads, params.maxNumThreads); if (numThreads == 0) { @@ -590,4 +609,4 @@ class GrowLocalAutoCoresParallel : public Scheduler { virtual std::string getScheduleName() const override { return "GrowLocalAutoCoresParallel"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp b/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp index ce1f0503..f56ed2f2 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/MemoryConstraintModules.hpp @@ -29,19 +29,19 @@ namespace osp { * This trait checks if a type has the required methods for a memory constraint. * */ -template +template struct is_memory_constraint : std::false_type {}; -template +template struct is_memory_constraint< - T, std::void_t().initialize(std::declval>())), - decltype(std::declval().can_add(std::declval>(), - std::declval())), - decltype(std::declval().add(std::declval>(), - std::declval())), - decltype(std::declval().reset(std::declval())), decltype(T())>> : std::true_type {}; - -template + T, + std::void_t().initialize(std::declval>())), + decltype(std::declval().can_add(std::declval>(), std::declval())), + decltype(std::declval().add(std::declval>(), std::declval())), + decltype(std::declval().reset(std::declval())), + decltype(T())>> : std::true_type {}; + +template inline constexpr bool is_memory_constraint_v = is_memory_constraint::value; /** @@ -57,9 +57,8 @@ struct no_memory_constraint { * * @tparam Graph_t The graph type. */ -template +template struct local_memory_constraint { - using Graph_impl_t = Graph_t; const BspInstance *instance; @@ -78,20 +77,19 @@ struct local_memory_constraint { } inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) <= - instance->getArchitecture().memoryBound(proc); + return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) + <= instance->getArchitecture().memoryBound(proc); } inline void add(const vertex_idx_t &v, const unsigned proc) { current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v); } - inline bool can_add(const unsigned proc, const v_memw_t &custom_mem_weight, - const v_memw_t&) const { + inline bool can_add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_memw_t &) const { return current_proc_memory[proc] + custom_mem_weight <= instance->getArchitecture().memoryBound(proc); } - inline void add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_memw_t&) { + inline void add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_memw_t &) { current_proc_memory[proc] += custom_mem_weight; } @@ -109,12 +107,10 @@ struct local_memory_constraint { * * @tparam Graph_t The graph type. */ -template +template struct persistent_transient_memory_constraint { - - static_assert( - std::is_convertible_v, v_memw_t>, - "persistent_transient_memory_constraint requires that memory and communication weights are convertible."); + static_assert(std::is_convertible_v, v_memw_t>, + "persistent_transient_memory_constraint requires that memory and communication weights are convertible."); using Graph_impl_t = Graph_t; @@ -137,31 +133,26 @@ struct persistent_transient_memory_constraint { } inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - - return ( - current_proc_persistent_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) + - std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v)) <= - instance->getArchitecture().memoryBound(proc)); + return (current_proc_persistent_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) + + std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v)) + <= instance->getArchitecture().memoryBound(proc)); } inline void add(const vertex_idx_t &v, const unsigned proc) { - current_proc_persistent_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v); - current_proc_transient_memory[proc] = - std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v)); + current_proc_transient_memory[proc] + = std::max(current_proc_transient_memory[proc], instance->getComputationalDag().vertex_comm_weight(v)); } - inline bool can_add(const unsigned proc, const v_memw_t &custom_mem_weight, + inline bool can_add(const unsigned proc, + const v_memw_t &custom_mem_weight, const v_commw_t &custom_comm_weight) const { - - return (current_proc_persistent_memory[proc] + custom_mem_weight + - std::max(current_proc_transient_memory[proc], custom_comm_weight) <= - instance->getArchitecture().memoryBound(proc)); + return (current_proc_persistent_memory[proc] + custom_mem_weight + + std::max(current_proc_transient_memory[proc], custom_comm_weight) + <= instance->getArchitecture().memoryBound(proc)); } - inline void add(const unsigned proc, const v_memw_t &custom_mem_weight, - const v_commw_t &custom_comm_weight ) { - + inline void add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_commw_t &custom_comm_weight) { current_proc_persistent_memory[proc] += custom_mem_weight; current_proc_transient_memory[proc] = std::max(current_proc_transient_memory[proc], custom_comm_weight); } @@ -169,9 +160,8 @@ struct persistent_transient_memory_constraint { inline void reset(const unsigned) {} }; -template +template struct global_memory_constraint { - using Graph_impl_t = Graph_t; const BspInstance *instance; @@ -190,16 +180,15 @@ struct global_memory_constraint { } inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) <= - instance->getArchitecture().memoryBound(proc); + return current_proc_memory[proc] + instance->getComputationalDag().vertex_mem_weight(v) + <= instance->getArchitecture().memoryBound(proc); } inline void add(const vertex_idx_t &v, const unsigned proc) { current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v); } - inline bool can_add(const unsigned proc, const v_memw_t &custom_mem_weight, - const v_commw_t &) const { + inline bool can_add(const unsigned proc, const v_memw_t &custom_mem_weight, const v_commw_t &) const { return current_proc_memory[proc] + custom_mem_weight <= instance->getArchitecture().memoryBound(proc); } @@ -210,25 +199,23 @@ struct global_memory_constraint { inline void reset(const unsigned) {} }; -template +template struct is_memory_constraint_schedule : std::false_type {}; -template +template struct is_memory_constraint_schedule< - T, std::void_t().initialize(std::declval>(), - std::declval())), - decltype(std::declval().can_add(std::declval>(), - std::declval())), - decltype(std::declval().add(std::declval>(), - std::declval())), - decltype(std::declval().reset(std::declval())), decltype(T())>> : std::true_type {}; - -template + T, + std::void_t().initialize(std::declval>(), std::declval())), + decltype(std::declval().can_add(std::declval>(), std::declval())), + decltype(std::declval().add(std::declval>(), std::declval())), + decltype(std::declval().reset(std::declval())), + decltype(T())>> : std::true_type {}; + +template inline constexpr bool is_memory_constraint_schedule_v = is_memory_constraint_schedule::value; -template +template struct local_in_out_memory_constraint { - static_assert(std::is_convertible_v, v_memw_t>, "local_in_out_memory_constraint requires that memory and communication weights are convertible."); @@ -255,14 +242,12 @@ struct local_in_out_memory_constraint { } inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - - v_memw_t inc_memory = instance->getComputationalDag().vertex_mem_weight(v) + - instance->getComputationalDag().vertex_comm_weight(v); + v_memw_t inc_memory + = instance->getComputationalDag().vertex_mem_weight(v) + instance->getComputationalDag().vertex_comm_weight(v); for (const auto &pred : instance->getComputationalDag().parents(v)) { - - if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v) && - schedule->assignedSuperstep(pred) == *current_superstep) { + if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v) + && schedule->assignedSuperstep(pred) == *current_superstep) { inc_memory -= instance->getComputationalDag().vertex_comm_weight(pred); } } @@ -271,14 +256,12 @@ struct local_in_out_memory_constraint { } inline void add(const vertex_idx_t &v, const unsigned proc) { - - current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v) + - instance->getComputationalDag().vertex_comm_weight(v); + current_proc_memory[proc] + += instance->getComputationalDag().vertex_mem_weight(v) + instance->getComputationalDag().vertex_comm_weight(v); for (const auto &pred : instance->getComputationalDag().parents(v)) { - - if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v) && - schedule->assignedSuperstep(pred) == *current_superstep) { + if (schedule->assignedProcessor(pred) == schedule->assignedProcessor(v) + && schedule->assignedSuperstep(pred) == *current_superstep) { current_proc_memory[proc] -= instance->getComputationalDag().vertex_comm_weight(pred); } } @@ -287,9 +270,8 @@ struct local_in_out_memory_constraint { inline void reset(const unsigned proc) { current_proc_memory[proc] = 0; } }; -template +template struct local_inc_edges_memory_constraint { - using Graph_impl_t = Graph_t; const BspInstance *instance; @@ -316,13 +298,11 @@ struct local_inc_edges_memory_constraint { } inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - v_commw_t inc_memory = instance->getComputationalDag().vertex_comm_weight(v); for (const auto &pred : instance->getComputationalDag().parents(v)) { - - if (schedule->assignedSuperstep(pred) != *current_superstep && - current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) { + if (schedule->assignedSuperstep(pred) != *current_superstep + && current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) { inc_memory += instance->getComputationalDag().vertex_comm_weight(pred); } } @@ -331,11 +311,9 @@ struct local_inc_edges_memory_constraint { } inline void add(const vertex_idx_t &v, const unsigned proc) { - current_proc_memory[proc] += instance->getComputationalDag().vertex_comm_weight(v); for (const auto &pred : instance->getComputationalDag().parents(v)) { - if (schedule->assignedSuperstep(pred) != *current_superstep) { const auto pair = current_proc_predec[proc].insert(pred); if (pair.second) { @@ -351,12 +329,10 @@ struct local_inc_edges_memory_constraint { } }; -template +template struct local_sources_inc_edges_memory_constraint { - - static_assert( - std::is_convertible_v, v_memw_t>, - "local_sources_inc_edges_memory_constraint requires that memory and communication weights are convertible."); + static_assert(std::is_convertible_v, v_memw_t>, + "local_sources_inc_edges_memory_constraint requires that memory and communication weights are convertible."); using Graph_impl_t = Graph_t; @@ -384,7 +360,6 @@ struct local_sources_inc_edges_memory_constraint { } inline bool can_add(const vertex_idx_t &v, const unsigned proc) const { - v_memw_t inc_memory = 0; if (is_source(v, instance->getComputationalDag())) { @@ -392,9 +367,8 @@ struct local_sources_inc_edges_memory_constraint { } for (const auto &pred : instance->getComputationalDag().parents(v)) { - - if (schedule->assignedSuperstep(v) != *current_superstep && - current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) { + if (schedule->assignedSuperstep(v) != *current_superstep + && current_proc_predec[proc].find(pred) == current_proc_predec[proc].end()) { inc_memory += instance->getComputationalDag().vertex_comm_weight(pred); } } @@ -403,13 +377,11 @@ struct local_sources_inc_edges_memory_constraint { } inline void add(const vertex_idx_t &v, const unsigned proc) { - if (is_source(v, instance->getComputationalDag())) { current_proc_memory[proc] += instance->getComputationalDag().vertex_mem_weight(v); } for (const auto &pred : instance->getComputationalDag().parents(v)) { - if (schedule->assignedSuperstep(pred) != *current_superstep) { const auto pair = current_proc_predec[proc].insert(pred); if (pair.second) { @@ -425,4 +397,4 @@ struct local_sources_inc_edges_memory_constraint { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp b/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp index bf7e160c..795290fd 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp @@ -28,20 +28,17 @@ limitations under the License. namespace osp { -template +template class RandomGreedy : public Scheduler { - static_assert(is_computational_dag_v, "RandomGreedy can only be used with computational DAGs."); private: bool ensure_enough_sources; public: - RandomGreedy(bool ensure_enough_sources_ = true) : Scheduler(), ensure_enough_sources(ensure_enough_sources_) {}; RETURN_STATUS computeSchedule(BspSchedule &sched) override { - using VertexType = vertex_idx_t; const auto &instance = sched.getInstance(); @@ -72,7 +69,6 @@ class RandomGreedy : public Scheduler { bool few_sources = next.size() < instance.numberOfProcessors() ? true : false; unsigned fail_counter = 0; while (!next.empty() && fail_counter < 20) { - std::uniform_int_distribution rand_node_idx(0, next.size() - 1); VertexType node_ind = rand_node_idx(g); const auto &node = next[node_ind]; @@ -81,14 +77,12 @@ class RandomGreedy : public Scheduler { unsigned processor_to_be_allocated = 0; for (const auto &par : graph.parents(node)) { - if (processor_set && - (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend()) && - (sched.assignedProcessor(par) != processor_to_be_allocated)) { + if (processor_set && (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend()) + && (sched.assignedProcessor(par) != processor_to_be_allocated)) { failed_to_allocate = true; break; } - if ((!processor_set) && - (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend())) { + if ((!processor_set) && (nodes_assigned_this_superstep.find(par) != nodes_assigned_this_superstep.cend())) { processor_set = true; processor_to_be_allocated = sched.assignedProcessor(par); } @@ -108,8 +102,7 @@ class RandomGreedy : public Scheduler { assert(std::distance(processor_weights.begin(), min_iter) >= 0); - sched.setAssignedProcessor( - node, static_cast(std::distance(processor_weights.begin(), min_iter))); + sched.setAssignedProcessor(node, static_cast(std::distance(processor_weights.begin(), min_iter))); } nodes_assigned_this_superstep.emplace(node); @@ -127,8 +120,9 @@ class RandomGreedy : public Scheduler { next.erase(it); next.insert(next.end(), new_nodes.cbegin(), new_nodes.cend()); - if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors()) + if (ensure_enough_sources && few_sources && next.size() >= instance.numberOfProcessors()) { break; + } } superstep_counter++; @@ -140,4 +134,4 @@ class RandomGreedy : public Scheduler { std::string getScheduleName() const override { return ensure_enough_sources ? "RandomGreedyS" : "RandomGreedy"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp b/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp index ff245d1d..91541ca6 100644 --- a/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp +++ b/include/osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp @@ -45,16 +45,15 @@ namespace osp { * The computeSchedule() method computes a schedule for a given BspInstance using a greedy algorithm. * The getScheduleName() method returns the name of the schedule, which is "BspGreedy" in this case. */ -template +template class VarianceFillup : public Scheduler { - static_assert(is_computational_dag_v, "VarianceFillup can only be used with computational DAGs."); private: using VertexType = vertex_idx_t; - constexpr static bool use_memory_constraint = - is_memory_constraint_v or is_memory_constraint_schedule_v; + constexpr static bool use_memory_constraint = is_memory_constraint_v + or is_memory_constraint_schedule_v; static_assert(not use_memory_constraint or std::is_same_v, "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); @@ -65,7 +64,6 @@ class VarianceFillup : public Scheduler { bool increase_parallelism_in_new_superstep; std::vector compute_work_variance(const Graph_t &graph) const { - std::vector work_variance(graph.num_vertices(), 0.0); const std::vector top_order = GetTopOrder(graph); @@ -81,21 +79,19 @@ class VarianceFillup : public Scheduler { } temp = std::log(temp) / 2 + max_priority; - double node_weight = std::log( static_cast( std::max(graph.vertex_work_weight(*r_iter), static_cast>(1)) ) ); + double node_weight + = std::log(static_cast(std::max(graph.vertex_work_weight(*r_iter), static_cast>(1)))); double larger_val = node_weight > temp ? node_weight : temp; - work_variance[*r_iter] = - std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; + work_variance[*r_iter] = std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; } return work_variance; } - std::vector>> - procTypesCompatibleWithNodeType_omit_procType(const BspInstance &instance) const { - - const std::vector> procTypesCompatibleWithNodeType = - instance.getProcTypesCompatibleWithNodeType(); + std::vector>> procTypesCompatibleWithNodeType_omit_procType( + const BspInstance &instance) const { + const std::vector> procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); std::vector>> procTypesCompatibleWithNodeType_skip( instance.getArchitecture().getNumberOfProcessorTypes(), @@ -103,8 +99,9 @@ class VarianceFillup : public Scheduler { for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); procType++) { for (unsigned nodeType = 0; nodeType < instance.getComputationalDag().num_vertex_types(); nodeType++) { for (unsigned otherProcType : procTypesCompatibleWithNodeType[nodeType]) { - if (procType == otherProcType) + if (procType == otherProcType) { continue; + } procTypesCompatibleWithNodeType_skip[procType][nodeType].emplace_back(otherProcType); } } @@ -119,17 +116,13 @@ class VarianceFillup : public Scheduler { } }; - bool check_mem_feasibility( - const BspInstance &instance, - const std::vector, VarianceCompare>> &allReady, - const std::vector, VarianceCompare>> &procReady) const { - + bool check_mem_feasibility(const BspInstance &instance, + const std::vector, VarianceCompare>> &allReady, + const std::vector, VarianceCompare>> &procReady) const { if constexpr (use_memory_constraint) { if (instance.getArchitecture().getMemoryConstraintType() == MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT) { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { if (!procReady[i].empty()) { - const std::pair &node_pair = *procReady[i].begin(); VertexType top_node = node_pair.first; @@ -140,12 +133,11 @@ class VarianceFillup : public Scheduler { } for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { - - if (allReady[instance.getArchitecture().processorType(i)].empty()) + if (allReady[instance.getArchitecture().processorType(i)].empty()) { continue; + } - const std::pair &node_pair = - *allReady[instance.getArchitecture().processorType(i)].begin(); + const std::pair &node_pair = *allReady[instance.getArchitecture().processorType(i)].begin(); VertexType top_node = node_pair.first; if (memory_constraint.can_add(top_node, i)) { @@ -160,21 +152,22 @@ class VarianceFillup : public Scheduler { return true; }; - void - Choose(const BspInstance &instance, const std::vector &work_variance, - std::vector, VarianceCompare>> &allReady, - std::vector, VarianceCompare>> &procReady, - const std::vector &procFree, VertexType &node, unsigned &p, const bool endSupStep, - const v_workw_t remaining_time, - const std::vector>> &procTypesCompatibleWithNodeType_skip_proctype) const { - + void Choose(const BspInstance &instance, + const std::vector &work_variance, + std::vector, VarianceCompare>> &allReady, + std::vector, VarianceCompare>> &procReady, + const std::vector &procFree, + VertexType &node, + unsigned &p, + const bool endSupStep, + const v_workw_t remaining_time, + const std::vector>> &procTypesCompatibleWithNodeType_skip_proctype) const { double maxScore = -1; for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { if (procFree[i] && !procReady[i].empty()) { // select node for (auto node_pair_it = procReady[i].begin(); node_pair_it != procReady[i].end();) { - if (endSupStep && - (remaining_time < instance.getComputationalDag().vertex_work_weight(node_pair_it->first))) { + if (endSupStep && (remaining_time < instance.getComputationalDag().vertex_work_weight(node_pair_it->first))) { node_pair_it = procReady[i].erase(node_pair_it); continue; } @@ -207,31 +200,27 @@ class VarianceFillup : public Scheduler { const double &score = it->second; if (score > maxScore) { - if constexpr (use_memory_constraint) { - if (memory_constraint.can_add(it->first, i)) { - node = it->first; p = i; allReady[instance.getArchitecture().processorType(i)].erase(it); - for (unsigned procType : procTypesCompatibleWithNodeType_skip_proctype - [instance.getArchitecture().processorType(i)] - [instance.getComputationalDag().vertex_type(node)]) { + for (unsigned procType : + procTypesCompatibleWithNodeType_skip_proctype[instance.getArchitecture().processorType( + i)][instance.getComputationalDag().vertex_type(node)]) { allReady[procType].erase(std::make_pair(node, work_variance[node])); } return; } } else { - node = it->first; p = i; allReady[instance.getArchitecture().processorType(i)].erase(it); for (unsigned procType : - procTypesCompatibleWithNodeType_skip_proctype[instance.getArchitecture().processorType( - i)][instance.getComputationalDag().vertex_type(node)]) { + procTypesCompatibleWithNodeType_skip_proctype[instance.getArchitecture().processorType(i)] + [instance.getComputationalDag().vertex_type(node)]) { allReady[procType].erase(std::make_pair(node, work_variance[node])); } return; @@ -247,13 +236,17 @@ class VarianceFillup : public Scheduler { const std::vector, VarianceCompare>> &allReady, const std::vector, VarianceCompare>> &procReady, const std::vector &procFree) const { - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) - if (procFree[i] && !procReady[i].empty()) + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i] && !procReady[i].empty()) { return true; + } + } - for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) - if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) + for (unsigned i = 0; i < instance.numberOfProcessors(); ++i) { + if (procFree[i] && !allReady[instance.getArchitecture().processorType(i)].empty()) { return true; + } + } return false; } @@ -265,14 +258,16 @@ class VarianceFillup : public Scheduler { std::vector ready_nodes_per_type = nr_ready_nodes_per_type; std::vector procs_per_type = nr_procs_per_type; - for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) - for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) + for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) { + for (unsigned node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { if (instance.isCompatibleType(node_type, proc_type)) { unsigned matched = std::min(ready_nodes_per_type[node_type], procs_per_type[proc_type]); nr_nodes += matched; ready_nodes_per_type[node_type] -= matched; procs_per_type[proc_type] -= matched; } + } + } return nr_nodes; } @@ -299,7 +294,6 @@ class VarianceFillup : public Scheduler { * @return A pair containing the return status and the computed BspSchedule. */ virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); for (const auto &v : instance.getComputationalDag().vertices()) { @@ -325,15 +319,15 @@ class VarianceFillup : public Scheduler { std::vector, VarianceCompare>> allReady( instance.getArchitecture().getNumberOfProcessorTypes()); - const std::vector> procTypesCompatibleWithNodeType = - instance.getProcTypesCompatibleWithNodeType(); - const std::vector>> procTypesCompatibleWithNodeType_skip_proctype = - procTypesCompatibleWithNodeType_omit_procType(instance); + const std::vector> procTypesCompatibleWithNodeType = instance.getProcTypesCompatibleWithNodeType(); + const std::vector>> procTypesCompatibleWithNodeType_skip_proctype + = procTypesCompatibleWithNodeType_omit_procType(instance); std::vector nr_ready_nodes_per_type(G.num_vertex_types(), 0); std::vector nr_procs_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for (unsigned proc = 0; proc < params_p; ++proc) + for (unsigned proc = 0; proc < params_p; ++proc) { ++nr_procs_per_type[instance.getArchitecture().processorType(proc)]; + } std::vector nrPredecRemain(N); for (VertexType node = 0; node < N; node++) { @@ -342,8 +336,9 @@ class VarianceFillup : public Scheduler { if (num_parents == 0) { ready.insert(std::make_pair(node, work_variances[node])); ++nr_ready_nodes_per_type[G.vertex_type(node)]; - for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) + for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) { allReady[procType].insert(std::make_pair(node, work_variances[node])); + } } } @@ -364,14 +359,15 @@ class VarianceFillup : public Scheduler { } } - for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); - ++procType) + for (unsigned procType = 0; procType < instance.getArchitecture().getNumberOfProcessorTypes(); ++procType) { allReady[procType].clear(); + } for (const auto &nodeAndValuePair : ready) { const auto node = nodeAndValuePair.first; - for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) + for (unsigned procType : procTypesCompatibleWithNodeType[G.vertex_type(node)]) { allReady[procType].insert(allReady[procType].end(), nodeAndValuePair); + } } ++supstepIdx; @@ -396,21 +392,23 @@ class VarianceFillup : public Scheduler { bool canAdd = true; for (const auto &pred : G.parents(succ)) { - if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) && - schedule.assignedSuperstep(pred) == supstepIdx) + if (schedule.assignedProcessor(pred) != schedule.assignedProcessor(node) + && schedule.assignedSuperstep(pred) == supstepIdx) { canAdd = false; + } } if constexpr (use_memory_constraint) { - if (canAdd) { - if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) + if (not memory_constraint.can_add(succ, schedule.assignedProcessor(node))) { canAdd = false; + } } } - if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) + if (!instance.isCompatible(succ, schedule.assignedProcessor(node))) { canAdd = false; + } if (canAdd) { procReady[schedule.assignedProcessor(node)].emplace(succ, work_variances[succ]); @@ -427,11 +425,18 @@ class VarianceFillup : public Scheduler { endSupStep = true; } while (CanChooseNode(instance, allReady, procReady, procFree)) { - VertexType nextNode = std::numeric_limits::max(); unsigned nextProc = params_p; - Choose(instance, work_variances, allReady, procReady, procFree, nextNode, nextProc, endSupStep, - max_finish_time - time, procTypesCompatibleWithNodeType_skip_proctype); + Choose(instance, + work_variances, + allReady, + procReady, + procFree, + nextNode, + nextProc, + endSupStep, + max_finish_time - time, + procTypesCompatibleWithNodeType_skip_proctype); if (nextNode == std::numeric_limits::max() || nextProc == params_p) { endSupStep = true; @@ -465,19 +470,18 @@ class VarianceFillup : public Scheduler { } if constexpr (use_memory_constraint) { - if (not check_mem_feasibility(instance, allReady, procReady)) { - return RETURN_STATUS::ERROR; } } - if (free > params_p * max_percent_idle_processors && - ((!increase_parallelism_in_new_superstep) || - get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) >= - std::min(std::min(params_p, static_cast(1.2 * (params_p - free))), - params_p - free + (static_cast(0.5 * free))))) + if (free > params_p * max_percent_idle_processors + && ((!increase_parallelism_in_new_superstep) + || get_nr_parallelizable_nodes(instance, nr_ready_nodes_per_type, nr_procs_per_type) + >= std::min(std::min(params_p, static_cast(1.2 * (params_p - free))), + params_p - free + (static_cast(0.5 * free))))) { endSupStep = true; + } } assert(schedule.satisfiesPrecedenceConstraints()); @@ -493,7 +497,6 @@ class VarianceFillup : public Scheduler { * @return The name of the schedule. */ virtual std::string getScheduleName() const override { - if constexpr (use_memory_constraint) { return "VarianceGreedyFillupMemory"; } else { @@ -502,4 +505,4 @@ class VarianceFillup : public Scheduler { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp index 1b8d72b9..862c9bb2 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp @@ -33,9 +33,8 @@ namespace osp { * a BSP schedule, with the assignment of vertices to processors and supersteps fixed. */ -template +template class CoptCommScheduleOptimizer { - static_assert(is_computational_dag_v, "CoptFullScheduler can only be used with computational DAGs."); bool ignore_latency = false; @@ -43,21 +42,19 @@ class CoptCommScheduleOptimizer { unsigned int timeLimitSeconds = 600; protected: - VarArray superstep_has_comm; VarArray max_comm_superstep_var; std::vector>> comm_processor_to_processor_superstep_node_var; - void setupVariablesConstraintsObjective(const BspScheduleCS& schedule, Model& model); + void setupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model); - void setInitialSolution(BspScheduleCS& schedule, Model &model); + void setInitialSolution(BspScheduleCS &schedule, Model &model); bool canShrinkResultingSchedule(unsigned number_of_supersteps) const; - void updateCommSchedule(BspScheduleCS& schedule) const; + void updateCommSchedule(BspScheduleCS &schedule) const; public: - using KeyTriple = std::tuple, unsigned int, unsigned int>; virtual ~CoptCommScheduleOptimizer() = default; @@ -66,14 +63,14 @@ class CoptCommScheduleOptimizer { virtual std::string getScheduleName() const { return "ILPCommunication"; } virtual void setTimeLimitSeconds(unsigned int limit) { timeLimitSeconds = limit; } + inline unsigned int getTimeLimitSeconds() const { return timeLimitSeconds; } + virtual void setIgnoreLatency(bool ignore_latency_) { ignore_latency = ignore_latency_; } }; - -template -RETURN_STATUS CoptCommScheduleOptimizer::improveSchedule(BspScheduleCS& schedule) { - +template +RETURN_STATUS CoptCommScheduleOptimizer::improveSchedule(BspScheduleCS &schedule) { Envr env; Model model = env.CreateModel("bsp_schedule_cs"); @@ -86,11 +83,11 @@ RETURN_STATUS CoptCommScheduleOptimizer::improveSchedule(BspScheduleCS< model.Solve(); - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) - { + if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { updateCommSchedule(schedule); - if (canShrinkResultingSchedule(schedule.numberOfSupersteps())) - schedule.shrinkByMergingSupersteps(); + if (canShrinkResultingSchedule(schedule.numberOfSupersteps())) { + schedule.shrinkByMergingSupersteps(); + } } if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { @@ -98,38 +95,37 @@ RETURN_STATUS CoptCommScheduleOptimizer::improveSchedule(BspScheduleCS< } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { return RETURN_STATUS::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) + if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { return RETURN_STATUS::BEST_FOUND; - else + } else { return RETURN_STATUS::TIMEOUT; + } } } -template +template bool CoptCommScheduleOptimizer::canShrinkResultingSchedule(unsigned number_of_supersteps) const { - for (unsigned step = 0; step < number_of_supersteps - 1; step++) { - - if (superstep_has_comm[static_cast(step)].Get(COPT_DBLINFO_VALUE) <= 0.01) + if (superstep_has_comm[static_cast(step)].Get(COPT_DBLINFO_VALUE) <= 0.01) { return true; + } } return false; } -template -void CoptCommScheduleOptimizer::updateCommSchedule(BspScheduleCS& schedule) const { - - std::map& cs = schedule.getCommunicationSchedule(); +template +void CoptCommScheduleOptimizer::updateCommSchedule(BspScheduleCS &schedule) const { + std::map &cs = schedule.getCommunicationSchedule(); cs.clear(); for (const auto &node : schedule.getInstance().vertices()) { - for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) { if (p_from != p_to) { for (unsigned int step = 0; step < schedule.numberOfSupersteps(); step++) { if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)].Get( - COPT_DBLINFO_VALUE) >= .99) { + COPT_DBLINFO_VALUE) + >= .99) { cs[std::make_tuple(node, p_from, p_to)] = step; } } @@ -139,34 +135,32 @@ void CoptCommScheduleOptimizer::updateCommSchedule(BspScheduleCS -void CoptCommScheduleOptimizer::setInitialSolution(BspScheduleCS& schedule, Model &model){ - - const Graph_t& DAG = schedule.getInstance().getComputationalDag(); - const BspArchitecture& arch = schedule.getInstance().getArchitecture(); - const unsigned& num_processors = schedule.getInstance().numberOfProcessors(); - const unsigned& num_supersteps = schedule.numberOfSupersteps(); +template +void CoptCommScheduleOptimizer::setInitialSolution(BspScheduleCS &schedule, Model &model) { + const Graph_t &DAG = schedule.getInstance().getComputationalDag(); + const BspArchitecture &arch = schedule.getInstance().getArchitecture(); + const unsigned &num_processors = schedule.getInstance().numberOfProcessors(); + const unsigned &num_supersteps = schedule.numberOfSupersteps(); const auto &cs = schedule.getCommunicationSchedule(); - std::vector > first_at(DAG.num_vertices(), std::vector(num_processors, std::numeric_limits::max())); - for (const auto &node : DAG.vertices()) - first_at[node][schedule.assignedProcessor(node)] = schedule.assignedSuperstep(node); - + std::vector> first_at(DAG.num_vertices(), + std::vector(num_processors, std::numeric_limits::max())); for (const auto &node : DAG.vertices()) { + first_at[node][schedule.assignedProcessor(node)] = schedule.assignedSuperstep(node); + } + for (const auto &node : DAG.vertices()) { for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { - - if(p1 == p2) + if (p1 == p2) { continue; + } for (unsigned step = 0; step < num_supersteps; step++) { - const auto &key = std::make_tuple(node, p1, p2); if (cs.find(key) != cs.end() && cs.at(key) == step) { model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)], 1); - first_at[node][p2] = std::min(first_at[node][p2], step+1); + first_at[node][p2] = std::min(first_at[node][p2], step + 1); } else { model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)], 0); } @@ -175,37 +169,38 @@ void CoptCommScheduleOptimizer::setInitialSolution(BspScheduleCS= first_at[node][proc]) - model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step] - [static_cast(node)], 1); - else - model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step] - [static_cast(node)], 0); + for (const auto &node : DAG.vertices()) { + for (unsigned proc = 0; proc < num_processors; proc++) { + for (unsigned step = 0; step < num_supersteps; step++) { + if (step >= first_at[node][proc]) { + model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast(node)], 1); + } else { + model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast(node)], 0); } + } + } + } - if(!ignore_latency) - { + if (!ignore_latency) { std::vector comm_phase_used(num_supersteps, 0); - for (auto const &[key, val] : cs) + for (auto const &[key, val] : cs) { comm_phase_used[val] = 1; - for (unsigned step = 0; step < num_supersteps; step++) + } + for (unsigned step = 0; step < num_supersteps; step++) { model.SetMipStart(superstep_has_comm[static_cast(step)], comm_phase_used[step]); + } } std::vector>> send(num_supersteps, std::vector>(num_processors, 0)); std::vector>> rec(num_supersteps, std::vector>(num_processors, 0)); for (const auto &[key, val] : cs) { - send[val][std::get<1>(key)] += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key)); + send[val][std::get<1>(key)] + += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key)); rec[val][std::get<2>(key)] += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key)); } for (unsigned step = 0; step < num_supersteps; step++) { - v_commw_t max_comm = 0; for (unsigned proc = 0; proc < num_processors; proc++) { max_comm = std::max(max_comm, send[step][proc]); @@ -219,9 +214,8 @@ void CoptCommScheduleOptimizer::setInitialSolution(BspScheduleCS -void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(const BspScheduleCS& schedule, Model& model) { - +template +void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model) { const unsigned &max_number_supersteps = schedule.numberOfSupersteps(); const unsigned &num_processors = schedule.getInstance().numberOfProcessors(); const unsigned num_vertices = static_cast(schedule.getInstance().numberOfVertices()); @@ -235,17 +229,14 @@ void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(cons // communicate node from p1 to p2 at superstep - comm_processor_to_processor_superstep_node_var = std::vector>>(num_processors, - std::vector>(num_processors, std::vector(max_number_supersteps))); + comm_processor_to_processor_superstep_node_var = std::vector>>( + num_processors, std::vector>(num_processors, std::vector(max_number_supersteps))); for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - - comm_processor_to_processor_superstep_node_var[p1][p2][step] = model.AddVars(static_cast(num_vertices), - COPT_BINARY, "comm_processor_to_processor_superstep_node"); + comm_processor_to_processor_superstep_node_var[p1][p2][step] + = model.AddVars(static_cast(num_vertices), COPT_BINARY, "comm_processor_to_processor_superstep_node"); } } } @@ -253,16 +244,12 @@ void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(cons if (!ignore_latency) { unsigned M = num_processors * num_processors * num_vertices; for (unsigned int step = 0; step < schedule.numberOfSupersteps(); step++) { - Expr expr; for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { - if (p1 != p2) { for (unsigned node = 0; node < num_vertices; node++) { - expr += comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)]; } } @@ -275,25 +262,25 @@ void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(cons // precedence constraint: if task is computed then all of its predecessors must have been present // and vertex is present where it was computed for (unsigned node = 0; node < num_vertices; node++) { - const unsigned &processor = schedule.assignedProcessor(node); const unsigned &superstep = schedule.assignedSuperstep(node); Expr expr; unsigned num_com_edges = 0; for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node)) { - if (schedule.assignedProcessor(node) != schedule.assignedProcessor(pred)) { num_com_edges += 1; expr += comm_processor_to_processor_superstep_node_var[processor][processor][superstep][static_cast(pred)]; model.AddConstr( - comm_processor_to_processor_superstep_node_var[schedule.assignedProcessor(pred)][schedule.assignedProcessor(pred)] - [schedule.assignedSuperstep(pred)][static_cast(pred)] == 1); + comm_processor_to_processor_superstep_node_var[schedule.assignedProcessor(pred)][schedule.assignedProcessor( + pred)][schedule.assignedSuperstep(pred)][static_cast(pred)] + == 1); } } - if (num_com_edges > 0) + if (num_com_edges > 0) { model.AddConstr(expr >= num_com_edges); + } } // combines two constraints: node can only be communicated if it is present; and node is present if it was computed @@ -301,15 +288,15 @@ void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(cons for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < num_processors; processor++) { for (unsigned int node = 0; node < num_vertices; node++) { - - if (processor == schedule.assignedProcessor(node) && step >= schedule.assignedSuperstep(node)) + if (processor == schedule.assignedProcessor(node) && step >= schedule.assignedSuperstep(node)) { continue; + } Expr expr1, expr2; if (step > 0) { - for (unsigned int p_from = 0; p_from < num_processors; p_from++) { - expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast(node)]; + expr1 + += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast(node)]; } } @@ -324,26 +311,23 @@ void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(cons for (unsigned step = 0; step < max_number_supersteps; step++) { for (unsigned processor = 0; processor < num_processors; processor++) { - Expr expr1, expr2; for (unsigned node = 0; node < num_vertices; node++) { - for (unsigned p_to = 0; p_to < num_processors; p_to++) { if (processor != p_to) { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node) * - schedule.getInstance().sendCosts(processor, p_to) * - comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast(node)]; + expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule.getInstance().sendCosts(processor, p_to) + * comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast(node)]; } } for (unsigned int p_from = 0; p_from < num_processors; p_from++) { if (processor != p_from) { - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node) * - schedule.getInstance().sendCosts(p_from, processor) * - comm_processor_to_processor_superstep_node_var[p_from][processor][step][static_cast(node)]; + expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule.getInstance().sendCosts(p_from, processor) + * comm_processor_to_processor_superstep_node_var[p_from][processor][step][static_cast(node)]; } } - } model.AddConstr(max_comm_superstep_var[static_cast(step)] >= expr1); @@ -357,13 +341,11 @@ void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(cons Expr expr; if (!ignore_latency) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast(step)] + - schedule.getInstance().synchronisationCosts() * superstep_has_comm[static_cast(step)]; + expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast(step)] + + schedule.getInstance().synchronisationCosts() * superstep_has_comm[static_cast(step)]; } } else { - for (unsigned int step = 0; step < max_number_supersteps; step++) { expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast(step)]; } @@ -371,4 +353,4 @@ void CoptCommScheduleOptimizer::setupVariablesConstraintsObjective(cons model.SetObjective(expr - schedule.getInstance().synchronisationCosts(), COPT_MINIMIZE); } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp index 45b58ca3..fdd3f5c1 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp @@ -55,9 +55,8 @@ namespace osp { * supersteps, enable/disable writing intermediate solutions, and get information about the best gap, objective value, * and bound found by the solver. */ -template +template class CoptFullScheduler : public Scheduler { - static_assert(is_computational_dag_v, "CoptFullScheduler can only be used with computational DAGs."); private: @@ -77,7 +76,6 @@ class CoptFullScheduler : public Scheduler { std::string solution_file_prefix; class WriteSolutionCallback : public CallbackBase { - private: unsigned counter; unsigned max_number_solution; @@ -86,9 +84,15 @@ class CoptFullScheduler : public Scheduler { public: WriteSolutionCallback() - : counter(0), max_number_solution(500), best_obj(COPT_INFINITY), allow_recomputation_cb(false), - write_solutions_path_cb(""), solution_file_prefix_cb(""), instance_ptr(), - node_to_processor_superstep_var_ptr(), comm_processor_to_processor_superstep_node_var_ptr() {} + : counter(0), + max_number_solution(500), + best_obj(COPT_INFINITY), + allow_recomputation_cb(false), + write_solutions_path_cb(""), + solution_file_prefix_cb(""), + instance_ptr(), + node_to_processor_superstep_var_ptr(), + comm_processor_to_processor_superstep_node_var_ptr() {} bool allow_recomputation_cb; std::string write_solutions_path_cb; @@ -99,54 +103,40 @@ class CoptFullScheduler : public Scheduler { std::vector>> *comm_processor_to_processor_superstep_node_var_ptr; void callback() override { - - if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && - GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { - + if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { try { - if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { - best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); if (allow_recomputation_cb) { - auto sched = constructBspScheduleRecompFromCallback(); DotFileWriter sched_writer; - sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + - std::to_string(counter) + "_schedule.dot", + sched_writer.write_schedule_recomp(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + + "_" + std::to_string(counter) + "_schedule.dot", sched); } else { - BspSchedule sched = constructBspScheduleFromCallback(); DotFileWriter sched_writer; - sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + - std::to_string(counter) + "_schedule.dot", + sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + + std::to_string(counter) + "_schedule.dot", sched); } counter++; } - } catch (const std::exception &e) { - } + } catch (const std::exception &e) {} } } BspScheduleCS constructBspScheduleFromCallback() { - BspScheduleCS schedule(*instance_ptr); for (const auto &node : instance_ptr->vertices()) { - for (unsigned int processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - - for (unsigned step = 0; - step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) { - - if (GetSolution( - (*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= - .99) { + for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); + step++) { + if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= .99) { schedule.setAssignedProcessor(node, processor); schedule.setAssignedSuperstep(node, step); } @@ -155,16 +145,15 @@ class CoptFullScheduler : public Scheduler { } for (const auto &node : instance_ptr->vertices()) { - for (unsigned int p_from = 0; p_from < instance_ptr->numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < instance_ptr->numberOfProcessors(); p_to++) { if (p_from != p_to) { for (int step = 0; step < (*node_to_processor_superstep_var_ptr)[0][0].Size(); step++) { if (GetSolution( - (*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][static_cast< - unsigned>(step)][static_cast(node)]) >= .99) { - schedule.addCommunicationScheduleEntry(node, p_from, p_to, - static_cast(step)); + (*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][static_cast( + step)][static_cast(node)]) + >= .99) { + schedule.addCommunicationScheduleEntry(node, p_from, p_to, static_cast(step)); } } } @@ -176,16 +165,13 @@ class CoptFullScheduler : public Scheduler { } BspScheduleRecomp constructBspScheduleRecompFromCallback() { - unsigned number_of_supersteps = 0; BspScheduleRecomp schedule(*instance_ptr); for (unsigned int node = 0; node < instance_ptr->numberOfVertices(); node++) { - for (unsigned int processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - - for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) { - + for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); + step++) { if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= .99) { schedule.assignments(node).emplace_back(processor, step); @@ -200,15 +186,15 @@ class CoptFullScheduler : public Scheduler { schedule.setNumberOfSupersteps(number_of_supersteps); for (unsigned int node = 0; node < instance_ptr->numberOfVertices(); node++) { - for (unsigned int p_from = 0; p_from < instance_ptr->numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < instance_ptr->numberOfProcessors(); p_to++) { if (p_from != p_to) { - for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) { - if (GetSolution( - (*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][step][static_cast(node)]) >= - .99) { - + for (unsigned step = 0; + step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); + step++) { + if (GetSolution((*comm_processor_to_processor_superstep_node_var_ptr)[p_from][p_to][step] + [static_cast(node)]) + >= .99) { schedule.addCommunicationScheduleEntry(node, p_from, p_to, step); } } @@ -234,26 +220,20 @@ class CoptFullScheduler : public Scheduler { VarArray max_work_superstep_var; void constructBspScheduleFromSolution(BspScheduleCS &schedule, bool cleanup_ = false) { - const auto &instance = schedule.getInstance(); unsigned number_of_supersteps = 0; for (unsigned step = 0; step < max_number_supersteps; step++) { - if (superstep_used_var[static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { number_of_supersteps++; } } for (const auto &node : instance.vertices()) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - - if (node_to_processor_superstep_var[node][processor][static_cast(step)].Get( - COPT_DBLINFO_VALUE) >= .99) { + if (node_to_processor_superstep_var[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { schedule.setAssignedProcessor(node, processor); schedule.setAssignedSuperstep(node, step); } @@ -261,19 +241,19 @@ class CoptFullScheduler : public Scheduler { } } - if (is_max_bsp && number_of_supersteps > 0) // can ignore last 2 comm phases in this case + if (is_max_bsp && number_of_supersteps > 0) { // can ignore last 2 comm phases in this case --number_of_supersteps; + } schedule.getCommunicationSchedule().clear(); for (const auto &node : instance.vertices()) { - for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { if (p_from != p_to) { for (unsigned int step = 0; step < number_of_supersteps - 1; step++) { - if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step] - [static_cast(node)] - .Get(COPT_DBLINFO_VALUE) >= .99) { + if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)].Get( + COPT_DBLINFO_VALUE) + >= .99) { schedule.addCommunicationScheduleEntry(node, p_from, p_to, step); } } @@ -292,7 +272,6 @@ class CoptFullScheduler : public Scheduler { unsigned number_of_supersteps = 0; for (unsigned step = 0; step < max_number_supersteps; step++) { - if (superstep_used_var[static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { number_of_supersteps++; } @@ -301,11 +280,8 @@ class CoptFullScheduler : public Scheduler { schedule.setNumberOfSupersteps(number_of_supersteps); for (unsigned node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) { - for (unsigned step = 0; step < number_of_supersteps - 1; step++) { - if (node_to_processor_superstep_var[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { schedule.assignments(node).emplace_back(processor, step); } @@ -315,13 +291,13 @@ class CoptFullScheduler : public Scheduler { schedule.getCommunicationSchedule().clear(); for (unsigned int node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) { if (p_from != p_to) { for (unsigned int step = 0; step < max_number_supersteps; step++) { if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)].Get( - COPT_DBLINFO_VALUE) >= .99) { + COPT_DBLINFO_VALUE) + >= .99) { schedule.addCommunicationScheduleEntry(node, p_from, p_to, step); } } @@ -337,36 +313,39 @@ class CoptFullScheduler : public Scheduler { } void loadInitialSchedule(Model &model, const BspInstance &instance) { - - if (use_initial_schedule_recomp && - (max_number_supersteps < initial_schedule_recomp->numberOfSupersteps() || - instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() || - instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) { + if (use_initial_schedule_recomp + && (max_number_supersteps < initial_schedule_recomp->numberOfSupersteps() + || instance.numberOfProcessors() != initial_schedule_recomp->getInstance().numberOfProcessors() + || instance.numberOfVertices() != initial_schedule_recomp->getInstance().numberOfVertices())) { throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not " "agree with those of the initial schedule's instance!"); } - if (!use_initial_schedule_recomp & use_initial_schedule && - (max_number_supersteps < initial_schedule->numberOfSupersteps() || - instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() || - instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) { + if (!use_initial_schedule_recomp & use_initial_schedule + && (max_number_supersteps < initial_schedule->numberOfSupersteps() + || instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() + || instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) { throw std::invalid_argument("Invalid Argument while computeScheduleRecomp[Recomp]: instance parameters do not " "agree with those of the initial schedule's instance!"); } - const auto &DAG = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getComputationalDag() : initial_schedule->getInstance().getComputationalDag(); + const auto &DAG = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getComputationalDag() + : initial_schedule->getInstance().getComputationalDag(); - const auto &arch = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getArchitecture() : initial_schedule->getInstance().getArchitecture(); + const auto &arch = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().getArchitecture() + : initial_schedule->getInstance().getArchitecture(); - const unsigned &num_processors = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().numberOfProcessors() : initial_schedule->getInstance().numberOfProcessors(); + const unsigned &num_processors = use_initial_schedule_recomp ? initial_schedule_recomp->getInstance().numberOfProcessors() + : initial_schedule->getInstance().numberOfProcessors(); - const unsigned &num_supersteps = use_initial_schedule_recomp ? initial_schedule_recomp->numberOfSupersteps() : initial_schedule->numberOfSupersteps(); + const unsigned &num_supersteps = use_initial_schedule_recomp ? initial_schedule_recomp->numberOfSupersteps() + : initial_schedule->numberOfSupersteps(); - const auto &cs = use_initial_schedule_recomp ? initial_schedule_recomp->getCommunicationSchedule() : initial_schedule->getCommunicationSchedule(); + const auto &cs = use_initial_schedule_recomp ? initial_schedule_recomp->getCommunicationSchedule() + : initial_schedule->getCommunicationSchedule(); assert(max_number_supersteps <= static_cast(std::numeric_limits::max())); for (unsigned step = 0; step < max_number_supersteps; step++) { - if (step < num_supersteps) { model.SetMipStart(superstep_used_var[static_cast(step)], 1); @@ -380,49 +359,43 @@ class CoptFullScheduler : public Scheduler { std::vector>> computed(DAG.num_vertices()); for (const auto &node : DAG.vertices()) { - if (use_initial_schedule_recomp) - for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) + if (use_initial_schedule_recomp) { + for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) { computed[node].emplace(assignment); - else + } + } else { computed[node].emplace(initial_schedule->assignedProcessor(node), initial_schedule->assignedSuperstep(node)); + } } - std::vector> first_at(DAG.num_vertices(), std::vector(num_processors, std::numeric_limits::max())); + std::vector> first_at(DAG.num_vertices(), + std::vector(num_processors, std::numeric_limits::max())); for (const auto &node : DAG.vertices()) { if (use_initial_schedule_recomp) { - for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) + for (const std::pair &assignment : initial_schedule_recomp->assignments(node)) { first_at[node][assignment.first] = std::min(first_at[node][assignment.first], assignment.second); + } } else { - first_at[node][initial_schedule->assignedProcessor(node)] = std::min(first_at[node][initial_schedule->assignedProcessor(node)], - initial_schedule->assignedSuperstep(node)); + first_at[node][initial_schedule->assignedProcessor(node)] = std::min( + first_at[node][initial_schedule->assignedProcessor(node)], initial_schedule->assignedSuperstep(node)); } } unsigned staleness = is_max_bsp ? 2 : 1; for (const auto &node : DAG.vertices()) { - for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { - if (p1 != p2) { - const auto &key = std::make_tuple(node, p1, p2); if (cs.find(key) != cs.end()) { - if (cs.at(key) == step) { model.SetMipStart( - comm_processor_to_processor_superstep_node_var[p1][p2][step] - [static_cast(node)], - 1); + comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)], 1); first_at[node][p2] = std::min(first_at[node][p2], step + staleness); } else { model.SetMipStart( - comm_processor_to_processor_superstep_node_var[p1][p2][step] - [static_cast(node)], - 0); + comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node)], 0); } } } @@ -431,39 +404,35 @@ class CoptFullScheduler : public Scheduler { } } - for (const auto &node : DAG.vertices()) - for (unsigned proc = 0; proc < num_processors; proc++) + for (const auto &node : DAG.vertices()) { + for (unsigned proc = 0; proc < num_processors; proc++) { for (unsigned step = 0; step < max_number_supersteps; step++) { - if (step >= first_at[node][proc]) - model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step] - [static_cast(node)], - 1); - else - model.SetMipStart(comm_processor_to_processor_superstep_node_var[proc][proc][step] - [static_cast(node)], - 0); + if (step >= first_at[node][proc]) { + model.SetMipStart( + comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast(node)], 1); + } else { + model.SetMipStart( + comm_processor_to_processor_superstep_node_var[proc][proc][step][static_cast(node)], 0); + } } + } + } for (const auto &node : DAG.vertices()) { - for (unsigned proc = 0; proc < num_processors; proc++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (computed[node].find(std::make_pair(proc, step)) != computed[node].end()) { model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast(step)], 1); } else { - model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast(step)], 0); } } } } - std::vector>> work( - max_number_supersteps, - std::vector>(num_processors, 0)); + std::vector>> work(max_number_supersteps, + std::vector>(num_processors, 0)); if (use_initial_schedule_recomp) { for (const auto &node : initial_schedule_recomp->getInstance().vertices()) { @@ -472,28 +441,23 @@ class CoptFullScheduler : public Scheduler { } } } else { - for (const auto &node : initial_schedule->getInstance().vertices()) - work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] += - DAG.vertex_work_weight(node); + for (const auto &node : initial_schedule->getInstance().vertices()) { + work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] + += DAG.vertex_work_weight(node); + } } - std::vector>> send( - max_number_supersteps, - std::vector>(num_processors, 0)); + std::vector>> send(max_number_supersteps, + std::vector>(num_processors, 0)); - std::vector>> rec( - max_number_supersteps, - std::vector>(num_processors, 0)); + std::vector>> rec(max_number_supersteps, std::vector>(num_processors, 0)); for (const auto &[key, val] : cs) { + send[val][std::get<1>(key)] + += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key)); - send[val][std::get<1>(key)] += - DAG.vertex_comm_weight(std::get<0>(key)) * - arch.sendCosts(std::get<1>(key), std::get<2>(key)); - - rec[val][std::get<2>(key)] += - DAG.vertex_comm_weight(std::get<0>(key)) * - arch.sendCosts(std::get<1>(key), std::get<2>(key)); + rec[val][std::get<2>(key)] + += DAG.vertex_comm_weight(std::get<0>(key)) * arch.sendCosts(std::get<1>(key), std::get<2>(key)); } for (unsigned step = 0; step < max_number_supersteps; step++) { @@ -523,7 +487,6 @@ class CoptFullScheduler : public Scheduler { } void setupVariablesConstraintsObjective(const BspInstance &instance, Model &model) { - /* Variables */ @@ -539,7 +502,8 @@ class CoptFullScheduler : public Scheduler { // variables indicating if there is any communication in superstep superstep_has_comm = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_has_comm"); // variables that incentivize the schedule to be continuous - needs to be done differently for maxBsp - mergeable_superstep_penalty = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "mergeable_superstep_penalty"); + mergeable_superstep_penalty + = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "mergeable_superstep_penalty"); } // variables for assigments of nodes to processor and superstep @@ -547,11 +511,9 @@ class CoptFullScheduler : public Scheduler { instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); for (const auto &node : instance.vertices()) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - - node_to_processor_superstep_var[node][processor] = - model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep"); + node_to_processor_superstep_var[node][processor] + = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep"); } } @@ -559,14 +521,12 @@ class CoptFullScheduler : public Scheduler { Constraints */ if (use_memory_constraint) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { for (unsigned step = 0; step < max_number_supersteps; step++) { - Expr expr; for (const auto &node : instance.vertices()) { - expr += node_to_processor_superstep_var[node][processor][static_cast(step)] * - instance.getComputationalDag().vertex_mem_weight(node); + expr += node_to_processor_superstep_var[node][processor][static_cast(step)] + * instance.getComputationalDag().vertex_mem_weight(node); } model.AddConstr(expr <= instance.getArchitecture().memoryBound(processor)); @@ -583,24 +543,20 @@ class CoptFullScheduler : public Scheduler { // superstep is used at all for (unsigned int step = 0; step < max_number_supersteps; step++) { - Expr expr; for (const auto &node : instance.vertices()) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { expr += node_to_processor_superstep_var[node][processor][static_cast(step)]; } } - model.AddConstr(expr <= static_cast(instance.numberOfVertices() * instance.numberOfProcessors()) * - superstep_used_var[static_cast(step)]); + model.AddConstr(expr <= static_cast(instance.numberOfVertices() * instance.numberOfProcessors()) + * superstep_used_var[static_cast(step)]); } // nodes are assigend depending on whether recomputation is allowed or not for (const auto &node : instance.vertices()) { - Expr expr; for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { expr += node_to_processor_superstep_var[node][processor].GetVar(static_cast(step)); } @@ -608,42 +564,36 @@ class CoptFullScheduler : public Scheduler { model.AddConstr(allow_recomputation ? expr >= .99 : expr == 1); } - if (allow_recomputation) + if (allow_recomputation) { std::cout << "setting up constraints with recomputation: " << allow_recomputation << std::endl; + } comm_processor_to_processor_superstep_node_var = std::vector>>( instance.numberOfProcessors(), - std::vector>(instance.numberOfProcessors(), - std::vector(max_number_supersteps))); + std::vector>(instance.numberOfProcessors(), std::vector(max_number_supersteps))); for (unsigned int p1 = 0; p1 < instance.numberOfProcessors(); p1++) { - for (unsigned int p2 = 0; p2 < instance.numberOfProcessors(); p2++) { for (unsigned int step = 0; step < max_number_supersteps; step++) { - - comm_processor_to_processor_superstep_node_var[p1][p2][step] = - model.AddVars(static_cast(instance.numberOfVertices()), COPT_BINARY, - "comm_processor_to_processor_superstep_node"); + comm_processor_to_processor_superstep_node_var[p1][p2][step] = model.AddVars( + static_cast(instance.numberOfVertices()), COPT_BINARY, "comm_processor_to_processor_superstep_node"); } } } // precedence constraint: if task is computed then all of its predecessors must have been present for (const auto &node : instance.vertices()) { - if (instance.getComputationalDag().in_degree(node) > 0) { for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr; for (const auto &parent : instance.getComputationalDag().parents(node)) { expr += comm_processor_to_processor_superstep_node_var[processor][processor][step] [static_cast(parent)]; } - model.AddConstr(expr >= - static_cast(instance.getComputationalDag().in_degree(node)) * - node_to_processor_superstep_var[node][processor][static_cast(step)]); + model.AddConstr(expr >= static_cast(instance.getComputationalDag().in_degree(node)) + * node_to_processor_superstep_var[node][processor][static_cast(step)]); } } } @@ -654,10 +604,8 @@ class CoptFullScheduler : public Scheduler { for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { for (const auto &node : instance.vertices()) { - Expr expr1, expr2; if (step > 0) { - for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { if (!is_max_bsp || p_from == processor) { expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1] @@ -672,8 +620,7 @@ class CoptFullScheduler : public Scheduler { expr1 += node_to_processor_superstep_var[node][processor][static_cast(step)]; for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { - expr2 += comm_processor_to_processor_superstep_node_var[processor][p_to][step] - [static_cast(node)]; + expr2 += comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast(node)]; } model.AddConstr(instance.numberOfProcessors() * (expr1) >= expr2); @@ -688,36 +635,38 @@ class CoptFullScheduler : public Scheduler { for (const auto &node : instance.vertices()) { for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { - if (p_from != p_to) + if (p_from != p_to) { expr += comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)]; + } } } } - model.AddConstr(static_cast(instance.numberOfProcessors() * instance.numberOfProcessors() * instance.numberOfVertices()) * - superstep_has_comm[static_cast(step)] >= - expr); + model.AddConstr(static_cast(instance.numberOfProcessors() * instance.numberOfProcessors() + * instance.numberOfVertices()) + * superstep_has_comm[static_cast(step)] + >= expr); } // if step i and (i+1) has no comm, and (i+2) has work, then (i+1) and (i+2) are mergeable -> penalize - for (unsigned int step = 0; step < max_number_supersteps - 2; step++) - model.AddConstr(superstep_used_var[static_cast(step + 2)] - superstep_has_comm[static_cast(step)] - superstep_has_comm[static_cast(step + 1)] <= mergeable_superstep_penalty[static_cast(step)]); + for (unsigned int step = 0; step < max_number_supersteps - 2; step++) { + model.AddConstr(superstep_used_var[static_cast(step + 2)] - superstep_has_comm[static_cast(step)] + - superstep_has_comm[static_cast(step + 1)] + <= mergeable_superstep_penalty[static_cast(step)]); + } } - max_comm_superstep_var = - model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_comm_superstep"); + max_comm_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_comm_superstep"); // coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_comm_superstep"); - max_work_superstep_var = - model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_work_superstep"); + max_work_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_work_superstep"); // coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_work_superstep"); for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr; for (unsigned int node = 0; node < instance.numberOfVertices(); node++) { - expr += instance.getComputationalDag().vertex_work_weight(node) * - node_to_processor_superstep_var[node][processor][static_cast(step)]; + expr += instance.getComputationalDag().vertex_work_weight(node) + * node_to_processor_superstep_var[node][processor][static_cast(step)]; } model.AddConstr(max_work_superstep_var[static_cast(step)] >= expr); @@ -726,15 +675,12 @@ class CoptFullScheduler : public Scheduler { for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr; for (const auto &node : instance.vertices()) { for (unsigned int p_to = 0; p_to < instance.numberOfProcessors(); p_to++) { if (processor != p_to) { - expr += instance.getComputationalDag().vertex_comm_weight(node) * - instance.sendCosts(processor, p_to) * - comm_processor_to_processor_superstep_node_var[processor][p_to][step] - [static_cast(node)]; + expr += instance.getComputationalDag().vertex_comm_weight(node) * instance.sendCosts(processor, p_to) + * comm_processor_to_processor_superstep_node_var[processor][p_to][step][static_cast(node)]; } } } @@ -745,15 +691,13 @@ class CoptFullScheduler : public Scheduler { for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr; for (const auto &node : instance.vertices()) { for (unsigned int p_from = 0; p_from < instance.numberOfProcessors(); p_from++) { if (processor != p_from) { - expr += instance.getComputationalDag().vertex_comm_weight(node) * - instance.sendCosts(p_from, processor) * - comm_processor_to_processor_superstep_node_var[p_from][processor][step] - [static_cast(node)]; + expr + += instance.getComputationalDag().vertex_comm_weight(node) * instance.sendCosts(p_from, processor) + * comm_processor_to_processor_superstep_node_var[p_from][processor][step][static_cast(node)]; } } } @@ -782,17 +726,19 @@ class CoptFullScheduler : public Scheduler { VarArray max_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_superstep"); for (unsigned int step = 0; step < max_number_supersteps; step++) { model.AddConstr(max_superstep_var[static_cast(step)] >= max_work_superstep_var[static_cast(step)]); - if (step > 0) - model.AddConstr(max_superstep_var[static_cast(step)] >= instance.communicationCosts() * max_comm_superstep_var[static_cast(step - 1)]); + if (step > 0) { + model.AddConstr(max_superstep_var[static_cast(step)] + >= instance.communicationCosts() * max_comm_superstep_var[static_cast(step - 1)]); + } expr += max_superstep_var[static_cast(step)]; expr += instance.synchronisationCosts() * superstep_has_comm[static_cast(step)]; expr += instance.synchronisationCosts() * mergeable_superstep_penalty[static_cast(step)]; } } else { for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += max_work_superstep_var[static_cast(step)] + - instance.communicationCosts() * max_comm_superstep_var[static_cast(step)] + - instance.synchronisationCosts() * superstep_used_var[static_cast(step)]; + expr += max_work_superstep_var[static_cast(step)] + + instance.communicationCosts() * max_comm_superstep_var[static_cast(step)] + + instance.synchronisationCosts() * superstep_used_var[static_cast(step)]; } expr -= instance.synchronisationCosts(); } @@ -814,18 +760,14 @@ class CoptFullScheduler : public Scheduler { computeScheduleBase(schedule, model); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - constructBspScheduleFromSolution(schedule, true); return RETURN_STATUS::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - constructBspScheduleFromSolution(schedule, true); return RETURN_STATUS::BEST_FOUND; @@ -837,29 +779,36 @@ class CoptFullScheduler : public Scheduler { public: CoptFullScheduler(unsigned steps = 5) - : allow_recomputation(false), use_memory_constraint(false), use_initial_schedule(false), - write_solutions_found(false), initial_schedule(0), max_number_supersteps(steps) { - + : allow_recomputation(false), + use_memory_constraint(false), + use_initial_schedule(false), + write_solutions_found(false), + initial_schedule(0), + max_number_supersteps(steps) { // solution_callback.comm_processor_to_processor_superstep_node_var_ptr = // &comm_processor_to_processor_superstep_node_var; // solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; } CoptFullScheduler(const BspScheduleCS &schedule) - : allow_recomputation(false), use_memory_constraint(false), use_initial_schedule(true), - write_solutions_found(false), initial_schedule(&schedule), + : allow_recomputation(false), + use_memory_constraint(false), + use_initial_schedule(true), + write_solutions_found(false), + initial_schedule(&schedule), max_number_supersteps(schedule.numberOfSupersteps()) { - // solution_callback.comm_processor_to_processor_superstep_node_var_ptr = // &comm_processor_to_processor_superstep_node_var; // solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; } CoptFullScheduler(const BspScheduleRecomp &schedule) - : allow_recomputation(true), use_memory_constraint(false), use_initial_schedule_recomp(true), - write_solutions_found(false), initial_schedule_recomp(&schedule), - max_number_supersteps(schedule.numberOfSupersteps()) { - } + : allow_recomputation(true), + use_memory_constraint(false), + use_initial_schedule_recomp(true), + write_solutions_found(false), + initial_schedule_recomp(&schedule), + max_number_supersteps(schedule.numberOfSupersteps()) {} virtual ~CoptFullScheduler() = default; @@ -874,7 +823,6 @@ class CoptFullScheduler : public Scheduler { * agree with those of the initial schedule's instance */ virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - BspScheduleCS schedule_cs(schedule.getInstance()); RETURN_STATUS status = computeScheduleCS(schedule_cs); if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) { @@ -886,13 +834,11 @@ class CoptFullScheduler : public Scheduler { } virtual RETURN_STATUS computeScheduleWithTimeLimit(BspSchedule &schedule, unsigned timeLimit) { - timeLimitSeconds = timeLimit; return computeSchedule(schedule); } virtual RETURN_STATUS computeMaxBspSchedule(MaxBspSchedule &schedule) { - MaxBspScheduleCS schedule_cs(schedule.getInstance()); RETURN_STATUS status = computeMaxBspScheduleCS(schedule_cs); if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) { @@ -916,7 +862,6 @@ class CoptFullScheduler : public Scheduler { } virtual RETURN_STATUS computeScheduleRecomp(BspScheduleRecomp &schedule) { - allow_recomputation = true; is_max_bsp = false; @@ -932,18 +877,14 @@ class CoptFullScheduler : public Scheduler { computeScheduleBase(schedule, model); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - constructBspScheduleRecompFromSolution(schedule, true); return RETURN_STATUS::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - constructBspScheduleRecompFromSolution(schedule, true); return RETURN_STATUS::BEST_FOUND; @@ -954,7 +895,6 @@ class CoptFullScheduler : public Scheduler { }; virtual void computeScheduleBase(const BspScheduleRecomp &schedule, Model &model) { - if (timeLimitSeconds > 0) { model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, timeLimitSeconds); } @@ -971,11 +911,9 @@ class CoptFullScheduler : public Scheduler { // model.SetIntParam(COPT_INTPARAM_DIVINGHEURLEVEL, 2); if (write_solutions_found) { - WriteSolutionCallback solution_callback; solution_callback.instance_ptr = &schedule.getInstance(); - solution_callback.comm_processor_to_processor_superstep_node_var_ptr = - &comm_processor_to_processor_superstep_node_var; + solution_callback.comm_processor_to_processor_superstep_node_var_ptr = &comm_processor_to_processor_superstep_node_var; solution_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; solution_callback.solution_file_prefix_cb = solution_file_prefix; solution_callback.write_solutions_path_cb = write_solutions_path; @@ -997,7 +935,6 @@ class CoptFullScheduler : public Scheduler { * @param schedule The provided schedule. */ inline void setInitialSolutionFromBspSchedule(const BspScheduleCS &schedule) { - initial_schedule = &schedule; max_number_supersteps = schedule.numberOfSupersteps(); @@ -1022,7 +959,6 @@ class CoptFullScheduler : public Scheduler { */ void setMaxNumberOfSupersteps(unsigned max) { if (use_initial_schedule && max < initial_schedule->numberOfSupersteps()) { - throw std::invalid_argument("Invalid Argument while setting " "max number of supersteps to a value " "which is less than the number of " @@ -1090,4 +1026,4 @@ class CoptFullScheduler : public Scheduler { virtual std::string getScheduleName() const override { return "FullIlp"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp index db9a01f3..e0369177 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp @@ -32,9 +32,8 @@ namespace osp { * a BSP schedule, from a starting superstep to and ending superstep. */ -template +template class CoptPartialScheduler { - static_assert(is_computational_dag_v, "CoptPartialScheduler can only be used with computational DAGs."); using KeyTriple = std::tuple, unsigned int, unsigned int>; @@ -42,7 +41,6 @@ class CoptPartialScheduler { unsigned int timeLimitSeconds = 600; protected: - unsigned start_superstep = 1, end_superstep = 3; std::vector> node_global_ID; @@ -51,9 +49,9 @@ class CoptPartialScheduler { std::vector> source_global_ID; std::unordered_map, vertex_idx_t> source_local_ID; - std::vector > node_needed_after_on_proc, source_needed_after_on_proc; - std::vector, unsigned, unsigned, unsigned> > fixed_comm_steps; - std::set > source_present_before; + std::vector> node_needed_after_on_proc, source_needed_after_on_proc; + std::vector, unsigned, unsigned, unsigned>> fixed_comm_steps; + std::set> source_present_before; unsigned max_number_supersteps; @@ -66,30 +64,33 @@ class CoptPartialScheduler { bool has_fixed_comm_in_preceding_step; - void setupVariablesConstraintsObjective(const BspScheduleCS& schedule, Model& model); + void setupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model); - void setInitialSolution(const BspScheduleCS& schedule, Model &model); + void setInitialSolution(const BspScheduleCS &schedule, Model &model); - void updateSchedule(BspScheduleCS& schedule) const; + void updateSchedule(BspScheduleCS &schedule) const; - void setupVertexMaps(const BspScheduleCS& schedule); + void setupVertexMaps(const BspScheduleCS &schedule); public: - virtual RETURN_STATUS improveSchedule(BspScheduleCS &schedule); virtual std::string getScheduleName() const { return "ILPPartial"; } virtual void setTimeLimitSeconds(unsigned int limit) { timeLimitSeconds = limit; } + inline unsigned int getTimeLimitSeconds() const { return timeLimitSeconds; } - virtual void setStartAndEndSuperstep(unsigned start_, unsigned end_) { start_superstep = start_; end_superstep = end_; } + + virtual void setStartAndEndSuperstep(unsigned start_, unsigned end_) { + start_superstep = start_; + end_superstep = end_; + } virtual ~CoptPartialScheduler() = default; }; -template -RETURN_STATUS CoptPartialScheduler::improveSchedule(BspScheduleCS& schedule) { - +template +RETURN_STATUS CoptPartialScheduler::improveSchedule(BspScheduleCS &schedule) { Envr env; Model model = env.CreateModel("bsp_schedule_partial"); @@ -104,86 +105,92 @@ RETURN_STATUS CoptPartialScheduler::improveSchedule(BspScheduleCS -void CoptPartialScheduler::setInitialSolution(const BspScheduleCS& schedule, Model &model){ - - const Graph_t& DAG = schedule.getInstance().getComputationalDag(); - const unsigned& num_processors = schedule.getInstance().numberOfProcessors(); +template +void CoptPartialScheduler::setInitialSolution(const BspScheduleCS &schedule, Model &model) { + const Graph_t &DAG = schedule.getInstance().getComputationalDag(); + const unsigned &num_processors = schedule.getInstance().numberOfProcessors(); const auto &cs = schedule.getCommunicationSchedule(); - for (const vertex_idx_t &node : DAG.vertices()) - { - if(node_local_ID.find(node) == node_local_ID.end()) + for (const vertex_idx_t &node : DAG.vertices()) { + if (node_local_ID.find(node) == node_local_ID.end()) { continue; - for (unsigned proc = 0; proc < num_processors; proc++) - for(unsigned step = 0; step < max_number_supersteps; ++step) - { - if (schedule.assignedProcessor(node) == proc && schedule.assignedSuperstep(node) == start_superstep + step) + } + for (unsigned proc = 0; proc < num_processors; proc++) { + for (unsigned step = 0; step < max_number_supersteps; ++step) { + if (schedule.assignedProcessor(node) == proc && schedule.assignedSuperstep(node) == start_superstep + step) { model.SetMipStart(node_to_processor_superstep_var[node_local_ID[node]][proc][static_cast(step)], 1); - else + } else { model.SetMipStart(node_to_processor_superstep_var[node_local_ID[node]][proc][static_cast(step)], 0); + } } + } } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) + for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { model.SetMipStart(keep_fixed_comm_step[static_cast(index)], 1); + } for (const auto &node : DAG.vertices()) { - - if(node_local_ID.find(node) == node_local_ID.end()) + if (node_local_ID.find(node) == node_local_ID.end()) { continue; + } for (unsigned p1 = 0; p1 < num_processors; p1++) { - for (unsigned p2 = 0; p2 < num_processors; p2++) { - - if(p1 == p2) + if (p1 == p2) { continue; + } for (unsigned step = 0; step < max_number_supersteps && step <= end_superstep - start_superstep; step++) { - const auto &key = std::make_tuple(node, p1, p2); - if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step) - model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node_local_ID[node])], 1); - else - model.SetMipStart(comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node_local_ID[node])], 0); + if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step) { + model.SetMipStart( + comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node_local_ID[node])], 1); + } else { + model.SetMipStart( + comm_processor_to_processor_superstep_node_var[p1][p2][step][static_cast(node_local_ID[node])], 0); + } } } } } for (const auto &source : DAG.vertices()) { - - if(source_local_ID.find(source) == source_local_ID.end()) + if (source_local_ID.find(source) == source_local_ID.end()) { continue; + } - for (unsigned proc = 0; proc < num_processors; proc++) - { - if(proc == schedule.assignedProcessor(source)) + for (unsigned proc = 0; proc < num_processors; proc++) { + if (proc == schedule.assignedProcessor(source)) { continue; + } for (unsigned step = 0; step < max_number_supersteps + 1 && step <= end_superstep - start_superstep + 1; step++) { - const auto &key = std::make_tuple(source, schedule.assignedProcessor(source), proc); - if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step - 1) - model.SetMipStart(comm_to_processor_superstep_source_var[proc][step][static_cast(source_local_ID[source])], 1); - else if(step > 0) - model.SetMipStart(comm_to_processor_superstep_source_var[proc][step][static_cast(source_local_ID[source])], 0); + if (cs.find(key) != cs.end() && cs.at(key) == start_superstep + step - 1) { + model.SetMipStart( + comm_to_processor_superstep_source_var[proc][step][static_cast(source_local_ID[source])], 1); + } else if (step > 0) { + model.SetMipStart( + comm_to_processor_superstep_source_var[proc][step][static_cast(source_local_ID[source])], 0); + } } } } @@ -192,33 +199,33 @@ void CoptPartialScheduler::setInitialSolution(const BspScheduleCS -void CoptPartialScheduler::updateSchedule(BspScheduleCS& schedule) const { - +template +void CoptPartialScheduler::updateSchedule(BspScheduleCS &schedule) const { unsigned number_of_supersteps = 0; - while (number_of_supersteps < max_number_supersteps && - superstep_used_var[static_cast(number_of_supersteps)].Get(COPT_DBLINFO_VALUE) >= .99) { + while (number_of_supersteps < max_number_supersteps + && superstep_used_var[static_cast(number_of_supersteps)].Get(COPT_DBLINFO_VALUE) >= .99) { number_of_supersteps++; } const int offset = static_cast(number_of_supersteps) - static_cast(end_superstep - start_superstep + 1); - for (vertex_idx_t node = 0; node < schedule.getInstance().numberOfVertices(); node++) - if(schedule.assignedSuperstep(node) > end_superstep) + for (vertex_idx_t node = 0; node < schedule.getInstance().numberOfVertices(); node++) { + if (schedule.assignedSuperstep(node) > end_superstep) { schedule.setAssignedSuperstep(node, static_cast(static_cast(schedule.assignedSuperstep(node)) + offset)); + } + } for (vertex_idx_t node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - - if(node_local_ID.find(node) == node_local_ID.end()) + if (node_local_ID.find(node) == node_local_ID.end()) { continue; + } for (unsigned processor = 0; processor < schedule.getInstance().numberOfProcessors(); processor++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - - if (node_to_processor_superstep_var[node_local_ID.at(node)][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) - { + if (node_to_processor_superstep_var[node_local_ID.at(node)][processor][static_cast(step)].Get( + COPT_DBLINFO_VALUE) + >= .99) { schedule.setAssignedSuperstep(node, start_superstep + step); schedule.setAssignedProcessor(node, processor); } @@ -226,36 +233,38 @@ void CoptPartialScheduler::updateSchedule(BspScheduleCS& sched } } - std::map& commSchedule = schedule.getCommunicationSchedule(); + std::map &commSchedule = schedule.getCommunicationSchedule(); std::vector toErase; - for (const auto &[key, val] : schedule.getCommunicationSchedule()) - { - if (val > end_superstep) + for (const auto &[key, val] : schedule.getCommunicationSchedule()) { + if (val > end_superstep) { commSchedule[key] = static_cast(static_cast(val) + offset); - else if (static_cast(val) >= static_cast(start_superstep) - 1) + } else if (static_cast(val) >= static_cast(start_superstep) - 1) { toErase.push_back(key); + } } - for(const KeyTriple& key : toErase) + for (const KeyTriple &key : toErase) { commSchedule.erase(key); + } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) - { - const auto& entry = fixed_comm_steps[index]; - if (keep_fixed_comm_step[static_cast(index)].Get(COPT_DBLINFO_VALUE) >= .99 && - std::get<3>(entry) < start_superstep + number_of_supersteps) + for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { + const auto &entry = fixed_comm_steps[index]; + if (keep_fixed_comm_step[static_cast(index)].Get(COPT_DBLINFO_VALUE) >= .99 + && std::get<3>(entry) < start_superstep + number_of_supersteps) { commSchedule[std::make_tuple(std::get<0>(entry), std::get<1>(entry), std::get<2>(entry))] = std::get<3>(entry); - else - commSchedule[std::make_tuple(std::get<0>(entry), std::get<1>(entry), std::get<2>(entry))] = start_superstep-1; + } else { + commSchedule[std::make_tuple(std::get<0>(entry), std::get<1>(entry), std::get<2>(entry))] = start_superstep - 1; + } } for (vertex_idx_t node = 0; node < node_global_ID.size(); node++) { - for (unsigned int p_from = 0; p_from < schedule.getInstance().numberOfProcessors(); p_from++) { for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) { if (p_from != p_to) { for (unsigned int step = 0; step < max_number_supersteps; step++) { - if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)].Get(COPT_DBLINFO_VALUE) >= .99) { + if (comm_processor_to_processor_superstep_node_var[p_from][p_to][step][static_cast(node)].Get( + COPT_DBLINFO_VALUE) + >= .99) { commSchedule[std::make_tuple(node_global_ID[node], p_from, p_to)] = start_superstep + step; break; } @@ -266,13 +275,14 @@ void CoptPartialScheduler::updateSchedule(BspScheduleCS& sched } for (vertex_idx_t source = 0; source < source_global_ID.size(); source++) { - for (unsigned int p_to = 0; p_to < schedule.getInstance().numberOfProcessors(); p_to++) { if (source_present_before.find(std::make_pair(source, p_to)) == source_present_before.end()) { for (unsigned int step = 0; step < max_number_supersteps + 1; step++) { - if (comm_to_processor_superstep_source_var[p_to][step][static_cast(source)].Get(COPT_DBLINFO_VALUE) >= .99) { - commSchedule[std::make_tuple(source_global_ID[source], schedule.assignedProcessor(source_global_ID[source]), p_to)] = - start_superstep - 1 + step; + if (comm_to_processor_superstep_source_var[p_to][step][static_cast(source)].Get(COPT_DBLINFO_VALUE) + >= .99) { + commSchedule[std::make_tuple( + source_global_ID[source], schedule.assignedProcessor(source_global_ID[source]), p_to)] + = start_superstep - 1 + step; break; } } @@ -282,13 +292,10 @@ void CoptPartialScheduler::updateSchedule(BspScheduleCS& sched schedule.cleanCommSchedule(); schedule.shrinkByMergingSupersteps(); - }; - -template -void CoptPartialScheduler::setupVariablesConstraintsObjective(const BspScheduleCS& schedule, Model& model) { - +template +void CoptPartialScheduler::setupVariablesConstraintsObjective(const BspScheduleCS &schedule, Model &model) { const vertex_idx_t num_vertices = static_cast>(node_global_ID.size()); const vertex_idx_t num_sources = static_cast>(source_global_ID.size()); const unsigned num_processors = schedule.getInstance().numberOfProcessors(); @@ -298,53 +305,53 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp */ // variables indicating if superstep is used at all superstep_used_var = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "superstep_used"); - VarArray superstep_has_comm = model.AddVars(static_cast(max_number_supersteps+1), COPT_BINARY, "superstep_has_comm"); + VarArray superstep_has_comm = model.AddVars(static_cast(max_number_supersteps + 1), COPT_BINARY, "superstep_has_comm"); VarArray has_comm_at_end = model.AddVars(1, COPT_BINARY, "has_comm_at_end"); // variables for assigments of nodes to processor and superstep node_to_processor_superstep_var = std::vector>(num_vertices, std::vector(num_processors)); for (unsigned int node = 0; node < num_vertices; node++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { - - node_to_processor_superstep_var[node][processor] = - model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep"); + node_to_processor_superstep_var[node][processor] + = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep"); } } // communicate node from p1 to p2 at superstep - comm_processor_to_processor_superstep_node_var = std::vector>>(num_processors, - std::vector>(num_processors, std::vector(max_number_supersteps))); + comm_processor_to_processor_superstep_node_var = std::vector>>( + num_processors, std::vector>(num_processors, std::vector(max_number_supersteps))); for (unsigned int p1 = 0; p1 < num_processors; p1++) { for (unsigned int p2 = 0; p2 < num_processors; p2++) { for (unsigned int step = 0; step < max_number_supersteps; step++) { - - comm_processor_to_processor_superstep_node_var[p1][p2][step] = - model.AddVars(static_cast(num_vertices), COPT_BINARY, "comm_processor_to_processor_superstep_node"); + comm_processor_to_processor_superstep_node_var[p1][p2][step] + = model.AddVars(static_cast(num_vertices), COPT_BINARY, "comm_processor_to_processor_superstep_node"); } } } // communicate nodes in supersteps smaller than start_superstep - comm_to_processor_superstep_source_var = std::vector>(num_processors, std::vector(max_number_supersteps + 1)); - std::vector> present_on_processor_superstep_source_var = std::vector>(num_processors, std::vector(max_number_supersteps)); + comm_to_processor_superstep_source_var + = std::vector>(num_processors, std::vector(max_number_supersteps + 1)); + std::vector> present_on_processor_superstep_source_var + = std::vector>(num_processors, std::vector(max_number_supersteps)); for (unsigned int proc = 0; proc < num_processors; proc++) { for (unsigned int step = 0; step < max_number_supersteps + 1; step++) { + comm_to_processor_superstep_source_var[proc][step] + = model.AddVars(static_cast(num_sources), COPT_BINARY, "comm_to_processor_superstep_source"); - comm_to_processor_superstep_source_var[proc][step] = - model.AddVars(static_cast(num_sources), COPT_BINARY, "comm_to_processor_superstep_source"); - - if(step < max_number_supersteps) - present_on_processor_superstep_source_var[proc][step] = - model.AddVars(static_cast(num_sources), COPT_BINARY, "present_on_processor_superstep_source"); + if (step < max_number_supersteps) { + present_on_processor_superstep_source_var[proc][step] + = model.AddVars(static_cast(num_sources), COPT_BINARY, "present_on_processor_superstep_source"); + } } } - VarArray max_comm_superstep_var = model.AddVars(static_cast(max_number_supersteps + 1), COPT_INTEGER, "max_comm_superstep"); + VarArray max_comm_superstep_var + = model.AddVars(static_cast(max_number_supersteps + 1), COPT_INTEGER, "max_comm_superstep"); VarArray max_work_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_INTEGER, "max_work_superstep"); @@ -363,63 +370,75 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp // check whether superstep is used at all (work or comm), and whether superstep has any communication at all unsigned large_constant_work = static_cast(num_vertices) * num_processors; - unsigned large_constant_comm = static_cast(num_vertices+num_sources) * num_processors * num_processors + static_cast(fixed_comm_steps.size()); + unsigned large_constant_comm = static_cast(num_vertices + num_sources) * num_processors * num_processors + + static_cast(fixed_comm_steps.size()); for (unsigned int step = 0; step < max_number_supersteps; step++) { - Expr expr_work, expr_comm; for (vertex_idx_t node = 0; node < num_vertices; node++) { - for (unsigned int processor = 0; processor < num_processors; processor++) { expr_work += node_to_processor_superstep_var[node][processor][static_cast(step)]; - for (unsigned int p_other = 0; p_other < num_processors; p_other++) - if(processor != p_other) - expr_comm += comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast(node)]; + for (unsigned int p_other = 0; p_other < num_processors; p_other++) { + if (processor != p_other) { + expr_comm + += comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast(node)]; + } + } + } + } + for (vertex_idx_t source = 0; source < num_sources; source++) { + for (unsigned int processor = 0; processor < num_processors; processor++) { + if (source_present_before.find(std::make_pair(source, processor)) == source_present_before.end()) { + expr_comm += comm_to_processor_superstep_source_var[processor][step + 1][static_cast(source)]; + } } } - for (vertex_idx_t source = 0; source < num_sources; source++) - for (unsigned int processor = 0; processor < num_processors; processor++) - if(source_present_before.find(std::make_pair(source, processor)) == source_present_before.end()) - expr_comm += comm_to_processor_superstep_source_var[processor][step+1][static_cast(source)]; - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) - if(std::get<3>(fixed_comm_steps[index]) == start_superstep + step) + for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { + if (std::get<3>(fixed_comm_steps[index]) == start_superstep + step) { expr_comm += keep_fixed_comm_step[static_cast(index)]; + } + } - model.AddConstr(expr_comm <= large_constant_comm * superstep_has_comm[static_cast(step+1)]); + model.AddConstr(expr_comm <= large_constant_comm * superstep_has_comm[static_cast(step + 1)]); model.AddConstr(expr_work <= large_constant_work * superstep_used_var[static_cast(step)]); - model.AddConstr(superstep_has_comm[static_cast(step+1)] <= superstep_used_var[static_cast(step)]); + model.AddConstr(superstep_has_comm[static_cast(step + 1)] <= superstep_used_var[static_cast(step)]); } // check communication usage in edge case: comm phase before the segment - if(has_fixed_comm_in_preceding_step) + if (has_fixed_comm_in_preceding_step) { model.AddConstr(superstep_has_comm[0] == 1); - else { + } else { Expr expr_comm_0; - for (vertex_idx_t source = 0; source < num_sources; source++) - for (unsigned int processor = 0; processor < num_processors; processor++) - if(source_present_before.find(std::make_pair(source, processor)) == source_present_before.end()) + for (vertex_idx_t source = 0; source < num_sources; source++) { + for (unsigned int processor = 0; processor < num_processors; processor++) { + if (source_present_before.find(std::make_pair(source, processor)) == source_present_before.end()) { expr_comm_0 += comm_to_processor_superstep_source_var[processor][0][static_cast(source)]; - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) + } + } + } + for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { expr_comm_0 += 1 - keep_fixed_comm_step[static_cast(index)]; - model.AddConstr(expr_comm_0 <= (static_cast(num_sources) * num_processors + static_cast(fixed_comm_steps.size())) * superstep_has_comm[0]); + } + model.AddConstr(expr_comm_0 + <= (static_cast(num_sources) * num_processors + static_cast(fixed_comm_steps.size())) + * superstep_has_comm[0]); } // check if there is any communication at the end of the subschedule - for (unsigned int step = 0; step < max_number_supersteps - 1; step++) - { - model.AddConstr(superstep_used_var[static_cast(step)] - superstep_used_var[static_cast(step + 1)] + - superstep_has_comm[static_cast(step+1)] - 1 <= has_comm_at_end[0]); + for (unsigned int step = 0; step < max_number_supersteps - 1; step++) { + model.AddConstr(superstep_used_var[static_cast(step)] - superstep_used_var[static_cast(step + 1)] + + superstep_has_comm[static_cast(step + 1)] - 1 + <= has_comm_at_end[0]); } - model.AddConstr(superstep_used_var[static_cast(max_number_supersteps - 1)] + - superstep_has_comm[static_cast(max_number_supersteps)] - 1 <= has_comm_at_end[0]); + model.AddConstr(superstep_used_var[static_cast(max_number_supersteps - 1)] + + superstep_has_comm[static_cast(max_number_supersteps)] - 1 + <= has_comm_at_end[0]); // nodes are assigend for (vertex_idx_t node = 0; node < num_vertices; node++) { - Expr expr; for (unsigned int processor = 0; processor < num_processors; processor++) { - for (unsigned int step = 0; step < max_number_supersteps; step++) { expr += node_to_processor_superstep_var[node][processor][static_cast(step)]; } @@ -432,26 +451,24 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp for (vertex_idx_t node = 0; node < num_vertices; node++) { for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < num_processors; processor++) { - Expr expr; unsigned num_terms = 0; - for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node_global_ID[node])) - { - if(node_local_ID.find(pred) != node_local_ID.end()) - { + for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node_global_ID[node])) { + if (node_local_ID.find(pred) != node_local_ID.end()) { ++num_terms; - expr += comm_processor_to_processor_superstep_node_var[processor][processor][step][static_cast(node_local_ID[pred])]; - } - else if(source_local_ID.find(pred) != source_local_ID.end() && - source_present_before.find(std::make_pair(source_local_ID[pred], processor)) == source_present_before.end()) - { + expr += comm_processor_to_processor_superstep_node_var[processor][processor][step] + [static_cast(node_local_ID[pred])]; + } else if (source_local_ID.find(pred) != source_local_ID.end() + && source_present_before.find(std::make_pair(source_local_ID[pred], processor)) + == source_present_before.end()) { ++num_terms; expr += present_on_processor_superstep_source_var[processor][step][static_cast(source_local_ID[pred])]; } } - if(num_terms > 0) + if (num_terms > 0) { model.AddConstr(expr >= num_terms * node_to_processor_superstep_var[node][processor][static_cast(step)]); + } } } } @@ -461,12 +478,11 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < num_processors; processor++) { for (vertex_idx_t node = 0; node < num_vertices; node++) { - Expr expr1, expr2; if (step > 0) { - for (unsigned int p_from = 0; p_from < num_processors; p_from++) { - expr1 += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast(node)]; + expr1 + += comm_processor_to_processor_superstep_node_var[p_from][processor][step - 1][static_cast(node)]; } } @@ -486,13 +502,14 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < num_processors; processor++) { for (vertex_idx_t source_node = 0; source_node < num_sources; source_node++) { - - if(source_present_before.find(std::make_pair(source_node, processor)) != source_present_before.end()) + if (source_present_before.find(std::make_pair(source_node, processor)) != source_present_before.end()) { continue; + } Expr expr1 = comm_to_processor_superstep_source_var[processor][step][static_cast(source_node)]; - if (step > 0) - expr1 += present_on_processor_superstep_source_var[processor][step-1][static_cast(source_node)]; + if (step > 0) { + expr1 += present_on_processor_superstep_source_var[processor][step - 1][static_cast(source_node)]; + } Expr expr2 = present_on_processor_superstep_source_var[processor][step][static_cast(source_node)]; @@ -502,30 +519,31 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp } // boundary conditions at the end - for(const std::pair, unsigned> node_and_proc : node_needed_after_on_proc) - { + for (const std::pair, unsigned> node_and_proc : node_needed_after_on_proc) { Expr expr; - for (unsigned int p_from = 0; p_from < num_processors; p_from++) - expr += comm_processor_to_processor_superstep_node_var[p_from][node_and_proc.second][max_number_supersteps - 1][static_cast(node_and_proc.first)]; + for (unsigned int p_from = 0; p_from < num_processors; p_from++) { + expr += comm_processor_to_processor_superstep_node_var[p_from][node_and_proc.second][max_number_supersteps - 1] + [static_cast(node_and_proc.first)]; + } model.AddConstr(expr >= 1); } - for(const std::pair, unsigned> source_and_proc : source_needed_after_on_proc) - { - Expr expr = present_on_processor_superstep_source_var[source_and_proc.second][max_number_supersteps - 1][static_cast(source_and_proc.first)]; - expr += comm_to_processor_superstep_source_var[source_and_proc.second][max_number_supersteps][static_cast(source_and_proc.first)]; + for (const std::pair, unsigned> source_and_proc : source_needed_after_on_proc) { + Expr expr = present_on_processor_superstep_source_var[source_and_proc.second][max_number_supersteps - 1] + [static_cast(source_and_proc.first)]; + expr += comm_to_processor_superstep_source_var[source_and_proc.second][max_number_supersteps] + [static_cast(source_and_proc.first)]; model.AddConstr(expr >= 1); } // cost calculation - work for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < num_processors; processor++) { - Expr expr; for (unsigned int node = 0; node < num_vertices; node++) { - expr += schedule.getInstance().getComputationalDag().vertex_work_weight(node_global_ID[node]) * - node_to_processor_superstep_var[node][processor][static_cast(step)]; + expr += schedule.getInstance().getComputationalDag().vertex_work_weight(node_global_ID[node]) + * node_to_processor_superstep_var[node][processor][static_cast(step)]; } model.AddConstr(max_work_superstep_var[static_cast(step)] >= expr); @@ -535,51 +553,49 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp // cost calculation - comm for (unsigned int step = 0; step < max_number_supersteps; step++) { for (unsigned int processor = 0; processor < num_processors; processor++) { - Expr expr1, expr2; for (vertex_idx_t node = 0; node < num_vertices; node++) { for (unsigned int p_other = 0; p_other < num_processors; p_other++) { if (processor != p_other) { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node]) * - schedule.getInstance().sendCosts(processor, p_other) * - comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast(node)]; - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node]) * - schedule.getInstance().sendCosts(p_other, processor) * - comm_processor_to_processor_superstep_node_var[p_other][processor][step][static_cast(node)]; + expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node]) + * schedule.getInstance().sendCosts(processor, p_other) + * comm_processor_to_processor_superstep_node_var[processor][p_other][step][static_cast(node)]; + expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(node_global_ID[node]) + * schedule.getInstance().sendCosts(p_other, processor) + * comm_processor_to_processor_superstep_node_var[p_other][processor][step][static_cast(node)]; } } } for (vertex_idx_t source = 0; source < num_sources; source++) { const unsigned origin_proc = schedule.assignedProcessor(source_global_ID[source]); - if(origin_proc == processor) - { - for (unsigned int p_other = 0; p_other < num_processors; p_other++) - { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) * - schedule.getInstance().sendCosts(processor, p_other) * - comm_to_processor_superstep_source_var[p_other][step + 1][static_cast(source)]; + if (origin_proc == processor) { + for (unsigned int p_other = 0; p_other < num_processors; p_other++) { + expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) + * schedule.getInstance().sendCosts(processor, p_other) + * comm_to_processor_superstep_source_var[p_other][step + 1][static_cast(source)]; } } - expr2 += - schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) * - schedule.getInstance().sendCosts(origin_proc, processor) * - comm_to_processor_superstep_source_var[processor][step + 1][static_cast(source)]; + expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) + * schedule.getInstance().sendCosts(origin_proc, processor) + * comm_to_processor_superstep_source_var[processor][step + 1][static_cast(source)]; } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) - { - const auto& entry = fixed_comm_steps[index]; - if(std::get<3>(entry) != start_superstep + step) + for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { + const auto &entry = fixed_comm_steps[index]; + if (std::get<3>(entry) != start_superstep + step) { continue; - if(std::get<1>(entry) == processor) - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) * - schedule.getInstance().sendCosts(processor, std::get<2>(entry)) * - keep_fixed_comm_step[static_cast(index)]; - if(std::get<2>(entry) == processor) - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) * - schedule.getInstance().sendCosts(std::get<1>(entry), processor) * - keep_fixed_comm_step[static_cast(index)]; + } + if (std::get<1>(entry) == processor) { + expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) + * schedule.getInstance().sendCosts(processor, std::get<2>(entry)) + * keep_fixed_comm_step[static_cast(index)]; + } + if (std::get<2>(entry) == processor) { + expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) + * schedule.getInstance().sendCosts(std::get<1>(entry), processor) + * keep_fixed_comm_step[static_cast(index)]; + } } model.AddConstr(max_comm_superstep_var[static_cast(step + 1)] >= expr1); @@ -589,36 +605,33 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp // cost calculation - first comm phase handled separately for (unsigned int processor = 0; processor < num_processors; processor++) { - Expr expr1, expr2; for (vertex_idx_t source = 0; source < num_sources; source++) { const unsigned origin_proc = schedule.assignedProcessor(source_global_ID[source]); - if(origin_proc == processor) - { - for (unsigned int p_other = 0; p_other < num_processors; p_other++) - { - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) * - schedule.getInstance().sendCosts(processor, p_other) * - comm_to_processor_superstep_source_var[p_other][0][static_cast(source)]; + if (origin_proc == processor) { + for (unsigned int p_other = 0; p_other < num_processors; p_other++) { + expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) + * schedule.getInstance().sendCosts(processor, p_other) + * comm_to_processor_superstep_source_var[p_other][0][static_cast(source)]; } } - expr2 += - schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) * - schedule.getInstance().sendCosts(origin_proc, processor) * - comm_to_processor_superstep_source_var[processor][0][static_cast(source)]; + expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(source_global_ID[source]) + * schedule.getInstance().sendCosts(origin_proc, processor) + * comm_to_processor_superstep_source_var[processor][0][static_cast(source)]; } - for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) - { - const auto& entry = fixed_comm_steps[index]; - if(std::get<1>(entry) == processor) - expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) * - schedule.getInstance().sendCosts(processor, std::get<2>(entry)) * - (1-keep_fixed_comm_step[static_cast(index)]); - if(std::get<2>(entry) == processor) - expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) * - schedule.getInstance().sendCosts(std::get<1>(entry), processor) * - (1-keep_fixed_comm_step[static_cast(index)]); + for (unsigned index = 0; index < fixed_comm_steps.size(); ++index) { + const auto &entry = fixed_comm_steps[index]; + if (std::get<1>(entry) == processor) { + expr1 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) + * schedule.getInstance().sendCosts(processor, std::get<2>(entry)) + * (1 - keep_fixed_comm_step[static_cast(index)]); + } + if (std::get<2>(entry) == processor) { + expr2 += schedule.getInstance().getComputationalDag().vertex_comm_weight(std::get<0>(entry)) + * schedule.getInstance().sendCosts(std::get<1>(entry), processor) + * (1 - keep_fixed_comm_step[static_cast(index)]); + } } model.AddConstr(max_comm_superstep_var[0] >= expr1); @@ -631,8 +644,9 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp Expr expr; for (unsigned int step = 0; step < max_number_supersteps; step++) { - expr += max_work_superstep_var[static_cast(step)] + schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast(step + 1)] + - schedule.getInstance().synchronisationCosts() * superstep_used_var[static_cast(step)]; + expr += max_work_superstep_var[static_cast(step)] + + schedule.getInstance().communicationCosts() * max_comm_superstep_var[static_cast(step + 1)] + + schedule.getInstance().synchronisationCosts() * superstep_used_var[static_cast(step)]; } expr += schedule.getInstance().communicationCosts() * max_comm_superstep_var[0]; @@ -642,9 +656,8 @@ void CoptPartialScheduler::setupVariablesConstraintsObjective(const Bsp model.SetObjective(expr - schedule.getInstance().synchronisationCosts(), COPT_MINIMIZE); }; -template -void CoptPartialScheduler::setupVertexMaps(const BspScheduleCS& schedule) { - +template +void CoptPartialScheduler::setupVertexMaps(const BspScheduleCS &schedule) { node_local_ID.clear(); node_global_ID.clear(); source_local_ID.clear(); @@ -655,28 +668,23 @@ void CoptPartialScheduler::setupVertexMaps(const BspScheduleCS fixed_comm_steps.clear(); source_present_before.clear(); - std::vector > first_at = schedule.getFirstPresence(); + std::vector> first_at = schedule.getFirstPresence(); max_number_supersteps = end_superstep - start_superstep + 3; for (unsigned node = 0; node < schedule.getInstance().numberOfVertices(); node++) { - if (schedule.assignedSuperstep(node) >= start_superstep && schedule.assignedSuperstep(node) <= end_superstep) { - node_local_ID[node] = static_cast>(node_global_ID.size()); node_global_ID.push_back(node); for (const auto &pred : schedule.getInstance().getComputationalDag().parents(node)) { - if (schedule.assignedSuperstep(pred) < start_superstep) { - if (source_local_ID.find(pred) == source_local_ID.end()) { source_local_ID[pred] = static_cast>(source_global_ID.size()); source_global_ID.push_back(pred); } } else if (schedule.assignedSuperstep(pred) > end_superstep) { - throw std::invalid_argument("Initial Schedule might be invalid?!"); } } @@ -684,77 +692,83 @@ void CoptPartialScheduler::setupVertexMaps(const BspScheduleCS } // find where the sources are already present before the segment - for(const auto& source_and_ID : source_local_ID) - { + for (const auto &source_and_ID : source_local_ID) { vertex_idx_t source = source_and_ID.first; - for(unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) - if(first_at[source][proc] < start_superstep) + for (unsigned proc = 0; proc < schedule.getInstance().numberOfProcessors(); ++proc) { + if (first_at[source][proc] < start_superstep) { source_present_before.emplace(std::make_pair(source_and_ID.second, proc)); + } + } } // collect values that are needed by the end of the segment - for(const auto& source_and_ID : source_local_ID) - { + for (const auto &source_and_ID : source_local_ID) { vertex_idx_t source = source_and_ID.first; std::set procs_needing_this; - for (const auto &succ : schedule.getInstance().getComputationalDag().children(source)) - if(schedule.assignedProcessor(succ) != schedule.assignedProcessor(source) && - schedule.assignedSuperstep(succ) > end_superstep) + for (const auto &succ : schedule.getInstance().getComputationalDag().children(source)) { + if (schedule.assignedProcessor(succ) != schedule.assignedProcessor(source) + && schedule.assignedSuperstep(succ) > end_superstep) { procs_needing_this.insert(schedule.assignedProcessor(succ)); + } + } - for(unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1) - for(unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2) - { - if(proc1 == proc2) + for (unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1) { + for (unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2) { + if (proc1 == proc2) { continue; + } auto itr = schedule.getCommunicationSchedule().find(std::make_tuple(source, proc1, proc2)); - if (itr != schedule.getCommunicationSchedule().end() && itr->second > end_superstep) + if (itr != schedule.getCommunicationSchedule().end() && itr->second > end_superstep) { procs_needing_this.insert(schedule.assignedProcessor(proc1)); + } } + } - for(unsigned proc : procs_needing_this) - if(first_at[source][proc] >= start_superstep && first_at[source][proc] <= end_superstep + 1) + for (unsigned proc : procs_needing_this) { + if (first_at[source][proc] >= start_superstep && first_at[source][proc] <= end_superstep + 1) { source_needed_after_on_proc.emplace_back(source_and_ID.second, proc); + } + } } - for(const auto& node_and_ID : node_local_ID) - { + for (const auto &node_and_ID : node_local_ID) { vertex_idx_t node = node_and_ID.first; std::set procs_needing_this; - for (const auto &succ : schedule.getInstance().getComputationalDag().children(node)) - if(schedule.assignedSuperstep(succ) > end_superstep) + for (const auto &succ : schedule.getInstance().getComputationalDag().children(node)) { + if (schedule.assignedSuperstep(succ) > end_superstep) { procs_needing_this.insert(schedule.assignedProcessor(succ)); + } + } - for(unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1) - for(unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2) - { + for (unsigned proc1 = 0; proc1 < schedule.getInstance().numberOfProcessors(); ++proc1) { + for (unsigned proc2 = 0; proc2 < schedule.getInstance().numberOfProcessors(); ++proc2) { auto itr = schedule.getCommunicationSchedule().find(std::make_tuple(node, proc1, proc2)); - if (itr != schedule.getCommunicationSchedule().end() && proc1 != proc2 && itr->second > end_superstep) + if (itr != schedule.getCommunicationSchedule().end() && proc1 != proc2 && itr->second > end_superstep) { procs_needing_this.insert(schedule.assignedProcessor(proc1)); + } } + } - for(unsigned proc : procs_needing_this) - if(first_at[node][proc] <= end_superstep + 1) + for (unsigned proc : procs_needing_this) { + if (first_at[node][proc] <= end_superstep + 1) { node_needed_after_on_proc.emplace_back(node_and_ID.second, proc); + } + } } - // comm steps that just happen to be in this interval, but not connected to the nodes within has_fixed_comm_in_preceding_step = false; - for (const auto &[key, val] : schedule.getCommunicationSchedule()) - { + for (const auto &[key, val] : schedule.getCommunicationSchedule()) { vertex_idx_t source = std::get<0>(key); - if(source_local_ID.find(source) == source_local_ID.end() && - schedule.assignedSuperstep(source) < start_superstep && - val >= start_superstep - 1 && val <= end_superstep) - { - fixed_comm_steps.emplace_back(std::get<0>(key), std::get<1>(key), std::get<2>(key), val); - if(val == start_superstep - 1) - has_fixed_comm_in_preceding_step = true; + if (source_local_ID.find(source) == source_local_ID.end() && schedule.assignedSuperstep(source) < start_superstep + && val >= start_superstep - 1 && val <= end_superstep) { + fixed_comm_steps.emplace_back(std::get<0>(key), std::get<1>(key), std::get<2>(key), val); + if (val == start_superstep - 1) { + has_fixed_comm_in_preceding_step = true; } + } } - }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp index c051c8dc..150c6f73 100644 --- a/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp +++ b/include/osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp @@ -28,9 +28,8 @@ limitations under the License. namespace osp { -template +template class TotalCommunicationScheduler : public Scheduler { - private: Envr env; Model model; @@ -45,7 +44,6 @@ class TotalCommunicationScheduler : public Scheduler { bool use_lk_heuristic_callback; class WriteSolutionCallback : public CallbackBase { - private: unsigned counter; unsigned max_number_solution; @@ -54,8 +52,13 @@ class TotalCommunicationScheduler : public Scheduler { public: WriteSolutionCallback() - : counter(0), max_number_solution(100), best_obj(COPT_INFINITY), write_solutions_path_cb(""), - solution_file_prefix_cb(""), instance_ptr(0), node_to_processor_superstep_var_ptr() {} + : counter(0), + max_number_solution(100), + best_obj(COPT_INFINITY), + write_solutions_path_cb(""), + solution_file_prefix_cb(""), + instance_ptr(0), + node_to_processor_superstep_var_ptr() {} std::string write_solutions_path_cb; std::string solution_file_prefix_cb; @@ -64,44 +67,32 @@ class TotalCommunicationScheduler : public Scheduler { std::vector> *node_to_processor_superstep_var_ptr; void callback() override { - - if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && - GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { - + if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { try { - if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { - best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); auto sched = constructBspScheduleFromCallback(); DotFileWriter sched_writer; - sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + - "_" + std::to_string(counter) + "_schedule.dot", + sched_writer.write_schedule(write_solutions_path_cb + "intmed_sol_" + solution_file_prefix_cb + "_" + + std::to_string(counter) + "_schedule.dot", sched); counter++; } - } catch (const std::exception &e) { - } + } catch (const std::exception &e) {} } } BspSchedule constructBspScheduleFromCallback() { - BspSchedule schedule(*instance_ptr); for (const auto &node : instance_ptr->vertices()) { - for (unsigned processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - - for (unsigned step = 0; - step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) { - + for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); + step++) { assert(size < std::numeric_limits::max()); - if (GetSolution( - (*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= - .99) { + if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= .99) { schedule.setAssignedProcessor(node, processor); schedule.setAssignedSuperstep(node, step); } @@ -114,7 +105,6 @@ class TotalCommunicationScheduler : public Scheduler { }; class LKHeuristicCallback : public CallbackBase { - private: kl_total_comm lk_heuristic; @@ -122,8 +112,14 @@ class TotalCommunicationScheduler : public Scheduler { public: LKHeuristicCallback() - : lk_heuristic(), best_obj(COPT_INFINITY), num_step(0), instance_ptr(0), max_work_superstep_var_ptr(0), - superstep_used_var_ptr(0), node_to_processor_superstep_var_ptr(0), edge_vars_ptr(0) {} + : lk_heuristic(), + best_obj(COPT_INFINITY), + num_step(0), + instance_ptr(0), + max_work_superstep_var_ptr(0), + superstep_used_var_ptr(0), + node_to_processor_superstep_var_ptr(0), + edge_vars_ptr(0) {} unsigned num_step; const BspInstance *instance_ptr; @@ -134,16 +130,12 @@ class TotalCommunicationScheduler : public Scheduler { std::vector> *edge_vars_ptr; void callback() override { - if (Where() == COPT_CBCONTEXT_MIPSOL && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { - try { - if (0.0 < GetDblInfo(COPT_CBINFO_BESTBND) && 1.0 < GetDblInfo(COPT_CBINFO_BESTOBJ) && // GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && - 0.1 < (GetDblInfo(COPT_CBINFO_BESTOBJ) - GetDblInfo(COPT_CBINFO_BESTBND)) / - GetDblInfo(COPT_CBINFO_BESTOBJ)) { - + 0.1 < (GetDblInfo(COPT_CBINFO_BESTOBJ) - GetDblInfo(COPT_CBINFO_BESTBND)) + / GetDblInfo(COPT_CBINFO_BESTOBJ)) { // best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); auto sched = constructBspScheduleFromCallback(); @@ -157,25 +149,19 @@ class TotalCommunicationScheduler : public Scheduler { } } - } catch (const std::exception &e) { - } + } catch (const std::exception &e) {} } } BspSchedule constructBspScheduleFromCallback() { - BspSchedule schedule(*instance_ptr); for (const auto &node : instance_ptr->vertices()) { - for (unsigned processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - - for (unsigned step = 0; - step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) { + for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); + step++) { assert(step <= std::numeric_limits::max()); - if (GetSolution( - (*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= - .99) { + if (GetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)]) >= .99) { schedule.setAssignedProcessor(node, processor); schedule.setAssignedSuperstep(node, step); } @@ -187,9 +173,7 @@ class TotalCommunicationScheduler : public Scheduler { }; void feedImprovedSchedule(const BspSchedule &schedule) { - for (unsigned step = 0; step < num_step; step++) { - if (step < schedule.numberOfSupersteps()) { assert(step <= std::numeric_limits::max()); SetSolution((*superstep_used_var_ptr)[static_cast(step)], 1.0); @@ -200,20 +184,15 @@ class TotalCommunicationScheduler : public Scheduler { } for (const auto &node : instance_ptr->vertices()) { - for (unsigned processor = 0; processor < instance_ptr->numberOfProcessors(); processor++) { - - for (unsigned step = 0; - step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); step++) { - + for (unsigned step = 0; step < static_cast((*node_to_processor_superstep_var_ptr)[0][0].Size()); + step++) { if (schedule.assignedProcessor(node) == processor && schedule.assignedSuperstep(node) == step) { assert(step <= std::numeric_limits::max()); - SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)], - 1.0); + SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)], 1.0); } else { assert(step <= std::numeric_limits::max()); - SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)], - 0.0); + SetSolution((*node_to_processor_superstep_var_ptr)[node][processor][static_cast(step)], 0.0); } } } @@ -223,12 +202,11 @@ class TotalCommunicationScheduler : public Scheduler { num_step, std::vector>(instance_ptr->numberOfProcessors(), 0)); for (const auto &node : instance_ptr->vertices()) { - work[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] += - instance_ptr->getComputationalDag().vertex_work_weight(node); + work[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] + += instance_ptr->getComputationalDag().vertex_work_weight(node); } for (unsigned step = 0; step < num_step; step++) { - v_workw_t max_work = 0; for (unsigned proc = 0; proc < instance_ptr->numberOfProcessors(); proc++) { if (max_work < work[step][proc]) { @@ -241,17 +219,12 @@ class TotalCommunicationScheduler : public Scheduler { } if (instance_ptr->getArchitecture().isNumaArchitecture()) { - for (unsigned p1 = 0; p1 < instance_ptr->numberOfProcessors(); p1++) { for (unsigned p2 = 0; p2 < instance_ptr->numberOfProcessors(); p2++) { if (p1 != p2) { - int edge_id = 0; for (const auto &ep : edge_view(instance_ptr->getComputationalDag())) { - - if (schedule.assignedProcessor(ep.source) == p1 && - schedule.assignedProcessor(ep.target) == p2) { - + if (schedule.assignedProcessor(ep.source) == p1 && schedule.assignedProcessor(ep.target) == p2) { SetSolution((*edge_vars_ptr)[p1][p2][edge_id], 1.0); } else { SetSolution((*edge_vars_ptr)[p1][p2][edge_id], 0.0); @@ -264,12 +237,9 @@ class TotalCommunicationScheduler : public Scheduler { } } else { - int edge_id = 0; for (const auto &ep : edge_view(instance_ptr->getComputationalDag())) { - if (schedule.assignedProcessor(ep.source) != schedule.assignedProcessor(ep.target)) { - SetSolution((*edge_vars_ptr)[0][0][edge_id], 1.0); } else { SetSolution((*edge_vars_ptr)[0][0][edge_id], 0.0); @@ -297,15 +267,11 @@ class TotalCommunicationScheduler : public Scheduler { VarArray max_work_superstep_var; void constructBspScheduleFromSolution(BspSchedule &schedule, bool cleanup_ = false) { - const auto &instance = schedule.getInstance(); for (const auto &node : instance.vertices()) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (node_to_processor_superstep_var[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) { schedule.setAssignedProcessor(node, processor); schedule.setAssignedSuperstep(node, step); @@ -320,9 +286,7 @@ class TotalCommunicationScheduler : public Scheduler { } void loadInitialSchedule() { - for (unsigned step = 0; step < max_number_supersteps; step++) { - if (step < initial_schedule->numberOfSupersteps()) { assert(step <= std::numeric_limits::max()); model.SetMipStart(superstep_used_var[static_cast(step)], 1); @@ -334,19 +298,13 @@ class TotalCommunicationScheduler : public Scheduler { } for (const auto &node : initial_schedule->getInstance().vertices()) { - for (unsigned proc = 0; proc < initial_schedule->getInstance().numberOfProcessors(); proc++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - - if (proc == initial_schedule->assignedProcessor(node) && - step == initial_schedule->assignedSuperstep(node)) { - + if (proc == initial_schedule->assignedProcessor(node) && step == initial_schedule->assignedSuperstep(node)) { assert(step <= std::numeric_limits::max()); model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast(step)], 1); } else { - assert(step <= std::numeric_limits::max()); model.SetMipStart(node_to_processor_superstep_var[node][proc][static_cast(step)], 0); } @@ -355,12 +313,11 @@ class TotalCommunicationScheduler : public Scheduler { } std::vector>> work( - max_number_supersteps, - std::vector>(initial_schedule->getInstance().numberOfProcessors(), 0)); + max_number_supersteps, std::vector>(initial_schedule->getInstance().numberOfProcessors(), 0)); for (const auto &node : initial_schedule->getInstance().vertices()) { - work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] += - initial_schedule->getInstance().getComputationalDag().vertex_work_weight(node); + work[initial_schedule->assignedSuperstep(node)][initial_schedule->assignedProcessor(node)] + += initial_schedule->getInstance().getComputationalDag().vertex_work_weight(node); } for (unsigned step = 0; step < max_number_supersteps; step++) { @@ -380,7 +337,6 @@ class TotalCommunicationScheduler : public Scheduler { } void setupVariablesConstraintsObjective(const BspInstance &instance) { - /* Variables */ @@ -393,11 +349,9 @@ class TotalCommunicationScheduler : public Scheduler { assert(max_number_supersteps <= std::numeric_limits::max()); // variables for assigments of nodes to processor and superstep for (const auto &node : instance.vertices()) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - - node_to_processor_superstep_var[node][processor] = - model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep"); + node_to_processor_superstep_var[node][processor] + = model.AddVars(static_cast(max_number_supersteps), COPT_BINARY, "node_to_processor_superstep"); } } @@ -409,14 +363,12 @@ class TotalCommunicationScheduler : public Scheduler { Constraints */ if (use_memory_constraint) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned step = 0; step < max_number_supersteps; step++) { Expr expr; for (unsigned int node = 0; node < instance.numberOfVertices(); node++) { - expr += node_to_processor_superstep_var[node][processor][static_cast(step)] * - instance.getComputationalDag().vertex_mem_weight(node); + expr += node_to_processor_superstep_var[node][processor][static_cast(step)] + * instance.getComputationalDag().vertex_mem_weight(node); } model.AddConstr(expr <= instance.getArchitecture().memoryBound(processor)); } @@ -427,30 +379,25 @@ class TotalCommunicationScheduler : public Scheduler { model.AddConstr(superstep_used_var[0] == 1); for (unsigned int step = 0; step < max_number_supersteps - 1; step++) { - model.AddConstr(superstep_used_var[static_cast(step)] >= - superstep_used_var[static_cast(step + 1)]); + model.AddConstr(superstep_used_var[static_cast(step)] >= superstep_used_var[static_cast(step + 1)]); } // superstep is used at all for (unsigned int step = 0; step < max_number_supersteps; step++) { - Expr expr; for (const auto &node : instance.vertices()) { - for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { expr += node_to_processor_superstep_var[node][processor][static_cast(step)]; } } - model.AddConstr(expr <= static_cast(instance.numberOfVertices() * instance.numberOfProcessors()) * - superstep_used_var.GetVar(static_cast(step))); + model.AddConstr(expr <= static_cast(instance.numberOfVertices() * instance.numberOfProcessors()) + * superstep_used_var.GetVar(static_cast(step))); } // nodes are assigend depending on whether recomputation is allowed or not for (const auto &node : instance.vertices()) { - Expr expr; for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - assert(max_number_supersteps <= std::numeric_limits::max()); for (unsigned int step = 0; step < max_number_supersteps; step++) { expr += node_to_processor_superstep_var[node][processor].GetVar(static_cast(step)); @@ -462,28 +409,21 @@ class TotalCommunicationScheduler : public Scheduler { } for (const auto &node : instance.vertices()) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - assert(max_number_supersteps <= std::numeric_limits::max()); for (unsigned step = 0; step < max_number_supersteps; step++) { - for (const auto &source : instance.getComputationalDag().parents(node)) { - Expr expr1; for (unsigned p2 = 0; p2 < instance.numberOfProcessors(); p2++) { - for (unsigned step_prime = 0; step_prime < step; step_prime++) { - expr1 += node_to_processor_superstep_var[source][p2][static_cast(step_prime)]; } } expr1 += node_to_processor_superstep_var[source][processor][static_cast(step)]; - model.AddConstr(node_to_processor_superstep_var[node][processor][static_cast(step)] <= - expr1); + model.AddConstr(node_to_processor_superstep_var[node][processor][static_cast(step)] <= expr1); } } } @@ -492,21 +432,18 @@ class TotalCommunicationScheduler : public Scheduler { Expr total_edges_cut; if (instance.getArchitecture().isNumaArchitecture()) { - edge_vars = std::vector>(instance.numberOfProcessors(), std::vector(instance.numberOfProcessors())); for (unsigned int p1 = 0; p1 < instance.numberOfProcessors(); p1++) { for (unsigned int p2 = 0; p2 < instance.numberOfProcessors(); p2++) { if (p1 != p2) { - assert(instance.getComputationalDag().num_edges() <= std::numeric_limits::max()); - edge_vars[p1][p2] = model.AddVars(static_cast(instance.getComputationalDag().num_edges()), - COPT_BINARY, "edge"); + edge_vars[p1][p2] + = model.AddVars(static_cast(instance.getComputationalDag().num_edges()), COPT_BINARY, "edge"); int edge_id = 0; for (const auto &ep : edge_view(instance.getComputationalDag())) { - Expr expr1, expr2; assert(max_number_supersteps <= std::numeric_limits::max()); for (unsigned step = 0; step < max_number_supersteps; step++) { @@ -515,9 +452,9 @@ class TotalCommunicationScheduler : public Scheduler { } model.AddConstr(edge_vars[p1][p2][edge_id] >= expr1 + expr2 - 1.001); - total_edges_cut += edge_vars[p1][p2][edge_id] * - instance.getComputationalDag().vertex_comm_weight(ep.source) * - instance.sendCosts(p1, p2); + total_edges_cut += edge_vars[p1][p2][edge_id] + * instance.getComputationalDag().vertex_comm_weight(ep.source) + * instance.sendCosts(p1, p2); edge_id++; } @@ -526,15 +463,12 @@ class TotalCommunicationScheduler : public Scheduler { } } else { - edge_vars = std::vector>(1, std::vector(1)); assert(instance.getComputationalDag().num_edges() <= std::numeric_limits::max()); - edge_vars[0][0] = - model.AddVars(static_cast(instance.getComputationalDag().num_edges()), COPT_BINARY, "edge"); + edge_vars[0][0] = model.AddVars(static_cast(instance.getComputationalDag().num_edges()), COPT_BINARY, "edge"); int edge_id = 0; for (const auto &ep : edge_view(instance.getComputationalDag())) { - for (unsigned p1 = 0; p1 < instance.numberOfProcessors(); p1++) { Expr expr1, expr2; for (unsigned step = 0; step < max_number_supersteps; step++) { @@ -543,9 +477,7 @@ class TotalCommunicationScheduler : public Scheduler { for (unsigned p2 = 0; p2 < instance.numberOfProcessors(); p2++) { if (p1 != p2) { - for (unsigned step = 0; step < max_number_supersteps; step++) { - expr2 += node_to_processor_superstep_var[ep.target][p2][static_cast(step)]; } } @@ -553,8 +485,7 @@ class TotalCommunicationScheduler : public Scheduler { model.AddConstr(edge_vars[0][0][edge_id] >= expr1 + expr2 - 1.001); } - total_edges_cut += - instance.getComputationalDag().vertex_comm_weight(ep.source) * edge_vars[0][0][edge_id]; + total_edges_cut += instance.getComputationalDag().vertex_comm_weight(ep.source) * edge_vars[0][0][edge_id]; edge_id++; } @@ -563,7 +494,6 @@ class TotalCommunicationScheduler : public Scheduler { Expr expr; if (ignore_workload_balance) { - for (unsigned step = 0; step < max_number_supersteps; step++) { assert(step <= std::numeric_limits::max()); expr += instance.synchronisationCosts() * superstep_used_var[static_cast(step)]; @@ -571,18 +501,16 @@ class TotalCommunicationScheduler : public Scheduler { } else { assert(max_number_supersteps <= std::numeric_limits::max()); - max_work_superstep_var = - model.AddVars(static_cast(max_number_supersteps), COPT_CONTINUOUS, "max_work_superstep"); + max_work_superstep_var = model.AddVars(static_cast(max_number_supersteps), COPT_CONTINUOUS, "max_work_superstep"); // coptModel.AddVars(max_number_supersteps, 0, COPT_INFINITY, 0, COPT_INTEGER, "max_work_superstep"); for (unsigned int step = 0; step < max_number_supersteps; step++) { assert(step <= std::numeric_limits::max()); for (unsigned int processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr_work; for (const auto &node : instance.vertices()) { - expr_work += instance.getComputationalDag().vertex_work_weight(node) * - node_to_processor_superstep_var[node][processor][static_cast(step)]; + expr_work += instance.getComputationalDag().vertex_work_weight(node) + * node_to_processor_superstep_var[node][processor][static_cast(step)]; } model.AddConstr(max_work_superstep_var[static_cast(step)] >= expr_work); @@ -591,8 +519,8 @@ class TotalCommunicationScheduler : public Scheduler { for (unsigned step = 0; step < max_number_supersteps; step++) { assert(step <= std::numeric_limits::max()); - expr += max_work_superstep_var[static_cast(step)] + - instance.synchronisationCosts() * superstep_used_var[static_cast(step)]; + expr += max_work_superstep_var[static_cast(step)] + + instance.synchronisationCosts() * superstep_used_var[static_cast(step)]; } } @@ -606,11 +534,18 @@ class TotalCommunicationScheduler : public Scheduler { public: TotalCommunicationScheduler(unsigned steps = 5) - : Scheduler(), env(), model(env.CreateModel("TotalCommScheduler")), use_memory_constraint(false), - ignore_workload_balance(false), use_initial_schedule(false), initial_schedule(0), - write_solutions_found(false), use_lk_heuristic_callback(true), solution_callback(), heuristic_callback(), + : Scheduler(), + env(), + model(env.CreateModel("TotalCommScheduler")), + use_memory_constraint(false), + ignore_workload_balance(false), + use_initial_schedule(false), + initial_schedule(0), + write_solutions_found(false), + use_lk_heuristic_callback(true), + solution_callback(), + heuristic_callback(), max_number_supersteps(steps) { - heuristic_callback.max_work_superstep_var_ptr = &max_work_superstep_var; heuristic_callback.superstep_used_var_ptr = &superstep_used_var; heuristic_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; @@ -620,11 +555,18 @@ class TotalCommunicationScheduler : public Scheduler { } TotalCommunicationScheduler(const BspSchedule &schedule) - : Scheduler(), env(), model(env.CreateModel("TotalCommScheduler")), use_memory_constraint(false), - ignore_workload_balance(false), use_initial_schedule(true), initial_schedule(&schedule), - write_solutions_found(false), use_lk_heuristic_callback(true), solution_callback(), heuristic_callback(), + : Scheduler(), + env(), + model(env.CreateModel("TotalCommScheduler")), + use_memory_constraint(false), + ignore_workload_balance(false), + use_initial_schedule(true), + initial_schedule(&schedule), + write_solutions_found(false), + use_lk_heuristic_callback(true), + solution_callback(), + heuristic_callback(), max_number_supersteps(schedule.numberOfSupersteps()) { - heuristic_callback.max_work_superstep_var_ptr = &max_work_superstep_var; heuristic_callback.superstep_used_var_ptr = &superstep_used_var; heuristic_callback.node_to_processor_superstep_var_ptr = &node_to_processor_superstep_var; @@ -651,15 +593,14 @@ class TotalCommunicationScheduler : public Scheduler { * agree with those of the initial schedule's instance */ virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - auto &instance = schedule.getInstance(); assert(!ignore_workload_balance || !use_lk_heuristic_callback); - if (use_initial_schedule && - (max_number_supersteps < initial_schedule->numberOfSupersteps() || - instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() || - instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) { + if (use_initial_schedule + && (max_number_supersteps < initial_schedule->numberOfSupersteps() + || instance.numberOfProcessors() != initial_schedule->getInstance().numberOfProcessors() + || instance.numberOfVertices() != initial_schedule->getInstance().numberOfVertices())) { throw std::invalid_argument("Invalid Argument while computeSchedule(instance): instance parameters do not " "agree with those of the initial schedule's instance!"); } @@ -681,12 +622,10 @@ class TotalCommunicationScheduler : public Scheduler { // model.SetIntParam(COPT_INTPARAM_DIVINGHEURLEVEL, 2); if (write_solutions_found) { - solution_callback.instance_ptr = &instance; model.SetCallback(&solution_callback, COPT_CBCONTEXT_MIPSOL); } if (use_lk_heuristic_callback) { - heuristic_callback.instance_ptr = &instance; heuristic_callback.num_step = max_number_supersteps; model.SetCallback(&heuristic_callback, COPT_CBCONTEXT_MIPSOL); @@ -695,18 +634,14 @@ class TotalCommunicationScheduler : public Scheduler { model.Solve(); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - - return RETURN_STATUS::OSP_SUCCESS; //, constructBspScheduleFromSolution(instance, true)}; + return RETURN_STATUS::OSP_SUCCESS; //, constructBspScheduleFromSolution(instance, true)}; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - - return RETURN_STATUS::BEST_FOUND; //, constructBspScheduleFromSolution(instance, true)}; + return RETURN_STATUS::BEST_FOUND; //, constructBspScheduleFromSolution(instance, true)}; } else { return RETURN_STATUS::TIMEOUT; @@ -724,7 +659,6 @@ class TotalCommunicationScheduler : public Scheduler { * @param schedule The provided schedule. */ inline void setInitialSolutionFromBspSchedule(const BspSchedule &schedule) { - initial_schedule = &schedule; max_number_supersteps = schedule.numberOfSupersteps(); @@ -749,7 +683,6 @@ class TotalCommunicationScheduler : public Scheduler { */ void setMaxNumberOfSupersteps(unsigned max) { if (use_initial_schedule && max < initial_schedule->numberOfSupersteps()) { - throw std::invalid_argument("Invalid Argument while setting " "max number of supersteps to a value " "which is less than the number of " @@ -862,4 +795,4 @@ class TotalCommunicationScheduler : public Scheduler { virtual std::string getScheduleName() const override { return "TotalCommIlp"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/ImprovementScheduler.hpp b/include/osp/bsp/scheduler/ImprovementScheduler.hpp index f45ab33c..05bfcfb4 100644 --- a/include/osp/bsp/scheduler/ImprovementScheduler.hpp +++ b/include/osp/bsp/scheduler/ImprovementScheduler.hpp @@ -28,9 +28,8 @@ namespace osp { * The ImprovementScheduler class provides a common interface for improvement scheduling scheduler. * Subclasses of this class can implement specific improvement scheduler by overriding the virtual methods. */ -template +template class ImprovementScheduler { - protected: unsigned timeLimitSeconds; /**< The time limit in seconds for the improvement algorithm. */ @@ -91,9 +90,8 @@ class ImprovementScheduler { virtual RETURN_STATUS improveScheduleWithTimeLimit(BspSchedule &schedule) = 0; }; -template +template class ComboScheduler : public Scheduler { - private: Scheduler &base_scheduler; ImprovementScheduler &improvement_scheduler; @@ -109,7 +107,6 @@ class ComboScheduler : public Scheduler { } virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - RETURN_STATUS status = base_scheduler.computeSchedule(schedule); if (status != RETURN_STATUS::OSP_SUCCESS and status != RETURN_STATUS::BEST_FOUND) { return status; @@ -119,4 +116,4 @@ class ComboScheduler : public Scheduler { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp index 1ad7304d..32818027 100644 --- a/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp +++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp @@ -23,11 +23,11 @@ limitations under the License. namespace osp { -template -std::vector>> -heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median, - const double min_percent_components_retained, const double bound_component_weight_percent) { - +template +std::vector>> heavy_edge_preprocess(const Graph_t &graph, + const double heavy_is_x_times_median, + const double min_percent_components_retained, + const double bound_component_weight_percent) { static_assert(is_computational_dag_edge_desc_v, "HeavyEdgePreProcess can only be used with computational DAGs with edge weights."); @@ -44,7 +44,6 @@ heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median std::vector> edge_communications; edge_communications.reserve(graph.num_edges()); for (const auto &edge : edges(graph)) { - if constexpr (has_edge_weights_v) { edge_communications.emplace_back(graph.edge_comm_weight(edge)); } else { @@ -55,21 +54,17 @@ heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median // Computing the median and setting it to at least one e_commw_t median_edge_weight = 1; if (not edge_communications.empty()) { - auto median_it = edge_communications.begin(); std::advance(median_it, edge_communications.size() / 2); std::nth_element(edge_communications.begin(), median_it, edge_communications.end()); - median_edge_weight = - std::max(edge_communications[edge_communications.size() / 2], static_cast>(1)); + median_edge_weight = std::max(edge_communications[edge_communications.size() / 2], static_cast>(1)); } // Making edge list - e_commw_t minimal_edge_weight = - static_cast>(heavy_is_x_times_median * median_edge_weight); + e_commw_t minimal_edge_weight = static_cast>(heavy_is_x_times_median * median_edge_weight); std::vector edge_list; edge_list.reserve(graph.num_edges()); for (const auto &edge : edges(graph)) { - if constexpr (has_edge_weights_v) { if (graph.edge_comm_weight(edge) > minimal_edge_weight) { edge_list.emplace_back(edge); @@ -102,14 +97,16 @@ heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median // Joining heavy edges for (const EdgeType &edge : edge_list) { - if (static_cast(uf_structure.get_number_of_connected_components()) - 1.0 < - min_percent_components_retained * static_cast(graph.num_vertices())) + if (static_cast(uf_structure.get_number_of_connected_components()) - 1.0 + < min_percent_components_retained * static_cast(graph.num_vertices())) { break; + } v_workw_t weight_comp_a = uf_structure.get_weight_of_component_by_name(source(edge, graph)); v_workw_t weight_comp_b = uf_structure.get_weight_of_component_by_name(target(edge, graph)); - if (weight_comp_a + weight_comp_b > max_component_size) + if (weight_comp_a + weight_comp_b > max_component_size) { continue; + } uf_structure.join_by_name(source(edge, graph), target(edge, graph)); } @@ -117,4 +114,4 @@ heavy_edge_preprocess(const Graph_t &graph, const double heavy_is_x_times_median return uf_structure.get_connected_components(); } -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp index 49c9030e..00ffb584 100644 --- a/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp +++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp @@ -18,14 +18,13 @@ limitations under the License. #pragma once -#include "VariancePartitioner.hpp" #include "HeavyEdgePreProcess.hpp" +#include "VariancePartitioner.hpp" namespace osp { -template +template class LightEdgeVariancePartitioner : public VariancePartitioner { - private: using VertexType = vertex_idx_t; @@ -45,14 +44,19 @@ class LightEdgeVariancePartitioner : public VariancePartitioner( - max_percent_idle_processors_, variance_power_, increase_parallelism_in_new_superstep_, - max_priority_difference_percent_, slack_), + float max_priority_difference_percent_ = 0.34f, + float slack_ = 0.0f) + : VariancePartitioner(max_percent_idle_processors_, + variance_power_, + increase_parallelism_in_new_superstep_, + max_priority_difference_percent_, + slack_), heavy_is_x_times_median(heavy_is_x_times_median_), min_percent_components_retained(min_percent_components_retained_), bound_component_weight_percent(bound_component_weight_percent_) {}; @@ -93,7 +97,6 @@ class LightEdgeVariancePartitioner : public VariancePartitioner total_work = 0; for (const auto &v : graph.vertices()) { - schedule.setAssignedProcessor(v, n_processors); total_work += graph.vertex_work_weight(v); @@ -110,9 +113,8 @@ class LightEdgeVariancePartitioner : public VariancePartitioner> total_partition_work(n_processors, 0); std::vector> superstep_partition_work(n_processors, 0); - std::vector> preprocessed_partition = - heavy_edge_preprocess(graph, heavy_is_x_times_median, min_percent_components_retained, - bound_component_weight_percent / n_processors); + std::vector> preprocessed_partition = heavy_edge_preprocess( + graph, heavy_is_x_times_median, min_percent_components_retained, bound_component_weight_percent / n_processors); std::vector which_preprocess_partition(graph.num_vertices()); for (size_t i = 0; i < preprocessed_partition.size(); i++) { @@ -131,8 +133,8 @@ class LightEdgeVariancePartitioner : public VariancePartitioner> transient_cost_of_preprocessed_partition(preprocessed_partition.size(), 0); for (size_t i = 0; i < preprocessed_partition.size(); i++) { for (const auto &vert : preprocessed_partition[i]) { - transient_cost_of_preprocessed_partition[i] = - std::max(transient_cost_of_preprocessed_partition[i], graph.vertex_comm_weight(vert)); + transient_cost_of_preprocessed_partition[i] + = std::max(transient_cost_of_preprocessed_partition[i], graph.vertex_comm_weight(vert)); } } @@ -147,9 +149,7 @@ class LightEdgeVariancePartitioner : public VariancePartitioner= 2) { return RETURN_STATUS::ERROR; } @@ -161,13 +161,12 @@ class LightEdgeVariancePartitioner : public VariancePartitioner(free_processors.size()) > base::max_percent_idle_processors * n_processors && - ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors || - static_cast(ready.size()) >= - 1.2 * (n_processors - static_cast(free_processors.size())) || - static_cast(ready.size()) >= - n_processors - static_cast(free_processors.size()) + (0.5 * static_cast(free_processors.size())))) { + if (num_unable_to_partition_node_loop == 0 + && static_cast(free_processors.size()) > base::max_percent_idle_processors * n_processors + && ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors + || static_cast(ready.size()) >= 1.2 * (n_processors - static_cast(free_processors.size())) + || static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) + + (0.5 * static_cast(free_processors.size())))) { endsuperstep = true; // std::cout << "\nCall for new superstep - parallelism.\n"; } @@ -181,9 +180,9 @@ class LightEdgeVariancePartitioner : public VariancePartitioner - base::max_priority_difference_percent * static_cast(total_work) / static_cast(n_processors)) { + if (num_unable_to_partition_node_loop == 0 + && (max_priority - min_priority) > base::max_priority_difference_percent * static_cast(total_work) + / static_cast(n_processors)) { endsuperstep = true; // std::cout << "\nCall for new superstep - difference.\n"; } @@ -219,18 +218,18 @@ class LightEdgeVariancePartitioner : public VariancePartitioner(free_processors.size()) > base::max_percent_idle_processors * n_processors && - ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors || - static_cast(ready.size()) >= - 1.2 * (n_processors - static_cast(free_processors.size())) || - static_cast(ready.size()) >= - n_processors - static_cast(free_processors.size()) + (0.5 * static_cast(free_processors.size())))) { + if (num_unable_to_partition_node_loop == 0 + && static_cast(free_processors.size()) > base::max_percent_idle_processors * n_processors + && ((!base::increase_parallelism_in_new_superstep) || ready.size() >= n_processors + || static_cast(ready.size()) >= 1.2 * (n_processors - static_cast(free_processors.size())) + || static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) + + (0.5 * static_cast(free_processors.size())))) { endsuperstep = true; // std::cout << "\nCall for new superstep - parallelism.\n"; break; @@ -240,17 +239,19 @@ class LightEdgeVariancePartitioner : public VariancePartitionerfirst; if constexpr (base::use_memory_constraint) { - if (has_vertex_been_assigned[vert] || base::memory_constraint.can_add( - proc, memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]], - transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) { - + if (has_vertex_been_assigned[vert] + || base::memory_constraint.can_add( + proc, + memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]], + transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) { next_node = vert; assigned_a_node = true; } @@ -261,16 +262,19 @@ class LightEdgeVariancePartitioner : public VariancePartitionerfirst; if constexpr (base::use_memory_constraint) { - if (has_vertex_been_assigned[vert] || base::memory_constraint.can_add( - proc, memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]], - transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) { - + if (has_vertex_been_assigned[vert] + || base::memory_constraint.can_add( + proc, + memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]], + transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) { next_node = vert; assigned_a_node = true; } @@ -281,15 +285,17 @@ class LightEdgeVariancePartitioner : public VariancePartitionerfirst; if constexpr (base::use_memory_constraint) { - if (has_vertex_been_assigned[vert] || base::memory_constraint.can_add( - proc, memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]], - transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) { - + if (has_vertex_been_assigned[vert] + || base::memory_constraint.can_add( + proc, + memory_cost_of_preprocessed_partition[which_preprocess_partition[vert]], + transient_cost_of_preprocessed_partition[which_preprocess_partition[vert]])) { next_node = vert; assigned_a_node = true; } @@ -304,7 +310,6 @@ class LightEdgeVariancePartitioner : public VariancePartitioner #include +#include "osp/bsp/scheduler/Scheduler.hpp" + namespace osp { struct linear_interpolation { @@ -42,9 +43,8 @@ struct global_only_interpolation { float operator()(float, const float) { return 1.0f; }; }; -template +template class LoadBalancerBase : public Scheduler { - static_assert(std::is_invocable_r::value, "Interpolation_t must be invocable with two float arguments and return a float."); @@ -56,11 +56,11 @@ class LoadBalancerBase : public Scheduler { /// @param instance bsp instance /// @param slack how much to ignore global balance /// @return vector with the interpolated priorities - std::vector - computeProcessorPrioritiesInterpolation(const std::vector> &superstep_partition_work, - const std::vector> &total_partition_work, - const v_workw_t &total_work, const BspInstance &instance, - const float slack = 0.0) { + std::vector computeProcessorPrioritiesInterpolation(const std::vector> &superstep_partition_work, + const std::vector> &total_partition_work, + const v_workw_t &total_work, + const BspInstance &instance, + const float slack = 0.0) { v_workw_t work_till_now = 0; for (const auto &part_work : total_partition_work) { work_till_now += part_work; @@ -72,8 +72,10 @@ class LoadBalancerBase : public Scheduler { std::vector proc_prio(instance.numberOfProcessors()); for (size_t i = 0; i < proc_prio.size(); i++) { - assert(static_cast(total_partition_work[i]) < std::numeric_limits::max() && static_cast(superstep_partition_work[i]) < std::numeric_limits::max()); - proc_prio[i] = ((1 - value) * static_cast(superstep_partition_work[i])) + (value * static_cast(total_partition_work[i])); + assert(static_cast(total_partition_work[i]) < std::numeric_limits::max() + && static_cast(superstep_partition_work[i]) < std::numeric_limits::max()); + proc_prio[i] = ((1 - value) * static_cast(superstep_partition_work[i])) + + (value * static_cast(total_partition_work[i])); } return proc_prio; @@ -89,9 +91,10 @@ class LoadBalancerBase : public Scheduler { std::vector computeProcessorPriority(const std::vector> &superstep_partition_work, const std::vector> &total_partition_work, const v_workw_t &total_work, - const BspInstance &instance, const float slack = 0.0) { - return sorting_arrangement(computeProcessorPrioritiesInterpolation( - superstep_partition_work, total_partition_work, total_work, instance, slack)); + const BspInstance &instance, + const float slack = 0.0) { + return sorting_arrangement( + computeProcessorPrioritiesInterpolation(superstep_partition_work, total_partition_work, total_work, instance, slack)); } public: @@ -99,4 +102,4 @@ class LoadBalancerBase : public Scheduler { virtual ~LoadBalancerBase() = default; }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp b/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp index 089cf4d4..6b5f904c 100644 --- a/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp +++ b/include/osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp @@ -24,12 +24,12 @@ limitations under the License. namespace osp { -template +template class VariancePartitioner : public LoadBalancerBase { - static_assert(is_computational_dag_v, "VariancePartitioner can only be used with computational DAGs."); using VertexType = vertex_idx_t; + struct VarianceCompare { bool operator()(const std::pair &lhs, const std::pair &rhs) const { return ((lhs.second > rhs.second) || ((lhs.second >= rhs.second) && (lhs.first < rhs.first))); @@ -37,8 +37,8 @@ class VariancePartitioner : public LoadBalancerBase { }; protected: - constexpr static bool use_memory_constraint = - is_memory_constraint_v or is_memory_constraint_schedule_v; + constexpr static bool use_memory_constraint = is_memory_constraint_v + or is_memory_constraint_schedule_v; static_assert(not use_memory_constraint or std::is_same_v, "Graph_t must be the same as MemoryConstraint_t::Graph_impl_t."); @@ -65,7 +65,6 @@ class VariancePartitioner : public LoadBalancerBase { /// @param power the power in the power mean average /// @return vector of the logarithm of power mean averaged bottom node distance std::vector compute_work_variance(const Graph_t &graph, double power = 2) const { - std::vector work_variance(graph.num_vertices(), 0.0); const auto top_order = GetTopOrder(graph); @@ -84,25 +83,27 @@ class VariancePartitioner : public LoadBalancerBase { double node_weight = std::log(graph.vertex_work_weight(*r_iter)); double larger_val = node_weight > temp ? node_weight : temp; - work_variance[*r_iter] = - std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; + work_variance[*r_iter] = std::log(std::exp(node_weight - larger_val) + std::exp(temp - larger_val)) + larger_val; } return work_variance; } public: - VariancePartitioner(double max_percent_idle_processors_ = 0.2, double variance_power_ = 2.0, + VariancePartitioner(double max_percent_idle_processors_ = 0.2, + double variance_power_ = 2.0, bool increase_parallelism_in_new_superstep_ = true, - float max_priority_difference_percent_ = 0.34f, float slack_ = 0.0f) - : max_percent_idle_processors(max_percent_idle_processors_), variance_power(variance_power_), + float max_priority_difference_percent_ = 0.34f, + float slack_ = 0.0f) + : max_percent_idle_processors(max_percent_idle_processors_), + variance_power(variance_power_), increase_parallelism_in_new_superstep(increase_parallelism_in_new_superstep_), - max_priority_difference_percent(max_priority_difference_percent_), slack(slack_) {}; + max_priority_difference_percent(max_priority_difference_percent_), + slack(slack_) {}; virtual ~VariancePartitioner() = default; virtual RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); const auto &n_vert = instance.numberOfVertices(); const unsigned &n_processors = instance.numberOfProcessors(); @@ -132,7 +133,6 @@ class VariancePartitioner : public LoadBalancerBase { std::vector which_proc_ready_prior(n_vert, n_processors); for (const auto &v : graph.vertices()) { - schedule.setAssignedProcessor(v, n_processors); total_work += graph.vertex_work_weight(v); @@ -158,9 +158,7 @@ class VariancePartitioner : public LoadBalancerBase { endsuperstep = true; // std::cout << "\nCall for new superstep - unable to schedule.\n"; } else { - if constexpr (use_memory_constraint) { - if (num_unable_to_partition_node_loop >= 2) { return RETURN_STATUS::ERROR; } @@ -172,18 +170,17 @@ class VariancePartitioner : public LoadBalancerBase { // * n_processors << " ready size " << ready.size() << " small increase " << 1.2 * (n_processors - // free_processors.size()) << " large increase " << n_processors - free_processors.size() + (0.5 * // free_processors.size()) << "\n"; - if (num_unable_to_partition_node_loop == 0 && - static_cast(free_processors.size()) > max_percent_idle_processors * n_processors && - ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors || - static_cast(ready.size()) >= - 1.2 * (n_processors - static_cast(free_processors.size())) || - static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) + - (0.5 * static_cast(free_processors.size())))) { + if (num_unable_to_partition_node_loop == 0 + && static_cast(free_processors.size()) > max_percent_idle_processors * n_processors + && ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors + || static_cast(ready.size()) >= 1.2 * (n_processors - static_cast(free_processors.size())) + || static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) + + (0.5 * static_cast(free_processors.size())))) { endsuperstep = true; // std::cout << "\nCall for new superstep - parallelism.\n"; } - std::vector processor_priorities = - LoadBalancerBase::computeProcessorPrioritiesInterpolation( + std::vector processor_priorities + = LoadBalancerBase::computeProcessorPrioritiesInterpolation( superstep_partition_work, total_partition_work, total_work, instance); float min_priority = processor_priorities[0]; float max_priority = processor_priorities[0]; @@ -191,9 +188,9 @@ class VariancePartitioner : public LoadBalancerBase { min_priority = std::min(min_priority, prio); max_priority = std::max(max_priority, prio); } - if (num_unable_to_partition_node_loop == 0 && - (max_priority - min_priority) > - max_priority_difference_percent * static_cast(total_work) / static_cast(n_processors)) { + if (num_unable_to_partition_node_loop == 0 + && (max_priority - min_priority) + > max_priority_difference_percent * static_cast(total_work) / static_cast(n_processors)) { endsuperstep = true; // std::cout << "\nCall for new superstep - difference.\n"; } @@ -225,22 +222,21 @@ class VariancePartitioner : public LoadBalancerBase { bool assigned_a_node = false; // Choosing next processor - std::vector processors_in_order = - LoadBalancerBase::computeProcessorPriority( - superstep_partition_work, total_partition_work, total_work, instance, slack); + std::vector processors_in_order = LoadBalancerBase::computeProcessorPriority( + superstep_partition_work, total_partition_work, total_work, instance, slack); for (unsigned &proc : processors_in_order) { - if ((free_processors.find(proc)) != free_processors.cend()) + if ((free_processors.find(proc)) != free_processors.cend()) { continue; + } // Check for too many free processors - needed here because free processors may not have been detected // yet - if (num_unable_to_partition_node_loop == 0 && - static_cast(free_processors.size()) > max_percent_idle_processors * n_processors && - ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors || - static_cast(ready.size()) >= - 1.2 * (n_processors - static_cast(free_processors.size())) || - static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) + - (0.5 * static_cast(free_processors.size())))) { + if (num_unable_to_partition_node_loop == 0 + && static_cast(free_processors.size()) > max_percent_idle_processors * n_processors + && ((!increase_parallelism_in_new_superstep) || ready.size() >= n_processors + || static_cast(ready.size()) >= 1.2 * (n_processors - static_cast(free_processors.size())) + || static_cast(ready.size()) >= n_processors - static_cast(free_processors.size()) + + (0.5 * static_cast(free_processors.size())))) { endsuperstep = true; // std::cout << "\nCall for new superstep - parallelism.\n"; break; @@ -250,10 +246,11 @@ class VariancePartitioner : public LoadBalancerBase { // Choosing next node VertexType next_node; - for (auto vertex_prior_pair_iter = procReady[proc].begin(); - vertex_prior_pair_iter != procReady[proc].cend(); vertex_prior_pair_iter++) { - if (assigned_a_node) + for (auto vertex_prior_pair_iter = procReady[proc].begin(); vertex_prior_pair_iter != procReady[proc].cend(); + vertex_prior_pair_iter++) { + if (assigned_a_node) { break; + } if constexpr (use_memory_constraint) { if (memory_constraint.can_add(vertex_prior_pair_iter->first, proc)) { @@ -267,9 +264,11 @@ class VariancePartitioner : public LoadBalancerBase { } for (auto vertex_prior_pair_iter = procReadyPrior[proc].begin(); - vertex_prior_pair_iter != procReadyPrior[proc].cend(); vertex_prior_pair_iter++) { - if (assigned_a_node) + vertex_prior_pair_iter != procReadyPrior[proc].cend(); + vertex_prior_pair_iter++) { + if (assigned_a_node) { break; + } if constexpr (use_memory_constraint) { if (memory_constraint.can_add(vertex_prior_pair_iter->first, proc)) { @@ -284,9 +283,9 @@ class VariancePartitioner : public LoadBalancerBase { for (auto vertex_prior_pair_iter = allReady.begin(); vertex_prior_pair_iter != allReady.cend(); vertex_prior_pair_iter++) { - - if (assigned_a_node) + if (assigned_a_node) { break; + } if constexpr (use_memory_constraint) { if (memory_constraint.can_add(vertex_prior_pair_iter->first, proc)) { @@ -334,8 +333,8 @@ class VariancePartitioner : public LoadBalancerBase { ready.insert(std::make_pair(chld, variance_priorities[chld])); bool is_proc_ready = true; for (const auto &parent : graph.parents(chld)) { - if ((schedule.assignedProcessor(parent) != proc) && - (schedule.assignedSuperstep(parent) == superstep)) { + if ((schedule.assignedProcessor(parent) != proc) + && (schedule.assignedSuperstep(parent) == superstep)) { is_proc_ready = false; break; } @@ -362,4 +361,4 @@ class VariancePartitioner : public LoadBalancerBase { std::string getScheduleName() const override { return "VariancePartitioner"; }; }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp index 48a983a6..5d2374de 100644 --- a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp @@ -18,27 +18,28 @@ limitations under the License. #pragma once +#include + #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/scheduler/ImprovementScheduler.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" -#include +namespace osp { -namespace osp{ - -template +template class HillClimbingScheduler : public ImprovementScheduler { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); static_assert(is_computational_dag_v, "Graph_t must satisfy the computational_dag concept"); using vertex_idx = vertex_idx_t; using cost_type = v_workw_t; - static_assert(std::is_same_v, v_commw_t>, "HillClimbing requires work and comm. weights to have the same type."); + static_assert(std::is_same_v, v_commw_t>, + "HillClimbing requires work and comm. weights to have the same type."); public: enum Direction { EARLIER = 0, AT, LATER }; + static const int NumDirections = 3; // aux structure for efficiently storing the changes incurred by a potential HC step @@ -49,9 +50,8 @@ class HillClimbingScheduler : public ImprovementScheduler { }; private: - BspSchedule *schedule; - cost_type cost=0; + cost_type cost = 0; // Main parameters for runnign algorithm bool shrink = true; @@ -60,18 +60,18 @@ class HillClimbingScheduler : public ImprovementScheduler { // aux data structures std::vector>> supsteplists; std::vector>> canMove; - std::vector > > moveOptions; - std::vector >::iterator>>> movePointer; + std::vector>> moveOptions; + std::vector>::iterator>>> movePointer; std::vector>> succSteps; - std::vector > workCost, sent, received, commCost; - std::vector > > workCostList, commCostList; - std::vector >::iterator> > workCostPointer, commCostPointer; + std::vector> workCost, sent, received, commCost; + std::vector>> workCostList, commCostList; + std::vector>::iterator>> workCostPointer, commCostPointer; std::vector::iterator> supStepListPointer; - std::pair >::iterator> nextMove; + std::pair>::iterator> nextMove; bool HCwithLatency = true; // for improved candidate selection - std::deque > promisingMoves; + std::deque> promisingMoves; bool findPromisingMoves = true; // Initialize data structures (based on current schedule) @@ -117,28 +117,27 @@ class HillClimbingScheduler : public ImprovementScheduler { virtual RETURN_STATUS improveSchedule(BspSchedule &input_schedule) override; - //call with time/step limits + // call with time/step limits virtual RETURN_STATUS improveScheduleWithTimeLimit(BspSchedule &input_schedule) override; virtual RETURN_STATUS improveScheduleWithStepLimit(BspSchedule &input_schedule, const unsigned stepLimit = 10); - //setting parameters - void setSteepestAscend(bool steepestAscent_) {steepestAscent = steepestAscent_;} - void setShrink(bool shrink_) {shrink = shrink_;} + // setting parameters + void setSteepestAscend(bool steepestAscent_) { steepestAscent = steepestAscent_; } + + void setShrink(bool shrink_) { shrink = shrink_; } virtual std::string getScheduleName() const override { return "HillClimbing"; } }; -template +template RETURN_STATUS HillClimbingScheduler::improveSchedule(BspSchedule &input_schedule) { - ImprovementScheduler::setTimeLimitSeconds(600U); return improveScheduleWithTimeLimit(input_schedule); } // Main method for hill climbing (with time limit) -template +template RETURN_STATUS HillClimbingScheduler::improveScheduleWithTimeLimit(BspSchedule &input_schedule) { - schedule = &input_schedule; CreateSupstepLists(); @@ -146,7 +145,7 @@ RETURN_STATUS HillClimbingScheduler::improveScheduleWithTimeLimit(BspSc const std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now(); int counter = 0; - while (Improve()) + while (Improve()) { if ((++counter) == 10) { counter = 0; std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); @@ -156,29 +155,31 @@ RETURN_STATUS HillClimbingScheduler::improveScheduleWithTimeLimit(BspSc break; } } + } return RETURN_STATUS::OSP_SUCCESS; } -template +template // Hill climbing with step limit (designed as an ingredient for multilevel algorithms, no safety checks) -RETURN_STATUS HillClimbingScheduler::improveScheduleWithStepLimit(BspSchedule &input_schedule, const unsigned stepLimit) { - +RETURN_STATUS HillClimbingScheduler::improveScheduleWithStepLimit(BspSchedule &input_schedule, + const unsigned stepLimit) { schedule = &input_schedule; CreateSupstepLists(); Init(); - for (unsigned step = 0; step < stepLimit; ++step) - if (!Improve()) + for (unsigned step = 0; step < stepLimit; ++step) { + if (!Improve()) { break; + } + } return RETURN_STATUS::OSP_SUCCESS; } -template +template void HillClimbingScheduler::Init() { - if(shrink) - { + if (shrink) { schedule->shrinkByMergingSupersteps(); CreateSupstepLists(); } @@ -194,20 +195,23 @@ void HillClimbingScheduler::Init() { moveOptions.clear(); moveOptions.resize(NumDirections); movePointer.clear(); - movePointer.resize(NumDirections, std::vector >::iterator>>( - N, std::vector >::iterator>(P))); + movePointer.resize(NumDirections, + std::vector>::iterator>>( + N, std::vector>::iterator>(P))); // Value use lists succSteps.clear(); succSteps.resize(N, std::vector>(P)); - for (vertex_idx node = 0; node < N; ++node) + for (vertex_idx node = 0; node < N; ++node) { for (const vertex_idx &succ : G.children(node)) { - if (succSteps[node][schedule->assignedProcessor(succ)].find(schedule->assignedSuperstep(succ)) == - succSteps[node][schedule->assignedProcessor(succ)].end()) + if (succSteps[node][schedule->assignedProcessor(succ)].find(schedule->assignedSuperstep(succ)) + == succSteps[node][schedule->assignedProcessor(succ)].end()) { succSteps[node][schedule->assignedProcessor(succ)].insert({schedule->assignedSuperstep(succ), 1U}); - else + } else { succSteps[node][schedule->assignedProcessor(succ)].at(schedule->assignedSuperstep(succ)) += 1; + } } + } // Cost data workCost.clear(); @@ -224,21 +228,25 @@ void HillClimbingScheduler::Init() { commCostList.clear(); commCostList.resize(M - 1); workCostPointer.clear(); - workCostPointer.resize(M, std::vector >::iterator>(P)); + workCostPointer.resize(M, std::vector>::iterator>(P)); commCostPointer.clear(); - commCostPointer.resize(M - 1, std::vector >::iterator>(P)); + commCostPointer.resize(M - 1, std::vector>::iterator>(P)); // Supstep std::list pointers supStepListPointer.clear(); supStepListPointer.resize(N); - for (unsigned step = 0; step < M; ++step) - for (unsigned proc = 0; proc < P; ++proc) - for (auto it = supsteplists[step][proc].begin(); it != supsteplists[step][proc].end(); ++it) + for (unsigned step = 0; step < M; ++step) { + for (unsigned proc = 0; proc < P; ++proc) { + for (auto it = supsteplists[step][proc].begin(); it != supsteplists[step][proc].end(); ++it) { supStepListPointer[*it] = it; + } + } + } // Compute movement options - for (vertex_idx node = 0; node < N; ++node) + for (vertex_idx node = 0; node < N; ++node) { updateNodeMoves(node); + } nextMove.first = 0; nextMove.second = moveOptions[0].begin(); @@ -247,8 +255,9 @@ void HillClimbingScheduler::Init() { std::vector work_cost(M, 0); for (unsigned step = 0; step < M; ++step) { for (unsigned proc = 0; proc < P; ++proc) { - for (const vertex_idx node : supsteplists[step][proc]) + for (const vertex_idx node : supsteplists[step][proc]) { workCost[step][proc] += schedule->getInstance().getComputationalDag().vertex_work_weight(node); + } std::pair entry(workCost[step][proc], proc); workCostPointer[step][proc] = workCostList[step].insert(entry).first; @@ -259,16 +268,24 @@ void HillClimbingScheduler::Init() { cost = work_cost[0]; std::vector> present(N, std::vector(P, false)); for (unsigned step = 0; step < M - schedule->getStaleness(); ++step) { - for (unsigned proc = 0; proc < P; ++proc) - for (const vertex_idx node : supsteplists[step + schedule->getStaleness()][proc]) - for (const vertex_idx &pred : G.parents(node)) - if (schedule->assignedProcessor(node) != schedule->assignedProcessor(pred) && !present[pred][schedule->assignedProcessor(node)]) { + for (unsigned proc = 0; proc < P; ++proc) { + for (const vertex_idx node : supsteplists[step + schedule->getStaleness()][proc]) { + for (const vertex_idx &pred : G.parents(node)) { + if (schedule->assignedProcessor(node) != schedule->assignedProcessor(pred) + && !present[pred][schedule->assignedProcessor(node)]) { present[pred][schedule->assignedProcessor(node)] = true; - sent[step][schedule->assignedProcessor(pred)] += - schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), schedule->assignedProcessor(node)); - received[step][schedule->assignedProcessor(node)] += - schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), schedule->assignedProcessor(node)); + sent[step][schedule->assignedProcessor(pred)] + += schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), + schedule->assignedProcessor(node)); + received[step][schedule->assignedProcessor(node)] + += schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), + schedule->assignedProcessor(node)); } + } + } + } } for (unsigned step = 0; step < M - 1; ++step) { @@ -280,175 +297,212 @@ void HillClimbingScheduler::Init() { cost_type comm_cost = schedule->getInstance().getArchitecture().communicationCosts() * commCostList[step].rbegin()->first; cost_type sync_cost = (comm_cost > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0; - if(schedule->getStaleness() == 1) - cost += comm_cost + work_cost[step+1] + sync_cost; - else - cost += std::max(comm_cost, work_cost[step+1]) + sync_cost; + if (schedule->getStaleness() == 1) { + cost += comm_cost + work_cost[step + 1] + sync_cost; + } else { + cost += std::max(comm_cost, work_cost[step + 1]) + sync_cost; + } } updatePromisingMoves(); // memory_constraints - if(use_memory_constraint) - { + if (use_memory_constraint) { memory_used.clear(); memory_used.resize(P, std::vector>(M, 0)); - for (vertex_idx node = 0; node < N; ++node) - memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)] += schedule->getInstance().getComputationalDag().vertex_mem_weight(node); + for (vertex_idx node = 0; node < N; ++node) { + memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)] + += schedule->getInstance().getComputationalDag().vertex_mem_weight(node); + } } - } -template -void HillClimbingScheduler::updatePromisingMoves() -{ - if(!findPromisingMoves) +template +void HillClimbingScheduler::updatePromisingMoves() { + if (!findPromisingMoves) { return; + } const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); const Graph_t &G = schedule->getInstance().getComputationalDag(); promisingMoves.clear(); - for(vertex_idx node=0; node < schedule->getInstance().getComputationalDag().num_vertices(); ++node) - { + for (vertex_idx node = 0; node < schedule->getInstance().getComputationalDag().num_vertices(); ++node) { std::vector nrPredOnProc(P, 0); - for(const vertex_idx &pred : G.parents(node)) + for (const vertex_idx &pred : G.parents(node)) { ++nrPredOnProc[schedule->assignedProcessor(pred)]; + } unsigned otherProcUsed = 0; - for(unsigned proc=0; procassignedProcessor(node)!=proc && nrPredOnProc[proc]>0) + for (unsigned proc = 0; proc < P; ++proc) { + if (schedule->assignedProcessor(node) != proc && nrPredOnProc[proc] > 0) { ++otherProcUsed; + } + } - if(otherProcUsed==1) - for(unsigned proc=0; procassignedProcessor(node)!=proc && nrPredOnProc[proc]>0 && schedule->getInstance().isCompatible(node,proc)) - { + if (otherProcUsed == 1) { + for (unsigned proc = 0; proc < P; ++proc) { + if (schedule->assignedProcessor(node) != proc && nrPredOnProc[proc] > 0 + && schedule->getInstance().isCompatible(node, proc)) { promisingMoves.push_back(std::make_tuple(node, proc, EARLIER)); promisingMoves.push_back(std::make_tuple(node, proc, AT)); promisingMoves.push_back(std::make_tuple(node, proc, LATER)); } + } + } std::vector nrSuccOnProc(P, 0); - for(const vertex_idx &succ : G.children(node)) + for (const vertex_idx &succ : G.children(node)) { ++nrSuccOnProc[schedule->assignedProcessor(succ)]; + } otherProcUsed = 0; - for(unsigned proc=0; procassignedProcessor(node)!=proc && nrSuccOnProc[proc]>0) + for (unsigned proc = 0; proc < P; ++proc) { + if (schedule->assignedProcessor(node) != proc && nrSuccOnProc[proc] > 0) { ++otherProcUsed; + } + } - if(otherProcUsed==1) - for(unsigned proc=0; procassignedProcessor(node)!=proc && nrSuccOnProc[proc]>0 && schedule->getInstance().isCompatible(node,proc)) - { + if (otherProcUsed == 1) { + for (unsigned proc = 0; proc < P; ++proc) { + if (schedule->assignedProcessor(node) != proc && nrSuccOnProc[proc] > 0 + && schedule->getInstance().isCompatible(node, proc)) { promisingMoves.push_back(std::make_tuple(node, proc, EARLIER)); promisingMoves.push_back(std::make_tuple(node, proc, AT)); promisingMoves.push_back(std::make_tuple(node, proc, LATER)); } + } } + } - for(unsigned step=0; step < schedule->numberOfSupersteps(); ++step) - { + for (unsigned step = 0; step < schedule->numberOfSupersteps(); ++step) { std::list minProcs, maxProcs; - cost_type minWork=std::numeric_limits::max(), maxWork=std::numeric_limits::min(); - for(unsigned proc=0; proc maxWork) - maxWork=workCost[step][proc]; - if(workCost[step][proc]< minWork) - minWork=workCost[step][proc]; - } - for(unsigned proc=0; proc::max(), maxWork = std::numeric_limits::min(); + for (unsigned proc = 0; proc < P; ++proc) { + if (workCost[step][proc] > maxWork) { + maxWork = workCost[step][proc]; + } + if (workCost[step][proc] < minWork) { + minWork = workCost[step][proc]; + } + } + for (unsigned proc = 0; proc < P; ++proc) { + if (workCost[step][proc] == minWork) { minProcs.push_back(proc); - if(workCost[step][proc]==maxWork) + } + if (workCost[step][proc] == maxWork) { maxProcs.push_back(proc); + } + } + for (unsigned to : minProcs) { + for (unsigned from : maxProcs) { + for (vertex_idx node : supsteplists[step][from]) { + if (schedule->getInstance().isCompatible(node, to)) { + promisingMoves.push_back(std::make_tuple(node, to, AT)); + } + } + } } - for(unsigned to: minProcs) - for(unsigned from: maxProcs) - for(vertex_idx node : supsteplists[step][from]) - if(schedule->getInstance().isCompatible(node, to)) - promisingMoves.push_back(std::make_tuple(node,to, AT)); } } // Functions to compute and update the std::list of possible moves -template +template void HillClimbingScheduler::updateNodeMovesEarlier(const vertex_idx node) { - if (schedule->assignedSuperstep(node) == 0) + if (schedule->assignedSuperstep(node) == 0) { return; + } std::set predProc; for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) { - if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node)) + if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node)) { return; - if (static_cast(schedule->assignedSuperstep(pred)) >= static_cast(schedule->assignedSuperstep(node)) - static_cast(schedule->getStaleness())) + } + if (static_cast(schedule->assignedSuperstep(pred)) + >= static_cast(schedule->assignedSuperstep(node)) - static_cast(schedule->getStaleness())) { predProc.insert(schedule->assignedProcessor(pred)); + } } - if(schedule->getStaleness() == 2) - { - for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) - if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node)) + if (schedule->getStaleness() == 2) { + for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) { + if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node)) { predProc.insert(schedule->assignedProcessor(succ)); + } + } } - if (predProc.size() > 1) + if (predProc.size() > 1) { return; + } - if (predProc.size() == 1) + if (predProc.size() == 1) { addMoveOption(node, *predProc.begin(), EARLIER); - else - for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) + } else { + for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { addMoveOption(node, proc, EARLIER); + } + } } -template +template void HillClimbingScheduler::updateNodeMovesAt(const vertex_idx node) { - for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) - if (static_cast(schedule->assignedSuperstep(pred)) >= static_cast(schedule->assignedSuperstep(node)) - static_cast(schedule->getStaleness()) + 1) + for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) { + if (static_cast(schedule->assignedSuperstep(pred)) + >= static_cast(schedule->assignedSuperstep(node)) - static_cast(schedule->getStaleness()) + 1) { return; + } + } - for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) - if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness() - 1) + for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) { + if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness() - 1) { return; + } + } - for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) - if (proc != schedule->assignedProcessor(node)) + for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { + if (proc != schedule->assignedProcessor(node)) { addMoveOption(node, proc, AT); + } + } } -template +template void HillClimbingScheduler::updateNodeMovesLater(const vertex_idx node) { - if (schedule->assignedSuperstep(node) == schedule->numberOfSupersteps() - 1) + if (schedule->assignedSuperstep(node) == schedule->numberOfSupersteps() - 1) { return; + } std::set succProc; for (const vertex_idx &succ : schedule->getInstance().getComputationalDag().children(node)) { - if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node)) + if (schedule->assignedSuperstep(succ) == schedule->assignedSuperstep(node)) { return; - if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness()) + } + if (schedule->assignedSuperstep(succ) <= schedule->assignedSuperstep(node) + schedule->getStaleness()) { succProc.insert(schedule->assignedProcessor(succ)); + } } - if(schedule->getStaleness() == 2) - { - for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) - if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node)) + if (schedule->getStaleness() == 2) { + for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) { + if (schedule->assignedSuperstep(pred) == schedule->assignedSuperstep(node)) { succProc.insert(schedule->assignedProcessor(pred)); + } + } } - if (succProc.size() > 1) + if (succProc.size() > 1) { return; + } - if (succProc.size() == 1) + if (succProc.size() == 1) { addMoveOption(node, *succProc.begin(), LATER); - else - for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) + } else { + for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { addMoveOption(node, proc, LATER); + } + } } -template +template void HillClimbingScheduler::updateNodeMoves(const vertex_idx node) { eraseMoveOptions(node); updateNodeMovesEarlier(node); @@ -456,70 +510,56 @@ void HillClimbingScheduler::updateNodeMoves(const vertex_idx node) { updateNodeMovesLater(node); } -template -void HillClimbingScheduler::updateMoveOptions(vertex_idx node, int where) -{ +template +void HillClimbingScheduler::updateMoveOptions(vertex_idx node, int where) { const Graph_t &G = schedule->getInstance().getComputationalDag(); updateNodeMoves(node); - if(where==0) - { - for(const vertex_idx &pred : G.parents(node)) - { + if (where == 0) { + for (const vertex_idx &pred : G.parents(node)) { eraseMoveOptionsLater(pred); updateNodeMovesLater(pred); } - for(const vertex_idx &succ : G.children(node)) - { + for (const vertex_idx &succ : G.children(node)) { eraseMoveOptionsEarlier(succ); updateNodeMovesEarlier(succ); } } - if(where==-1) - { - for(const vertex_idx &pred : G.parents(node)) - { + if (where == -1) { + for (const vertex_idx &pred : G.parents(node)) { eraseMoveOptionsLater(pred); updateNodeMovesLater(pred); eraseMoveOptionsAt(pred); updateNodeMovesAt(pred); - if(schedule->getStaleness() == 2) - { + if (schedule->getStaleness() == 2) { eraseMoveOptionsEarlier(pred); updateNodeMovesEarlier(pred); } } - for(const vertex_idx &succ : G.children(node)) - { + for (const vertex_idx &succ : G.children(node)) { eraseMoveOptionsEarlier(succ); updateNodeMovesEarlier(succ); - if(schedule->getStaleness() == 2) - { + if (schedule->getStaleness() == 2) { eraseMoveOptionsAt(succ); updateNodeMovesAt(succ); } } } - if(where==1) - { - for(const vertex_idx &pred : G.parents(node)) - { + if (where == 1) { + for (const vertex_idx &pred : G.parents(node)) { eraseMoveOptionsLater(pred); updateNodeMovesLater(pred); - if(schedule->getStaleness() == 2) - { + if (schedule->getStaleness() == 2) { eraseMoveOptionsAt(pred); updateNodeMovesAt(pred); } } - for(const vertex_idx &succ : G.children(node)) - { + for (const vertex_idx &succ : G.children(node)) { eraseMoveOptionsEarlier(succ); updateNodeMovesEarlier(succ); eraseMoveOptionsAt(succ); updateNodeMovesAt(succ); - if(schedule->getStaleness() == 2) - { + if (schedule->getStaleness() == 2) { eraseMoveOptionsLater(succ); updateNodeMovesLater(succ); } @@ -527,7 +567,7 @@ void HillClimbingScheduler::updateMoveOptions(vertex_idx node, int wher } } -template +template void HillClimbingScheduler::addMoveOption(const vertex_idx node, const unsigned p, const Direction dir) { if (!canMove[dir][node][p] && schedule->getInstance().isCompatible(node, p)) { canMove[dir][node][p] = true; @@ -536,49 +576,51 @@ void HillClimbingScheduler::addMoveOption(const vertex_idx node, const } } -template -void HillClimbingScheduler::eraseMoveOption(vertex_idx node, unsigned p, Direction dir) -{ +template +void HillClimbingScheduler::eraseMoveOption(vertex_idx node, unsigned p, Direction dir) { canMove[dir][node][p] = false; - if(nextMove.first == dir && nextMove.second->first == node && nextMove.second->second == p) + if (nextMove.first == dir && nextMove.second->first == node && nextMove.second->second == p) { ++nextMove.second; + } moveOptions[dir].erase(movePointer[dir][node][p]); } -template -void HillClimbingScheduler::eraseMoveOptionsEarlier(vertex_idx node) -{ - for(unsigned proc=0; procgetInstance().getArchitecture().numberOfProcessors(); ++proc) - if(canMove[EARLIER][node][proc]) +template +void HillClimbingScheduler::eraseMoveOptionsEarlier(vertex_idx node) { + for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { + if (canMove[EARLIER][node][proc]) { eraseMoveOption(node, proc, EARLIER); + } + } } -template -void HillClimbingScheduler::eraseMoveOptionsAt(vertex_idx node) -{ - for(unsigned proc=0; procgetInstance().getArchitecture().numberOfProcessors(); ++proc) - if(canMove[AT][node][proc]) +template +void HillClimbingScheduler::eraseMoveOptionsAt(vertex_idx node) { + for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { + if (canMove[AT][node][proc]) { eraseMoveOption(node, proc, AT); + } + } } -template -void HillClimbingScheduler::eraseMoveOptionsLater(vertex_idx node) -{ - for(unsigned proc=0; procgetInstance().getArchitecture().numberOfProcessors(); ++proc) - if(canMove[LATER][node][proc]) +template +void HillClimbingScheduler::eraseMoveOptionsLater(vertex_idx node) { + for (unsigned proc = 0; proc < schedule->getInstance().getArchitecture().numberOfProcessors(); ++proc) { + if (canMove[LATER][node][proc]) { eraseMoveOption(node, proc, LATER); + } + } } -template -void HillClimbingScheduler::eraseMoveOptions(vertex_idx node) -{ +template +void HillClimbingScheduler::eraseMoveOptions(vertex_idx node) { eraseMoveOptionsEarlier(node); eraseMoveOptionsAt(node); eraseMoveOptionsLater(node); } // Compute the cost change incurred by a potential move -template +template int HillClimbingScheduler::moveCostChange(const vertex_idx node, unsigned p, const int where, stepAuxData &changing) { const unsigned step = schedule->assignedSuperstep(node); const unsigned new_step = static_cast(static_cast(step) + where); @@ -596,116 +638,174 @@ int HillClimbingScheduler::moveCostChange(const vertex_idx node, unsign if (itBest->second == oldProc) { auto itNext = itBest; --itNext; - maxAfterRemoval = std::max(itBest->first - schedule->getInstance().getComputationalDag().vertex_work_weight(node), itNext->first); - if(itBest->first != maxAfterRemoval) - { - if(step == 0 || schedule->getStaleness() == 1) // incorporate immediately into cost change + maxAfterRemoval + = std::max(itBest->first - schedule->getInstance().getComputationalDag().vertex_work_weight(node), itNext->first); + if (itBest->first != maxAfterRemoval) { + if (step == 0 || schedule->getStaleness() == 1) { // incorporate immediately into cost change change -= static_cast(itBest->first) - static_cast(maxAfterRemoval); - else - { + } else { newWorkCost[step] = maxAfterRemoval; - affectedSteps.insert(step-1); + affectedSteps.insert(step - 1); } } } const cost_type maxBeforeAddition = (where == 0) ? maxAfterRemoval : workCostList[new_step].rbegin()->first; - if (workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node) > maxBeforeAddition) - { - if(new_step == 0 || schedule->getStaleness() == 1) // incorporate immediately into cost change - change += static_cast(workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node)) - static_cast(maxBeforeAddition); - else - { + if (workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node) > maxBeforeAddition) { + if (new_step == 0 || schedule->getStaleness() == 1) { // incorporate immediately into cost change + change + += static_cast(workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node)) + - static_cast(maxBeforeAddition); + } else { newWorkCost[new_step] = workCost[new_step][p] + schedule->getInstance().getComputationalDag().vertex_work_weight(node); - affectedSteps.insert(new_step-1); + affectedSteps.insert(new_step - 1); } } // Comm cost change - std::list > sentInc, recInc; + std::list> sentInc, recInc; // -outputs if (p != oldProc) { for (unsigned j = 0; j < schedule->getInstance().getArchitecture().numberOfProcessors(); ++j) { - if (succSteps[node][j].empty()) + if (succSteps[node][j].empty()) { continue; + } unsigned affectedStep = succSteps[node][j].begin()->first - schedule->getStaleness(); if (j == p) { - sentInc.emplace_back(affectedStep, oldProc, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); - recInc.emplace_back(affectedStep, p, -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); + sentInc.emplace_back(affectedStep, + oldProc, + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); + recInc.emplace_back(affectedStep, + p, + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); } else if (j == oldProc) { - recInc.emplace_back(affectedStep, oldProc, static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(p, j))); - sentInc.emplace_back(affectedStep, p, static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(p, j))); + recInc.emplace_back(affectedStep, + oldProc, + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(p, j))); + sentInc.emplace_back(affectedStep, + p, + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(p, j))); } else { - sentInc.emplace_back(affectedStep, oldProc, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); - recInc.emplace_back(affectedStep, j, -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); - sentInc.emplace_back(affectedStep, p, static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(p, j))); - recInc.emplace_back(affectedStep, j, static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(p, j))); + sentInc.emplace_back(affectedStep, + oldProc, + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); + recInc.emplace_back(affectedStep, + j, + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(oldProc, j))); + sentInc.emplace_back(affectedStep, + p, + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(p, j))); + recInc.emplace_back(affectedStep, + j, + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(p, j))); } } } // -inputs - if (p == oldProc) + if (p == oldProc) { for (const vertex_idx &pred : G.parents(node)) { - if (schedule->assignedProcessor(pred) == p) + if (schedule->assignedProcessor(pred) == p) { continue; + } const auto firstUse = *succSteps[pred][p].begin(); const bool skip = firstUse.first < step || (firstUse.first == step && where >= 0 && firstUse.second > 1); if (!skip) { - sentInc.emplace_back(step - schedule->getStaleness(), schedule->assignedProcessor(pred), - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); - recInc.emplace_back(step - schedule->getStaleness(), p, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); - sentInc.emplace_back(new_step - schedule->getStaleness(), schedule->assignedProcessor(pred), - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); - recInc.emplace_back(new_step - schedule->getStaleness(), p, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); + sentInc.emplace_back(step - schedule->getStaleness(), + schedule->assignedProcessor(pred), + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts( + schedule->assignedProcessor(pred), p))); + recInc.emplace_back(step - schedule->getStaleness(), + p, + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts( + schedule->assignedProcessor(pred), p))); + sentInc.emplace_back( + new_step - schedule->getStaleness(), + schedule->assignedProcessor(pred), + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); + recInc.emplace_back( + new_step - schedule->getStaleness(), + p, + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); } } - else + } else { for (const vertex_idx &pred : G.parents(node)) { // Comm. cost of sending pred to oldProc auto firstUse = succSteps[pred][oldProc].begin(); - bool skip = (schedule->assignedProcessor(pred) == oldProc) || firstUse->first < step || - (firstUse->first == step && firstUse->second > 1); + bool skip = (schedule->assignedProcessor(pred) == oldProc) || firstUse->first < step + || (firstUse->first == step && firstUse->second > 1); if (!skip) { - sentInc.emplace_back(step - schedule->getStaleness(), schedule->assignedProcessor(pred), - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), oldProc))); - recInc.emplace_back(step - schedule->getStaleness(), oldProc, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), oldProc))); + sentInc.emplace_back(step - schedule->getStaleness(), + schedule->assignedProcessor(pred), + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts( + schedule->assignedProcessor(pred), oldProc))); + recInc.emplace_back(step - schedule->getStaleness(), + oldProc, + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts( + schedule->assignedProcessor(pred), oldProc))); ++firstUse; if (firstUse != succSteps[pred][oldProc].end()) { const unsigned nextStep = firstUse->first; - sentInc.emplace_back(nextStep - schedule->getStaleness(), schedule->assignedProcessor(pred), - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * - schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), oldProc))); - recInc.emplace_back(nextStep - schedule->getStaleness(), oldProc, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * - schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), oldProc))); + sentInc.emplace_back(nextStep - schedule->getStaleness(), + schedule->assignedProcessor(pred), + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts( + schedule->assignedProcessor(pred), oldProc))); + recInc.emplace_back(nextStep - schedule->getStaleness(), + oldProc, + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts( + schedule->assignedProcessor(pred), oldProc))); } } // Comm. cost of sending pred to p firstUse = succSteps[pred][p].begin(); - skip = (schedule->assignedProcessor(pred) == p) || - ((firstUse != succSteps[pred][p].end()) && (firstUse->first <= new_step)); + skip = (schedule->assignedProcessor(pred) == p) + || ((firstUse != succSteps[pred][p].end()) && (firstUse->first <= new_step)); if (!skip) { - sentInc.emplace_back(new_step - schedule->getStaleness(), schedule->assignedProcessor(pred), - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); - recInc.emplace_back(new_step - schedule->getStaleness(), p, - static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); + sentInc.emplace_back( + new_step - schedule->getStaleness(), + schedule->assignedProcessor(pred), + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); + recInc.emplace_back( + new_step - schedule->getStaleness(), + p, + static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); if (firstUse != succSteps[pred][p].end()) { - sentInc.emplace_back(firstUse->first - schedule->getStaleness(), schedule->assignedProcessor(pred), - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); - recInc.emplace_back(firstUse->first - schedule->getStaleness(), p, - -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(pred), p))); + sentInc.emplace_back(firstUse->first - schedule->getStaleness(), + schedule->assignedProcessor(pred), + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts( + schedule->assignedProcessor(pred), p))); + recInc.emplace_back(firstUse->first - schedule->getStaleness(), + p, + -static_cast(schedule->getInstance().getComputationalDag().vertex_comm_weight(pred) + * schedule->getInstance().getArchitecture().sendCosts( + schedule->assignedProcessor(pred), p))); } } } + } // -process changes changing.sentChange.clear(); @@ -716,10 +816,11 @@ int HillClimbingScheduler::moveCostChange(const vertex_idx node, unsign const int e_increase = std::get<2>(entry); affectedSteps.insert(e_step); auto itr = changing.sentChange.find(std::make_pair(e_step, e_proc)); - if (itr == changing.sentChange.end()) + if (itr == changing.sentChange.end()) { changing.sentChange.insert({std::make_pair(e_step, e_proc), e_increase}); - else + } else { itr->second += e_increase; + } } for (auto entry : recInc) { const unsigned e_step = std::get<0>(entry); @@ -727,10 +828,11 @@ int HillClimbingScheduler::moveCostChange(const vertex_idx node, unsign const int e_increase = std::get<2>(entry); affectedSteps.insert(e_step); auto itr = changing.recChange.find(std::make_pair(e_step, e_proc)); - if (itr == changing.recChange.end()) + if (itr == changing.recChange.end()) { changing.recChange.insert({std::make_pair(e_step, e_proc), e_increase}); - else + } else { itr->second += e_increase; + } } auto itrSent = changing.sentChange.begin(), itrRec = changing.recChange.begin(); @@ -744,38 +846,40 @@ int HillClimbingScheduler::moveCostChange(const vertex_idx node, unsign int diff = (itrSent != changing.sentChange.end() && itrSent->first.first == sstep && itrSent->first.second == j) ? (itrSent++)->second : 0; - if (static_cast(sent[sstep][j]) + diff > static_cast(newMax)) + if (static_cast(sent[sstep][j]) + diff > static_cast(newMax)) { newMax = static_cast(static_cast(sent[sstep][j]) + diff); + } diff = (itrRec != changing.recChange.end() && itrRec->first.first == sstep && itrRec->first.second == j) ? (itrRec++)->second : 0; - if (static_cast(received[sstep][j]) + diff > static_cast(newMax)) + if (static_cast(received[sstep][j]) + diff > static_cast(newMax)) { newMax = static_cast(static_cast(received[sstep][j]) + diff); + } } newMax *= schedule->getInstance().getArchitecture().communicationCosts(); cost_type newSync = (HCwithLatency && newMax > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0; - if(newMax == 0) - { - if(schedule->getStaleness() == 1) + if (newMax == 0) { + if (schedule->getStaleness() == 1) { changing.canShrink = true; - else - { - if( (sstep > 0 && affectedSteps.find(sstep-1) == affectedSteps.end() && commCostList[sstep-1].rbegin()->first == 0) || - (sstep < commCostList.size()-1 && affectedSteps.find(sstep+1) == affectedSteps.end() && commCostList[sstep+1].rbegin()->first == 0) || - (sstep > 0 && affectedSteps.find(sstep-1) != affectedSteps.end() && last_affected_empty) ) + } else { + if ((sstep > 0 && affectedSteps.find(sstep - 1) == affectedSteps.end() + && commCostList[sstep - 1].rbegin()->first == 0) + || (sstep < commCostList.size() - 1 && affectedSteps.find(sstep + 1) == affectedSteps.end() + && commCostList[sstep + 1].rbegin()->first == 0) + || (sstep > 0 && affectedSteps.find(sstep - 1) != affectedSteps.end() && last_affected_empty)) { changing.canShrink = true; + } } last_affected_empty = true; - } - else + } else { last_affected_empty = false; + } - if(schedule->getStaleness() == 2) - { - auto itrWork = newWorkCost.find(sstep+1); - oldMax = std::max(oldMax, workCostList[sstep+1].rbegin()->first); - newMax = std::max(newMax, itrWork != newWorkCost.end() ? itrWork->second : workCostList[sstep+1].rbegin()->first); + if (schedule->getStaleness() == 2) { + auto itrWork = newWorkCost.find(sstep + 1); + oldMax = std::max(oldMax, workCostList[sstep + 1].rbegin()->first); + newMax = std::max(newMax, itrWork != newWorkCost.end() ? itrWork->second : workCostList[sstep + 1].rbegin()->first); } change += static_cast(newMax + newSync) - static_cast(oldMax + oldSync); } @@ -785,8 +889,11 @@ int HillClimbingScheduler::moveCostChange(const vertex_idx node, unsign } // Execute a chosen move, updating the schedule and the data structures -template -void HillClimbingScheduler::executeMove(const vertex_idx node, const unsigned newProc, const int where, const stepAuxData &changing) { +template +void HillClimbingScheduler::executeMove(const vertex_idx node, + const unsigned newProc, + const int where, + const stepAuxData &changing) { unsigned oldStep = schedule->assignedSuperstep(node); unsigned newStep = static_cast(static_cast(oldStep) + where); const unsigned oldProc = schedule->assignedProcessor(node); @@ -795,55 +902,63 @@ void HillClimbingScheduler::executeMove(const vertex_idx node, const un // Work cost change workCostList[oldStep].erase(workCostPointer[oldStep][oldProc]); workCost[oldStep][oldProc] -= schedule->getInstance().getComputationalDag().vertex_work_weight(node); - workCostPointer[oldStep][oldProc] = - workCostList[oldStep].insert(std::make_pair(workCost[oldStep][oldProc], oldProc)).first; + workCostPointer[oldStep][oldProc] = workCostList[oldStep].insert(std::make_pair(workCost[oldStep][oldProc], oldProc)).first; workCostList[newStep].erase(workCostPointer[newStep][newProc]); workCost[newStep][newProc] += schedule->getInstance().getComputationalDag().vertex_work_weight(node); - workCostPointer[newStep][newProc] = - workCostList[newStep].insert(std::make_pair(workCost[newStep][newProc], newProc)).first; + workCostPointer[newStep][newProc] = workCostList[newStep].insert(std::make_pair(workCost[newStep][newProc], newProc)).first; // Comm cost change - for (const auto& update : changing.sentChange) - sent[update.first.first][update.first.second] = static_cast(static_cast(sent[update.first.first][update.first.second]) + update.second); - for (const auto& update : changing.recChange) - received[update.first.first][update.first.second] = static_cast(static_cast(received[update.first.first][update.first.second]) + update.second); - - std::set > toUpdate; - for (const auto& update : changing.sentChange) - if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second]) != - commCost[update.first.first][update.first.second]) + for (const auto &update : changing.sentChange) { + sent[update.first.first][update.first.second] + = static_cast(static_cast(sent[update.first.first][update.first.second]) + update.second); + } + for (const auto &update : changing.recChange) { + received[update.first.first][update.first.second] + = static_cast(static_cast(received[update.first.first][update.first.second]) + update.second); + } + + std::set> toUpdate; + for (const auto &update : changing.sentChange) { + if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second]) + != commCost[update.first.first][update.first.second]) { toUpdate.insert(std::make_pair(update.first.first, update.first.second)); + } + } - for (const auto& update : changing.recChange) - if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second]) != - commCost[update.first.first][update.first.second]) + for (const auto &update : changing.recChange) { + if (std::max(sent[update.first.first][update.first.second], received[update.first.first][update.first.second]) + != commCost[update.first.first][update.first.second]) { toUpdate.insert(std::make_pair(update.first.first, update.first.second)); + } + } - for (const auto& update : toUpdate) { + for (const auto &update : toUpdate) { commCostList[update.first].erase(commCostPointer[update.first][update.second]); commCost[update.first][update.second] = std::max(sent[update.first][update.second], received[update.first][update.second]); - commCostPointer[update.first][update.second] = - commCostList[update.first].insert(std::make_pair(commCost[update.first][update.second], update.second)).first; + commCostPointer[update.first][update.second] + = commCostList[update.first].insert(std::make_pair(commCost[update.first][update.second], update.second)).first; } // update successor lists for (const vertex_idx &pred : schedule->getInstance().getComputationalDag().parents(node)) { auto itr = succSteps[pred][oldProc].find(oldStep); - if ((--(itr->second)) == 0) + if ((--(itr->second)) == 0) { succSteps[pred][oldProc].erase(itr); + } itr = succSteps[pred][newProc].find(newStep); - if (itr == succSteps[pred][newProc].end()) + if (itr == succSteps[pred][newProc].end()) { succSteps[pred][newProc].insert({newStep, 1U}); - else + } else { itr->second += 1; + } } // memory constraints, if any - if(use_memory_constraint) - { - memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)] -= schedule->getInstance().getComputationalDag().vertex_mem_weight(node); + if (use_memory_constraint) { + memory_used[schedule->assignedProcessor(node)][schedule->assignedSuperstep(node)] + -= schedule->getInstance().getComputationalDag().vertex_mem_weight(node); memory_used[newProc][newStep] += schedule->getInstance().getComputationalDag().vertex_mem_weight(node); } @@ -858,7 +973,7 @@ void HillClimbingScheduler::executeMove(const vertex_idx node, const un } // Single hill climbing step -template +template bool HillClimbingScheduler::Improve() { cost_type bestCost = cost; stepAuxData bestMoveData; @@ -867,8 +982,7 @@ bool HillClimbingScheduler::Improve() { int startingDir = nextMove.first; // pre-selected "promising" moves - while(!promisingMoves.empty() && !steepestAscent) - { + while (!promisingMoves.empty() && !steepestAscent) { std::tuple next = promisingMoves.front(); promisingMoves.pop_front(); @@ -876,44 +990,43 @@ bool HillClimbingScheduler::Improve() { const unsigned proc = std::get<1>(next); const int where = std::get<2>(next); - if(!canMove[static_cast(where)][node][proc]) + if (!canMove[static_cast(where)][node][proc]) { continue; + } - if(use_memory_constraint && violatesMemConstraint(node, proc, where-1)) + if (use_memory_constraint && violatesMemConstraint(node, proc, where - 1)) { continue; + } stepAuxData moveData; - int costDiff = moveCostChange(node, proc, where-1, moveData); + int costDiff = moveCostChange(node, proc, where - 1, moveData); - if(costDiff<0) - { - executeMove(node, proc, where-1, moveData); - if(shrink && moveData.canShrink) + if (costDiff < 0) { + executeMove(node, proc, where - 1, moveData); + if (shrink && moveData.canShrink) { Init(); + } return true; } - } // standard moves int dir = startingDir; - while(true) - { + while (true) { bool reachedBeginning = false; - while(nextMove.second == moveOptions[static_cast(nextMove.first)].end()) - { - dir = (nextMove.first+1)%NumDirections; - if(dir == startingDir) - { + while (nextMove.second == moveOptions[static_cast(nextMove.first)].end()) { + dir = (nextMove.first + 1) % NumDirections; + if (dir == startingDir) { reachedBeginning = true; break; } nextMove.first = dir; nextMove.second = moveOptions[static_cast(nextMove.first)].begin(); } - if(reachedBeginning) + if (reachedBeginning) { break; + } std::pair next = *nextMove.second; ++nextMove.second; @@ -921,55 +1034,54 @@ bool HillClimbingScheduler::Improve() { const vertex_idx node = next.first; const unsigned proc = next.second; - if(use_memory_constraint && violatesMemConstraint(node, proc, dir-1)) + if (use_memory_constraint && violatesMemConstraint(node, proc, dir - 1)) { continue; + } stepAuxData moveData; - int costDiff = moveCostChange(node, proc, dir-1, moveData); + int costDiff = moveCostChange(node, proc, dir - 1, moveData); - if(!steepestAscent && costDiff<0) - { - executeMove(node, proc, dir-1, moveData); - if(shrink && moveData.canShrink) + if (!steepestAscent && costDiff < 0) { + executeMove(node, proc, dir - 1, moveData); + if (shrink && moveData.canShrink) { Init(); + } return true; - } - else if(static_cast(static_cast(cost)+costDiff) < bestCost) - { - bestCost = static_cast(static_cast(cost)+costDiff); + } else if (static_cast(static_cast(cost) + costDiff) < bestCost) { + bestCost = static_cast(static_cast(cost) + costDiff); bestMove = next; bestMoveData = moveData; - bestDir = dir-1; + bestDir = dir - 1; } - - } - if (bestCost == cost) + if (bestCost == cost) { return false; + } executeMove(bestMove.first, bestMove.second, bestDir, bestMoveData); - if(shrink && bestMoveData.canShrink) + if (shrink && bestMoveData.canShrink) { Init(); + } return true; } // Check if move violates mem constraints -template -bool HillClimbingScheduler::violatesMemConstraint(vertex_idx node, unsigned processor, int where) -{ - if(memory_used[processor][static_cast(static_cast(schedule->assignedSuperstep(node))+where)] - + schedule->getInstance().getComputationalDag().vertex_mem_weight(node) > schedule->getInstance().memoryBound(processor)) // TODO ANDRAS double check change +template +bool HillClimbingScheduler::violatesMemConstraint(vertex_idx node, unsigned processor, int where) { + if (memory_used[processor][static_cast(static_cast(schedule->assignedSuperstep(node)) + where)] + + schedule->getInstance().getComputationalDag().vertex_mem_weight(node) + > schedule->getInstance().memoryBound(processor)) { // TODO ANDRAS double check change return true; + } return false; } -template +template void HillClimbingScheduler::CreateSupstepLists() { - const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); const Graph_t &G = schedule->getInstance().getComputationalDag(); @@ -979,9 +1091,9 @@ void HillClimbingScheduler::CreateSupstepLists() { supsteplists.clear(); supsteplists.resize(M, std::vector>(P)); - for (vertex_idx node : top_sort_view(G)) + for (vertex_idx node : top_sort_view(G)) { supsteplists[schedule->assignedSuperstep(node)][schedule->assignedProcessor(node)].push_back(node); - + } } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp index ba895b70..07131ff3 100644 --- a/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp @@ -25,9 +25,8 @@ limitations under the License. namespace osp { -template +template class HillClimbingForCommSteps { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); static_assert(is_computational_dag_v, "Graph_t must satisfy the computational_dag concept"); @@ -88,27 +87,27 @@ class HillClimbingForCommSteps { virtual std::string getScheduleName() const { return "HillClimbingForCommSchedule"; } }; -template +template RETURN_STATUS HillClimbingForCommSteps::improveSchedule(BspScheduleCS &input_schedule) { - return improveScheduleWithTimeLimit(input_schedule, 180); } // Main method for hill climbing (with time limit) -template -RETURN_STATUS HillClimbingForCommSteps::improveScheduleWithTimeLimit(BspScheduleCS &input_schedule, const unsigned timeLimit) { - +template +RETURN_STATUS HillClimbingForCommSteps::improveScheduleWithTimeLimit(BspScheduleCS &input_schedule, + const unsigned timeLimit) { schedule = &input_schedule; - if (schedule->numberOfSupersteps() <= 2) + if (schedule->numberOfSupersteps() <= 2) { return RETURN_STATUS::OSP_SUCCESS; + } Init(); // ConvertCommSchedule(); const std::chrono::steady_clock::time_point startTime = std::chrono::steady_clock::now(); unsigned counter = 0; - while (Improve()) + while (Improve()) { if ((++counter) == 100) { counter = 0; std::chrono::steady_clock::time_point now = std::chrono::steady_clock::now(); @@ -118,6 +117,7 @@ RETURN_STATUS HillClimbingForCommSteps::improveScheduleWithTimeLimit(Bs break; } } + } ConvertCommSchedule(); @@ -125,7 +125,7 @@ RETURN_STATUS HillClimbingForCommSteps::improveScheduleWithTimeLimit(Bs } // Initialization for comm. schedule hill climbing -template +template void HillClimbingForCommSteps::Init() { const unsigned N = static_cast(schedule->getInstance().getComputationalDag().num_vertices()); const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); @@ -160,63 +160,72 @@ void HillClimbingForCommSteps::Init() { commSchedRecListPointer.resize(N, std::vector>::iterator>(P)); // initialize to lazy comm schedule first - to make sure it's correct even if e.g. com scehdule has indirect sending - for (unsigned step = 1; step < M; ++step) - for (unsigned proc = 0; proc < P; ++proc) - for (const vertex_idx node : supsteplists[step][proc]) - for (const vertex_idx &pred : G.parents(node)) - if (schedule->assignedProcessor(pred) != schedule->assignedProcessor(node) && - commSchedule[pred][schedule->assignedProcessor(node)] == UINT_MAX) { + for (unsigned step = 1; step < M; ++step) { + for (unsigned proc = 0; proc < P; ++proc) { + for (const vertex_idx node : supsteplists[step][proc]) { + for (const vertex_idx &pred : G.parents(node)) { + if (schedule->assignedProcessor(pred) != schedule->assignedProcessor(node) + && commSchedule[pred][schedule->assignedProcessor(node)] == UINT_MAX) { commSchedule[pred][schedule->assignedProcessor(node)] = step - schedule->getStaleness(); - commBounds[pred][schedule->assignedProcessor(node)] = std::make_pair(schedule->assignedSuperstep(pred), step - schedule->getStaleness()); + commBounds[pred][schedule->assignedProcessor(node)] + = std::make_pair(schedule->assignedSuperstep(pred), step - schedule->getStaleness()); } + } + } + } + } // overwrite with original comm schedule, wherever possible - const std::map, unsigned int> originalCommSchedule = schedule->getCommunicationSchedule(); - for (vertex_idx node = 0; node < N; ++node) + const std::map, unsigned int> originalCommSchedule + = schedule->getCommunicationSchedule(); + for (vertex_idx node = 0; node < N; ++node) { for (unsigned proc = 0; proc < P; ++proc) { - if (commSchedule[node][proc] == UINT_MAX) + if (commSchedule[node][proc] == UINT_MAX) { continue; + } const auto comm_schedule_key = std::make_tuple(node, schedule->assignedProcessor(node), proc); auto mapIterator = originalCommSchedule.find(comm_schedule_key); if (mapIterator != originalCommSchedule.end()) { unsigned originalStep = mapIterator->second; - if (originalStep >= commBounds[node][proc].first && originalStep <= commBounds[node][proc].second) + if (originalStep >= commBounds[node][proc].first && originalStep <= commBounds[node][proc].second) { commSchedule[node][proc] = originalStep; + } } unsigned step = commSchedule[node][proc]; commSchedSendLists[step][schedule->assignedProcessor(node)].emplace_front(node, proc); - commSchedSendListPointer[node][proc] = - commSchedSendLists[step][schedule->assignedProcessor(node)].begin(); + commSchedSendListPointer[node][proc] = commSchedSendLists[step][schedule->assignedProcessor(node)].begin(); commSchedRecLists[step][proc].emplace_front(node, proc); - commSchedRecListPointer[node][proc] = - commSchedRecLists[step][proc].begin(); + commSchedRecListPointer[node][proc] = commSchedRecLists[step][proc].begin(); - sent[step][schedule->assignedProcessor(node)] += - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc); - received[step][proc] += - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc); + sent[step][schedule->assignedProcessor(node)] + += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc); + received[step][proc] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(schedule->assignedProcessor(node), proc); } + } - for (unsigned step = 0; step < M - 1; ++step) + for (unsigned step = 0; step < M - 1; ++step) { for (unsigned proc = 0; proc < P; ++proc) { commCost[step][proc] = std::max(sent[step][proc], received[step][proc]); commCostPointer[step][proc] = commCostList[step].emplace(commCost[step][proc], proc).first; } + } // set minimum cost - differs for BSP and MaxBSP minimum_cost_per_superstep.clear(); - if (schedule->getStaleness() == 1) + if (schedule->getStaleness() == 1) { minimum_cost_per_superstep.resize(M - 1, 0); - else { + } else { minimum_cost_per_superstep = cost_helpers::compute_max_work_per_step(*schedule); minimum_cost_per_superstep.erase(minimum_cost_per_superstep.begin()); } } // compute cost change incurred by a potential move -template +template int HillClimbingForCommSteps::moveCostChange(const vertex_idx node, const unsigned p, const unsigned step) { const unsigned oldStep = commSchedule[node][p]; const unsigned sourceProc = schedule->assignedProcessor(node); @@ -224,20 +233,27 @@ int HillClimbingForCommSteps::moveCostChange(const vertex_idx node, con // Change at old place auto itr = commCostList[oldStep].rbegin(); - cost_type oldMax = std::max(itr->first * schedule->getInstance().getArchitecture().communicationCosts(), minimum_cost_per_superstep[oldStep]) + schedule->getInstance().getArchitecture().synchronisationCosts(); - cost_type maxSource = - std::max(sent[oldStep][sourceProc] - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p), - received[oldStep][sourceProc]); + cost_type oldMax = std::max(itr->first * schedule->getInstance().getArchitecture().communicationCosts(), + minimum_cost_per_superstep[oldStep]) + + schedule->getInstance().getArchitecture().synchronisationCosts(); + cost_type maxSource = std::max(sent[oldStep][sourceProc] + - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p), + received[oldStep][sourceProc]); cost_type maxTarget = std::max(sent[oldStep][p], - received[oldStep][p] - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p)); + received[oldStep][p] + - schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p)); cost_type maxOther = 0; - for (; itr != commCostList[oldStep].rend(); ++itr) + for (; itr != commCostList[oldStep].rend(); ++itr) { if (itr->second != sourceProc && itr->second != p) { maxOther = itr->first; break; } + } - cost_type newMax = std::max(std::max(maxSource, maxTarget), maxOther) * schedule->getInstance().getArchitecture().communicationCosts(); + cost_type newMax + = std::max(std::max(maxSource, maxTarget), maxOther) * schedule->getInstance().getArchitecture().communicationCosts(); cost_type newSync = (newMax > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0; newMax = std::max(newMax, minimum_cost_per_superstep[oldStep]) + newSync; change += static_cast(newMax) - static_cast(oldMax); @@ -246,19 +262,24 @@ int HillClimbingForCommSteps::moveCostChange(const vertex_idx node, con oldMax = commCostList[step].rbegin()->first * schedule->getInstance().getArchitecture().communicationCosts(); cost_type oldSync = (oldMax > 0) ? schedule->getInstance().getArchitecture().synchronisationCosts() : 0; oldMax = std::max(oldMax, minimum_cost_per_superstep[step]); - maxSource = schedule->getInstance().getArchitecture().communicationCosts() * - (sent[step][sourceProc] + schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p)); - maxTarget = schedule->getInstance().getArchitecture().communicationCosts() * - (received[step][p] + schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p)); + maxSource = schedule->getInstance().getArchitecture().communicationCosts() + * (sent[step][sourceProc] + + schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p)); + maxTarget = schedule->getInstance().getArchitecture().communicationCosts() + * (received[step][p] + + schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p)); newMax = std::max(std::max(oldMax, maxSource), maxTarget); - change += static_cast(newMax + schedule->getInstance().getArchitecture().synchronisationCosts()) - static_cast(oldMax + oldSync); + change += static_cast(newMax + schedule->getInstance().getArchitecture().synchronisationCosts()) + - static_cast(oldMax + oldSync); return change; } // execute a move, updating the comm. schedule and the data structures -template +template void HillClimbingForCommSteps::executeMove(vertex_idx node, unsigned p, const unsigned step, const int changeCost) { const unsigned oldStep = commSchedule[node][p]; const unsigned sourceProc = schedule->assignedProcessor(node); @@ -267,31 +288,37 @@ void HillClimbingForCommSteps::executeMove(vertex_idx node, unsigned p, // Old step update if (sent[oldStep][sourceProc] > received[oldStep][sourceProc]) { commCostList[oldStep].erase(commCostPointer[oldStep][sourceProc]); - sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); commCost[oldStep][sourceProc] = std::max(sent[oldStep][sourceProc], received[oldStep][sourceProc]); - commCostPointer[oldStep][sourceProc] = - commCostList[oldStep].emplace(commCost[oldStep][sourceProc], sourceProc).first; - } else - sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + commCostPointer[oldStep][sourceProc] = commCostList[oldStep].emplace(commCost[oldStep][sourceProc], sourceProc).first; + } else { + sent[oldStep][sourceProc] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + } if (received[oldStep][p] > sent[oldStep][p]) { commCostList[oldStep].erase(commCostPointer[oldStep][p]); - received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); commCost[oldStep][p] = std::max(sent[oldStep][p], received[oldStep][p]); commCostPointer[oldStep][p] = commCostList[oldStep].emplace(commCost[oldStep][p], p).first; - } else - received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + } else { + received[oldStep][p] -= schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + } // New step update - sent[step][sourceProc] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + sent[step][sourceProc] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); if (sent[step][sourceProc] > received[step][sourceProc]) { commCostList[step].erase(commCostPointer[step][sourceProc]); commCost[step][sourceProc] = sent[step][sourceProc]; - commCostPointer[step][sourceProc] = - commCostList[step].emplace(commCost[step][sourceProc], sourceProc).first; + commCostPointer[step][sourceProc] = commCostList[step].emplace(commCost[step][sourceProc], sourceProc).first; } - received[step][p] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); + received[step][p] += schedule->getInstance().getComputationalDag().vertex_comm_weight(node) + * schedule->getInstance().getArchitecture().sendCosts(sourceProc, p); if (received[step][p] > sent[step][p]) { commCostList[step].erase(commCostPointer[step][p]); commCost[step][p] = received[step][p]; @@ -312,9 +339,8 @@ void HillClimbingForCommSteps::executeMove(vertex_idx node, unsigned p, } // Single comm. schedule hill climbing step -template +template bool HillClimbingForCommSteps::Improve() { - const unsigned M = static_cast(schedule->numberOfSupersteps()); int bestDiff = 0; vertex_idx bestNode = 0; @@ -325,31 +351,34 @@ bool HillClimbingForCommSteps::Improve() { while (true) { auto itr = commCostList[nextSupstep].rbegin(); - if (itr == commCostList[nextSupstep].crend()) + if (itr == commCostList[nextSupstep].crend()) { break; + } // find maximal comm cost that dominates the h-relation const cost_type commMax = itr->first; if (commMax == 0) { nextSupstep = (nextSupstep + 1) % (M - 1); - if (nextSupstep == startingSupstep) + if (nextSupstep == startingSupstep) { break; - else + } else { continue; + } } // go over all processors that incur this maximal comm cost in superstep nextSupstep for (; itr != commCostList[nextSupstep].rend() && itr->first == commMax; ++itr) { const unsigned maxProc = itr->second; - if (sent[nextSupstep][maxProc] == commMax) + if (sent[nextSupstep][maxProc] == commMax) { for (const std::pair &entry : commSchedSendLists[nextSupstep][maxProc]) { const vertex_idx node = entry.first; const unsigned p = entry.second; // iterate over alternative supsteps to place this communication step for (unsigned step = commBounds[node][p].first; step < commBounds[node][p].second; ++step) { - if (step == commSchedule[node][p]) + if (step == commSchedule[node][p]) { continue; + } const int costDiff = moveCostChange(node, p, step); @@ -364,15 +393,17 @@ bool HillClimbingForCommSteps::Improve() { } } } + } - if (received[nextSupstep][maxProc] == commMax) + if (received[nextSupstep][maxProc] == commMax) { for (const std::pair &entry : commSchedRecLists[nextSupstep][maxProc]) { const vertex_idx node = entry.first; const unsigned p = entry.second; // iterate over alternative supsteps to place this communication step for (unsigned step = commBounds[node][p].first; step < commBounds[node][p].second; ++step) { - if (step == commSchedule[node][p]) + if (step == commSchedule[node][p]) { continue; + } const int costDiff = moveCostChange(node, p, step); @@ -388,24 +419,26 @@ bool HillClimbingForCommSteps::Improve() { } } } + } } nextSupstep = (nextSupstep + 1) % (M - 1); - if (nextSupstep == startingSupstep) + if (nextSupstep == startingSupstep) { break; + } } - if (bestDiff == 0) + if (bestDiff == 0) { return false; + } executeMove(bestNode, bestProc, bestStep, bestDiff); return true; } -template +template void HillClimbingForCommSteps::CreateSupstepLists() { - const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); const Graph_t &G = schedule->getInstance().getComputationalDag(); @@ -416,25 +449,28 @@ void HillClimbingForCommSteps::CreateSupstepLists() { supsteplists.resize(M, std::vector>(P)); const std::vector topOrder = GetTopOrder(G); - for (vertex_idx node : topOrder) + for (vertex_idx node : topOrder) { supsteplists[schedule->assignedSuperstep(node)][schedule->assignedProcessor(node)].push_back(node); + } } -template +template void HillClimbingForCommSteps::ConvertCommSchedule() { const vertex_idx N = static_cast(schedule->getInstance().getComputationalDag().num_vertices()); const unsigned P = schedule->getInstance().getArchitecture().numberOfProcessors(); std::map, unsigned> newCommSchedule; - for (vertex_idx node = 0; node < N; ++node) - for (unsigned proc = 0; proc < P; ++proc) + for (vertex_idx node = 0; node < N; ++node) { + for (unsigned proc = 0; proc < P; ++proc) { if (commSchedule[node][proc] != UINT_MAX) { const auto comm_schedule_key = std::make_tuple(node, schedule->assignedProcessor(node), proc); newCommSchedule[comm_schedule_key] = commSchedule[node][proc]; } + } + } schedule->setCommunicationSchedule(newCommSchedule); } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp index 7d378d1b..f9a921e4 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp @@ -19,6 +19,7 @@ limitations under the License. #pragma once #include +#include #include #include #include @@ -28,25 +29,21 @@ limitations under the License. #include #include -#include - +#include "kl_current_schedule.hpp" #include "osp/auxiliary/misc.hpp" #include "osp/bsp/scheduler/ImprovementScheduler.hpp" -#include "kl_current_schedule.hpp" - #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" // #define KL_PRINT_SCHEDULE #ifdef KL_PRINT_SCHEDULE -#include "file_interactions/DotFileWriter.hpp" +# include "file_interactions/DotFileWriter.hpp" #endif namespace osp { struct kl_base_parameter { - double max_div_best_sol_base_percent = 1.05; double max_div_best_sol_rate_percent = 0.002; @@ -74,9 +71,8 @@ struct kl_base_parameter { unsigned violations_threshold = 0; }; -template +template class kl_base : public ImprovementScheduler, public Ikl_cost_function { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph concept"); static_assert(has_hashable_edge_desc_v, "Graph_t must satisfy the has_hashable_edge_desc concept"); static_assert(is_computational_dag_v, "Graph_t must satisfy the computational_dag concept"); @@ -120,9 +116,7 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { std::vector unlock; bool unlock_node(VertexType node) { - if (super_locked_nodes.find(node) == super_locked_nodes.end()) { - if (locked_nodes.find(node) == locked_nodes.end()) { return true; } else if (locked_nodes.find(node) != locked_nodes.end() && unlock[node] > 0) { @@ -137,18 +131,14 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } bool check_node_unlocked(VertexType node) { - - if (super_locked_nodes.find(node) == super_locked_nodes.end() && - locked_nodes.find(node) == locked_nodes.end()) { + if (super_locked_nodes.find(node) == super_locked_nodes.end() && locked_nodes.find(node) == locked_nodes.end()) { return true; } return false; }; void reset_locked_nodes() { - for (const auto &i : locked_nodes) { - unlock[i] = parameters.max_num_unlocks; } @@ -156,23 +146,20 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } bool check_violation_locked() { - - if (current_schedule.current_violations.empty()) + if (current_schedule.current_violations.empty()) { return false; + } for (auto &edge : current_schedule.current_violations) { - const auto &source_v = source(edge, current_schedule.instance->getComputationalDag()); const auto &target_v = target(edge, current_schedule.instance->getComputationalDag()); - if (locked_nodes.find(source_v) == locked_nodes.end() || - locked_nodes.find(target_v) == locked_nodes.end()) { + if (locked_nodes.find(source_v) == locked_nodes.end() || locked_nodes.find(target_v) == locked_nodes.end()) { return false; } bool abort = false; if (locked_nodes.find(source_v) != locked_nodes.end()) { - if (unlock_node(source_v)) { nodes_to_update.insert(source_v); node_selection.insert(source_v); @@ -182,7 +169,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (locked_nodes.find(target_v) != locked_nodes.end()) { - if (unlock_node(target_v)) { nodes_to_update.insert(target_v); node_selection.insert(target_v); @@ -199,13 +185,11 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void reset_gain_heap() { - max_gain_heap.clear(); node_heap_handles.clear(); } virtual void initialize_datastructures() { - #ifdef KL_DEBUG std::cout << "KLBase initialize datastructures" << std::endl; #endif @@ -225,31 +209,23 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { std::unordered_set nodes_to_update; void compute_nodes_to_update(kl_move move) { - nodes_to_update.clear(); for (const auto &target : current_schedule.instance->getComputationalDag().children(move.node)) { - - if (node_selection.find(target) != node_selection.end() && - locked_nodes.find(target) == locked_nodes.end() && - super_locked_nodes.find(target) == super_locked_nodes.end()) { - + if (node_selection.find(target) != node_selection.end() && locked_nodes.find(target) == locked_nodes.end() + && super_locked_nodes.find(target) == super_locked_nodes.end()) { nodes_to_update.insert(target); } } for (const auto &source : current_schedule.instance->getComputationalDag().parents(move.node)) { - - if (node_selection.find(source) != node_selection.end() && - locked_nodes.find(source) == locked_nodes.end() && - super_locked_nodes.find(source) == super_locked_nodes.end()) { - + if (node_selection.find(source) != node_selection.end() && locked_nodes.find(source) == locked_nodes.end() + && super_locked_nodes.find(source) == super_locked_nodes.end()) { nodes_to_update.insert(source); } } - const unsigned start_step = - std::min(move.from_step, move.to_step) == 0 ? 0 : std::min(move.from_step, move.to_step) - 1; + const unsigned start_step = std::min(move.from_step, move.to_step) == 0 ? 0 : std::min(move.from_step, move.to_step) - 1; const unsigned end_step = std::min(current_schedule.num_steps(), std::max(move.from_step, move.to_step) + 2); #ifdef KL_DEBUG @@ -257,15 +233,10 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #endif for (unsigned step = start_step; step < end_step; step++) { - for (unsigned proc = 0; proc < num_procs; proc++) { - for (const auto &node : current_schedule.set_schedule.step_processor_vertices[step][proc]) { - - if (node_selection.find(node) != node_selection.end() && - locked_nodes.find(node) == locked_nodes.end() && - super_locked_nodes.find(node) == super_locked_nodes.end()) { - + if (node_selection.find(node) != node_selection.end() && locked_nodes.find(node) == locked_nodes.end() + && super_locked_nodes.find(node) == super_locked_nodes.end()) { nodes_to_update.insert(node); } } @@ -274,7 +245,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void initialize_gain_heap(const std::unordered_set &nodes) { - reset_gain_heap(); for (const auto &node : nodes) { @@ -284,14 +254,10 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void initialize_gain_heap_unlocked_nodes(const std::unordered_set &nodes) { - reset_gain_heap(); for (const auto &node : nodes) { - - if (locked_nodes.find(node) == locked_nodes.end() && - super_locked_nodes.find(node) == super_locked_nodes.end()) { - + if (locked_nodes.find(node) == locked_nodes.end() && super_locked_nodes.find(node) == super_locked_nodes.end()) { compute_node_gain(node); compute_max_gain_insert_or_update_heap(node); } @@ -299,14 +265,11 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void compute_node_gain(VertexType node) { - const unsigned ¤t_proc = current_schedule.vector_schedule.assignedProcessor(node); const unsigned ¤t_step = current_schedule.vector_schedule.assignedSuperstep(node); for (unsigned new_proc = 0; new_proc < num_procs; new_proc++) { - if (current_schedule.instance->isCompatible(node, new_proc)) { - node_gains[node][new_proc][0] = 0.0; node_gains[node][new_proc][1] = 0.0; node_gains[node][new_proc][2] = 0.0; @@ -319,7 +282,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { compute_work_gain(node, current_step, current_proc, new_proc); if constexpr (current_schedule.use_memory_constraint) { - if (not current_schedule.memory_constraint.can_move( node, new_proc, current_schedule.vector_schedule.assignedSuperstep(node))) { node_gains[node][new_proc][1] = std::numeric_limits::lowest(); @@ -340,7 +302,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } } else { - node_gains[node][new_proc][0] = std::numeric_limits::lowest(); node_gains[node][new_proc][1] = std::numeric_limits::lowest(); node_gains[node][new_proc][2] = std::numeric_limits::lowest(); @@ -349,7 +310,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } double compute_max_gain_insert_or_update_heap(VertexType node) { - double node_max_gain = std::numeric_limits::lowest(); double node_change_in_cost = 0; unsigned node_best_step = 0; @@ -360,14 +320,11 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { unsigned best_step = 0; for (unsigned proc = 0; proc < num_procs; proc++) { - int rand_count = 0; - if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 && - current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - + if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 + && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { if (node_gains[node][proc][0] > node_gains[node][proc][1]) { - if (node_gains[node][proc][0] > node_gains[node][proc][2]) { proc_max = node_gains[node][proc][0]; proc_change_in_cost = node_change_in_costs[node][proc][0]; @@ -380,45 +337,36 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } } else { - if (node_gains[node][proc][1] > node_gains[node][proc][2]) { - proc_max = node_gains[node][proc][1]; proc_change_in_cost = node_change_in_costs[node][proc][1]; best_step = 1; } else { - proc_max = node_gains[node][proc][2]; proc_change_in_cost = node_change_in_costs[node][proc][2]; best_step = 2; } } - } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 && - current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - + } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 + && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { if (node_gains[node][proc][2] > node_gains[node][proc][1]) { - proc_max = node_gains[node][proc][2]; proc_change_in_cost = node_change_in_costs[node][proc][2]; best_step = 2; } else { - proc_max = node_gains[node][proc][1]; proc_change_in_cost = node_change_in_costs[node][proc][1]; best_step = 1; } - } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 && - current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) { - + } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 + && current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) { if (node_gains[node][proc][1] > node_gains[node][proc][0]) { - proc_max = node_gains[node][proc][1]; proc_change_in_cost = node_change_in_costs[node][proc][1]; best_step = 1; } else { - proc_max = node_gains[node][proc][0]; proc_change_in_cost = node_change_in_costs[node][proc][0]; best_step = 0; @@ -430,14 +378,13 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (node_max_gain < proc_max) { - node_max_gain = proc_max; node_change_in_cost = proc_change_in_cost; node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1; node_best_proc = proc; rand_count = 0; - } else if (node_max_gain <= proc_max) { // only == + } else if (node_max_gain <= proc_max) { // only == if (rand() % (2 + rand_count) == 0) { node_max_gain = proc_max; @@ -450,26 +397,27 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (node_heap_handles.find(node) != node_heap_handles.end()) { - (*node_heap_handles[node]).to_proc = node_best_proc; (*node_heap_handles[node]).to_step = node_best_step; (*node_heap_handles[node]).change_in_cost = node_change_in_cost; if ((*node_heap_handles[node]).gain >= node_max_gain) { - (*node_heap_handles[node]).gain = node_max_gain; max_gain_heap.update(node_heap_handles[node]); } } else { - // if (node_max_gain < parameters.gain_threshold && node_change_in_cost > // parameters.change_in_cost_threshold) // return node_max_gain; - kl_move move( - node, node_max_gain, node_change_in_cost, current_schedule.vector_schedule.assignedProcessor(node), - current_schedule.vector_schedule.assignedSuperstep(node), node_best_proc, node_best_step); + kl_move move(node, + node_max_gain, + node_change_in_cost, + current_schedule.vector_schedule.assignedProcessor(node), + current_schedule.vector_schedule.assignedSuperstep(node), + node_best_proc, + node_best_step); node_heap_handles[node] = max_gain_heap.push(move); } @@ -477,57 +425,46 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void compute_work_gain(VertexType node, unsigned current_step, unsigned current_proc, unsigned new_proc) { - if (current_proc == new_proc) { - node_gains[node][current_proc][1] = std::numeric_limits::lowest(); } else { - - if (current_schedule.step_max_work[current_step] == - current_schedule.step_processor_work[current_step][current_proc] && - current_schedule.step_processor_work[current_step][current_proc] > - current_schedule.step_second_max_work[current_step]) { - + if (current_schedule.step_max_work[current_step] == current_schedule.step_processor_work[current_step][current_proc] + && current_schedule.step_processor_work[current_step][current_proc] + > current_schedule.step_second_max_work[current_step]) { // new max - const double new_max_work = - std::max(current_schedule.step_processor_work[current_step][current_proc] - - current_schedule.instance->getComputationalDag().vertex_work_weight(node), - current_schedule.step_second_max_work[current_step]); - - if (current_schedule.step_processor_work[current_step][new_proc] + - current_schedule.instance->getComputationalDag().vertex_work_weight(node) > - new_max_work) { - - const double gain = - static_cast(current_schedule.step_max_work[current_step]) - - (static_cast(current_schedule.step_processor_work[current_step][new_proc]) + - static_cast( - current_schedule.instance->getComputationalDag().vertex_work_weight(node))); + const double new_max_work + = std::max(current_schedule.step_processor_work[current_step][current_proc] + - current_schedule.instance->getComputationalDag().vertex_work_weight(node), + current_schedule.step_second_max_work[current_step]); + + if (current_schedule.step_processor_work[current_step][new_proc] + + current_schedule.instance->getComputationalDag().vertex_work_weight(node) + > new_max_work) { + const double gain + = static_cast(current_schedule.step_max_work[current_step]) + - (static_cast(current_schedule.step_processor_work[current_step][new_proc]) + + static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node))); node_gains[node][new_proc][1] += gain; node_change_in_costs[node][new_proc][1] -= gain; } else { - - const double gain = static_cast(current_schedule.step_max_work[current_step]) - - static_cast(new_max_work); + const double gain + = static_cast(current_schedule.step_max_work[current_step]) - static_cast(new_max_work); node_gains[node][new_proc][1] += gain; node_change_in_costs[node][new_proc][1] -= gain; } } else { - - if (current_schedule.step_max_work[current_step] < - current_schedule.instance->getComputationalDag().vertex_work_weight(node) + - current_schedule.step_processor_work[current_step][new_proc]) { - - const double gain = - (static_cast( - current_schedule.instance->getComputationalDag().vertex_work_weight(node)) + - static_cast(current_schedule.step_processor_work[current_step][new_proc]) - - static_cast(current_schedule.step_max_work[current_step])); + if (current_schedule.step_max_work[current_step] + < current_schedule.instance->getComputationalDag().vertex_work_weight(node) + + current_schedule.step_processor_work[current_step][new_proc]) { + const double gain + = (static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) + + static_cast(current_schedule.step_processor_work[current_step][new_proc]) + - static_cast(current_schedule.step_max_work[current_step])); node_gains[node][new_proc][1] -= gain; node_change_in_costs[node][new_proc][1] += gain; @@ -536,38 +473,31 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (current_step > 0) { - - if (current_schedule.step_max_work[current_step - 1] < - current_schedule.step_processor_work[current_step - 1][new_proc] + - current_schedule.instance->getComputationalDag().vertex_work_weight(node)) { - - const double gain = - static_cast(current_schedule.step_processor_work[current_step - 1][new_proc]) + - static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) - - static_cast(current_schedule.step_max_work[current_step - 1]); + if (current_schedule.step_max_work[current_step - 1] + < current_schedule.step_processor_work[current_step - 1][new_proc] + + current_schedule.instance->getComputationalDag().vertex_work_weight(node)) { + const double gain = static_cast(current_schedule.step_processor_work[current_step - 1][new_proc]) + + static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) + - static_cast(current_schedule.step_max_work[current_step - 1]); node_gains[node][new_proc][0] -= gain; node_change_in_costs[node][new_proc][0] += gain; } - if (current_schedule.step_max_work[current_step] == - current_schedule.step_processor_work[current_step][current_proc] && - current_schedule.step_processor_work[current_step][current_proc] > - current_schedule.step_second_max_work[current_step]) { - - if (current_schedule.step_max_work[current_step] - - current_schedule.instance->getComputationalDag().vertex_work_weight(node) > - current_schedule.step_second_max_work[current_step]) { - + if (current_schedule.step_max_work[current_step] == current_schedule.step_processor_work[current_step][current_proc] + && current_schedule.step_processor_work[current_step][current_proc] + > current_schedule.step_second_max_work[current_step]) { + if (current_schedule.step_max_work[current_step] + - current_schedule.instance->getComputationalDag().vertex_work_weight(node) + > current_schedule.step_second_max_work[current_step]) { const double gain = current_schedule.instance->getComputationalDag().vertex_work_weight(node); node_gains[node][new_proc][0] += gain; node_change_in_costs[node][new_proc][0] -= gain; } else { - - const double gain = current_schedule.step_max_work[current_step] - - current_schedule.step_second_max_work[current_step]; + const double gain + = current_schedule.step_max_work[current_step] - current_schedule.step_second_max_work[current_step]; node_gains[node][new_proc][0] += gain; node_change_in_costs[node][new_proc][0] -= gain; @@ -575,73 +505,59 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } } else { - node_gains[node][new_proc][0] = std::numeric_limits::lowest(); } if (current_step < current_schedule.num_steps() - 1) { - - if (current_schedule.step_max_work[current_step + 1] < - current_schedule.step_processor_work[current_step + 1][new_proc] + - current_schedule.instance->getComputationalDag().vertex_work_weight(node)) { - - const double gain = - static_cast(current_schedule.step_processor_work[current_step + 1][new_proc]) + - static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) - - static_cast(current_schedule.step_max_work[current_step + 1]); + if (current_schedule.step_max_work[current_step + 1] + < current_schedule.step_processor_work[current_step + 1][new_proc] + + current_schedule.instance->getComputationalDag().vertex_work_weight(node)) { + const double gain = static_cast(current_schedule.step_processor_work[current_step + 1][new_proc]) + + static_cast(current_schedule.instance->getComputationalDag().vertex_work_weight(node)) + - static_cast(current_schedule.step_max_work[current_step + 1]); node_gains[node][new_proc][2] -= gain; node_change_in_costs[node][new_proc][2] += gain; } - if (current_schedule.step_max_work[current_step] == - current_schedule.step_processor_work[current_step][current_proc] && - current_schedule.step_processor_work[current_step][current_proc] > - current_schedule.step_second_max_work[current_step]) { - - if ((current_schedule.step_max_work[current_step] - - current_schedule.instance->getComputationalDag().vertex_work_weight(node)) > - current_schedule.step_second_max_work[current_step]) { - + if (current_schedule.step_max_work[current_step] == current_schedule.step_processor_work[current_step][current_proc] + && current_schedule.step_processor_work[current_step][current_proc] + > current_schedule.step_second_max_work[current_step]) { + if ((current_schedule.step_max_work[current_step] + - current_schedule.instance->getComputationalDag().vertex_work_weight(node)) + > current_schedule.step_second_max_work[current_step]) { const double gain = current_schedule.instance->getComputationalDag().vertex_work_weight(node); node_gains[node][new_proc][2] += gain; node_change_in_costs[node][new_proc][2] -= gain; } else { - - const double gain = current_schedule.step_max_work[current_step] - - current_schedule.step_second_max_work[current_step]; + const double gain + = current_schedule.step_max_work[current_step] - current_schedule.step_second_max_work[current_step]; node_gains[node][new_proc][2] += gain; node_change_in_costs[node][new_proc][2] -= gain; } } } else { - node_gains[node][new_proc][2] = std::numeric_limits::lowest(); } } - virtual void compute_comm_gain(vertex_idx_t node, unsigned current_step, unsigned current_proc, - unsigned new_proc) = 0; + virtual void compute_comm_gain(vertex_idx_t node, unsigned current_step, unsigned current_proc, unsigned new_proc) = 0; void update_node_gains(const std::unordered_set &nodes) { - for (const auto &node : nodes) { - compute_node_gain(node); compute_max_gain_insert_or_update_heap(node); } }; kl_move find_best_move() { - const unsigned local_max = 50; std::vector max_nodes(local_max); unsigned count = 0; for (auto iter = max_gain_heap.ordered_begin(); iter != max_gain_heap.ordered_end(); ++iter) { - if (iter->gain >= max_gain_heap.top().gain && count < local_max) { max_nodes[count] = (iter->node); count++; @@ -663,7 +579,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } kl_move compute_best_move(VertexType node) { - double node_max_gain = std::numeric_limits::lowest(); double node_change_in_cost = 0; unsigned node_best_step = 0; @@ -673,14 +588,11 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { double proc_max = 0; unsigned best_step = 0; for (unsigned proc = 0; proc < num_procs; proc++) { - unsigned rand_count = 0; - if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 && - current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - + if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 + && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { if (node_gains[node][proc][0] > node_gains[node][proc][1]) { - if (node_gains[node][proc][0] > node_gains[node][proc][2]) { proc_max = node_gains[node][proc][0]; proc_change_in_cost = node_change_in_costs[node][proc][0]; @@ -693,45 +605,36 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } } else { - if (node_gains[node][proc][1] > node_gains[node][proc][2]) { - proc_max = node_gains[node][proc][1]; proc_change_in_cost = node_change_in_costs[node][proc][1]; best_step = 1; } else { - proc_max = node_gains[node][proc][2]; proc_change_in_cost = node_change_in_costs[node][proc][2]; best_step = 2; } } - } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 && - current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - + } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 + && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { if (node_gains[node][proc][2] > node_gains[node][proc][1]) { - proc_max = node_gains[node][proc][2]; proc_change_in_cost = node_change_in_costs[node][proc][2]; best_step = 2; } else { - proc_max = node_gains[node][proc][1]; proc_change_in_cost = node_change_in_costs[node][proc][1]; best_step = 1; } - } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 && - current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) { - + } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 + && current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) { if (node_gains[node][proc][1] > node_gains[node][proc][0]) { - proc_max = node_gains[node][proc][1]; proc_change_in_cost = node_change_in_costs[node][proc][1]; best_step = 1; } else { - proc_max = node_gains[node][proc][0]; proc_change_in_cost = node_change_in_costs[node][proc][0]; best_step = 0; @@ -743,7 +646,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (node_max_gain < proc_max) { - node_max_gain = proc_max; node_change_in_cost = proc_change_in_cost; node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1; @@ -751,7 +653,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { rand_count = 0; } else if (node_max_gain <= proc_max) { - if (rand() % (2 + rand_count) == 0) { node_max_gain = proc_max; node_change_in_cost = proc_change_in_cost; @@ -762,13 +663,16 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } } - return kl_move( - node, node_max_gain, node_change_in_cost, current_schedule.vector_schedule.assignedProcessor(node), - current_schedule.vector_schedule.assignedSuperstep(node), node_best_proc, node_best_step); + return kl_move(node, + node_max_gain, + node_change_in_cost, + current_schedule.vector_schedule.assignedProcessor(node), + current_schedule.vector_schedule.assignedSuperstep(node), + node_best_proc, + node_best_step); } kl_move best_move_change_superstep(VertexType node) { - double node_max_gain = std::numeric_limits::lowest(); double node_change_in_cost = 0; unsigned node_best_step = 0; @@ -778,10 +682,8 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { double proc_max = 0; unsigned best_step = 0; for (unsigned proc = 0; proc < num_procs; proc++) { - - if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 && - current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - + if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 + && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { if (node_gains[node][proc][0] > node_gains[node][proc][2]) { proc_max = node_gains[node][proc][0]; proc_change_in_cost = node_change_in_costs[node][proc][0]; @@ -793,16 +695,14 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { best_step = 2; } - } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 && - current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { - + } else if (current_schedule.vector_schedule.assignedSuperstep(node) == 0 + && current_schedule.vector_schedule.assignedSuperstep(node) < current_schedule.num_steps() - 1) { proc_max = node_gains[node][proc][2]; proc_change_in_cost = node_change_in_costs[node][proc][2]; best_step = 2; - } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 && - current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) { - + } else if (current_schedule.vector_schedule.assignedSuperstep(node) > 0 + && current_schedule.vector_schedule.assignedSuperstep(node) == current_schedule.num_steps() - 1) { proc_max = node_gains[node][proc][0]; proc_change_in_cost = node_change_in_costs[node][proc][0]; best_step = 0; @@ -812,7 +712,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (node_max_gain < proc_max) { - node_max_gain = proc_max; node_change_in_cost = proc_change_in_cost; node_best_step = current_schedule.vector_schedule.assignedSuperstep(node) + best_step - 1; @@ -820,15 +719,17 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } } - return kl_move( - node, node_max_gain, node_change_in_cost, current_schedule.vector_schedule.assignedProcessor(node), - current_schedule.vector_schedule.assignedSuperstep(node), node_best_proc, node_best_step); + return kl_move(node, + node_max_gain, + node_change_in_cost, + current_schedule.vector_schedule.assignedProcessor(node), + current_schedule.vector_schedule.assignedSuperstep(node), + node_best_proc, + node_best_step); } void save_best_schedule(const IBspSchedule &schedule) { - for (const auto &node : current_schedule.instance->vertices()) { - best_schedule->setAssignedProcessor(node, schedule.assignedProcessor(node)); best_schedule->setAssignedSuperstep(node, schedule.assignedSuperstep(node)); } @@ -843,12 +744,11 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { std::unordered_set node_selection; void select_nodes() { - if (parameters.select_all_nodes) { - for (const auto &node : current_schedule.instance->vertices()) { - if (super_locked_nodes.find(node) == super_locked_nodes.end()) + if (super_locked_nodes.find(node) == super_locked_nodes.end()) { node_selection.insert(node); + } } } else { @@ -857,28 +757,22 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } virtual void select_nodes_comm() { - for (const auto &node : current_schedule.instance->vertices()) { - if (super_locked_nodes.find(node) != super_locked_nodes.end()) { continue; } for (const auto &source : current_schedule.instance->getComputationalDag().parents(node)) { - - if (current_schedule.vector_schedule.assignedProcessor(node) != - current_schedule.vector_schedule.assignedProcessor(source)) { - + if (current_schedule.vector_schedule.assignedProcessor(node) + != current_schedule.vector_schedule.assignedProcessor(source)) { node_selection.insert(node); break; } } for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - - if (current_schedule.vector_schedule.assignedProcessor(node) != - current_schedule.vector_schedule.assignedProcessor(target)) { - + if (current_schedule.vector_schedule.assignedProcessor(node) + != current_schedule.vector_schedule.assignedProcessor(target)) { node_selection.insert(node); break; } @@ -887,11 +781,9 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void select_nodes_threshold(std::size_t threshold) { - std::uniform_int_distribution> dis(0, num_nodes - 1); while (node_selection.size() < threshold) { - auto node = dis(gen); if (super_locked_nodes.find(node) == super_locked_nodes.end()) { @@ -901,28 +793,25 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void select_nodes_permutation_threshold(std::size_t threshold) { - std::vector permutation(num_nodes); std::iota(std::begin(permutation), std::end(permutation), 0); std::shuffle(permutation.begin(), permutation.end(), gen); for (std::size_t i = 0; i < threshold; i++) { - - if (super_locked_nodes.find(permutation[i]) == super_locked_nodes.end()) + if (super_locked_nodes.find(permutation[i]) == super_locked_nodes.end()) { node_selection.insert(permutation[i]); + } } } void select_nodes_violations() { - if (current_schedule.current_violations.empty()) { select_nodes(); return; } for (const auto &edge : current_schedule.current_violations) { - const auto &source_v = source(edge, current_schedule.instance->getComputationalDag()); const auto &target_v = target(edge, current_schedule.instance->getComputationalDag()); @@ -956,9 +845,7 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void select_nodes_conseque_max_work(bool do_not_select_super_locked_nodes = false) { - if (step_selection_epoch_counter > parameters.max_step_selection_epochs) { - #ifdef KL_DEBUG std::cout << "step selection epoch counter exceeded. conseque max work" << std::endl; #endif @@ -973,7 +860,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { unsigned second_max_step = 0; for (unsigned proc = 0; proc < num_procs; proc++) { - if (current_schedule.step_processor_work[step_selection_counter][proc] > max_work_step) { second_max_work_step = max_work_step; second_max_step = max_step; @@ -986,35 +872,31 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } } - if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].size() < - parameters.selection_threshold * .66) { - - node_selection.insert( - current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].begin(), - current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].end()); + if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].size() + < parameters.selection_threshold * .66) { + node_selection.insert(current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].begin(), + current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].end()); } else { - std::sample(current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].begin(), current_schedule.set_schedule.step_processor_vertices[step_selection_counter][max_step].end(), std::inserter(node_selection, node_selection.end()), - static_cast(std::round(parameters.selection_threshold * .66)), gen); + static_cast(std::round(parameters.selection_threshold * .66)), + gen); } - if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].size() < - parameters.selection_threshold * .33) { - + if (current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].size() + < parameters.selection_threshold * .33) { node_selection.insert( current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].begin(), current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].end()); } else { - - std::sample( - current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].begin(), - current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].end(), - std::inserter(node_selection, node_selection.end()), - static_cast(std::round(parameters.selection_threshold * .33)), gen); + std::sample(current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].begin(), + current_schedule.set_schedule.step_processor_vertices[step_selection_counter][second_max_step].end(), + std::inserter(node_selection, node_selection.end()), + static_cast(std::round(parameters.selection_threshold * .33)), + gen); } if (do_not_select_super_locked_nodes) { @@ -1037,9 +919,7 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void select_nodes_check_remove_superstep() { - if (step_selection_epoch_counter > parameters.max_step_selection_epochs) { - #ifdef KL_DEBUG std::cout << "step selection epoch counter exceeded, remove supersteps" << std::endl; #endif @@ -1048,24 +928,18 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { return; } - for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps(); - step_to_remove++) { - + for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps(); step_to_remove++) { #ifdef KL_DEBUG - std::cout << "checking step to remove " << step_to_remove << " / " << current_schedule.num_steps() - << std::endl; + std::cout << "checking step to remove " << step_to_remove << " / " << current_schedule.num_steps() << std::endl; #endif if (check_remove_superstep(step_to_remove)) { - #ifdef KL_DEBUG std::cout << "trying to remove superstep " << step_to_remove << std::endl; #endif if (scatter_nodes_remove_superstep(step_to_remove)) { - for (unsigned proc = 0; proc < num_procs; proc++) { - if (step_to_remove < current_schedule.num_steps()) { node_selection.insert( current_schedule.set_schedule.step_processor_vertices[step_to_remove][proc].begin(), @@ -1114,7 +988,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { bool reset_superstep = false; virtual bool check_remove_superstep(unsigned step) { - if (current_schedule.num_steps() <= 2) { return false; } @@ -1122,7 +995,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { v_workw_t total_work = 0; for (unsigned proc = 0; proc < num_procs; proc++) { - total_work += current_schedule.step_processor_work[step][proc]; } @@ -1133,7 +1005,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } bool scatter_nodes_remove_superstep(unsigned step) { - assert(step < current_schedule.num_steps()); std::vector> moves; @@ -1142,7 +1013,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { for (unsigned proc = 0; proc < num_procs; proc++) { for (const auto &node : current_schedule.set_schedule.step_processor_vertices[step][proc]) { - compute_node_gain(node); moves.push_back(best_move_change_superstep(node)); @@ -1152,10 +1022,8 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if constexpr (current_schedule.use_memory_constraint) { - current_schedule.memory_constraint.apply_move(node, proc, step, moves.back().to_proc, - moves.back().to_step); + current_schedule.memory_constraint.apply_move(node, proc, step, moves.back().to_proc, moves.back().to_step); } - } if (abort) { @@ -1168,8 +1036,9 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG BspSchedule tmp_schedule(current_schedule.set_schedule); - if (not tmp_schedule.satisfiesMemoryConstraints()) + if (not tmp_schedule.satisfiesMemoryConstraints()) { std::cout << "Mem const violated" << std::endl; + } #endif return false; @@ -1180,10 +1049,8 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } for (const auto &move : moves) { - #ifdef KL_DEBUG - std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step - << std::endl; + std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step << std::endl; #endif current_schedule.vector_schedule.setAssignedSuperstep(move.node, move.to_step); @@ -1195,17 +1062,16 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG BspSchedule tmp_schedule(current_schedule.set_schedule); - if (not tmp_schedule.satisfiesMemoryConstraints()) + if (not tmp_schedule.satisfiesMemoryConstraints()) { std::cout << "Mem const violated" << std::endl; + } #endif return true; } void select_nodes_check_reset_superstep() { - if (step_selection_epoch_counter > parameters.max_step_selection_epochs) { - #ifdef KL_DEBUG std::cout << "step selection epoch counter exceeded, reset supersteps" << std::endl; #endif @@ -1214,24 +1080,18 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { return; } - for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps(); - step_to_remove++) { - + for (unsigned step_to_remove = step_selection_counter; step_to_remove < current_schedule.num_steps(); step_to_remove++) { #ifdef KL_DEBUG - std::cout << "checking step to reset " << step_to_remove << " / " << current_schedule.num_steps() - << std::endl; + std::cout << "checking step to reset " << step_to_remove << " / " << current_schedule.num_steps() << std::endl; #endif if (check_reset_superstep(step_to_remove)) { - #ifdef KL_DEBUG std::cout << "trying to reset superstep " << step_to_remove << std::endl; #endif if (scatter_nodes_reset_superstep(step_to_remove)) { - for (unsigned proc = 0; proc < num_procs; proc++) { - if (step_to_remove < current_schedule.num_steps() - 1) { node_selection.insert( current_schedule.set_schedule.step_processor_vertices[step_to_remove + 1][proc].begin(), @@ -1273,7 +1133,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } virtual bool check_reset_superstep(unsigned step) { - if (current_schedule.num_steps() <= 2) { return false; } @@ -1291,14 +1150,13 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG std::cout << " avg " - << static_cast(total_work) / - static_cast(current_schedule.instance->numberOfProcessors()) + << static_cast(total_work) / static_cast(current_schedule.instance->numberOfProcessors()) << " max " << max_total_work << " min " << min_total_work << std::endl; #endif - if (static_cast(total_work) / static_cast(current_schedule.instance->numberOfProcessors()) - - static_cast(min_total_work) > - 0.1 * static_cast(min_total_work)) { + if (static_cast(total_work) / static_cast(current_schedule.instance->numberOfProcessors()) + - static_cast(min_total_work) + > 0.1 * static_cast(min_total_work)) { return true; } @@ -1306,7 +1164,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } bool scatter_nodes_reset_superstep(unsigned step) { - assert(step < current_schedule.num_steps()); std::vector> moves; @@ -1315,7 +1172,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { for (unsigned proc = 0; proc < num_procs; proc++) { for (const auto &node : current_schedule.set_schedule.step_processor_vertices[step][proc]) { - compute_node_gain(node); moves.push_back(best_move_change_superstep(node)); @@ -1325,9 +1181,9 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if constexpr (current_schedule.use_memory_constraint) { - current_schedule.memory_constraint.apply_forward_move(node, proc, step, moves.back().to_proc, - moves.back().to_step); - } + current_schedule.memory_constraint.apply_forward_move( + node, proc, step, moves.back().to_proc, moves.back().to_step); + } } if (abort) { @@ -1336,7 +1192,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (abort) { - current_schedule.recompute_neighboring_supersteps(step); return false; } @@ -1346,10 +1201,8 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } for (const auto &move : moves) { - #ifdef KL_DEBUG - std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step - << std::endl; + std::cout << "scatter node " << move.node << " to proc " << move.to_proc << " to step " << move.to_step << std::endl; #endif current_schedule.vector_schedule.setAssignedSuperstep(move.node, move.to_step); @@ -1363,20 +1216,15 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void select_unlock_neighbors(VertexType node) { - for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - if (check_node_unlocked(target)) { - node_selection.insert(target); nodes_to_update.insert(target); } } for (const auto &source : current_schedule.instance->getComputationalDag().parents(node)) { - if (check_node_unlocked(source)) { - node_selection.insert(source); nodes_to_update.insert(source); } @@ -1384,47 +1232,39 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } void set_parameters() { - if (num_nodes < 250) { - parameters.max_outer_iterations = 300; parameters.select_all_nodes = true; parameters.selection_threshold = num_nodes; } else if (num_nodes < 1000) { - parameters.max_outer_iterations = static_cast(num_nodes / 2); parameters.select_all_nodes = true; parameters.selection_threshold = num_nodes; } else if (num_nodes < 5000) { - parameters.max_outer_iterations = 4 * static_cast(std::sqrt(num_nodes)); parameters.selection_threshold = num_nodes / 3; } else if (num_nodes < 10000) { - parameters.max_outer_iterations = 3 * static_cast(std::sqrt(num_nodes)); parameters.selection_threshold = num_nodes / 3; } else if (num_nodes < 50000) { - parameters.max_outer_iterations = static_cast(std::sqrt(num_nodes)); parameters.selection_threshold = num_nodes / 5; } else if (num_nodes < 100000) { - parameters.max_outer_iterations = 2 * static_cast(std::log(num_nodes)); parameters.selection_threshold = num_nodes / 10; } else { - parameters.max_outer_iterations = static_cast(std::min(10000.0, std::log(num_nodes))); parameters.selection_threshold = num_nodes / 10; @@ -1444,15 +1284,15 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } #ifdef KL_DEBUG - if (parameters.select_all_nodes) + if (parameters.select_all_nodes) { std::cout << "KLBase set parameters, select all nodes" << std::endl; - else + } else { std::cout << "KLBase set parameters, selection threshold: " << parameters.selection_threshold << std::endl; + } #endif } virtual void cleanup_datastructures() { - node_change_in_costs.clear(); node_gains.clear(); @@ -1472,7 +1312,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } bool run_local_search_without_violations() { - penalty = std::numeric_limits::max() * .24; double initial_costs = current_schedule.current_cost; @@ -1497,42 +1336,37 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { unsigned inner_counter = 0; while (failed_branches < 3 && inner_counter < parameters.max_inner_iterations && max_gain_heap.size() > 0) { - inner_counter++; const double iter_costs = current_schedule.current_cost; - kl_move best_move = find_best_move(); // O(log n) + kl_move best_move = find_best_move(); // O(log n) if (best_move.gain < -std::numeric_limits::max() * .25) { continue; } - current_schedule.apply_move(best_move); // O(p + log n) + current_schedule.apply_move(best_move); // O(p + log n) locked_nodes.insert(best_move.node); #ifdef KL_DEBUG double tmp_costs = current_schedule.current_cost; if (tmp_costs != compute_current_costs()) { - - std::cout << "current costs: " << current_schedule.current_cost - << " best move gain: " << best_move.gain - << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs - << std::endl; + std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain + << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl; std::cout << "! costs not equal " << std::endl; } #endif if (best_move.change_in_cost > 0 && current_schedule.current_feasible) { - if (best_schedule_costs > iter_costs) { #ifdef KL_DEBUG std::cout << "costs increased .. save best schedule with costs " << iter_costs << std::endl; #endif best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) + save_best_schedule(current_schedule.vector_schedule); // O(n) reverse_move_best_schedule(best_move); } } @@ -1545,8 +1379,8 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost " - << best_move.change_in_cost << " from step " << best_move.from_step << " to " - << best_move.to_step << ", from proc " << best_move.from_proc << " to " << best_move.to_proc + << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step + << ", from proc " << best_move.from_proc << " to " << best_move.to_proc << " violations: " << current_schedule.current_violations.size() << " cost " << current_schedule.current_cost << std::endl; #endif @@ -1554,11 +1388,10 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { // if (not current_schedule.current_feasible) { if (current_schedule.current_cost > (1.04 + outer_counter * 0.002) * best_schedule_costs) { - #ifdef KL_DEBUG std::cout << "current cost " << current_schedule.current_cost - << " too far away from best schedule costs: " << best_schedule_costs - << " rollback to best schedule" << std::endl; + << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule" + << std::endl; #endif current_schedule.set_current_schedule(*best_schedule); @@ -1570,13 +1403,12 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } //} - } // while + } // while #ifdef KL_DEBUG std::cout << "end inner loop current cost: " << current_schedule.current_cost << " with " - << current_schedule.current_violations.size() - << " violation, best sol cost: " << best_schedule_costs << " with " - << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" + << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs + << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" << parameters.max_outer_iterations << std::endl; #endif @@ -1602,7 +1434,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #endif if (compute_with_time_limit) { - auto finish_time = std::chrono::high_resolution_clock::now(); auto duration = std::chrono::duration_cast(finish_time - start_time).count(); @@ -1611,18 +1442,18 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } } - } // for + } // for cleanup_datastructures(); - if (initial_costs > current_schedule.current_cost) + if (initial_costs > current_schedule.current_cost) { return true; - else + } else { return false; + } } bool run_local_search_simple() { - set_initial_reward_penalty(); const double initial_costs = current_schedule.current_cost; @@ -1653,15 +1484,14 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { unsigned inner_counter = 0; - while (failed_branches < parameters.max_num_failed_branches && - inner_counter < parameters.max_inner_iterations && max_gain_heap.size() > 0) { - + while (failed_branches < parameters.max_num_failed_branches && inner_counter < parameters.max_inner_iterations + && max_gain_heap.size() > 0) { inner_counter++; const bool iter_feasible = current_schedule.current_feasible; const double iter_costs = current_schedule.current_cost; - kl_move best_move = find_best_move(); // O(log n) + kl_move best_move = find_best_move(); // O(log n) if (best_move.gain < -std::numeric_limits::max() * .25) { #ifdef KL_DEBUG @@ -1672,13 +1502,13 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost " - << best_move.change_in_cost << " from step " << best_move.from_step << " to " - << best_move.to_step << ", from proc " << best_move.from_proc << " to " << best_move.to_proc + << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step + << ", from proc " << best_move.from_proc << " to " << best_move.to_proc << " violations: " << current_schedule.current_violations.size() << " cost " << current_schedule.current_cost << std::endl; #endif - current_schedule.apply_move(best_move); // O(p + log n) + current_schedule.apply_move(best_move); // O(p + log n) update_reward_penalty(); locked_nodes.insert(best_move.node); @@ -1686,18 +1516,14 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG double tmp_costs = current_schedule.current_cost; if (tmp_costs != compute_current_costs()) { - - std::cout << "current costs: " << current_schedule.current_cost - << " best move gain: " << best_move.gain - << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs - << std::endl; + std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain + << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl; std::cout << "! costs not equal " << std::endl; } #endif if (iter_feasible != current_schedule.current_feasible) { - if (iter_feasible) { #ifdef KL_DEBUG std::cout << "===> current schedule changed from feasible to infeasible" << std::endl; @@ -1710,7 +1536,7 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { std::cout << "save best schedule with costs " << iter_costs << std::endl; #endif best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) + save_best_schedule(current_schedule.vector_schedule); // O(n) reverse_move_best_schedule(best_move); } @@ -1720,13 +1546,12 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #endif } } else if (best_move.change_in_cost > 0 && current_schedule.current_feasible) { - if (iter_costs < best_schedule_costs) { #ifdef KL_DEBUG std::cout << "costs increased .. save best schedule with costs " << iter_costs << std::endl; #endif best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) + save_best_schedule(current_schedule.vector_schedule); // O(n) reverse_move_best_schedule(best_move); } } @@ -1736,28 +1561,26 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { select_unlock_neighbors(best_move.node); if (check_violation_locked()) { - if (iter_feasible != current_schedule.current_feasible && iter_feasible) { node_causing_first_violation = best_move.node; } super_locked_nodes.insert(node_causing_first_violation); #ifdef KL_DEBUG - std::cout << "abort iteration on locked violation, super locking node " - << node_causing_first_violation << std::endl; + std::cout << "abort iteration on locked violation, super locking node " << node_causing_first_violation + << std::endl; #endif break; } update_node_gains(nodes_to_update); - if (current_schedule.current_cost > (parameters.max_div_best_sol_base_percent + - outer_counter * parameters.max_div_best_sol_rate_percent) * - best_schedule_costs) { - + if (current_schedule.current_cost + > (parameters.max_div_best_sol_base_percent + outer_counter * parameters.max_div_best_sol_rate_percent) + * best_schedule_costs) { #ifdef KL_DEBUG std::cout << "current cost " << current_schedule.current_cost - << " too far away from best schedule costs: " << best_schedule_costs - << " rollback to best schedule" << std::endl; + << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule" + << std::endl; #endif current_schedule.set_current_schedule(*best_schedule); @@ -1768,13 +1591,12 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { failed_branches++; } - } // while + } // while #ifdef KL_DEBUG std::cout << "end inner loop current cost: " << current_schedule.current_cost << " with " - << current_schedule.current_violations.size() - << " violation, best sol cost: " << best_schedule_costs << " with " - << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" + << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs + << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" << parameters.max_outer_iterations << std::endl; #endif @@ -1835,24 +1657,23 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { improvement_counter = 0; } - } // for + } // for cleanup_datastructures(); #ifdef KL_DEBUG - std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs" - << std::endl; + std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs" << std::endl; assert(best_schedule->satisfiesPrecedenceConstraints()); #endif - if (initial_costs > current_schedule.current_cost) + if (initial_costs > current_schedule.current_cost) { return true; - else + } else { return false; + } } bool run_local_search_remove_supersteps() { - const double initial_costs = current_schedule.current_cost; #ifdef KL_DEBUG @@ -1871,8 +1692,7 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { for (unsigned outer_counter = 0; outer_counter < parameters.max_outer_iterations; outer_counter++) { #ifdef KL_DEBUG - std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost - << std::endl; + std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost << std::endl; if (max_gain_heap.size() == 0) { std::cout << "max gain heap empty" << std::endl; } @@ -1887,15 +1707,14 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { unsigned inner_counter = 0; - while (failed_branches < parameters.max_num_failed_branches && - inner_counter < parameters.max_inner_iterations && max_gain_heap.size() > 0) { - + while (failed_branches < parameters.max_num_failed_branches && inner_counter < parameters.max_inner_iterations + && max_gain_heap.size() > 0) { inner_counter++; const bool iter_feasible = current_schedule.current_feasible; const double iter_costs = current_schedule.current_cost; - kl_move best_move = find_best_move(); // O(log n) + kl_move best_move = find_best_move(); // O(log n) if (best_move.gain < -std::numeric_limits::max() * .25) { #ifdef KL_DEBUG @@ -1906,14 +1725,14 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost " - << best_move.change_in_cost << " from step " << best_move.from_step << " to " - << best_move.to_step << ", from proc " << best_move.from_proc << " to " << best_move.to_proc + << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step + << ", from proc " << best_move.from_proc << " to " << best_move.to_proc << " violations: " << current_schedule.current_violations.size() << " old cost " << current_schedule.current_cost << " new cost " << current_schedule.current_cost + best_move.change_in_cost << std::endl; #endif - current_schedule.apply_move(best_move); // O(p + log n) + current_schedule.apply_move(best_move); // O(p + log n) update_reward_penalty(); locked_nodes.insert(best_move.node); @@ -1921,18 +1740,14 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG double tmp_costs = current_schedule.current_cost; if (tmp_costs != compute_current_costs()) { - - std::cout << "current costs: " << current_schedule.current_cost - << " best move gain: " << best_move.gain - << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs - << std::endl; + std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain + << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl; std::cout << "! costs not equal " << std::endl; } #endif if (iter_feasible != current_schedule.current_feasible) { - if (iter_feasible) { #ifdef KL_DEBUG std::cout << "===> current schedule changed from feasible to infeasible" << std::endl; @@ -1945,7 +1760,7 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { std::cout << "save best schedule with costs " << iter_costs << std::endl; #endif best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) + save_best_schedule(current_schedule.vector_schedule); // O(n) reverse_move_best_schedule(best_move); #ifdef KL_DEBUG std::cout << "KLBase save best schedule with (source node comm) cost " @@ -1960,13 +1775,12 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #endif } } else if (best_move.change_in_cost > 0 && current_schedule.current_feasible) { - if (iter_costs < best_schedule_costs) { #ifdef KL_DEBUG std::cout << "costs increased .. save best schedule with costs " << iter_costs << std::endl; #endif best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) + save_best_schedule(current_schedule.vector_schedule); // O(n) reverse_move_best_schedule(best_move); #ifdef KL_DEBUG std::cout << "KLBase save best schedule with (source node comm) cost " @@ -1981,28 +1795,26 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { select_unlock_neighbors(best_move.node); if (check_violation_locked()) { - if (iter_feasible != current_schedule.current_feasible && iter_feasible) { node_causing_first_violation = best_move.node; } super_locked_nodes.insert(node_causing_first_violation); #ifdef KL_DEBUG - std::cout << "abort iteration on locked violation, super locking node " - << node_causing_first_violation << std::endl; + std::cout << "abort iteration on locked violation, super locking node " << node_causing_first_violation + << std::endl; #endif break; } update_node_gains(nodes_to_update); - if (current_schedule.current_cost > (parameters.max_div_best_sol_base_percent + - outer_counter * parameters.max_div_best_sol_rate_percent) * - best_schedule_costs) { - + if (current_schedule.current_cost + > (parameters.max_div_best_sol_base_percent + outer_counter * parameters.max_div_best_sol_rate_percent) + * best_schedule_costs) { #ifdef KL_DEBUG std::cout << "current cost " << current_schedule.current_cost - << " too far away from best schedule costs: " << best_schedule_costs - << " rollback to best schedule" << std::endl; + << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule" + << std::endl; #endif current_schedule.set_current_schedule(*best_schedule); @@ -2017,13 +1829,12 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { failed_branches++; } - } // while + } // while #ifdef KL_DEBUG - std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost - << " with " << current_schedule.current_violations.size() - << " violation, best sol cost: " << best_schedule_costs << " with " - << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" + std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost << " with " + << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs + << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" << parameters.max_outer_iterations << std::endl; #endif @@ -2072,11 +1883,9 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (best_iter_costs <= current_schedule.current_cost) { - no_improvement_iter_counter++; if (no_improvement_iter_counter > parameters.reset_epoch_counter_threshold) { - step_selection_epoch_counter = 0; parameters.reset_epoch_counter_threshold += current_schedule.num_steps(); #ifdef KL_DEBUG @@ -2087,7 +1896,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (no_improvement_iter_counter > 10) { - parameters.initial_penalty = 0.0; parameters.violations_threshold = 3; #ifdef KL_DEBUG @@ -2097,7 +1905,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (no_improvement_iter_counter == 35) { - parameters.max_div_best_sol_base_percent *= 1.02; #ifdef KL_DEBUG std::cout << "no improvement for " << no_improvement_iter_counter @@ -2117,28 +1924,28 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { no_improvement_iter_counter = 0; } - } // for + } // for cleanup_datastructures(); #ifdef KL_DEBUG - std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs" - << std::endl; + std::cout << "kl done, current cost " << best_schedule_costs << " vs " << initial_costs << " initial costs" << std::endl; assert(best_schedule->satisfiesPrecedenceConstraints()); #endif - if (initial_costs > current_schedule.current_cost) + if (initial_costs > current_schedule.current_cost) { return true; - else + } else { return false; + } } bool run_local_search_unlock_delay() { - const double initial_costs = current_schedule.current_cost; #ifdef KL_DEBUG_1 - std::cout << "Initial costs " << initial_costs << " with " << best_schedule->numberOfSupersteps() << " supersteps."<< std::endl; + std::cout << "Initial costs " << initial_costs << " with " << best_schedule->numberOfSupersteps() << " supersteps." + << std::endl; #endif #ifdef KL_PRINT_SCHEDULE @@ -2157,8 +1964,7 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { for (unsigned outer_counter = 0; outer_counter < parameters.max_outer_iterations; outer_counter++) { #ifdef KL_DEBUG - std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost - << std::endl; + std::cout << "outer iteration " << outer_counter << " current costs: " << current_schedule.current_cost << std::endl; if (max_gain_heap.size() == 0) { std::cout << "max gain heap empty" << std::endl; } @@ -2173,9 +1979,8 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { unsigned inner_counter = 0; - while (failed_branches < parameters.max_num_failed_branches && - inner_counter < parameters.max_inner_iterations && max_gain_heap.size() > 0) { - + while (failed_branches < parameters.max_num_failed_branches && inner_counter < parameters.max_inner_iterations + && max_gain_heap.size() > 0) { inner_counter++; const bool iter_feasible = current_schedule.current_feasible; @@ -2183,7 +1988,7 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG print_heap(); #endif - kl_move best_move = find_best_move(); // O(log n) + kl_move best_move = find_best_move(); // O(log n) if (best_move.gain < -std::numeric_limits::max() * .25) { #ifdef KL_DEBUG @@ -2194,23 +1999,22 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG std::cout << "best move: " << best_move.node << " gain " << best_move.gain << " chng in cost " - << best_move.change_in_cost << " from step " << best_move.from_step << " to " - << best_move.to_step << ", from proc " << best_move.from_proc << " to " << best_move.to_proc + << best_move.change_in_cost << " from step " << best_move.from_step << " to " << best_move.to_step + << ", from proc " << best_move.from_proc << " to " << best_move.to_proc << " violations: " << current_schedule.current_violations.size() << " old cost " << current_schedule.current_cost << " new cost " << current_schedule.current_cost + best_move.change_in_cost << std::endl; if constexpr (current_schedule.use_memory_constraint) { std::cout << "memory to step/proc " - << current_schedule.memory_constraint - .step_processor_memory[best_move.to_step][best_move.to_proc] + << current_schedule.memory_constraint.step_processor_memory[best_move.to_step][best_move.to_proc] << std::endl; } printSetScheduleWorkMemNodesGrid(std::cout, current_schedule.set_schedule, true); #endif - current_schedule.apply_move(best_move); // O(p + log n) + current_schedule.apply_move(best_move); // O(p + log n) // if (best_move.gain <= 0.000000001) { // conseq_no_gain_moves_counter++; @@ -2234,8 +2038,9 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG BspSchedule tmp_schedule(current_schedule.set_schedule); - if (not tmp_schedule.satisfiesMemoryConstraints()) + if (not tmp_schedule.satisfiesMemoryConstraints()) { std::cout << "Mem const violated" << std::endl; + } #endif update_reward_penalty(); @@ -2244,18 +2049,14 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #ifdef KL_DEBUG double tmp_costs = current_schedule.current_cost; if (tmp_costs != compute_current_costs()) { - - std::cout << "current costs: " << current_schedule.current_cost - << " best move gain: " << best_move.gain - << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs - << std::endl; + std::cout << "current costs: " << current_schedule.current_cost << " best move gain: " << best_move.gain + << " best move costs: " << best_move.change_in_cost << " tmp cost: " << tmp_costs << std::endl; std::cout << "! costs not equal " << std::endl; } #endif if (iter_feasible != current_schedule.current_feasible) { - if (iter_feasible) { #ifdef KL_DEBUG std::cout << "===> current schedule changed from feasible to infeasible" << std::endl; @@ -2268,7 +2069,7 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { std::cout << "save best schedule with costs " << iter_costs << std::endl; #endif best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) + save_best_schedule(current_schedule.vector_schedule); // O(n) reverse_move_best_schedule(best_move); #ifdef KL_DEBUG std::cout << "KLBase save best schedule with (source node comm) cost " @@ -2283,13 +2084,12 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #endif } } else if (best_move.change_in_cost > 0 && current_schedule.current_feasible) { - if (iter_costs < best_schedule_costs) { #ifdef KL_DEBUG std::cout << "costs increased .. save best schedule with costs " << iter_costs << std::endl; #endif best_schedule_costs = iter_costs; - save_best_schedule(current_schedule.vector_schedule); // O(n) + save_best_schedule(current_schedule.vector_schedule); // O(n) reverse_move_best_schedule(best_move); #ifdef KL_DEBUG std::cout << "KLBase save best schedule with (source node comm) cost " @@ -2325,14 +2125,13 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { select_unlock_neighbors(best_move.node); if (check_violation_locked()) { - if (iter_feasible != current_schedule.current_feasible && iter_feasible) { node_causing_first_violation = best_move.node; } super_locked_nodes.insert(node_causing_first_violation); #ifdef KL_DEBUG - std::cout << "abort iteration on locked violation, super locking node " - << node_causing_first_violation << std::endl; + std::cout << "abort iteration on locked violation, super locking node " << node_causing_first_violation + << std::endl; #endif break; } @@ -2347,11 +2146,9 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { update_node_gains(nodes_to_update); - if (not(current_schedule.current_violations.size() > 4) && not iter_feasible && - not max_gain_heap.empty()) { + if (not(current_schedule.current_violations.size() > 4) && not iter_feasible && not max_gain_heap.empty()) { const auto &iter = max_gain_heap.ordered_begin(); if (iter->gain < parameters.gain_threshold) { - node_selection.clear(); locked_nodes.clear(); super_locked_nodes.clear(); @@ -2367,14 +2164,13 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } } - if (current_schedule.current_cost > (parameters.max_div_best_sol_base_percent + - outer_counter * parameters.max_div_best_sol_rate_percent) * - best_schedule_costs) { - + if (current_schedule.current_cost + > (parameters.max_div_best_sol_base_percent + outer_counter * parameters.max_div_best_sol_rate_percent) + * best_schedule_costs) { #ifdef KL_DEBUG std::cout << "current cost " << current_schedule.current_cost - << " too far away from best schedule costs: " << best_schedule_costs - << " rollback to best schedule" << std::endl; + << " too far away from best schedule costs: " << best_schedule_costs << " rollback to best schedule" + << std::endl; #endif current_schedule.set_current_schedule(*best_schedule); @@ -2389,13 +2185,12 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { failed_branches++; } - } // while + } // while #ifdef KL_DEBUG - std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost - << " with " << current_schedule.current_violations.size() - << " violation, best sol cost: " << best_schedule_costs << " with " - << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" + std::cout << std::setprecision(12) << "end inner loop current cost: " << current_schedule.current_cost << " with " + << current_schedule.current_violations.size() << " violation, best sol cost: " << best_schedule_costs + << " with " << best_schedule->numberOfSupersteps() << " supersteps, counter: " << outer_counter << "/" << parameters.max_outer_iterations << std::endl; #endif @@ -2404,9 +2199,8 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { save_best_schedule(current_schedule.vector_schedule); best_schedule_costs = current_schedule.current_cost; #ifdef KL_DEBUG - std::cout << "KLBase save best schedule with (source node comm) cost " - << best_schedule->computeTotalCosts() << " and number of supersteps " - << best_schedule->numberOfSupersteps() << std::endl; + std::cout << "KLBase save best schedule with (source node comm) cost " << best_schedule->computeTotalCosts() + << " and number of supersteps " << best_schedule->numberOfSupersteps() << std::endl; #endif } else { current_schedule.set_current_schedule(*best_schedule); @@ -2455,11 +2249,9 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { #endif if (best_iter_costs <= current_schedule.current_cost) { - no_improvement_iter_counter++; if (no_improvement_iter_counter > parameters.reset_epoch_counter_threshold) { - step_selection_epoch_counter = 0; parameters.reset_epoch_counter_threshold += current_schedule.num_steps(); #ifdef KL_DEBUG @@ -2485,17 +2277,14 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { // } if (no_improvement_iter_counter > 50 && no_improvement_iter_counter % 3 == 0) { - parameters.initial_penalty = 0.0; parameters.violations_threshold = 5; } else if (no_improvement_iter_counter > 30 && no_improvement_iter_counter % 5 == 0) { - parameters.initial_penalty = 0.0; parameters.violations_threshold = 4; } else if (no_improvement_iter_counter > 9 && no_improvement_iter_counter % 10 == 0) { - parameters.initial_penalty = 0.0; parameters.violations_threshold = 3; #ifdef KL_DEBUG @@ -2505,7 +2294,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } if (no_improvement_iter_counter == 35) { - parameters.max_div_best_sol_base_percent *= 1.02; #ifdef KL_DEBUG std::cout << "no improvement for " << no_improvement_iter_counter @@ -2529,20 +2317,21 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { std::cout << "end of while, current cost " << current_schedule.current_cost << std::endl; #endif - } // for + } // for cleanup_datastructures(); #ifdef KL_DEBUG_1 - std::cout << "kl done, current cost " << best_schedule_costs << " with " << best_schedule->numberOfSupersteps() << " supersteps vs " << initial_costs << " initial costs" - << std::endl; + std::cout << "kl done, current cost " << best_schedule_costs << " with " << best_schedule->numberOfSupersteps() + << " supersteps vs " << initial_costs << " initial costs" << std::endl; assert(best_schedule->satisfiesPrecedenceConstraints()); #endif - if (initial_costs > current_schedule.current_cost) + if (initial_costs > current_schedule.current_cost) { return true; - else + } else { return false; + } } // virtual void checkMergeSupersteps(); @@ -2551,14 +2340,13 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { // virtual void insertSuperstep(unsigned step); void print_heap() { - std::cout << "heap current size: " << max_gain_heap.size() << std::endl; std::cout << "heap top node " << max_gain_heap.top().node << " gain " << max_gain_heap.top().gain << std::endl; unsigned count = 0; for (auto it = max_gain_heap.ordered_begin(); it != max_gain_heap.ordered_end(); ++it) { - std::cout << "node " << it->node << " gain " << it->gain << " to proc " << it->to_proc << " to step " - << it->to_step << std::endl; + std::cout << "node " << it->node << " gain " << it->gain << " to proc " << it->to_proc << " to step " << it->to_step + << std::endl; if (count++ > 15 || it->gain <= 0.0) { break; @@ -2583,7 +2371,6 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { virtual ~kl_base() = default; virtual RETURN_STATUS improveSchedule(BspSchedule &schedule) override { - reset_run_datastructures(); best_schedule = &schedule; @@ -2597,10 +2384,11 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { bool improvement_found = run_local_search_unlock_delay(); - if (improvement_found) + if (improvement_found) { return RETURN_STATUS::OSP_SUCCESS; - else + } else { return RETURN_STATUS::BEST_FOUND; + } } virtual RETURN_STATUS improveScheduleWithTimeLimit(BspSchedule &schedule) override { @@ -2622,4 +2410,4 @@ class kl_base : public ImprovementScheduler, public Ikl_cost_function { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp index 1c544fd1..f8ded91e 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_current_schedule.hpp @@ -28,9 +28,8 @@ limitations under the License. namespace osp { -template +template struct kl_move { - vertex_idx_t node; double gain; @@ -43,19 +42,28 @@ struct kl_move { unsigned to_step; kl_move() : node(0), gain(0), change_in_cost(0), from_proc(0), from_step(0), to_proc(0), to_step(0) {} - kl_move(vertex_idx_t _node, double _gain, double _change_cost, unsigned _from_proc, unsigned _from_step, - unsigned _to_proc, unsigned _to_step) - : node(_node), gain(_gain), change_in_cost(_change_cost), from_proc(_from_proc), from_step(_from_step), - to_proc(_to_proc), to_step(_to_step) {} + + kl_move(vertex_idx_t _node, + double _gain, + double _change_cost, + unsigned _from_proc, + unsigned _from_step, + unsigned _to_proc, + unsigned _to_step) + : node(_node), + gain(_gain), + change_in_cost(_change_cost), + from_proc(_from_proc), + from_step(_from_step), + to_proc(_to_proc), + to_step(_to_step) {} bool operator<(kl_move const &rhs) const { - return (gain < rhs.gain) or (gain <= rhs.gain and change_in_cost < rhs.change_in_cost) or - (gain <= rhs.gain and change_in_cost <= rhs.change_in_cost and node > rhs.node); + return (gain < rhs.gain) or (gain <= rhs.gain and change_in_cost < rhs.change_in_cost) + or (gain <= rhs.gain and change_in_cost <= rhs.change_in_cost and node > rhs.node); } - kl_move reverse_move() const { - return kl_move(node, -gain, -change_in_cost, to_proc, to_step, from_proc, from_step); - } + kl_move reverse_move() const { return kl_move(node, -gain, -change_in_cost, to_proc, to_step, from_proc, from_step); } }; class Ikl_cost_function { @@ -65,16 +73,14 @@ class Ikl_cost_function { virtual ~Ikl_cost_function() = default; }; -template +template class kl_current_schedule { - private: using VertexType = vertex_idx_t; using EdgeType = edge_desc_t; public: kl_current_schedule(Ikl_cost_function *cost_f_) : cost_f(cost_f_) { - #ifdef KL_DEBUG if constexpr (use_memory_constraint) { std::cout << "KLCurrentSchedule constructor with memory constraint" << std::endl; @@ -105,13 +111,12 @@ class kl_current_schedule { double current_cost = 0; bool current_feasible = true; - std::unordered_set current_violations; // edges + std::unordered_set current_violations; // edges std::unordered_map new_violations; std::unordered_set resolved_violations; void remove_superstep(unsigned step) { - if (step > 0) { vector_schedule.mergeSupersteps(step - 1, step); set_schedule.mergeSupersteps(step - 1, step); @@ -126,12 +131,10 @@ class kl_current_schedule { } for (unsigned i = step + 1; i < num_steps(); i++) { - step_max_work[i] = step_max_work[i + 1]; step_second_max_work[i] = step_second_max_work[i + 1]; for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - step_processor_work[i][proc] = step_processor_work[i + 1][proc]; if constexpr (use_memory_constraint) { @@ -152,7 +155,6 @@ class kl_current_schedule { } void reset_superstep(unsigned step) { - if (step > 0) { compute_work_memory_datastructures(step - 1, step - 1); if (step < num_steps() - 1) { @@ -190,9 +192,7 @@ class kl_current_schedule { inline unsigned num_steps() const { return vector_schedule.numberOfSupersteps(); } virtual void set_current_schedule(const IBspSchedule &schedule) { - if (num_steps() == schedule.numberOfSupersteps()) { - #ifdef KL_DEBUG std::cout << "KLCurrentSchedule set current schedule, same nr supersteps" << std::endl; #endif @@ -204,16 +204,14 @@ class kl_current_schedule { } for (const auto &node : instance->getComputationalDag().vertices()) { - vector_schedule.setAssignedProcessor(node, schedule.assignedProcessor(node)); vector_schedule.setAssignedSuperstep(node, schedule.assignedSuperstep(node)); - set_schedule.step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)] - .insert(node); + set_schedule.step_processor_vertices[schedule.assignedSuperstep(node)][schedule.assignedProcessor(node)].insert( + node); } } else { - #ifdef KL_DEBUG std::cout << "KLCurrentSchedule set current schedule, different nr supersteps" << std::endl; #endif @@ -236,7 +234,6 @@ class kl_current_schedule { } virtual void initialize_superstep_datastructures() { - #ifdef KL_DEBUG std::cout << "KLCurrentSchedule initialize datastructures" << std::endl; #endif @@ -244,18 +241,16 @@ class kl_current_schedule { const unsigned num_procs = instance->numberOfProcessors(); if constexpr (use_memory_constraint) { - memory_constraint.initialize(set_schedule, vector_schedule); } - step_processor_work = - std::vector>>(num_steps(), std::vector>(num_procs, 0)); + step_processor_work + = std::vector>>(num_steps(), std::vector>(num_procs, 0)); step_max_work = std::vector>(num_steps(), 0); step_second_max_work = std::vector>(num_steps(), 0); } virtual void cleanup_superstep_datastructures() { - step_processor_work.clear(); step_max_work.clear(); step_second_max_work.clear(); @@ -266,18 +261,15 @@ class kl_current_schedule { } virtual void compute_work_memory_datastructures(unsigned start_step, unsigned end_step) { - if constexpr (use_memory_constraint) { memory_constraint.compute_memory_datastructure(start_step, end_step); } for (unsigned step = start_step; step <= end_step; step++) { - step_max_work[step] = 0; step_second_max_work[step] = 0; for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - step_processor_work[step][proc] = 0; for (const auto &node : set_schedule.step_processor_vertices[step][proc]) { @@ -285,12 +277,10 @@ class kl_current_schedule { } if (step_processor_work[step][proc] > step_max_work[step]) { - step_second_max_work[step] = step_max_work[step]; step_max_work[step] = step_processor_work[step][proc]; } else if (step_processor_work[step][proc] > step_second_max_work[step]) { - step_second_max_work[step] = step_processor_work[step][proc]; } } @@ -298,7 +288,6 @@ class kl_current_schedule { } virtual void recompute_current_violations() { - current_violations.clear(); #ifdef KL_DEBUG @@ -306,15 +295,12 @@ class kl_current_schedule { #endif for (const auto &edge : edges(instance->getComputationalDag())) { - const auto &source_v = source(edge, instance->getComputationalDag()); const auto &target_v = target(edge, instance->getComputationalDag()); if (vector_schedule.assignedSuperstep(source_v) >= vector_schedule.assignedSuperstep(target_v)) { - - if (vector_schedule.assignedProcessor(source_v) != vector_schedule.assignedProcessor(target_v) || - vector_schedule.assignedSuperstep(source_v) > vector_schedule.assignedSuperstep(target_v)) { - + if (vector_schedule.assignedProcessor(source_v) != vector_schedule.assignedProcessor(target_v) + || vector_schedule.assignedSuperstep(source_v) > vector_schedule.assignedSuperstep(target_v)) { current_violations.insert(edge); #ifdef KL_DEBUG @@ -336,7 +322,6 @@ class kl_current_schedule { }; virtual void apply_move(kl_move move) { - vector_schedule.setAssignedProcessor(move.node, move.to_proc); vector_schedule.setAssignedSuperstep(move.node, move.to_step); @@ -345,22 +330,18 @@ class kl_current_schedule { current_cost += move.change_in_cost; - step_processor_work[move.to_step][move.to_proc] += - instance->getComputationalDag().vertex_work_weight(move.node); - step_processor_work[move.from_step][move.from_proc] -= - instance->getComputationalDag().vertex_work_weight(move.node); + step_processor_work[move.to_step][move.to_proc] += instance->getComputationalDag().vertex_work_weight(move.node); + step_processor_work[move.from_step][move.from_proc] -= instance->getComputationalDag().vertex_work_weight(move.node); update_max_work_datastructures(move); update_violations(move.node); if constexpr (use_memory_constraint) { - memory_constraint.apply_move(move.node, move.from_proc, move.from_step, move.to_proc, move.to_step); } } virtual void initialize_current_schedule(const IBspSchedule &schedule) { - #ifdef KL_DEBUG std::cout << "KLCurrentSchedule initialize current schedule" << std::endl; #endif @@ -378,32 +359,24 @@ class kl_current_schedule { private: void update_violations(VertexType node) { - new_violations.clear(); resolved_violations.clear(); for (const auto &edge : out_edges(node, instance->getComputationalDag())) { - const auto &child = target(edge, instance->getComputationalDag()); if (current_violations.find(edge) == current_violations.end()) { - if (vector_schedule.assignedSuperstep(node) >= vector_schedule.assignedSuperstep(child)) { - - if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(child) || - vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(child)) { - + if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(child) + || vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(child)) { current_violations.insert(edge); new_violations[child] = edge; } } } else { - if (vector_schedule.assignedSuperstep(node) <= vector_schedule.assignedSuperstep(child)) { - - if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(child) || - vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(child)) { - + if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(child) + || vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(child)) { current_violations.erase(edge); resolved_violations.insert(edge); } @@ -412,27 +385,20 @@ class kl_current_schedule { } for (const auto &edge : in_edges(node, instance->getComputationalDag())) { - const auto &parent = source(edge, instance->getComputationalDag()); if (current_violations.find(edge) == current_violations.end()) { - if (vector_schedule.assignedSuperstep(node) <= vector_schedule.assignedSuperstep(parent)) { - - if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(parent) || - vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(parent)) { - + if (vector_schedule.assignedProcessor(node) != vector_schedule.assignedProcessor(parent) + || vector_schedule.assignedSuperstep(node) < vector_schedule.assignedSuperstep(parent)) { current_violations.insert(edge); new_violations[parent] = edge; } } } else { - if (vector_schedule.assignedSuperstep(node) >= vector_schedule.assignedSuperstep(parent)) { - - if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(parent) || - vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(parent)) { - + if (vector_schedule.assignedProcessor(node) == vector_schedule.assignedProcessor(parent) + || vector_schedule.assignedSuperstep(node) > vector_schedule.assignedSuperstep(parent)) { current_violations.erase(edge); resolved_violations.insert(edge); } @@ -468,41 +434,33 @@ class kl_current_schedule { } void update_max_work_datastructures(kl_move move) { - if (move.from_step == move.to_step) { - recompute_superstep_max_work(move.from_step); } else { - recompute_superstep_max_work(move.from_step); recompute_superstep_max_work(move.to_step); } } void recompute_superstep_max_work(unsigned step) { - step_max_work[step] = 0; step_second_max_work[step] = 0; for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { - if (step_processor_work[step][proc] > step_max_work[step]) { - step_second_max_work[step] = step_max_work[step]; step_max_work[step] = step_processor_work[step][proc]; } else if (step_processor_work[step][proc] > step_second_max_work[step]) { - step_second_max_work[step] = step_processor_work[step][proc]; } } } }; -template +template class kl_current_schedule_max_comm : public kl_current_schedule { - public: std::vector>> step_processor_send; std::vector> step_max_send; @@ -513,4 +471,4 @@ class kl_current_schedule_max_comm : public kl_current_schedule> step_second_max_receive; }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp index 7f36fefa..81841b0e 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_comm.hpp @@ -28,18 +28,17 @@ limitations under the License. namespace osp { -template +template class kl_hyper_total_comm : public kl_total { - protected: - - virtual void compute_comm_gain(vertex_idx_t node, unsigned current_step, unsigned current_proc, + virtual void compute_comm_gain(vertex_idx_t node, + unsigned current_step, + unsigned current_proc, unsigned new_proc) override { throw std::runtime_error("Not implemented yet"); } virtual double compute_current_costs() override { - double work_costs = 0; for (unsigned step = 0; step < current_schedule.num_steps(); step++) { work_costs += current_schedule.step_max_work[step]; @@ -48,14 +47,13 @@ class kl_hyper_total_comm : public kl_total { double comm_costs = 0; for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) { - - if (is_sink(node, current_schedule.instance->getComputationalDag())) + if (is_sink(node, current_schedule.instance->getComputationalDag())) { continue; + } std::unordered_set intersects; for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - const unsigned &target_proc = current_schedule.vector_schedule.assignedProcessor(target); if (current_schedule.vector_schedule.assignedProcessor(node) != target_proc) { @@ -63,13 +61,11 @@ class kl_hyper_total_comm : public kl_total { } } - comm_costs += - intersects.size() * current_schedule.instance->getComputationalDag().vertex_comm_weight(node); + comm_costs += intersects.size() * current_schedule.instance->getComputationalDag().vertex_comm_weight(node); } - current_schedule.current_cost = - work_costs + comm_costs * current_schedule.comm_multiplier + - (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts(); + current_schedule.current_cost = work_costs + comm_costs * current_schedule.comm_multiplier + + (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts(); return current_schedule.current_cost; } @@ -82,4 +78,4 @@ class kl_hyper_total_comm : public kl_total { virtual std::string getScheduleName() const override { return "KLHyperTotalComm"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp index 365f9e85..6d4a15fe 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_hyper_total_cut.hpp @@ -27,17 +27,18 @@ limitations under the License. #include "kl_total.hpp" namespace osp { -template -class kl_hyper_total_cut : public kl_total { +template +class kl_hyper_total_cut : public kl_total { protected: - virtual void compute_comm_gain(vertex_idx_t node, unsigned current_step, unsigned current_proc, + virtual void compute_comm_gain(vertex_idx_t node, + unsigned current_step, + unsigned current_proc, unsigned new_proc) override { throw std::runtime_error("Not implemented yet"); } virtual double compute_current_costs() override { - double work_costs = 0; for (unsigned step = 0; step < current_schedule.num_steps(); step++) { work_costs += current_schedule.step_max_work[step]; @@ -46,19 +47,18 @@ class kl_hyper_total_cut : public kl_total { double comm_costs = 0; for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) { - - if (is_sink(node, current_schedule.instance->getComputationalDag())) + if (is_sink(node, current_schedule.instance->getComputationalDag())) { continue; + } std::unordered_set intersects; for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - const unsigned &target_proc = current_schedule.vector_schedule.assignedProcessor(target); const unsigned &target_step = current_schedule.vector_schedule.assignedSuperstep(target); - if (current_schedule.vector_schedule.assignedProcessor(node) != target_proc || - current_schedule.vector_schedule.assignedSuperstep(node) != target_step) { + if (current_schedule.vector_schedule.assignedProcessor(node) != target_proc + || current_schedule.vector_schedule.assignedSuperstep(node) != target_step) { intersects.insert(current_schedule.instance->numberOfProcessors() * target_step + target_proc); } } @@ -66,9 +66,8 @@ class kl_hyper_total_cut : public kl_total { comm_costs += intersects.size() * current_schedule.instance->getComputationalDag().vertex_comm_weight(node); } - current_schedule.current_cost = - work_costs + comm_costs * current_schedule.comm_multiplier + - (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts(); + current_schedule.current_cost = work_costs + comm_costs * current_schedule.comm_multiplier + + (current_schedule.num_steps() - 1) * current_schedule.instance->synchronisationCosts(); return current_schedule.current_cost; } @@ -81,4 +80,4 @@ class kl_hyper_total_cut : public kl_total { virtual std::string getScheduleName() const override { return "KLHyperTotalCut"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp index 7b7c7dca..0e3f5d65 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total.hpp @@ -28,28 +28,24 @@ limitations under the License. namespace osp { -template +template class kl_current_schedule_total : public kl_current_schedule { - public: - kl_current_schedule_total(Ikl_cost_function *cost_f_) - : kl_current_schedule(cost_f_) {} + kl_current_schedule_total(Ikl_cost_function *cost_f_) : kl_current_schedule(cost_f_) {} - double comm_multiplier = 1.0; + double comm_multiplier = 1.0; constexpr static bool use_node_communication_costs = use_node_communication_costs_arg || not has_edge_weights_v; - }; -template +template class kl_total : public kl_base { - protected: kl_current_schedule_total current_schedule; v_commw_t node_comm_selection_threshold = 0; double max_edge_weight = 0.0; - virtual void initialize_datastructures() override { + virtual void initialize_datastructures() override { #ifdef KL_DEBUG std::cout << "KLTotal initialize datastructures" << std::endl; #endif @@ -60,39 +56,36 @@ class kl_total : public kl_base { v_workw_t max_node_weight_ = 0; for (const auto vertex : current_schedule.instance->getComputationalDag().vertices()) { - - if (is_sink(vertex, current_schedule.instance->getComputationalDag())) + if (is_sink(vertex, current_schedule.instance->getComputationalDag())) { continue; + } - max_edge_weight_ = - std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().vertex_comm_weight(vertex)); + max_edge_weight_ + = std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().vertex_comm_weight(vertex)); - max_node_weight_ = - std::max(max_node_weight_, current_schedule.instance->getComputationalDag().vertex_work_weight(vertex)); + max_node_weight_ + = std::max(max_node_weight_, current_schedule.instance->getComputationalDag().vertex_work_weight(vertex)); } - if constexpr (not current_schedule.use_node_communication_costs) { - max_edge_weight_ = 0; for (const auto &edge : edges(current_schedule.instance->getComputationalDag())) { - max_edge_weight_ = std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().edge_comm_weight(edge)); + max_edge_weight_ + = std::max(max_edge_weight_, current_schedule.instance->getComputationalDag().edge_comm_weight(edge)); } } - max_edge_weight = max_edge_weight_ + max_node_weight_; - kl_base::parameters.initial_penalty = - max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts(); + kl_base::parameters.initial_penalty + = max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts(); - kl_base::parameters.gain_threshold = - max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts(); + kl_base::parameters.gain_threshold + = max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts(); } virtual void update_reward_penalty() override { - if (current_schedule.current_violations.size() <= kl_base::parameters.violations_threshold) { kl_base::penalty = kl_base::parameters.initial_penalty; kl_base::reward = 0.0; @@ -100,52 +93,42 @@ class kl_total : public kl_base { } else { kl_base::parameters.violations_threshold = 0; - kl_base::penalty = std::log((current_schedule.current_violations.size())) * max_edge_weight * - current_schedule.comm_multiplier * - current_schedule.instance->communicationCosts(); + kl_base::penalty = std::log((current_schedule.current_violations.size())) + * max_edge_weight * current_schedule.comm_multiplier + * current_schedule.instance->communicationCosts(); - kl_base::reward = std::sqrt((current_schedule.current_violations.size() + 4)) * max_edge_weight * - current_schedule.comm_multiplier * - current_schedule.instance->communicationCosts(); + kl_base::reward = std::sqrt((current_schedule.current_violations.size() + 4)) + * max_edge_weight * current_schedule.comm_multiplier + * current_schedule.instance->communicationCosts(); } } virtual void set_initial_reward_penalty() override { - kl_base::penalty = kl_base::parameters.initial_penalty; - kl_base::reward = - max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts(); + kl_base::reward + = max_edge_weight * current_schedule.comm_multiplier * current_schedule.instance->communicationCosts(); } virtual void select_nodes_comm() override { - if constexpr (current_schedule.use_node_communication_costs) { - for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) { - for (const auto &source : current_schedule.instance->getComputationalDag().parents(node)) { - - if (current_schedule.vector_schedule.assignedProcessor(node) != - current_schedule.vector_schedule.assignedProcessor(source)) { - - if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node) > - node_comm_selection_threshold) { - - kl_base::node_selection.insert(node); + if (current_schedule.vector_schedule.assignedProcessor(node) + != current_schedule.vector_schedule.assignedProcessor(source)) { + if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node) + > node_comm_selection_threshold) { + kl_base::node_selection.insert(node); break; } } } for (const auto &target : current_schedule.instance->getComputationalDag().children(node)) { - - if (current_schedule.vector_schedule.assignedProcessor(node) != - current_schedule.vector_schedule.assignedProcessor(target)) { - - if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node) > - node_comm_selection_threshold) { - - kl_base::node_selection.insert(node); + if (current_schedule.vector_schedule.assignedProcessor(node) + != current_schedule.vector_schedule.assignedProcessor(target)) { + if (current_schedule.instance->getComputationalDag().vertex_comm_weight(node) + > node_comm_selection_threshold) { + kl_base::node_selection.insert(node); break; } } @@ -154,16 +137,12 @@ class kl_total : public kl_base { } else { for (const auto &node : current_schedule.instance->getComputationalDag().vertices()) { - for (const auto &in_edge : in_edges(node, current_schedule.instance->getComputationalDag())) { - const auto &source_v = source(in_edge, current_schedule.instance->getComputationalDag()); - if (current_schedule.vector_schedule.assignedProcessor(node) != - current_schedule.vector_schedule.assignedProcessor(source_v)) { - - if (current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge) > - node_comm_selection_threshold) { - + if (current_schedule.vector_schedule.assignedProcessor(node) + != current_schedule.vector_schedule.assignedProcessor(source_v)) { + if (current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge) + > node_comm_selection_threshold) { kl_base::node_selection.insert(node); break; } @@ -171,14 +150,11 @@ class kl_total : public kl_base { } for (const auto &out_edge : out_edges(node, current_schedule.instance->getComputationalDag())) { - const auto &target_v = target(out_edge, current_schedule.instance->getComputationalDag()); - if (current_schedule.vector_schedule.assignedProcessor(node) != - current_schedule.vector_schedule.assignedProcessor(target_v)) { - - if (current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge) > - node_comm_selection_threshold) { - + if (current_schedule.vector_schedule.assignedProcessor(node) + != current_schedule.vector_schedule.assignedProcessor(target_v)) { + if (current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge) + > node_comm_selection_threshold) { kl_base::node_selection.insert(node); break; } @@ -189,11 +165,9 @@ class kl_total : public kl_base { } public: - kl_total() - : kl_base(current_schedule), current_schedule(this) {} + kl_total() : kl_base(current_schedule), current_schedule(this) {} virtual ~kl_total() = default; - }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp index 912650bb..b5f16bf0 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp @@ -27,582 +27,829 @@ limitations under the License. #include "kl_total.hpp" namespace osp { -template -class kl_total_comm : public kl_total { +template +class kl_total_comm : public kl_total { protected: - virtual void compute_comm_gain(vertex_idx_t node, unsigned current_step, unsigned current_proc, + virtual void compute_comm_gain(vertex_idx_t node, + unsigned current_step, + unsigned current_proc, unsigned new_proc) override { - - if constexpr (kl_total::current_schedule.use_node_communication_costs) { - + if constexpr (kl_total::current_schedule + .use_node_communication_costs) { if (current_proc == new_proc) { - for (const auto &target : - kl_total::current_schedule.instance->getComputationalDag().children(node)) { - - if ((current_step + 1 == - kl_total::current_schedule.vector_schedule.assignedSuperstep(target) && - current_proc != - kl_total::current_schedule.vector_schedule.assignedProcessor(target)) || - (current_step == - kl_total::current_schedule.vector_schedule.assignedSuperstep(target) && - current_proc == - kl_total::current_schedule.vector_schedule.assignedProcessor(target))) { - kl_total::node_gains[node][current_proc][2] -= kl_total::penalty; - - } else if ((current_step == - kl_total::current_schedule.vector_schedule.assignedSuperstep(target) && - current_proc != - kl_total::current_schedule.vector_schedule.assignedProcessor(target)) || - (current_step - 1 == - kl_total::current_schedule.vector_schedule.assignedSuperstep(target) && - current_proc == - kl_total::current_schedule.vector_schedule.assignedProcessor(target))) { - - kl_total::node_gains[node][current_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(node)) + - kl_total::reward; + kl_total::current_schedule.instance + ->getComputationalDag() + .children(node)) { + if ((current_step + 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(target)) + || (current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(target))) { + kl_total::node_gains[node][current_proc][2] + -= kl_total::penalty; + + } else if ((current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(target)) + || (current_step - 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(target))) { + kl_total::node_gains[node][current_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; } } for (const auto &source : - kl_total::current_schedule.instance->getComputationalDag().parents(node)) { - - if ((current_step - 1 == - kl_total::current_schedule.vector_schedule.assignedSuperstep(source) && - current_proc != - kl_total::current_schedule.vector_schedule.assignedProcessor(source)) || - (current_step == - kl_total::current_schedule.vector_schedule.assignedSuperstep(source) && - current_proc == - kl_total::current_schedule.vector_schedule.assignedProcessor(source))) { - kl_total::node_gains[node][current_proc][0] -= kl_total::penalty; - - } else if ((current_step == - kl_total::current_schedule.vector_schedule.assignedSuperstep(source) && - current_proc != - kl_total::current_schedule.vector_schedule.assignedProcessor(source)) || - (current_step + 1 == - kl_total::current_schedule.vector_schedule.assignedSuperstep(source) && - current_proc == - kl_total::current_schedule.vector_schedule.assignedProcessor(source))) { - - kl_total::node_gains[node][current_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(source)) + - kl_total::reward; + kl_total::current_schedule.instance + ->getComputationalDag() + .parents(node)) { + if ((current_step - 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(source)) + || (current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(source))) { + kl_total::node_gains[node][current_proc][0] + -= kl_total::penalty; + + } else if ((current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(source)) + || (current_step + 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(source))) { + kl_total::node_gains[node][current_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; } } } else { - // current_proc != new_proc for (const auto &target : - kl_total::current_schedule.instance->getComputationalDag().children(node)) { - - const unsigned &target_proc = - kl_total::current_schedule.vector_schedule.assignedProcessor(target); + kl_total::current_schedule.instance + ->getComputationalDag() + .children(node)) { + const unsigned &target_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(target); if (target_proc == current_proc) { - - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(node)) * - kl_total::current_schedule.instance->communicationCosts(new_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][1] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][1] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == - current_step) { - - kl_total::node_gains[node][new_proc][1] -= kl_total::penalty; - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == - current_step + 1) { - - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; + const double loss + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + * kl_total::current_schedule + .instance->communicationCosts(new_proc, target_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][1] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][1] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step) { + kl_total::node_gains[node][new_proc][1] + -= kl_total::penalty; + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; } } else if (target_proc == new_proc) { - - const double gain = static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(node)) * - kl_total::current_schedule.instance->communicationCosts( - current_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == - current_step) { - - kl_total::node_gains[node][new_proc][1] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(node)) + - kl_total::reward; - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(node)) + - kl_total::reward; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) < - current_step) { - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(node)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, target_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; + + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + < current_step) { + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; } } else { - assert(target_proc != current_proc && target_proc != new_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->communicationCosts(new_proc, - target_proc) - - kl_total::current_schedule.instance->communicationCosts(current_proc, - target_proc)) * - kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight( - node) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == - current_step + 1) { - - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == - current_step) { - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(node)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->communicationCosts(new_proc, target_proc) + - kl_total::current_schedule + .instance->communicationCosts(current_proc, target_proc)) + * kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step) { + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; } } } for (const auto &source : - kl_total::current_schedule.instance->getComputationalDag().parents(node)) { - - const unsigned &source_proc = - kl_total::current_schedule.vector_schedule.assignedProcessor(source); + kl_total::current_schedule.instance + ->getComputationalDag() + .parents(node)) { + const unsigned &source_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(source); if (source_proc == current_proc) { - - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(source)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, new_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][1] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][1] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == - current_step) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; - kl_total::node_gains[node][new_proc][1] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == - current_step - 1) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; + const double loss + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, new_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][1] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][1] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; + kl_total::node_gains[node][new_proc][1] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; } } else if (source_proc == new_proc) { - assert(source_proc != current_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(source)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, new_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == - current_step) { - - kl_total::node_gains[node][new_proc][1] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(source)) + - kl_total::reward; - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(source)) + - kl_total::reward; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == - current_step + 1) { - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(source)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, new_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; + + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; } } else { - assert(source_proc != current_proc && source_proc != new_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->communicationCosts(new_proc, - source_proc) - - kl_total::current_schedule.instance->communicationCosts(current_proc, - source_proc)) * - kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight( - source) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == - current_step - 1) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == - current_step) { - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .vertex_comm_weight(source)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->communicationCosts(new_proc, source_proc) + - kl_total::current_schedule + .instance->communicationCosts(current_proc, source_proc)) + * kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step) { + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; } } } } } else { - if (current_proc == new_proc) { - for (const auto &out_edge : - out_edges(node, kl_total::current_schedule.instance->getComputationalDag())) { - const auto &target_v = - target(out_edge, kl_total::current_schedule.instance->getComputationalDag()); + out_edges(node, + kl_total::current_schedule.instance + ->getComputationalDag())) { + const auto &target_v + = target(out_edge, + kl_total::current_schedule + .instance->getComputationalDag()); // for (const auto &target : - // kl_total::current_schedule.instance->getComputationalDag().children(node)) { - - if ((current_step + 1 == - kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) && - current_proc != - kl_total::current_schedule.vector_schedule.assignedProcessor(target_v)) || - (current_step == - kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) && - current_proc == - kl_total::current_schedule.vector_schedule.assignedProcessor(target_v))) { - - kl_total::node_gains[node][current_proc][2] -= kl_total::penalty; - - } else if ((current_step == - kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) && - current_proc != - kl_total::current_schedule.vector_schedule.assignedProcessor(target_v)) || - (current_step - 1 == - kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) && - current_proc == - kl_total::current_schedule.vector_schedule.assignedProcessor(target_v))) { - - kl_total::node_gains[node][current_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(out_edge)) + - kl_total::reward; + // kl_total::current_schedule.instance->getComputationalDag().children(node)) { + + if ((current_step + 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v)) + || (current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v))) { + kl_total::node_gains[node][current_proc][2] + -= kl_total::penalty; + + } else if ((current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v)) + || (current_step - 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v))) { + kl_total::node_gains[node][current_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; } } for (const auto &in_edge : - in_edges(node, kl_total::current_schedule.instance->getComputationalDag())) { - - const auto &source_v = - source(in_edge, kl_total::current_schedule.instance->getComputationalDag()); + in_edges(node, + kl_total::current_schedule.instance + ->getComputationalDag())) { + const auto &source_v + = source(in_edge, + kl_total::current_schedule + .instance->getComputationalDag()); // for (const auto &source : - // kl_total::current_schedule.instance->getComputationalDag().parents(node)) { - - if ((current_step - 1 == - kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) && - current_proc != - kl_total::current_schedule.vector_schedule.assignedProcessor(source_v)) || - (current_step == - kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) && - current_proc == - kl_total::current_schedule.vector_schedule.assignedProcessor(source_v))) { - - kl_total::node_gains[node][current_proc][0] -= kl_total::penalty; - - } else if ((current_step == - kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) && - current_proc != - kl_total::current_schedule.vector_schedule.assignedProcessor(source_v)) || - (current_step + 1 == - kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) && - current_proc == - kl_total::current_schedule.vector_schedule.assignedProcessor(source_v))) { - - kl_total::node_gains[node][current_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(in_edge)) + - kl_total::reward; + // kl_total::current_schedule.instance->getComputationalDag().parents(node)) { + + if ((current_step - 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v)) + || (current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v))) { + kl_total::node_gains[node][current_proc][0] + -= kl_total::penalty; + + } else if ((current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v)) + || (current_step + 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v))) { + kl_total::node_gains[node][current_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; } } } else { - // current_proc != new_proc for (const auto &out_edge : - out_edges(node, kl_total::current_schedule.instance->getComputationalDag())) { - - const auto &target_v = - target(out_edge, kl_total::current_schedule.instance->getComputationalDag()); - const unsigned &target_proc = - kl_total::current_schedule.vector_schedule.assignedProcessor(target_v); + out_edges(node, + kl_total::current_schedule.instance + ->getComputationalDag())) { + const auto &target_v + = target(out_edge, + kl_total::current_schedule + .instance->getComputationalDag()); + const unsigned &target_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v); if (target_proc == current_proc) { - - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(out_edge)) * - kl_total::current_schedule.instance->communicationCosts(new_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][1] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][1] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == - current_step) { - - kl_total::node_gains[node][new_proc][1] -= kl_total::penalty; - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == - current_step + 1) { - - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; + const double loss + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + * kl_total::current_schedule + .instance->communicationCosts(new_proc, target_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][1] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][1] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step) { + kl_total::node_gains[node][new_proc][1] + -= kl_total::penalty; + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; } } else if (target_proc == new_proc) { - - const double gain = static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(out_edge)) * - kl_total::current_schedule.instance->communicationCosts( - current_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == - current_step) { - - kl_total::node_gains[node][new_proc][1] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(out_edge)) + - kl_total::reward; - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(out_edge)) + - kl_total::reward; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == - current_step - 1) { - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(out_edge)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, target_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; } } else { - assert(target_proc != current_proc && target_proc != new_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->communicationCosts(new_proc, - target_proc) - - kl_total::current_schedule.instance->communicationCosts(current_proc, - target_proc)) * - kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight( - out_edge) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == - current_step + 1) { - - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == - current_step) { - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(out_edge)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->communicationCosts(new_proc, target_proc) + - kl_total::current_schedule + .instance->communicationCosts(current_proc, target_proc)) + * kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step) { + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; } } } - for (const auto &in_edge : in_edges(node, kl_total::current_schedule.instance->getComputationalDag())) { - const auto &source_v = - source(in_edge, kl_total::current_schedule.instance->getComputationalDag()); - - const unsigned &source_proc = - kl_total::current_schedule.vector_schedule.assignedProcessor(source_v); + for (const auto &in_edge : + in_edges(node, + kl_total::current_schedule.instance + ->getComputationalDag())) { + const auto &source_v + = source(in_edge, + kl_total::current_schedule + .instance->getComputationalDag()); + + const unsigned &source_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v); if (source_proc == current_proc) { - - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(in_edge)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, new_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][1] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][1] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == - current_step) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; - kl_total::node_gains[node][new_proc][1] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == - current_step - 1) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; + const double loss + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, new_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][1] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][1] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; + kl_total::node_gains[node][new_proc][1] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; } } else if (source_proc == new_proc) { - assert(source_proc != current_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(in_edge)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, new_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == - current_step) { - - kl_total::node_gains[node][new_proc][1] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(in_edge)) + - kl_total::reward; - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(in_edge)) + - kl_total::reward; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == - current_step + 1) { - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(in_edge)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, new_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; + + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; } } else { - assert(source_proc != current_proc && source_proc != new_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->communicationCosts(new_proc, - source_proc) - - kl_total::current_schedule.instance->communicationCosts(current_proc, - source_proc)) * - kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight( - in_edge) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == - current_step - 1) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == - current_step) { - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag() - .edge_comm_weight(in_edge)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->communicationCosts(new_proc, source_proc) + - kl_total::current_schedule + .instance->communicationCosts(current_proc, source_proc)) + * kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step) { + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; } } } @@ -611,40 +858,60 @@ class kl_total_comm : public kl_total::current_schedule.num_steps(); step++) { - work_costs += kl_total::current_schedule.step_max_work[step]; + for (unsigned step = 0; + step < kl_total::current_schedule.num_steps(); + step++) { + work_costs + += kl_total::current_schedule.step_max_work[step]; } double comm_costs = 0; - for (const auto &edge : edges(kl_total::current_schedule.instance->getComputationalDag())) { - - const auto &source_v = source(edge, kl_total::current_schedule.instance->getComputationalDag()); - const unsigned &source_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(source_v); - const unsigned &target_proc = kl_total::current_schedule.vector_schedule.assignedProcessor( - target(edge, kl_total::current_schedule.instance->getComputationalDag())); + for (const auto &edge : edges(kl_total::current_schedule + .instance->getComputationalDag())) { + const auto &source_v = source(edge, + kl_total::current_schedule + .instance->getComputationalDag()); + const unsigned &source_proc = kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v); + const unsigned &target_proc + = kl_total::current_schedule.vector_schedule + .assignedProcessor( + target(edge, + kl_total::current_schedule + .instance->getComputationalDag())); if (source_proc != target_proc) { - - if constexpr (kl_total::current_schedule.use_node_communication_costs) { - comm_costs += - kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source_v) * - kl_total::current_schedule.instance->communicationCosts(source_proc, target_proc); + if constexpr (kl_total::current_schedule + .use_node_communication_costs) { + comm_costs + += kl_total::current_schedule.instance + ->getComputationalDag() + .vertex_comm_weight(source_v) + * kl_total::current_schedule.instance + ->communicationCosts(source_proc, target_proc); } else { - comm_costs += - kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(edge) * - kl_total::current_schedule.instance->communicationCosts(source_proc, target_proc); + comm_costs + += kl_total::current_schedule.instance + ->getComputationalDag() + .edge_comm_weight(edge) + * kl_total::current_schedule.instance + ->communicationCosts(source_proc, target_proc); } } } - kl_total::current_schedule.current_cost = - work_costs + comm_costs * kl_total::current_schedule.comm_multiplier + - (static_cast(kl_total::current_schedule.num_steps()) - 1) * - kl_total::current_schedule.instance->synchronisationCosts(); - - return kl_total::current_schedule.current_cost; + kl_total::current_schedule.current_cost + = work_costs + + comm_costs + * kl_total::current_schedule.comm_multiplier + + (static_cast( + kl_total::current_schedule.num_steps()) + - 1) + * kl_total::current_schedule.instance + ->synchronisationCosts(); + + return kl_total::current_schedule.current_cost; } public: @@ -655,9 +922,8 @@ class kl_total_comm : public kl_total +template class kl_total_comm_test : public kl_total_comm { - public: kl_total_comm_test() : kl_total_comm() {} @@ -670,10 +936,15 @@ class kl_total_comm_test : public kl_total_comm::node_gains; } - auto &get_node_change_in_costs() { return kl_total::node_change_in_costs; } + + auto &get_node_change_in_costs() { + return kl_total::node_change_in_costs; + } + auto &get_max_gain_heap() { return kl_total::max_gain_heap; } - void initialize_gain_heap_test(const std::unordered_set> &nodes, double reward_ = 0.0, + void initialize_gain_heap_test(const std::unordered_set> &nodes, + double reward_ = 0.0, double penalty_ = 0.0) { kl_total::reward = reward_; kl_total::penalty = penalty_; @@ -682,59 +953,64 @@ class kl_total_comm_test : public kl_total_comm &schedule) { - - kl_total::current_schedule.instance = &schedule.getInstance(); + kl_total::current_schedule.instance + = &schedule.getInstance(); kl_total::best_schedule = &schedule; - kl_total::num_nodes = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs = kl_total::current_schedule.instance->numberOfProcessors(); + kl_total::num_nodes + = kl_total::current_schedule.instance->numberOfVertices(); + kl_total::num_procs + = kl_total::current_schedule.instance + ->numberOfProcessors(); kl_total::set_parameters(); kl_total::initialize_datastructures(); } RETURN_STATUS improve_schedule_test_1(BspSchedule &schedule) { - - kl_total::current_schedule.instance = &schedule.getInstance(); + kl_total::current_schedule.instance + = &schedule.getInstance(); kl_total::best_schedule = &schedule; - kl_total::num_nodes = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs = kl_total::current_schedule.instance->numberOfProcessors(); + kl_total::num_nodes + = kl_total::current_schedule.instance->numberOfVertices(); + kl_total::num_procs + = kl_total::current_schedule.instance + ->numberOfProcessors(); kl_total::set_parameters(); kl_total::initialize_datastructures(); bool improvement_found = kl_total::run_local_search_simple(); - - - if (improvement_found) + if (improvement_found) { return RETURN_STATUS::OSP_SUCCESS; - else + } else { return RETURN_STATUS::BEST_FOUND; + } } RETURN_STATUS improve_schedule_test_2(BspSchedule &schedule) { - kl_total::current_schedule.instance = &schedule.getInstance(); kl_total::best_schedule = &schedule; - kl_total::num_nodes = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs = kl_total::current_schedule.instance->numberOfProcessors(); + kl_total::num_nodes + = kl_total::current_schedule.instance->numberOfVertices(); + kl_total::num_procs + = kl_total::current_schedule.instance->numberOfProcessors(); kl_total::set_parameters(); kl_total::initialize_datastructures(); bool improvement_found = kl_total::run_local_search_unlock_delay(); - - - if (improvement_found) + if (improvement_found) { return RETURN_STATUS::OSP_SUCCESS; - else + } else { return RETURN_STATUS::BEST_FOUND; + } } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp index eea392dc..d10c6109 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp @@ -27,589 +27,1061 @@ limitations under the License. #include "kl_total.hpp" namespace osp { -template -class kl_total_cut : public kl_total { +template +class kl_total_cut : public kl_total { protected: double max_edge_weight = 0.0; - virtual void compute_comm_gain(vertex_idx_t node, unsigned current_step, unsigned current_proc, + virtual void compute_comm_gain(vertex_idx_t node, + unsigned current_step, + unsigned current_proc, unsigned new_proc) override { - - if constexpr (kl_total::current_schedule.use_node_communication_costs) { - + if constexpr (kl_total::current_schedule + .use_node_communication_costs) { if (current_proc == new_proc) { - - for (const auto &target : kl_total::current_schedule.instance->getComputationalDag().children(node)) { - - const unsigned &target_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(target); - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) * - kl_total::current_schedule.instance->communicationCosts(new_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) { - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] += loss; - kl_total::node_change_in_costs[node][new_proc][2] -= loss; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step - 1) { - - kl_total::node_gains[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][0] -= loss; + for (const auto &target : + kl_total::current_schedule.instance + ->getComputationalDag() + .children(node)) { + const unsigned &target_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(target); + const double loss + = static_cast( + kl_total::current_schedule.instance + ->getComputationalDag() + .vertex_comm_weight(node)) + * kl_total::current_schedule.instance + ->communicationCosts(new_proc, target_proc) + * kl_total::current_schedule.comm_multiplier; + + if (kl_total::current_schedule.vector_schedule + .assignedSuperstep(target) + == current_step) { + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= loss; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][0] + -= loss; } - if ((current_step + 1 == kl_total::current_schedule.vector_schedule.assignedSuperstep(target) && - current_proc != kl_total::current_schedule.vector_schedule.assignedProcessor(target)) || - (current_step == kl_total::current_schedule.vector_schedule.assignedSuperstep(target) && - current_proc == kl_total::current_schedule.vector_schedule.assignedProcessor(target))) { - kl_total::node_gains[node][current_proc][2] -= kl_total::penalty; - - } else if ((current_step == kl_total::current_schedule.vector_schedule.assignedSuperstep(target) && - current_proc != kl_total::current_schedule.vector_schedule.assignedProcessor(target)) || - (current_step - 1 == kl_total::current_schedule.vector_schedule.assignedSuperstep(target) && - current_proc == kl_total::current_schedule.vector_schedule.assignedProcessor(target))) { - - kl_total::node_gains[node][current_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) + kl_total::reward; + if ((current_step + 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(target)) + || (current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(target))) { + kl_total::node_gains[node][current_proc][2] + -= kl_total::penalty; + + } else if ((current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(target)) + || (current_step - 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(target))) { + kl_total::node_gains[node][current_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; } } - for (const auto &source : kl_total::current_schedule.instance->getComputationalDag().parents(node)) { - - const unsigned &source_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(source); - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) * - kl_total::current_schedule.instance->communicationCosts(new_proc, source_proc) * - kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) { - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] += loss; - kl_total::node_change_in_costs[node][new_proc][2] -= loss; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step - 1) { - - kl_total::node_gains[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][0] -= loss; + for (const auto &source : + kl_total::current_schedule.instance + ->getComputationalDag() + .parents(node)) { + const unsigned &source_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(source); + const double loss + = static_cast( + kl_total::current_schedule.instance + ->getComputationalDag() + .vertex_comm_weight(source)) + * kl_total::current_schedule.instance + ->communicationCosts(new_proc, source_proc) + * kl_total::current_schedule.comm_multiplier; + + if (kl_total::current_schedule.vector_schedule + .assignedSuperstep(source) + == current_step) { + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= loss; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][0] + -= loss; } - if ((current_step - 1 == kl_total::current_schedule.vector_schedule.assignedSuperstep(source) && - current_proc != kl_total::current_schedule.vector_schedule.assignedProcessor(source)) || - (current_step == kl_total::current_schedule.vector_schedule.assignedSuperstep(source) && - current_proc == kl_total::current_schedule.vector_schedule.assignedProcessor(source))) { - kl_total::node_gains[node][current_proc][0] -= kl_total::penalty; - - } else if ((current_step == kl_total::current_schedule.vector_schedule.assignedSuperstep(source) && - current_proc != kl_total::current_schedule.vector_schedule.assignedProcessor(source)) || - (current_step + 1 == kl_total::current_schedule.vector_schedule.assignedSuperstep(source) && - current_proc == kl_total::current_schedule.vector_schedule.assignedProcessor(source))) { - - kl_total::node_gains[node][current_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) + - kl_total::reward; + if ((current_step - 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(source)) + || (current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(source))) { + kl_total::node_gains[node][current_proc][0] + -= kl_total::penalty; + + } else if ((current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(source)) + || (current_step + 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(source))) { + kl_total::node_gains[node][current_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; } } } else { - // current_proc != new_proc - for (const auto &target : kl_total::current_schedule.instance->getComputationalDag().children(node)) { - - const unsigned &target_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(target); + for (const auto &target : + kl_total::current_schedule.instance + ->getComputationalDag() + .children(node)) { + const unsigned &target_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(target); if (target_proc == current_proc) { - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) { - - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) * - kl_total::current_schedule.instance->communicationCosts(new_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][1] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][1] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step) { + const double loss + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + * kl_total::current_schedule + .instance->communicationCosts(new_proc, target_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][1] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][1] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; } - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) { - - kl_total::node_gains[node][new_proc][1] -= kl_total::penalty; - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step) { + kl_total::node_gains[node][new_proc][1] + -= kl_total::penalty; + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; } } else if (target_proc == new_proc) { - - const double gain = - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) { - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step + 1) { - kl_total::node_gains[node][new_proc][2] += gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step - 1) { - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_change_in_costs[node][new_proc][0] -= gain; + const double gain + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, target_proc) + * kl_total::current_schedule + .comm_multiplier; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; } - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) { - - kl_total::node_gains[node][new_proc][1] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) + - kl_total::reward; - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) + - kl_total::reward; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) < current_step) { - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) + - kl_total::reward; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; + + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + < current_step) { + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; } } else { - assert(target_proc != current_proc && target_proc != new_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->communicationCosts(new_proc, target_proc) - - kl_total::current_schedule.instance->communicationCosts(current_proc, target_proc)) * - kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(node) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target) == current_step) { - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(node)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->communicationCosts(new_proc, target_proc) + - kl_total::current_schedule + .instance->communicationCosts(current_proc, target_proc)) + * kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target) + == current_step) { + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(node)) + + kl_total::reward; } } } - for (const auto &source : kl_total::current_schedule.instance->getComputationalDag().parents(node)) { - - const unsigned &source_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(source); + for (const auto &source : + kl_total::current_schedule.instance + ->getComputationalDag() + .parents(node)) { + const unsigned &source_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(source); if (source_proc == current_proc) { - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) { - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, new_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][1] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][1] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step) { + const double loss + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, new_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][1] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][1] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; } - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; - kl_total::node_gains[node][new_proc][1] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step - 1) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; + kl_total::node_gains[node][new_proc][1] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; } } else if (source_proc == new_proc) { - assert(source_proc != current_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, new_proc) * - kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) { - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step - 1) { - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step + 1) { - kl_total::node_gains[node][new_proc][2] += gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; + const double gain + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, new_proc) + * kl_total::current_schedule + .comm_multiplier; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; } - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) { - - kl_total::node_gains[node][new_proc][1] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) + - kl_total::reward; - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) + - kl_total::reward; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) + - kl_total::reward; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; + + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; } } else { - assert(source_proc != current_proc && source_proc != new_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->communicationCosts(new_proc, source_proc) - - kl_total::current_schedule.instance->communicationCosts(current_proc, source_proc)) * - kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step - 1) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source) == current_step) { - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->communicationCosts(new_proc, source_proc) + - kl_total::current_schedule + .instance->communicationCosts(current_proc, source_proc)) + * kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source) + == current_step) { + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .vertex_comm_weight(source)) + + kl_total::reward; } } } } } else { - if (current_proc == new_proc) { - - for (const auto &out_edge : out_edges(node, kl_total::current_schedule.instance->getComputationalDag())) { - const auto &target_v = target(out_edge, kl_total::current_schedule.instance->getComputationalDag()); - const unsigned &target_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(target_v); - - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) * - kl_total::current_schedule.instance->communicationCosts(new_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) { - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] += loss; - kl_total::node_change_in_costs[node][new_proc][2] -= loss; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step - 1) { - - kl_total::node_gains[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][0] -= loss; + for (const auto &out_edge : + out_edges(node, + kl_total::current_schedule.instance + ->getComputationalDag())) { + const auto &target_v + = target(out_edge, + kl_total::current_schedule + .instance->getComputationalDag()); + const unsigned &target_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v); + + const double loss + = static_cast( + kl_total::current_schedule.instance + ->getComputationalDag() + .edge_comm_weight(out_edge)) + * kl_total::current_schedule.instance + ->communicationCosts(new_proc, target_proc) + * kl_total::current_schedule.comm_multiplier; + + if (kl_total::current_schedule.vector_schedule + .assignedSuperstep(target_v) + == current_step) { + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= loss; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][0] + -= loss; } - if ((current_step + 1 == kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) && - current_proc != kl_total::current_schedule.vector_schedule.assignedProcessor(target_v)) || - (current_step == kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) && - current_proc == kl_total::current_schedule.vector_schedule.assignedProcessor(target_v))) { - - kl_total::node_gains[node][current_proc][2] -= kl_total::penalty; - - } else if ((current_step == kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) && - current_proc != kl_total::current_schedule.vector_schedule.assignedProcessor(target_v)) || - (current_step - 1 == kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) && - current_proc == kl_total::current_schedule.vector_schedule.assignedProcessor(target_v))) { - - kl_total::node_gains[node][current_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) + - kl_total::reward; + if ((current_step + 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v)) + || (current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v))) { + kl_total::node_gains[node][current_proc][2] + -= kl_total::penalty; + + } else if ((current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v)) + || (current_step - 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v))) { + kl_total::node_gains[node][current_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; } } - for (const auto &in_edge : in_edges(node, kl_total::current_schedule.instance->getComputationalDag())) { - const auto &source_v = source(in_edge, kl_total::current_schedule.instance->getComputationalDag()); - const unsigned &source_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(source_v); - - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) * - kl_total::current_schedule.instance->communicationCosts(new_proc, source_proc) * - kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) { - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] += loss; - kl_total::node_change_in_costs[node][new_proc][2] -= loss; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step - 1) { - - kl_total::node_gains[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][0] -= loss; + for (const auto &in_edge : + in_edges(node, + kl_total::current_schedule.instance + ->getComputationalDag())) { + const auto &source_v + = source(in_edge, + kl_total::current_schedule + .instance->getComputationalDag()); + const unsigned &source_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v); + + const double loss + = static_cast( + kl_total::current_schedule.instance + ->getComputationalDag() + .edge_comm_weight(in_edge)) + * kl_total::current_schedule.instance + ->communicationCosts(new_proc, source_proc) + * kl_total::current_schedule.comm_multiplier; + + if (kl_total::current_schedule.vector_schedule + .assignedSuperstep(source_v) + == current_step) { + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= loss; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][0] + -= loss; } - if ((current_step - 1 == kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) && - current_proc != kl_total::current_schedule.vector_schedule.assignedProcessor(source_v)) || - (current_step == kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) && - current_proc == kl_total::current_schedule.vector_schedule.assignedProcessor(source_v))) { - - kl_total::node_gains[node][current_proc][0] -= kl_total::penalty; - - } else if ((current_step == kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) && - current_proc != kl_total::current_schedule.vector_schedule.assignedProcessor(source_v)) || - (current_step + 1 == kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) && - current_proc == kl_total::current_schedule.vector_schedule.assignedProcessor(source_v))) { - - kl_total::node_gains[node][current_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) + kl_total::reward; + if ((current_step - 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v)) + || (current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v))) { + kl_total::node_gains[node][current_proc][0] + -= kl_total::penalty; + + } else if ((current_step + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + && current_proc + != kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v)) + || (current_step + 1 + == kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + && current_proc + == kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v))) { + kl_total::node_gains[node][current_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; } } } else { - // current_proc != new_proc - for (const auto &out_edge : out_edges(node, kl_total::current_schedule.instance->getComputationalDag())) { - - const auto &target_v = target(out_edge, kl_total::current_schedule.instance->getComputationalDag()); - const unsigned &target_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(target_v); + for (const auto &out_edge : + out_edges(node, + kl_total::current_schedule.instance + ->getComputationalDag())) { + const auto &target_v + = target(out_edge, + kl_total::current_schedule + .instance->getComputationalDag()); + const unsigned &target_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v); if (target_proc == current_proc) { - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) { - - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) * - kl_total::current_schedule.instance->communicationCosts(new_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][1] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][1] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step) { + const double loss + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + * kl_total::current_schedule + .instance->communicationCosts(new_proc, target_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][1] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][1] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; } - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) { - - kl_total::node_gains[node][new_proc][1] -= kl_total::penalty; - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step) { + kl_total::node_gains[node][new_proc][1] + -= kl_total::penalty; + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; } } else if (target_proc == new_proc) { - - const double gain = - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, target_proc) * - kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) { - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step + 1) { - kl_total::node_gains[node][new_proc][2] += gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step - 1) { - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_change_in_costs[node][new_proc][0] -= gain; + const double gain + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, target_proc) + * kl_total::current_schedule + .comm_multiplier; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; } - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) { - - kl_total::node_gains[node][new_proc][1] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) + - kl_total::reward; - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) + - kl_total::reward; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step - 1) { - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) + - kl_total::reward; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; } } else { - assert(target_proc != current_proc && target_proc != new_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->communicationCosts(new_proc, target_proc) - - kl_total::current_schedule.instance->communicationCosts(current_proc, target_proc)) * - kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] -= kl_total::penalty; - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v) == current_step) { - - kl_total::node_gains[node][new_proc][0] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(out_edge)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->communicationCosts(new_proc, target_proc) + - kl_total::current_schedule + .instance->communicationCosts(current_proc, target_proc)) + * kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + -= kl_total::penalty; + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v) + == current_step) { + kl_total::node_gains[node][new_proc][0] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(out_edge)) + + kl_total::reward; } } } - for (const auto &in_edge : in_edges(node, kl_total::current_schedule.instance->getComputationalDag())) { - const auto &source_v = source(in_edge, kl_total::current_schedule.instance->getComputationalDag()); - - const unsigned &source_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(source_v); + for (const auto &in_edge : + in_edges(node, + kl_total::current_schedule.instance + ->getComputationalDag())) { + const auto &source_v + = source(in_edge, + kl_total::current_schedule + .instance->getComputationalDag()); + + const unsigned &source_proc + = kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v); if (source_proc == current_proc) { - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) { - - const double loss = - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, new_proc) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] -= loss; - kl_total::node_gains[node][new_proc][1] -= loss; - kl_total::node_gains[node][new_proc][2] -= loss; - - kl_total::node_change_in_costs[node][new_proc][0] += loss; - kl_total::node_change_in_costs[node][new_proc][1] += loss; - kl_total::node_change_in_costs[node][new_proc][2] += loss; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step) { + const double loss + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, new_proc) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + -= loss; + kl_total::node_gains[node][new_proc][1] + -= loss; + kl_total::node_gains[node][new_proc][2] + -= loss; + + kl_total::node_change_in_costs[node] + [new_proc][0] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][1] + += loss; + kl_total::node_change_in_costs[node] + [new_proc][2] + += loss; } - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; - kl_total::node_gains[node][new_proc][1] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step - 1) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; + kl_total::node_gains[node][new_proc][1] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; } } else if (source_proc == new_proc) { - assert(source_proc != current_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) * - kl_total::current_schedule.instance->communicationCosts(current_proc, new_proc) * - kl_total::current_schedule.comm_multiplier; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) { - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step - 1) { - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step + 1) { - kl_total::node_gains[node][new_proc][2] += gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; + const double gain + = static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + * kl_total::current_schedule + .instance->communicationCosts(current_proc, new_proc) + * kl_total::current_schedule + .comm_multiplier; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; } - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) { - - kl_total::node_gains[node][new_proc][1] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) + - kl_total::reward; - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) + - kl_total::reward; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step + 1) { - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) + - kl_total::reward; + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step) { + kl_total::node_gains[node][new_proc][1] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; + + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step + 1) { + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; } } else { - assert(source_proc != current_proc && source_proc != new_proc); - const double gain = - static_cast(kl_total::current_schedule.instance->communicationCosts(new_proc, source_proc) - - kl_total::current_schedule.instance->communicationCosts(current_proc, source_proc)) * - kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge) * - kl_total::current_schedule.comm_multiplier; - - kl_total::node_gains[node][new_proc][0] += gain; - kl_total::node_gains[node][new_proc][1] += gain; - kl_total::node_gains[node][new_proc][2] += gain; - - kl_total::node_change_in_costs[node][new_proc][0] -= gain; - kl_total::node_change_in_costs[node][new_proc][1] -= gain; - kl_total::node_change_in_costs[node][new_proc][2] -= gain; - - if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step - 1) { - - kl_total::node_gains[node][new_proc][0] -= kl_total::penalty; - - } else if (kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v) == current_step) { - - kl_total::node_gains[node][new_proc][2] += - static_cast(kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(in_edge)) + - kl_total::reward; + const double gain + = static_cast( + kl_total::current_schedule + .instance->communicationCosts(new_proc, source_proc) + - kl_total::current_schedule + .instance->communicationCosts(current_proc, source_proc)) + * kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge) + * kl_total::current_schedule + .comm_multiplier; + + kl_total::node_gains[node][new_proc][0] + += gain; + kl_total::node_gains[node][new_proc][1] + += gain; + kl_total::node_gains[node][new_proc][2] + += gain; + + kl_total::node_change_in_costs[node] + [new_proc][0] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][1] + -= gain; + kl_total::node_change_in_costs[node] + [new_proc][2] + -= gain; + + if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step - 1) { + kl_total::node_gains[node][new_proc][0] + -= kl_total::penalty; + + } else if (kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v) + == current_step) { + kl_total::node_gains[node][new_proc][2] + += static_cast( + kl_total::current_schedule + .instance->getComputationalDag() + .edge_comm_weight(in_edge)) + + kl_total::reward; } } } @@ -618,52 +1090,75 @@ class kl_total_cut : public kl_total::current_schedule.num_steps(); step++) { - work_costs += kl_total::current_schedule.step_max_work[step]; + for (unsigned step = 0; + step < kl_total::current_schedule.num_steps(); + step++) { + work_costs + += kl_total::current_schedule.step_max_work[step]; } double comm_costs = 0; - for (const auto &edge : edges(kl_total::current_schedule.instance->getComputationalDag())) { - - const vertex_idx_t &source_v = source(edge, kl_total::current_schedule.instance->getComputationalDag()); - const vertex_idx_t &target_v = target(edge, kl_total::current_schedule.instance->getComputationalDag()); - const unsigned &source_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(source_v); - const unsigned &target_proc = kl_total::current_schedule.vector_schedule.assignedProcessor(target_v); - const unsigned &source_step = kl_total::current_schedule.vector_schedule.assignedSuperstep(source_v); - const unsigned &target_step = kl_total::current_schedule.vector_schedule.assignedSuperstep(target_v); + for (const auto &edge : edges(kl_total::current_schedule + .instance->getComputationalDag())) { + const vertex_idx_t &source_v + = source(edge, + kl_total::current_schedule.instance + ->getComputationalDag()); + const vertex_idx_t &target_v + = target(edge, + kl_total::current_schedule.instance + ->getComputationalDag()); + const unsigned &source_proc = kl_total::current_schedule + .vector_schedule.assignedProcessor(source_v); + const unsigned &target_proc = kl_total::current_schedule + .vector_schedule.assignedProcessor(target_v); + const unsigned &source_step = kl_total::current_schedule + .vector_schedule.assignedSuperstep(source_v); + const unsigned &target_step = kl_total::current_schedule + .vector_schedule.assignedSuperstep(target_v); if (source_proc != target_proc || source_step != target_step) { - - if constexpr (kl_total::current_schedule.use_node_communication_costs) { - comm_costs += kl_total::current_schedule.instance->getComputationalDag().vertex_comm_weight(source_v) * - kl_total::current_schedule.instance->communicationCosts(source_proc, target_proc); + if constexpr (kl_total::current_schedule + .use_node_communication_costs) { + comm_costs + += kl_total::current_schedule.instance + ->getComputationalDag() + .vertex_comm_weight(source_v) + * kl_total::current_schedule.instance + ->communicationCosts(source_proc, target_proc); } else { - comm_costs += kl_total::current_schedule.instance->getComputationalDag().edge_comm_weight(edge) * - kl_total::current_schedule.instance->communicationCosts(source_proc, target_proc); + comm_costs + += kl_total::current_schedule.instance + ->getComputationalDag() + .edge_comm_weight(edge) + * kl_total::current_schedule.instance + ->communicationCosts(source_proc, target_proc); } } } - kl_total::current_schedule.current_cost = - work_costs + comm_costs * kl_total::current_schedule.comm_multiplier + - (kl_total::current_schedule.num_steps() - 1) * static_cast(kl_total::current_schedule.instance->synchronisationCosts()); + kl_total::current_schedule.current_cost + = work_costs + + comm_costs + * kl_total::current_schedule.comm_multiplier + + (kl_total::current_schedule.num_steps() - 1) + * static_cast(kl_total::current_schedule + .instance->synchronisationCosts()); - return kl_total::current_schedule.current_cost; + return kl_total::current_schedule.current_cost; } public: - kl_total_cut() : kl_total() {} + kl_total_cut() : kl_total() {} virtual ~kl_total_cut() = default; virtual std::string getScheduleName() const override { return "KLTotalCut"; } }; -template +template class kl_total_cut_test : public kl_total_cut { - public: kl_total_cut_test() : kl_total_cut() {} @@ -671,13 +1166,18 @@ class kl_total_cut_test : public kl_total_cut virtual std::string getScheduleName() const override { return "KLTotalCutTest"; } - kl_current_schedule_total &get_current_schedule() { return kl_total::current_schedule; } + kl_current_schedule_total &get_current_schedule() { + return kl_total::current_schedule; + } auto &get_node_gains() { return kl_total::node_gains; } + auto &get_node_change_in_costs() { return kl_total::node_change_in_costs; } + auto &get_max_gain_heap() { return kl_total::max_gain_heap; } - void initialize_gain_heap_test(const std::unordered_set> &nodes, double reward_ = 0.0, + void initialize_gain_heap_test(const std::unordered_set> &nodes, + double reward_ = 0.0, double penalty_ = 0.0) { kl_total::reward = reward_; kl_total::penalty = penalty_; @@ -686,55 +1186,60 @@ class kl_total_cut_test : public kl_total_cut } void test_setup_schedule(BspSchedule &schedule) { - kl_total::current_schedule.instance = &schedule.getInstance(); kl_total::best_schedule = &schedule; - kl_total::num_nodes = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs = kl_total::current_schedule.instance->numberOfProcessors(); + kl_total::num_nodes + = kl_total::current_schedule.instance->numberOfVertices(); + kl_total::num_procs + = kl_total::current_schedule.instance->numberOfProcessors(); kl_total::set_parameters(); kl_total::initialize_datastructures(); } RETURN_STATUS improve_schedule_test_1(BspSchedule &schedule) { - kl_total::current_schedule.instance = &schedule.getInstance(); kl_total::best_schedule = &schedule; - kl_total::num_nodes = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs = kl_total::current_schedule.instance->numberOfProcessors(); + kl_total::num_nodes + = kl_total::current_schedule.instance->numberOfVertices(); + kl_total::num_procs + = kl_total::current_schedule.instance->numberOfProcessors(); kl_total::set_parameters(); kl_total::initialize_datastructures(); bool improvement_found = kl_total::run_local_search_simple(); - if (improvement_found) + if (improvement_found) { return RETURN_STATUS::OSP_SUCCESS; - else + } else { return RETURN_STATUS::BEST_FOUND; + } } RETURN_STATUS improve_schedule_test_2(BspSchedule &schedule) { - kl_total::current_schedule.instance = &schedule.getInstance(); kl_total::best_schedule = &schedule; - kl_total::num_nodes = kl_total::current_schedule.instance->numberOfVertices(); - kl_total::num_procs = kl_total::current_schedule.instance->numberOfProcessors(); + kl_total::num_nodes + = kl_total::current_schedule.instance->numberOfVertices(); + kl_total::num_procs + = kl_total::current_schedule.instance->numberOfProcessors(); kl_total::set_parameters(); kl_total::initialize_datastructures(); bool improvement_found = kl_total::run_local_search_unlock_delay(); - if (improvement_found) + if (improvement_found) { return RETURN_STATUS::OSP_SUCCESS; - else + } else { return RETURN_STATUS::BEST_FOUND; + } } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp index 8fb1ceff..07537551 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/comm_cost_policies.hpp @@ -27,17 +27,25 @@ namespace osp { struct EagerCommCostPolicy { using ValueType = unsigned; - template - static inline void attribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step, - const unsigned u_proc, const unsigned v_proc, const unsigned v_step, + template + static inline void attribute_communication(DS &ds, + const comm_weight_t &cost, + const unsigned u_step, + const unsigned u_proc, + const unsigned v_proc, + const unsigned v_step, const ValueType &val) { ds.step_proc_receive(u_step, v_proc) += cost; ds.step_proc_send(u_step, u_proc) += cost; } - template - static inline void unattribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step, - const unsigned u_proc, const unsigned v_proc, const unsigned v_step, + template + static inline void unattribute_communication(DS &ds, + const comm_weight_t &cost, + const unsigned u_step, + const unsigned u_proc, + const unsigned v_proc, + const unsigned v_step, const ValueType &val) { ds.step_proc_receive(u_step, v_proc) -= cost; ds.step_proc_send(u_step, u_proc) -= cost; @@ -59,9 +67,13 @@ struct EagerCommCostPolicy { static inline bool is_single_entry(const ValueType &val) { return val == 1; } - template - static inline void calculate_delta_remove(const ValueType &val, unsigned child_step, unsigned parent_step, - unsigned parent_proc, unsigned child_proc, comm_weight_t cost, + template + static inline void calculate_delta_remove(const ValueType &val, + unsigned child_step, + unsigned parent_step, + unsigned parent_proc, + unsigned child_proc, + comm_weight_t cost, DeltaTracker &dt) { if (val == 1) { dt.add(true, parent_step, child_proc, -cost); @@ -69,9 +81,13 @@ struct EagerCommCostPolicy { } } - template - static inline void calculate_delta_add(const ValueType &val, unsigned child_step, unsigned parent_step, - unsigned parent_proc, unsigned child_proc, comm_weight_t cost, + template + static inline void calculate_delta_add(const ValueType &val, + unsigned child_step, + unsigned parent_step, + unsigned parent_proc, + unsigned child_proc, + comm_weight_t cost, DeltaTracker &dt) { if (val == 0) { dt.add(true, parent_step, child_proc, cost); @@ -79,9 +95,9 @@ struct EagerCommCostPolicy { } } - template - static inline void calculate_delta_outgoing(const ValueType &val, unsigned node_step, unsigned node_proc, - unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) { + template + static inline void calculate_delta_outgoing( + const ValueType &val, unsigned node_step, unsigned node_proc, unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) { if (val > 0) { comm_weight_t total_cost = cost * val; dt.add(true, node_step, child_proc, total_cost); @@ -93,15 +109,20 @@ struct EagerCommCostPolicy { struct LazyCommCostPolicy { using ValueType = std::vector; - template - static inline void attribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step, - const unsigned u_proc, const unsigned v_proc, const unsigned v_step, + template + static inline void attribute_communication(DS &ds, + const comm_weight_t &cost, + const unsigned u_step, + const unsigned u_proc, + const unsigned v_proc, + const unsigned v_step, const ValueType &val) { // val contains v_step (already added). // Check if v_step is the new minimum. unsigned min_step = std::numeric_limits::max(); - for (unsigned s : val) + for (unsigned s : val) { min_step = std::min(min_step, s); + } if (min_step == v_step) { // Check if it was strictly smaller than previous min. @@ -123,9 +144,13 @@ struct LazyCommCostPolicy { } } - template - static inline void unattribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step, - const unsigned u_proc, const unsigned v_proc, const unsigned v_step, + template + static inline void unattribute_communication(DS &ds, + const comm_weight_t &cost, + const unsigned u_step, + const unsigned u_proc, + const unsigned v_proc, + const unsigned v_step, const ValueType &val) { // val is state AFTER removal. @@ -138,8 +163,9 @@ struct LazyCommCostPolicy { } else { // Check if v_step was the unique minimum. unsigned new_min = val[0]; - for (unsigned s : val) + for (unsigned s : val) { new_min = std::min(new_min, s); + } if (v_step < new_min) { // v_step was the unique minimum. @@ -157,11 +183,13 @@ struct LazyCommCostPolicy { static inline bool add_child(ValueType &val, unsigned step) { val.push_back(step); - if (val.size() == 1) + if (val.size() == 1) { return true; + } unsigned min_s = val[0]; - for (unsigned s : val) + for (unsigned s : val) { min_s = std::min(min_s, s); + } return step == min_s; } @@ -173,8 +201,9 @@ struct LazyCommCostPolicy { return true; } unsigned new_min = val[0]; - for (unsigned s : val) + for (unsigned s : val) { new_min = std::min(new_min, s); + } bool res = step < new_min; return res; } @@ -187,21 +216,29 @@ struct LazyCommCostPolicy { static inline bool is_single_entry(const ValueType &val) { return val.size() == 1; } - template - static inline void calculate_delta_remove(const ValueType &val, unsigned child_step, unsigned parent_step, - unsigned parent_proc, unsigned child_proc, comm_weight_t cost, + template + static inline void calculate_delta_remove(const ValueType &val, + unsigned child_step, + unsigned parent_step, + unsigned parent_proc, + unsigned child_proc, + comm_weight_t cost, DeltaTracker &dt) { - if (val.empty()) + if (val.empty()) { return; + } unsigned min_s = val[0]; - for (unsigned s : val) + for (unsigned s : val) { min_s = std::min(min_s, s); + } if (child_step == min_s) { int count = 0; - for (unsigned s : val) - if (s == min_s) + for (unsigned s : val) { + if (s == min_s) { count++; + } + } if (count == 1) { if (min_s > 0) { @@ -211,8 +248,9 @@ struct LazyCommCostPolicy { if (val.size() > 1) { unsigned next_min = std::numeric_limits::max(); for (unsigned s : val) { - if (s != min_s) + if (s != min_s) { next_min = std::min(next_min, s); + } } if (next_min != std::numeric_limits::max() && next_min > 0) { dt.add(true, next_min - 1, child_proc, cost); @@ -223,9 +261,13 @@ struct LazyCommCostPolicy { } } - template - static inline void calculate_delta_add(const ValueType &val, unsigned child_step, unsigned parent_step, - unsigned parent_proc, unsigned child_proc, comm_weight_t cost, + template + static inline void calculate_delta_add(const ValueType &val, + unsigned child_step, + unsigned parent_step, + unsigned parent_proc, + unsigned child_proc, + comm_weight_t cost, DeltaTracker &dt) { if (val.empty()) { if (child_step > 0) { @@ -234,8 +276,9 @@ struct LazyCommCostPolicy { } } else { unsigned min_s = val[0]; - for (unsigned s : val) + for (unsigned s : val) { min_s = std::min(min_s, s); + } if (child_step < min_s) { if (min_s > 0) { @@ -250,9 +293,9 @@ struct LazyCommCostPolicy { } } - template - static inline void calculate_delta_outgoing(const ValueType &val, unsigned node_step, unsigned node_proc, - unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) { + template + static inline void calculate_delta_outgoing( + const ValueType &val, unsigned node_step, unsigned node_proc, unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) { for (unsigned s : val) { if (s > 0) { dt.add(true, s - 1, child_proc, cost); @@ -265,20 +308,26 @@ struct LazyCommCostPolicy { struct BufferedCommCostPolicy { using ValueType = std::vector; - template - static inline void attribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step, - const unsigned u_proc, const unsigned v_proc, const unsigned v_step, + template + static inline void attribute_communication(DS &ds, + const comm_weight_t &cost, + const unsigned u_step, + const unsigned u_proc, + const unsigned v_proc, + const unsigned v_step, const ValueType &val) { // Buffered: Send at u_step, Receive at v_step - 1. unsigned min_step = std::numeric_limits::max(); - for (unsigned s : val) + for (unsigned s : val) { min_step = std::min(min_step, s); + } if (min_step == v_step) { unsigned prev_min = std::numeric_limits::max(); - for (size_t i = 0; i < val.size() - 1; ++i) + for (size_t i = 0; i < val.size() - 1; ++i) { prev_min = std::min(prev_min, val[i]); + } if (v_step < prev_min) { if (prev_min != std::numeric_limits::max() && prev_min > 0) { @@ -297,23 +346,28 @@ struct BufferedCommCostPolicy { } } - template - static inline void unattribute_communication(DS &ds, const comm_weight_t &cost, const unsigned u_step, - const unsigned u_proc, const unsigned v_proc, const unsigned v_step, + template + static inline void unattribute_communication(DS &ds, + const comm_weight_t &cost, + const unsigned u_step, + const unsigned u_proc, + const unsigned v_proc, + const unsigned v_step, const ValueType &val) { // val is state AFTER removal. if (val.empty()) { // Removed last child. - ds.step_proc_send(u_step, u_proc) -= cost; // Send side + ds.step_proc_send(u_step, u_proc) -= cost; // Send side if (v_step > 0) { - ds.step_proc_receive(v_step - 1, v_proc) -= cost; // Recv side + ds.step_proc_receive(v_step - 1, v_proc) -= cost; // Recv side } } else { // Check if v_step was unique minimum for Recv side. unsigned new_min = val[0]; - for (unsigned s : val) + for (unsigned s : val) { new_min = std::min(new_min, s); + } if (v_step < new_min) { if (v_step > 0) { @@ -329,76 +383,96 @@ struct BufferedCommCostPolicy { static inline bool add_child(ValueType &val, unsigned step) { val.push_back(step); - if (val.size() == 1) - return true; // Need update for send side + if (val.size() == 1) { + return true; // Need update for send side + } unsigned min_s = val[0]; - for (unsigned s : val) + for (unsigned s : val) { min_s = std::min(min_s, s); - return step == min_s; // Need update for recv side + } + return step == min_s; // Need update for recv side } static inline bool remove_child(ValueType &val, unsigned step) { auto it = std::find(val.begin(), val.end(), step); if (it != val.end()) { val.erase(it); - if (val.empty()) - return true; // Need update for send side + if (val.empty()) { + return true; // Need update for send side + } unsigned new_min = val[0]; - for (unsigned s : val) + for (unsigned s : val) { new_min = std::min(new_min, s); - return step < new_min; // Need update for recv side + } + return step < new_min; // Need update for recv side } return false; } static inline void reset(ValueType &val) { val.clear(); } + static inline bool has_entry(const ValueType &val) { return !val.empty(); } + static inline bool is_single_entry(const ValueType &val) { return val.size() == 1; } - template - static inline void calculate_delta_remove(const ValueType &val, unsigned child_step, unsigned parent_step, - unsigned parent_proc, unsigned child_proc, comm_weight_t cost, + template + static inline void calculate_delta_remove(const ValueType &val, + unsigned child_step, + unsigned parent_step, + unsigned parent_proc, + unsigned child_proc, + comm_weight_t cost, DeltaTracker &dt) { // Lazy: Send and Recv are both at min(child_steps) - 1. - if (val.empty()) + if (val.empty()) { return; + } unsigned min_s = val[0]; - for (unsigned s : val) + for (unsigned s : val) { min_s = std::min(min_s, s); + } if (child_step == min_s) { int count = 0; - for (unsigned s : val) - if (s == min_s) + for (unsigned s : val) { + if (s == min_s) { count++; + } + } if (count == 1) { // Unique min being removed. if (min_s > 0) { - dt.add(true, min_s - 1, child_proc, -cost); // Remove Recv - dt.add(false, min_s - 1, parent_proc, -cost); // Remove Send + dt.add(true, min_s - 1, child_proc, -cost); // Remove Recv + dt.add(false, min_s - 1, parent_proc, -cost); // Remove Send } if (val.size() > 1) { unsigned next_min = std::numeric_limits::max(); - for (unsigned s : val) - if (s != min_s) + for (unsigned s : val) { + if (s != min_s) { next_min = std::min(next_min, s); + } + } if (next_min != std::numeric_limits::max() && next_min > 0) { - dt.add(true, next_min - 1, child_proc, cost); // Add Recv at new min - dt.add(false, next_min - 1, parent_proc, cost); // Add Send at new min + dt.add(true, next_min - 1, child_proc, cost); // Add Recv at new min + dt.add(false, next_min - 1, parent_proc, cost); // Add Send at new min } } } } } - template - static inline void calculate_delta_add(const ValueType &val, unsigned child_step, unsigned parent_step, - unsigned parent_proc, unsigned child_proc, comm_weight_t cost, + template + static inline void calculate_delta_add(const ValueType &val, + unsigned child_step, + unsigned parent_step, + unsigned parent_proc, + unsigned child_proc, + comm_weight_t cost, DeltaTracker &dt) { // Lazy: Send and Recv are both at min(child_steps) - 1. @@ -410,26 +484,27 @@ struct BufferedCommCostPolicy { } } else { unsigned min_s = val[0]; - for (unsigned s : val) + for (unsigned s : val) { min_s = std::min(min_s, s); + } if (child_step < min_s) { // New global minimum. if (min_s > 0) { - dt.add(true, min_s - 1, child_proc, -cost); // Remove old Recv - dt.add(false, min_s - 1, parent_proc, -cost); // Remove old Send + dt.add(true, min_s - 1, child_proc, -cost); // Remove old Recv + dt.add(false, min_s - 1, parent_proc, -cost); // Remove old Send } if (child_step > 0) { - dt.add(true, child_step - 1, child_proc, cost); // Add new Recv - dt.add(false, child_step - 1, parent_proc, cost); // Add new Send + dt.add(true, child_step - 1, child_proc, cost); // Add new Recv + dt.add(false, child_step - 1, parent_proc, cost); // Add new Send } } } } - template - static inline void calculate_delta_outgoing(const ValueType &val, unsigned node_step, unsigned node_proc, - unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) { + template + static inline void calculate_delta_outgoing( + const ValueType &val, unsigned node_step, unsigned node_proc, unsigned child_proc, comm_weight_t cost, DeltaTracker &dt) { // Buffered Outgoing (Node -> Children) // Node is parent (sender). Pays at node_step. // Children are receivers. Pay at child_step - 1. @@ -444,8 +519,9 @@ struct BufferedCommCostPolicy { // But we only pay at min(val) - 1. if (!val.empty()) { unsigned min_s = val[0]; - for (unsigned s : val) + for (unsigned s : val) { min_s = std::min(min_s, s); + } if (min_s > 0) { dt.add(true, min_s - 1, child_proc, cost); @@ -454,4 +530,4 @@ struct BufferedCommCostPolicy { } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp index 623d51d8..e86baada 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/generic_lambda_container.hpp @@ -24,12 +24,12 @@ limitations under the License. namespace osp { -template +template struct DefaultHasEntry { static inline bool has_entry(const T &val) { return val != 0; } }; -template +template struct DefaultHasEntry> { static inline bool has_entry(const std::vector &val) { return !val.empty(); } }; @@ -40,9 +40,8 @@ struct DefaultHasEntry> { * This structure tracks information about children assigned to each processor. * It uses a 2D vector for dense data. */ -template> +template > struct generic_lambda_vector_container { - /** * @brief Range adapter for iterating over non-zero/non-empty processor entries. */ @@ -82,12 +81,14 @@ struct generic_lambda_vector_container { value_type operator*() const { return std::make_pair(index_, vec_[index_]); } bool operator==(const lambda_vector_iterator &other) const { return index_ == other.index_; } + bool operator!=(const lambda_vector_iterator &other) const { return !(*this == other); } }; lambda_vector_range(const std::vector &vec) : vec_(vec) {} lambda_vector_iterator begin() { return lambda_vector_iterator(vec_); } + lambda_vector_iterator end() { return lambda_vector_iterator(vec_, static_cast(vec_.size())); } }; @@ -110,15 +111,11 @@ struct generic_lambda_vector_container { return HasEntry::has_entry(node_lambda_vec[node][proc]); } - inline ValueType &get_proc_entry(const vertex_idx_t node, const unsigned proc) { - return node_lambda_vec[node][proc]; - } + inline ValueType &get_proc_entry(const vertex_idx_t node, const unsigned proc) { return node_lambda_vec[node][proc]; } - inline ValueType get_proc_entry(const vertex_idx_t node, const unsigned proc) const { - return node_lambda_vec[node][proc]; - } + inline ValueType get_proc_entry(const vertex_idx_t node, const unsigned proc) const { return node_lambda_vec[node][proc]; } inline auto iterate_proc_entries(const vertex_idx_t node) { return lambda_vector_range(node_lambda_vec[node]); } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp index 2cf0c631..7fd3693f 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_bsp_comm_cost.hpp @@ -18,20 +18,21 @@ limitations under the License. #pragma once +#include + #include "../kl_active_schedule.hpp" #include "../kl_improver.hpp" #include "max_comm_datastructure.hpp" -#include namespace osp { // A lightweight helper to track deltas without hash maps or repeated allocations. // Uses a dense vector for O(1) lookups and a sparse list for fast iteration/clearing. -template +template struct FastDeltaTracker { - std::vector dense_vals; // Size: num_procs - std::vector dirty_procs; // List of modified indices - std::vector proc_dirty_index; // Map proc -> index in dirty_procs (num_procs if not dirty) + std::vector dense_vals; // Size: num_procs + std::vector dirty_procs; // List of modified indices + std::vector proc_dirty_index; // Map proc -> index in dirty_procs (num_procs if not dirty) unsigned num_procs = 0; void initialize(unsigned n_procs) { @@ -44,8 +45,9 @@ struct FastDeltaTracker { } inline void add(unsigned proc, comm_weight_t val) { - if (val == 0) + if (val == 0) { return; + } // If currently 0, it is becoming dirty if (dense_vals[proc] == 0) { @@ -71,8 +73,9 @@ struct FastDeltaTracker { } inline comm_weight_t get(unsigned proc) const { - if (proc < dense_vals.size()) + if (proc < dense_vals.size()) { return dense_vals[proc]; + } return 0; } @@ -85,9 +88,8 @@ struct FastDeltaTracker { } }; -template +template struct kl_bsp_comm_cost_function { - using VertexType = vertex_idx_t; using kl_move = kl_move_struct; using kl_gain_update_info = kl_update_info; @@ -104,22 +106,24 @@ struct kl_bsp_comm_cost_function { max_comm_datastructure> comm_ds; inline cost_t get_comm_multiplier() { return 1; } + inline cost_t get_max_comm_weight() { return comm_ds.max_comm_weight; } + inline cost_t get_max_comm_weight_multiplied() { return comm_ds.max_comm_weight; } + inline const std::string name() const { return "bsp_comm"; } - inline bool is_compatible(VertexType node, unsigned proc) { - return active_schedule->getInstance().isCompatible(node, proc); - } + + inline bool is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); } + inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { return (node_step < window_size + start_step) ? window_size - (node_step - start_step) : 0; } + inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { - return (node_step + window_size <= end_step) ? window_range - : window_range - (node_step + window_size - end_step); + return (node_step + window_size <= end_step) ? window_range : window_range - (node_step + window_size - end_step); } - void initialize(kl_active_schedule &sched, - CompatibleProcessorRange &p_range) { + void initialize(kl_active_schedule &sched, CompatibleProcessorRange &p_range) { active_schedule = &sched; proc_range = &p_range; instance = &sched.getInstance(); @@ -135,14 +139,13 @@ struct kl_bsp_comm_cost_function { return comm_ds.get_pre_move_comm_data(move); } - void compute_send_receive_datastructures() { - comm_ds.compute_comm_datastructures(0, active_schedule->num_steps() - 1); - } + void compute_send_receive_datastructures() { comm_ds.compute_comm_datastructures(0, active_schedule->num_steps() - 1); } - template + template cost_t compute_schedule_cost() { - if constexpr (compute_datastructures) + if constexpr (compute_datastructures) { compute_send_receive_datastructures(); + } cost_t total_cost = 0; for (unsigned step = 0; step < active_schedule->num_steps(); step++) { @@ -165,11 +168,11 @@ struct kl_bsp_comm_cost_function { // Structure to hold thread-local scratchpads to avoid re-allocation. struct ScratchData { - std::vector> send_deltas; // Size: num_steps - std::vector> recv_deltas; // Size: num_steps + std::vector> send_deltas; // Size: num_steps + std::vector> recv_deltas; // Size: num_steps - std::vector active_steps; // List of steps touched in current operation - std::vector step_is_active; // Fast lookup for active steps + std::vector active_steps; // List of steps touched in current operation + std::vector step_is_active; // Fast lookup for active steps std::vector> child_cost_buffer; @@ -181,10 +184,12 @@ struct kl_bsp_comm_cost_function { active_steps.reserve(n_steps); } - for (auto &tracker : send_deltas) + for (auto &tracker : send_deltas) { tracker.initialize(n_procs); - for (auto &tracker : recv_deltas) + } + for (auto &tracker : recv_deltas) { tracker.initialize(n_procs); + } child_cost_buffer.reserve(n_procs); } @@ -207,10 +212,13 @@ struct kl_bsp_comm_cost_function { } }; - template - void compute_comm_affinity(VertexType node, affinity_table_t &affinity_table_node, const cost_t &penalty, - const cost_t &reward, const unsigned start_step, const unsigned end_step) { - + template + void compute_comm_affinity(VertexType node, + affinity_table_t &affinity_table_node, + const cost_t &penalty, + const cost_t &reward, + const unsigned start_step, + const unsigned end_step) { // Use static thread_local scratchpad to avoid allocation in hot loop static thread_local ScratchData scratch; scratch.init(active_schedule->num_steps(), instance->numberOfProcessors()); @@ -286,14 +294,16 @@ struct kl_bsp_comm_cost_function { const auto ¤t_vec_schedule = active_schedule->getVectorSchedule(); auto add_delta = [&](bool is_recv, unsigned step, unsigned proc, comm_weight_t val) { - if (val == 0) + if (val == 0) { return; + } if (step < active_schedule->num_steps()) { scratch.mark_active(step); - if (is_recv) + if (is_recv) { scratch.recv_deltas[step].add(proc, val); - else + } else { scratch.send_deltas[step].add(proc, val); + } } }; @@ -337,7 +347,6 @@ struct kl_bsp_comm_cost_function { // 2. Add Node to Target (Iterate candidates) for (const unsigned p_to : proc_range->compatible_processors_vertex(node)) { - // --- Part A: Incoming Edges (Parents -> p_to) --- // These updates are specific to p_to but independent of s_to. // We apply them, run the s_to loop, then revert them. @@ -352,8 +361,9 @@ struct kl_bsp_comm_cost_function { unsigned count_on_p_to = comm_ds.node_lambda_map.get_proc_entry(u, p_to); if (p_to == node_proc) { - if (count_on_p_to > 0) + if (count_on_p_to > 0) { count_on_p_to--; + } } if (count_on_p_to > 0) { @@ -404,11 +414,8 @@ struct kl_bsp_comm_cost_function { for (unsigned step : scratch.active_steps) { // Check if dirty_procs is empty implies no change for this step // FastDeltaTracker ensures dirty_procs is empty if all deltas summed to 0 - if (!scratch.send_deltas[step].dirty_procs.empty() || - !scratch.recv_deltas[step].dirty_procs.empty()) { - - total_change += - calculate_step_cost_change(step, scratch.send_deltas[step], scratch.recv_deltas[step]); + if (!scratch.send_deltas[step].dirty_procs.empty() || !scratch.recv_deltas[step].dirty_procs.empty()) { + total_change += calculate_step_cost_change(step, scratch.send_deltas[step], scratch.recv_deltas[step]); } } @@ -433,11 +440,13 @@ struct kl_bsp_comm_cost_function { bool already_sending_to_p_to = false; unsigned count_on_p_to = comm_ds.node_lambda_map.get_proc_entry(u, p_to); if (p_to == node_proc) { - if (count_on_p_to > 0) + if (count_on_p_to > 0) { count_on_p_to--; + } } - if (count_on_p_to > 0) + if (count_on_p_to > 0) { already_sending_to_p_to = true; + } if (!already_sending_to_p_to) { const comm_weight_t cost = comm_w_u * instance->sendCosts(u_proc, p_to); @@ -451,9 +460,9 @@ struct kl_bsp_comm_cost_function { } } - comm_weight_t calculate_step_cost_change(unsigned step, const FastDeltaTracker &delta_send, + comm_weight_t calculate_step_cost_change(unsigned step, + const FastDeltaTracker &delta_send, const FastDeltaTracker &delta_recv) { - comm_weight_t old_max = comm_ds.step_max_comm(step); comm_weight_t second_max = comm_ds.step_second_max_comm(step); unsigned old_max_count = comm_ds.step_max_comm_count(step); @@ -469,10 +478,12 @@ struct kl_bsp_comm_cost_function { comm_weight_t current_val = comm_ds.step_proc_send(step, proc); comm_weight_t new_val = current_val + delta; - if (new_val > new_global_max) + if (new_val > new_global_max) { new_global_max = new_val; - if (delta < 0 && current_val == old_max) + } + if (delta < 0 && current_val == old_max) { reduced_max_instances++; + } } // 2. Check modified receives (Iterate sparse dirty list) @@ -482,10 +493,12 @@ struct kl_bsp_comm_cost_function { comm_weight_t current_val = comm_ds.step_proc_receive(step, proc); comm_weight_t new_val = current_val + delta; - if (new_val > new_global_max) + if (new_val > new_global_max) { new_global_max = new_val; - if (delta < 0 && current_val == old_max) + } + if (delta < 0 && current_val == old_max) { reduced_max_instances++; + } } // 3. Determine result @@ -498,21 +511,25 @@ struct kl_bsp_comm_cost_function { return std::max(new_global_max, second_max) - old_max; } - template - void update_node_comm_affinity(const kl_move &move, thread_data_t &thread_data, const cost_t &penalty, - const cost_t &reward, std::map &, + template + void update_node_comm_affinity(const kl_move &move, + thread_data_t &thread_data, + const cost_t &penalty, + const cost_t &reward, + std::map &, std::vector &new_nodes) { - const unsigned start_step = thread_data.start_step; const unsigned end_step = thread_data.end_step; for (const auto &target : instance->getComputationalDag().children(move.node)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if (target_step < start_step || target_step > end_step) + if (target_step < start_step || target_step > end_step) { continue; + } - if (thread_data.lock_manager.is_locked(target)) + if (thread_data.lock_manager.is_locked(target)) { continue; + } if (not thread_data.affinity_table.is_selected(target)) { new_nodes.push_back(target); @@ -590,11 +607,13 @@ struct kl_bsp_comm_cost_function { for (const auto &source : instance->getComputationalDag().parents(move.node)) { const unsigned source_step = active_schedule->assigned_superstep(source); - if (source_step < start_step || source_step > end_step) + if (source_step < start_step || source_step > end_step) { continue; + } - if (thread_data.lock_manager.is_locked(source)) + if (thread_data.lock_manager.is_locked(source)) { continue; + } if (not thread_data.affinity_table.is_selected(source)) { new_nodes.push_back(source); @@ -666,4 +685,4 @@ struct kl_bsp_comm_cost_function { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp index caaad9ca..898f2df4 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_hyper_total_comm_cost.hpp @@ -24,9 +24,8 @@ limitations under the License. namespace osp { -template +template struct kl_hyper_total_comm_cost_function { - using VertexType = vertex_idx_t; using kl_move = kl_move_struct; using kl_gain_update_info = kl_update_info; @@ -47,9 +46,13 @@ struct kl_hyper_total_comm_cost_function { lambda_vector_container node_lambda_map; inline cost_t get_comm_multiplier() { return comm_multiplier; } + inline cost_t get_max_comm_weight() { return max_comm_weight; } + inline cost_t get_max_comm_weight_multiplied() { return max_comm_weight * comm_multiplier; } + const std::string name() const { return "toal_comm_cost"; } + inline bool is_compatible(VertexType node, unsigned proc) { return active_schedule->getInstance().isCompatible(node, proc); } void initialize(kl_active_schedule &sched, CompatibleProcessorRange &p_range) { @@ -85,12 +88,15 @@ struct kl_hyper_total_comm_cost_function { const unsigned target_proc = active_schedule->assigned_processor(target); if (node_lambda_map.increase_proc_count(vertex, target_proc)) { - comm_costs += v_comm_cost * instance->communicationCosts(vertex_proc, target_proc); // is 0 if target_proc == vertex_proc + comm_costs + += v_comm_cost + * instance->communicationCosts(vertex_proc, target_proc); // is 0 if target_proc == vertex_proc } } } - return work_costs + comm_costs * comm_multiplier + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); + return work_costs + comm_costs * comm_multiplier + + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); } cost_t compute_schedule_cost_test() { @@ -109,15 +115,17 @@ struct kl_hyper_total_comm_cost_function { } } - return work_costs + comm_costs * comm_multiplier + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); + return work_costs + comm_costs * comm_multiplier + + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); } inline void update_datastructure_after_move(const kl_move &move, const unsigned start_step, const unsigned end_step) { if (move.to_proc != move.from_proc) { for (const auto &source : instance->getComputationalDag().parents(move.node)) { const unsigned source_step = active_schedule->assigned_superstep(source); - if (source_step < start_step || source_step > end_step) + if (source_step < start_step || source_step > end_step) { continue; + } update_source_after_move(move, source); } } @@ -128,19 +136,25 @@ struct kl_hyper_total_comm_cost_function { node_lambda_map.increase_proc_count(source, move.to_proc); } - template - void update_node_comm_affinity(const kl_move &move, thread_data_t &thread_data, const cost_t &penalty, const cost_t &reward, std::map &max_gain_recompute, std::vector &new_nodes) { - + template + void update_node_comm_affinity(const kl_move &move, + thread_data_t &thread_data, + const cost_t &penalty, + const cost_t &reward, + std::map &max_gain_recompute, + std::vector &new_nodes) { const unsigned start_step = thread_data.start_step; const unsigned end_step = thread_data.end_step; for (const auto &target : instance->getComputationalDag().children(move.node)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if (target_step < start_step || target_step > end_step) + if (target_step < start_step || target_step > end_step) { continue; + } - if (thread_data.lock_manager.is_locked(target)) + if (thread_data.lock_manager.is_locked(target)) { continue; + } if (not thread_data.affinity_table.is_selected(target)) { new_nodes.push_back(target); @@ -226,8 +240,9 @@ struct kl_hyper_total_comm_cost_function { const unsigned window_bound = end_idx(target_step, end_step); for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - if (p == target_proc) + if (p == target_proc) { continue; + } if (node_lambda_map.get_proc_entry(move.node, target_proc) == 1) { for (unsigned idx = target_start_idx; idx < window_bound; idx++) { const cost_t x = instance->communicationCosts(move.from_proc, target_proc) * comm_gain; @@ -248,7 +263,6 @@ struct kl_hyper_total_comm_cost_function { } for (const auto &source : instance->getComputationalDag().parents(move.node)) { - if (move.to_proc != move.from_proc) { const unsigned source_proc = active_schedule->assigned_processor(source); if (node_lambda_map.has_no_proc_entry(source, move.from_proc)) { @@ -256,11 +270,13 @@ struct kl_hyper_total_comm_cost_function { for (const auto &target : instance->getComputationalDag().children(source)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) + if ((target_step < start_step || target_step > end_step) || (target == move.node) + || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) { continue; + } if (source_proc != move.from_proc && is_compatible(target, move.from_proc)) { - if (max_gain_recompute.find(target) != max_gain_recompute.end()) { // todo more specialized update + if (max_gain_recompute.find(target) != max_gain_recompute.end()) { // todo more specialized update max_gain_recompute[target].full_update = true; } else { max_gain_recompute[target] = kl_gain_update_info(target, true); @@ -279,12 +295,14 @@ struct kl_hyper_total_comm_cost_function { for (const auto &target : instance->getComputationalDag().children(source)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) || thread_data.lock_manager.is_locked(target) || (not thread_data.affinity_table.is_selected(target))) + if ((target_step < start_step || target_step > end_step) || (target == move.node) + || thread_data.lock_manager.is_locked(target) || (not thread_data.affinity_table.is_selected(target))) { continue; + } const unsigned target_proc = active_schedule->assigned_processor(target); if (target_proc == move.from_proc) { - if (max_gain_recompute.find(target) != max_gain_recompute.end()) { // todo more specialized update + if (max_gain_recompute.find(target) != max_gain_recompute.end()) { // todo more specialized update max_gain_recompute[target].full_update = true; } else { max_gain_recompute[target] = kl_gain_update_info(target, true); @@ -295,14 +313,15 @@ struct kl_hyper_total_comm_cost_function { auto &affinity_table_target = thread_data.affinity_table.at(target); const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc) * comm_gain; for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - if (p == target_proc) + if (p == target_proc) { continue; + } for (unsigned idx = target_start_idx; idx < target_window_bound; idx++) { affinity_table_target[p][idx] -= comm_aff; } } - break; // since node_lambda_map[source][move.from_proc] == 1 + break; // since node_lambda_map[source][move.from_proc] == 1 } } } @@ -312,8 +331,10 @@ struct kl_hyper_total_comm_cost_function { for (const auto &target : instance->getComputationalDag().children(source)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) + if ((target_step < start_step || target_step > end_step) || (target == move.node) + || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) { continue; + } if (source_proc != move.to_proc && is_compatible(target, move.to_proc)) { if (max_gain_recompute.find(target) != max_gain_recompute.end()) { @@ -333,8 +354,10 @@ struct kl_hyper_total_comm_cost_function { } else if (node_lambda_map.get_proc_entry(source, move.to_proc) == 2) { for (const auto &target : instance->getComputationalDag().children(source)) { const unsigned target_step = active_schedule->assigned_superstep(target); - if ((target_step < start_step || target_step > end_step) || (target == move.node) || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) + if ((target_step < start_step || target_step > end_step) || (target == move.node) + || (not thread_data.affinity_table.is_selected(target)) || thread_data.lock_manager.is_locked(target)) { continue; + } const unsigned target_proc = active_schedule->assigned_processor(target); if (target_proc == move.to_proc) { @@ -348,10 +371,12 @@ struct kl_hyper_total_comm_cost_function { const unsigned target_start_idx = start_idx(target_step, start_step); const unsigned target_window_bound = end_idx(target_step, end_step); auto &affinity_table_target = thread_data.affinity_table.at(target); - const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc) * graph->vertex_comm_weight(source) * comm_multiplier; + const cost_t comm_aff = instance->communicationCosts(source_proc, target_proc) + * graph->vertex_comm_weight(source) * comm_multiplier; for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - if (p == target_proc) + if (p == target_proc) { continue; + } for (unsigned idx = target_start_idx; idx < target_window_bound; idx++) { affinity_table_target[p][idx] += comm_aff; @@ -365,11 +390,13 @@ struct kl_hyper_total_comm_cost_function { } const unsigned source_step = active_schedule->assigned_superstep(source); - if (source_step < start_step || source_step > end_step) + if (source_step < start_step || source_step > end_step) { continue; + } - if (thread_data.lock_manager.is_locked(source)) + if (thread_data.lock_manager.is_locked(source)) { continue; + } if (not thread_data.affinity_table.is_selected(source)) { new_nodes.push_back(source); @@ -449,10 +476,13 @@ struct kl_hyper_total_comm_cost_function { const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - if (p == source_proc) + if (p == source_proc) { continue; + } - const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.from_proc), instance->communicationCosts(source_proc, move.from_proc), comm_gain); + const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.from_proc), + instance->communicationCosts(source_proc, move.from_proc), + comm_gain); for (unsigned idx = source_start_idx; idx < window_bound; idx++) { affinity_table_source[p][idx] -= comm_cost; } @@ -463,10 +493,13 @@ struct kl_hyper_total_comm_cost_function { const cost_t comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - if (p == source_proc) + if (p == source_proc) { continue; + } - const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.to_proc), instance->communicationCosts(source_proc, move.to_proc), comm_gain); + const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, move.to_proc), + instance->communicationCosts(source_proc, move.to_proc), + comm_gain); for (unsigned idx = source_start_idx; idx < window_bound; idx++) { affinity_table_source[p][idx] += comm_cost; } @@ -476,12 +509,28 @@ struct kl_hyper_total_comm_cost_function { } } - inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { return node_step < window_size + start_step ? window_size - (node_step - start_step) : 0; } - inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step); } - inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, const v_commw_t &node_target_comm_cost, const cost_t &comm_gain) { return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; } + inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { + return node_step < window_size + start_step ? window_size - (node_step - start_step) : 0; + } + + inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { + return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step); + } - template - void compute_comm_affinity(VertexType node, affinity_table_t &affinity_table_node, const cost_t &penalty, const cost_t &reward, const unsigned start_step, const unsigned end_step) { + inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, + const v_commw_t &node_target_comm_cost, + const cost_t &comm_gain) { + return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain + : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; + } + + template + void compute_comm_affinity(VertexType node, + affinity_table_t &affinity_table_node, + const cost_t &penalty, + const cost_t &reward, + const unsigned start_step, + const unsigned end_step) { const unsigned node_step = active_schedule->assigned_superstep(node); const unsigned node_proc = active_schedule->assigned_processor(node); const unsigned window_bound = end_idx(node_step, end_step); @@ -520,17 +569,19 @@ struct kl_hyper_total_comm_cost_function { } } } - } // traget + } // traget const cost_t comm_gain = graph->vertex_comm_weight(node) * comm_multiplier; for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - if (p == node_proc) + if (p == node_proc) { continue; + } for (const auto lambda_pair : node_lambda_map.iterate_proc_entries(node)) { const auto &lambda_proc = lambda_pair.first; - const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, lambda_proc), instance->communicationCosts(node_proc, lambda_proc), comm_gain); + const cost_t comm_cost = change_comm_cost( + instance->communicationCosts(p, lambda_proc), instance->communicationCosts(node_proc, lambda_proc), comm_gain); for (unsigned idx = node_start_idx; idx < window_bound; idx++) { affinity_table_node[p][idx] += comm_cost; } @@ -575,8 +626,9 @@ struct kl_hyper_total_comm_cost_function { const cost_t source_comm_gain = graph->vertex_comm_weight(source) * comm_multiplier; for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - if (p == node_proc) + if (p == node_proc) { continue; + } if (source_proc != node_proc && node_lambda_map.get_proc_entry(source, node_proc) == 1) { for (unsigned idx = node_start_idx; idx < window_bound; idx++) { @@ -590,8 +642,8 @@ struct kl_hyper_total_comm_cost_function { } } } - } // source + } // source } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp index 5f471077..a28e4640 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/kl_total_comm_cost.hpp @@ -22,9 +22,9 @@ limitations under the License. #include "../kl_improver.hpp" namespace osp { -template -struct kl_total_comm_cost_function { +template +struct kl_total_comm_cost_function { using VertexType = vertex_idx_t; using kl_move = kl_move_struct; using kl_gain_update_info = kl_update_info; @@ -45,7 +45,9 @@ struct kl_total_comm_cost_function { cost_t max_comm_weight = 0; inline cost_t get_comm_multiplier() { return comm_multiplier; } + inline cost_t get_max_comm_weight() { return max_comm_weight; } + inline cost_t get_max_comm_weight_multiplied() { return max_comm_weight * comm_multiplier; } const std::string name() const { return "toal_comm_cost"; } @@ -61,17 +63,16 @@ struct kl_total_comm_cost_function { } struct empty_struct {}; + using pre_move_comm_data_t = empty_struct; + inline empty_struct get_pre_move_comm_data(const kl_move &) { return empty_struct(); } - cost_t compute_schedule_cost_test() { - return compute_schedule_cost(); - } + cost_t compute_schedule_cost_test() { return compute_schedule_cost(); } void update_datastructure_after_move(const kl_move &, const unsigned, const unsigned) {} cost_t compute_schedule_cost() { - cost_t work_costs = 0; for (unsigned step = 0; step < active_schedule->num_steps(); step++) { work_costs += active_schedule->get_step_max_work(step); @@ -79,7 +80,6 @@ struct kl_total_comm_cost_function { cost_t comm_costs = 0; for (const auto &edge : edges(*graph)) { - const auto &source_v = source(edge, *graph); const auto &target_v = target(edge, *graph); @@ -87,7 +87,6 @@ struct kl_total_comm_cost_function { const unsigned &target_proc = active_schedule->assigned_processor(target_v); if (source_proc != target_proc) { - if constexpr (use_node_communication_costs) { const cost_t source_comm_cost = graph->vertex_comm_weight(source_v); max_comm_weight = std::max(max_comm_weight, source_comm_cost); @@ -100,23 +99,29 @@ struct kl_total_comm_cost_function { } } - return work_costs + comm_costs * comm_multiplier + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); + return work_costs + comm_costs * comm_multiplier + + static_cast>(active_schedule->num_steps() - 1) * instance->synchronisationCosts(); } - template - void update_node_comm_affinity(const kl_move &move, thread_data_t &thread_data, const cost_t &penalty, const cost_t &reward, std::map &max_gain_recompute, std::vector &new_nodes) { - + template + void update_node_comm_affinity(const kl_move &move, + thread_data_t &thread_data, + const cost_t &penalty, + const cost_t &reward, + std::map &max_gain_recompute, + std::vector &new_nodes) { const unsigned &start_step = thread_data.start_step; const unsigned &end_step = thread_data.end_step; for (const auto &target : instance->getComputationalDag().children(move.node)) { - const unsigned target_step = active_schedule->assigned_superstep(target); - if (target_step < start_step || target_step > end_step) + if (target_step < start_step || target_step > end_step) { continue; + } - if (thread_data.lock_manager.is_locked(target)) + if (thread_data.lock_manager.is_locked(target)) { continue; + } if (not thread_data.affinity_table.is_selected(target)) { new_nodes.push_back(target); @@ -134,7 +139,6 @@ struct kl_total_comm_cost_function { auto &affinity_table_target = thread_data.affinity_table.at(target); if (move.from_step < target_step + (move.from_proc == target_proc)) { - const unsigned diff = target_step - move.from_step; const unsigned bound = window_size >= diff ? window_size - diff + 1 : 0; unsigned idx = target_start_idx; @@ -149,7 +153,6 @@ struct kl_total_comm_cost_function { } } else { - const unsigned diff = move.from_step - target_step; const unsigned window_bound = end_idx(target_step, end_step); unsigned idx = std::min(window_size + diff, window_bound); @@ -209,8 +212,10 @@ struct kl_total_comm_cost_function { const unsigned window_bound = end_idx(target_step, end_step); for (; idx < window_bound; idx++) { for (const unsigned p : proc_range->compatible_processors_vertex(target)) { - const auto x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_target_comm_cost, comm_gain); - const auto y = change_comm_cost(instance->communicationCosts(p, move.from_proc), from_proc_target_comm_cost, comm_gain); + const auto x = change_comm_cost( + instance->communicationCosts(p, move.to_proc), to_proc_target_comm_cost, comm_gain); + const auto y = change_comm_cost( + instance->communicationCosts(p, move.from_proc), from_proc_target_comm_cost, comm_gain); affinity_table_target[p][idx] += x - y; } } @@ -218,13 +223,14 @@ struct kl_total_comm_cost_function { } for (const auto &source : instance->getComputationalDag().parents(move.node)) { - const unsigned source_step = active_schedule->assigned_superstep(source); - if (source_step < start_step || source_step > end_step) + if (source_step < start_step || source_step > end_step) { continue; + } - if (thread_data.lock_manager.is_locked(source)) + if (thread_data.lock_manager.is_locked(source)) { continue; + } if (not thread_data.affinity_table.is_selected(source)) { new_nodes.push_back(source); @@ -242,7 +248,6 @@ struct kl_total_comm_cost_function { auto &affinity_table_source = thread_data.affinity_table.at(source); if (move.from_step < source_step + (move.from_proc != source_proc)) { - const unsigned diff = source_step - move.from_step; const unsigned bound = window_size > diff ? window_size - diff : 0; unsigned idx = start_idx(source_step, start_step); @@ -257,7 +262,6 @@ struct kl_total_comm_cost_function { } } else { - const unsigned diff = move.from_step - source_step; unsigned idx = window_size + diff; @@ -309,8 +313,10 @@ struct kl_total_comm_cost_function { unsigned idx = start_idx(source_step, start_step); for (; idx < window_bound; idx++) { for (const unsigned p : proc_range->compatible_processors_vertex(source)) { - const cost_t x = change_comm_cost(instance->communicationCosts(p, move.to_proc), to_proc_source_comm_cost, comm_gain); - const cost_t y = change_comm_cost(instance->communicationCosts(p, move.from_proc), from_proc_source_comm_cost, comm_gain); + const cost_t x = change_comm_cost( + instance->communicationCosts(p, move.to_proc), to_proc_source_comm_cost, comm_gain); + const cost_t y = change_comm_cost( + instance->communicationCosts(p, move.from_proc), from_proc_source_comm_cost, comm_gain); affinity_table_source[p][idx] += x - y; } } @@ -318,13 +324,28 @@ struct kl_total_comm_cost_function { } } - inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { return (node_step < window_size + start_step) ? window_size - (node_step - start_step) : 0; } - inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { return (node_step + window_size <= end_step) ? window_range : window_range - (node_step + window_size - end_step); } + inline unsigned start_idx(const unsigned node_step, const unsigned start_step) { + return (node_step < window_size + start_step) ? window_size - (node_step - start_step) : 0; + } + + inline unsigned end_idx(const unsigned node_step, const unsigned end_step) { + return (node_step + window_size <= end_step) ? window_range : window_range - (node_step + window_size - end_step); + } - inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, const v_commw_t &node_target_comm_cost, const cost_t &comm_gain) { return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; } + inline cost_t change_comm_cost(const v_commw_t &p_target_comm_cost, + const v_commw_t &node_target_comm_cost, + const cost_t &comm_gain) { + return p_target_comm_cost > node_target_comm_cost ? (p_target_comm_cost - node_target_comm_cost) * comm_gain + : (node_target_comm_cost - p_target_comm_cost) * comm_gain * -1.0; + } - template - void compute_comm_affinity(VertexType node, affinity_table_t &affinity_table_node, const cost_t &penalty, const cost_t &reward, const unsigned start_step, const unsigned end_step) { + template + void compute_comm_affinity(VertexType node, + affinity_table_t &affinity_table_node, + const cost_t &penalty, + const cost_t &reward, + const unsigned start_step, + const unsigned end_step) { const unsigned node_step = active_schedule->assigned_superstep(node); const unsigned node_proc = active_schedule->assigned_processor(node); const unsigned window_bound = end_idx(node_step, end_step); @@ -368,13 +389,14 @@ struct kl_total_comm_cost_function { const auto node_target_comm_cost = instance->communicationCosts(node_proc, target_proc); for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, target_proc), node_target_comm_cost, comm_gain); + const cost_t comm_cost + = change_comm_cost(instance->communicationCosts(p, target_proc), node_target_comm_cost, comm_gain); for (unsigned idx = node_start_idx; idx < window_bound; idx++) { affinity_table_node[p][idx] += comm_cost; } } - } // traget + } // traget for (const auto &source : instance->getComputationalDag().parents(node)) { const unsigned source_step = active_schedule->assigned_superstep(source); @@ -416,13 +438,14 @@ struct kl_total_comm_cost_function { const auto source_node_comm_cost = instance->communicationCosts(source_proc, node_proc); for (const unsigned p : proc_range->compatible_processors_vertex(node)) { - const cost_t comm_cost = change_comm_cost(instance->communicationCosts(p, source_proc), source_node_comm_cost, comm_gain); + const cost_t comm_cost + = change_comm_cost(instance->communicationCosts(p, source_proc), source_node_comm_cost, comm_gain); for (unsigned idx = node_start_idx; idx < window_bound; idx++) { affinity_table_node[p][idx] += comm_cost; } } - } // source + } // source } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp index 0eccc815..a1494bff 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/lambda_container.hpp @@ -33,9 +33,8 @@ namespace osp { * For each node, the map stores the count of children assigned to each processor, which is * important for computing communication costs in BSP scheduling. */ -template +template struct lambda_map_container { - /// Vector of maps: for each node, maps processor ID to assignment count std::vector> node_lambda_map; @@ -102,9 +101,7 @@ struct lambda_map_container { * @param node Node index * @return The count of different processors the node is sending to */ - inline unsigned get_proc_count(const vertex_idx_t node) const { - return static_cast(node_lambda_map[node].size()); - } + inline unsigned get_proc_count(const vertex_idx_t node) const { return static_cast(node_lambda_map[node].size()); } /** * @brief Increase the processor count for a given node. @@ -158,9 +155,8 @@ struct lambda_map_container { * For each node, the vector stores the count of children assigned to each processor, which is * important for computing communication costs in BSP scheduling. */ -template +template struct lambda_vector_container { - /** * @brief Range adapter for iterating over non-zero processor entries. * @@ -179,7 +175,6 @@ struct lambda_vector_container { * for all processors with non-zero assignment counts. */ class lambda_vector_iterator { - using iterator_category = std::input_iterator_tag; using value_type = std::pair; using difference_type = std::ptrdiff_t; @@ -296,9 +291,7 @@ struct lambda_vector_container { * @param proc Processor ID * @return true if the processor has no assignments to the node */ - inline bool has_no_proc_entry(const vertex_idx_t node, const unsigned proc) const { - return node_lambda_vec[node][proc] == 0; - } + inline bool has_no_proc_entry(const vertex_idx_t node, const unsigned proc) const { return node_lambda_vec[node][proc] == 0; } /** * @brief Get a reference to the processor count for a given node. @@ -369,4 +362,4 @@ struct lambda_vector_container { inline auto iterate_proc_entries(const vertex_idx_t node) { return lambda_vector_range(node_lambda_vec[node]); } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp index 236e11cc..8c76efe4 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/comm_cost_modules/max_comm_datastructure.hpp @@ -18,20 +18,20 @@ limitations under the License. #pragma once -#include "comm_cost_policies.hpp" -#include "generic_lambda_container.hpp" -#include "lambda_container.hpp" -#include "osp/bsp/model/BspInstance.hpp" #include #include #include #include +#include "comm_cost_policies.hpp" +#include "generic_lambda_container.hpp" +#include "lambda_container.hpp" +#include "osp/bsp/model/BspInstance.hpp" + namespace osp { -template +template struct pre_move_comm_data { - struct step_info { comm_weight_t max_comm; comm_weight_t second_max_comm; @@ -56,9 +56,8 @@ struct pre_move_comm_data { } }; -template +template struct max_comm_datastructure { - using comm_weight_t = v_commw_t; using VertexType = vertex_idx_t; using kl_move = kl_move_struct; @@ -89,14 +88,17 @@ struct max_comm_datastructure { std::vector step_is_affected; inline comm_weight_t step_proc_send(unsigned step, unsigned proc) const { return step_proc_send_[step][proc]; } + inline comm_weight_t &step_proc_send(unsigned step, unsigned proc) { return step_proc_send_[step][proc]; } - inline comm_weight_t step_proc_receive(unsigned step, unsigned proc) const { - return step_proc_receive_[step][proc]; - } + + inline comm_weight_t step_proc_receive(unsigned step, unsigned proc) const { return step_proc_receive_[step][proc]; } + inline comm_weight_t &step_proc_receive(unsigned step, unsigned proc) { return step_proc_receive_[step][proc]; } inline comm_weight_t step_max_comm(unsigned step) const { return step_max_comm_cache[step]; } + inline comm_weight_t step_second_max_comm(unsigned step) const { return step_second_max_comm_cache[step]; } + inline unsigned step_max_comm_count(unsigned step) const { return step_max_comm_count_cache[step]; } inline void initialize(kl_active_schedule_t &kl_sched) { @@ -170,10 +172,12 @@ struct max_comm_datastructure { step_max_comm_cache[step] = global_max; unsigned global_count = 0; - if (max_send == global_max) + if (max_send == global_max) { global_count += max_send_count; - if (max_receive == global_max) + } + if (max_receive == global_max) { global_count += max_receive_count; + } step_max_comm_count_cache[step] = global_count; comm_weight_t cand_send = (max_send == global_max) ? second_max_send : max_send; @@ -209,8 +213,9 @@ struct max_comm_datastructure { // Prepare Scratchpad (Avoids Allocations) --- for (unsigned step : affected_steps_list) { - if (step < step_is_affected.size()) + if (step < step_is_affected.size()) { step_is_affected[step] = false; + } } affected_steps_list.clear(); @@ -280,8 +285,9 @@ struct max_comm_datastructure { for (const auto &parent : graph.parents(node)) { const unsigned parent_step = active_schedule->assigned_superstep(parent); // Fast boundary check - if (parent_step >= step_proc_send_.size()) + if (parent_step >= step_proc_send_.size()) { continue; + } const unsigned parent_proc = active_schedule->assigned_processor(parent); const comm_weight_t comm_w_parent = graph.vertex_comm_weight(parent); @@ -294,8 +300,7 @@ struct max_comm_datastructure { if (from_proc != parent_proc) { const comm_weight_t cost = comm_w_parent * instance->sendCosts(parent_proc, from_proc); if (cost > 0) { - CommPolicy::unattribute_communication(*this, cost, parent_step, parent_proc, from_proc, - from_step, val); + CommPolicy::unattribute_communication(*this, cost, parent_step, parent_proc, from_proc, from_step, val); } } } @@ -308,15 +313,13 @@ struct max_comm_datastructure { if (to_proc != parent_proc) { const comm_weight_t cost = comm_w_parent * instance->sendCosts(parent_proc, to_proc); if (cost > 0) { - CommPolicy::attribute_communication(*this, cost, parent_step, parent_proc, to_proc, to_step, - val_to); + CommPolicy::attribute_communication(*this, cost, parent_step, parent_proc, to_proc, to_step, val_to); } } } mark_step(parent_step); } - // Re-arrange Affected Steps for (unsigned step : affected_steps_list) { @@ -358,14 +361,12 @@ struct max_comm_datastructure { const unsigned v_proc = vec_sched.assignedProcessor(v); const unsigned v_step = vec_sched.assignedSuperstep(v); - const comm_weight_t comm_w_send_cost = - (u_proc != v_proc) ? comm_w * instance->sendCosts(u_proc, v_proc) : 0; + const comm_weight_t comm_w_send_cost = (u_proc != v_proc) ? comm_w * instance->sendCosts(u_proc, v_proc) : 0; auto &val = node_lambda_map.get_proc_entry(u, v_proc); if (CommPolicy::add_child(val, v_step)) { if (u_proc != v_proc && comm_w_send_cost > 0) { - CommPolicy::attribute_communication(*this, comm_w_send_cost, u_step, u_proc, v_proc, v_step, - val); + CommPolicy::attribute_communication(*this, comm_w_send_cost, u_step, u_proc, v_proc, v_step, val); } } } @@ -380,4 +381,4 @@ struct max_comm_datastructure { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp index 862eeacc..2e6f3af0 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp @@ -28,9 +28,8 @@ limitations under the License. namespace osp { -template +template struct kl_move_struct { - vertex_idx_t node; cost_t gain; @@ -41,10 +40,9 @@ struct kl_move_struct { unsigned to_step; kl_move_struct() : node(0), gain(0), from_proc(0), from_step(0), to_proc(0), to_step(0) {} - kl_move_struct(vertex_idx_t _node, cost_t _gain, unsigned _from_proc, unsigned _from_step, - unsigned _to_proc, unsigned _to_step) - : node(_node), gain(_gain), from_proc(_from_proc), from_step(_from_step), - to_proc(_to_proc), to_step(_to_step) {} + + kl_move_struct(vertex_idx_t _node, cost_t _gain, unsigned _from_proc, unsigned _from_step, unsigned _to_proc, unsigned _to_step) + : node(_node), gain(_gain), from_proc(_from_proc), from_step(_from_step), to_proc(_to_proc), to_step(_to_step) {} bool operator<(kl_move_struct const &rhs) const { return (gain < rhs.gain) or (gain == rhs.gain and node > rhs.node); @@ -59,9 +57,8 @@ struct kl_move_struct { } }; -template +template struct pre_move_work_data { - work_weight_t from_step_max_work; work_weight_t from_step_second_max_work; unsigned from_step_max_work_processor_count; @@ -71,18 +68,23 @@ struct pre_move_work_data { unsigned to_step_max_work_processor_count; pre_move_work_data() {} - pre_move_work_data(work_weight_t from_step_max_work_, work_weight_t from_step_second_max_work_, unsigned from_step_max_work_processor_count_, - work_weight_t to_step_max_work_, work_weight_t to_step_second_max_work_, + + pre_move_work_data(work_weight_t from_step_max_work_, + work_weight_t from_step_second_max_work_, + unsigned from_step_max_work_processor_count_, + work_weight_t to_step_max_work_, + work_weight_t to_step_second_max_work_, unsigned to_step_max_work_processor_count_) - : from_step_max_work(from_step_max_work_), from_step_second_max_work(from_step_second_max_work_), + : from_step_max_work(from_step_max_work_), + from_step_second_max_work(from_step_second_max_work_), from_step_max_work_processor_count(from_step_max_work_processor_count_), - to_step_max_work(to_step_max_work_), to_step_second_max_work(to_step_second_max_work_), + to_step_max_work(to_step_max_work_), + to_step_second_max_work(to_step_second_max_work_), to_step_max_work_processor_count(to_step_max_work_processor_count_) {} }; -template +template struct kl_active_schedule_work_datastructures { - using work_weight_t = v_workw_t; const BspInstance *instance; @@ -93,11 +95,10 @@ struct kl_active_schedule_work_datastructures { unsigned proc; weight_proc() : work(0), proc(0) {} + weight_proc(work_weight_t _work, unsigned _proc) : work(_work), proc(_proc) {} - bool operator<(weight_proc const &rhs) const { - return (work > rhs.work) or (work == rhs.work and proc < rhs.proc); - } + bool operator<(weight_proc const &rhs) const { return (work > rhs.work) or (work == rhs.work and proc < rhs.proc); } }; std::vector> step_processor_work_; @@ -107,14 +108,27 @@ struct kl_active_schedule_work_datastructures { work_weight_t total_work_weight; inline work_weight_t step_max_work(unsigned step) const { return step_processor_work_[step][0].work; } - inline work_weight_t step_second_max_work(unsigned step) const { return step_processor_work_[step][step_max_work_processor_count[step]].work; } - inline work_weight_t step_proc_work(unsigned step, unsigned proc) const { return step_processor_work_[step][step_processor_position[step][proc]].work; } - inline work_weight_t &step_proc_work(unsigned step, unsigned proc) { return step_processor_work_[step][step_processor_position[step][proc]].work; } - template + inline work_weight_t step_second_max_work(unsigned step) const { + return step_processor_work_[step][step_max_work_processor_count[step]].work; + } + + inline work_weight_t step_proc_work(unsigned step, unsigned proc) const { + return step_processor_work_[step][step_processor_position[step][proc]].work; + } + + inline work_weight_t &step_proc_work(unsigned step, unsigned proc) { + return step_processor_work_[step][step_processor_position[step][proc]].work; + } + + template inline pre_move_work_data get_pre_move_work_data(kl_move_struct move) { - return pre_move_work_data(step_max_work(move.from_step), step_second_max_work(move.from_step), step_max_work_processor_count[move.from_step], - step_max_work(move.to_step), step_second_max_work(move.to_step), step_max_work_processor_count[move.to_step]); + return pre_move_work_data(step_max_work(move.from_step), + step_second_max_work(move.from_step), + step_max_work_processor_count[move.from_step], + step_max_work(move.to_step), + step_second_max_work(move.to_step), + step_max_work_processor_count[move.to_step]); } inline void initialize(const SetSchedule &sched, const BspInstance &inst, unsigned num_steps) { @@ -122,8 +136,10 @@ struct kl_active_schedule_work_datastructures { set_schedule = &sched; max_work_weight = 0; total_work_weight = 0; - step_processor_work_ = std::vector>(num_steps, std::vector(instance->numberOfProcessors())); - step_processor_position = std::vector>(num_steps, std::vector(instance->numberOfProcessors(), 0)); + step_processor_work_ + = std::vector>(num_steps, std::vector(instance->numberOfProcessors())); + step_processor_position + = std::vector>(num_steps, std::vector(instance->numberOfProcessors(), 0)); step_max_work_processor_count = std::vector(num_steps, 0); } @@ -141,16 +157,17 @@ struct kl_active_schedule_work_datastructures { for (const auto &wp : step_processor_work_[step]) { step_processor_position[step][wp.proc] = pos++; - if (wp.work == max_work_to && pos < instance->numberOfProcessors()) + if (wp.work == max_work_to && pos < instance->numberOfProcessors()) { step_max_work_processor_count[step] = pos; + } } } - template + template void apply_move(kl_move_struct move, work_weight_t work_weight) { - - if (work_weight == 0) + if (work_weight == 0) { return; + } if (move.to_step != move.from_step) { step_proc_work(move.to_step, move.to_proc) += work_weight; @@ -171,8 +188,9 @@ struct kl_active_schedule_work_datastructures { // unsigned to_proc_pos = step_processor_position[move.to_step][move.to_proc]; // while (to_proc_pos > 0 && step_processor_work_[move.to_step][to_proc_pos - 1].work < new_weight_to) { - // std::swap(step_processor_work_[move.to_step][to_proc_pos], step_processor_work_[move.to_step][to_proc_pos - 1]); - // std::swap(step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos].proc], step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos - 1].proc]); + // std::swap(step_processor_work_[move.to_step][to_proc_pos], step_processor_work_[move.to_step][to_proc_pos - + // 1]); std::swap(step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos].proc], + // step_processor_position[move.to_step][step_processor_work_[move.to_step][to_proc_pos - 1].proc]); // to_proc_pos--; // } @@ -182,9 +200,12 @@ struct kl_active_schedule_work_datastructures { // unsigned from_proc_pos = step_processor_position[move.from_step][move.from_proc]; - // while (from_proc_pos < instance->numberOfProcessors() - 1 && step_processor_work_[move.from_step][from_proc_pos + 1].work > new_weight_from) { - // std::swap(step_processor_work_[move.from_step][from_proc_pos], step_processor_work_[move.from_step][from_proc_pos + 1]); - // std::swap(step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos].proc], step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos + 1].proc]); + // while (from_proc_pos < instance->numberOfProcessors() - 1 && step_processor_work_[move.from_step][from_proc_pos + + // 1].work > new_weight_from) { + // std::swap(step_processor_work_[move.from_step][from_proc_pos], + // step_processor_work_[move.from_step][from_proc_pos + 1]); + // std::swap(step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos].proc], + // step_processor_position[move.from_step][step_processor_work_[move.from_step][from_proc_pos + 1].proc]); // from_proc_pos++; // } @@ -209,7 +230,6 @@ struct kl_active_schedule_work_datastructures { } void override_next_superstep(unsigned step) { - const unsigned next_step = step + 1; for (unsigned i = 0; i < instance->numberOfProcessors(); i++) { step_processor_work_[next_step][i] = step_processor_work_[step][i]; @@ -245,7 +265,8 @@ struct kl_active_schedule_work_datastructures { if (step_processor_work_[step][proc].work > max_work) { max_work = step_processor_work_[step][proc].work; step_max_work_processor_count[step] = 1; - } else if (step_processor_work_[step][proc].work == max_work && step_max_work_processor_count[step] < (instance->numberOfProcessors() - 1)) { + } else if (step_processor_work_[step][proc].work == max_work + && step_max_work_processor_count[step] < (instance->numberOfProcessors() - 1)) { step_max_work_processor_count[step]++; } } @@ -259,9 +280,8 @@ struct kl_active_schedule_work_datastructures { } }; -template +template struct thread_local_active_schedule_data { - using VertexType = vertex_idx_t; using EdgeType = edge_desc_t; @@ -297,9 +317,8 @@ struct thread_local_active_schedule_data { } }; -template +template class kl_active_schedule { - private: using VertexType = vertex_idx_t; using EdgeType = edge_desc_t; @@ -318,21 +337,45 @@ class kl_active_schedule { virtual ~kl_active_schedule() = default; inline const BspInstance &getInstance() const { return *instance; } + inline const VectorSchedule &getVectorSchedule() const { return vector_schedule; } + inline VectorSchedule &getVectorSchedule() { return vector_schedule; } + inline const SetSchedule &getSetSchedule() const { return set_schedule; } + inline cost_t get_cost() { return cost; } + inline bool is_feasible() { return feasible; } + inline unsigned num_steps() const { return vector_schedule.numberOfSupersteps(); } + inline unsigned assigned_processor(VertexType node) const { return vector_schedule.assignedProcessor(node); } + inline unsigned assigned_superstep(VertexType node) const { return vector_schedule.assignedSuperstep(node); } + inline v_workw_t get_step_max_work(unsigned step) const { return work_datastructures.step_max_work(step); } - inline v_workw_t get_step_second_max_work(unsigned step) const { return work_datastructures.step_second_max_work(step); } - inline std::vector &get_step_max_work_processor_count() { return work_datastructures.step_max_work_processor_count; } - inline v_workw_t get_step_processor_work(unsigned step, unsigned proc) const { return work_datastructures.step_proc_work(step, proc); } - inline pre_move_work_data> get_pre_move_work_data(kl_move move) { return work_datastructures.get_pre_move_work_data(move); } + + inline v_workw_t get_step_second_max_work(unsigned step) const { + return work_datastructures.step_second_max_work(step); + } + + inline std::vector &get_step_max_work_processor_count() { + return work_datastructures.step_max_work_processor_count; + } + + inline v_workw_t get_step_processor_work(unsigned step, unsigned proc) const { + return work_datastructures.step_proc_work(step, proc); + } + + inline pre_move_work_data> get_pre_move_work_data(kl_move move) { + return work_datastructures.get_pre_move_work_data(move); + } + inline v_workw_t get_max_work_weight() { return work_datastructures.max_work_weight; } + inline v_workw_t get_total_work_weight() { return work_datastructures.total_work_weight; } + inline void set_cost(cost_t cost_) { cost = cost_; } constexpr static bool use_memory_constraint = is_local_search_memory_constraint_v; @@ -365,8 +408,13 @@ class kl_active_schedule { } } - template - void revert_to_best_schedule(unsigned start_move, unsigned insert_step, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned &end_step) { + template + void revert_to_best_schedule(unsigned start_move, + unsigned insert_step, + comm_datastructures_t &comm_datastructures, + thread_data_t &thread_data, + unsigned start_step, + unsigned &end_step) { const unsigned bound = std::max(start_move, thread_data.best_schedule_idx); revert_moves(bound, comm_datastructures, thread_data, start_step, end_step); @@ -377,8 +425,10 @@ class kl_active_schedule { revert_moves(thread_data.best_schedule_idx, comm_datastructures, thread_data, start_step, end_step); #ifdef KL_DEBUG - if (not thread_data.feasible) - std::cout << "Reverted to best schedule with cost: " << thread_data.best_cost << " and " << vector_schedule.number_of_supersteps << " supersteps" << std::endl; + if (not thread_data.feasible) { + std::cout << "Reverted to best schedule with cost: " << thread_data.best_cost << " and " + << vector_schedule.number_of_supersteps << " supersteps" << std::endl; + } #endif thread_data.applied_moves.clear(); @@ -388,8 +438,14 @@ class kl_active_schedule { thread_data.cost = thread_data.best_cost; } - template - void revert_schedule_to_bound(const size_t bound, const cost_t new_cost, const bool is_feasible, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) { + template + void revert_schedule_to_bound(const size_t bound, + const cost_t new_cost, + const bool is_feasible, + comm_datastructures_t &comm_datastructures, + thread_data_t &thread_data, + unsigned start_step, + unsigned end_step) { revert_moves(bound, comm_datastructures, thread_data, start_step, end_step); thread_data.current_violations.clear(); @@ -409,8 +465,12 @@ class kl_active_schedule { void swap_steps(const unsigned step1, const unsigned step2); private: - template - void revert_moves(const size_t bound, comm_datastructures_t &comm_datastructures, thread_data_t &thread_data, unsigned start_step, unsigned end_step) { + template + void revert_moves(const size_t bound, + comm_datastructures_t &comm_datastructures, + thread_data_t &thread_data, + unsigned start_step, + unsigned end_step) { while (thread_data.applied_moves.size() > bound) { const auto move = thread_data.applied_moves.back().reverse_move(); thread_data.applied_moves.pop_back(); @@ -439,14 +499,16 @@ class kl_active_schedule { const auto &child = target(edge, instance->getComputationalDag()); if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { - if ((node_step > vector_schedule.assignedSuperstep(child)) || - (node_step == vector_schedule.assignedSuperstep(child) && node_proc != vector_schedule.assignedProcessor(child))) { + if ((node_step > vector_schedule.assignedSuperstep(child)) + || (node_step == vector_schedule.assignedSuperstep(child) + && node_proc != vector_schedule.assignedProcessor(child))) { thread_data.current_violations.insert(edge); thread_data.new_violations[child] = edge; } } else { - if ((node_step < vector_schedule.assignedSuperstep(child)) || - (node_step == vector_schedule.assignedSuperstep(child) && node_proc == vector_schedule.assignedProcessor(child))) { + if ((node_step < vector_schedule.assignedSuperstep(child)) + || (node_step == vector_schedule.assignedSuperstep(child) + && node_proc == vector_schedule.assignedProcessor(child))) { thread_data.current_violations.erase(edge); thread_data.resolved_violations.insert(edge); } @@ -457,14 +519,16 @@ class kl_active_schedule { const auto &parent = source(edge, instance->getComputationalDag()); if (thread_data.current_violations.find(edge) == thread_data.current_violations.end()) { - if ((node_step < vector_schedule.assignedSuperstep(parent)) || - (node_step == vector_schedule.assignedSuperstep(parent) && node_proc != vector_schedule.assignedProcessor(parent))) { + if ((node_step < vector_schedule.assignedSuperstep(parent)) + || (node_step == vector_schedule.assignedSuperstep(parent) + && node_proc != vector_schedule.assignedProcessor(parent))) { thread_data.current_violations.insert(edge); thread_data.new_violations[parent] = edge; } } else { - if ((node_step > vector_schedule.assignedSuperstep(parent)) || - (node_step == vector_schedule.assignedSuperstep(parent) && node_proc == vector_schedule.assignedProcessor(parent))) { + if ((node_step > vector_schedule.assignedSuperstep(parent)) + || (node_step == vector_schedule.assignedSuperstep(parent) + && node_proc == vector_schedule.assignedProcessor(parent))) { thread_data.current_violations.erase(edge); thread_data.resolved_violations.insert(edge); } @@ -499,7 +563,7 @@ class kl_active_schedule { } }; -template +template void kl_active_schedule::clear() { work_datastructures.clear(); vector_schedule.clear(); @@ -509,14 +573,12 @@ void kl_active_schedule::clear() { } } -template +template void kl_active_schedule::compute_violations(thread_data_t &thread_data) { - thread_data.current_violations.clear(); thread_data.feasible = true; for (const auto &edge : edges(instance->getComputationalDag())) { - const auto &source_v = source(edge, instance->getComputationalDag()); const auto &target_v = target(edge, instance->getComputationalDag()); @@ -532,7 +594,7 @@ void kl_active_schedule::compute_violations } } -template +template void kl_active_schedule::initialize(const IBspSchedule &schedule) { instance = &schedule.getInstance(); vector_schedule = VectorSchedule(schedule); @@ -549,15 +611,16 @@ void kl_active_schedule::initialize(const I compute_work_memory_datastructures(0, num_steps() - 1); } -template -void kl_active_schedule::compute_work_memory_datastructures(unsigned start_step, unsigned end_step) { +template +void kl_active_schedule::compute_work_memory_datastructures(unsigned start_step, + unsigned end_step) { if constexpr (use_memory_constraint) { memory_constraint.compute_memory_datastructure(start_step, end_step); } work_datastructures.compute_work_datastructures(start_step, end_step); } -template +template void kl_active_schedule::write_schedule(BspSchedule &schedule) { for (const auto v : instance->vertices()) { schedule.setAssignedProcessor(v, vector_schedule.assignedProcessor(v)); @@ -566,7 +629,7 @@ void kl_active_schedule::write_schedule(Bsp schedule.updateNumberOfSupersteps(); } -template +template void kl_active_schedule::remove_empty_step(unsigned step) { for (unsigned i = step; i < num_steps() - 1; i++) { for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { @@ -583,7 +646,7 @@ void kl_active_schedule::remove_empty_step( vector_schedule.number_of_supersteps--; } -template +template void kl_active_schedule::swap_empty_step_fwd(const unsigned step, const unsigned to_step) { for (unsigned i = step; i < to_step; i++) { for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { @@ -599,7 +662,7 @@ void kl_active_schedule::swap_empty_step_fw } } -template +template void kl_active_schedule::insert_empty_step(unsigned step) { unsigned i = vector_schedule.number_of_supersteps++; @@ -617,8 +680,9 @@ void kl_active_schedule::insert_empty_step( } } -template -void kl_active_schedule::swap_empty_step_bwd(const unsigned to_step, const unsigned empty_step) { +template +void kl_active_schedule::swap_empty_step_bwd(const unsigned to_step, + const unsigned empty_step) { unsigned i = to_step; for (; i > empty_step; i--) { @@ -635,10 +699,11 @@ void kl_active_schedule::swap_empty_step_bw } } -template +template void kl_active_schedule::swap_steps(const unsigned step1, const unsigned step2) { - if (step1 == step2) + if (step1 == step2) { return; + } for (unsigned proc = 0; proc < instance->numberOfProcessors(); proc++) { for (const auto node : set_schedule.step_processor_vertices[step1][proc]) { @@ -655,4 +720,4 @@ void kl_active_schedule::swap_steps(const u } } -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp index dd572710..5003d796 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver.hpp @@ -28,6 +28,8 @@ limitations under the License. #include #include +#include "kl_active_schedule.hpp" +#include "kl_util.hpp" #include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp" #include "osp/auxiliary/misc.hpp" #include "osp/bsp/model/util/CompatibleProcessorRange.hpp" @@ -36,9 +38,6 @@ limitations under the License. #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" -#include "kl_active_schedule.hpp" -#include "kl_util.hpp" - namespace osp { struct kl_parameter { @@ -64,9 +63,8 @@ struct kl_parameter { unsigned thread_range_gap = 0; }; -template +template struct kl_update_info { - VertexType node = 0; bool full_update = false; @@ -76,17 +74,19 @@ struct kl_update_info { bool update_entire_from_step = false; kl_update_info() = default; - kl_update_info(VertexType n) - : node(n), full_update(false), update_entire_to_step(false), update_entire_from_step(false) {} + + kl_update_info(VertexType n) : node(n), full_update(false), update_entire_to_step(false), update_entire_from_step(false) {} + kl_update_info(VertexType n, bool full) : node(n), full_update(full), update_entire_to_step(false), update_entire_from_step(false) {} }; -template +template class kl_improver : public ImprovementScheduler { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph concept"); static_assert(has_hashable_edge_desc_v, "Graph_t must satisfy the has_hashable_edge_desc concept"); static_assert(is_computational_dag_v, "Graph_t must satisfy the computational_dag concept"); @@ -110,7 +110,6 @@ class kl_improver : public ImprovementScheduler { using kl_gain_update_info = kl_update_info; struct ThreadSearchContext { - unsigned thread_id = 0; unsigned start_step = 0; unsigned end_step = 0; @@ -138,12 +137,13 @@ class kl_improver : public ImprovementScheduler { unsigned max_no_vioaltions_removed_backtrack = 0; inline unsigned num_steps() const { return end_step - start_step + 1; } + inline unsigned start_idx(const unsigned node_step) const { return node_step < start_step + window_size ? window_size - (node_step - start_step) : 0; } + inline unsigned end_idx(unsigned node_step) const { - return node_step + window_size <= end_step ? window_range - : window_range - (node_step + window_size - end_step); + return node_step + window_size <= end_step ? window_range : window_range - (node_step + window_size - end_step); } }; @@ -164,9 +164,9 @@ class kl_improver : public ImprovementScheduler { std::vector thread_finished_vec; inline unsigned rel_step_idx(const unsigned node_step, const unsigned move_step) const { - return (move_step >= node_step) ? ((move_step - node_step) + window_size) - : (window_size - (node_step - move_step)); + return (move_step >= node_step) ? ((move_step - node_step) + window_size) : (window_size - (node_step - move_step)); } + inline bool is_compatible(VertexType node, unsigned proc) const { return active_schedule.getInstance().isCompatible(node, proc); } @@ -188,7 +188,8 @@ class kl_improver : public ImprovementScheduler { } kl_move get_best_move(node_selection_container_t &affinity_table, - vector_vertex_lock_manger &lock_manager, heap_datastructure &max_gain_heap) { + vector_vertex_lock_manger &lock_manager, + heap_datastructure &max_gain_heap) { // To introduce non-determinism and help escape local optima, if there are multiple moves with the same // top gain, we randomly select one. We check up to `local_max` ties. const unsigned local_max = 50; @@ -210,14 +211,19 @@ class kl_improver : public ImprovementScheduler { return best_move; } - inline void process_other_steps_best_move(const unsigned idx, const unsigned node_step, const VertexType &node, - const cost_t affinity_current_proc_step, cost_t &max_gain, - unsigned &max_proc, unsigned &max_step, + inline void process_other_steps_best_move(const unsigned idx, + const unsigned node_step, + const VertexType &node, + const cost_t affinity_current_proc_step, + cost_t &max_gain, + unsigned &max_proc, + unsigned &max_step, const std::vector> &affinity_table_node) const { for (const unsigned p : proc_range.compatible_processors_vertex(node)) { if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.can_move(node, p, node_step + idx - window_size)) + if (not active_schedule.memory_constraint.can_move(node, p, node_step + idx - window_size)) { continue; + } } const cost_t gain = affinity_current_proc_step - affinity_table_node[p][idx]; @@ -229,8 +235,9 @@ class kl_improver : public ImprovementScheduler { } } - template - kl_move compute_best_move(VertexType node, const std::vector> &affinity_table_node, + template + kl_move compute_best_move(VertexType node, + const std::vector> &affinity_table_node, ThreadSearchContext &thread_data) { const unsigned node_step = active_schedule.assigned_superstep(node); const unsigned node_proc = active_schedule.assigned_processor(node); @@ -244,18 +251,20 @@ class kl_improver : public ImprovementScheduler { unsigned idx = thread_data.start_idx(node_step); for (; idx < window_size; idx++) { - process_other_steps_best_move(idx, node_step, node, affinity_current_proc_step, max_gain, max_proc, - max_step, affinity_table_node); + process_other_steps_best_move( + idx, node_step, node, affinity_current_proc_step, max_gain, max_proc, max_step, affinity_table_node); } if constexpr (move_to_same_super_step) { for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - if (proc == node_proc) + if (proc == node_proc) { continue; + } if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.can_move(node, proc, node_step + idx - window_size)) + if (not active_schedule.memory_constraint.can_move(node, proc, node_step + idx - window_size)) { continue; + } } const cost_t gain = affinity_current_proc_step - affinity_table_node[proc][window_size]; @@ -271,14 +280,15 @@ class kl_improver : public ImprovementScheduler { const unsigned bound = thread_data.end_idx(node_step); for (; idx < bound; idx++) { - process_other_steps_best_move(idx, node_step, node, affinity_current_proc_step, max_gain, max_proc, - max_step, affinity_table_node); + process_other_steps_best_move( + idx, node_step, node, affinity_current_proc_step, max_gain, max_proc, max_step, affinity_table_node); } return kl_move(node, max_gain, node_proc, node_step, max_proc, node_step + max_step - window_size); } - kl_gain_update_info update_node_work_affinity_after_move(VertexType node, kl_move move, + kl_gain_update_info update_node_work_affinity_after_move(VertexType node, + kl_move move, const pre_move_work_data &prev_work_data, std::vector> &affinity_table_node) { const unsigned node_step = active_schedule.assigned_superstep(node); @@ -298,30 +308,25 @@ class kl_improver : public ImprovementScheduler { if (node_step == move.from_step) { const unsigned node_proc = active_schedule.assigned_processor(node); const work_weight_t new_max_weight = active_schedule.get_step_max_work(move.from_step); - const work_weight_t new_second_max_weight = - active_schedule.get_step_second_max_work(move.from_step); - const work_weight_t new_step_proc_work = - active_schedule.get_step_processor_work(node_step, node_proc); - const work_weight_t prev_step_proc_work = - (node_proc == move.from_proc) ? new_step_proc_work + graph->vertex_work_weight(move.node) - : (node_proc == move.to_proc) ? new_step_proc_work - graph->vertex_work_weight(move.node) - : new_step_proc_work; - const bool prev_is_sole_max_processor = (prev_work_data.from_step_max_work_processor_count == 1) && - (prev_max_work == prev_step_proc_work); - const cost_t prev_node_proc_affinity = - prev_is_sole_max_processor ? std::min(vertex_weight, prev_max_work - prev_second_max_work) - : 0.0; - const bool new_is_sole_max_processor = - (active_schedule.get_step_max_work_processor_count()[node_step] == 1) && - (new_max_weight == new_step_proc_work); - const cost_t new_node_proc_affinity = - new_is_sole_max_processor ? std::min(vertex_weight, new_max_weight - new_second_max_weight) - : 0.0; + const work_weight_t new_second_max_weight = active_schedule.get_step_second_max_work(move.from_step); + const work_weight_t new_step_proc_work = active_schedule.get_step_processor_work(node_step, node_proc); + const work_weight_t prev_step_proc_work + = (node_proc == move.from_proc) ? new_step_proc_work + graph->vertex_work_weight(move.node) + : (node_proc == move.to_proc) ? new_step_proc_work - graph->vertex_work_weight(move.node) + : new_step_proc_work; + const bool prev_is_sole_max_processor = (prev_work_data.from_step_max_work_processor_count == 1) + && (prev_max_work == prev_step_proc_work); + const cost_t prev_node_proc_affinity + = prev_is_sole_max_processor ? std::min(vertex_weight, prev_max_work - prev_second_max_work) : 0.0; + const bool new_is_sole_max_processor = (active_schedule.get_step_max_work_processor_count()[node_step] == 1) + && (new_max_weight == new_step_proc_work); + const cost_t new_node_proc_affinity + = new_is_sole_max_processor ? std::min(vertex_weight, new_max_weight - new_second_max_weight) : 0.0; const cost_t diff = new_node_proc_affinity - prev_node_proc_affinity; if (std::abs(diff) > EPSILON) { update_info.full_update = true; - affinity_table_node[node_proc][window_size] += diff; // Use the pre-calculated diff + affinity_table_node[node_proc][window_size] += diff; // Use the pre-calculated diff } if ((prev_max_work != new_max_weight) || update_info.full_update) { @@ -332,40 +337,40 @@ class kl_improver : public ImprovementScheduler { continue; } - const work_weight_t new_weight = - vertex_weight + active_schedule.get_step_processor_work(node_step, proc); - const cost_t prev_other_affinity = - compute_same_step_affinity(prev_max_work, new_weight, prev_node_proc_affinity); - const cost_t other_affinity = - compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); + const work_weight_t new_weight + = vertex_weight + active_schedule.get_step_processor_work(node_step, proc); + const cost_t prev_other_affinity + = compute_same_step_affinity(prev_max_work, new_weight, prev_node_proc_affinity); + const cost_t other_affinity + = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); affinity_table_node[proc][window_size] += (other_affinity - prev_other_affinity); } } if (node_proc != move.from_proc && is_compatible(node, move.from_proc)) { - const work_weight_t prev_new_weight = - vertex_weight + active_schedule.get_step_processor_work(node_step, move.from_proc) + - graph->vertex_work_weight(move.node); - const cost_t prev_other_affinity = - compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity); - const work_weight_t new_weight = - vertex_weight + active_schedule.get_step_processor_work(node_step, move.from_proc); - const cost_t other_affinity = - compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); + const work_weight_t prev_new_weight = vertex_weight + + active_schedule.get_step_processor_work(node_step, move.from_proc) + + graph->vertex_work_weight(move.node); + const cost_t prev_other_affinity + = compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity); + const work_weight_t new_weight + = vertex_weight + active_schedule.get_step_processor_work(node_step, move.from_proc); + const cost_t other_affinity + = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); affinity_table_node[move.from_proc][window_size] += (other_affinity - prev_other_affinity); } if (node_proc != move.to_proc && is_compatible(node, move.to_proc)) { - const work_weight_t prev_new_weight = - vertex_weight + active_schedule.get_step_processor_work(node_step, move.to_proc) - - graph->vertex_work_weight(move.node); - const cost_t prev_other_affinity = - compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity); - const work_weight_t new_weight = - vertex_weight + active_schedule.get_step_processor_work(node_step, move.to_proc); - const cost_t other_affinity = - compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); + const work_weight_t prev_new_weight = vertex_weight + + active_schedule.get_step_processor_work(node_step, move.to_proc) + - graph->vertex_work_weight(move.node); + const cost_t prev_other_affinity + = compute_same_step_affinity(prev_max_work, prev_new_weight, prev_node_proc_affinity); + const work_weight_t new_weight + = vertex_weight + active_schedule.get_step_processor_work(node_step, move.to_proc); + const cost_t other_affinity + = compute_same_step_affinity(new_max_weight, new_weight, new_node_proc_affinity); affinity_table_node[move.to_proc][window_size] += (other_affinity - prev_other_affinity); } @@ -376,79 +381,73 @@ class kl_improver : public ImprovementScheduler { update_info.update_entire_from_step = true; // update moving to all procs with special for move.from_proc for (const unsigned proc : proc_range.compatible_processors_vertex(node)) { - const work_weight_t new_weight = - vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc); + const work_weight_t new_weight + = vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc); if (proc == move.from_proc) { - const work_weight_t prev_new_weight = - vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc) + - graph->vertex_work_weight(move.node); - const cost_t prev_affinity = - prev_max_work < prev_new_weight - ? static_cast(prev_new_weight) - static_cast(prev_max_work) - : 0.0; - const cost_t new_affinity = - new_max_weight < new_weight - ? static_cast(new_weight) - static_cast(new_max_weight) - : 0.0; + const work_weight_t prev_new_weight + = vertex_weight + active_schedule.get_step_processor_work(move.from_step, proc) + + graph->vertex_work_weight(move.node); + const cost_t prev_affinity + = prev_max_work < prev_new_weight + ? static_cast(prev_new_weight) - static_cast(prev_max_work) + : 0.0; + const cost_t new_affinity = new_max_weight < new_weight ? static_cast(new_weight) + - static_cast(new_max_weight) + : 0.0; affinity_table_node[proc][idx] += new_affinity - prev_affinity; } else if (proc == move.to_proc) { - const work_weight_t prev_new_weight = - vertex_weight + active_schedule.get_step_processor_work(move.to_step, proc) - - graph->vertex_work_weight(move.node); - const cost_t prev_affinity = - prev_max_work < prev_new_weight - ? static_cast(prev_new_weight) - static_cast(prev_max_work) - : 0.0; - const cost_t new_affinity = - new_max_weight < new_weight - ? static_cast(new_weight) - static_cast(new_max_weight) - : 0.0; + const work_weight_t prev_new_weight + = vertex_weight + active_schedule.get_step_processor_work(move.to_step, proc) + - graph->vertex_work_weight(move.node); + const cost_t prev_affinity + = prev_max_work < prev_new_weight + ? static_cast(prev_new_weight) - static_cast(prev_max_work) + : 0.0; + const cost_t new_affinity = new_max_weight < new_weight ? static_cast(new_weight) + - static_cast(new_max_weight) + : 0.0; affinity_table_node[proc][idx] += new_affinity - prev_affinity; } else { - const cost_t prev_affinity = - prev_max_work < new_weight - ? static_cast(new_weight) - static_cast(prev_max_work) - : 0.0; - const cost_t new_affinity = - new_max_weight < new_weight - ? static_cast(new_weight) - static_cast(new_max_weight) - : 0.0; + const cost_t prev_affinity = prev_max_work < new_weight ? static_cast(new_weight) + - static_cast(prev_max_work) + : 0.0; + const cost_t new_affinity = new_max_weight < new_weight ? static_cast(new_weight) + - static_cast(new_max_weight) + : 0.0; affinity_table_node[proc][idx] += new_affinity - prev_affinity; } } } else { // update only move.from_proc and move.to_proc if (is_compatible(node, move.from_proc)) { - const work_weight_t from_new_weight = - vertex_weight + active_schedule.get_step_processor_work(move.from_step, move.from_proc); - const work_weight_t from_prev_new_weight = - from_new_weight + graph->vertex_work_weight(move.node); - const cost_t from_prev_affinity = - prev_max_work < from_prev_new_weight - ? static_cast(from_prev_new_weight) - static_cast(prev_max_work) - : 0.0; - - const cost_t from_new_affinity = - new_max_weight < from_new_weight - ? static_cast(from_new_weight) - static_cast(new_max_weight) - : 0.0; + const work_weight_t from_new_weight + = vertex_weight + active_schedule.get_step_processor_work(move.from_step, move.from_proc); + const work_weight_t from_prev_new_weight = from_new_weight + graph->vertex_work_weight(move.node); + const cost_t from_prev_affinity + = prev_max_work < from_prev_new_weight + ? static_cast(from_prev_new_weight) - static_cast(prev_max_work) + : 0.0; + + const cost_t from_new_affinity + = new_max_weight < from_new_weight + ? static_cast(from_new_weight) - static_cast(new_max_weight) + : 0.0; affinity_table_node[move.from_proc][idx] += from_new_affinity - from_prev_affinity; } if (is_compatible(node, move.to_proc)) { - const work_weight_t to_new_weight = - vertex_weight + active_schedule.get_step_processor_work(move.to_step, move.to_proc); - const work_weight_t to_prev_new_weight = - to_new_weight - graph->vertex_work_weight(move.node); - const cost_t to_prev_affinity = - prev_max_work < to_prev_new_weight - ? static_cast(to_prev_new_weight) - static_cast(prev_max_work) - : 0.0; - - const cost_t to_new_affinity = - new_max_weight < to_new_weight - ? static_cast(to_new_weight) - static_cast(new_max_weight) - : 0.0; + const work_weight_t to_new_weight + = vertex_weight + active_schedule.get_step_processor_work(move.to_step, move.to_proc); + const work_weight_t to_prev_new_weight = to_new_weight - graph->vertex_work_weight(move.node); + const cost_t to_prev_affinity + = prev_max_work < to_prev_new_weight + ? static_cast(to_prev_new_weight) - static_cast(prev_max_work) + : 0.0; + + const cost_t to_new_affinity + = new_max_weight < to_new_weight + ? static_cast(to_new_weight) - static_cast(new_max_weight) + : 0.0; affinity_table_node[move.to_proc][idx] += to_new_affinity - to_prev_affinity; } } @@ -457,47 +456,82 @@ class kl_improver : public ImprovementScheduler { } else { const unsigned node_proc = active_schedule.assigned_processor(node); - process_work_update_step(node, node_step, node_proc, vertex_weight, move.from_step, move.from_proc, - graph->vertex_work_weight(move.node), prev_work_data.from_step_max_work, + process_work_update_step(node, + node_step, + node_proc, + vertex_weight, + move.from_step, + move.from_proc, + graph->vertex_work_weight(move.node), + prev_work_data.from_step_max_work, prev_work_data.from_step_second_max_work, - prev_work_data.from_step_max_work_processor_count, update_info.update_from_step, - update_info.update_entire_from_step, update_info.full_update, affinity_table_node); - process_work_update_step(node, node_step, node_proc, vertex_weight, move.to_step, move.to_proc, - -graph->vertex_work_weight(move.node), prev_work_data.to_step_max_work, + prev_work_data.from_step_max_work_processor_count, + update_info.update_from_step, + update_info.update_entire_from_step, + update_info.full_update, + affinity_table_node); + process_work_update_step(node, + node_step, + node_proc, + vertex_weight, + move.to_step, + move.to_proc, + -graph->vertex_work_weight(move.node), + prev_work_data.to_step_max_work, prev_work_data.to_step_second_max_work, - prev_work_data.to_step_max_work_processor_count, update_info.update_to_step, - update_info.update_entire_to_step, update_info.full_update, affinity_table_node); + prev_work_data.to_step_max_work_processor_count, + update_info.update_to_step, + update_info.update_entire_to_step, + update_info.full_update, + affinity_table_node); } return update_info; } - void process_work_update_step(VertexType node, unsigned node_step, unsigned node_proc, work_weight_t vertex_weight, - unsigned move_step, unsigned move_proc, work_weight_t move_correction_node_weight, + void process_work_update_step(VertexType node, + unsigned node_step, + unsigned node_proc, + work_weight_t vertex_weight, + unsigned move_step, + unsigned move_proc, + work_weight_t move_correction_node_weight, const work_weight_t prev_move_step_max_work, const work_weight_t prev_move_step_second_max_work, - unsigned prev_move_step_max_work_processor_count, bool &update_step, - bool &update_entire_step, bool &full_update, + unsigned prev_move_step_max_work_processor_count, + bool &update_step, + bool &update_entire_step, + bool &full_update, std::vector> &affinity_table_node); - void update_node_work_affinity(node_selection_container_t &nodes, kl_move move, + void update_node_work_affinity(node_selection_container_t &nodes, + kl_move move, const pre_move_work_data &prev_work_data, std::map &recompute_max_gain); - void update_best_move(VertexType node, unsigned step, unsigned proc, node_selection_container_t &affinity_table, + void update_best_move(VertexType node, + unsigned step, + unsigned proc, + node_selection_container_t &affinity_table, ThreadSearchContext &thread_data); - void update_best_move(VertexType node, unsigned step, node_selection_container_t &affinity_table, + void update_best_move(VertexType node, + unsigned step, + node_selection_container_t &affinity_table, ThreadSearchContext &thread_data); - void update_max_gain(kl_move move, std::map &recompute_max_gain, + void update_max_gain(kl_move move, + std::map &recompute_max_gain, ThreadSearchContext &thread_data); - void compute_work_affinity(VertexType node, std::vector> &affinity_table_node, + void compute_work_affinity(VertexType node, + std::vector> &affinity_table_node, ThreadSearchContext &thread_data); - inline void recompute_node_max_gain(VertexType node, node_selection_container_t &affinity_table, + inline void recompute_node_max_gain(VertexType node, + node_selection_container_t &affinity_table, ThreadSearchContext &thread_data) { const auto best_move = compute_best_move(node, affinity_table[node], thread_data); thread_data.max_gain_heap.update(node, best_move); } - inline cost_t compute_same_step_affinity(const work_weight_t &max_work_for_step, const work_weight_t &new_weight, + inline cost_t compute_same_step_affinity(const work_weight_t &max_work_for_step, + const work_weight_t &new_weight, const cost_t &node_proc_affinity) { const cost_t max_work_after_removal = static_cast(max_work_for_step) - node_proc_affinity; if (new_weight > max_work_after_removal) { @@ -510,10 +544,10 @@ class kl_improver : public ImprovementScheduler { active_schedule.apply_move(move, thread_data.active_schedule_data); comm_cost_f.update_datastructure_after_move(move, thread_data.start_step, thread_data.end_step); cost_t change_in_cost = -move.gain; - change_in_cost += static_cast(thread_data.active_schedule_data.resolved_violations.size()) * - thread_data.reward_penalty_strat.reward; - change_in_cost -= static_cast(thread_data.active_schedule_data.new_violations.size()) * - thread_data.reward_penalty_strat.penalty; + change_in_cost += static_cast(thread_data.active_schedule_data.resolved_violations.size()) + * thread_data.reward_penalty_strat.reward; + change_in_cost -= static_cast(thread_data.active_schedule_data.new_violations.size()) + * thread_data.reward_penalty_strat.penalty; #ifdef KL_DEBUG std::cout << "penalty: " << thread_data.reward_penalty_strat.penalty @@ -531,7 +565,9 @@ class kl_improver : public ImprovementScheduler { return change_in_cost; } - void run_quick_moves(unsigned &inner_iter, ThreadSearchContext &thread_data, const cost_t change_in_cost, + void run_quick_moves(unsigned &inner_iter, + ThreadSearchContext &thread_data, + const cost_t change_in_cost, const VertexType best_move_node) { #ifdef KL_DEBUG std::cout << "Starting quick moves sequence." << std::endl; @@ -552,15 +588,13 @@ class kl_improver : public ImprovementScheduler { } while (quick_moves_stack.size() > 0) { - auto next_node_to_move = quick_moves_stack.back(); quick_moves_stack.pop_back(); thread_data.reward_penalty_strat.init_reward_penalty( static_cast(thread_data.active_schedule_data.current_violations.size()) + 1.0); compute_node_affinities(next_node_to_move, thread_data.local_affinity_table, thread_data); - kl_move best_quick_move = - compute_best_move(next_node_to_move, thread_data.local_affinity_table, thread_data); + kl_move best_quick_move = compute_best_move(next_node_to_move, thread_data.local_affinity_table, thread_data); local_lock.insert(next_node_to_move); if (best_quick_move.gain <= std::numeric_limits::lowest()) { @@ -588,8 +622,9 @@ class kl_improver : public ImprovementScheduler { quick_moves_stack.push_back(key); } - if (abort) + if (abort) { break; + } } else if (thread_data.active_schedule_data.feasible) { break; @@ -597,8 +632,12 @@ class kl_improver : public ImprovementScheduler { } if (!thread_data.active_schedule_data.feasible) { - active_schedule.revert_schedule_to_bound(num_applied_moves, saved_cost, true, comm_cost_f, - thread_data.active_schedule_data, thread_data.start_step, + active_schedule.revert_schedule_to_bound(num_applied_moves, + saved_cost, + true, + comm_cost_f, + thread_data.active_schedule_data, + thread_data.start_step, thread_data.end_step); #ifdef KL_DEBUG std::cout << "Ending quick moves sequence with infeasible solution." << std::endl; @@ -613,17 +652,16 @@ class kl_improver : public ImprovementScheduler { thread_data.affinity_table.trim(); thread_data.max_gain_heap.clear(); thread_data.reward_penalty_strat.init_reward_penalty(1.0); - insert_gain_heap(thread_data); // Re-initialize the heap with the current state + insert_gain_heap(thread_data); // Re-initialize the heap with the current state } void resolve_violations(ThreadSearchContext &thread_data) { auto ¤t_violations = thread_data.active_schedule_data.current_violations; unsigned num_violations = static_cast(current_violations.size()); if (num_violations > 0) { - #ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", Starting preresolving violations with " - << num_violations << " initial violations" << std::endl; + std::cout << "thread " << thread_data.thread_id << ", Starting preresolving violations with " << num_violations + << " initial violations" << std::endl; #endif thread_data.reward_penalty_strat.init_reward_penalty(static_cast(num_violations) + 1.0); std::unordered_set local_lock; @@ -653,18 +691,17 @@ class kl_improver : public ImprovementScheduler { best_move = compute_best_move(node, thread_data.local_affinity_table, thread_data); } else { compute_node_affinities(source_v, thread_data.local_affinity_table, thread_data); - kl_move best_source_v_move = - compute_best_move(source_v, thread_data.local_affinity_table, thread_data); + kl_move best_source_v_move = compute_best_move(source_v, thread_data.local_affinity_table, thread_data); compute_node_affinities(target_v, thread_data.local_affinity_table, thread_data); - kl_move best_target_v_move = - compute_best_move(target_v, thread_data.local_affinity_table, thread_data); + kl_move best_target_v_move = compute_best_move(target_v, thread_data.local_affinity_table, thread_data); best_move = best_target_v_move.gain > best_source_v_move.gain ? std::move(best_target_v_move) : std::move(best_source_v_move); } local_lock.insert(best_move.node); - if (best_move.gain <= std::numeric_limits::lowest()) + if (best_move.gain <= std::numeric_limits::lowest()) { continue; + } apply_move(best_move, thread_data); thread_data.affinity_table.insert(best_move.node); @@ -674,8 +711,9 @@ class kl_improver : public ImprovementScheduler { << " to: " << best_move.to_proc << "|" << best_move.to_step << std::endl; #endif const unsigned new_num_violations = static_cast(current_violations.size()); - if (new_num_violations == 0) + if (new_num_violations == 0) { break; + } if (thread_data.active_schedule_data.new_violations.size() > 0) { for (const auto &vertex_edge_pair : thread_data.active_schedule_data.new_violations) { @@ -689,8 +727,8 @@ class kl_improver : public ImprovementScheduler { update_avg_gain(gain, num_iter++, thread_data.average_gain); #ifdef KL_DEBUG_1 std::cout << "thread " << thread_data.thread_id << ", preresolving violations with " << num_violations - << " violations, " << num_iter << " #iterations, " << thread_data.average_gain - << " average gain" << std::endl; + << " violations, " << num_iter << " #iterations, " << thread_data.average_gain << " average gain" + << std::endl; #endif if (num_iter > min_iter && thread_data.average_gain < 0.0) { break; @@ -701,11 +739,10 @@ class kl_improver : public ImprovementScheduler { } void run_local_search(ThreadSearchContext &thread_data) { - #ifdef KL_DEBUG_1 std::cout << "thread " << thread_data.thread_id - << ", start local search, initial schedule cost: " << thread_data.active_schedule_data.cost - << " with " << thread_data.num_steps() << " supersteps." << std::endl; + << ", start local search, initial schedule cost: " << thread_data.active_schedule_data.cost << " with " + << thread_data.num_steps() << " supersteps." << std::endl; #endif std::vector new_nodes; std::vector unlock_nodes; @@ -751,19 +788,20 @@ class kl_improver : public ImprovementScheduler { if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" - << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; } if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) + if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { std::cout << "memory constraint not satisfied" << std::endl; + } } #endif while (inner_iter < thread_data.max_inner_iterations && thread_data.max_gain_heap.size() > 0) { - kl_move best_move = - get_best_move(thread_data.affinity_table, thread_data.lock_manager, - thread_data.max_gain_heap); // locks best_move.node and removes it from node_selection + kl_move best_move + = get_best_move(thread_data.affinity_table, + thread_data.lock_manager, + thread_data.max_gain_heap); // locks best_move.node and removes it from node_selection if (best_move.gain <= std::numeric_limits::lowest()) { break; } @@ -771,13 +809,12 @@ class kl_improver : public ImprovementScheduler { #ifdef KL_DEBUG std::cout << " >>> move node " << best_move.node << " with gain " << best_move.gain << ", from proc|step: " << best_move.from_proc << "|" << best_move.from_step - << " to: " << best_move.to_proc << "|" << best_move.to_step - << ",avg gain: " << thread_data.average_gain << std::endl; + << " to: " << best_move.to_proc << "|" << best_move.to_step << ",avg gain: " << thread_data.average_gain + << std::endl; #endif if (inner_iter > thread_data.min_inner_iter && thread_data.average_gain < 0.0) { #ifdef KL_DEBUG - std::cout << "Negative average gain: " << thread_data.average_gain << ", end local search" - << std::endl; + std::cout << "Negative average gain: " << thread_data.average_gain << ", end local search" << std::endl; #endif break; } @@ -789,21 +826,20 @@ class kl_improver : public ImprovementScheduler { #endif const auto prev_work_data = active_schedule.get_pre_move_work_data(best_move); - const typename comm_cost_function_t::pre_move_comm_data_t prev_comm_data = - comm_cost_f.get_pre_move_comm_data(best_move); + const typename comm_cost_function_t::pre_move_comm_data_t prev_comm_data + = comm_cost_f.get_pre_move_comm_data(best_move); const cost_t change_in_cost = apply_move(best_move, thread_data); #ifdef KL_DEBUG_COST_CHECK active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > - 0.00001) { + if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" - << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; } if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) + if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { std::cout << "memory constraint not satisfied" << std::endl; + } } #endif if constexpr (enable_quick_moves) { @@ -811,17 +847,16 @@ class kl_improver : public ImprovementScheduler { run_quick_moves(inner_iter, thread_data, change_in_cost, best_move.node); #ifdef KL_DEBUG_COST_CHECK active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > - 0.00001) { + if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout - << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" - << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" + << std::endl; } if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) + if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { std::cout << "memory constraint not satisfied" << std::endl; + } } #endif continue; @@ -835,19 +870,20 @@ class kl_improver : public ImprovementScheduler { violation_removed_count++; if (violation_removed_count > 3) { - if (reset_counter < thread_data.max_no_vioaltions_removed_backtrack && - ((not iter_inital_feasible) || (thread_data.active_schedule_data.cost < - thread_data.active_schedule_data.best_cost))) { + if (reset_counter < thread_data.max_no_vioaltions_removed_backtrack + && ((not iter_inital_feasible) + || (thread_data.active_schedule_data.cost < thread_data.active_schedule_data.best_cost))) { thread_data.affinity_table.reset_node_selection(); thread_data.max_gain_heap.clear(); thread_data.lock_manager.clear(); thread_data.selection_strategy.select_nodes_violations( - thread_data.affinity_table, thread_data.active_schedule_data.current_violations, - thread_data.start_step, thread_data.end_step); + thread_data.affinity_table, + thread_data.active_schedule_data.current_violations, + thread_data.start_step, + thread_data.end_step); #ifdef KL_DEBUG - std::cout - << "Infeasible, and no violations resolved for 5 iterations, reset node selection" - << std::endl; + std::cout << "Infeasible, and no violations resolved for 5 iterations, reset node selection" + << std::endl; #endif thread_data.reward_penalty_strat.init_reward_penalty( static_cast(thread_data.active_schedule_data.current_violations.size())); @@ -874,8 +910,7 @@ class kl_improver : public ImprovementScheduler { } thread_data.affinity_table.trim(); - update_affinities(best_move, thread_data, recompute_max_gain, new_nodes, prev_work_data, - prev_comm_data); + update_affinities(best_move, thread_data, recompute_max_gain, new_nodes, prev_work_data, prev_comm_data); for (const auto v : unlock_nodes) { thread_data.lock_manager.unlock(v); @@ -898,16 +933,15 @@ class kl_improver : public ImprovementScheduler { #endif #ifdef KL_DEBUG_COST_CHECK active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > - 0.00001) { + if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" - << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; } if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) + if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { std::cout << "memory constraint not satisfied" << std::endl; + } } #endif update_max_gain(best_move, recompute_max_gain, thread_data); @@ -921,17 +955,20 @@ class kl_improver : public ImprovementScheduler { #ifdef KL_DEBUG std::cout << "--- end inner loop after " << inner_iter - << " inner iterations, gain heap size: " << thread_data.max_gain_heap.size() - << ", outer iteraion " << outer_iter << "/" << parameters.max_outer_iterations + << " inner iterations, gain heap size: " << thread_data.max_gain_heap.size() << ", outer iteraion " + << outer_iter << "/" << parameters.max_outer_iterations << ", current cost: " << thread_data.active_schedule_data.cost << ", " << (thread_data.active_schedule_data.feasible ? "feasible" : "infeasible") << std::endl; #endif #ifdef KL_DEBUG_1 const unsigned num_steps_tmp = thread_data.end_step; #endif - active_schedule.revert_to_best_schedule(thread_data.local_search_start_step, thread_data.step_to_remove, - comm_cost_f, thread_data.active_schedule_data, - thread_data.start_step, thread_data.end_step); + active_schedule.revert_to_best_schedule(thread_data.local_search_start_step, + thread_data.step_to_remove, + comm_cost_f, + thread_data.active_schedule_data, + thread_data.start_step, + thread_data.end_step); #ifdef KL_DEBUG_1 if (thread_data.local_search_start_step > 0) { if (num_steps_tmp == thread_data.end_step) { @@ -949,12 +986,12 @@ class kl_improver : public ImprovementScheduler { if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" - << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; } if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) + if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { std::cout << "memory constraint not satisfied" << std::endl; + } } #endif @@ -968,8 +1005,7 @@ class kl_improver : public ImprovementScheduler { if (other_threads_finished(thread_data.thread_id)) { #ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", other threads finished, end local search" - << std::endl; + std::cout << "thread " << thread_data.thread_id << ", other threads finished, end local search" << std::endl; #endif break; } @@ -980,8 +1016,7 @@ class kl_improver : public ImprovementScheduler { if (no_improvement_iter_counter >= parameters.max_no_improvement_iterations) { #ifdef KL_DEBUG_1 std::cout << "thread " << thread_data.thread_id << ", no improvement for " - << parameters.max_no_improvement_iterations << " iterations, end local search" - << std::endl; + << parameters.max_no_improvement_iterations << " iterations, end local search" << std::endl; #endif break; } @@ -995,35 +1030,40 @@ class kl_improver : public ImprovementScheduler { #ifdef KL_DEBUG_1 std::cout << "thread " << thread_data.thread_id << ", local search end after " << outer_iter << " outer iterations, current cost: " << thread_data.active_schedule_data.cost << " with " - << thread_data.num_steps() << " supersteps, vs serial cost " - << active_schedule.get_total_work_weight() << "." << std::endl; + << thread_data.num_steps() << " supersteps, vs serial cost " << active_schedule.get_total_work_weight() << "." + << std::endl; #endif thread_finished_vec[thread_data.thread_id] = true; } bool other_threads_finished(const unsigned thread_id) { const size_t num_threads = thread_finished_vec.size(); - if (num_threads == 1) + if (num_threads == 1) { return false; + } for (size_t i = 0; i < num_threads; i++) { - if (i != thread_id && !thread_finished_vec[i]) + if (i != thread_id && !thread_finished_vec[i]) { return false; + } } return true; } - inline void update_affinities(const kl_move &best_move, ThreadSearchContext &thread_data, + inline void update_affinities(const kl_move &best_move, + ThreadSearchContext &thread_data, std::map &recompute_max_gain, std::vector &new_nodes, const pre_move_work_data> &prev_work_data, const typename comm_cost_function_t::pre_move_comm_data_t &prev_comm_data) { - if constexpr (comm_cost_function_t::is_max_comm_cost_function) { comm_cost_f.update_node_comm_affinity( - best_move, thread_data, thread_data.reward_penalty_strat.penalty, - thread_data.reward_penalty_strat.reward, recompute_max_gain, - new_nodes); // this only updated reward/penalty, collects new_nodes, and fills recompute_max_gain + best_move, + thread_data, + thread_data.reward_penalty_strat.penalty, + thread_data.reward_penalty_strat.reward, + recompute_max_gain, + new_nodes); // this only updated reward/penalty, collects new_nodes, and fills recompute_max_gain // Add nodes from affected steps to new_nodes // { @@ -1073,29 +1113,28 @@ class kl_improver : public ImprovementScheduler { const auto current_max = active_schedule.get_step_max_work(best_move.from_step); const auto current_second_max = active_schedule.get_step_second_max_work(best_move.from_step); const auto current_count = active_schedule.get_step_max_work_processor_count()[best_move.from_step]; - if (current_max != prev_work_data.from_step_max_work || - current_second_max != prev_work_data.from_step_second_max_work || - current_count != prev_work_data.from_step_max_work_processor_count) { + if (current_max != prev_work_data.from_step_max_work + || current_second_max != prev_work_data.from_step_second_max_work + || current_count != prev_work_data.from_step_max_work_processor_count) { changed_steps.insert(best_move.from_step); } } else { // Different steps - check both const auto current_from_max = active_schedule.get_step_max_work(best_move.from_step); const auto current_from_second_max = active_schedule.get_step_second_max_work(best_move.from_step); - const auto current_from_count = - active_schedule.get_step_max_work_processor_count()[best_move.from_step]; - if (current_from_max != prev_work_data.from_step_max_work || - current_from_second_max != prev_work_data.from_step_second_max_work || - current_from_count != prev_work_data.from_step_max_work_processor_count) { + const auto current_from_count = active_schedule.get_step_max_work_processor_count()[best_move.from_step]; + if (current_from_max != prev_work_data.from_step_max_work + || current_from_second_max != prev_work_data.from_step_second_max_work + || current_from_count != prev_work_data.from_step_max_work_processor_count) { changed_steps.insert(best_move.from_step); } const auto current_to_max = active_schedule.get_step_max_work(best_move.to_step); const auto current_to_second_max = active_schedule.get_step_second_max_work(best_move.to_step); const auto current_to_count = active_schedule.get_step_max_work_processor_count()[best_move.to_step]; - if (current_to_max != prev_work_data.to_step_max_work || - current_to_second_max != prev_work_data.to_step_second_max_work || - current_to_count != prev_work_data.to_step_max_work_processor_count) { + if (current_to_max != prev_work_data.to_step_max_work + || current_to_second_max != prev_work_data.to_step_second_max_work + || current_to_count != prev_work_data.to_step_max_work_processor_count) { changed_steps.insert(best_move.to_step); } } @@ -1107,8 +1146,8 @@ class kl_improver : public ImprovementScheduler { const auto current_second_max = comm_cost_f.comm_ds.step_second_max_comm(step); const auto current_count = comm_cost_f.comm_ds.step_max_comm_count(step); - if (current_max != step_info.max_comm || current_second_max != step_info.second_max_comm || - current_count != step_info.max_comm_count) { + if (current_max != step_info.max_comm || current_second_max != step_info.second_max_comm + || current_count != step_info.max_comm_count) { changed_steps.insert(step); } } @@ -1152,14 +1191,16 @@ class kl_improver : public ImprovementScheduler { } } else { update_node_work_affinity(thread_data.affinity_table, best_move, prev_work_data, recompute_max_gain); - comm_cost_f.update_node_comm_affinity(best_move, thread_data, thread_data.reward_penalty_strat.penalty, - thread_data.reward_penalty_strat.reward, recompute_max_gain, + comm_cost_f.update_node_comm_affinity(best_move, + thread_data, + thread_data.reward_penalty_strat.penalty, + thread_data.reward_penalty_strat.reward, + recompute_max_gain, new_nodes); } } - inline bool blocked_edge_strategy(VertexType node, std::vector &unlock_nodes, - ThreadSearchContext &thread_data) { + inline bool blocked_edge_strategy(VertexType node, std::vector &unlock_nodes, ThreadSearchContext &thread_data) { if (thread_data.unlock_edge_backtrack_counter > 1) { for (const auto vertex_edge_pair : thread_data.active_schedule_data.new_violations) { const auto &e = vertex_edge_pair.second; @@ -1173,8 +1214,8 @@ class kl_improver : public ImprovementScheduler { } } #ifdef KL_DEBUG - std::cout << "Nodes of violated edge locked, backtrack counter: " - << thread_data.unlock_edge_backtrack_counter << std::endl; + std::cout << "Nodes of violated edge locked, backtrack counter: " << thread_data.unlock_edge_backtrack_counter + << std::endl; #endif thread_data.unlock_edge_backtrack_counter--; return true; @@ -1182,28 +1223,26 @@ class kl_improver : public ImprovementScheduler { #ifdef KL_DEBUG std::cout << "Nodes of violated edge locked, end local search" << std::endl; #endif - return false; // or reset local search and initalize with violating nodes + return false; // or reset local search and initalize with violating nodes } } - inline void adjust_local_search_parameters(unsigned outer_iter, unsigned no_imp_counter, - ThreadSearchContext &thread_data) { - if (no_imp_counter >= thread_data.no_improvement_iterations_reduce_penalty && - thread_data.reward_penalty_strat.initial_penalty > 1.0) { - thread_data.reward_penalty_strat.initial_penalty = - static_cast(std::floor(std::sqrt(thread_data.reward_penalty_strat.initial_penalty))); + inline void adjust_local_search_parameters(unsigned outer_iter, unsigned no_imp_counter, ThreadSearchContext &thread_data) { + if (no_imp_counter >= thread_data.no_improvement_iterations_reduce_penalty + && thread_data.reward_penalty_strat.initial_penalty > 1.0) { + thread_data.reward_penalty_strat.initial_penalty + = static_cast(std::floor(std::sqrt(thread_data.reward_penalty_strat.initial_penalty))); thread_data.unlock_edge_backtrack_counter_reset += 1; thread_data.no_improvement_iterations_reduce_penalty += 15; #ifdef KL_DEBUG_1 std::cout << "thread " << thread_data.thread_id << ", no improvement for " - << thread_data.no_improvement_iterations_reduce_penalty - << " iterations, reducing initial penalty to " << thread_data.reward_penalty_strat.initial_penalty - << std::endl; + << thread_data.no_improvement_iterations_reduce_penalty << " iterations, reducing initial penalty to " + << thread_data.reward_penalty_strat.initial_penalty << std::endl; #endif } - if (parameters.try_remove_step_after_num_outer_iterations > 0 && - ((outer_iter + 1) % parameters.try_remove_step_after_num_outer_iterations) == 0) { + if (parameters.try_remove_step_after_num_outer_iterations > 0 + && ((outer_iter + 1) % parameters.try_remove_step_after_num_outer_iterations) == 0) { thread_data.step_selection_epoch_counter = 0; ; #ifdef KL_DEBUG @@ -1216,8 +1255,8 @@ class kl_improver : public ImprovementScheduler { thread_data.no_improvement_iterations_increase_inner_iter += 20; #ifdef KL_DEBUG_1 std::cout << "thread " << thread_data.thread_id << ", no improvement for " - << thread_data.no_improvement_iterations_increase_inner_iter - << " iterations, increasing min inner iter to " << thread_data.min_inner_iter << std::endl; + << thread_data.no_improvement_iterations_increase_inner_iter << " iterations, increasing min inner iter to " + << thread_data.min_inner_iter << std::endl; #endif } } @@ -1230,14 +1269,19 @@ class kl_improver : public ImprovementScheduler { void cleanup_datastructures(); void update_avg_gain(const cost_t gain, const unsigned num_iter, double &average_gain); void insert_gain_heap(ThreadSearchContext &thread_data); - void insert_new_nodes_gain_heap(std::vector &new_nodes, node_selection_container_t &nodes, + void insert_new_nodes_gain_heap(std::vector &new_nodes, + node_selection_container_t &nodes, ThreadSearchContext &thread_data); - inline void compute_node_affinities(VertexType node, std::vector> &affinity_table_node, + inline void compute_node_affinities(VertexType node, + std::vector> &affinity_table_node, ThreadSearchContext &thread_data) { compute_work_affinity(node, affinity_table_node, thread_data); - comm_cost_f.compute_comm_affinity(node, affinity_table_node, thread_data.reward_penalty_strat.penalty, - thread_data.reward_penalty_strat.reward, thread_data.start_step, + comm_cost_f.compute_comm_affinity(node, + affinity_table_node, + thread_data.reward_penalty_strat.penalty, + thread_data.reward_penalty_strat.reward, + thread_data.start_step, thread_data.end_step); } @@ -1245,8 +1289,7 @@ class kl_improver : public ImprovementScheduler { if (select_nodes_check_remove_superstep(thread_data.step_to_remove, thread_data)) { active_schedule.swap_empty_step_fwd(thread_data.step_to_remove, thread_data.end_step); thread_data.end_step--; - thread_data.local_search_start_step = - static_cast(thread_data.active_schedule_data.applied_moves.size()); + thread_data.local_search_start_step = static_cast(thread_data.active_schedule_data.applied_moves.size()); thread_data.active_schedule_data.update_cost(static_cast(-1.0 * instance->synchronisationCosts())); if constexpr (enable_preresolving_violations) { @@ -1254,27 +1297,30 @@ class kl_improver : public ImprovementScheduler { } if (thread_data.active_schedule_data.current_violations.size() > parameters.initial_violation_threshold) { - active_schedule.revert_to_best_schedule(thread_data.local_search_start_step, thread_data.step_to_remove, - comm_cost_f, thread_data.active_schedule_data, - thread_data.start_step, thread_data.end_step); + active_schedule.revert_to_best_schedule(thread_data.local_search_start_step, + thread_data.step_to_remove, + comm_cost_f, + thread_data.active_schedule_data, + thread_data.start_step, + thread_data.end_step); } else { - thread_data.unlock_edge_backtrack_counter = - static_cast(thread_data.active_schedule_data.current_violations.size()); - thread_data.max_inner_iterations = - std::max(thread_data.unlock_edge_backtrack_counter * 5u, parameters.max_inner_iterations_reset); - thread_data.max_no_vioaltions_removed_backtrack = - parameters.max_no_vioaltions_removed_backtrack_for_remove_step_reset; + thread_data.unlock_edge_backtrack_counter + = static_cast(thread_data.active_schedule_data.current_violations.size()); + thread_data.max_inner_iterations + = std::max(thread_data.unlock_edge_backtrack_counter * 5u, parameters.max_inner_iterations_reset); + thread_data.max_no_vioaltions_removed_backtrack + = parameters.max_no_vioaltions_removed_backtrack_for_remove_step_reset; #ifdef KL_DEBUG_1 - std::cout << "thread " << thread_data.thread_id << ", Trying to remove step " - << thread_data.step_to_remove << std::endl; + std::cout << "thread " << thread_data.thread_id << ", Trying to remove step " << thread_data.step_to_remove + << std::endl; #endif return; } } // thread_data.step_to_remove = thread_data.start_step; thread_data.local_search_start_step = 0; - thread_data.selection_strategy.select_active_nodes(thread_data.affinity_table, thread_data.start_step, - thread_data.end_step); + thread_data.selection_strategy.select_active_nodes( + thread_data.affinity_table, thread_data.start_step, thread_data.end_step); } bool check_remove_superstep(unsigned step); @@ -1289,7 +1335,6 @@ class kl_improver : public ImprovementScheduler { active_schedule.getSetSchedule().step_processor_vertices[step][proc].begin(), active_schedule.getSetSchedule().step_processor_vertices[step][proc].end()); for (const auto &node : step_proc_node_vec) { - thread_data.reward_penalty_strat.init_reward_penalty( static_cast(thread_data.active_schedule_data.current_violations.size()) + 1.0); compute_node_affinities(node, thread_data.local_affinity_table, thread_data); @@ -1301,8 +1346,8 @@ class kl_improver : public ImprovementScheduler { } apply_move(best_move, thread_data); - if (thread_data.active_schedule_data.current_violations.size() > - parameters.abort_scatter_nodes_violation_threshold) { + if (thread_data.active_schedule_data.current_violations.size() + > parameters.abort_scatter_nodes_violation_threshold) { abort = true; break; } @@ -1311,7 +1356,6 @@ class kl_improver : public ImprovementScheduler { // thread_data.selection_strategy.add_neighbours_to_selection(node, thread_data.affinity_table, // thread_data.start_step, thread_data.end_step); if (thread_data.active_schedule_data.new_violations.size() > 0) { - for (const auto &vertex_edge_pair : thread_data.active_schedule_data.new_violations) { const auto &vertex = vertex_edge_pair.first; thread_data.affinity_table.insert(vertex); @@ -1326,16 +1370,15 @@ class kl_improver : public ImprovementScheduler { #ifdef KL_DEBUG_COST_CHECK active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); - if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > - 0.00001) { + if (std::abs(comm_cost_f.compute_schedule_cost_test() - thread_data.active_schedule_data.cost) > 0.00001) { std::cout << "computed cost: " << comm_cost_f.compute_schedule_cost_test() << ", current cost: " << thread_data.active_schedule_data.cost << std::endl; - std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" - << std::endl; + std::cout << ">>>>>>>>>>>>>>>>>>>>>> compute cost not equal to new cost <<<<<<<<<<<<<<<<<<<<" << std::endl; } if constexpr (active_schedule_t::use_memory_constraint) { - if (not active_schedule.memory_constraint.satisfied_memory_constraint()) + if (not active_schedule.memory_constraint.satisfied_memory_constraint()) { std::cout << "memory constraint not satisfied" << std::endl; + } } #endif } @@ -1346,8 +1389,8 @@ class kl_improver : public ImprovementScheduler { } if (abort) { - active_schedule.revert_to_best_schedule(0, 0, comm_cost_f, thread_data.active_schedule_data, - thread_data.start_step, thread_data.end_step); + active_schedule.revert_to_best_schedule( + 0, 0, comm_cost_f, thread_data.active_schedule_data, thread_data.start_step, thread_data.end_step); thread_data.affinity_table.reset_node_selection(); return false; } @@ -1355,7 +1398,7 @@ class kl_improver : public ImprovementScheduler { } void synchronize_active_schedule(const unsigned num_threads) { - if (num_threads == 1) { // single thread case + if (num_threads == 1) { // single thread case active_schedule.set_cost(thread_data_vec[0].active_schedule_data.cost); active_schedule.getVectorSchedule().number_of_supersteps = thread_data_vec[0].num_steps(); return; @@ -1389,8 +1432,9 @@ class kl_improver : public ImprovementScheduler { virtual ~kl_improver() = default; virtual RETURN_STATUS improveSchedule(BspSchedule &schedule) override { - if (schedule.getInstance().numberOfProcessors() < 2) + if (schedule.getInstance().numberOfProcessors() < 2) { return RETURN_STATUS::BEST_FOUND; + } const unsigned num_threads = 1; @@ -1428,6 +1472,7 @@ class kl_improver : public ImprovementScheduler { } virtual void setTimeQualityParameter(const double time_quality) { this->parameters.time_quality = time_quality; } + virtual void setSuperstepRemoveStrengthParameter(const double superstep_remove_strength) { this->parameters.superstep_remove_strength = superstep_remove_strength; } @@ -1435,15 +1480,14 @@ class kl_improver : public ImprovementScheduler { virtual std::string getScheduleName() const { return "kl_improver_" + comm_cost_f.name(); } }; -template +template void kl_improver::set_parameters( vertex_idx_t num_nodes) { const unsigned log_num_nodes = (num_nodes > 1) ? static_cast(std::log(num_nodes)) : 1; // Total number of outer iterations. Proportional to sqrt N. - parameters.max_outer_iterations = - static_cast(std::sqrt(num_nodes) * (parameters.time_quality * 10.0) / parameters.num_parallel_loops); + parameters.max_outer_iterations + = static_cast(std::sqrt(num_nodes) * (parameters.time_quality * 10.0) / parameters.num_parallel_loops); // Number of times to reset the search for violations before giving up. parameters.max_no_vioaltions_removed_backtrack_reset = parameters.time_quality < 0.75 ? 1 @@ -1451,19 +1495,17 @@ void kl_improver(parameters.superstep_remove_strength * 7); + parameters.max_no_vioaltions_removed_backtrack_for_remove_step_reset + = 3 + static_cast(parameters.superstep_remove_strength * 7); parameters.node_max_step_selection_epochs = parameters.superstep_remove_strength < 0.75 ? 1 : parameters.superstep_remove_strength < 1.0 ? 2 : 3; parameters.remove_step_epocs = static_cast(parameters.superstep_remove_strength * 4.0); - parameters.min_inner_iter_reset = - static_cast(log_num_nodes + log_num_nodes * (1.0 + parameters.time_quality)); + parameters.min_inner_iter_reset = static_cast(log_num_nodes + log_num_nodes * (1.0 + parameters.time_quality)); if (parameters.remove_step_epocs > 0) { - parameters.try_remove_step_after_num_outer_iterations = - parameters.max_outer_iterations / parameters.remove_step_epocs; + parameters.try_remove_step_after_num_outer_iterations = parameters.max_outer_iterations / parameters.remove_step_epocs; } else { // Effectively disable superstep removal if remove_step_epocs is 0. parameters.try_remove_step_after_num_outer_iterations = parameters.max_outer_iterations + 1; @@ -1473,16 +1515,16 @@ void kl_improver(std::ceil(parameters.time_quality * 10 * log_num_nodes + log_num_nodes)); + thread.selection_strategy.selection_threshold + = static_cast(std::ceil(parameters.time_quality * 10 * log_num_nodes + log_num_nodes)); } #ifdef KL_DEBUG_1 std::cout << "kl set parameter, number of nodes: " << num_nodes << std::endl; std::cout << "max outer iterations: " << parameters.max_outer_iterations << std::endl; std::cout << "max inner iterations: " << parameters.max_inner_iterations_reset << std::endl; - std::cout << "no improvement iterations reduce penalty: " - << thread_data_vec[0].no_improvement_iterations_reduce_penalty << std::endl; + std::cout << "no improvement iterations reduce penalty: " << thread_data_vec[0].no_improvement_iterations_reduce_penalty + << std::endl; std::cout << "selction threshold: " << thread_data_vec[0].selection_strategy.selection_threshold << std::endl; std::cout << "remove step epocs: " << parameters.remove_step_epocs << std::endl; std::cout << "try remove step after num outer iterations: " << parameters.try_remove_step_after_num_outer_iterations @@ -1491,26 +1533,25 @@ void kl_improver +template void kl_improver::update_node_work_affinity( - node_selection_container_t &nodes, kl_move move, const pre_move_work_data &prev_work_data, + node_selection_container_t &nodes, + kl_move move, + const pre_move_work_data &prev_work_data, std::map &recompute_max_gain) { const size_t active_count = nodes.size(); for (size_t i = 0; i < active_count; ++i) { const VertexType node = nodes.get_selected_nodes()[i]; - kl_gain_update_info update_info = - update_node_work_affinity_after_move(node, move, prev_work_data, nodes.at(node)); + kl_gain_update_info update_info = update_node_work_affinity_after_move(node, move, prev_work_data, nodes.at(node)); if (update_info.update_from_step || update_info.update_to_step) { recompute_max_gain[node] = update_info; } } } -template +template void kl_improver::update_max_gain( kl_move move, std::map &recompute_max_gain, ThreadSearchContext &thread_data) { for (auto &pair : recompute_max_gain) { @@ -1534,8 +1575,7 @@ void kl_improver +template void kl_improver::compute_work_affinity( VertexType node, std::vector> &affinity_table_node, ThreadSearchContext &thread_data) { const unsigned node_step = active_schedule.assigned_superstep(node); @@ -1558,33 +1598,39 @@ void kl_improver +template void kl_improver::process_work_update_step( - VertexType node, unsigned node_step, unsigned node_proc, work_weight_t vertex_weight, unsigned move_step, - unsigned move_proc, work_weight_t move_correction_node_weight, const work_weight_t prev_move_step_max_work, - const work_weight_t prev_move_step_second_max_work, unsigned prev_move_step_max_work_processor_count, - bool &update_step, bool &update_entire_step, bool &full_update, + VertexType node, + unsigned node_step, + unsigned node_proc, + work_weight_t vertex_weight, + unsigned move_step, + unsigned move_proc, + work_weight_t move_correction_node_weight, + const work_weight_t prev_move_step_max_work, + const work_weight_t prev_move_step_second_max_work, + unsigned prev_move_step_max_work_processor_count, + bool &update_step, + bool &update_entire_step, + bool &full_update, std::vector> &affinity_table_node) { const unsigned lower_bound = move_step > window_size ? move_step - window_size : 0; if (lower_bound <= node_step && node_step <= move_step + window_size) { @@ -1594,20 +1640,18 @@ void kl_improver EPSILON; @@ -1620,30 +1664,26 @@ void kl_improver(new_weight) - static_cast(prev_move_step_max_work) - : 0.0; - const cost_t new_affinity = - new_max_weight < new_weight - ? static_cast(new_weight) - static_cast(new_max_weight) - : 0.0; + const cost_t prev_affinity + = prev_move_step_max_work < new_weight + ? static_cast(new_weight) - static_cast(prev_move_step_max_work) + : 0.0; + const cost_t new_affinity = new_max_weight < new_weight + ? static_cast(new_weight) - static_cast(new_max_weight) + : 0.0; affinity_table_node[proc][idx] += new_affinity - prev_affinity; } else { - const work_weight_t prev_new_weight = vertex_weight + - active_schedule.get_step_processor_work(move_step, proc) + - move_correction_node_weight; - const cost_t prev_affinity = - prev_move_step_max_work < prev_new_weight - ? static_cast(prev_new_weight) - static_cast(prev_move_step_max_work) - : 0.0; - - const cost_t new_affinity = - new_max_weight < new_weight - ? static_cast(new_weight) - static_cast(new_max_weight) - : 0.0; + const work_weight_t prev_new_weight = vertex_weight + + active_schedule.get_step_processor_work(move_step, proc) + + move_correction_node_weight; + const cost_t prev_affinity + = prev_move_step_max_work < prev_new_weight + ? static_cast(prev_new_weight) - static_cast(prev_move_step_max_work) + : 0.0; + + const cost_t new_affinity = new_max_weight < new_weight + ? static_cast(new_weight) - static_cast(new_max_weight) + : 0.0; affinity_table_node[proc][idx] += new_affinity - prev_affinity; } } } else { // update only move_proc if (is_compatible(node, move_proc)) { - const work_weight_t new_weight = - vertex_weight + active_schedule.get_step_processor_work(move_step, move_proc); + const work_weight_t new_weight = vertex_weight + active_schedule.get_step_processor_work(move_step, move_proc); const work_weight_t prev_new_weight = new_weight + move_correction_node_weight; - const cost_t prev_affinity = - prev_move_step_max_work < prev_new_weight - ? static_cast(prev_new_weight) - static_cast(prev_move_step_max_work) - : 0.0; - - const cost_t new_affinity = new_max_weight < new_weight ? static_cast(new_weight) - - static_cast(new_max_weight) - : 0.0; + const cost_t prev_affinity + = prev_move_step_max_work < prev_new_weight + ? static_cast(prev_new_weight) - static_cast(prev_move_step_max_work) + : 0.0; + + const cost_t new_affinity = new_max_weight < new_weight + ? static_cast(new_weight) - static_cast(new_max_weight) + : 0.0; affinity_table_node[move_proc][idx] += new_affinity - prev_affinity; } } @@ -1707,18 +1742,14 @@ void kl_improver -bool kl_improver::select_nodes_check_remove_superstep(unsigned &step_to_remove, - ThreadSearchContext &thread_data) { - if (thread_data.step_selection_epoch_counter >= parameters.node_max_step_selection_epochs || - thread_data.num_steps() < 3) { +template +bool kl_improver::select_nodes_check_remove_superstep( + unsigned &step_to_remove, ThreadSearchContext &thread_data) { + if (thread_data.step_selection_epoch_counter >= parameters.node_max_step_selection_epochs || thread_data.num_steps() < 3) { return false; } - for (step_to_remove = thread_data.step_selection_counter; step_to_remove <= thread_data.end_step; - step_to_remove++) { + for (step_to_remove = thread_data.step_selection_counter; step_to_remove <= thread_data.end_step; step_to_remove++) { assert(step_to_remove >= thread_data.start_step && step_to_remove <= thread_data.end_step); #ifdef KL_DEBUG std::cout << "Checking to remove step " << step_to_remove << "/" << thread_data.end_step << std::endl; @@ -1745,21 +1776,20 @@ bool kl_improver -bool kl_improver::check_remove_superstep( - unsigned step) { - if (active_schedule.num_steps() < 2) +template +bool kl_improver::check_remove_superstep(unsigned step) { + if (active_schedule.num_steps() < 2) { return false; + } - if (active_schedule.get_step_max_work(step) < instance->synchronisationCosts()) + if (active_schedule.get_step_max_work(step) < instance->synchronisationCosts()) { return true; + } return false; } -template +template void kl_improver::reset_inner_search_structures( ThreadSearchContext &thread_data) const { thread_data.unlock_edge_backtrack_counter = thread_data.unlock_edge_backtrack_counter_reset; @@ -1771,8 +1801,7 @@ void kl_improver +template bool kl_improver::is_local_search_blocked( ThreadSearchContext &thread_data) { for (const auto &pair : thread_data.active_schedule_data.new_violations) { @@ -1783,8 +1812,7 @@ bool kl_improver +template void kl_improver::initialize_datastructures( BspSchedule &schedule) { input_schedule = &schedule; @@ -1801,8 +1829,8 @@ void kl_improvernum_vertices()); - t_data.reward_penalty_strat.initialize(active_schedule, comm_cost_f.get_max_comm_weight_multiplied(), - active_schedule.get_max_work_weight()); + t_data.reward_penalty_strat.initialize( + active_schedule, comm_cost_f.get_max_comm_weight_multiplied(), active_schedule.get_max_work_weight()); t_data.selection_strategy.initialize(active_schedule, gen, t_data.start_step, t_data.end_step); t_data.local_affinity_table.resize(instance->numberOfProcessors()); @@ -1812,15 +1840,14 @@ void kl_improver -void kl_improver::update_avg_gain( - const cost_t gain, const unsigned num_iter, double &average_gain) { +template +void kl_improver::update_avg_gain(const cost_t gain, + const unsigned num_iter, + double &average_gain) { average_gain = static_cast((average_gain * num_iter + gain)) / (num_iter + 1.0); } -template +template void kl_improver::insert_gain_heap( ThreadSearchContext &thread_data) { const size_t active_count = thread_data.affinity_table.size(); @@ -1833,8 +1860,7 @@ void kl_improver +template void kl_improver::insert_new_nodes_gain_heap( std::vector &new_nodes, node_selection_container_t &nodes, ThreadSearchContext &thread_data) { for (const auto &node : new_nodes) { @@ -1845,23 +1871,20 @@ void kl_improver +template void kl_improver::cleanup_datastructures() { thread_data_vec.clear(); active_schedule.clear(); } -template +template void kl_improver::print_heap( heap_datastructure &max_gain_heap) const { - if (max_gain_heap.is_empty()) { std::cout << "heap is empty" << std::endl; return; } - heap_datastructure temp_heap = max_gain_heap; // requires copy constructor + heap_datastructure temp_heap = max_gain_heap; // requires copy constructor std::cout << "heap current size: " << temp_heap.size() << std::endl; const auto &top_val = temp_heap.get_value(temp_heap.top()); @@ -1870,22 +1893,21 @@ void kl_improver +template void kl_improver::update_best_move( - VertexType node, unsigned step, unsigned proc, node_selection_container_t &affinity_table, - ThreadSearchContext &thread_data) { + VertexType node, unsigned step, unsigned proc, node_selection_container_t &affinity_table, ThreadSearchContext &thread_data) { const unsigned node_proc = active_schedule.assigned_processor(node); const unsigned node_step = active_schedule.assigned_superstep(node); - if ((node_proc == proc) && (node_step == step)) + if ((node_proc == proc) && (node_step == step)) { return; + } kl_move node_move = thread_data.max_gain_heap.get_value(node); cost_t max_gain = node_move.gain; @@ -1897,8 +1919,9 @@ void kl_improver +template void kl_improver::update_best_move( VertexType node, unsigned step, node_selection_container_t &affinity_table, ThreadSearchContext &thread_data) { - const unsigned node_proc = active_schedule.assigned_processor(node); const unsigned node_step = active_schedule.assigned_superstep(node); @@ -1939,8 +1960,9 @@ void kl_improver max_gain) { @@ -1951,14 +1973,15 @@ void kl_improver max_gain) { max_gain = gain; max_proc = proc; @@ -1977,4 +2000,4 @@ void kl_improver + #include "kl_improver.hpp" namespace osp { - - -template +template class kl_improver_mt : public kl_improver { - protected: - unsigned max_num_threads = std::numeric_limits::max(); void set_thread_boundaries(const unsigned num_threads, const unsigned num_steps, bool last_thread_large_range) { - if (num_threads == 1) { this->set_start_step(0, this->thread_data_vec[0]); this->thread_data_vec[0].end_step = (num_steps > 0) ? num_steps - 1 : 0; @@ -62,7 +61,8 @@ class kl_improver_mt : public kl_improverthread_data_vec[i].original_end_step = this->thread_data_vec[i].end_step; current_start_step = end_step + 1 + this->parameters.thread_range_gap; #ifdef KL_DEBUG_1 - std::cout << "thread " << i << ": start_step=" << this->thread_data_vec[i].start_step << ", end_step=" << this->thread_data_vec[i].end_step << std::endl; + std::cout << "thread " << i << ": start_step=" << this->thread_data_vec[i].start_step + << ", end_step=" << this->thread_data_vec[i].end_step << std::endl; #endif } } @@ -91,59 +91,60 @@ class kl_improver_mt : public kl_improver() {} - explicit kl_improver_mt(unsigned seed) : kl_improver(seed) {} + + explicit kl_improver_mt(unsigned seed) + : kl_improver(seed) {} + virtual ~kl_improver_mt() = default; - void set_max_num_threads(const unsigned num_threads) { - max_num_threads = num_threads; - } + void set_max_num_threads(const unsigned num_threads) { max_num_threads = num_threads; } virtual RETURN_STATUS improveSchedule(BspSchedule &schedule) override { - if (schedule.getInstance().numberOfProcessors() < 2) + if (schedule.getInstance().numberOfProcessors() < 2) { return RETURN_STATUS::BEST_FOUND; + } unsigned num_threads = std::min(max_num_threads, static_cast(omp_get_max_threads())); set_num_threads(num_threads, schedule.numberOfSupersteps()); - this->thread_data_vec.resize(num_threads); + this->thread_data_vec.resize(num_threads); this->thread_finished_vec.assign(num_threads, true); if (num_threads == 1) { - this->parameters.num_parallel_loops = 1; // no parallelization with one thread. Affects parameters.max_out_iteration calculation in set_parameters() + this->parameters.num_parallel_loops + = 1; // no parallelization with one thread. Affects parameters.max_out_iteration calculation in set_parameters() } this->set_parameters(schedule.getInstance().numberOfVertices()); - this->initialize_datastructures(schedule); + this->initialize_datastructures(schedule); const cost_t initial_cost = this->active_schedule.get_cost(); for (size_t i = 0; i < this->parameters.num_parallel_loops; ++i) { - set_thread_boundaries(num_threads, schedule.numberOfSupersteps(), i % 2 == 0); + set_thread_boundaries(num_threads, schedule.numberOfSupersteps(), i % 2 == 0); - #pragma omp parallel num_threads(num_threads) +#pragma omp parallel num_threads(num_threads) { const size_t thread_id = static_cast(omp_get_thread_num()); - auto & thread_data = this->thread_data_vec[thread_id]; + auto &thread_data = this->thread_data_vec[thread_id]; thread_data.active_schedule_data.initialize_cost(this->active_schedule.get_cost()); thread_data.selection_strategy.setup(thread_data.start_step, thread_data.end_step); - this->run_local_search(thread_data); + this->run_local_search(thread_data); } - + this->synchronize_active_schedule(num_threads); if (num_threads > 1) { this->active_schedule.set_cost(this->comm_cost_f.compute_schedule_cost()); set_num_threads(num_threads, schedule.numberOfSupersteps()); this->thread_finished_vec.resize(num_threads); } - } + } if (initial_cost > this->active_schedule.get_cost()) { this->active_schedule.write_schedule(schedule); @@ -156,4 +157,4 @@ class kl_improver_mt : public kl_improver +template class kl_improver_test : public kl_improver { - using VertexType = vertex_idx_t; using kl_move = kl_move_struct; using heap_datastructure = MaxPairingHeap; @@ -64,9 +65,7 @@ class kl_improver_test : public kl_improverthread_data_vec[0].active_schedule_data.feasible; } - void compute_violations_test() { - this->active_schedule.compute_violations(this->thread_data_vec[0].active_schedule_data); - } + void compute_violations_test() { this->active_schedule.compute_violations(this->thread_data_vec[0].active_schedule_data); } node_selection_container_t &insert_gain_heap_test(const std::vector &n) { this->thread_data_vec[0].reward_penalty_strat.penalty = 0.0; @@ -118,25 +117,23 @@ class kl_improver_test : public kl_improverapply_move(best_move, this->thread_data_vec[0]); this->thread_data_vec[0].affinity_table.trim(); - this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data, - prev_comm_data); + this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data, prev_comm_data); } auto run_inner_iteration_test() { - std::map recompute_max_gain; std::vector new_nodes; this->print_heap(this->thread_data_vec[0].max_gain_heap); kl_move best_move = this->get_best_move( - this->thread_data_vec[0].affinity_table, this->thread_data_vec[0].lock_manager, - this->thread_data_vec[0].max_gain_heap); // locks best_move.node and removes it from node_selection + this->thread_data_vec[0].affinity_table, + this->thread_data_vec[0].lock_manager, + this->thread_data_vec[0].max_gain_heap); // locks best_move.node and removes it from node_selection #ifdef KL_DEBUG - std::cout << "Best move: " << best_move.node << " gain: " << best_move.gain << ", from: " << best_move.from_step - << "|" << best_move.from_proc << " to: " << best_move.to_step << "|" << best_move.to_proc - << std::endl; + std::cout << "Best move: " << best_move.node << " gain: " << best_move.gain << ", from: " << best_move.from_step << "|" + << best_move.from_proc << " to: " << best_move.to_step << "|" << best_move.to_proc << std::endl; #endif const auto prev_work_data = this->active_schedule.get_pre_move_work_data(best_move); @@ -144,8 +141,7 @@ class kl_improver_test : public kl_improverapply_move(best_move, this->thread_data_vec[0]); this->thread_data_vec[0].affinity_table.trim(); - this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data, - prev_comm_data); + this->update_affinities(best_move, this->thread_data_vec[0], recompute_max_gain, new_nodes, prev_work_data, prev_comm_data); #ifdef KL_DEBUG std::cout << "New nodes: { "; @@ -166,4 +162,4 @@ class kl_improver_test : public kl_improver &schedule) { this->active_schedule.write_schedule(schedule); } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp index 80ed0e48..9727357f 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp @@ -19,39 +19,71 @@ limitations under the License. #pragma once -//#define KL_DEBUG -//#define KL_DEBUG_1 -//#define KL_DEBUG_COST_CHECK +// #define KL_DEBUG +// #define KL_DEBUG_1 +// #define KL_DEBUG_COST_CHECK -#include "kl_improver.hpp" -#include "comm_cost_modules/kl_total_comm_cost.hpp" -#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp" #include "comm_cost_modules/kl_bsp_comm_cost.hpp" +#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp" +#include "comm_cost_modules/kl_total_comm_cost.hpp" +#include "kl_improver.hpp" #include "osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp" namespace osp { using double_cost_t = double; -template -using kl_total_comm_improver = kl_improver, MemoryConstraint_t, window_size, double_cost_t>; - -template, unsigned window_size = 1, bool use_node_communication_costs_arg = true> -using kl_total_comm_improver_local_mem_constr = kl_improver, MemoryConstraint_t, window_size, double_cost_t>; - -template -using kl_total_lambda_comm_improver = kl_improver, MemoryConstraint_t, window_size, double_cost_t>; - -template, unsigned window_size = 1> -using kl_total_lambda_comm_improver_local_mem_constr = kl_improver, MemoryConstraint_t, window_size, double_cost_t>; - -template -using kl_bsp_comm_improver = kl_improver, MemoryConstraint_t, window_size, double_cost_t>; - -template, unsigned window_size = 1> -using kl_bsp_comm_improver_local_mem_constr = kl_improver, MemoryConstraint_t, window_size, double_cost_t>; - - - -} // namespace osp - +template +using kl_total_comm_improver + = kl_improver, + MemoryConstraint_t, + window_size, + double_cost_t>; + +template , + unsigned window_size = 1, + bool use_node_communication_costs_arg = true> +using kl_total_comm_improver_local_mem_constr + = kl_improver, + MemoryConstraint_t, + window_size, + double_cost_t>; + +template +using kl_total_lambda_comm_improver + = kl_improver, + MemoryConstraint_t, + window_size, + double_cost_t>; + +template , unsigned window_size = 1> +using kl_total_lambda_comm_improver_local_mem_constr + = kl_improver, + MemoryConstraint_t, + window_size, + double_cost_t>; + +template +using kl_bsp_comm_improver = kl_improver, + MemoryConstraint_t, + window_size, + double_cost_t>; + +template , unsigned window_size = 1> +using kl_bsp_comm_improver_local_mem_constr + = kl_improver, + MemoryConstraint_t, + window_size, + double_cost_t>; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp index 5946c7e5..1d70f3eb 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp @@ -19,23 +19,35 @@ limitations under the License. #pragma once -#include "kl_include.hpp" -#include "kl_improver_mt.hpp" -#include "comm_cost_modules/kl_total_comm_cost.hpp" -#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp" #include "comm_cost_modules/kl_bsp_comm_cost.hpp" +#include "comm_cost_modules/kl_hyper_total_comm_cost.hpp" +#include "comm_cost_modules/kl_total_comm_cost.hpp" +#include "kl_improver_mt.hpp" +#include "kl_include.hpp" namespace osp { -template -using kl_total_comm_improver_mt = kl_improver_mt, MemoryConstraint_t, window_size, double>; - -template -using kl_total_lambda_comm_improver_mt = kl_improver_mt, MemoryConstraint_t, window_size, double>; - -template -using kl_bsp_comm_improver_mt = kl_improver_mt, MemoryConstraint_t, window_size, double>; - - -} // namespace osp - +template +using kl_total_comm_improver_mt + = kl_improver_mt, + MemoryConstraint_t, + window_size, + double>; + +template +using kl_total_lambda_comm_improver_mt + = kl_improver_mt, + MemoryConstraint_t, + window_size, + double>; + +template +using kl_bsp_comm_improver_mt + = kl_improver_mt, MemoryConstraint_t, window_size, double>; + +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp index 7f3bb29d..397f0a1f 100644 --- a/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp @@ -18,14 +18,14 @@ limitations under the License. #pragma once -#include "kl_active_schedule.hpp" #include +#include "kl_active_schedule.hpp" + namespace osp { -template +template struct reward_penalty_strategy { - kl_active_schedule_t *active_schedule; cost_t max_weight; @@ -47,9 +47,8 @@ struct reward_penalty_strategy { } }; -template +template struct set_vertex_lock_manger { - std::unordered_set locked_nodes; void initialize(size_t) {} @@ -63,9 +62,8 @@ struct set_vertex_lock_manger { void clear() { locked_nodes.clear(); } }; -template +template struct vector_vertex_lock_manger { - std::vector locked_nodes; void initialize(size_t num_nodes) { locked_nodes.resize(num_nodes); } @@ -79,7 +77,7 @@ struct vector_vertex_lock_manger { void clear() { locked_nodes.assign(locked_nodes.size(), false); } }; -template +template struct adaptive_affinity_table { constexpr static unsigned window_range = 2 * window_size + 1; using VertexType = vertex_idx_t; @@ -153,8 +151,9 @@ struct adaptive_affinity_table { } bool insert(VertexType node) { - if (node_is_selected[node]) - return false; // Node is already in the table. + if (node_is_selected[node]) { + return false; // Node is already in the table. + } size_t insert_location; if (!gaps.empty()) { @@ -241,7 +240,7 @@ struct adaptive_affinity_table { } }; -template +template struct static_affinity_table { constexpr static unsigned window_range = 2 * window_size + 1; using VertexType = vertex_idx_t; @@ -300,9 +299,8 @@ struct static_affinity_table { void trim() {} }; -template +template struct vertex_selection_strategy { - using EdgeType = edge_desc_t; const kl_active_schedule_t *active_schedule; @@ -316,7 +314,9 @@ struct vertex_selection_strategy { unsigned max_work_counter = 0; - inline void initialize(const kl_active_schedule_t &sche_, std::mt19937 &gen_, const unsigned start_step, + inline void initialize(const kl_active_schedule_t &sche_, + std::mt19937 &gen_, + const unsigned start_step, const unsigned end_step) { active_schedule = &sche_; graph = &(sche_.getInstance().getComputationalDag()); @@ -344,18 +344,22 @@ struct vertex_selection_strategy { std::shuffle(permutation.begin(), permutation.end(), *gen); } - void add_neighbours_to_selection(vertex_idx_t node, container_t &nodes, const unsigned start_step, + void add_neighbours_to_selection(vertex_idx_t node, + container_t &nodes, + const unsigned start_step, const unsigned end_step) { for (const auto parent : graph->parents(node)) { const unsigned parent_step = active_schedule->assigned_superstep(parent); - if (parent_step >= start_step && parent_step <= end_step) + if (parent_step >= start_step && parent_step <= end_step) { nodes.insert(parent); + } } for (const auto child : graph->children(node)) { const unsigned child_step = active_schedule->assigned_superstep(child); - if (child_step >= start_step && child_step <= end_step) + if (child_step >= start_step && child_step <= end_step) { nodes.insert(child); + } } } @@ -370,24 +374,27 @@ struct vertex_selection_strategy { strategy_counter %= 5; } - void select_nodes_violations(container_t &node_selection, std::unordered_set ¤t_violations, - const unsigned start_step, const unsigned end_step) { + void select_nodes_violations(container_t &node_selection, + std::unordered_set ¤t_violations, + const unsigned start_step, + const unsigned end_step) { for (const auto &edge : current_violations) { const auto source_v = source(edge, *graph); const auto target_v = target(edge, *graph); const unsigned source_step = active_schedule->assigned_superstep(source_v); - if (source_step >= start_step && source_step <= end_step) + if (source_step >= start_step && source_step <= end_step) { node_selection.insert(source_v); + } const unsigned target_step = active_schedule->assigned_superstep(target_v); - if (target_step >= start_step && target_step <= end_step) + if (target_step >= start_step && target_step <= end_step) { node_selection.insert(target_v); + } } } void select_nodes_permutation_threshold(const std::size_t &threshold, container_t &node_selection) { - const size_t bound = std::min(threshold + permutation_idx, permutation.size()); for (std::size_t i = permutation_idx; i < bound; i++) { node_selection.insert(permutation[i]); @@ -400,12 +407,14 @@ struct vertex_selection_strategy { } } - void select_nodes_max_work_proc(const std::size_t &threshold, container_t &node_selection, - const unsigned start_step, const unsigned end_step) { + void select_nodes_max_work_proc(const std::size_t &threshold, + container_t &node_selection, + const unsigned start_step, + const unsigned end_step) { while (node_selection.size() < threshold) { if (max_work_counter > end_step) { - max_work_counter = start_step; // wrap around - break; // stop after one full pass + max_work_counter = start_step; // wrap around + break; // stop after one full pass } select_nodes_max_work_proc_helper(threshold - node_selection.size(), max_work_counter, node_selection); @@ -417,8 +426,8 @@ struct vertex_selection_strategy { const unsigned num_max_work_proc = active_schedule->work_datastructures.step_max_work_processor_count[step]; for (unsigned idx = 0; idx < num_max_work_proc; idx++) { const unsigned proc = active_schedule->work_datastructures.step_processor_work_[step][idx].proc; - const std::unordered_set> step_proc_vert = - active_schedule->getSetSchedule().step_processor_vertices[step][proc]; + const std::unordered_set> step_proc_vert + = active_schedule->getSetSchedule().step_processor_vertices[step][proc]; const size_t num_insert = std::min(threshold - node_selection.size(), step_proc_vert.size()); auto end_it = step_proc_vert.begin(); std::advance(end_it, num_insert); @@ -427,4 +436,4 @@ struct vertex_selection_strategy { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp index 2cee3d0f..bc0ed8eb 100644 --- a/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp +++ b/include/osp/bsp/scheduler/LocalSearch/LocalSearchMemoryConstraintModules.hpp @@ -31,27 +31,30 @@ namespace osp { * This trait checks if a type has the required methods for a memory constraint. * */ -template +template struct is_local_search_memory_constraint : std::false_type {}; -template +template struct is_local_search_memory_constraint< - T, std::void_t().initialize(std::declval>(), - std::declval>())), - decltype(std::declval().apply_move(std::declval>(), - std::declval(), std::declval(), - std::declval(), std::declval())), - decltype(std::declval().compute_memory_datastructure(std::declval(), - std::declval())), - decltype(std::declval().swap_steps(std::declval(), std::declval())), - decltype(std::declval().reset_superstep(std::declval())), - decltype(std::declval().override_superstep(std::declval(), std::declval(), - std::declval(), std::declval())), - decltype(std::declval().can_move(std::declval>(), - std::declval(), std::declval())), - decltype(std::declval().clear()), decltype(T())>> : std::true_type {}; - -template + T, + std::void_t().initialize(std::declval>(), + std::declval>())), + decltype(std::declval().apply_move(std::declval>(), + std::declval(), + std::declval(), + std::declval(), + std::declval())), + decltype(std::declval().compute_memory_datastructure(std::declval(), std::declval())), + decltype(std::declval().swap_steps(std::declval(), std::declval())), + decltype(std::declval().reset_superstep(std::declval())), + decltype(std::declval().override_superstep( + std::declval(), std::declval(), std::declval(), std::declval())), + decltype(std::declval().can_move( + std::declval>(), std::declval(), std::declval())), + decltype(std::declval().clear()), + decltype(T())>> : std::true_type {}; + +template inline constexpr bool is_local_search_memory_constraint_v = is_local_search_memory_constraint::value; /** @@ -67,9 +70,8 @@ struct no_local_search_memory_constraint { * * @tparam Graph_t The graph type. */ -template +template struct ls_local_memory_constraint { - using Graph_impl_t = Graph_t; const SetSchedule *set_schedule; @@ -80,7 +82,6 @@ struct ls_local_memory_constraint { ls_local_memory_constraint() : set_schedule(nullptr), graph(nullptr) {} inline void initialize(const SetSchedule &set_schedule_, const VectorSchedule &) { - if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL) { throw std::invalid_argument("Memory constraint type is not LOCAL"); } @@ -92,15 +93,14 @@ struct ls_local_memory_constraint { std::vector>(set_schedule->getInstance().numberOfProcessors(), 0)); } - inline void apply_move(vertex_idx_t vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, - unsigned to_step) { + inline void apply_move(vertex_idx_t vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, unsigned to_step) { step_processor_memory[to_step][to_proc] += graph->vertex_mem_weight(vertex); step_processor_memory[from_step][from_proc] -= graph->vertex_mem_weight(vertex); } inline bool can_move(vertex_idx_t vertex, const unsigned proc, unsigned step) const { - return step_processor_memory[step][proc] + graph->vertex_mem_weight(vertex) <= - set_schedule->getInstance().getArchitecture().memoryBound(proc); + return step_processor_memory[step][proc] + graph->vertex_mem_weight(vertex) + <= set_schedule->getInstance().getArchitecture().memoryBound(proc); } void swap_steps(const unsigned step1, const unsigned step2) { @@ -108,15 +108,11 @@ struct ls_local_memory_constraint { } void compute_memory_datastructure(unsigned start_step, unsigned end_step) { - for (unsigned step = start_step; step <= end_step; step++) { - for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) { - step_processor_memory[step][proc] = 0; for (const auto &node : set_schedule->step_processor_vertices[step][proc]) { - step_processor_memory[step][proc] += graph->vertex_mem_weight(node); } } @@ -131,7 +127,6 @@ struct ls_local_memory_constraint { } inline void reset_superstep(unsigned step) { - for (unsigned proc = 0; proc < set_schedule->getInstance().getArchitecture().numberOfProcessors(); proc++) { step_processor_memory[step][proc] = 0; } @@ -153,9 +148,8 @@ struct ls_local_memory_constraint { } }; -template +template struct ls_local_inc_edges_memory_constraint { - using Graph_impl_t = Graph_t; const SetSchedule *set_schedule; @@ -168,7 +162,6 @@ struct ls_local_inc_edges_memory_constraint { ls_local_inc_edges_memory_constraint() : set_schedule(nullptr), vector_schedule(nullptr), graph(nullptr) {} inline void initialize(const SetSchedule &set_schedule_, const VectorSchedule &vec_schedule_) { - if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES) { throw std::invalid_argument("Memory constraint type is not LOCAL_INC_EDGES"); } @@ -184,16 +177,12 @@ struct ls_local_inc_edges_memory_constraint { std::vector>>(set_schedule->getInstance().numberOfProcessors())); } - inline void apply_move(vertex_idx_t vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, - unsigned to_step) { - + inline void apply_move(vertex_idx_t vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, unsigned to_step) { step_processor_memory[to_step][to_proc] += graph->vertex_comm_weight(vertex); step_processor_memory[from_step][from_proc] -= graph->vertex_comm_weight(vertex); for (const auto &pred : graph->parents(vertex)) { - if (vector_schedule->assignedSuperstep(pred) < to_step) { - auto pair = step_processor_pred[to_step][to_proc].insert(pred); if (pair.second) { step_processor_memory[to_step][to_proc] += graph->vertex_comm_weight(pred); @@ -201,16 +190,14 @@ struct ls_local_inc_edges_memory_constraint { } if (vector_schedule->assignedSuperstep(pred) < from_step) { - bool remove = true; for (const auto &succ : graph->children(pred)) { - if (succ == vertex) { continue; } - if (vector_schedule->assignedProcessor(succ) == from_proc && - vector_schedule->assignedSuperstep(succ) == from_step) { + if (vector_schedule->assignedProcessor(succ) == from_proc + && vector_schedule->assignedSuperstep(succ) == from_step) { remove = false; break; } @@ -224,36 +211,27 @@ struct ls_local_inc_edges_memory_constraint { } if (to_step != from_step) { - for (const auto &succ : graph->children(vertex)) { - if (to_step > from_step && vector_schedule->assignedSuperstep(succ) == to_step) { - - if (step_processor_pred[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] - .find(vertex) != step_processor_pred[vector_schedule->assignedSuperstep( - succ)][vector_schedule->assignedProcessor(succ)] - .end()) { - - step_processor_memory[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] -= - graph->vertex_comm_weight(vertex); - - step_processor_pred[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] - .erase(vertex); + if (step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].find( + vertex) + != step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + .end()) { + step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + -= graph->vertex_comm_weight(vertex); + + step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].erase( + vertex); } } if (vector_schedule->assignedSuperstep(succ) > to_step) { - - auto pair = step_processor_pred[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] - .insert(vertex); + auto pair + = step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + .insert(vertex); if (pair.second) { - step_processor_memory[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] += - graph->vertex_comm_weight(vertex); + step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + += graph->vertex_comm_weight(vertex); } } } @@ -266,22 +244,16 @@ struct ls_local_inc_edges_memory_constraint { } void compute_memory_datastructure(unsigned start_step, unsigned end_step) { - for (unsigned step = start_step; step <= end_step; step++) { - for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) { - step_processor_memory[step][proc] = 0; step_processor_pred[step][proc].clear(); for (const auto &node : set_schedule->step_processor_vertices[step][proc]) { - step_processor_memory[step][proc] += graph->vertex_comm_weight(node); for (const auto &pred : graph->parents(node)) { - if (vector_schedule->assignedSuperstep(pred) < step) { - auto pair = step_processor_pred[step][proc].insert(pred); if (pair.second) { step_processor_memory[step][proc] += graph->vertex_comm_weight(pred); @@ -299,7 +271,6 @@ struct ls_local_inc_edges_memory_constraint { } inline void reset_superstep(unsigned step) { - for (unsigned proc = 0; proc < set_schedule->getInstance().getArchitecture().numberOfProcessors(); proc++) { step_processor_memory[step][proc] = 0; step_processor_pred[step][proc].clear(); @@ -312,12 +283,9 @@ struct ls_local_inc_edges_memory_constraint { } inline bool can_move(vertex_idx_t vertex, const unsigned proc, unsigned step) const { - v_memw_t inc_memory = graph->vertex_comm_weight(vertex); for (const auto &pred : graph->parents(vertex)) { - if (vector_schedule->assignedSuperstep(pred) < step) { - if (step_processor_pred[step][proc].find(pred) == step_processor_pred[step][proc].end()) { inc_memory += graph->vertex_comm_weight(pred); } @@ -331,27 +299,22 @@ struct ls_local_inc_edges_memory_constraint { } if (step >= vector_schedule->assignedSuperstep(vertex)) { - return step_processor_memory[step][proc] + inc_memory <= - set_schedule->getInstance().getArchitecture().memoryBound(proc); + return step_processor_memory[step][proc] + inc_memory + <= set_schedule->getInstance().getArchitecture().memoryBound(proc); } - if (step_processor_memory[step][proc] + inc_memory > - set_schedule->getInstance().getArchitecture().memoryBound(proc)) { - + if (step_processor_memory[step][proc] + inc_memory > set_schedule->getInstance().getArchitecture().memoryBound(proc)) { return false; } for (const auto &succ : graph->children(vertex)) { - const auto &succ_step = vector_schedule->assignedSuperstep(succ); const auto &succ_proc = vector_schedule->assignedProcessor(succ); - if (succ_step == vector_schedule->assignedSuperstep(vertex) and - succ_proc != vector_schedule->assignedProcessor(vertex)) { - - if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) > - set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) { - + if (succ_step == vector_schedule->assignedSuperstep(vertex) + and succ_proc != vector_schedule->assignedProcessor(vertex)) { + if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) + > set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) { return false; } } @@ -361,9 +324,8 @@ struct ls_local_inc_edges_memory_constraint { } }; -template +template struct ls_local_sources_inc_edges_memory_constraint { - using Graph_impl_t = Graph_t; const SetSchedule *set_schedule; @@ -381,8 +343,8 @@ struct ls_local_sources_inc_edges_memory_constraint { } inline void initialize(const SetSchedule &set_schedule_, const VectorSchedule &vec_schedule_) { - - if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() != MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES) { + if (set_schedule_.getInstance().getArchitecture().getMemoryConstraintType() + != MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES) { throw std::invalid_argument("Memory constraint type is not LOCAL_SOURCES_INC_EDGES"); } @@ -397,18 +359,14 @@ struct ls_local_sources_inc_edges_memory_constraint { std::vector>>(set_schedule->getInstance().numberOfProcessors())); } - inline void apply_move(vertex_idx_t vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, - unsigned to_step) { - + inline void apply_move(vertex_idx_t vertex, unsigned from_proc, unsigned from_step, unsigned to_proc, unsigned to_step) { if (is_source(vertex, *graph)) { step_processor_memory[to_step][to_proc] += graph->vertex_mem_weight(vertex); step_processor_memory[from_step][from_proc] -= graph->vertex_mem_weight(vertex); } for (const auto &pred : graph->parents(vertex)) { - if (vector_schedule->assignedSuperstep(pred) < to_step) { - auto pair = step_processor_pred[to_step][to_proc].insert(pred); if (pair.second) { step_processor_memory[to_step][to_proc] += graph->vertex_comm_weight(pred); @@ -416,16 +374,14 @@ struct ls_local_sources_inc_edges_memory_constraint { } if (vector_schedule->assignedSuperstep(pred) < from_step) { - bool remove = true; for (const auto &succ : graph->children(pred)) { - if (succ == vertex) { continue; } - if (vector_schedule->assignedProcessor(succ) == from_proc && - vector_schedule->assignedSuperstep(succ) == from_step) { + if (vector_schedule->assignedProcessor(succ) == from_proc + && vector_schedule->assignedSuperstep(succ) == from_step) { remove = false; break; } @@ -439,36 +395,27 @@ struct ls_local_sources_inc_edges_memory_constraint { } if (to_step != from_step) { - for (const auto &succ : graph->children(vertex)) { - if (to_step > from_step && vector_schedule->assignedSuperstep(succ) == to_step) { - - if (step_processor_pred[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] - .find(vertex) != step_processor_pred[vector_schedule->assignedSuperstep( - succ)][vector_schedule->assignedProcessor(succ)] - .end()) { - - step_processor_memory[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] -= - graph->vertex_comm_weight(vertex); - - step_processor_pred[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] - .erase(vertex); + if (step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].find( + vertex) + != step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + .end()) { + step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + -= graph->vertex_comm_weight(vertex); + + step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)].erase( + vertex); } } if (vector_schedule->assignedSuperstep(succ) > to_step) { - - auto pair = step_processor_pred[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] - .insert(vertex); + auto pair + = step_processor_pred[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + .insert(vertex); if (pair.second) { - step_processor_memory[vector_schedule->assignedSuperstep(succ)] - [vector_schedule->assignedProcessor(succ)] += - graph->vertex_comm_weight(vertex); + step_processor_memory[vector_schedule->assignedSuperstep(succ)][vector_schedule->assignedProcessor(succ)] + += graph->vertex_comm_weight(vertex); } } } @@ -476,24 +423,18 @@ struct ls_local_sources_inc_edges_memory_constraint { } void compute_memory_datastructure(unsigned start_step, unsigned end_step) { - for (unsigned step = start_step; step <= end_step; step++) { - for (unsigned proc = 0; proc < set_schedule->getInstance().numberOfProcessors(); proc++) { - step_processor_memory[step][proc] = 0; step_processor_pred[step][proc].clear(); for (const auto &node : set_schedule->step_processor_vertices[step][proc]) { - if (is_source(node, *graph)) { step_processor_memory[step][proc] += graph->vertex_mem_weight(node); } for (const auto &pred : graph->parents(node)) { - if (vector_schedule->assignedSuperstep(pred) < step) { - auto pair = step_processor_pred[step][proc].insert(pred); if (pair.second) { step_processor_memory[step][proc] += graph->vertex_comm_weight(pred); @@ -511,7 +452,6 @@ struct ls_local_sources_inc_edges_memory_constraint { } inline void reset_superstep(unsigned step) { - for (unsigned proc = 0; proc < set_schedule->getInstance().getArchitecture().numberOfProcessors(); proc++) { step_processor_memory[step][proc] = 0; step_processor_pred[step][proc].clear(); @@ -524,7 +464,6 @@ struct ls_local_sources_inc_edges_memory_constraint { } inline bool can_move(vertex_idx_t vertex, const unsigned proc, unsigned step) const { - v_memw_t inc_memory = 0; if (is_source(vertex, *graph)) { @@ -532,9 +471,7 @@ struct ls_local_sources_inc_edges_memory_constraint { } for (const auto &pred : graph->parents(vertex)) { - if (vector_schedule->assignedSuperstep(pred) < step) { - if (step_processor_pred[step][proc].find(pred) == step_processor_pred[step][proc].end()) { inc_memory += graph->vertex_comm_weight(pred); } @@ -548,37 +485,30 @@ struct ls_local_sources_inc_edges_memory_constraint { } if (vector_schedule->assignedSuperstep(vertex) <= step) { - - return step_processor_memory[step][proc] + inc_memory <= - set_schedule->getInstance().getArchitecture().memoryBound(proc); + return step_processor_memory[step][proc] + inc_memory + <= set_schedule->getInstance().getArchitecture().memoryBound(proc); } - if (step_processor_memory[step][proc] + inc_memory > - set_schedule->getInstance().getArchitecture().memoryBound(proc)) { + if (step_processor_memory[step][proc] + inc_memory > set_schedule->getInstance().getArchitecture().memoryBound(proc)) { return false; } for (const auto &succ : graph->children(vertex)) { - const auto &succ_step = vector_schedule->assignedSuperstep(succ); const auto &succ_proc = vector_schedule->assignedProcessor(succ); if (succ_step == vector_schedule->assignedSuperstep(vertex)) { - if (vector_schedule->assignedProcessor(vertex) != succ_proc || (not is_source(vertex, *graph))) { - - if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) > - set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) { + if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) + > set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) { return false; } } else { - if (is_source(vertex, *graph)) { - - if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) - - graph->vertex_mem_weight(vertex) > - set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) { + if (step_processor_memory[succ_step][succ_proc] + graph->vertex_comm_weight(vertex) + - graph->vertex_mem_weight(vertex) + > set_schedule->getInstance().getArchitecture().memoryBound(succ_proc)) { return false; } } @@ -590,4 +520,4 @@ struct ls_local_sources_inc_edges_memory_constraint { } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/bsp/scheduler/MaxBspScheduler.hpp b/include/osp/bsp/scheduler/MaxBspScheduler.hpp index c6accf25..5a78e382 100644 --- a/include/osp/bsp/scheduler/MaxBspScheduler.hpp +++ b/include/osp/bsp/scheduler/MaxBspScheduler.hpp @@ -31,10 +31,9 @@ namespace osp { * The Scheduler class provides a common interface for scheduling scheduler in the BSP scheduling system. * It defines methods for setting and getting the time limit, as well as computing schedules. */ -template +template class MaxBspScheduler : public Scheduler { - public: - + public: static_assert(is_computational_dag_v, "BspSchedule can only be used with computational DAGs."); /** @@ -85,4 +84,4 @@ class MaxBspScheduler : public Scheduler { }; }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp b/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp index 90227221..abd2a1a4 100644 --- a/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp +++ b/include/osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp @@ -32,7 +32,7 @@ limitations under the License. namespace osp { -template +template class MultilevelCoarseAndSchedule : public Scheduler { private: const BspInstance *original_inst; @@ -58,23 +58,31 @@ class MultilevelCoarseAndSchedule : public Scheduler { public: MultilevelCoarseAndSchedule() - : Scheduler(), original_inst(nullptr), sched(nullptr), improver(nullptr), ml_coarser(nullptr), + : Scheduler(), original_inst(nullptr), sched(nullptr), improver(nullptr), ml_coarser(nullptr), active_graph(-1L) { + }; + MultilevelCoarseAndSchedule(Scheduler &sched_, MultilevelCoarser &ml_coarser_) + : Scheduler(), + original_inst(nullptr), + sched(&sched_), + improver(nullptr), + ml_coarser(&ml_coarser_), active_graph(-1L) {}; MultilevelCoarseAndSchedule(Scheduler &sched_, + ImprovementScheduler &improver_, MultilevelCoarser &ml_coarser_) - : Scheduler(), original_inst(nullptr), sched(&sched_), improver(nullptr), ml_coarser(&ml_coarser_), - active_graph(-1L) {}; - MultilevelCoarseAndSchedule(Scheduler &sched_, ImprovementScheduler &improver_, - MultilevelCoarser &ml_coarser_) - : Scheduler(), original_inst(nullptr), sched(&sched_), improver(&improver_), ml_coarser(&ml_coarser_), + : Scheduler(), + original_inst(nullptr), + sched(&sched_), + improver(&improver_), + ml_coarser(&ml_coarser_), active_graph(-1L) {}; virtual ~MultilevelCoarseAndSchedule() = default; inline void setInitialScheduler(Scheduler &sched_) { sched = &sched_; }; + inline void setImprovementScheduler(ImprovementScheduler &improver_) { improver = &improver_; }; - inline void setMultilevelCoarser(MultilevelCoarser &ml_coarser_) { - ml_coarser = &ml_coarser_; - }; + + inline void setMultilevelCoarser(MultilevelCoarser &ml_coarser_) { ml_coarser = &ml_coarser_; }; RETURN_STATUS computeSchedule(BspSchedule &schedule) override; @@ -82,13 +90,12 @@ class MultilevelCoarseAndSchedule : public Scheduler { if (improver == nullptr) { return "C:" + ml_coarser->getCoarserName() + "-S:" + sched->getScheduleName(); } else { - return "C:" + ml_coarser->getCoarserName() + "-S:" + sched->getScheduleName() + - "-I:" + improver->getScheduleName(); + return "C:" + ml_coarser->getCoarserName() + "-S:" + sched->getScheduleName() + "-I:" + improver->getScheduleName(); } }; }; -template +template RETURN_STATUS MultilevelCoarseAndSchedule::compute_initial_schedule() { active_graph = static_cast(ml_coarser->dag_history.size()); active_graph--; @@ -109,32 +116,33 @@ RETURN_STATUS MultilevelCoarseAndSchedule::compute_init return status; } -template +template RETURN_STATUS MultilevelCoarseAndSchedule::improve_active_schedule() { if (improver) { - if (active_instance->getComputationalDag().num_vertices() == 0) + if (active_instance->getComputationalDag().num_vertices() == 0) { return RETURN_STATUS::OSP_SUCCESS; + } return improver->improveSchedule(*active_schedule); } return RETURN_STATUS::OSP_SUCCESS; } -template +template RETURN_STATUS MultilevelCoarseAndSchedule::expand_active_schedule() { assert((active_graph > 0L) && (static_cast(active_graph) < ml_coarser->dag_history.size())); std::unique_ptr> expanded_instance = std::make_unique>( *(ml_coarser->dag_history.at(static_cast(active_graph) - 1)), original_inst->getArchitecture()); - std::unique_ptr> expanded_schedule = - std::make_unique>(*expanded_instance); + std::unique_ptr> expanded_schedule + = std::make_unique>(*expanded_instance); for (const auto &node : expanded_instance->getComputationalDag().vertices()) { expanded_schedule->setAssignedProcessor( - node, active_schedule->assignedProcessor( - ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); + node, + active_schedule->assignedProcessor(ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); expanded_schedule->setAssignedSuperstep( - node, active_schedule->assignedSuperstep( - ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); + node, + active_schedule->assignedSuperstep(ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); } assert(expanded_schedule->satisfiesPrecedenceConstraints()); @@ -157,18 +165,18 @@ RETURN_STATUS MultilevelCoarseAndSchedule::expand_activ return RETURN_STATUS::OSP_SUCCESS; } -template +template RETURN_STATUS MultilevelCoarseAndSchedule::expand_active_schedule_to_original_schedule( BspSchedule &schedule) { assert(active_graph == 0L); for (const auto &node : getOriginalInstance()->getComputationalDag().vertices()) { schedule.setAssignedProcessor( - node, active_schedule->assignedProcessor( - ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); + node, + active_schedule->assignedProcessor(ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); schedule.setAssignedSuperstep( - node, active_schedule->assignedSuperstep( - ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); + node, + active_schedule->assignedSuperstep(ml_coarser->contraction_maps.at(static_cast(active_graph))->at(node))); } active_graph--; @@ -180,7 +188,7 @@ RETURN_STATUS MultilevelCoarseAndSchedule::expand_activ return RETURN_STATUS::OSP_SUCCESS; } -template +template RETURN_STATUS MultilevelCoarseAndSchedule::run_expansions(BspSchedule &schedule) { assert(active_graph >= 0L && static_cast(active_graph) == ml_coarser->dag_history.size() - 1); @@ -196,14 +204,14 @@ RETURN_STATUS MultilevelCoarseAndSchedule::run_expansio return status; } -template +template void MultilevelCoarseAndSchedule::clear_computation_data() { active_graph = -1L; active_instance = std::unique_ptr>(); active_schedule = std::unique_ptr>(); } -template +template RETURN_STATUS MultilevelCoarseAndSchedule::computeSchedule(BspSchedule &schedule) { clear_computation_data(); @@ -234,4 +242,4 @@ RETURN_STATUS MultilevelCoarseAndSchedule::computeSched return status; } -} // end namespace osp \ No newline at end of file +} // end namespace osp diff --git a/include/osp/bsp/scheduler/Scheduler.hpp b/include/osp/bsp/scheduler/Scheduler.hpp index fa458ba9..53ee1529 100644 --- a/include/osp/bsp/scheduler/Scheduler.hpp +++ b/include/osp/bsp/scheduler/Scheduler.hpp @@ -18,12 +18,13 @@ limitations under the License. #pragma once +#include + #include "osp/auxiliary/return_status.hpp" #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/model/BspScheduleCS.hpp" #include "osp/concepts/computational_dag_concept.hpp" -#include namespace osp { @@ -35,9 +36,8 @@ namespace osp { * It specifies the contract for computing standard BSP schedules (BspSchedule) and communication-aware schedules * (BspScheduleCS). */ -template +template class Scheduler { - static_assert(is_computational_dag_v, "Scheduler can only be used with computational DAGs."); public: @@ -80,7 +80,6 @@ class Scheduler { * RETURN_STATUS::ERROR if an error occurred, or other status codes as appropriate. */ virtual RETURN_STATUS computeScheduleCS(BspScheduleCS &schedule) { - auto result = computeSchedule(schedule); if (result == RETURN_STATUS::OSP_SUCCESS || result == RETURN_STATUS::BEST_FOUND) { schedule.setAutoCommunicationSchedule(); @@ -91,4 +90,4 @@ class Scheduler { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/bsp/scheduler/Serial.hpp b/include/osp/bsp/scheduler/Serial.hpp index db2aeef9..cde1b706 100644 --- a/include/osp/bsp/scheduler/Serial.hpp +++ b/include/osp/bsp/scheduler/Serial.hpp @@ -18,11 +18,13 @@ limitations under the License. #pragma once -#include "Scheduler.hpp" #include #include #include #include + +#include "Scheduler.hpp" + namespace osp { /** @@ -32,9 +34,8 @@ namespace osp { * smallest number of supersteps. * */ -template +template class Serial : public Scheduler { - public: /** * @brief Default constructor for Serial. @@ -51,8 +52,9 @@ class Serial : public Scheduler { const auto &dag = instance.getComputationalDag(); const auto num_vertices = dag.num_vertices(); - if (num_vertices == 0) + if (num_vertices == 0) { return RETURN_STATUS::OSP_SUCCESS; + } const auto &arch = instance.getArchitecture(); @@ -114,8 +116,7 @@ class Serial : public Scheduler { for (const auto &p : node_type_compatible_processors[v_type]) { bool parents_compatible = true; for (const auto &parent : dag.parents(v)) { - if (schedule.assignedSuperstep(parent) == current_superstep && - schedule.assignedProcessor(parent) != p) { + if (schedule.assignedSuperstep(parent) == current_superstep && schedule.assignedProcessor(parent) != p) { parents_compatible = false; break; } @@ -155,4 +156,4 @@ class Serial : public Scheduler { std::string getScheduleName() const override { return "Serial"; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/coarser/BspScheduleCoarser.hpp b/include/osp/coarser/BspScheduleCoarser.hpp index ea4cf9f9..a65e92e5 100644 --- a/include/osp/coarser/BspScheduleCoarser.hpp +++ b/include/osp/coarser/BspScheduleCoarser.hpp @@ -31,9 +31,8 @@ namespace osp { * @brief Abstract base class for coarsening ComputationalDags. * */ -template +template class BspScheduleCoarser : public CoarserGenContractionMap { - private: const BspSchedule *schedule; @@ -56,7 +55,6 @@ class BspScheduleCoarser : public CoarserGenContractionMap> &reverse_vertex_map) override { virtual std::vector> generate_vertex_contraction_map(const Graph_t_in &dag_in) override { - using VertexType_in = vertex_idx_t; using VertexType_out = vertex_idx_t; @@ -70,23 +68,18 @@ class BspScheduleCoarser : public CoarserGenContractionMapnumberOfSupersteps(); step++) { - for (unsigned proc = 0; proc < schedule->getInstance().numberOfProcessors(); proc++) { - if (set_schedule.step_processor_vertices[step][proc].size() > 0) { - v_workw_t total_work = 0; v_memw_t total_memory = 0; v_commw_t total_communication = 0; vertex_map.push_back(std::vector()); - v_type_t type = - dag_in.vertex_type(*(set_schedule.step_processor_vertices[step][proc].begin())); + v_type_t type = dag_in.vertex_type(*(set_schedule.step_processor_vertices[step][proc].begin())); bool homogeneous_types = true; for (const auto &vertex : set_schedule.step_processor_vertices[step][proc]) { - if (dag_in.vertex_type(vertex) != type) { homogeneous_types = false; } @@ -99,8 +92,9 @@ class BspScheduleCoarser : public CoarserGenContractionMap #include -#include "osp/bsp/model/BspSchedule.hpp" #include "coarser_util.hpp" +#include "osp/bsp/model/BspSchedule.hpp" #include "osp/concepts/computational_dag_concept.hpp" #include "osp/concepts/constructable_computational_dag_concept.hpp" #include "osp/concepts/graph_traits.hpp" @@ -35,11 +35,11 @@ namespace osp { * @brief Abstract base class for coarsening ComputationalDags. * */ -template +template class Coarser { - static_assert(is_computational_dag_v, "Graph_t_in must be a computational DAG"); - static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, "Graph_t_out must be a (direct) constructable computational DAG"); + static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, + "Graph_t_out must be a (direct) constructable computational DAG"); // probably too strict, need to be refined. // maybe add concept for when Gtaph_t2 is constructable/coarseable from Graph_t_in @@ -59,8 +59,10 @@ class Coarser { * @param vertex_contraction_map Output mapping from dag_in to coarsened_dag. * @return A status code indicating the success or failure of the coarsening operation. */ - virtual bool coarsenDag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, - std::vector> &vertex_contraction_map) = 0; + virtual bool coarsenDag(const Graph_t_in &dag_in, + Graph_t_out &coarsened_dag, + std::vector> &vertex_contraction_map) + = 0; /** * @brief Get the name of the coarsening algorithm. @@ -79,16 +81,16 @@ class Coarser { * @brief Abstract base class for coarsening ComputationalDags. * */ -template +template class CoarserGenContractionMap : public Coarser { - public: virtual std::vector> generate_vertex_contraction_map(const Graph_t_in &dag_in) = 0; - virtual bool coarsenDag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, + virtual bool coarsenDag(const Graph_t_in &dag_in, + Graph_t_out &coarsened_dag, std::vector> &vertex_contraction_map) override { - - vertex_contraction_map = dag_in.num_vertices() == 0? std::vector>() : generate_vertex_contraction_map(dag_in); + vertex_contraction_map = dag_in.num_vertices() == 0 ? std::vector>() + : generate_vertex_contraction_map(dag_in); return coarser_util::construct_coarse_dag(dag_in, coarsened_dag, vertex_contraction_map); } @@ -104,19 +106,17 @@ class CoarserGenContractionMap : public Coarser { * @brief Abstract base class for coarsening ComputationalDags. * */ -template +template class CoarserGenExpansionMap : public Coarser { - public: - virtual std::vector>> - generate_vertex_expansion_map(const Graph_t_in &dag_in) = 0; + virtual std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in) = 0; - virtual bool coarsenDag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, + virtual bool coarsenDag(const Graph_t_in &dag_in, + Graph_t_out &coarsened_dag, std::vector> &vertex_contraction_map) override { - if (dag_in.num_vertices() == 0) { - vertex_contraction_map = std::vector>(); - return true; + vertex_contraction_map = std::vector>(); + return true; } std::vector>> vertex_expansion_map = generate_vertex_expansion_map(dag_in); @@ -124,8 +124,7 @@ class CoarserGenExpansionMap : public Coarser { coarser_util::reorder_expansion_map(dag_in, vertex_expansion_map); - vertex_contraction_map = - coarser_util::invert_vertex_expansion_map(vertex_expansion_map); + vertex_contraction_map = coarser_util::invert_vertex_expansion_map(vertex_expansion_map); return coarser_util::construct_coarse_dag(dag_in, coarsened_dag, vertex_contraction_map); } @@ -142,4 +141,4 @@ class CoarserGenExpansionMap : public Coarser { virtual ~CoarserGenExpansionMap() = default; }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/coarser/MultilevelCoarser.hpp b/include/osp/coarser/MultilevelCoarser.hpp index f8a1434e..4baecd3d 100644 --- a/include/osp/coarser/MultilevelCoarser.hpp +++ b/include/osp/coarser/MultilevelCoarser.hpp @@ -30,10 +30,10 @@ limitations under the License. namespace osp { -template +template class MultilevelCoarseAndSchedule; -template +template class MultilevelCoarser : public Coarser { friend class MultilevelCoarseAndSchedule; @@ -48,7 +48,8 @@ class MultilevelCoarser : public Coarser { RETURN_STATUS add_contraction(const std::vector> &contraction_map); RETURN_STATUS add_contraction(std::vector> &&contraction_map); - RETURN_STATUS add_contraction(const std::vector> &contraction_map, const Graph_t_coarse &contracted_graph); + RETURN_STATUS add_contraction(const std::vector> &contraction_map, + const Graph_t_coarse &contracted_graph); RETURN_STATUS add_contraction(std::vector> &&contraction_map, Graph_t_coarse &&contracted_graph); void add_identity_contraction(); @@ -64,7 +65,8 @@ class MultilevelCoarser : public Coarser { MultilevelCoarser(const Graph_t &graph) : original_graph(&graph) {}; virtual ~MultilevelCoarser() = default; - bool coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag, + bool coarsenDag(const Graph_t &dag_in, + Graph_t_coarse &coarsened_dag, std::vector> &vertex_contraction_map) override; RETURN_STATUS run(const Graph_t &graph); @@ -73,7 +75,7 @@ class MultilevelCoarser : public Coarser { virtual std::string getCoarserName() const override = 0; }; -template +template RETURN_STATUS MultilevelCoarser::run(const Graph_t &graph) { clear_computation_data(); original_graph = &graph; @@ -88,12 +90,12 @@ RETURN_STATUS MultilevelCoarser::run(const Graph_t &gra return status; } -template +template RETURN_STATUS MultilevelCoarser::run(const BspInstance &inst) { return run(inst.getComputationalDag()); } -template +template void MultilevelCoarser::clear_computation_data() { dag_history.clear(); dag_history.shrink_to_fit(); @@ -102,10 +104,11 @@ void MultilevelCoarser::clear_computation_data() { contraction_maps.shrink_to_fit(); } -template +template void MultilevelCoarser::compactify_dag_history() { - if (dag_history.size() < 3) + if (dag_history.size() < 3) { return; + } size_t dag_indx_first = dag_history.size() - 2; size_t map_indx_first = contraction_maps.size() - 2; @@ -113,11 +116,15 @@ void MultilevelCoarser::compactify_dag_history() { size_t dag_indx_second = dag_history.size() - 1; size_t map_indx_second = contraction_maps.size() - 1; - if ((static_cast(dag_history[dag_indx_first - 1]->num_vertices()) / static_cast(dag_history[dag_indx_second - 1]->num_vertices())) > 1.25) + if ((static_cast(dag_history[dag_indx_first - 1]->num_vertices()) + / static_cast(dag_history[dag_indx_second - 1]->num_vertices())) + > 1.25) { return; + } // Compute combined contraction_map - std::unique_ptr>> combi_contraction_map = std::make_unique>>(contraction_maps[map_indx_first]->size()); + std::unique_ptr>> combi_contraction_map + = std::make_unique>>(contraction_maps[map_indx_first]->size()); for (std::size_t vert = 0; vert < contraction_maps[map_indx_first]->size(); ++vert) { combi_contraction_map->at(vert) = contraction_maps[map_indx_second]->at(contraction_maps[map_indx_first]->at(vert)); } @@ -136,8 +143,9 @@ void MultilevelCoarser::compactify_dag_history() { contraction_maps[map_indx_first] = std::move(combi_contraction_map); } -template -RETURN_STATUS MultilevelCoarser::add_contraction(const std::vector> &contraction_map) { +template +RETURN_STATUS MultilevelCoarser::add_contraction( + const std::vector> &contraction_map) { std::unique_ptr new_graph = std::make_unique(); contraction_maps.emplace_back(contraction_map); @@ -145,9 +153,11 @@ RETURN_STATUS MultilevelCoarser::add_contraction(const bool success = false; if (dag_history.size() == 0) { - success = coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, *(contraction_maps.back())); + success = coarser_util::construct_coarse_dag( + *(getOriginalGraph()), *new_graph, *(contraction_maps.back())); } else { - success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back())); + success = coarser_util::construct_coarse_dag( + *(dag_history.back()), *new_graph, *(contraction_maps.back())); } dag_history.emplace_back(std::move(new_graph)); @@ -160,19 +170,23 @@ RETURN_STATUS MultilevelCoarser::add_contraction(const } } -template -RETURN_STATUS MultilevelCoarser::add_contraction(std::vector> &&contraction_map) { +template +RETURN_STATUS MultilevelCoarser::add_contraction( + std::vector> &&contraction_map) { std::unique_ptr new_graph = std::make_unique(); - std::unique_ptr>> contr_map_ptr(new std::vector>(std::move(contraction_map))); + std::unique_ptr>> contr_map_ptr( + new std::vector>(std::move(contraction_map))); contraction_maps.emplace_back(std::move(contr_map_ptr)); bool success = false; if (dag_history.size() == 0) { - success = coarser_util::construct_coarse_dag(*(getOriginalGraph()), *new_graph, *(contraction_maps.back())); + success = coarser_util::construct_coarse_dag( + *(getOriginalGraph()), *new_graph, *(contraction_maps.back())); } else { - success = coarser_util::construct_coarse_dag(*(dag_history.back()), *new_graph, *(contraction_maps.back())); + success = coarser_util::construct_coarse_dag( + *(dag_history.back()), *new_graph, *(contraction_maps.back())); } dag_history.emplace_back(std::move(new_graph)); @@ -185,31 +199,35 @@ RETURN_STATUS MultilevelCoarser::add_contraction(std::v } } -template -RETURN_STATUS MultilevelCoarser::add_contraction(const std::vector> &contraction_map, const Graph_t_coarse &contracted_graph) { +template +RETURN_STATUS MultilevelCoarser::add_contraction( + const std::vector> &contraction_map, const Graph_t_coarse &contracted_graph) { std::unique_ptr graph_ptr(new Graph_t_coarse(contracted_graph)); dag_history.emplace_back(std::move(graph_ptr)); - std::unique_ptr>> contr_map_ptr(new std::vector>(contraction_map)); + std::unique_ptr>> contr_map_ptr( + new std::vector>(contraction_map)); contraction_maps.emplace_back(std::move(contr_map_ptr)); compactify_dag_history(); return RETURN_STATUS::OSP_SUCCESS; } -template -RETURN_STATUS MultilevelCoarser::add_contraction(std::vector> &&contraction_map, Graph_t_coarse &&contracted_graph) { +template +RETURN_STATUS MultilevelCoarser::add_contraction( + std::vector> &&contraction_map, Graph_t_coarse &&contracted_graph) { std::unique_ptr graph_ptr(new Graph_t_coarse(std::move(contracted_graph))); dag_history.emplace_back(std::move(graph_ptr)); - std::unique_ptr>> contr_map_ptr(new std::vector>(std::move(contraction_map))); + std::unique_ptr>> contr_map_ptr( + new std::vector>(std::move(contraction_map))); contraction_maps.emplace_back(std::move(contr_map_ptr)); compactify_dag_history(); return RETURN_STATUS::OSP_SUCCESS; } -template +template std::vector> MultilevelCoarser::getCombinedContractionMap() const { std::vector> combinedContractionMap(original_graph->num_vertices()); std::iota(combinedContractionMap.begin(), combinedContractionMap.end(), 0); @@ -223,15 +241,17 @@ std::vector> MultilevelCoarser -bool MultilevelCoarser::coarsenDag(const Graph_t &dag_in, Graph_t_coarse &coarsened_dag, +template +bool MultilevelCoarser::coarsenDag(const Graph_t &dag_in, + Graph_t_coarse &coarsened_dag, std::vector> &vertex_contraction_map) { clear_computation_data(); RETURN_STATUS status = run(dag_in); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) + if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { return false; + } assert(dag_history.size() != 0); coarsened_dag = *(dag_history.back()); @@ -241,7 +261,7 @@ bool MultilevelCoarser::coarsenDag(const Graph_t &dag_i return true; } -template +template void MultilevelCoarser::add_identity_contraction() { std::size_t n_vert; if (dag_history.size() == 0) { @@ -257,4 +277,4 @@ void MultilevelCoarser::add_identity_contraction() { compactify_dag_history(); } -} // end namespace osp \ No newline at end of file +} // end namespace osp diff --git a/include/osp/coarser/Sarkar/Sarkar.hpp b/include/osp/coarser/Sarkar/Sarkar.hpp index aa0d5560..672fc6d2 100644 --- a/include/osp/coarser/Sarkar/Sarkar.hpp +++ b/include/osp/coarser/Sarkar/Sarkar.hpp @@ -35,63 +35,96 @@ limitations under the License. namespace osp { namespace SarkarParams { -enum class Mode { LINES, FAN_IN_FULL, FAN_IN_PARTIAL, FAN_OUT_FULL, FAN_OUT_PARTIAL, LEVEL_EVEN, LEVEL_ODD, FAN_IN_BUFFER, FAN_OUT_BUFFER, HOMOGENEOUS_BUFFER }; -template +enum class Mode { + LINES, + FAN_IN_FULL, + FAN_IN_PARTIAL, + FAN_OUT_FULL, + FAN_OUT_PARTIAL, + LEVEL_EVEN, + LEVEL_ODD, + FAN_IN_BUFFER, + FAN_OUT_BUFFER, + HOMOGENEOUS_BUFFER +}; + +template struct Parameters { double geomDecay{0.875}; double leniency{0.0}; Mode mode{Mode::LINES}; - commCostType commCost{ static_cast(0) }; - commCostType maxWeight{ std::numeric_limits::max() }; - commCostType smallWeightThreshold{ std::numeric_limits::lowest() }; + commCostType commCost{static_cast(0)}; + commCostType maxWeight{std::numeric_limits::max()}; + commCostType smallWeightThreshold{std::numeric_limits::lowest()}; bool useTopPoset{true}; }; -} // end namespace SarkarParams -template +} // end namespace SarkarParams + +template class Sarkar : public CoarserGenExpansionMap { - private: - SarkarParams::Parameters< v_workw_t > params; - - std::vector< vertex_idx_t > getBotPosetMap(const Graph_t_in &graph) const; - std::vector< v_workw_t > getTopDistance(v_workw_t commCost, const Graph_t_in &graph) const; - std::vector< v_workw_t > getBotDistance(v_workw_t commCost, const Graph_t_in &graph) const; - - vertex_idx_t singleContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const; - vertex_idx_t allChildrenContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const; - vertex_idx_t someChildrenContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const; - vertex_idx_t allParentsContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const; - vertex_idx_t someParentsContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const; - vertex_idx_t levelContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const; - - vertex_idx_t homogeneous_buffer_merge(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const; - std::vector homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const; - - std::vector computeNodeHashes(const Graph_t_in &graph, const std::vector< vertex_idx_t > &vertexPoset, const std::vector< v_workw_t > &dist) const; - - public: - virtual std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in) override; - std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in, vertex_idx_t &diff); - - inline void setParameters(const SarkarParams::Parameters< v_workw_t >& params_) { params = params_; }; - inline SarkarParams::Parameters< v_workw_t >& getParameters() { return params; }; - inline const SarkarParams::Parameters< v_workw_t >& getParameters() const { return params; }; - - Sarkar(SarkarParams::Parameters< v_workw_t > params_ = SarkarParams::Parameters< v_workw_t >()) : params(params_) {}; - - Sarkar(const Sarkar &) = default; - Sarkar(Sarkar &&) = default; - Sarkar &operator=(const Sarkar &) = default; - Sarkar &operator=(Sarkar &&) = default; - virtual ~Sarkar() override = default; - - std::string getCoarserName() const override { return "Sarkar"; } + private: + SarkarParams::Parameters> params; + + std::vector> getBotPosetMap(const Graph_t_in &graph) const; + std::vector> getTopDistance(v_workw_t commCost, const Graph_t_in &graph) const; + std::vector> getBotDistance(v_workw_t commCost, const Graph_t_in &graph) const; + + vertex_idx_t singleContraction(v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const; + vertex_idx_t allChildrenContraction(v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const; + vertex_idx_t someChildrenContraction(v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const; + vertex_idx_t allParentsContraction(v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const; + vertex_idx_t someParentsContraction(v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const; + vertex_idx_t levelContraction(v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const; + + vertex_idx_t homogeneous_buffer_merge(v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const; + std::vector homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const; + + std::vector computeNodeHashes(const Graph_t_in &graph, + const std::vector> &vertexPoset, + const std::vector> &dist) const; + + public: + virtual std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in) override; + std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in, + vertex_idx_t &diff); + + inline void setParameters(const SarkarParams::Parameters> ¶ms_) { params = params_; }; + + inline SarkarParams::Parameters> &getParameters() { return params; }; + + inline const SarkarParams::Parameters> &getParameters() const { return params; }; + + Sarkar(SarkarParams::Parameters> params_ = SarkarParams::Parameters>()) + : params(params_) {}; + + Sarkar(const Sarkar &) = default; + Sarkar(Sarkar &&) = default; + Sarkar &operator=(const Sarkar &) = default; + Sarkar &operator=(Sarkar &&) = default; + virtual ~Sarkar() override = default; + + std::string getCoarserName() const override { return "Sarkar"; } }; -template -std::vector< vertex_idx_t > Sarkar::getBotPosetMap(const Graph_t_in &graph) const { - std::vector< vertex_idx_t > botPosetMap = get_bottom_node_distance>(graph); +template +std::vector> Sarkar::getBotPosetMap(const Graph_t_in &graph) const { + std::vector> botPosetMap = get_bottom_node_distance>(graph); vertex_idx_t max = *std::max_element(botPosetMap.begin(), botPosetMap.end()); ++max; @@ -103,9 +136,10 @@ std::vector< vertex_idx_t > Sarkar::getBotP return botPosetMap; } -template -std::vector< v_workw_t > Sarkar::getTopDistance(v_workw_t commCost, const Graph_t_in &graph) const { - std::vector< v_workw_t > topDist(graph.num_vertices(), 0); +template +std::vector> Sarkar::getTopDistance(v_workw_t commCost, + const Graph_t_in &graph) const { + std::vector> topDist(graph.num_vertices(), 0); for (const auto &vertex : GetTopOrder(graph)) { v_workw_t max_temp = 0; @@ -123,9 +157,10 @@ std::vector< v_workw_t > Sarkar::getTopDist return topDist; } -template -std::vector< v_workw_t > Sarkar::getBotDistance(v_workw_t commCost, const Graph_t_in &graph) const { - std::vector< v_workw_t > botDist(graph.num_vertices(), 0); +template +std::vector> Sarkar::getBotDistance(v_workw_t commCost, + const Graph_t_in &graph) const { + std::vector> botDist(graph.num_vertices(), 0); for (const auto &vertex : GetTopOrderReverse(graph)) { v_workw_t max_temp = 0; @@ -143,33 +178,47 @@ std::vector< v_workw_t > Sarkar::getBotDist return botDist; } -template -vertex_idx_t Sarkar::singleContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const { +template +vertex_idx_t Sarkar::singleContraction( + v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const { using VertexType = vertex_idx_t; assert(expansionMapOutput.size() == 0); - const std::vector< vertex_idx_t > vertexPoset = params.useTopPoset ? get_top_node_distance>(graph) : getBotPosetMap(graph); - const std::vector< v_workw_t > topDist = getTopDistance(commCost, graph); - const std::vector< v_workw_t > botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset + = params.useTopPoset ? get_top_node_distance>(graph) : getBotPosetMap(graph); + const std::vector> topDist = getTopDistance(commCost, graph); + const std::vector> botDist = getBotDistance(commCost, graph); auto cmp = [](const std::tuple &lhs, const std::tuple &rhs) { return (std::get<0>(lhs) > std::get<0>(rhs)) - || ((std::get<0>(lhs) == std::get<0>(rhs)) && (std::get<1>(lhs) < std::get<1>(rhs))) - || ((std::get<0>(lhs) == std::get<0>(rhs)) && (std::get<1>(lhs) == std::get<1>(rhs)) && (std::get<2>(lhs) < std::get<2>(rhs))); + || ((std::get<0>(lhs) == std::get<0>(rhs)) && (std::get<1>(lhs) < std::get<1>(rhs))) + || ((std::get<0>(lhs) == std::get<0>(rhs)) && (std::get<1>(lhs) == std::get<1>(rhs)) + && (std::get<2>(lhs) < std::get<2>(rhs))); }; std::set, decltype(cmp)> edgePriority(cmp); for (const VertexType &edgeSrc : graph.vertices()) { for (const VertexType &edgeTgt : graph.children(edgeSrc)) { - if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(edgeSrc) != graph.vertex_type(edgeTgt)) continue; + if (graph.vertex_type(edgeSrc) != graph.vertex_type(edgeTgt)) { + continue; + } } - if (vertexPoset[edgeSrc] + 1 != vertexPoset[edgeTgt]) continue; - if (topDist[edgeSrc] + commCost + graph.vertex_work_weight(edgeTgt) != topDist[edgeTgt]) continue; - if (botDist[edgeTgt] + commCost + graph.vertex_work_weight(edgeSrc) != botDist[edgeSrc]) continue; - if (graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt) > params.maxWeight) continue; + if (vertexPoset[edgeSrc] + 1 != vertexPoset[edgeTgt]) { + continue; + } + if (topDist[edgeSrc] + commCost + graph.vertex_work_weight(edgeTgt) != topDist[edgeTgt]) { + continue; + } + if (botDist[edgeTgt] + commCost + graph.vertex_work_weight(edgeSrc) != botDist[edgeSrc]) { + continue; + } + if (graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt) > params.maxWeight) { + continue; + } v_workw_t maxPath = topDist[edgeSrc] + botDist[edgeTgt] + commCost; v_workw_t maxParentDist = 0; @@ -179,19 +228,24 @@ vertex_idx_t Sarkar::singleContraction(v_wo maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } for (const auto &par : graph.parents(edgeTgt)) { - if (par == edgeSrc) continue; + if (par == edgeSrc) { + continue; + } maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } for (const auto &chld : graph.children(edgeSrc)) { - if (chld == edgeTgt) continue; + if (chld == edgeTgt) { + continue; + } maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } for (const auto &chld : graph.children(edgeTgt)) { maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } - v_workw_t newMaxPath = maxParentDist + maxChildDist + graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt); + v_workw_t newMaxPath + = maxParentDist + maxChildDist + graph.vertex_work_weight(edgeSrc) + graph.vertex_work_weight(edgeTgt); long savings = static_cast(maxPath) - static_cast(newMaxPath); // cannot have leniency here as it may destroy symmetries @@ -204,8 +258,9 @@ vertex_idx_t Sarkar::singleContraction(v_wo std::vector partitionedSourceFlag(graph.num_vertices(), false); std::vector partitionedTargetFlag(graph.num_vertices(), false); - vertex_idx_t maxCorseningNum = graph.num_vertices() - static_cast< vertex_idx_t >(static_cast(graph.num_vertices()) * params.geomDecay); - + vertex_idx_t maxCorseningNum + = graph.num_vertices() + - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); vertex_idx_t counter = 0; long minSave = std::numeric_limits::lowest(); @@ -215,13 +270,23 @@ vertex_idx_t Sarkar::singleContraction(v_wo const VertexType &edgeTgt = std::get<2>(*prioIter); // Iterations halt - if (edgeSave < minSave) break; + if (edgeSave < minSave) { + break; + } // Check whether we can glue - if (partitionedSourceFlag[edgeSrc]) continue; - if (partitionedSourceFlag[edgeTgt]) continue; - if (partitionedTargetFlag[edgeSrc]) continue; - if (partitionedTargetFlag[edgeTgt]) continue; + if (partitionedSourceFlag[edgeSrc]) { + continue; + } + if (partitionedSourceFlag[edgeTgt]) { + continue; + } + if (partitionedTargetFlag[edgeSrc]) { + continue; + } + if (partitionedTargetFlag[edgeTgt]) { + continue; + } bool shouldSkipSrc = false; for (const VertexType &chld : graph.children(edgeSrc)) { @@ -237,7 +302,9 @@ vertex_idx_t Sarkar::singleContraction(v_wo break; } } - if (shouldSkipSrc && shouldSkipTgt) continue; + if (shouldSkipSrc && shouldSkipTgt) { + continue; + } // Adding to partition expansionMapOutput.emplace_back(std::initializer_list{edgeSrc, edgeTgt}); @@ -251,8 +318,12 @@ vertex_idx_t Sarkar::singleContraction(v_wo expansionMapOutput.reserve(graph.num_vertices() - counter); for (const VertexType &vert : graph.vertices()) { - if (partitionedSourceFlag[vert]) continue; - if (partitionedTargetFlag[vert]) continue; + if (partitionedSourceFlag[vert]) { + continue; + } + if (partitionedTargetFlag[vert]) { + continue; + } expansionMapOutput.emplace_back(std::initializer_list{vert}); } @@ -260,23 +331,27 @@ vertex_idx_t Sarkar::singleContraction(v_wo return counter; } -template -vertex_idx_t Sarkar::allChildrenContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const { +template +vertex_idx_t Sarkar::allChildrenContraction( + v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const { using VertexType = vertex_idx_t; assert(expansionMapOutput.size() == 0); - const std::vector< vertex_idx_t > vertexPoset = get_top_node_distance>(graph); - const std::vector< v_workw_t > topDist = getTopDistance(commCost, graph); - const std::vector< v_workw_t > botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset = get_top_node_distance>(graph); + const std::vector> topDist = getTopDistance(commCost, graph); + const std::vector> botDist = getBotDistance(commCost, graph); auto cmp = [](const std::pair &lhs, const std::pair &rhs) { - return (lhs.first > rhs.first) - || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); + return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); }; std::set, decltype(cmp)> vertPriority(cmp); for (const VertexType &groupHead : graph.vertices()) { - if (graph.out_degree(groupHead) < 2) continue; + if (graph.out_degree(groupHead) < 2) { + continue; + } bool shouldSkip = false; if constexpr (has_typed_vertices_v) { @@ -287,19 +362,25 @@ vertex_idx_t Sarkar::allChildrenContraction } } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } for (const VertexType &groupFoot : graph.children(groupHead)) { if (vertexPoset[groupFoot] != vertexPoset[groupHead] + 1) { shouldSkip = true; break; } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } v_workw_t combined_weight = graph.vertex_work_weight(groupHead); for (const VertexType &groupFoot : graph.children(groupHead)) { combined_weight += graph.vertex_work_weight(groupFoot); } - if (combined_weight > params.maxWeight) continue; + if (combined_weight > params.maxWeight) { + continue; + } v_workw_t maxPath = topDist[groupHead] + botDist[groupHead] - graph.vertex_work_weight(groupHead); for (const VertexType &chld : graph.children(groupHead)) { @@ -314,7 +395,9 @@ vertex_idx_t Sarkar::allChildrenContraction } for (const VertexType &groupFoot : graph.children(groupHead)) { for (const VertexType &par : graph.parents(groupFoot)) { - if (par == groupHead) continue; + if (par == groupHead) { + continue; + } maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } } @@ -338,7 +421,9 @@ vertex_idx_t Sarkar::allChildrenContraction std::vector partitionedFlag(graph.num_vertices(), false); - vertex_idx_t maxCorseningNum = graph.num_vertices() - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + vertex_idx_t maxCorseningNum + = graph.num_vertices() + - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); vertex_idx_t counter = 0; long minSave = std::numeric_limits::lowest(); @@ -347,10 +432,14 @@ vertex_idx_t Sarkar::allChildrenContraction const VertexType &groupHead = prioIter->second; // Iterations halt - if (vertSave < minSave) break; + if (vertSave < minSave) { + break; + } // Check whether we can glue - if (partitionedFlag[groupHead]) continue; + if (partitionedFlag[groupHead]) { + continue; + } bool shouldSkip = false; for (const VertexType &groupFoot : graph.children(groupHead)) { if (partitionedFlag[groupFoot]) { @@ -358,7 +447,9 @@ vertex_idx_t Sarkar::allChildrenContraction break; } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } // Adding to partition std::vector part; @@ -368,8 +459,8 @@ vertex_idx_t Sarkar::allChildrenContraction part.emplace_back(groupFoot); } - expansionMapOutput.emplace_back( std::move(part) ); - counter += static_cast>( graph.out_degree(groupHead) ); + expansionMapOutput.emplace_back(std::move(part)); + counter += static_cast>(graph.out_degree(groupHead)); if (counter > maxCorseningNum) { minSave = vertSave; } @@ -380,30 +471,36 @@ vertex_idx_t Sarkar::allChildrenContraction } for (const VertexType &vert : graph.vertices()) { - if (partitionedFlag[vert]) continue; + if (partitionedFlag[vert]) { + continue; + } expansionMapOutput.emplace_back(std::initializer_list{vert}); } return counter; } -template -vertex_idx_t Sarkar::allParentsContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const { +template +vertex_idx_t Sarkar::allParentsContraction( + v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const { using VertexType = vertex_idx_t; assert(expansionMapOutput.size() == 0); - const std::vector< vertex_idx_t > vertexPoset = getBotPosetMap(graph); - const std::vector< v_workw_t > topDist = getTopDistance(commCost, graph); - const std::vector< v_workw_t > botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset = getBotPosetMap(graph); + const std::vector> topDist = getTopDistance(commCost, graph); + const std::vector> botDist = getBotDistance(commCost, graph); auto cmp = [](const std::pair &lhs, const std::pair &rhs) { - return (lhs.first > rhs.first) - || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); + return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); }; std::set, decltype(cmp)> vertPriority(cmp); for (const VertexType &groupFoot : graph.vertices()) { - if (graph.in_degree(groupFoot) < 2) continue; + if (graph.in_degree(groupFoot) < 2) { + continue; + } bool shouldSkip = false; if constexpr (has_typed_vertices_v) { @@ -414,19 +511,25 @@ vertex_idx_t Sarkar::allParentsContraction( } } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } for (const VertexType &groupHead : graph.parents(groupFoot)) { if (vertexPoset[groupFoot] != vertexPoset[groupHead] + 1) { shouldSkip = true; break; } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } v_workw_t combined_weight = graph.vertex_work_weight(groupFoot); for (const VertexType &groupHead : graph.parents(groupFoot)) { combined_weight += graph.vertex_work_weight(groupHead); } - if (combined_weight > params.maxWeight) continue; + if (combined_weight > params.maxWeight) { + continue; + } v_workw_t maxPath = topDist[groupFoot] + botDist[groupFoot] - graph.vertex_work_weight(groupFoot); for (const VertexType &par : graph.parents(groupFoot)) { @@ -441,7 +544,9 @@ vertex_idx_t Sarkar::allParentsContraction( } for (const VertexType &groupHead : graph.parents(groupFoot)) { for (const VertexType &chld : graph.children(groupHead)) { - if (chld == groupFoot) continue; + if (chld == groupFoot) { + continue; + } maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } } @@ -465,7 +570,9 @@ vertex_idx_t Sarkar::allParentsContraction( std::vector partitionedFlag(graph.num_vertices(), false); - vertex_idx_t maxCorseningNum = graph.num_vertices() - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + vertex_idx_t maxCorseningNum + = graph.num_vertices() + - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); vertex_idx_t counter = 0; long minSave = std::numeric_limits::lowest(); @@ -474,10 +581,14 @@ vertex_idx_t Sarkar::allParentsContraction( const VertexType &groupFoot = prioIter->second; // Iterations halt - if (vertSave < minSave) break; + if (vertSave < minSave) { + break; + } // Check whether we can glue - if (partitionedFlag[groupFoot]) continue; + if (partitionedFlag[groupFoot]) { + continue; + } bool shouldSkip = false; for (const VertexType &groupHead : graph.parents(groupFoot)) { if (partitionedFlag[groupHead]) { @@ -485,7 +596,9 @@ vertex_idx_t Sarkar::allParentsContraction( break; } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } // Adding to partition std::vector part; @@ -495,8 +608,8 @@ vertex_idx_t Sarkar::allParentsContraction( part.emplace_back(groupHead); } - expansionMapOutput.emplace_back( std::move(part) ); - counter += static_cast>( graph.in_degree(groupFoot) ); + expansionMapOutput.emplace_back(std::move(part)); + counter += static_cast>(graph.in_degree(groupFoot)); if (counter > maxCorseningNum) { minSave = vertSave; } @@ -507,89 +620,62 @@ vertex_idx_t Sarkar::allParentsContraction( } for (const VertexType &vert : graph.vertices()) { - if (partitionedFlag[vert]) continue; + if (partitionedFlag[vert]) { + continue; + } expansionMapOutput.emplace_back(std::initializer_list{vert}); } return counter; } - - - - - - - - - - - -template -std::vector>> Sarkar::generate_vertex_expansion_map(const Graph_t_in &dag_in, vertex_idx_t &diff) { +template +std::vector>> Sarkar::generate_vertex_expansion_map( + const Graph_t_in &dag_in, vertex_idx_t &diff) { std::vector>> expansionMap; // std::cout << "Mode: " << static_cast(params.mode) << "\n"; - switch (params.mode) - { - case SarkarParams::Mode::LINES: - { - diff = singleContraction(params.commCost, dag_in, expansionMap); - } - break; + switch (params.mode) { + case SarkarParams::Mode::LINES: { + diff = singleContraction(params.commCost, dag_in, expansionMap); + } break; - case SarkarParams::Mode::FAN_IN_FULL: - { - diff = allParentsContraction(params.commCost, dag_in, expansionMap); - } - break; + case SarkarParams::Mode::FAN_IN_FULL: { + diff = allParentsContraction(params.commCost, dag_in, expansionMap); + } break; - case SarkarParams::Mode::FAN_IN_PARTIAL: - { - diff = someParentsContraction(params.commCost, dag_in, expansionMap); - } - break; + case SarkarParams::Mode::FAN_IN_PARTIAL: { + diff = someParentsContraction(params.commCost, dag_in, expansionMap); + } break; - case SarkarParams::Mode::FAN_OUT_FULL: - { - diff = allChildrenContraction(params.commCost, dag_in, expansionMap); - } - break; + case SarkarParams::Mode::FAN_OUT_FULL: { + diff = allChildrenContraction(params.commCost, dag_in, expansionMap); + } break; - case SarkarParams::Mode::FAN_OUT_PARTIAL: - { - diff = someChildrenContraction(params.commCost, dag_in, expansionMap); - } - break; + case SarkarParams::Mode::FAN_OUT_PARTIAL: { + diff = someChildrenContraction(params.commCost, dag_in, expansionMap); + } break; - case SarkarParams::Mode::LEVEL_EVEN: - { - diff = levelContraction(params.commCost, dag_in, expansionMap); - } - break; + case SarkarParams::Mode::LEVEL_EVEN: { + diff = levelContraction(params.commCost, dag_in, expansionMap); + } break; - case SarkarParams::Mode::LEVEL_ODD: - { - diff = levelContraction(params.commCost, dag_in, expansionMap); - } - break; + case SarkarParams::Mode::LEVEL_ODD: { + diff = levelContraction(params.commCost, dag_in, expansionMap); + } break; case SarkarParams::Mode::FAN_IN_BUFFER: case SarkarParams::Mode::FAN_OUT_BUFFER: - case SarkarParams::Mode::HOMOGENEOUS_BUFFER: - { - diff = homogeneous_buffer_merge(params.commCost, dag_in, expansionMap); - } - break; - - default: - { - #ifdef __cpp_lib_unreachable - std::unreachable(); - #endif - assert(false); - } - break; + case SarkarParams::Mode::HOMOGENEOUS_BUFFER: { + diff = homogeneous_buffer_merge(params.commCost, dag_in, expansionMap); + } break; + + default: { +#ifdef __cpp_lib_unreachable + std::unreachable(); +#endif + assert(false); + } break; } // std::cout << " Diff: " << diff << '\n'; @@ -597,34 +683,38 @@ std::vector>> Sarkar -std::vector>> Sarkar::generate_vertex_expansion_map(const Graph_t_in &dag_in) { +template +std::vector>> Sarkar::generate_vertex_expansion_map( + const Graph_t_in &dag_in) { vertex_idx_t dummy; return generate_vertex_expansion_map(dag_in, dummy); } -template -vertex_idx_t Sarkar::someChildrenContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const { +template +vertex_idx_t Sarkar::someChildrenContraction( + v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const { using VertexType = vertex_idx_t; assert(expansionMapOutput.size() == 0); - const std::vector< vertex_idx_t > vertexPoset = get_top_node_distance>(graph); - const std::vector< v_workw_t > topDist = getTopDistance(commCost, graph); - const std::vector< v_workw_t > botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset = get_top_node_distance>(graph); + const std::vector> topDist = getTopDistance(commCost, graph); + const std::vector> botDist = getBotDistance(commCost, graph); auto cmp = [](const std::pair> &lhs, const std::pair> &rhs) { - return (lhs.first > rhs.first) - || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); + return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); }; std::set>, decltype(cmp)> vertPriority(cmp); for (const VertexType &groupHead : graph.vertices()) { - if (graph.out_degree(groupHead) < 2) continue; + if (graph.out_degree(groupHead) < 2) { + continue; + } auto cmp_chld = [&topDist, &botDist](const VertexType &lhs, const VertexType &rhs) { - return (topDist[lhs] < topDist[rhs]) - || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] > botDist[rhs])) - || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] == botDist[rhs]) && (lhs < rhs)); + return (topDist[lhs] < topDist[rhs]) || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] > botDist[rhs])) + || ((topDist[lhs] == topDist[rhs]) && (botDist[lhs] == botDist[rhs]) && (lhs < rhs)); }; std::set childrenPriority(cmp_chld); for (const VertexType &chld : graph.children(groupHead)) { @@ -632,9 +722,13 @@ vertex_idx_t Sarkar::someChildrenContractio childrenPriority.emplace(chld); } } - if (childrenPriority.size() < 2) continue; + if (childrenPriority.size() < 2) { + continue; + } - std::vector< std::pair< typename std::set::const_iterator, typename std::set::const_iterator > > admissble_children_groups; + std::vector::const_iterator, + typename std::set::const_iterator>> + admissble_children_groups; for (auto chld_iter_start = childrenPriority.cbegin(); chld_iter_start != childrenPriority.cend();) { if constexpr (has_typed_vertices_v) { if (graph.vertex_type(groupHead) != graph.vertex_type(*chld_iter_start)) { @@ -646,7 +740,8 @@ vertex_idx_t Sarkar::someChildrenContractio const v_workw_t t_dist = topDist[*chld_iter_start]; const v_workw_t b_dist = botDist[*chld_iter_start]; auto chld_iter_end = chld_iter_start; - while (chld_iter_end != childrenPriority.cend() && t_dist == topDist[*chld_iter_end] && b_dist == botDist[*chld_iter_end]) { + while (chld_iter_end != childrenPriority.cend() && t_dist == topDist[*chld_iter_end] + && b_dist == botDist[*chld_iter_end]) { if constexpr (has_typed_vertices_v) { if (graph.vertex_type(groupHead) != graph.vertex_type(*chld_iter_end)) { break; @@ -674,7 +769,9 @@ vertex_idx_t Sarkar::someChildrenContractio contractionChildrenSet.emplace(*it); added_weight += graph.vertex_work_weight(*it); } - if (added_weight > params.maxWeight) break; + if (added_weight > params.maxWeight) { + break; + } v_workw_t maxPath = 0; for (const VertexType &vert : contractionEnsemble) { @@ -686,7 +783,9 @@ vertex_idx_t Sarkar::someChildrenContractio for (const VertexType &vert : contractionEnsemble) { for (const VertexType &par : graph.parents(vert)) { - if (par == groupHead) continue; + if (par == groupHead) { + continue; + } maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } } @@ -718,7 +817,9 @@ vertex_idx_t Sarkar::someChildrenContractio std::vector partitionedFlag(graph.num_vertices(), false); std::vector partitionedHeadFlag(graph.num_vertices(), false); - vertex_idx_t maxCorseningNum = graph.num_vertices() - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + vertex_idx_t maxCorseningNum + = graph.num_vertices() + - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); vertex_idx_t counter = 0; long minSave = std::numeric_limits::lowest(); @@ -728,7 +829,9 @@ vertex_idx_t Sarkar::someChildrenContractio const std::vector &contractionEnsemble = prioIter->second; // Iterations halt - if (vertSave < minSave) break; + if (vertSave < minSave) { + break; + } // Check whether we can glue bool shouldSkip = false; @@ -738,21 +841,26 @@ vertex_idx_t Sarkar::someChildrenContractio break; } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } for (const VertexType &chld : graph.children(groupHead)) { - if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), chld) == contractionEnsemble.cend()) && (vertexPoset[chld] == vertexPoset[groupHead] + 1)) { + if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), chld) == contractionEnsemble.cend()) + && (vertexPoset[chld] == vertexPoset[groupHead] + 1)) { if ((partitionedFlag[chld]) && (!partitionedHeadFlag[chld])) { shouldSkip = true; break; } } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } // Adding to partition expansionMapOutput.emplace_back(contractionEnsemble); - counter += static_cast>( contractionEnsemble.size() ) - 1; + counter += static_cast>(contractionEnsemble.size()) - 1; if (counter > maxCorseningNum) { minSave = vertSave; } @@ -763,35 +871,40 @@ vertex_idx_t Sarkar::someChildrenContractio } for (const VertexType &vert : graph.vertices()) { - if (partitionedFlag[vert]) continue; + if (partitionedFlag[vert]) { + continue; + } expansionMapOutput.emplace_back(std::initializer_list{vert}); } return counter; } -template -vertex_idx_t Sarkar::someParentsContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const { +template +vertex_idx_t Sarkar::someParentsContraction( + v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const { using VertexType = vertex_idx_t; assert(expansionMapOutput.size() == 0); - const std::vector< vertex_idx_t > vertexPoset = getBotPosetMap(graph); - const std::vector< v_workw_t > topDist = getTopDistance(commCost, graph); - const std::vector< v_workw_t > botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset = getBotPosetMap(graph); + const std::vector> topDist = getTopDistance(commCost, graph); + const std::vector> botDist = getBotDistance(commCost, graph); auto cmp = [](const std::pair> &lhs, const std::pair> &rhs) { - return (lhs.first > rhs.first) - || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); + return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); }; std::set>, decltype(cmp)> vertPriority(cmp); for (const VertexType &groupFoot : graph.vertices()) { - if (graph.in_degree(groupFoot) < 2) continue; + if (graph.in_degree(groupFoot) < 2) { + continue; + } auto cmp_par = [&topDist, &botDist](const VertexType &lhs, const VertexType &rhs) { - return (botDist[lhs] < botDist[rhs]) - || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] > topDist[rhs])) - || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] == topDist[rhs]) && (lhs < rhs)); + return (botDist[lhs] < botDist[rhs]) || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] > topDist[rhs])) + || ((botDist[lhs] == botDist[rhs]) && (topDist[lhs] == topDist[rhs]) && (lhs < rhs)); }; std::set parentsPriority(cmp_par); for (const VertexType &par : graph.parents(groupFoot)) { @@ -799,9 +912,13 @@ vertex_idx_t Sarkar::someParentsContraction parentsPriority.emplace(par); } } - if (parentsPriority.size() < 2) continue; + if (parentsPriority.size() < 2) { + continue; + } - std::vector< std::pair< typename std::set::const_iterator, typename std::set::const_iterator > > admissble_parent_groups; + std::vector::const_iterator, + typename std::set::const_iterator>> + admissble_parent_groups; for (auto par_iter_start = parentsPriority.cbegin(); par_iter_start != parentsPriority.cend();) { if constexpr (has_typed_vertices_v) { if (graph.vertex_type(groupFoot) != graph.vertex_type(*par_iter_start)) { @@ -841,19 +958,23 @@ vertex_idx_t Sarkar::someParentsContraction contractionParentsSet.emplace(*it); added_weight += graph.vertex_work_weight(*it); } - if (added_weight > params.maxWeight) break; + if (added_weight > params.maxWeight) { + break; + } v_workw_t maxPath = 0; for (const VertexType &vert : contractionEnsemble) { maxPath = std::max(maxPath, topDist[vert] + botDist[vert] - graph.vertex_work_weight(vert)); } - + v_workw_t maxParentDist = 0; - v_workw_t maxChildDist = 0; + v_workw_t maxChildDist = 0; for (const VertexType &vert : contractionEnsemble) { for (const VertexType &chld : graph.children(vert)) { - if (chld == groupFoot) continue; + if (chld == groupFoot) { + continue; + } maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } } @@ -885,7 +1006,9 @@ vertex_idx_t Sarkar::someParentsContraction std::vector partitionedFlag(graph.num_vertices(), false); std::vector partitionedFootFlag(graph.num_vertices(), false); - vertex_idx_t maxCorseningNum = graph.num_vertices() - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); + vertex_idx_t maxCorseningNum + = graph.num_vertices() + - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); vertex_idx_t counter = 0; long minSave = std::numeric_limits::lowest(); @@ -895,7 +1018,9 @@ vertex_idx_t Sarkar::someParentsContraction const std::vector &contractionEnsemble = prioIter->second; // Iterations halt - if (vertSave < minSave) break; + if (vertSave < minSave) { + break; + } // Check whether we can glue bool shouldSkip = false; @@ -905,21 +1030,26 @@ vertex_idx_t Sarkar::someParentsContraction break; } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } for (const VertexType &par : graph.parents(groupFoot)) { - if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), par) == contractionEnsemble.cend()) && (vertexPoset[par] + 1 == vertexPoset[groupFoot])) { + if ((std::find(contractionEnsemble.cbegin(), contractionEnsemble.cend(), par) == contractionEnsemble.cend()) + && (vertexPoset[par] + 1 == vertexPoset[groupFoot])) { if ((partitionedFlag[par]) && (!partitionedFootFlag[par])) { shouldSkip = true; break; } } } - if (shouldSkip) continue; + if (shouldSkip) { + continue; + } // Adding to partition expansionMapOutput.emplace_back(contractionEnsemble); - counter += static_cast>( contractionEnsemble.size() ) - 1; + counter += static_cast>(contractionEnsemble.size()) - 1; if (counter > maxCorseningNum) { minSave = vertSave; } @@ -930,43 +1060,48 @@ vertex_idx_t Sarkar::someParentsContraction } for (const VertexType &vert : graph.vertices()) { - if (partitionedFlag[vert]) continue; + if (partitionedFlag[vert]) { + continue; + } expansionMapOutput.emplace_back(std::initializer_list{vert}); } return counter; } -template -vertex_idx_t Sarkar::levelContraction(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const { +template +vertex_idx_t Sarkar::levelContraction( + v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const { using VertexType = vertex_idx_t; assert(expansionMapOutput.size() == 0); - const std::vector< vertex_idx_t > vertexPoset = params.useTopPoset ? get_top_node_distance>(graph) : getBotPosetMap(graph); - const std::vector< v_workw_t > topDist = getTopDistance(commCost, graph); - const std::vector< v_workw_t > botDist = getBotDistance(commCost, graph); + const std::vector> vertexPoset + = params.useTopPoset ? get_top_node_distance>(graph) : getBotPosetMap(graph); + const std::vector> topDist = getTopDistance(commCost, graph); + const std::vector> botDist = getBotDistance(commCost, graph); auto cmp = [](const std::pair> &lhs, const std::pair> &rhs) { - return (lhs.first > rhs.first) - || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); + return (lhs.first > rhs.first) || ((lhs.first == rhs.first) && (lhs.second < rhs.second)); }; std::set>, decltype(cmp)> vertPriority(cmp); const vertex_idx_t minLevel = *std::min_element(vertexPoset.cbegin(), vertexPoset.cend()); const vertex_idx_t maxLevel = *std::max_element(vertexPoset.cbegin(), vertexPoset.cend()); - const vertex_idx_t parity = params.mode == SarkarParams::Mode::LEVEL_EVEN? 0 : 1; + const vertex_idx_t parity = params.mode == SarkarParams::Mode::LEVEL_EVEN ? 0 : 1; std::vector>> levels(maxLevel - minLevel + 1); for (const VertexType &vert : graph.vertices()) { - levels[ vertexPoset[vert] - minLevel ].emplace_back(vert); + levels[vertexPoset[vert] - minLevel].emplace_back(vert); } for (vertex_idx_t headLevel = minLevel + parity; headLevel < maxLevel; headLevel += 2) { const vertex_idx_t footLevel = headLevel + 1; - - const std::vector> &headVertices = levels[ headLevel - minLevel ]; - const std::vector> &footVertices = levels[ footLevel - minLevel ]; + + const std::vector> &headVertices = levels[headLevel - minLevel]; + const std::vector> &footVertices = levels[footLevel - minLevel]; Union_Find_Universe, v_memw_t> uf; for (const VertexType &vert : headVertices) { @@ -978,10 +1113,14 @@ vertex_idx_t Sarkar::levelContraction(v_wor for (const VertexType &srcVert : headVertices) { for (const VertexType &tgtVert : graph.children(srcVert)) { - if (vertexPoset[tgtVert] != footLevel) continue; - + if (vertexPoset[tgtVert] != footLevel) { + continue; + } + if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(srcVert) != graph.vertex_type(tgtVert)) continue; + if (graph.vertex_type(srcVert) != graph.vertex_type(tgtVert)) { + continue; + } } uf.join_by_name(srcVert, tgtVert); @@ -990,8 +1129,12 @@ vertex_idx_t Sarkar::levelContraction(v_wor std::vector> components = uf.get_connected_components(); for (std::vector &comp : components) { - if (comp.size() < 2) continue; - if (uf.get_weight_of_component_by_name(comp.at(0)) > params.maxWeight) continue; + if (comp.size() < 2) { + continue; + } + if (uf.get_weight_of_component_by_name(comp.at(0)) > params.maxWeight) { + continue; + } std::sort(comp.begin(), comp.end()); @@ -1003,7 +1146,9 @@ vertex_idx_t Sarkar::levelContraction(v_wor v_workw_t maxParentDist = 0; for (const VertexType &vert : comp) { for (const VertexType &par : graph.parents(vert)) { - if (std::binary_search(comp.cbegin(), comp.cend(), par)) continue; + if (std::binary_search(comp.cbegin(), comp.cend(), par)) { + continue; + } maxParentDist = std::max(maxParentDist, topDist[par] + commCost); } @@ -1012,31 +1157,32 @@ vertex_idx_t Sarkar::levelContraction(v_wor v_workw_t maxChildDist = 0; for (const VertexType &vert : comp) { for (const VertexType &chld : graph.children(vert)) { - if (std::binary_search(comp.cbegin(), comp.cend(), chld)) continue; + if (std::binary_search(comp.cbegin(), comp.cend(), chld)) { + continue; + } maxChildDist = std::max(maxChildDist, botDist[chld] + commCost); } } - v_workw_t newMaxPath = maxParentDist + maxChildDist; for (const VertexType &vert : comp) { newMaxPath += graph.vertex_work_weight(vert); } long savings = static_cast(maxPath) - static_cast(newMaxPath); - + if (savings + static_cast(params.leniency * static_cast(maxPath)) >= 0) { vertPriority.emplace(savings, comp); } - } } std::vector partitionedFlag(graph.num_vertices(), false); - vertex_idx_t maxCorseningNum = graph.num_vertices() - static_cast< vertex_idx_t >(static_cast(graph.num_vertices()) * params.geomDecay); - + vertex_idx_t maxCorseningNum + = graph.num_vertices() + - static_cast>(static_cast(graph.num_vertices()) * params.geomDecay); vertex_idx_t counter = 0; long minSave = std::numeric_limits::lowest(); @@ -1045,13 +1191,15 @@ vertex_idx_t Sarkar::levelContraction(v_wor const std::vector &comp = prioIter->second; // Iterations halt - if (compSave < minSave) break; + if (compSave < minSave) { + break; + } // Check whether we can glue bool shouldSkipHead = false; bool shouldSkipFoot = false; for (const VertexType &vert : comp) { - if (((vertexPoset[vert] - minLevel - parity) % 2) == 0) { // head vertex + if (((vertexPoset[vert] - minLevel - parity) % 2) == 0) { // head vertex for (const VertexType &chld : graph.children(vert)) { if ((vertexPoset[chld] == vertexPoset[vert] + 1) && partitionedFlag[chld]) { shouldSkipHead = true; @@ -1066,11 +1214,13 @@ vertex_idx_t Sarkar::levelContraction(v_wor } } - if (shouldSkipHead && shouldSkipFoot) continue; + if (shouldSkipHead && shouldSkipFoot) { + continue; + } // Adding to partition expansionMapOutput.emplace_back(comp); - counter += static_cast>( comp.size() - 1 ); + counter += static_cast>(comp.size() - 1); if (counter > maxCorseningNum) { minSave = compSave; } @@ -1082,16 +1232,20 @@ vertex_idx_t Sarkar::levelContraction(v_wor expansionMapOutput.reserve(graph.num_vertices() - counter); for (const VertexType &vert : graph.vertices()) { - if (partitionedFlag[vert]) continue; - + if (partitionedFlag[vert]) { + continue; + } + expansionMapOutput.emplace_back(std::initializer_list{vert}); } return counter; } -template -std::vector Sarkar::computeNodeHashes(const Graph_t_in &graph, const std::vector< vertex_idx_t > &vertexPoset, const std::vector< v_workw_t > &dist) const { +template +std::vector Sarkar::computeNodeHashes(const Graph_t_in &graph, + const std::vector> &vertexPoset, + const std::vector> &dist) const { using VertexType = vertex_idx_t; std::vector hashes(graph.num_vertices()); @@ -1108,14 +1262,18 @@ std::vector Sarkar::computeNodeHashes(cons return hashes; } -template -std::vector Sarkar::homogeneousMerge(const std::size_t number, const std::size_t minSize, const std::size_t maxSize) const { +template +std::vector Sarkar::homogeneousMerge(const std::size_t number, + const std::size_t minSize, + const std::size_t maxSize) const { assert(minSize <= maxSize); assert(number > 0); std::size_t bestDiv = 1U; for (std::size_t div : divisorsList(number)) { - if (div > maxSize) continue; + if (div > maxSize) { + continue; + } if (div < minSize && bestDiv < div) { bestDiv = div; @@ -1131,12 +1289,12 @@ std::vector Sarkar::homogeneousMerge(const std::size_t bestScore = 0U; std::size_t bestBins = number / minSize; - for (std::size_t bins = std::max( number / maxSize, static_cast(2U)); bins <= number / minSize; ++bins) { + for (std::size_t bins = std::max(number / maxSize, static_cast(2U)); bins <= number / minSize; ++bins) { if (number % bins == 0U && number != bins) { return std::vector(bins, number / bins); } - std::size_t score = std::min( divisorsList(number / bins).size(), divisorsList((number / bins) + 1).size() ); + std::size_t score = std::min(divisorsList(number / bins).size(), divisorsList((number / bins) + 1).size()); if (score >= bestScore) { bestScore = score; bestBins = bins; @@ -1145,7 +1303,7 @@ std::vector Sarkar::homogeneousMerge(const std::size_t remainder = number % bestBins; std::size_t size = number / bestBins; - + std::vector groups; for (std::size_t i = 0U; i < bestBins; ++i) { if (remainder != 0U) { @@ -1159,15 +1317,18 @@ std::vector Sarkar::homogeneousMerge(const return groups; } -template -vertex_idx_t Sarkar::homogeneous_buffer_merge(v_workw_t commCost, const Graph_t_in &graph, std::vector>> &expansionMapOutput) const { +template +vertex_idx_t Sarkar::homogeneous_buffer_merge( + v_workw_t commCost, + const Graph_t_in &graph, + std::vector>> &expansionMapOutput) const { using VertexType = vertex_idx_t; assert(expansionMapOutput.size() == 0); - const std::vector< vertex_idx_t > vertexTopPoset = get_top_node_distance>(graph); - const std::vector< vertex_idx_t > vertexBotPoset = getBotPosetMap(graph); - const std::vector< v_workw_t > topDist = getTopDistance(commCost, graph); - const std::vector< v_workw_t > botDist = getBotDistance(commCost, graph); + const std::vector> vertexTopPoset = get_top_node_distance>(graph); + const std::vector> vertexBotPoset = getBotPosetMap(graph); + const std::vector> topDist = getTopDistance(commCost, graph); + const std::vector> botDist = getBotDistance(commCost, graph); std::vector hashValuesCombined(graph.num_vertices(), 1729U); @@ -1177,7 +1338,7 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer for (const VertexType &par : graph.vertices()) { for (const VertexType &chld : graph.children(par)) { hash_combine(hashValuesWithParents[chld], hashValues[par]); - } + } } for (const VertexType &vert : graph.vertices()) { hash_combine(hashValuesCombined[vert], hashValuesWithParents[vert]); @@ -1189,7 +1350,7 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer for (const VertexType &chld : graph.vertices()) { for (const VertexType &par : graph.parents(chld)) { hash_combine(hashValuesWithChildren[par], hashValues[chld]); - } + } } for (const VertexType &vert : graph.vertices()) { hash_combine(hashValuesCombined[vert], hashValuesWithChildren[vert]); @@ -1198,12 +1359,16 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer std::unordered_map> orbits; for (const VertexType &vert : graph.vertices()) { - if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) continue; + if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) { + continue; + } const std::size_t hash = hashValuesCombined[vert]; - auto found_iter = orbits.find(hash); + auto found_iter = orbits.find(hash); if (found_iter == orbits.end()) { - orbits.emplace(std::piecewise_construct, std::forward_as_tuple(hash), std::forward_as_tuple(std::initializer_list< vertex_idx_t >{vert})); + orbits.emplace(std::piecewise_construct, + std::forward_as_tuple(hash), + std::forward_as_tuple(std::initializer_list>{vert})); } else { found_iter->second.emplace(vert); } @@ -1211,13 +1376,19 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer vertex_idx_t counter = 0; std::vector partitionedFlag(graph.num_vertices(), false); - + for (const VertexType &vert : graph.vertices()) { - if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) continue; - if (partitionedFlag[vert]) continue; + if (graph.vertex_work_weight(vert) > params.smallWeightThreshold) { + continue; + } + if (partitionedFlag[vert]) { + continue; + } const std::set &orb = orbits.at(hashValuesCombined[vert]); - if (orb.size() <= 1U) continue; + if (orb.size() <= 1U) { + continue; + } std::set parents; if (params.mode == SarkarParams::Mode::FAN_OUT_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) { @@ -1235,13 +1406,25 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer std::set secureOrb; for (const VertexType &vertCandidate : orb) { - if (vertexTopPoset[vertCandidate] != vertexTopPoset[vert]) continue; - if (vertexBotPoset[vertCandidate] != vertexBotPoset[vert]) continue; - if (graph.vertex_work_weight(vertCandidate) != graph.vertex_work_weight(vert)) continue; - if (topDist[vertCandidate] != topDist[vert]) continue; - if (botDist[vertCandidate] != botDist[vert]) continue; + if (vertexTopPoset[vertCandidate] != vertexTopPoset[vert]) { + continue; + } + if (vertexBotPoset[vertCandidate] != vertexBotPoset[vert]) { + continue; + } + if (graph.vertex_work_weight(vertCandidate) != graph.vertex_work_weight(vert)) { + continue; + } + if (topDist[vertCandidate] != topDist[vert]) { + continue; + } + if (botDist[vertCandidate] != botDist[vert]) { + continue; + } if constexpr (has_typed_vertices_v) { - if (graph.vertex_type(vertCandidate) != graph.vertex_type(vert)) continue; + if (graph.vertex_type(vertCandidate) != graph.vertex_type(vert)) { + continue; + } } if (params.mode == SarkarParams::Mode::FAN_OUT_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) { @@ -1249,7 +1432,9 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer for (const VertexType &par : graph.parents(vertCandidate)) { candidateParents.emplace(par); } - if (candidateParents != parents) continue; + if (candidateParents != parents) { + continue; + } } if (params.mode == SarkarParams::Mode::FAN_IN_BUFFER || params.mode == SarkarParams::Mode::HOMOGENEOUS_BUFFER) { @@ -1257,18 +1442,27 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer for (const VertexType &chld : graph.children(vertCandidate)) { candidateChildren.emplace(chld); } - if (candidateChildren != children) continue; + if (candidateChildren != children) { + continue; + } } secureOrb.emplace(vertCandidate); } - if (secureOrb.size() <= 1U) continue; + if (secureOrb.size() <= 1U) { + continue; + } - const v_workw_t desiredVerticesInGroup = graph.vertex_work_weight(vert) == 0 ? std::numeric_limits>::lowest() : params.smallWeightThreshold / graph.vertex_work_weight(vert); - const v_workw_t maxVerticesInGroup = graph.vertex_work_weight(vert) == 0 ? std::numeric_limits>::max() : params.maxWeight / graph.vertex_work_weight(vert); + const v_workw_t desiredVerticesInGroup = graph.vertex_work_weight(vert) == 0 + ? std::numeric_limits>::lowest() + : params.smallWeightThreshold / graph.vertex_work_weight(vert); + const v_workw_t maxVerticesInGroup = graph.vertex_work_weight(vert) == 0 + ? std::numeric_limits>::max() + : params.maxWeight / graph.vertex_work_weight(vert); const std::size_t minDesiredSize = desiredVerticesInGroup < 2 ? 2U : static_cast(desiredVerticesInGroup); - const std::size_t maxDesiredSize = std::max(minDesiredSize, std::min(minDesiredSize * 2U, static_cast(maxVerticesInGroup))); + const std::size_t maxDesiredSize + = std::max(minDesiredSize, std::min(minDesiredSize * 2U, static_cast(maxVerticesInGroup))); std::vector groups = homogeneousMerge(secureOrb.size(), minDesiredSize, maxDesiredSize); @@ -1279,7 +1473,7 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer cluster.emplace_back(*secureOrbIter); ++secureOrbIter; } - expansionMapOutput.emplace_back( std::move(cluster) ); + expansionMapOutput.emplace_back(std::move(cluster)); counter += static_cast(groupSize) - 1; } @@ -1289,11 +1483,13 @@ vertex_idx_t Sarkar::homogeneous_buffer_mer } for (const VertexType &vert : graph.vertices()) { - if (partitionedFlag[vert]) continue; + if (partitionedFlag[vert]) { + continue; + } expansionMapOutput.emplace_back(std::initializer_list{vert}); } return counter; } -} // end namespace osp \ No newline at end of file +} // end namespace osp diff --git a/include/osp/coarser/Sarkar/SarkarMul.hpp b/include/osp/coarser/Sarkar/SarkarMul.hpp index 4eb91732..a89bd1bf 100644 --- a/include/osp/coarser/Sarkar/SarkarMul.hpp +++ b/include/osp/coarser/Sarkar/SarkarMul.hpp @@ -28,58 +28,63 @@ namespace SarkarParams { enum class BufferMergeMode { OFF, FAN_IN, FAN_OUT, HOMOGENEOUS, FULL }; -template +template struct MulParameters { std::size_t seed{42U}; double geomDecay{0.875}; double leniency{0.0}; - std::vector< commCostType > commCostVec{ std::initializer_list{} }; - commCostType maxWeight{ std::numeric_limits::max() }; - commCostType smallWeightThreshold{ std::numeric_limits::lowest() }; + std::vector commCostVec{std::initializer_list{}}; + commCostType maxWeight{std::numeric_limits::max()}; + commCostType smallWeightThreshold{std::numeric_limits::lowest()}; unsigned max_num_iteration_without_changes{3U}; BufferMergeMode buffer_merge_mode{BufferMergeMode::OFF}; }; -} // end namespace SarkarParams -template +} // end namespace SarkarParams + +template class SarkarMul : public MultilevelCoarser { - private: - bool first_coarsen{true}; - Thue_Morse_Sequence thue_coin{42U}; - Biased_Random balanced_random{42U}; - - // Multilevel coarser parameters - SarkarParams::MulParameters< v_workw_t > ml_params; - // Coarser parameters - SarkarParams::Parameters< v_workw_t > params; - // Initial coarser - Sarkar coarser_initial; - // Subsequent coarser - Sarkar coarser_secondary; - - void setSeed(); - void initParams(); - void updateParams(); - - RETURN_STATUS run_single_contraction_mode(vertex_idx_t &diff_vertices); - RETURN_STATUS run_buffer_merges(); - RETURN_STATUS run_contractions(v_workw_t commCost); - RETURN_STATUS run_contractions() override; - - public: - void setParameters(SarkarParams::MulParameters< v_workw_t > ml_params_) { ml_params = std::move(ml_params_); setSeed(); initParams(); }; - - std::string getCoarserName() const { return "Sarkar"; }; + private: + bool first_coarsen{true}; + Thue_Morse_Sequence thue_coin{42U}; + Biased_Random balanced_random{42U}; + + // Multilevel coarser parameters + SarkarParams::MulParameters> ml_params; + // Coarser parameters + SarkarParams::Parameters> params; + // Initial coarser + Sarkar coarser_initial; + // Subsequent coarser + Sarkar coarser_secondary; + + void setSeed(); + void initParams(); + void updateParams(); + + RETURN_STATUS run_single_contraction_mode(vertex_idx_t &diff_vertices); + RETURN_STATUS run_buffer_merges(); + RETURN_STATUS run_contractions(v_workw_t commCost); + RETURN_STATUS run_contractions() override; + + public: + void setParameters(SarkarParams::MulParameters> ml_params_) { + ml_params = std::move(ml_params_); + setSeed(); + initParams(); + }; + + std::string getCoarserName() const { return "Sarkar"; }; }; -template +template void SarkarMul::setSeed() { constexpr std::size_t seedReduction = 4096U; thue_coin = Thue_Morse_Sequence(ml_params.seed % seedReduction); balanced_random = Biased_Random(ml_params.seed); } -template +template void SarkarMul::initParams() { first_coarsen = true; @@ -91,25 +96,25 @@ void SarkarMul::initParams() { if (ml_params.commCostVec.empty()) { v_workw_t syncCosts = 128; syncCosts = std::max(syncCosts, static_cast>(1)); - + while (syncCosts >= static_cast>(1)) { - ml_params.commCostVec.emplace_back( syncCosts ); + ml_params.commCostVec.emplace_back(syncCosts); syncCosts /= 2; } } std::sort(ml_params.commCostVec.begin(), ml_params.commCostVec.end()); - + updateParams(); } -template +template void SarkarMul::updateParams() { coarser_initial.setParameters(params); coarser_secondary.setParameters(params); } -template +template RETURN_STATUS SarkarMul::run_single_contraction_mode(vertex_idx_t &diff_vertices) { RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; @@ -125,32 +130,35 @@ RETURN_STATUS SarkarMul::run_single_contraction_mode(ve bool coarsen_success; if (first_coarsen) { - coarsen_success = coarser_initial.coarsenDag(*(MultilevelCoarser::getOriginalGraph()), coarsened_dag, contraction_map); + coarsen_success = coarser_initial.coarsenDag( + *(MultilevelCoarser::getOriginalGraph()), coarsened_dag, contraction_map); first_coarsen = false; } else { - coarsen_success = coarser_secondary.coarsenDag(*(MultilevelCoarser::dag_history.back()), coarsened_dag, contraction_map); + coarsen_success = coarser_secondary.coarsenDag( + *(MultilevelCoarser::dag_history.back()), coarsened_dag, contraction_map); } - + if (!coarsen_success) { status = RETURN_STATUS::ERROR; } - status = std::max(status, MultilevelCoarser::add_contraction(std::move(contraction_map), std::move(coarsened_dag))); - + status = std::max( + status, MultilevelCoarser::add_contraction(std::move(contraction_map), std::move(coarsened_dag))); + vertex_idx_t new_num_vertices = MultilevelCoarser::dag_history.back()->num_vertices(); diff_vertices = current_num_vertices - new_num_vertices; return status; } -template +template RETURN_STATUS SarkarMul::run_contractions(v_workw_t commCost) { RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; vertex_idx_t diff = 0; - + params.commCost = commCost; updateParams(); - + unsigned outer_no_change = 0; while (outer_no_change < ml_params.max_num_iteration_without_changes) { unsigned inner_no_change = 0; @@ -207,7 +215,7 @@ RETURN_STATUS SarkarMul::run_contractions(v_workw_t::run_contractions(v_workw_t::run_contractions(v_workw_t +template RETURN_STATUS SarkarMul::run_buffer_merges() { RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; unsigned no_change = 0; - while (no_change < ml_params.max_num_iteration_without_changes) { + while (no_change < ml_params.max_num_iteration_without_changes) { vertex_idx_t diff = 0; - if ((ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::HOMOGENEOUS) || (ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::FULL && diff == 0)) { + if ((ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::HOMOGENEOUS) + || (ml_params.buffer_merge_mode == SarkarParams::BufferMergeMode::FULL && diff == 0)) { params.mode = SarkarParams::Mode::HOMOGENEOUS_BUFFER; updateParams(); status = std::max(status, run_single_contraction_mode(diff)); @@ -271,7 +277,7 @@ RETURN_STATUS SarkarMul::run_buffer_merges() { if (diff > 0) { no_change = 0; - status = std::max(status, run_contractions( ml_params.commCostVec.back() )); + status = std::max(status, run_contractions(ml_params.commCostVec.back())); } else { no_change++; } @@ -280,13 +286,12 @@ RETURN_STATUS SarkarMul::run_buffer_merges() { return status; } - -template +template RETURN_STATUS SarkarMul::run_contractions() { initParams(); RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; - + for (const v_workw_t commCost : ml_params.commCostVec) { status = std::max(status, run_contractions(commCost)); } @@ -298,10 +303,4 @@ RETURN_STATUS SarkarMul::run_contractions() { return status; } - - - - - - -} // end namespace osp \ No newline at end of file +} // end namespace osp diff --git a/include/osp/coarser/SquashA/SquashA.hpp b/include/osp/coarser/SquashA/SquashA.hpp index ac5910df..f41b3c5c 100644 --- a/include/osp/coarser/SquashA/SquashA.hpp +++ b/include/osp/coarser/SquashA/SquashA.hpp @@ -31,7 +31,9 @@ limitations under the License. namespace osp { namespace SquashAParams { + enum class Mode { EDGE_WEIGHT, TRIANGLES }; + struct Parameters { double geom_decay_num_nodes{17.0 / 16.0}; double poisson_par{0.0}; @@ -44,27 +46,28 @@ struct Parameters { bool use_structured_poset{false}; bool use_top_poset{true}; }; -} // end namespace SquashAParams -template +} // end namespace SquashAParams + +template class SquashA : public CoarserGenExpansionMap { private: SquashAParams::Parameters params; std::vector generate_poset_in_map(const Graph_t_in &dag_in); - template - std::vector>> - gen_exp_map_from_contractable_edges(const std::multiset, T>, CMP> &edge_weights, - const std::vector &poset_int_mapping, const Graph_t_in &dag_in) { + template + std::vector>> gen_exp_map_from_contractable_edges( + const std::multiset, T>, CMP> &edge_weights, + const std::vector &poset_int_mapping, + const Graph_t_in &dag_in) { static_assert(std::is_arithmetic_v, "T must be of arithmetic type!"); auto lower_third_it = edge_weights.begin(); std::advance(lower_third_it, edge_weights.size() / 3); - T lower_third_wt = std::max(lower_third_it->second, static_cast(1)); // Could be 0 + T lower_third_wt = std::max(lower_third_it->second, static_cast(1)); // Could be 0 - Union_Find_Universe, vertex_idx_t, v_workw_t, - v_memw_t> + Union_Find_Universe, vertex_idx_t, v_workw_t, v_memw_t> connected_components; for (const auto &vert : dag_in.vertices()) { connected_components.add_object(vert, dag_in.vertex_work_weight(vert), dag_in.vertex_mem_weight(vert)); @@ -73,30 +76,32 @@ class SquashA : public CoarserGenExpansionMap { std::vector merged_nodes(dag_in.num_vertices(), false); vertex_idx_t num_nodes_decrease = 0; - vertex_idx_t num_nodes_aim = - dag_in.num_vertices() - static_cast>(static_cast(dag_in.num_vertices()) / - params.geom_decay_num_nodes); + vertex_idx_t num_nodes_aim + = dag_in.num_vertices() + - static_cast>(static_cast(dag_in.num_vertices()) / params.geom_decay_num_nodes); double temperature = 1; unsigned temperature_increase_iteration = 0; - while (num_nodes_decrease < num_nodes_aim && - temperature_increase_iteration <= params.number_of_temperature_increases) { + while (num_nodes_decrease < num_nodes_aim && temperature_increase_iteration <= params.number_of_temperature_increases) { for (const auto &wt_edge : edge_weights) { const auto &edge_d = wt_edge.first; const vertex_idx_t edge_source = source(edge_d, dag_in); const vertex_idx_t edge_target = target(edge_d, dag_in); // Previously merged - if (merged_nodes[edge_source]) + if (merged_nodes[edge_source]) { continue; - if (merged_nodes[edge_target]) + } + if (merged_nodes[edge_target]) { continue; + } // weight check - if (connected_components.get_weight_of_component_by_name(edge_source) + - connected_components.get_weight_of_component_by_name(edge_target) > - static_cast(lower_third_wt) * temperature) + if (connected_components.get_weight_of_component_by_name(edge_source) + + connected_components.get_weight_of_component_by_name(edge_target) + > static_cast(lower_third_wt) * temperature) { continue; + } // no loops criteria check bool check_failed = false; @@ -105,56 +110,72 @@ class SquashA : public CoarserGenExpansionMap { // Checks over all affected edges // In edges first for (const auto &node : dag_in.parents(edge_source)) { - if (node == edge_target) + if (node == edge_target) { continue; - if (!merged_nodes[node]) + } + if (!merged_nodes[node]) { continue; - if (poset_int_mapping[edge_source] >= poset_int_mapping[node] + 2) + } + if (poset_int_mapping[edge_source] >= poset_int_mapping[node] + 2) { continue; + } check_failed = true; break; } - if (check_failed) + if (check_failed) { continue; + } // Out edges first for (const auto &node : dag_in.children(edge_source)) { - if (node == edge_target) + if (node == edge_target) { continue; - if (!merged_nodes[node]) + } + if (!merged_nodes[node]) { continue; - if (poset_int_mapping[node] >= poset_int_mapping[edge_source] + 2) + } + if (poset_int_mapping[node] >= poset_int_mapping[edge_source] + 2) { continue; + } check_failed = true; break; } - if (check_failed) + if (check_failed) { continue; + } // In edges second for (const auto &node : dag_in.parents(edge_target)) { - if (node == edge_source) + if (node == edge_source) { continue; - if (!merged_nodes[node]) + } + if (!merged_nodes[node]) { continue; - if (poset_int_mapping[edge_target] >= poset_int_mapping[node] + 2) + } + if (poset_int_mapping[edge_target] >= poset_int_mapping[node] + 2) { continue; + } check_failed = true; break; } - if (check_failed) + if (check_failed) { continue; + } // Out edges second for (const auto &node : dag_in.children(edge_target)) { - if (node == edge_source) + if (node == edge_source) { continue; - if (!merged_nodes[node]) + } + if (!merged_nodes[node]) { continue; - if (poset_int_mapping[node] >= poset_int_mapping[edge_target] + 2) + } + if (poset_int_mapping[node] >= poset_int_mapping[edge_target] + 2) { continue; + } check_failed = true; break; } - if (check_failed) + if (check_failed) { continue; + } // merging connected_components.join_by_name(edge_source, edge_target); @@ -170,9 +191,10 @@ class SquashA : public CoarserGenExpansionMap { // Getting components to contract and adding graph contraction std::vector>> partition_vec; - vertex_idx_t min_node_decrease = - dag_in.num_vertices() - static_cast>(static_cast(dag_in.num_vertices()) / - std::pow(params.geom_decay_num_nodes, 0.25)); + vertex_idx_t min_node_decrease + = dag_in.num_vertices() + - static_cast>(static_cast(dag_in.num_vertices()) + / std::pow(params.geom_decay_num_nodes, 0.25)); if (num_nodes_decrease > 0 && num_nodes_decrease >= min_node_decrease) { partition_vec = connected_components.get_connected_components(); @@ -189,8 +211,7 @@ class SquashA : public CoarserGenExpansionMap { } public: - virtual std::vector>> - generate_vertex_expansion_map(const Graph_t_in &dag_in) override; + virtual std::vector>> generate_vertex_expansion_map(const Graph_t_in &dag_in) override; SquashA(SquashAParams::Parameters params_ = SquashAParams::Parameters()) : params(params_) {}; @@ -201,12 +222,13 @@ class SquashA : public CoarserGenExpansionMap { virtual ~SquashA() override = default; inline SquashAParams::Parameters &getParams() { return params; } + inline void setParams(SquashAParams::Parameters params_) { params = params_; } std::string getCoarserName() const override { return "SquashA"; } }; -template +template std::vector SquashA::generate_poset_in_map(const Graph_t_in &dag_in) { std::vector poset_int_mapping; if (!params.use_structured_poset) { @@ -225,11 +247,10 @@ std::vector SquashA::generate_poset_in_map(const G return poset_int_mapping; } -template -std::vector>> -SquashA::generate_vertex_expansion_map(const Graph_t_in &dag_in) { - static_assert(is_directed_graph_edge_desc_v, - "Graph_t_in must satisfy the directed_graph_edge_desc concept"); +template +std::vector>> SquashA::generate_vertex_expansion_map( + const Graph_t_in &dag_in) { + static_assert(is_directed_graph_edge_desc_v, "Graph_t_in must satisfy the directed_graph_edge_desc concept"); static_assert(is_computational_dag_edge_desc_v, "Graph_t_in must satisfy the is_computational_dag_edge_desc concept"); // static_assert(has_hashable_edge_desc_v, "Graph_t_in must have hashable edge descriptors"); @@ -238,17 +259,14 @@ SquashA::generate_vertex_expansion_map(const Graph_t_in if constexpr (has_edge_weights_v) { if (params.mode == SquashAParams::Mode::EDGE_WEIGHT) { - auto edge_w_cmp = [](const std::pair, e_commw_t> &lhs, - const std::pair, e_commw_t> &rhs) { - return lhs.second < rhs.second; - }; - std::multiset, e_commw_t>, decltype(edge_w_cmp)> edge_weights( - edge_w_cmp); + auto edge_w_cmp + = [](const std::pair, e_commw_t> &lhs, + const std::pair, e_commw_t> &rhs) { return lhs.second < rhs.second; }; + std::multiset, e_commw_t>, decltype(edge_w_cmp)> edge_weights(edge_w_cmp); { - std::vector> contractable_edges = - get_contractable_edges_from_poset_int_map(poset_int_mapping, dag_in); + std::vector> contractable_edges + = get_contractable_edges_from_poset_int_map(poset_int_mapping, dag_in); for (const auto &edge : contractable_edges) { - if constexpr (has_edge_weights_v) { edge_weights.emplace(edge, dag_in.edge_comm_weight(edge)); } else { @@ -259,28 +277,23 @@ SquashA::generate_vertex_expansion_map(const Graph_t_in return gen_exp_map_from_contractable_edges, decltype(edge_w_cmp)>( edge_weights, poset_int_mapping, dag_in); - } - } + } if (params.mode == SquashAParams::Mode::TRIANGLES) { auto edge_w_cmp = [](const std::pair, std::size_t> &lhs, - const std::pair, std::size_t> &rhs) { - return lhs.second < rhs.second; - }; + const std::pair, std::size_t> &rhs) { return lhs.second < rhs.second; }; std::multiset, std::size_t>, decltype(edge_w_cmp)> edge_weights(edge_w_cmp); { - std::vector> contractable_edges = - get_contractable_edges_from_poset_int_map(poset_int_mapping, dag_in); + std::vector> contractable_edges + = get_contractable_edges_from_poset_int_map(poset_int_mapping, dag_in); for (const auto &edge : contractable_edges) { - std::size_t num_common_triangles = - num_common_parents(dag_in, source(edge, dag_in), target(edge, dag_in)); + std::size_t num_common_triangles = num_common_parents(dag_in, source(edge, dag_in), target(edge, dag_in)); num_common_triangles += num_common_children(dag_in, source(edge, dag_in), target(edge, dag_in)); edge_weights.emplace(edge, num_common_triangles); } } - return gen_exp_map_from_contractable_edges(edge_weights, poset_int_mapping, - dag_in); + return gen_exp_map_from_contractable_edges(edge_weights, poset_int_mapping, dag_in); } else { throw std::runtime_error("Edge sorting mode not recognised."); @@ -289,4 +302,4 @@ SquashA::generate_vertex_expansion_map(const Graph_t_in return {}; } -} // end namespace osp \ No newline at end of file +} // end namespace osp diff --git a/include/osp/coarser/SquashA/SquashAMul.hpp b/include/osp/coarser/SquashA/SquashAMul.hpp index e4019566..f238d576 100644 --- a/include/osp/coarser/SquashA/SquashAMul.hpp +++ b/include/osp/coarser/SquashA/SquashAMul.hpp @@ -24,32 +24,33 @@ limitations under the License. namespace osp { -template +template class SquashAMul : public MultilevelCoarser { - private: - vertex_idx_t min_nodes{ 1 }; - Thue_Morse_Sequence thue_coin{}; - Biased_Random balanced_random{}; - - // Coarser Params - SquashAParams::Parameters params; - // Initial coarser - SquashA coarser_initial; - // Subsequent coarser - SquashA coarser_secondary; - - void updateParams(); - - RETURN_STATUS run_contractions() override; - - public: - void setParams(SquashAParams::Parameters params_) { params = params_; }; - void setMinimumNumberVertices(vertex_idx_t num) { min_nodes = num; }; - - std::string getCoarserName() const { return "SquashA"; }; + private: + vertex_idx_t min_nodes{1}; + Thue_Morse_Sequence thue_coin{}; + Biased_Random balanced_random{}; + + // Coarser Params + SquashAParams::Parameters params; + // Initial coarser + SquashA coarser_initial; + // Subsequent coarser + SquashA coarser_secondary; + + void updateParams(); + + RETURN_STATUS run_contractions() override; + + public: + void setParams(SquashAParams::Parameters params_) { params = params_; }; + + void setMinimumNumberVertices(vertex_idx_t num) { min_nodes = num; }; + + std::string getCoarserName() const { return "SquashA"; }; }; -template +template void SquashAMul::updateParams() { params.use_structured_poset = thue_coin.get_flip(); params.use_top_poset = balanced_random.get_flip(); @@ -58,17 +59,17 @@ void SquashAMul::updateParams() { coarser_secondary.setParams(params); } -template +template RETURN_STATUS SquashAMul::run_contractions() { RETURN_STATUS status = RETURN_STATUS::OSP_SUCCESS; - Biased_Random_with_side_bias coin( params.edge_sort_ratio ); + Biased_Random_with_side_bias coin(params.edge_sort_ratio); bool first_coarsen = true; unsigned no_change_in_a_row = 0; vertex_idx_t current_num_vertices = MultilevelCoarser::getOriginalGraph()->num_vertices(); - while( no_change_in_a_row < params.num_rep_without_node_decrease && current_num_vertices > min_nodes ) { + while (no_change_in_a_row < params.num_rep_without_node_decrease && current_num_vertices > min_nodes) { updateParams(); Graph_t_coarse coarsened_dag; @@ -76,18 +77,22 @@ RETURN_STATUS SquashAMul::run_contractions() { bool coarsen_success; if (first_coarsen) { - coarsen_success = coarser_initial.coarsenDag(*(MultilevelCoarser::getOriginalGraph()), coarsened_dag, contraction_map); + coarsen_success = coarser_initial.coarsenDag( + *(MultilevelCoarser::getOriginalGraph()), coarsened_dag, contraction_map); first_coarsen = false; } else { - coarsen_success = coarser_secondary.coarsenDag(*(MultilevelCoarser::dag_history.back()), coarsened_dag, contraction_map); + coarsen_success = coarser_secondary.coarsenDag( + *(MultilevelCoarser::dag_history.back()), coarsened_dag, contraction_map); } - + if (!coarsen_success) { status = RETURN_STATUS::ERROR; } - status = std::max(status, MultilevelCoarser::add_contraction(std::move(contraction_map), std::move(coarsened_dag))); - + status = std::max( + status, + MultilevelCoarser::add_contraction(std::move(contraction_map), std::move(coarsened_dag))); + vertex_idx_t new_num_vertices = MultilevelCoarser::dag_history.back()->num_vertices(); if (new_num_vertices == current_num_vertices) { @@ -101,9 +106,4 @@ RETURN_STATUS SquashAMul::run_contractions() { return status; } - - - - - -} // end namespace osp \ No newline at end of file +} // end namespace osp diff --git a/include/osp/coarser/StepByStep/StepByStepCoarser.hpp b/include/osp/coarser/StepByStep/StepByStepCoarser.hpp index ba2dbdf1..4f655d62 100644 --- a/include/osp/coarser/StepByStep/StepByStepCoarser.hpp +++ b/include/osp/coarser/StepByStep/StepByStepCoarser.hpp @@ -18,45 +18,41 @@ limitations under the License. #pragma once +#include "osp/coarser/Coarser.hpp" #include "osp/concepts/computational_dag_concept.hpp" #include "osp/concepts/constructable_computational_dag_concept.hpp" -#include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" #include "osp/graph_algorithms/computational_dag_construction_util.hpp" +#include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" -#include "osp/coarser/Coarser.hpp" #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" namespace osp { -template +template class StepByStepCoarser : public CoarserGenContractionMap { - using vertex_idx = vertex_idx_t; - using vertex_type_t_or_default = std::conditional_t, v_type_t, unsigned>; - using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; - - using boost_graph_t = boost_graph, v_commw_t, v_memw_t, vertex_type_t_or_default, edge_commw_t_or_default >; + using vertex_type_t_or_default + = std::conditional_t, v_type_t, unsigned>; + using edge_commw_t_or_default = std::conditional_t, e_commw_t, v_commw_t>; + + using boost_graph_t + = boost_graph, v_commw_t, v_memw_t, vertex_type_t_or_default, edge_commw_t_or_default>; public: - enum COARSENING_STRATEGY - { - EDGE_BY_EDGE, - BOTTOM_LEVEL_CLUSTERS - }; + enum COARSENING_STRATEGY { EDGE_BY_EDGE, BOTTOM_LEVEL_CLUSTERS }; - enum PROBLEM_TYPE - { - SCHEDULING, - PEBBLING - }; + enum PROBLEM_TYPE { SCHEDULING, PEBBLING }; - struct EdgeToContract{ + struct EdgeToContract { std::pair edge; v_workw_t work_weight; v_commw_t comm_weight; - EdgeToContract(const vertex_idx source, const vertex_idx target, const v_workw_t work_weight_, const v_commw_t comm_weight_) + EdgeToContract(const vertex_idx source, + const vertex_idx target, + const v_workw_t work_weight_, + const v_commw_t comm_weight_) : edge(source, target), work_weight(work_weight_), comm_weight(comm_weight_) {} bool operator<(const EdgeToContract &other) const { @@ -65,8 +61,7 @@ class StepByStepCoarser : public CoarserGenContractionMap { }; private: - - std::vector > contractionHistory; + std::vector> contractionHistory; COARSENING_STRATEGY coarsening_strategy = COARSENING_STRATEGY::EDGE_BY_EDGE; PROBLEM_TYPE problem_type = PROBLEM_TYPE::SCHEDULING; @@ -78,12 +73,12 @@ class StepByStepCoarser : public CoarserGenContractionMap { std::vector> contains; - std::map, v_commw_t > edgeWeights; - std::map, v_commw_t > contractable; + std::map, v_commw_t> edgeWeights; + std::map, v_commw_t> contractable; std::vector node_valid; std::vector top_order_idx; - v_memw_t fast_mem_capacity = std::numeric_limits>::max(); // for pebbling + v_memw_t fast_mem_capacity = std::numeric_limits>::max(); // for pebbling // Utility functions for coarsening in general void ContractSingleEdge(std::pair edge); @@ -95,48 +90,44 @@ class StepByStepCoarser : public CoarserGenContractionMap { std::set getContractableParents(vertex_idx node) const; void updateDistantEdgeContractibility(std::pair edge); - std::pair PickEdgeToContract(const std::vector& candidates) const; + std::pair PickEdgeToContract(const std::vector &candidates) const; std::vector CreateEdgeCandidateList() const; // Utility functions for cluster coarsening - std::vector > ClusterCoarsen() const; + std::vector> ClusterCoarsen() const; std::vector ComputeFilteredTopLevel() const; // Utility functions for coarsening in a pebbling problem - bool IncontractableForPebbling(const std::pair&) const; + bool IncontractableForPebbling(const std::pair &) const; void MergeSourcesInPebbling(); // Utility for contracting into final format void SetIdVector(std::vector> &new_vertex_id) const; - static std::vector GetFilteredTopOrderIdx(const Graph_t& G, const std::vector& is_valid); - + static std::vector GetFilteredTopOrderIdx(const Graph_t &G, const std::vector &is_valid); public: virtual ~StepByStepCoarser() = default; virtual std::string getCoarserName() const override { return "StepByStepCoarsening"; } - - // DAG coarsening virtual std::vector> generate_vertex_contraction_map(const Graph_t &dag_in) override; - - // Coarsening for pebbling problems - leaves source nodes intact, considers memory bound - void coarsenForPebbling(const Graph_t& dag_in, Graph_t &coarsened_dag, - std::vector> &new_vertex_id); + void coarsenForPebbling(const Graph_t &dag_in, Graph_t &coarsened_dag, std::vector> &new_vertex_id); + + void setCoarseningStrategy(COARSENING_STRATEGY strategy_) { coarsening_strategy = strategy_; } + void setTargetNumberOfNodes(const unsigned nr_nodes_) { target_nr_of_nodes = nr_nodes_; } + void setFastMemCapacity(const v_memw_t capacity_) { fast_mem_capacity = capacity_; } - void setCoarseningStrategy(COARSENING_STRATEGY strategy_){ coarsening_strategy = strategy_;} - void setTargetNumberOfNodes(const unsigned nr_nodes_){ target_nr_of_nodes = nr_nodes_;} - void setFastMemCapacity(const v_memw_t capacity_){ fast_mem_capacity = capacity_;} + std::vector> getContractionHistory() const { return contractionHistory; } - std::vector > getContractionHistory() const {return contractionHistory;} std::vector GetIntermediateIDs(vertex_idx until_which_step) const; Graph_t Contract(const std::vector> &new_vertex_id) const; - const Graph_t& getOriginalDag() const {return G_full;} + + const Graph_t &getOriginalDag() const { return G_full; } }; // template @@ -144,14 +135,12 @@ class StepByStepCoarser : public CoarserGenContractionMap { // std::vector>> &old_vertex_ids, // std::vector> &new_vertex_id) -template -std::vector> StepByStepCoarser::generate_vertex_contraction_map(const Graph_t &dag_in) -{ +template +std::vector> StepByStepCoarser::generate_vertex_contraction_map(const Graph_t &dag_in) { const unsigned N = static_cast(dag_in.num_vertices()); G_full = dag_in; - for(vertex_idx node = G_coarse.num_vertices(); node > 0;) - { + for (vertex_idx node = G_coarse.num_vertices(); node > 0;) { --node; G_coarse.remove_vertex(node); } @@ -161,8 +150,9 @@ std::vector> StepByStepCoarser::generate_vertex_c contractionHistory.clear(); // target nr of nodes must be reasonable - if(target_nr_of_nodes == 0 || target_nr_of_nodes > N) - target_nr_of_nodes = std::max(N/2, 1U); + if (target_nr_of_nodes == 0 || target_nr_of_nodes > N) { + target_nr_of_nodes = std::max(N / 2, 1U); + } // list of original node indices contained in each contracted node contains.clear(); @@ -171,36 +161,36 @@ std::vector> StepByStepCoarser::generate_vertex_c node_valid.clear(); node_valid.resize(N, true); - for (vertex_idx node = 0; node < N; ++node) + for (vertex_idx node = 0; node < N; ++node) { contains[node].insert(node); + } - //used for original, slow coarsening + // used for original, slow coarsening edgeWeights.clear(); contractable.clear(); - - if(coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) - { + + if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) { // Init edge weights - for (vertex_idx node = 0; node < N; ++node) - for (vertex_idx succ: G_full.children(node)) + for (vertex_idx node = 0; node < N; ++node) { + for (vertex_idx succ : G_full.children(node)) { edgeWeights[std::make_pair(node, succ)] = G_full.vertex_comm_weight(node); + } + } // get original contractable edges InitializeContractableEdges(); } - for (unsigned NrOfNodes = N; NrOfNodes > target_nr_of_nodes; ) { + for (unsigned NrOfNodes = N; NrOfNodes > target_nr_of_nodes;) { // Single contraction step std::vector> edgesToContract; // choose edges to contract in this step - if(coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) - { + if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) { std::vector candidates = CreateEdgeCandidateList(); - if(candidates.empty()) - { - std::cout<<"Error: no more edges to contract"< chosenEdge = PickEdgeToContract(candidates); @@ -208,60 +198,62 @@ std::vector> StepByStepCoarser::generate_vertex_c // Update far-away edges that become uncontractable now updateDistantEdgeContractibility(chosenEdge); - } - else + } else { edgesToContract = ClusterCoarsen(); + } - if(edgesToContract.empty()) + if (edgesToContract.empty()) { break; - + } + // contract these edges - for(const std::pair& edge : edgesToContract) - { - if(coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) - { - //Update contractable edges - edge.b - for(vertex_idx pred : G_coarse.parents(edge.second)) + for (const std::pair &edge : edgesToContract) { + if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) { + // Update contractable edges - edge.b + for (vertex_idx pred : G_coarse.parents(edge.second)) { contractable.erase(std::make_pair(pred, edge.second)); - - for(vertex_idx succ : G_coarse.children(edge.second)) + } + + for (vertex_idx succ : G_coarse.children(edge.second)) { contractable.erase(std::make_pair(edge.second, succ)); + } } ContractSingleEdge(edge); node_valid[edge.second] = false; - if(coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) - { + if (coarsening_strategy == COARSENING_STRATEGY::EDGE_BY_EDGE) { ComputeFilteredTopOrderIdx(); - //Update contractable edges - edge.a + // Update contractable edges - edge.a std::set contractableParents = getContractableParents(edge.first); - for (vertex_idx pred : G_coarse.parents(edge.first)) - { - if(contractableParents.find(pred) != contractableParents.end()) + for (vertex_idx pred : G_coarse.parents(edge.first)) { + if (contractableParents.find(pred) != contractableParents.end()) { contractable[std::make_pair(pred, edge.first)] = edgeWeights[std::make_pair(pred, edge.first)]; - else + } else { contractable.erase(std::make_pair(pred, edge.first)); + } } - + std::set contractableChildren = getContractableChildren(edge.first); - for (vertex_idx succ : G_coarse.children(edge.first)) - { - if(contractableChildren.find(succ) != contractableChildren.end()) + for (vertex_idx succ : G_coarse.children(edge.first)) { + if (contractableChildren.find(succ) != contractableChildren.end()) { contractable[std::make_pair(edge.first, succ)] = edgeWeights[std::make_pair(edge.first, succ)]; - else + } else { contractable.erase(std::make_pair(edge.first, succ)); + } } } --NrOfNodes; - if(NrOfNodes == target_nr_of_nodes) + if (NrOfNodes == target_nr_of_nodes) { break; + } } } - if(problem_type == PROBLEM_TYPE::PEBBLING) + if (problem_type == PROBLEM_TYPE::PEBBLING) { MergeSourcesInPebbling(); + } std::vector> new_vertex_id; SetIdVector(new_vertex_id); @@ -269,15 +261,14 @@ std::vector> StepByStepCoarser::generate_vertex_c return new_vertex_id; } -template -void StepByStepCoarser::ContractSingleEdge(std::pair edge) -{ +template +void StepByStepCoarser::ContractSingleEdge(std::pair edge) { G_coarse.set_vertex_work_weight(edge.first, G_coarse.vertex_work_weight(edge.first) + G_coarse.vertex_work_weight(edge.second)); G_coarse.set_vertex_work_weight(edge.second, 0); G_coarse.set_vertex_comm_weight(edge.first, G_coarse.vertex_comm_weight(edge.first) + G_coarse.vertex_comm_weight(edge.second)); G_coarse.set_vertex_comm_weight(edge.second, 0); - + G_coarse.set_vertex_mem_weight(edge.first, G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second)); G_coarse.set_vertex_mem_weight(edge.second, 0); @@ -285,24 +276,27 @@ void StepByStepCoarser::ContractSingleEdge(std::pair parents_of_source; - for(vertex_idx pred : G_coarse.parents(edge.first)) + for (vertex_idx pred : G_coarse.parents(edge.first)) { parents_of_source.insert(pred); + } - for(vertex_idx pred : G_coarse.parents(edge.second)) - { - if(pred == edge.first) + for (vertex_idx pred : G_coarse.parents(edge.second)) { + if (pred == edge.first) { continue; - if(parents_of_source.find(pred) != parents_of_source.end()) // combine edges + } + if (parents_of_source.find(pred) != parents_of_source.end()) // combine edges { edgeWeights[std::make_pair(pred, edge.first)] = 0; - for (vertex_idx node: contains[pred]) - for (vertex_idx succ: G_coarse.children(node)) - if (succ == edge.first || succ == edge.second) + for (vertex_idx node : contains[pred]) { + for (vertex_idx succ : G_coarse.children(node)) { + if (succ == edge.first || succ == edge.second) { edgeWeights[std::make_pair(pred, edge.first)] += G_full.vertex_comm_weight(node); - + } + } + } + edgeWeights.erase(std::make_pair(pred, edge.second)); - } - else // add incoming edge + } else // add incoming edge { G_coarse.add_edge(pred, edge.first); edgeWeights[std::make_pair(pred, edge.first)] = edgeWeights[std::make_pair(pred, edge.second)]; @@ -311,17 +305,16 @@ void StepByStepCoarser::ContractSingleEdge(std::pair children_of_source; - for(vertex_idx succ : G_coarse.children(edge.first)) + for (vertex_idx succ : G_coarse.children(edge.first)) { children_of_source.insert(succ); + } - for(vertex_idx succ : G_coarse.children(edge.second)) - { - if(children_of_source.find(succ) != children_of_source.end()) // combine edges + for (vertex_idx succ : G_coarse.children(edge.second)) { + if (children_of_source.find(succ) != children_of_source.end()) // combine edges { - edgeWeights[std::make_pair(edge.first, succ)] += edgeWeights[std::make_pair(edge.second, succ)]; + edgeWeights[std::make_pair(edge.first, succ)] += edgeWeights[std::make_pair(edge.second, succ)]; edgeWeights.erase(std::make_pair(edge.second, succ)); - } - else // add outgoing edge + } else // add outgoing edge { G_coarse.add_edge(edge.first, succ); edgeWeights[std::make_pair(edge.first, succ)] = edgeWeights[std::make_pair(edge.second, succ)]; @@ -330,30 +323,31 @@ void StepByStepCoarser::ContractSingleEdge(std::pair -bool StepByStepCoarser::isContractable(std::pair edge) const -{ - +template +bool StepByStepCoarser::isContractable(std::pair edge) const { std::deque Queue; std::set visited; - for (vertex_idx succ : G_coarse.children(edge.first)) + for (vertex_idx succ : G_coarse.children(edge.first)) { if (node_valid[succ] && top_order_idx[succ] < top_order_idx[edge.second]) { Queue.push_back(succ); visited.insert(succ); } + } while (!Queue.empty()) { const vertex_idx node = Queue.front(); Queue.pop_front(); for (vertex_idx succ : G_coarse.children(node)) { - if (succ == edge.second) + if (succ == edge.second) { return false; + } if (node_valid[succ] && top_order_idx[succ] < top_order_idx[edge.second] && visited.count(succ) == 0) { Queue.push_back(succ); @@ -364,21 +358,21 @@ bool StepByStepCoarser::isContractable(std::pair -std::set > StepByStepCoarser::getContractableChildren(const vertex_idx node) const -{ +template +std::set> StepByStepCoarser::getContractableChildren(const vertex_idx node) const { std::deque Queue; std::set visited; std::set succ_contractable; vertex_idx topOrderMax = top_order_idx[node]; - for (vertex_idx succ : G_coarse.children(node)) - { - if(node_valid[succ]) + for (vertex_idx succ : G_coarse.children(node)) { + if (node_valid[succ]) { succ_contractable.insert(succ); - - if(top_order_idx[succ] > topOrderMax) + } + + if (top_order_idx[succ] > topOrderMax) { topOrderMax = top_order_idx[succ]; + } if (node_valid[succ]) { Queue.push_back(succ); @@ -390,7 +384,6 @@ std::set > StepByStepCoarser::getContractableChil const vertex_idx node_local = Queue.front(); Queue.pop_front(); for (vertex_idx succ : G_coarse.children(node_local)) { - succ_contractable.erase(succ); if (node_valid[succ] && top_order_idx[succ] < topOrderMax && visited.count(succ) == 0) { @@ -403,21 +396,21 @@ std::set > StepByStepCoarser::getContractableChil return succ_contractable; } -template -std::set > StepByStepCoarser::getContractableParents(const vertex_idx node) const -{ +template +std::set> StepByStepCoarser::getContractableParents(const vertex_idx node) const { std::deque Queue; std::set visited; std::set pred_contractable; vertex_idx topOrderMin = top_order_idx[node]; - for (vertex_idx pred : G_coarse.parents(node)) - { - if(node_valid[pred]) + for (vertex_idx pred : G_coarse.parents(node)) { + if (node_valid[pred]) { pred_contractable.insert(pred); - - if(top_order_idx[pred] < topOrderMin) + } + + if (top_order_idx[pred] < topOrderMin) { topOrderMin = top_order_idx[pred]; + } if (node_valid[pred]) { Queue.push_back(pred); @@ -429,7 +422,6 @@ std::set > StepByStepCoarser::getContractablePare const vertex_idx node_local = Queue.front(); Queue.pop_front(); for (vertex_idx pred : G_coarse.parents(node_local)) { - pred_contractable.erase(pred); if (node_valid[pred] && top_order_idx[pred] > topOrderMin && visited.count(pred) == 0) { @@ -442,108 +434,116 @@ std::set > StepByStepCoarser::getContractablePare return pred_contractable; } -template +template void StepByStepCoarser::InitializeContractableEdges() { - ComputeFilteredTopOrderIdx(); - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) - { + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { std::set succ_contractable = getContractableChildren(node); - for(vertex_idx succ : succ_contractable) + for (vertex_idx succ : succ_contractable) { contractable[std::make_pair(node, succ)] = G_full.vertex_comm_weight(node); + } } } -template -void StepByStepCoarser::updateDistantEdgeContractibility(std::pair edge) -{ - +template +void StepByStepCoarser::updateDistantEdgeContractibility(std::pair edge) { std::unordered_set ancestors, descendant; std::deque Queue; - for (vertex_idx succ : G_coarse.children(edge.first)) + for (vertex_idx succ : G_coarse.children(edge.first)) { if (succ != edge.second) { Queue.push_back(succ); descendant.insert(succ); } + } while (!Queue.empty()) { const vertex_idx node = Queue.front(); Queue.pop_front(); - for (vertex_idx succ : G_coarse.children(node)) + for (vertex_idx succ : G_coarse.children(node)) { if (descendant.count(succ) == 0) { Queue.push_back(succ); descendant.insert(succ); } + } } - for (vertex_idx pred : G_coarse.parents(edge.second)) + for (vertex_idx pred : G_coarse.parents(edge.second)) { if (pred != edge.first) { Queue.push_back(pred); ancestors.insert(pred); } + } while (!Queue.empty()) { const vertex_idx node = Queue.front(); Queue.pop_front(); - for (vertex_idx pred : G_coarse.parents(node)) + for (vertex_idx pred : G_coarse.parents(node)) { if (ancestors.count(pred) == 0) { Queue.push_back(pred); ancestors.insert(pred); } + } } - for (const vertex_idx node : ancestors) - for (const vertex_idx succ : G_coarse.children(node)) - if (descendant.count(succ) > 0) + for (const vertex_idx node : ancestors) { + for (const vertex_idx succ : G_coarse.children(node)) { + if (descendant.count(succ) > 0) { contractable.erase(std::make_pair(node, succ)); + } + } + } } -template -std::vector::EdgeToContract> StepByStepCoarser::CreateEdgeCandidateList() const -{ +template +std::vector::EdgeToContract> StepByStepCoarser::CreateEdgeCandidateList() const { std::vector candidates; - for (auto it = contractable.cbegin(); it != contractable.cend(); ++it) - { - if(problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(it->first)) + for (auto it = contractable.cbegin(); it != contractable.cend(); ++it) { + if (problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(it->first)) { continue; + } - candidates.emplace_back(it->first.first, it->first.second, contains[it->first.first].size() + contains[it->first.second].size(), it->second); + candidates.emplace_back( + it->first.first, it->first.second, contains[it->first.first].size() + contains[it->first.second].size(), it->second); } std::sort(candidates.begin(), candidates.end()); return candidates; } -template -std::pair, vertex_idx_t> StepByStepCoarser::PickEdgeToContract(const std::vector& candidates) const -{ +template +std::pair, vertex_idx_t> StepByStepCoarser::PickEdgeToContract( + const std::vector &candidates) const { size_t limit = (candidates.size() + 2) / 3; v_workw_t limitCardinality = candidates[limit].work_weight; - while (limit < candidates.size() - 1 && candidates[limit + 1].work_weight == limitCardinality) + while (limit < candidates.size() - 1 && candidates[limit + 1].work_weight == limitCardinality) { ++limit; + } // an edge case - if (candidates.size() == 1) + if (candidates.size() == 1) { limit = 0; + } EdgeToContract chosen = candidates[0]; unsigned best = 0; - for (unsigned idx = 1; idx <= limit; ++idx) - if (candidates[idx].comm_weight > candidates[best].comm_weight) + for (unsigned idx = 1; idx <= limit; ++idx) { + if (candidates[idx].comm_weight > candidates[best].comm_weight) { best = idx; + } + } chosen = candidates[best]; return chosen.edge; } /** - * @brief Acyclic graph contractor based on (Herrmann, Julien, et al. "Acyclic partitioning of large directed acyclic graphs." 2017 17th IEEE/ACM international symposium on cluster, cloud and grid computing (CCGRID). IEEE, 2017.)) + * @brief Acyclic graph contractor based on (Herrmann, Julien, et al. "Acyclic partitioning of large directed acyclic graphs." + * 2017 17th IEEE/ACM international symposium on cluster, cloud and grid computing (CCGRID). IEEE, 2017.)) * @brief with minor changes and fixes - * + * */ -template -std::vector, vertex_idx_t > > StepByStepCoarser::ClusterCoarsen() const -{ +template +std::vector, vertex_idx_t>> StepByStepCoarser::ClusterCoarsen() const { std::vector singleton(G_full.num_vertices(), true); std::vector leader(G_full.num_vertices()); std::vector weight(G_full.num_vertices()); @@ -554,87 +554,98 @@ std::vector, vertex_idx_t > > StepBySte std::vector maxTopLevel(G_full.num_vertices()); std::vector clusterNewID(G_full.num_vertices()); - std::vector > contractionSteps; + std::vector> contractionSteps; std::vector topLevel = ComputeFilteredTopLevel(); - for(vertex_idx node = 0; node < G_full.num_vertices(); ++node) - if(node_valid[node]) - { - leader[node]=node; - weight[node]=1 /*G_coarse.vertex_work_weight(node)*/; - nrBadNeighbors[node]=0; - leaderBadNeighbors[node]=UINT_MAX; - clusterNewID[node]=node; - minTopLevel[node]=topLevel[node]; - maxTopLevel[node]=topLevel[node]; - } - - for(vertex_idx node = 0; node < G_full.num_vertices(); ++node) - { - if(!node_valid[node] || !singleton[node]) + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { + if (node_valid[node]) { + leader[node] = node; + weight[node] = 1 /*G_coarse.vertex_work_weight(node)*/; + nrBadNeighbors[node] = 0; + leaderBadNeighbors[node] = UINT_MAX; + clusterNewID[node] = node; + minTopLevel[node] = topLevel[node]; + maxTopLevel[node] = topLevel[node]; + } + } + + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { + if (!node_valid[node] || !singleton[node]) { continue; + } - if(nrBadNeighbors[node] > 1) + if (nrBadNeighbors[node] > 1) { continue; + } std::vector validNeighbors; - for(vertex_idx pred: G_coarse.parents(node)) - { + for (vertex_idx pred : G_coarse.parents(node)) { // direct check of condition 1 - if(topLevel[node] < maxTopLevel[leader[pred]]-1 || topLevel[node] > minTopLevel[leader[pred]]+1) + if (topLevel[node] < maxTopLevel[leader[pred]] - 1 || topLevel[node] > minTopLevel[leader[pred]] + 1) { continue; + } // indirect check of condition 2 - if(nrBadNeighbors[node] > 1 || (nrBadNeighbors[node] == 1 && leaderBadNeighbors[node] != leader[pred])) + if (nrBadNeighbors[node] > 1 || (nrBadNeighbors[node] == 1 && leaderBadNeighbors[node] != leader[pred])) { continue; - //check condition 2 for pred if it is a singleton - if(singleton[pred] && nrBadNeighbors[pred] > 0) + } + // check condition 2 for pred if it is a singleton + if (singleton[pred] && nrBadNeighbors[pred] > 0) { continue; + } // check viability for pebbling - if(problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(pred, node))) + if (problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(pred, node))) { continue; + } validNeighbors.push_back(pred); } - for(vertex_idx succ: G_coarse.children(node)) - { + for (vertex_idx succ : G_coarse.children(node)) { // direct check of condition 1 - if(topLevel[node] < maxTopLevel[leader[succ]]-1 || topLevel[node] > minTopLevel[leader[succ]]+1) + if (topLevel[node] < maxTopLevel[leader[succ]] - 1 || topLevel[node] > minTopLevel[leader[succ]] + 1) { continue; + } // indirect check of condition 2 - if(nrBadNeighbors[node] > 1 || (nrBadNeighbors[node] == 1 && leaderBadNeighbors[node] != leader[succ])) + if (nrBadNeighbors[node] > 1 || (nrBadNeighbors[node] == 1 && leaderBadNeighbors[node] != leader[succ])) { continue; - //check condition 2 for pred if it is a singleton - if(singleton[succ] && nrBadNeighbors[succ] > 0) + } + // check condition 2 for pred if it is a singleton + if (singleton[succ] && nrBadNeighbors[succ] > 0) { continue; + } // check viability for pebbling - if(problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(node, succ))) + if (problem_type == PROBLEM_TYPE::PEBBLING && IncontractableForPebbling(std::make_pair(node, succ))) { continue; + } validNeighbors.push_back(succ); } vertex_idx bestNeighbor = std::numeric_limits::max(); - for(vertex_idx neigh : validNeighbors) - if(bestNeighbor == std::numeric_limits::max() || weight[leader[neigh]] < weight[leader[bestNeighbor]]) + for (vertex_idx neigh : validNeighbors) { + if (bestNeighbor == std::numeric_limits::max() || weight[leader[neigh]] < weight[leader[bestNeighbor]]) { bestNeighbor = neigh; + } + } - if(bestNeighbor == std::numeric_limits::max()) + if (bestNeighbor == std::numeric_limits::max()) { continue; + } vertex_idx newLead = leader[bestNeighbor]; leader[node] = newLead; weight[newLead] += weight[node]; bool is_parent = false; - for(vertex_idx pred : G_coarse.parents(node)) - if(pred == bestNeighbor) + for (vertex_idx pred : G_coarse.parents(node)) { + if (pred == bestNeighbor) { is_parent = true; + } + } - if(is_parent) + if (is_parent) { contractionSteps.emplace_back(clusterNewID[newLead], node); - else - { + } else { contractionSteps.emplace_back(node, clusterNewID[newLead]); clusterNewID[newLead] = node; } @@ -642,64 +653,59 @@ std::vector, vertex_idx_t > > StepBySte minTopLevel[newLead] = std::min(minTopLevel[newLead], topLevel[node]); maxTopLevel[newLead] = std::max(maxTopLevel[newLead], topLevel[node]); - for(vertex_idx pred: G_coarse.parents(node)) - { - if(std::abs( static_cast(topLevel[pred]) - static_cast(maxTopLevel[newLead]) ) != 1 && - std::abs( static_cast(topLevel[pred]) - static_cast(minTopLevel[newLead]) ) != 1) + for (vertex_idx pred : G_coarse.parents(node)) { + if (std::abs(static_cast(topLevel[pred]) - static_cast(maxTopLevel[newLead])) != 1 + && std::abs(static_cast(topLevel[pred]) - static_cast(minTopLevel[newLead])) != 1) { continue; + } - if(nrBadNeighbors[pred] == 0) - { + if (nrBadNeighbors[pred] == 0) { ++nrBadNeighbors[pred]; leaderBadNeighbors[pred] = newLead; - } - else if(nrBadNeighbors[pred] == 1 && leaderBadNeighbors[pred] != newLead) + } else if (nrBadNeighbors[pred] == 1 && leaderBadNeighbors[pred] != newLead) { ++nrBadNeighbors[pred]; + } } - for(vertex_idx succ: G_coarse.children(node)) - { - if(std::abs( static_cast(topLevel[succ]) - static_cast(maxTopLevel[newLead]) ) != 1 && - std::abs( static_cast(topLevel[succ]) - static_cast(minTopLevel[newLead]) ) != 1) + for (vertex_idx succ : G_coarse.children(node)) { + if (std::abs(static_cast(topLevel[succ]) - static_cast(maxTopLevel[newLead])) != 1 + && std::abs(static_cast(topLevel[succ]) - static_cast(minTopLevel[newLead])) != 1) { continue; + } - if(nrBadNeighbors[succ]==0) - { + if (nrBadNeighbors[succ] == 0) { ++nrBadNeighbors[succ]; leaderBadNeighbors[succ] = newLead; - } - else if(nrBadNeighbors[succ] == 1 && leaderBadNeighbors[succ] != newLead) + } else if (nrBadNeighbors[succ] == 1 && leaderBadNeighbors[succ] != newLead) { ++nrBadNeighbors[succ]; + } } - if(singleton[bestNeighbor]) - { - for(vertex_idx pred: G_coarse.parents(bestNeighbor) ) - { - if(std::abs( static_cast(topLevel[pred]) - static_cast(maxTopLevel[newLead]) ) != 1 && - std::abs( static_cast(topLevel[pred]) - static_cast(minTopLevel[newLead]) ) != 1) + if (singleton[bestNeighbor]) { + for (vertex_idx pred : G_coarse.parents(bestNeighbor)) { + if (std::abs(static_cast(topLevel[pred]) - static_cast(maxTopLevel[newLead])) != 1 + && std::abs(static_cast(topLevel[pred]) - static_cast(minTopLevel[newLead])) != 1) { continue; + } - if(nrBadNeighbors[pred] == 0) - { + if (nrBadNeighbors[pred] == 0) { ++nrBadNeighbors[pred]; leaderBadNeighbors[pred] = newLead; - } - else if(nrBadNeighbors[pred] == 1 && leaderBadNeighbors[pred] != newLead) + } else if (nrBadNeighbors[pred] == 1 && leaderBadNeighbors[pred] != newLead) { ++nrBadNeighbors[pred]; + } } - for(vertex_idx succ: G_coarse.children(bestNeighbor)) - { - if(std::abs( static_cast(topLevel[succ]) - static_cast(maxTopLevel[newLead]) ) != 1 && - std::abs( static_cast(topLevel[succ]) - static_cast(minTopLevel[newLead]) ) != 1) + for (vertex_idx succ : G_coarse.children(bestNeighbor)) { + if (std::abs(static_cast(topLevel[succ]) - static_cast(maxTopLevel[newLead])) != 1 + && std::abs(static_cast(topLevel[succ]) - static_cast(minTopLevel[newLead])) != 1) { continue; + } - if(nrBadNeighbors[succ]==0) - { + if (nrBadNeighbors[succ] == 0) { ++nrBadNeighbors[succ]; leaderBadNeighbors[succ] = newLead; - } - else if(nrBadNeighbors[succ] == 1 && leaderBadNeighbors[succ] != newLead) + } else if (nrBadNeighbors[succ] == 1 && leaderBadNeighbors[succ] != newLead) { ++nrBadNeighbors[succ]; + } } singleton[bestNeighbor] = false; } @@ -709,290 +715,312 @@ std::vector, vertex_idx_t > > StepBySte return contractionSteps; } -template -std::vector StepByStepCoarser::ComputeFilteredTopLevel() const -{ +template +std::vector StepByStepCoarser::ComputeFilteredTopLevel() const { std::vector TopLevel(G_full.num_vertices()); for (const vertex_idx node : top_sort_view(G_coarse)) { - if(!node_valid[node]) + if (!node_valid[node]) { continue; + } TopLevel[node] = 0; - for (const vertex_idx pred: G_coarse.parents(node) ) + for (const vertex_idx pred : G_coarse.parents(node)) { TopLevel[node] = std::max(TopLevel[node], TopLevel[pred] + 1); - + } } return TopLevel; } -template +template void StepByStepCoarser::ComputeFilteredTopOrderIdx() { top_order_idx = GetFilteredTopOrderIdx(G_coarse, node_valid); } -template -std::vector > StepByStepCoarser::GetFilteredTopOrderIdx(const Graph_t& G, const std::vector& is_valid) { +template +std::vector> StepByStepCoarser::GetFilteredTopOrderIdx(const Graph_t &G, + const std::vector &is_valid) { std::vector top_order = GetFilteredTopOrder(is_valid, G); std::vector idx(G.num_vertices()); - for (vertex_idx node = 0; node < top_order.size(); ++node) + for (vertex_idx node = 0; node < top_order.size(); ++node) { idx[top_order[node]] = node; + } return idx; } - -template -void StepByStepCoarser::coarsenForPebbling(const Graph_t& dag_in, Graph_t &coarsened_dag, - std::vector> &new_vertex_id) -{ - +template +void StepByStepCoarser::coarsenForPebbling(const Graph_t &dag_in, + Graph_t &coarsened_dag, + std::vector> &new_vertex_id) { problem_type = PROBLEM_TYPE::PEBBLING; coarsening_strategy = COARSENING_STRATEGY::EDGE_BY_EDGE; unsigned nr_sources = 0; - for(vertex_idx node = 0; node < dag_in.num_vertices(); ++node) - if(dag_in.in_degree(node) == 0) + for (vertex_idx node = 0; node < dag_in.num_vertices(); ++node) { + if (dag_in.in_degree(node) == 0) { ++nr_sources; + } + } target_nr_of_nodes = std::max(target_nr_of_nodes, nr_sources + 1); CoarserGenContractionMap::coarsenDag(dag_in, coarsened_dag, new_vertex_id); } -template -bool StepByStepCoarser::IncontractableForPebbling(const std::pair& edge) const -{ - if(G_coarse.in_degree(edge.first) == 0) +template +bool StepByStepCoarser::IncontractableForPebbling(const std::pair &edge) const { + if (G_coarse.in_degree(edge.first) == 0) { return true; + } v_memw_t sum_weight = G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second); std::set parents; - for(vertex_idx pred : G_coarse.parents(edge.first)) + for (vertex_idx pred : G_coarse.parents(edge.first)) { parents.insert(pred); - for(vertex_idx pred : G_coarse.parents(edge.second)) - if(pred != edge.first) + } + for (vertex_idx pred : G_coarse.parents(edge.second)) { + if (pred != edge.first) { parents.insert(pred); - for(vertex_idx node : parents) + } + } + for (vertex_idx node : parents) { sum_weight += G_coarse.vertex_mem_weight(node); + } - if(sum_weight > fast_mem_capacity) + if (sum_weight > fast_mem_capacity) { return true; - + } + std::set children; - for(vertex_idx succ: G_coarse.children(edge.second)) + for (vertex_idx succ : G_coarse.children(edge.second)) { children.insert(succ); - for(vertex_idx succ: G_coarse.children(edge.first)) - if(succ != edge.second) + } + for (vertex_idx succ : G_coarse.children(edge.first)) { + if (succ != edge.second) { children.insert(succ); + } + } - for(vertex_idx child : children) - { - sum_weight = G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second) + G_coarse.vertex_mem_weight(child); - for(vertex_idx pred: G_coarse.parents(child)) - { - if(pred != edge.first && pred != edge.second) + for (vertex_idx child : children) { + sum_weight = G_coarse.vertex_mem_weight(edge.first) + G_coarse.vertex_mem_weight(edge.second) + + G_coarse.vertex_mem_weight(child); + for (vertex_idx pred : G_coarse.parents(child)) { + if (pred != edge.first && pred != edge.second) { sum_weight += G_coarse.vertex_mem_weight(pred); + } } - - if(sum_weight > fast_mem_capacity) + + if (sum_weight > fast_mem_capacity) { return true; + } } return false; } -template -void StepByStepCoarser::MergeSourcesInPebbling() -{ +template +void StepByStepCoarser::MergeSourcesInPebbling() { // initialize memory requirement sums to check viability later - std::vector > memory_sum(G_coarse.num_vertices(), 0); + std::vector> memory_sum(G_coarse.num_vertices(), 0); std::vector sources; - for(vertex_idx node = 0; node < G_coarse.num_vertices(); ++node) - { - if(!node_valid[node]) + for (vertex_idx node = 0; node < G_coarse.num_vertices(); ++node) { + if (!node_valid[node]) { continue; + } - if(G_coarse.in_degree(node)>0) - { + if (G_coarse.in_degree(node) > 0) { memory_sum[node] = G_coarse.vertex_mem_weight(node); - for(vertex_idx pred: G_coarse.parents(node)) + for (vertex_idx pred : G_coarse.parents(node)) { memory_sum[node] += G_coarse.vertex_mem_weight(pred); - } - else + } + } else { sources.push_back(node); + } } - + std::set invalidated_sources; bool could_merge = true; - while(could_merge) - { + while (could_merge) { could_merge = false; - for(unsigned idx1 = 0; idx1 < sources.size(); ++idx1) - { + for (unsigned idx1 = 0; idx1 < sources.size(); ++idx1) { vertex_idx source_a = sources[idx1]; - if(invalidated_sources.find(source_a) != invalidated_sources.end()) + if (invalidated_sources.find(source_a) != invalidated_sources.end()) { continue; - - for(unsigned idx2 = idx1 + 1; idx2 < sources.size(); ++idx2) - { + } + + for (unsigned idx2 = idx1 + 1; idx2 < sources.size(); ++idx2) { vertex_idx source_b = sources[idx2]; - if(invalidated_sources.find(source_b) != invalidated_sources.end()) + if (invalidated_sources.find(source_b) != invalidated_sources.end()) { continue; - + } + // check if we can merge source_a and source_b std::set a_children, b_children; - for(vertex_idx succ: G_coarse.children(source_a)) + for (vertex_idx succ : G_coarse.children(source_a)) { a_children.insert(succ); - for(vertex_idx succ: G_coarse.children(source_b)) + } + for (vertex_idx succ : G_coarse.children(source_b)) { b_children.insert(succ); - + } + std::set only_a, only_b, both; - for(vertex_idx succ: G_coarse.children(source_a)) - { - if(b_children.find(succ) == b_children.end()) + for (vertex_idx succ : G_coarse.children(source_a)) { + if (b_children.find(succ) == b_children.end()) { only_a.insert(succ); - else + } else { both.insert(succ); + } } - for(vertex_idx succ: G_coarse.children(source_b)) - { - if(a_children.find(succ) == a_children.end()) + for (vertex_idx succ : G_coarse.children(source_b)) { + if (a_children.find(succ) == a_children.end()) { only_b.insert(succ); + } } bool violates_constraint = false; - for(vertex_idx node : only_a) - if(memory_sum[node] + G_coarse.vertex_mem_weight(source_b) > fast_mem_capacity) + for (vertex_idx node : only_a) { + if (memory_sum[node] + G_coarse.vertex_mem_weight(source_b) > fast_mem_capacity) { violates_constraint = true; - for(vertex_idx node : only_b) - if(memory_sum[node] + G_coarse.vertex_mem_weight(source_a) > fast_mem_capacity) + } + } + for (vertex_idx node : only_b) { + if (memory_sum[node] + G_coarse.vertex_mem_weight(source_a) > fast_mem_capacity) { violates_constraint = true; + } + } - if(violates_constraint) + if (violates_constraint) { continue; + } // check if we want to merge source_a and source_b - double sim_diff = (only_a.size() + only_b.size() == 0) ? 0.0001 : static_cast(only_a.size() + only_b.size()); + double sim_diff = (only_a.size() + only_b.size() == 0) ? 0.0001 + : static_cast(only_a.size() + only_b.size()); double ratio = static_cast(both.size()) / sim_diff; - - if(ratio > 2) - { + + if (ratio > 2) { ContractSingleEdge(std::make_pair(source_a, source_b)); invalidated_sources.insert(source_b); could_merge = true; - for(vertex_idx node : only_a) + for (vertex_idx node : only_a) { memory_sum[node] += G_coarse.vertex_mem_weight(source_b); - for(vertex_idx node : only_b) + } + for (vertex_idx node : only_b) { memory_sum[node] += G_coarse.vertex_mem_weight(source_a); + } } } } } } -template -Graph_t StepByStepCoarser::Contract(const std::vector> &new_vertex_id) const -{ +template +Graph_t StepByStepCoarser::Contract(const std::vector> &new_vertex_id) const { Graph_t G_contracted; std::vector is_valid(G_full.num_vertices(), false); - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { is_valid[new_vertex_id[node]] = true; + } - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) - if(is_valid[node]) + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { + if (is_valid[node]) { G_contracted.add_vertex(0, 0, 0, 0); + } + } - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) - { - G_contracted.set_vertex_work_weight(new_vertex_id[node], - G_contracted.vertex_work_weight(new_vertex_id[node]) + G_full.vertex_work_weight(node)); - G_contracted.set_vertex_comm_weight(new_vertex_id[node], - G_contracted.vertex_comm_weight(new_vertex_id[node]) + G_full.vertex_comm_weight(node)); + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { + G_contracted.set_vertex_work_weight( + new_vertex_id[node], G_contracted.vertex_work_weight(new_vertex_id[node]) + G_full.vertex_work_weight(node)); + G_contracted.set_vertex_comm_weight( + new_vertex_id[node], G_contracted.vertex_comm_weight(new_vertex_id[node]) + G_full.vertex_comm_weight(node)); G_contracted.set_vertex_mem_weight(new_vertex_id[node], - G_contracted.vertex_mem_weight(new_vertex_id[node]) + G_full.vertex_mem_weight(node)); + G_contracted.vertex_mem_weight(new_vertex_id[node]) + G_full.vertex_mem_weight(node)); G_contracted.set_vertex_type(new_vertex_id[node], G_full.vertex_type(node)); } - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) - for (const auto &out_edge : out_edges(node, G_full)) - { + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { + for (const auto &out_edge : out_edges(node, G_full)) { const vertex_idx succ = target(out_edge, G_full); - if (new_vertex_id[node] == new_vertex_id[succ]) + if (new_vertex_id[node] == new_vertex_id[succ]) { continue; - - if constexpr (has_edge_weights_v) { + } + if constexpr (has_edge_weights_v) { const auto pair = edge_desc(new_vertex_id[node], new_vertex_id[succ], G_contracted); if (pair.second) { - G_contracted.set_edge_comm_weight(pair.first, G_contracted.edge_comm_weight(pair.first) + - G_full.edge_comm_weight(out_edge)); + G_contracted.set_edge_comm_weight( + pair.first, G_contracted.edge_comm_weight(pair.first) + G_full.edge_comm_weight(out_edge)); } else { G_contracted.add_edge(new_vertex_id[node], new_vertex_id[succ], G_full.edge_comm_weight(out_edge)); } } else { - if (not edge(new_vertex_id[node], new_vertex_id[succ], G_contracted)) { G_contracted.add_edge(new_vertex_id[node], new_vertex_id[succ]); } } } - + } + return G_contracted; } -template -void StepByStepCoarser::SetIdVector(std::vector> &new_vertex_id) const -{ +template +void StepByStepCoarser::SetIdVector(std::vector> &new_vertex_id) const { new_vertex_id.clear(); new_vertex_id.resize(G_full.num_vertices()); new_vertex_id = GetIntermediateIDs(contractionHistory.size()); } -template -std::vector > StepByStepCoarser::GetIntermediateIDs(vertex_idx until_which_step) const { - +template +std::vector> StepByStepCoarser::GetIntermediateIDs(vertex_idx until_which_step) const { std::vector target(G_full.num_vertices()), pointsTo(G_full.num_vertices(), std::numeric_limits::max()); - for(vertex_idx iterate = 0; iterate < contractionHistory.size() && iterate < until_which_step; ++iterate) - { - const std::pair& contractionStep = contractionHistory[iterate]; + for (vertex_idx iterate = 0; iterate < contractionHistory.size() && iterate < until_which_step; ++iterate) { + const std::pair &contractionStep = contractionHistory[iterate]; pointsTo[contractionStep.second] = contractionStep.first; } for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { target[node] = node; - while (pointsTo[target[node]] != std::numeric_limits::max()) + while (pointsTo[target[node]] != std::numeric_limits::max()) { target[node] = pointsTo[target[node]]; + } } - if (contractionHistory.empty() || until_which_step == 0) + if (contractionHistory.empty() || until_which_step == 0) { return target; + } std::vector is_valid(G_full.num_vertices(), false); - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { is_valid[target[node]] = true; + } std::vector new_id(G_full.num_vertices()); vertex_idx current_index = 0; - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) - if(is_valid[node]) + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { + if (is_valid[node]) { new_id[node] = current_index++; + } + } - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { target[node] = new_id[target[node]]; + } boost_graph_t temp_dag; temp_dag = Contract(target); std::vector all_valid(temp_dag.num_vertices(), true); std::vector top_idx = GetFilteredTopOrderIdx(temp_dag, all_valid); - for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) + for (vertex_idx node = 0; node < G_full.num_vertices(); ++node) { target[node] = top_idx[target[node]]; + } return target; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/coarser/coarser_util.hpp b/include/osp/coarser/coarser_util.hpp index e3e7fa79..e2b1fa31 100644 --- a/include/osp/coarser/coarser_util.hpp +++ b/include/osp/coarser/coarser_util.hpp @@ -31,9 +31,10 @@ limitations under the License. #include "osp/concepts/specific_graph_impl.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" -namespace osp { namespace coarser_util { +namespace osp { +namespace coarser_util { -template +template bool check_valid_contraction_map(const std::vector> &vertex_contraction_map) { std::set> image(vertex_contraction_map.cbegin(), vertex_contraction_map.cend()); const vertex_idx_t image_size = static_cast>(image.size()); @@ -42,15 +43,13 @@ bool check_valid_contraction_map(const std::vector> &v }); } -template +template struct acc_sum { - T operator()(const T &a, const T &b) { return a + b; } }; -template +template struct acc_max { - T operator()(const T &a, const T &b) { return std::max(a, b); } }; @@ -63,13 +62,15 @@ struct acc_max { * @return A status code indicating the success or failure of the coarsening operation. */ -template -bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, +template +bool construct_coarse_dag(const Graph_t_in &dag_in, + Graph_t_out &coarsened_dag, const std::vector> &vertex_contraction_map) { - static_assert(is_directed_graph_v && is_directed_graph_v, "Graph types need to satisfy the is_directed_graph concept."); + static_assert(is_directed_graph_v && is_directed_graph_v, + "Graph types need to satisfy the is_directed_graph concept."); static_assert(is_computational_dag_v, "Graph_t_in must be a computational DAG"); - static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, "Graph_t_out must be a (direct) constructable computational DAG"); + static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, + "Graph_t_out must be a (direct) constructable computational DAG"); assert(check_valid_contraction_map(vertex_contraction_map)); @@ -79,8 +80,8 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, } if constexpr (is_direct_constructable_cdag_v) { - const vertex_idx_t num_vert_quotient = - (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; + const vertex_idx_t num_vert_quotient + = (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; std::set, vertex_idx_t>> quotient_edges; @@ -96,9 +97,12 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, coarsened_dag = Graph_t_out(num_vert_quotient, quotient_edges); if constexpr (has_vertex_weights_v && is_modifiable_cdag_vertex_v) { - static_assert(std::is_same_v, v_workw_t>, "Work weight types of in-graph and out-graph must be the same."); - static_assert(std::is_same_v, v_commw_t>, "Vertex communication types of in-graph and out-graph must be the same."); - static_assert(std::is_same_v, v_memw_t>, "Memory weight types of in-graph and out-graph must be the same."); + static_assert(std::is_same_v, v_workw_t>, + "Work weight types of in-graph and out-graph must be the same."); + static_assert(std::is_same_v, v_commw_t>, + "Vertex communication types of in-graph and out-graph must be the same."); + static_assert(std::is_same_v, v_memw_t>, + "Memory weight types of in-graph and out-graph must be the same."); for (const vertex_idx_t &vert : coarsened_dag.vertices()) { coarsened_dag.set_vertex_work_weight(vert, 0); @@ -110,17 +114,17 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, coarsened_dag.set_vertex_work_weight( vertex_contraction_map[vert], v_work_acc_method()(coarsened_dag.vertex_work_weight(vertex_contraction_map[vert]), - dag_in.vertex_work_weight(vert))); + dag_in.vertex_work_weight(vert))); coarsened_dag.set_vertex_comm_weight( vertex_contraction_map[vert], v_comm_acc_method()(coarsened_dag.vertex_comm_weight(vertex_contraction_map[vert]), - dag_in.vertex_comm_weight(vert))); + dag_in.vertex_comm_weight(vert))); coarsened_dag.set_vertex_mem_weight( vertex_contraction_map[vert], v_mem_acc_method()(coarsened_dag.vertex_mem_weight(vertex_contraction_map[vert]), - dag_in.vertex_mem_weight(vert))); + dag_in.vertex_mem_weight(vert))); } } @@ -139,7 +143,7 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, if constexpr (has_edge_weights_v && is_modifiable_cdag_comm_edge_v) { static_assert(std::is_same_v, e_commw_t>, - "Edge weight type of in graph and out graph must be the same!"); + "Edge weight type of in graph and out graph must be the same!"); for (const auto &edge : edges(coarsened_dag)) { coarsened_dag.set_edge_comm_weight(edge, 0); @@ -148,43 +152,42 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, for (const auto &ori_edge : edges(dag_in)) { vertex_idx_t src = vertex_contraction_map[source(ori_edge, dag_in)]; vertex_idx_t tgt = vertex_contraction_map[target(ori_edge, dag_in)]; - - if (src == tgt) continue; + + if (src == tgt) { + continue; + } const auto [cont_edge, found] = edge_desc(src, tgt, coarsened_dag); assert(found && "The edge should already exist"); - coarsened_dag.set_edge_comm_weight(cont_edge, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge))); + coarsened_dag.set_edge_comm_weight( + cont_edge, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge))); } - } + } return true; } if constexpr (is_constructable_cdag_v) { coarsened_dag = Graph_t_out(); - const vertex_idx_t num_vert_quotient = - (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; + const vertex_idx_t num_vert_quotient + = (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; for (vertex_idx_t vert = 0; vert < num_vert_quotient; ++vert) { coarsened_dag.add_vertex(0, 0, 0); } for (const vertex_idx_t &vert : dag_in.vertices()) { + coarsened_dag.set_vertex_work_weight(vertex_contraction_map[vert], + v_work_acc_method()(coarsened_dag.vertex_work_weight(vertex_contraction_map[vert]), + dag_in.vertex_work_weight(vert))); - coarsened_dag.set_vertex_work_weight( - vertex_contraction_map[vert], - v_work_acc_method()(coarsened_dag.vertex_work_weight(vertex_contraction_map[vert]), - dag_in.vertex_work_weight(vert))); - - coarsened_dag.set_vertex_comm_weight( - vertex_contraction_map[vert], - v_comm_acc_method()(coarsened_dag.vertex_comm_weight(vertex_contraction_map[vert]), - dag_in.vertex_comm_weight(vert))); + coarsened_dag.set_vertex_comm_weight(vertex_contraction_map[vert], + v_comm_acc_method()(coarsened_dag.vertex_comm_weight(vertex_contraction_map[vert]), + dag_in.vertex_comm_weight(vert))); coarsened_dag.set_vertex_mem_weight( vertex_contraction_map[vert], - v_mem_acc_method()(coarsened_dag.vertex_mem_weight(vertex_contraction_map[vert]), - dag_in.vertex_mem_weight(vert))); + v_mem_acc_method()(coarsened_dag.vertex_mem_weight(vertex_contraction_map[vert]), dag_in.vertex_mem_weight(vert))); } if constexpr (has_typed_vertices_v && is_constructable_cdag_typed_vertex_v) { @@ -211,15 +214,14 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, "Edge weight type of in graph and out graph must be the same!"); edge_desc_t ori_edge = edge_desc(vert, chld, dag_in).first; - const auto pair = - edge_desc(vertex_contraction_map[vert], vertex_contraction_map[chld], coarsened_dag); + const auto pair = edge_desc(vertex_contraction_map[vert], vertex_contraction_map[chld], coarsened_dag); if (pair.second) { - coarsened_dag.set_edge_comm_weight(pair.first, - e_comm_acc_method()(coarsened_dag.edge_comm_weight(pair.first), - dag_in.edge_comm_weight(ori_edge))); + coarsened_dag.set_edge_comm_weight( + pair.first, + e_comm_acc_method()(coarsened_dag.edge_comm_weight(pair.first), dag_in.edge_comm_weight(ori_edge))); } else { - coarsened_dag.add_edge(vertex_contraction_map[vert], vertex_contraction_map[chld], - dag_in.edge_comm_weight(ori_edge)); + coarsened_dag.add_edge( + vertex_contraction_map[vert], vertex_contraction_map[chld], dag_in.edge_comm_weight(ori_edge)); } } else { if (not edge(vertex_contraction_map[vert], vertex_contraction_map[chld], coarsened_dag)) { @@ -233,13 +235,15 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, return false; } -template>, - typename v_comm_acc_method = acc_sum>, - typename v_mem_acc_method = acc_sum>, - typename e_comm_acc_method = acc_sum>> -bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, +template >, + typename v_comm_acc_method = acc_sum>, + typename v_mem_acc_method = acc_sum>, + typename e_comm_acc_method = acc_sum>> +bool construct_coarse_dag(const Graph_t_in &dag_in, + Graph_t_out &coarsened_dag, std::vector> &vertex_contraction_map) { - if constexpr (is_Compact_Sparse_Graph_reorder_v) { static_assert(is_directed_graph_v && is_directed_graph_v, "Graph types need to satisfy the is_directed_graph concept."); @@ -253,8 +257,8 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, coarsened_dag = Graph_t_out(); return true; } - const vertex_idx_t num_vert_quotient = - (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; + const vertex_idx_t num_vert_quotient + = (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; std::set, vertex_idx_t>> quotient_edges; @@ -269,16 +273,19 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, coarsened_dag = Graph_t_out(num_vert_quotient, quotient_edges); - const auto& pushforward_map = coarsened_dag.get_pushforward_permutation(); + const auto &pushforward_map = coarsened_dag.get_pushforward_permutation(); std::vector> combined_expansion_map(dag_in.num_vertices()); for (const auto &vert : dag_in.vertices()) { combined_expansion_map[vert] = pushforward_map[vertex_contraction_map[vert]]; } if constexpr (has_vertex_weights_v && is_modifiable_cdag_vertex_v) { - static_assert(std::is_same_v, v_workw_t>, "Work weight types of in-graph and out-graph must be the same."); - static_assert(std::is_same_v, v_commw_t>, "Vertex communication types of in-graph and out-graph must be the same."); - static_assert(std::is_same_v, v_memw_t>, "Memory weight types of in-graph and out-graph must be the same."); + static_assert(std::is_same_v, v_workw_t>, + "Work weight types of in-graph and out-graph must be the same."); + static_assert(std::is_same_v, v_commw_t>, + "Vertex communication types of in-graph and out-graph must be the same."); + static_assert(std::is_same_v, v_memw_t>, + "Memory weight types of in-graph and out-graph must be the same."); for (const vertex_idx_t &vert : coarsened_dag.vertices()) { coarsened_dag.set_vertex_work_weight(vert, 0); @@ -290,23 +297,23 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, coarsened_dag.set_vertex_work_weight( vertex_contraction_map[vert], v_work_acc_method()(coarsened_dag.vertex_work_weight(combined_expansion_map[vert]), - dag_in.vertex_work_weight(vert))); + dag_in.vertex_work_weight(vert))); coarsened_dag.set_vertex_comm_weight( vertex_contraction_map[vert], v_comm_acc_method()(coarsened_dag.vertex_comm_weight(combined_expansion_map[vert]), - dag_in.vertex_comm_weight(vert))); + dag_in.vertex_comm_weight(vert))); coarsened_dag.set_vertex_mem_weight( vertex_contraction_map[vert], v_mem_acc_method()(coarsened_dag.vertex_mem_weight(combined_expansion_map[vert]), - dag_in.vertex_mem_weight(vert))); + dag_in.vertex_mem_weight(vert))); } } if constexpr (has_typed_vertices_v && is_modifiable_cdag_typed_vertex_v) { static_assert(std::is_same_v, v_type_t>, - "Vertex type types of in graph and out graph must be the same!"); + "Vertex type types of in graph and out graph must be the same!"); for (const vertex_idx_t &vert : dag_in.vertices()) { coarsened_dag.set_vertex_type(vertex_contraction_map[vert], dag_in.vertex_type(vert)); @@ -317,16 +324,17 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, // && "Contracted vertices must be of the same type"); } - if constexpr (has_edge_weights_v && has_edge_weights_v) { static_assert(std::is_same_v, e_commw_t>, - "Edge weight type of in graph and out graph must be the same!"); - + "Edge weight type of in graph and out graph must be the same!"); + for (const auto &ori_edge : edges(dag_in)) { vertex_idx_t src = vertex_contraction_map[source(ori_edge, dag_in)]; vertex_idx_t tgt = vertex_contraction_map[target(ori_edge, dag_in)]; - - if (src == tgt) continue; + + if (src == tgt) { + continue; + } coarsened_dag.set_edge_comm_weight(src, tgt, 0); } @@ -334,27 +342,28 @@ bool construct_coarse_dag(const Graph_t_in &dag_in, Graph_t_out &coarsened_dag, for (const auto &ori_edge : edges(dag_in)) { vertex_idx_t src = vertex_contraction_map[source(ori_edge, dag_in)]; vertex_idx_t tgt = vertex_contraction_map[target(ori_edge, dag_in)]; - - if (src == tgt) continue; + + if (src == tgt) { + continue; + } const auto cont_edge = coarsened_dag.edge(pushforward_map[src], pushforward_map[tgt]); - assert(source(cont_edge, coarsened_dag) == pushforward_map[src] && target(cont_edge, coarsened_dag) == pushforward_map[tgt]); - coarsened_dag.set_edge_comm_weight(src, tgt, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge))); + assert(source(cont_edge, coarsened_dag) == pushforward_map[src] + && target(cont_edge, coarsened_dag) == pushforward_map[tgt]); + coarsened_dag.set_edge_comm_weight( + src, tgt, e_comm_acc_method()(coarsened_dag.edge_comm_weight(cont_edge), dag_in.edge_comm_weight(ori_edge))); } } std::swap(vertex_contraction_map, combined_expansion_map); return true; } else { - return construct_coarse_dag(dag_in, coarsened_dag, - static_cast> &>( - vertex_contraction_map)); + return construct_coarse_dag( + dag_in, coarsened_dag, static_cast> &>(vertex_contraction_map)); } } - -template +template bool check_valid_expansion_map(const std::vector>> &vertex_expansion_map) { std::size_t cntr = 0; @@ -385,15 +394,15 @@ bool check_valid_expansion_map(const std::vector -std::vector>> -invert_vertex_contraction_map(const std::vector> &vertex_contraction_map) { +template +std::vector>> invert_vertex_contraction_map( + const std::vector> &vertex_contraction_map) { assert(check_valid_contraction_map(vertex_contraction_map)); - - - vertex_idx_t num_vert = vertex_contraction_map.size() == 0? 0 : - *std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend()) + 1; + vertex_idx_t num_vert + = vertex_contraction_map.size() == 0 + ? 0 + : *std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend()) + 1; std::vector>> expansion_map(num_vert); @@ -404,9 +413,9 @@ invert_vertex_contraction_map(const std::vector> &vert return expansion_map; } -template -std::vector> -invert_vertex_expansion_map(const std::vector>> &vertex_expansion_map) { +template +std::vector> invert_vertex_expansion_map( + const std::vector>> &vertex_expansion_map) { assert(check_valid_expansion_map(vertex_expansion_map)); vertex_idx_t num_vert = 0; @@ -426,8 +435,7 @@ invert_vertex_expansion_map(const std::vector +template void reorder_expansion_map(const Graph_t_in &graph, std::vector>> &vertex_expansion_map) { assert(check_valid_expansion_map(vertex_expansion_map)); @@ -452,7 +460,7 @@ void reorder_expansion_map(const Graph_t_in &graph, std::vector vertex_expansion_map[rhs]; // because priority queue is a max_priority queue + return vertex_expansion_map[lhs] > vertex_expansion_map[rhs]; // because priority queue is a max_priority queue }; std::priority_queue, decltype(cmp)> ready(cmp); @@ -477,7 +485,7 @@ void reorder_expansion_map(const Graph_t_in &graph, std::vector +template bool pull_back_schedule(const BspSchedule &schedule_in, const std::vector>> &vertex_map, BspSchedule &schedule_out) { - for (unsigned v = 0; v < vertex_map.size(); ++v) { - const auto proc = schedule_in.assignedProcessor(v); const auto step = schedule_in.assignedSuperstep(v); @@ -510,11 +512,10 @@ bool pull_back_schedule(const BspSchedule &schedule_in, return true; } -template +template bool pull_back_schedule(const BspSchedule &schedule_in, const std::vector> &reverse_vertex_map, BspSchedule &schedule_out) { - for (unsigned idx = 0; idx < reverse_vertex_map.size(); ++idx) { const auto &v = reverse_vertex_map[idx]; @@ -525,17 +526,18 @@ bool pull_back_schedule(const BspSchedule &schedule_in, return true; } -template -std::vector compose_vertex_contraction_map(const std::vector &firstMap, const std::vector &secondMap) { +template +std::vector compose_vertex_contraction_map(const std::vector &firstMap, + const std::vector &secondMap) { static_assert(std::is_integral_v); std::vector composedMap(firstMap.size()); for (std::size_t i = 0; i < composedMap.size(); ++i) { - composedMap[i] = secondMap[ firstMap[i] ]; + composedMap[i] = secondMap[firstMap[i]]; } return composedMap; } -} // end namespace coarser_util -} // end namespace osp \ No newline at end of file +} // end namespace coarser_util +} // end namespace osp diff --git a/include/osp/coarser/funnel/FunnelBfs.hpp b/include/osp/coarser/funnel/FunnelBfs.hpp index da1c38be..07ba93d0 100644 --- a/include/osp/coarser/funnel/FunnelBfs.hpp +++ b/include/osp/coarser/funnel/FunnelBfs.hpp @@ -17,9 +17,10 @@ limitations under the License. */ #pragma once +#include + #include "osp/coarser/Coarser.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp" -#include namespace osp { @@ -28,16 +29,14 @@ namespace osp { * (from outside the group) * */ -template +template class FunnelBfs : public CoarserGenExpansionMap { - public: /** * @brief Parameters for Funnel coarsener * */ struct FunnelBfs_parameters { - bool funnel_incoming; bool use_approx_transitive_reduction; @@ -49,23 +48,23 @@ class FunnelBfs : public CoarserGenExpansionMap { FunnelBfs_parameters(v_workw_t max_work_weight_ = std::numeric_limits>::max(), v_memw_t max_memory_weight_ = std::numeric_limits>::max(), - unsigned max_depth_ = std::numeric_limits::max(), + unsigned max_depth_ = std::numeric_limits::max(), bool funnel_incoming_ = true, bool use_approx_transitive_reduction_ = true) - : funnel_incoming(funnel_incoming_), use_approx_transitive_reduction(use_approx_transitive_reduction_), - max_work_weight(max_work_weight_), max_memory_weight(max_memory_weight_), max_depth(max_depth_) {}; + : funnel_incoming(funnel_incoming_), + use_approx_transitive_reduction(use_approx_transitive_reduction_), + max_work_weight(max_work_weight_), + max_memory_weight(max_memory_weight_), + max_depth(max_depth_) {}; ~FunnelBfs_parameters() = default; }; - FunnelBfs(FunnelBfs_parameters parameters_ = FunnelBfs_parameters()) - : parameters(parameters_) {} + FunnelBfs(FunnelBfs_parameters parameters_ = FunnelBfs_parameters()) : parameters(parameters_) {} virtual ~FunnelBfs() = default; - virtual std::vector>> - generate_vertex_expansion_map(const Graph_t_in &graph) override { - + virtual std::vector>> generate_vertex_expansion_map(const Graph_t_in &graph) override { if constexpr (use_architecture_memory_contraints) { if (max_memory_per_vertex_type.size() < graph.num_vertex_types()) { throw std::runtime_error("FunnelBfs: max_memory_per_vertex_type has insufficient size."); @@ -93,23 +92,22 @@ class FunnelBfs : public CoarserGenExpansionMap { std::vector> max_memory_per_vertex_type; void run_in_contraction(const Graph_t_in &graph, std::vector>> &partition) { - using vertex_idx_t = vertex_idx_t; const std::unordered_set> edge_mask = parameters.use_approx_transitive_reduction - ? long_edges_in_triangles_parallel(graph) - : std::unordered_set>(); + ? long_edges_in_triangles_parallel(graph) + : std::unordered_set>(); std::vector visited(graph.num_vertices(), false); const std::vector top_order = GetTopOrder(graph); for (auto rev_top_it = top_order.rbegin(); rev_top_it != top_order.crend(); rev_top_it++) { - const vertex_idx_t &bottom_node = *rev_top_it; - if (visited[bottom_node]) + if (visited[bottom_node]) { continue; + } v_workw_t work_weight_of_group = 0; v_memw_t memory_weight_of_group = 0; @@ -123,7 +121,6 @@ class FunnelBfs : public CoarserGenExpansionMap { unsigned depth_counter = 0; while ((not vertex_processing_fifo.empty()) || (not next_vertex_processing_fifo.empty())) { - if (vertex_processing_fifo.empty()) { vertex_processing_fifo = next_vertex_processing_fifo; next_vertex_processing_fifo.clear(); @@ -136,19 +133,23 @@ class FunnelBfs : public CoarserGenExpansionMap { vertex_idx_t active_node = vertex_processing_fifo.front(); vertex_processing_fifo.pop_front(); - if (graph.vertex_type(active_node) != graph.vertex_type(bottom_node)) + if (graph.vertex_type(active_node) != graph.vertex_type(bottom_node)) { continue; + } - if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight) + if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight) { continue; + } - if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight) + if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight) { continue; + } if constexpr (use_architecture_memory_contraints) { - if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > - max_memory_per_vertex_type[graph.vertex_type(bottom_node)]) + if (memory_weight_of_group + graph.vertex_mem_weight(active_node) + > max_memory_per_vertex_type[graph.vertex_type(bottom_node)]) { continue; + } } group.emplace_back(active_node); @@ -156,9 +157,9 @@ class FunnelBfs : public CoarserGenExpansionMap { memory_weight_of_group += graph.vertex_mem_weight(active_node); for (const auto &in_edge : in_edges(active_node, graph)) { - - if (parameters.use_approx_transitive_reduction && (edge_mask.find(in_edge) != edge_mask.cend())) + if (parameters.use_approx_transitive_reduction && (edge_mask.find(in_edge) != edge_mask.cend())) { continue; + } const vertex_idx_t &par = source(in_edge, graph); @@ -166,14 +167,13 @@ class FunnelBfs : public CoarserGenExpansionMap { children_not_in_group[par] -= 1; } else { - if (parameters.use_approx_transitive_reduction) { - children_not_in_group[par] = 0; for (const auto out_edge : out_edges(par, graph)) { - if (edge_mask.find(out_edge) != edge_mask.cend()) + if (edge_mask.find(out_edge) != edge_mask.cend()) { continue; + } children_not_in_group[par] += 1; } @@ -184,9 +184,9 @@ class FunnelBfs : public CoarserGenExpansionMap { } } for (const auto &in_edge : in_edges(active_node, graph)) { - - if (parameters.use_approx_transitive_reduction && (edge_mask.find(in_edge) != edge_mask.cend())) + if (parameters.use_approx_transitive_reduction && (edge_mask.find(in_edge) != edge_mask.cend())) { continue; + } const vertex_idx_t &par = source(in_edge, graph); if (children_not_in_group[par] == 0) { @@ -204,19 +204,18 @@ class FunnelBfs : public CoarserGenExpansionMap { } void run_out_contraction(const Graph_t_in &graph, std::vector>> &partition) { - using vertex_idx_t = vertex_idx_t; const std::unordered_set> edge_mask = parameters.use_approx_transitive_reduction - ? long_edges_in_triangles_parallel(graph) - : std::unordered_set>(); + ? long_edges_in_triangles_parallel(graph) + : std::unordered_set>(); std::vector visited(graph.num_vertices(), false); for (const auto &top_node : top_sort_view(graph)) { - - if (visited[top_node]) + if (visited[top_node]) { continue; + } v_workw_t work_weight_of_group = 0; v_memw_t memory_weight_of_group = 0; @@ -230,7 +229,6 @@ class FunnelBfs : public CoarserGenExpansionMap { unsigned depth_counter = 0; while ((not vertex_processing_fifo.empty()) || (not next_vertex_processing_fifo.empty())) { - if (vertex_processing_fifo.empty()) { vertex_processing_fifo = next_vertex_processing_fifo; next_vertex_processing_fifo.clear(); @@ -243,19 +241,23 @@ class FunnelBfs : public CoarserGenExpansionMap { vertex_idx_t active_node = vertex_processing_fifo.front(); vertex_processing_fifo.pop_front(); - if (graph.vertex_type(active_node) != graph.vertex_type(top_node)) + if (graph.vertex_type(active_node) != graph.vertex_type(top_node)) { continue; + } - if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight) + if (work_weight_of_group + graph.vertex_work_weight(active_node) > parameters.max_work_weight) { continue; + } - if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight) + if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > parameters.max_memory_weight) { continue; + } if constexpr (use_architecture_memory_contraints) { - if (memory_weight_of_group + graph.vertex_mem_weight(active_node) > - max_memory_per_vertex_type[graph.vertex_type(top_node)]) + if (memory_weight_of_group + graph.vertex_mem_weight(active_node) + > max_memory_per_vertex_type[graph.vertex_type(top_node)]) { continue; + } } group.emplace_back(active_node); @@ -263,9 +265,9 @@ class FunnelBfs : public CoarserGenExpansionMap { memory_weight_of_group += graph.vertex_mem_weight(active_node); for (const auto &out_edge : out_edges(active_node, graph)) { - - if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend())) + if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend())) { continue; + } const vertex_idx_t &child = target(out_edge, graph); @@ -273,14 +275,13 @@ class FunnelBfs : public CoarserGenExpansionMap { parents_not_in_group[child] -= 1; } else { - if (parameters.use_approx_transitive_reduction) { - parents_not_in_group[child] = 0; for (const auto in_edge : in_edges(child, graph)) { - if (edge_mask.find(in_edge) != edge_mask.cend()) + if (edge_mask.find(in_edge) != edge_mask.cend()) { continue; + } parents_not_in_group[child] += 1; } @@ -291,9 +292,9 @@ class FunnelBfs : public CoarserGenExpansionMap { } } for (const auto &out_edge : out_edges(active_node, graph)) { - - if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend())) + if (parameters.use_approx_transitive_reduction && (edge_mask.find(out_edge) != edge_mask.cend())) { continue; + } const vertex_idx_t &child = target(out_edge, graph); if (parents_not_in_group[child] == 0) { @@ -310,4 +311,5 @@ class FunnelBfs : public CoarserGenExpansionMap { } } }; -} // namespace osp \ No newline at end of file + +} // namespace osp diff --git a/include/osp/coarser/hdagg/hdagg_coarser.hpp b/include/osp/coarser/hdagg/hdagg_coarser.hpp index 489ef5bb..7d206187 100644 --- a/include/osp/coarser/hdagg/hdagg_coarser.hpp +++ b/include/osp/coarser/hdagg/hdagg_coarser.hpp @@ -18,19 +18,17 @@ limitations under the License. #pragma once -#include "osp/coarser/Coarser.hpp" +#include +#include "osp/coarser/Coarser.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" -#include namespace osp { -template +template class hdagg_coarser : public CoarserGenContractionMap { - - static_assert(is_directed_graph_edge_desc_v, - "Graph_t_in must satisfy the directed_graph edge desc concept"); + static_assert(is_directed_graph_edge_desc_v, "Graph_t_in must satisfy the directed_graph edge desc concept"); static_assert(has_hashable_edge_desc_v, "Graph_t_in must satisfy the has_hashable_edge_desc concept"); static_assert(has_typed_vertices_v, "Graph_t_in must have typed vertices"); @@ -55,7 +53,6 @@ class hdagg_coarser : public CoarserGenContractionMap { v_type_t current_v_type = 0; void add_new_super_node(const Graph_t_in &dag_in, VertexType_in node) { - v_memw_t node_mem = dag_in.vertex_mem_weight(node); current_memory = node_mem; @@ -72,7 +69,6 @@ class hdagg_coarser : public CoarserGenContractionMap { virtual std::string getCoarserName() const override { return "hdagg_coarser"; }; virtual std::vector> generate_vertex_contraction_map(const Graph_t_in &dag_in) override { - std::vector visited(dag_in.num_vertices(), false); std::vector reverse_vertex_map(dag_in.num_vertices()); @@ -94,21 +90,20 @@ class hdagg_coarser : public CoarserGenContractionMap { add_new_super_node(dag_in, vertex_map[part_ind][vert_ind]); while (vert_ind < part_size) { - const VertexType_in vert = vertex_map[part_ind][vert_ind]; reverse_vertex_map[vert] = current_super_node_idx; bool indegree_one = true; for (const auto &in_edge : in_edges(vert, dag_in)) { - - if (edge_mask.find(in_edge) != edge_mast_end) + if (edge_mask.find(in_edge) != edge_mast_end) { continue; + } unsigned count = 0; for (const auto &out_edge : out_edges(source(in_edge, dag_in), dag_in)) { - - if (edge_mask.find(out_edge) != edge_mast_end) + if (edge_mask.find(out_edge) != edge_mast_end) { continue; + } count++; if (count > 1) { @@ -124,22 +119,21 @@ class hdagg_coarser : public CoarserGenContractionMap { if (indegree_one) { for (const auto &in_edge : in_edges(vert, dag_in)) { - - if (edge_mask.find(in_edge) != edge_mast_end) + if (edge_mask.find(in_edge) != edge_mast_end) { continue; + } const auto &edge_source = source(in_edge, dag_in); v_memw_t node_mem = dag_in.vertex_mem_weight(edge_source); - if (((current_memory + node_mem > memory_threshold) || - (current_work + dag_in.vertex_work_weight(edge_source) > work_threshold) || - (vertex_map[part_ind].size() >= super_node_size_threshold) || - (current_communication + dag_in.vertex_comm_weight(edge_source) > - communication_threshold)) || + if (((current_memory + node_mem > memory_threshold) + || (current_work + dag_in.vertex_work_weight(edge_source) > work_threshold) + || (vertex_map[part_ind].size() >= super_node_size_threshold) + || (current_communication + dag_in.vertex_comm_weight(edge_source) > communication_threshold)) + || // or node type changes (current_v_type != dag_in.vertex_type(edge_source))) { - if (!visited[edge_source]) { vertex_map.push_back(std::vector({edge_source})); partition_size++; @@ -147,7 +141,6 @@ class hdagg_coarser : public CoarserGenContractionMap { } } else { - current_memory += node_mem; current_work += dag_in.vertex_work_weight(edge_source); current_communication += dag_in.vertex_comm_weight(edge_source); @@ -158,9 +151,9 @@ class hdagg_coarser : public CoarserGenContractionMap { } } else { for (const auto &in_edge : in_edges(vert, dag_in)) { - - if (edge_mask.find(in_edge) != edge_mast_end) + if (edge_mask.find(in_edge) != edge_mast_end) { continue; + } const auto &edge_source = source(in_edge, dag_in); @@ -181,13 +174,16 @@ class hdagg_coarser : public CoarserGenContractionMap { } inline void set_work_threshold(v_workw_t work_threshold_) { work_threshold = work_threshold_; } + inline void set_memory_threshold(v_memw_t memory_threshold_) { memory_threshold = memory_threshold_; } + inline void set_communication_threshold(v_commw_t communication_threshold_) { communication_threshold = communication_threshold_; } + inline void set_super_node_size_threshold(std::size_t super_node_size_threshold_) { super_node_size_threshold = super_node_size_threshold_; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/coarser/top_order/top_order_coarser.hpp b/include/osp/coarser/top_order/top_order_coarser.hpp index bad2c46e..deaf476f 100644 --- a/include/osp/coarser/top_order/top_order_coarser.hpp +++ b/include/osp/coarser/top_order/top_order_coarser.hpp @@ -26,10 +26,8 @@ limitations under the License. namespace osp { -template> (*top_sort_func)(const Graph_t_in &)> +template > (*top_sort_func)(const Graph_t_in &)> class top_order_coarser : public Coarser { - private: using VertexType = vertex_idx_t; @@ -46,42 +44,36 @@ class top_order_coarser : public Coarser { v_workw_t current_work = 0; v_commw_t current_communication = 0; VertexType current_super_node_idx = 0; - - - void finish_super_node_add_edges(const Graph_t_in &dag_in, Graph_t_out &dag_out, - const std::vector &nodes, std::vector> &reverse_vertex_map) { + void finish_super_node_add_edges(const Graph_t_in &dag_in, + Graph_t_out &dag_out, + const std::vector &nodes, + std::vector> &reverse_vertex_map) { dag_out.set_vertex_mem_weight(current_super_node_idx, current_memory); dag_out.set_vertex_work_weight(current_super_node_idx, current_work); dag_out.set_vertex_comm_weight(current_super_node_idx, current_communication); for (const auto &node : nodes) { - if constexpr (has_edge_weights_v && has_edge_weights_v) { - for (const auto &in_edge : in_edges(node, dag_in)) { - const VertexType parent_rev = reverse_vertex_map[source(in_edge, dag_in)]; if (parent_rev != current_super_node_idx && parent_rev != std::numeric_limits::max()) { - auto pair = edge_desc(parent_rev, current_super_node_idx, dag_out); if (pair.second) { - dag_out.set_edge_comm_weight(pair.first, dag_out.edge_comm_weight(pair.first) + - dag_in.edge_comm_weight(in_edge)); + dag_out.set_edge_comm_weight(pair.first, + dag_out.edge_comm_weight(pair.first) + dag_in.edge_comm_weight(in_edge)); } else { dag_out.add_edge(parent_rev, current_super_node_idx, dag_in.edge_comm_weight(in_edge)); } } } } else { - for (const auto &parent : dag_in.parents(node)) { - const VertexType parent_rev = reverse_vertex_map[parent]; if (parent_rev != current_super_node_idx && parent_rev != std::numeric_limits::max()) { - - if (not edge(parent_rev, current_super_node_idx, dag_out)) + if (not edge(parent_rev, current_super_node_idx, dag_out)) { dag_out.add_edge(parent_rev, current_super_node_idx); + } } } } @@ -89,7 +81,6 @@ class top_order_coarser : public Coarser { } void add_new_super_node(const Graph_t_in &dag_in, Graph_t_out &dag_out, VertexType node) { - // int node_mem = dag_in.nodeMemoryWeight(node); // if (memory_constraint_type == LOCAL_INC_EDGES_2) { @@ -103,11 +94,9 @@ class top_order_coarser : public Coarser { current_work = dag_in.vertex_work_weight(node); current_communication = dag_in.vertex_comm_weight(node); - if constexpr (is_computational_dag_typed_vertices_v && - is_computational_dag_typed_vertices_v) { - - current_super_node_idx = - dag_out.add_vertex(current_work, current_communication, current_memory, dag_in.vertex_type(node)); + if constexpr (is_computational_dag_typed_vertices_v && is_computational_dag_typed_vertices_v) { + current_super_node_idx + = dag_out.add_vertex(current_work, current_communication, current_memory, dag_in.vertex_type(node)); } else { current_super_node_idx = dag_out.add_vertex(current_work, current_communication, current_memory); } @@ -118,8 +107,11 @@ class top_order_coarser : public Coarser { virtual ~top_order_coarser() = default; inline void set_degree_threshold(unsigned degree_threshold_) { degree_threshold = degree_threshold_; } + inline void set_work_threshold(v_workw_t work_threshold_) { work_threshold = work_threshold_; } + inline void set_memory_threshold(v_memw_t memory_threshold_) { memory_threshold = memory_threshold_; } + inline void set_communication_threshold(v_commw_t communication_threshold_) { communication_threshold = communication_threshold_; } @@ -127,7 +119,7 @@ class top_order_coarser : public Coarser { inline void set_super_node_size_threshold(VertexType super_node_size_threshold_) { super_node_size_threshold = super_node_size_threshold_; } - + inline void set_node_dist_threshold(unsigned node_dist_threshold_) { node_dist_threshold = node_dist_threshold_; } // inline void set_memory_constraint_type(MEMORY_CONSTRAINT_TYPE memory_constraint_type_) { memory_constraint_type = @@ -135,9 +127,9 @@ class top_order_coarser : public Coarser { virtual std::string getCoarserName() const override { return "top_order_coarser"; }; - virtual bool coarsenDag(const Graph_t_in &dag_in, Graph_t_out &dag_out, + virtual bool coarsenDag(const Graph_t_in &dag_in, + Graph_t_out &dag_out, std::vector> &reverse_vertex_map) override { - assert(dag_out.num_vertices() == 0); if (dag_in.num_vertices() == 0) { reverse_vertex_map = std::vector>(); @@ -157,7 +149,6 @@ class top_order_coarser : public Coarser { reverse_vertex_map[top_ordering[0]] = current_super_node_idx; for (size_t i = 1; i < top_ordering.size(); i++) { - const auto v = top_ordering[i]; // int node_mem = dag_in.vertex_mem_weight(v); @@ -172,31 +163,27 @@ class top_order_coarser : public Coarser { const unsigned dist = source_node_dist[v] - source_node_dist[top_ordering[i - 1]]; // start new super node if thresholds are exceeded - if (((current_memory + dag_in.vertex_mem_weight(v) > memory_threshold) || - (current_work + dag_in.vertex_work_weight(v) > work_threshold) || - (vertex_map.back().size() >= super_node_size_threshold) || - (current_communication + dag_in.vertex_comm_weight(v) > communication_threshold)) || - (dist > node_dist_threshold) || + if (((current_memory + dag_in.vertex_mem_weight(v) > memory_threshold) + || (current_work + dag_in.vertex_work_weight(v) > work_threshold) + || (vertex_map.back().size() >= super_node_size_threshold) + || (current_communication + dag_in.vertex_comm_weight(v) > communication_threshold)) + || (dist > node_dist_threshold) || // or prev node high out degree (dag_in.out_degree(top_ordering[i - 1]) > degree_threshold)) { - - finish_super_node_add_edges(dag_in, dag_out, vertex_map.back(),reverse_vertex_map); + finish_super_node_add_edges(dag_in, dag_out, vertex_map.back(), reverse_vertex_map); vertex_map.push_back(std::vector({v})); add_new_super_node(dag_in, dag_out, v); - } else { // grow current super node - - if constexpr (is_computational_dag_typed_vertices_v && - is_computational_dag_typed_vertices_v) { + } else { // grow current super node + if constexpr (is_computational_dag_typed_vertices_v + && is_computational_dag_typed_vertices_v) { if (dag_out.vertex_type(current_super_node_idx) != dag_in.vertex_type(v)) { - finish_super_node_add_edges(dag_in, dag_out, vertex_map.back(), reverse_vertex_map); vertex_map.push_back(std::vector({v})); add_new_super_node(dag_in, dag_out, v); } else { - current_memory += dag_in.vertex_mem_weight(v); current_work += dag_in.vertex_work_weight(v); current_communication += dag_in.vertex_comm_weight(v); @@ -205,7 +192,6 @@ class top_order_coarser : public Coarser { } } else { - current_memory += dag_in.vertex_mem_weight(v); current_work += dag_in.vertex_work_weight(v); current_communication += dag_in.vertex_comm_weight(v); @@ -225,4 +211,4 @@ class top_order_coarser : public Coarser { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/concepts/computational_dag_concept.hpp b/include/osp/concepts/computational_dag_concept.hpp index 546b6467..29ca517e 100644 --- a/include/osp/concepts/computational_dag_concept.hpp +++ b/include/osp/concepts/computational_dag_concept.hpp @@ -52,20 +52,19 @@ namespace osp { * * @tparam T The graph type. */ -template +template struct has_vertex_weights : std::false_type {}; -template +template struct has_vertex_weights().vertex_work_weight(std::declval>())), decltype(std::declval().vertex_comm_weight(std::declval>())), decltype(std::declval().vertex_mem_weight(std::declval>()))>> - : std::conjunction< - std::is_arithmetic().vertex_work_weight(std::declval>()))>, - std::is_arithmetic().vertex_comm_weight(std::declval>()))>, - std::is_arithmetic().vertex_mem_weight(std::declval>()))>> {}; + : std::conjunction().vertex_work_weight(std::declval>()))>, + std::is_arithmetic().vertex_comm_weight(std::declval>()))>, + std::is_arithmetic().vertex_mem_weight(std::declval>()))>> {}; -template +template inline constexpr bool has_vertex_weights_v = has_vertex_weights::value; /** @@ -80,16 +79,17 @@ inline constexpr bool has_vertex_weights_v = has_vertex_weights::value; * * @tparam T The graph type. */ -template +template struct has_typed_vertices : std::false_type {}; -template -struct has_typed_vertices().vertex_type(std::declval>())), - decltype(std::declval().num_vertex_types())>> +template +struct has_typed_vertices().vertex_type(std::declval>())), + decltype(std::declval().num_vertex_types())>> : std::conjunction().vertex_type(std::declval>()))>, std::is_integral().num_vertex_types())>> {}; -template +template inline constexpr bool has_typed_vertices_v = has_typed_vertices::value; /** @@ -101,18 +101,17 @@ inline constexpr bool has_typed_vertices_v = has_typed_vertices::value; * * @tparam T The graph type. */ -template +template struct has_edge_weights : std::false_type {}; -template +template struct has_edge_weights::directed_edge_descriptor, decltype(std::declval().edge_comm_weight(std::declval>()))>> - : std::conjunction< - std::is_arithmetic().edge_comm_weight(std::declval>()))>, - is_directed_graph_edge_desc> {}; + : std::conjunction().edge_comm_weight(std::declval>()))>, + is_directed_graph_edge_desc> {}; -template +template inline constexpr bool has_edge_weights_v = has_edge_weights::value; /** @@ -124,13 +123,13 @@ inline constexpr bool has_edge_weights_v = has_edge_weights::value; * * @tparam T The graph type. */ -template +template struct is_computational_dag : std::false_type {}; -template +template struct is_computational_dag> : std::conjunction, has_vertex_weights> {}; -template +template inline constexpr bool is_computational_dag_v = is_computational_dag::value; /** @@ -140,14 +139,13 @@ inline constexpr bool is_computational_dag_v = is_computational_dag::value; * * @tparam T The graph type. */ -template +template struct is_computational_dag_typed_vertices : std::false_type {}; -template -struct is_computational_dag_typed_vertices> - : std::conjunction, has_typed_vertices> {}; +template +struct is_computational_dag_typed_vertices> : std::conjunction, has_typed_vertices> {}; -template +template inline constexpr bool is_computational_dag_typed_vertices_v = is_computational_dag_typed_vertices::value; /** @@ -158,14 +156,14 @@ inline constexpr bool is_computational_dag_typed_vertices_v = is_computational_d * * @tparam T The graph type. */ -template +template struct is_computational_dag_edge_desc : std::false_type {}; -template +template struct is_computational_dag_edge_desc> : std::conjunction, is_computational_dag> {}; -template +template inline constexpr bool is_computational_dag_edge_desc_v = is_computational_dag_edge_desc::value; /** @@ -175,15 +173,14 @@ inline constexpr bool is_computational_dag_edge_desc_v = is_computational_dag_ed * * @tparam T The graph type. */ -template +template struct is_computational_dag_typed_vertices_edge_desc : std::false_type {}; -template +template struct is_computational_dag_typed_vertices_edge_desc> : std::conjunction, is_computational_dag_typed_vertices> {}; -template -inline constexpr bool is_computational_dag_typed_vertices_edge_desc_v = - is_computational_dag_typed_vertices_edge_desc::value; +template +inline constexpr bool is_computational_dag_typed_vertices_edge_desc_v = is_computational_dag_typed_vertices_edge_desc::value; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/concepts/constructable_computational_dag_concept.hpp b/include/osp/concepts/constructable_computational_dag_concept.hpp index 26e5a4a9..0caa561e 100644 --- a/include/osp/concepts/constructable_computational_dag_concept.hpp +++ b/include/osp/concepts/constructable_computational_dag_concept.hpp @@ -48,14 +48,15 @@ namespace osp { * * @tparam T The graph type. */ -template +template struct is_modifiable_cdag_vertex : std::false_type {}; -template +template struct is_modifiable_cdag_vertex< - T, std::void_t().set_vertex_work_weight(std::declval>(), std::declval>())), - decltype(std::declval().set_vertex_comm_weight(std::declval>(), std::declval>())), - decltype(std::declval().set_vertex_mem_weight(std::declval>(), std::declval>()))>> + T, + std::void_t().set_vertex_work_weight(std::declval>(), std::declval>())), + decltype(std::declval().set_vertex_comm_weight(std::declval>(), std::declval>())), + decltype(std::declval().set_vertex_mem_weight(std::declval>(), std::declval>()))>> : std::conjunction, std::is_default_constructible, std::is_copy_constructible, @@ -63,7 +64,7 @@ struct is_modifiable_cdag_vertex< std::is_copy_assignable, std::is_move_assignable> {}; -template +template inline constexpr bool is_modifiable_cdag_vertex_v = is_modifiable_cdag_vertex::value; /** @@ -75,16 +76,16 @@ inline constexpr bool is_modifiable_cdag_vertex_v = is_modifiable_cdag_vertex * * @tparam T The graph type. */ -template +template struct is_constructable_cdag_vertex : std::false_type {}; -template -struct is_constructable_cdag_vertex< - T, std::void_t().add_vertex(std::declval>(), std::declval>(), std::declval>()))>> - : std::conjunction, - std::is_constructible>> {}; +template +struct is_constructable_cdag_vertex().add_vertex( + std::declval>(), std::declval>(), std::declval>()))>> + : std::conjunction, std::is_constructible>> {}; -template +template inline constexpr bool is_constructable_cdag_vertex_v = is_constructable_cdag_vertex::value; /** @@ -95,16 +96,16 @@ inline constexpr bool is_constructable_cdag_vertex_v = is_constructable_cdag_ver * * @tparam T The graph type. */ -template +template struct is_modifiable_cdag_typed_vertex : std::false_type {}; -template +template struct is_modifiable_cdag_typed_vertex< - T, std::void_t().set_vertex_type(std::declval>(), std::declval>()))>> - : std::conjunction, - is_computational_dag_typed_vertices> {}; // for default node type + T, + std::void_t().set_vertex_type(std::declval>(), std::declval>()))>> + : std::conjunction, is_computational_dag_typed_vertices> {}; // for default node type -template +template inline constexpr bool is_modifiable_cdag_typed_vertex_v = is_modifiable_cdag_typed_vertex::value; /** @@ -115,16 +116,17 @@ inline constexpr bool is_modifiable_cdag_typed_vertex_v = is_modifiable_cdag_typ * * @tparam T The graph type. */ -template +template struct is_constructable_cdag_typed_vertex : std::false_type {}; -template +template struct is_constructable_cdag_typed_vertex< - T, std::void_t().add_vertex(std::declval>(), std::declval>(), std::declval>(), std::declval>()))>> - : std::conjunction, - is_modifiable_cdag_typed_vertex> {}; // for default node type + T, + std::void_t().add_vertex( + std::declval>(), std::declval>(), std::declval>(), std::declval>()))>> + : std::conjunction, is_modifiable_cdag_typed_vertex> {}; // for default node type -template +template inline constexpr bool is_constructable_cdag_typed_vertex_v = is_constructable_cdag_typed_vertex::value; /** @@ -135,15 +137,16 @@ inline constexpr bool is_constructable_cdag_typed_vertex_v = is_constructable_cd * * @tparam T The graph type. */ -template +template struct is_constructable_cdag_edge : std::false_type {}; -template -struct is_constructable_cdag_edge().add_edge(std::declval>(), - std::declval>()))>> +template +struct is_constructable_cdag_edge< + T, + std::void_t().add_edge(std::declval>(), std::declval>()))>> : is_directed_graph {}; -template +template inline constexpr bool is_constructable_cdag_edge_v = is_constructable_cdag_edge::value; /** @@ -154,15 +157,16 @@ inline constexpr bool is_constructable_cdag_edge_v = is_constructable_cdag_edge< * * @tparam T The graph type. */ -template +template struct is_modifiable_cdag_comm_edge : std::false_type {}; -template +template struct is_modifiable_cdag_comm_edge< - T, std::void_t().set_edge_comm_weight(std::declval>(), std::declval>()))>> - : std::conjunction> {}; // for default edge weight + T, + std::void_t().set_edge_comm_weight(std::declval>(), std::declval>()))>> + : std::conjunction> {}; // for default edge weight -template +template inline constexpr bool is_modifiable_cdag_comm_edge_v = is_modifiable_cdag_comm_edge::value; /** @@ -173,17 +177,18 @@ inline constexpr bool is_modifiable_cdag_comm_edge_v = is_modifiable_cdag_comm_e * * @tparam T The graph type. */ -template +template struct is_constructable_cdag_comm_edge : std::false_type {}; -template +template struct is_constructable_cdag_comm_edge< - T, std::void_t().add_edge(std::declval>(), std::declval>(), std::declval>()))>> - : std::conjunction, - is_computational_dag_edge_desc, - is_modifiable_cdag_comm_edge> {}; // for default edge weight + T, + std::void_t().add_edge( + std::declval>(), std::declval>(), std::declval>()))>> + : std::conjunction, is_computational_dag_edge_desc, is_modifiable_cdag_comm_edge> { +}; // for default edge weight -template +template inline constexpr bool is_constructable_cdag_comm_edge_v = is_constructable_cdag_comm_edge::value; /** @@ -193,20 +198,21 @@ inline constexpr bool is_constructable_cdag_comm_edge_v = is_constructable_cdag_ * * @tparam T The graph type. */ -template +template struct is_constructable_cdag : std::false_type {}; -template +template struct is_constructable_cdag> : std::conjunction, is_constructable_cdag_vertex, is_constructable_cdag_edge> {}; -template +template inline constexpr bool is_constructable_cdag_v = is_constructable_cdag::value; /** * @brief Helper trait to check if a graph can be directly constructed from a vertex count and a set of edges. */ -template -inline constexpr bool is_direct_constructable_cdag_v = std::is_constructible, std::set, vertex_idx_t>>>::value; +template +inline constexpr bool is_direct_constructable_cdag_v + = std::is_constructible, std::set, vertex_idx_t>>>::value; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/concepts/directed_graph_concept.hpp b/include/osp/concepts/directed_graph_concept.hpp index c6a470b3..0bd65d77 100644 --- a/include/osp/concepts/directed_graph_concept.hpp +++ b/include/osp/concepts/directed_graph_concept.hpp @@ -20,6 +20,7 @@ limitations under the License. #include "graph_traits.hpp" #include "iterator_concepts.hpp" + namespace osp { /** @@ -50,29 +51,28 @@ namespace osp { * * @tparam T The graph type to check against the concept. */ -template +template struct is_directed_graph : std::false_type {}; -template -struct is_directed_graph< - T, std::void_t::vertex_idx, - decltype(std::declval().vertices()), - decltype(std::declval().num_vertices()), - decltype(std::declval().num_edges()), - decltype(std::declval().parents(std::declval>())), - decltype(std::declval().children(std::declval>())), - decltype(std::declval().in_degree(std::declval>())), - decltype(std::declval().out_degree(std::declval>()))>> - : std::conjunction< - is_forward_range_of().vertices()), vertex_idx_t>, - std::is_integral().num_vertices())>, - std::is_integral().num_edges())>, - is_input_range_of().parents(std::declval>())), vertex_idx_t>, - is_input_range_of().children(std::declval>())), vertex_idx_t>, - std::is_integral().in_degree(std::declval>()))>, - std::is_integral().out_degree(std::declval>()))>> {}; +template +struct is_directed_graph::vertex_idx, + decltype(std::declval().vertices()), + decltype(std::declval().num_vertices()), + decltype(std::declval().num_edges()), + decltype(std::declval().parents(std::declval>())), + decltype(std::declval().children(std::declval>())), + decltype(std::declval().in_degree(std::declval>())), + decltype(std::declval().out_degree(std::declval>()))>> + : std::conjunction().vertices()), vertex_idx_t>, + std::is_integral().num_vertices())>, + std::is_integral().num_edges())>, + is_input_range_of().parents(std::declval>())), vertex_idx_t>, + is_input_range_of().children(std::declval>())), vertex_idx_t>, + std::is_integral().in_degree(std::declval>()))>, + std::is_integral().out_degree(std::declval>()))>> {}; -template +template inline constexpr bool is_directed_graph_v = is_directed_graph::value; /** @@ -86,17 +86,28 @@ inline constexpr bool is_directed_graph_v = is_directed_graph::value; * @tparam v_type The vertex type. * @tparam e_type The size type (usually integral). */ -template +template struct is_edge_list_type : std::false_type {}; -template +template struct is_edge_list_type< - T, v_type, e_type, std::void_t().begin()), decltype(std::declval().end()), decltype(std::declval().size()), typename std::iterator_traits()))>::value_type, decltype(std::declval()))>::value_type>().source), decltype(std::declval()))>::value_type>().target)>> - : std::conjunction()))>::value_type>().source), v_type>, - std::is_same()))>::value_type>().target), v_type>, - std::is_same().size()), e_type>> {}; + T, + v_type, + e_type, + std::void_t().begin()), + decltype(std::declval().end()), + decltype(std::declval().size()), + typename std::iterator_traits()))>::value_type, + decltype(std::declval()))>::value_type>().source), + decltype(std::declval()))>::value_type>().target)>> + : std::conjunction< + std::is_same()))>::value_type>().source), + v_type>, + std::is_same()))>::value_type>().target), + v_type>, + std::is_same().size()), e_type>> {}; -template +template inline constexpr bool is_edge_list_type_v = is_edge_list_type::value; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/concepts/directed_graph_edge_desc_concept.hpp b/include/osp/concepts/directed_graph_edge_desc_concept.hpp index dadd827b..d3a8227b 100644 --- a/include/osp/concepts/directed_graph_edge_desc_concept.hpp +++ b/include/osp/concepts/directed_graph_edge_desc_concept.hpp @@ -45,7 +45,7 @@ namespace osp { * @param edge The edge descriptor. * @return The source vertex index. */ -template +template inline vertex_idx_t source(const directed_edge &edge, const Graph_t &) { return edge.source; } @@ -57,7 +57,7 @@ inline vertex_idx_t source(const directed_edge &edge, const Gr * @param edge The edge descriptor. * @return The target vertex index. */ -template +template inline vertex_idx_t target(const directed_edge &edge, const Graph_t &) { return edge.target; } @@ -69,7 +69,7 @@ inline vertex_idx_t target(const directed_edge &edge, const Gr * @param graph The graph instance. * @return An `edge_view` allowing iteration over all edges. */ -template +template inline edge_view edges(const Graph_t &graph) { return edge_view(graph); } @@ -82,7 +82,7 @@ inline edge_view edges(const Graph_t &graph) { * @param graph The graph instance. * @return An `out_edge_view` allowing iteration over outgoing edges from `u`. */ -template +template inline OutEdgeView out_edges(vertex_idx_t u, const Graph_t &graph) { return OutEdgeView(graph, u); } @@ -95,7 +95,7 @@ inline OutEdgeView out_edges(vertex_idx_t u, const Graph_t &gr * @param graph The graph instance. * @return An `in_edge_view` allowing iteration over incoming edges to `v`. */ -template +template inline InEdgeView in_edges(vertex_idx_t v, const Graph_t &graph) { return InEdgeView(graph, v); } @@ -112,10 +112,10 @@ inline InEdgeView in_edges(vertex_idx_t v, const Graph_t &grap * * @tparam T The graph type to check. */ -template +template struct is_directed_graph_edge_desc : std::false_type {}; -template +template struct is_directed_graph_edge_desc::directed_edge_descriptor, decltype(edges(std::declval())), @@ -123,16 +123,16 @@ struct is_directed_graph_edge_desc>(), std::declval())), decltype(source(std::declval>(), std::declval())), decltype(target(std::declval>(), std::declval()))>> - : std::conjunction< - is_directed_graph, std::is_default_constructible>, - std::is_copy_constructible>, - is_input_range_of())), edge_desc_t>, - is_input_range_of>(), std::declval())), edge_desc_t>, - is_input_range_of>(), std::declval())), edge_desc_t>, - std::is_same>(), std::declval())), vertex_idx_t>, - std::is_same>(), std::declval())), vertex_idx_t>> {}; - -template + : std::conjunction, + std::is_default_constructible>, + std::is_copy_constructible>, + is_input_range_of())), edge_desc_t>, + is_input_range_of>(), std::declval())), edge_desc_t>, + is_input_range_of>(), std::declval())), edge_desc_t>, + std::is_same>(), std::declval())), vertex_idx_t>, + std::is_same>(), std::declval())), vertex_idx_t>> {}; + +template inline constexpr bool is_directed_graph_edge_desc_v = is_directed_graph_edge_desc::value; /** @@ -142,18 +142,19 @@ inline constexpr bool is_directed_graph_edge_desc_v = is_directed_graph_edge_des * * @tparam T The graph type. */ -template +template struct has_hashable_edge_desc : std::false_type {}; -template +template struct has_hashable_edge_desc>{}(std::declval>())), decltype(std::declval>() == std::declval>()), decltype(std::declval>() != std::declval>())>> - : std::conjunction, std::is_default_constructible>, + : std::conjunction, + std::is_default_constructible>, std::is_copy_constructible>> {}; -template +template inline constexpr bool has_hashable_edge_desc_v = has_hashable_edge_desc::value; -} // namespace osp +} // namespace osp diff --git a/include/osp/concepts/graph_traits.hpp b/include/osp/concepts/graph_traits.hpp index 48e980ac..05b86a56 100644 --- a/include/osp/concepts/graph_traits.hpp +++ b/include/osp/concepts/graph_traits.hpp @@ -38,39 +38,46 @@ namespace osp { * These structs inherit from `std::true_type` if the specified member type exists in `T`, * otherwise they inherit from `std::false_type`. */ -template +template struct has_vertex_idx_tmember : std::false_type {}; -template + +template struct has_vertex_idx_tmember> : std::true_type {}; -template +template struct has_edge_desc_tmember : std::false_type {}; -template + +template struct has_edge_desc_tmember> : std::true_type {}; -template +template struct has_vertex_work_weight_tmember : std::false_type {}; -template + +template struct has_vertex_work_weight_tmember> : std::true_type {}; -template +template struct has_vertex_comm_weight_tmember : std::false_type {}; -template + +template struct has_vertex_comm_weight_tmember> : std::true_type {}; -template +template struct has_vertex_mem_weight_tmember : std::false_type {}; -template + +template struct has_vertex_mem_weight_tmember> : std::true_type {}; -template +template struct has_vertex_type_tmember : std::false_type {}; -template + +template struct has_vertex_type_tmember> : std::true_type {}; -template +template struct has_edge_comm_weight_tmember : std::false_type {}; -template + +template struct has_edge_comm_weight_tmember> : std::true_type {}; /** @@ -80,7 +87,7 @@ struct has_edge_comm_weight_tmember +template struct directed_graph_traits { static_assert(has_vertex_idx_tmember::value, "graph must have vertex_idx"); using vertex_idx = typename T::vertex_idx; @@ -89,7 +96,7 @@ struct directed_graph_traits { /** * @brief Alias to easily access the vertex index type of a graph. */ -template +template using vertex_idx_t = typename directed_graph_traits::vertex_idx; /** @@ -100,14 +107,17 @@ using vertex_idx_t = typename directed_graph_traits::vertex_idx; * * @tparam Graph_t The graph type. */ -template +template struct directed_edge { vertex_idx_t source; vertex_idx_t target; bool operator==(const directed_edge &other) const { return source == other.source && target == other.target; } + bool operator!=(const directed_edge &other) const { return !(*this == other); } + directed_edge() : source(0), target(0) {} + directed_edge(const directed_edge &other) = default; directed_edge(directed_edge &&other) = default; directed_edge &operator=(const directed_edge &other) = default; @@ -122,23 +132,23 @@ struct directed_edge { * * If the graph defines `directed_edge_descriptor`, it is extracted; otherwise, `directed_edge` is used as a default implementation. */ -template +template struct directed_graph_edge_desc_traits_helper { using directed_edge_descriptor = directed_edge; }; -template +template struct directed_graph_edge_desc_traits_helper { using directed_edge_descriptor = typename T::directed_edge_descriptor; }; -template +template struct directed_graph_edge_desc_traits { using directed_edge_descriptor = typename directed_graph_edge_desc_traits_helper::value>::directed_edge_descriptor; }; -template +template using edge_desc_t = typename directed_graph_edge_desc_traits::directed_edge_descriptor; /** @@ -151,7 +161,7 @@ using edge_desc_t = typename directed_graph_edge_desc_traits::directed_edge_d * * @tparam T The computational DAG type. */ -template +template struct computational_dag_traits { static_assert(has_vertex_work_weight_tmember::value, "cdag must have vertex work weight type"); static_assert(has_vertex_comm_weight_tmember::value, "cdag must have vertex comm weight type"); @@ -162,13 +172,13 @@ struct computational_dag_traits { using vertex_mem_weight_type = typename T::vertex_mem_weight_type; }; -template +template using v_workw_t = typename computational_dag_traits::vertex_work_weight_type; -template +template using v_commw_t = typename computational_dag_traits::vertex_comm_weight_type; -template +template using v_memw_t = typename computational_dag_traits::vertex_mem_weight_type; /** @@ -176,17 +186,17 @@ using v_memw_t = typename computational_dag_traits::vertex_mem_weight_type; * * If the DAG defines `vertex_type_type`, it is extracted; otherwise, `void` is used. */ -template +template struct computational_dag_typed_vertices_traits { using vertex_type_type = void; }; -template +template struct computational_dag_typed_vertices_traits> { using vertex_type_type = typename T::vertex_type_type; }; -template +template using v_type_t = typename computational_dag_typed_vertices_traits::vertex_type_type; /** @@ -194,17 +204,17 @@ using v_type_t = typename computational_dag_typed_vertices_traits::vertex_typ * * If the DAG defines `edge_comm_weight_type`, it is extracted; otherwise, `void` is used. */ -template +template struct computational_dag_edge_desc_traits { using edge_comm_weight_type = void; }; -template +template struct computational_dag_edge_desc_traits> { using edge_comm_weight_type = typename T::edge_comm_weight_type; }; -template +template using e_commw_t = typename computational_dag_edge_desc_traits::edge_comm_weight_type; // ----------------------------------------------------------------------------- @@ -213,57 +223,59 @@ using e_commw_t = typename computational_dag_edge_desc_traits::edge_comm_weig /** * @brief Check if a graph guarantees vertices are stored/iterated in topological order. - * It allows a graph implementation to notify algorithms that vertices are stored/iterated in topological order which can be used to optimize the algorithm. + * It allows a graph implementation to notify algorithms that vertices are stored/iterated in topological order which can be used + * to optimize the algorithm. */ -template +template struct has_vertices_in_top_order_trait : std::false_type {}; -template +template struct has_vertices_in_top_order_trait> : std::bool_constant && T::vertices_in_top_order> {}; -template +template inline constexpr bool has_vertices_in_top_order_v = has_vertices_in_top_order_trait::value; /** * @brief Check if a graph guarantees children of a vertex are stored/iterated in vertex index order. */ -template +template struct has_children_in_vertex_order_trait : std::false_type {}; -template +template struct has_children_in_vertex_order_trait> : std::bool_constant && T::children_in_vertex_order> {}; -template +template inline constexpr bool has_children_in_vertex_order_v = has_children_in_vertex_order_trait::value; /** * @brief Check if a graph guarantees parents of a vertex are stored/iterated in vertex index order. */ -template +template struct has_parents_in_vertex_order_trait : std::false_type {}; -template +template struct has_parents_in_vertex_order_trait> : std::bool_constant && T::parents_in_vertex_order> {}; -template +template inline constexpr bool has_parents_in_vertex_order_v = has_parents_in_vertex_order_trait::value; -} // namespace osp +} // namespace osp /** * @brief Specialization of std::hash for osp::directed_edge. * - * This specialization provides a hash function for osp::directed_edge, which is used in hash-based containers like std::unordered_set and std::unordered_map. + * This specialization provides a hash function for osp::directed_edge, which is used in hash-based containers like + * std::unordered_set and std::unordered_map. */ -template +template struct std::hash> { std::size_t operator()(const osp::directed_edge &p) const noexcept { // Combine hashes of source and target std::size_t h1 = std::hash>{}(p.source); std::size_t h2 = std::hash>{}(p.target); - return h1 ^ (h2 << 1); // Simple hash combining + return h1 ^ (h2 << 1); // Simple hash combining } -}; \ No newline at end of file +}; diff --git a/include/osp/concepts/iterator_concepts.hpp b/include/osp/concepts/iterator_concepts.hpp index 2bf9c4cb..bb827ae6 100644 --- a/include/osp/concepts/iterator_concepts.hpp +++ b/include/osp/concepts/iterator_concepts.hpp @@ -22,6 +22,7 @@ limitations under the License. #include namespace osp { + /** * @file iterator_concepts.hpp * @brief C++17 compatible concept checks (type traits) for iterators and ranges. @@ -42,20 +43,19 @@ namespace osp { * * @tparam T The type to check. */ -template +template struct is_forward_iterator : std::false_type {}; -template -struct is_forward_iterator< - T, std::void_t::difference_type, - typename std::iterator_traits::value_type, - typename std::iterator_traits::pointer, - typename std::iterator_traits::reference, - typename std::iterator_traits::iterator_category>> - : std::conjunction< - std::is_base_of::iterator_category>> {}; - -template +template +struct is_forward_iterator::difference_type, + typename std::iterator_traits::value_type, + typename std::iterator_traits::pointer, + typename std::iterator_traits::reference, + typename std::iterator_traits::iterator_category>> + : std::conjunction::iterator_category>> {}; + +template inline constexpr bool is_forward_iterator_v = is_forward_iterator::value; /** @@ -69,19 +69,16 @@ inline constexpr bool is_forward_iterator_v = is_forward_iterator::value; * @tparam T The range type to check. * @tparam ValueType The expected value type of the range. */ -template +template struct is_forward_range_of : std::false_type {}; -template -struct is_forward_range_of< - T, ValueType, - std::void_t())), - decltype(std::end(std::declval()))>> - : std::conjunction< - is_forward_iterator()))>, - std::is_same()))>::value_type>> {}; +template +struct is_forward_range_of())), decltype(std::end(std::declval()))>> + : std::conjunction()))>, + std::is_same()))>::value_type>> { +}; -template +template inline constexpr bool is_forward_range_of_v = is_forward_range_of::value; /** @@ -95,17 +92,14 @@ inline constexpr bool is_forward_range_of_v = is_forward_range_of: * @tparam T The container type to check. * @tparam ValueType The expected value type of the container. */ -template +template struct is_container_of : std::false_type {}; -template -struct is_container_of< - T, ValueType, - std::void_t()))>> - : std::conjunction< - is_forward_range_of> {}; +template +struct is_container_of()))>> + : std::conjunction> {}; -template +template inline constexpr bool is_container_of_v = is_container_of::value; /** @@ -119,20 +113,19 @@ inline constexpr bool is_container_of_v = is_container_of::value; * * @tparam T The type to check. */ -template +template struct is_input_iterator : std::false_type {}; -template -struct is_input_iterator< - T, std::void_t::difference_type, - typename std::iterator_traits::value_type, - typename std::iterator_traits::pointer, - typename std::iterator_traits::reference, - typename std::iterator_traits::iterator_category>> - : std::conjunction< - std::is_base_of::iterator_category>> {}; - -template +template +struct is_input_iterator::difference_type, + typename std::iterator_traits::value_type, + typename std::iterator_traits::pointer, + typename std::iterator_traits::reference, + typename std::iterator_traits::iterator_category>> + : std::conjunction::iterator_category>> {}; + +template inline constexpr bool is_input_iterator_v = is_input_iterator::value; /** @@ -146,19 +139,16 @@ inline constexpr bool is_input_iterator_v = is_input_iterator::value; * @tparam T The range type to check. * @tparam ValueType The expected value type of the range. */ -template +template struct is_input_range_of : std::false_type {}; -template -struct is_input_range_of< - T, ValueType, - std::void_t())), - decltype(std::end(std::declval()))>> - : std::conjunction< - is_input_iterator()))>, - std::is_same()))>::value_type>> {}; +template +struct is_input_range_of())), decltype(std::end(std::declval()))>> + : std::conjunction()))>, + std::is_same()))>::value_type>> { +}; -template +template inline constexpr bool is_input_range_of_v = is_input_range_of::value; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/concepts/specific_graph_impl.hpp b/include/osp/concepts/specific_graph_impl.hpp index 29c3f75c..810211c3 100644 --- a/include/osp/concepts/specific_graph_impl.hpp +++ b/include/osp/concepts/specific_graph_impl.hpp @@ -34,10 +34,10 @@ namespace osp { * * @tparam T The graph type. */ -template +template struct is_Compact_Sparse_Graph : std::false_type {}; -template +template inline constexpr bool is_Compact_Sparse_Graph_v = is_Compact_Sparse_Graph::value; /** @@ -45,10 +45,10 @@ inline constexpr bool is_Compact_Sparse_Graph_v = is_Compact_Sparse_Graph::va * * @tparam T The graph type. */ -template +template struct is_Compact_Sparse_Graph_reorder : std::false_type {}; -template +template inline constexpr bool is_Compact_Sparse_Graph_reorder_v = is_Compact_Sparse_Graph_reorder::value; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/AbstractWavefrontScheduler.hpp b/include/osp/dag_divider/AbstractWavefrontScheduler.hpp index 69a3c80c..627ee33d 100644 --- a/include/osp/dag_divider/AbstractWavefrontScheduler.hpp +++ b/include/osp/dag_divider/AbstractWavefrontScheduler.hpp @@ -16,15 +16,16 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once +#include +#include +#include +#include + #include "DagDivider.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/graph_algorithms/computational_dag_util.hpp" #include "osp/graph_algorithms/subgraph_algorithms.hpp" #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" -#include -#include -#include -#include namespace osp { @@ -32,7 +33,7 @@ namespace osp { * @class AbstractWavefrontScheduler * @brief Base class for schedulers that operate on wavefronts of a DAG. */ -template +template class AbstractWavefrontScheduler : public Scheduler { protected: IDagDivider *divider; @@ -44,11 +45,9 @@ class AbstractWavefrontScheduler : public Scheduler { * @param allocation A reference to the vector that will be filled with the processor allocation. * @return True if the scarcity case was hit (fewer processors than active components), false otherwise. */ - bool distributeProcessors( - unsigned total_processors_of_type, - const std::vector &work_weights, - std::vector &allocation) const { - + bool distributeProcessors(unsigned total_processors_of_type, + const std::vector &work_weights, + std::vector &allocation) const { allocation.assign(work_weights.size(), 0); double total_work = std::accumulate(work_weights.begin(), work_weights.end(), 0.0); if (total_work <= 1e-9 || total_processors_of_type == 0) { @@ -86,7 +85,7 @@ class AbstractWavefrontScheduler : public Scheduler { for (unsigned i = 0; i < remaining_procs; ++i) { allocation[sorted_work[i].second]++; } - return true; // Scarcity case was hit. + return true; // Scarcity case was hit. } // --- Stage 2: Proportional Distribution of Remaining Processors --- @@ -109,7 +108,7 @@ class AbstractWavefrontScheduler : public Scheduler { for (size_t i = 0; i < active_indices.size(); ++i) { double exact_share = (adjusted_work_weights[i] / adjusted_total_work) * remaining_procs; unsigned additional_alloc = static_cast(std::floor(exact_share)); - allocation[active_indices[i]] += additional_alloc; // Add to the base allocation of 1 + allocation[active_indices[i]] += additional_alloc; // Add to the base allocation of 1 remainders.push_back({exact_share - additional_alloc, active_indices[i]}); allocated_count += additional_alloc; } @@ -124,22 +123,21 @@ class AbstractWavefrontScheduler : public Scheduler { } } } - return false; // Scarcity case was not hit. + return false; // Scarcity case was not hit. } - BspArchitecture createSubArchitecture( - const BspArchitecture &original_arch, - const std::vector &sub_dag_proc_types) const { - + BspArchitecture createSubArchitecture(const BspArchitecture &original_arch, + const std::vector &sub_dag_proc_types) const { // The calculation is now inside the assert, so it only happens in debug builds. - assert(std::accumulate(sub_dag_proc_types.begin(), sub_dag_proc_types.end(), 0u) > 0 && "Attempted to create a sub-architecture with zero processors."); + assert(std::accumulate(sub_dag_proc_types.begin(), sub_dag_proc_types.end(), 0u) > 0 + && "Attempted to create a sub-architecture with zero processors."); BspArchitecture sub_architecture(original_arch); std::vector> sub_dag_processor_memory(original_arch.getProcessorTypeCount().size(), std::numeric_limits>::max()); for (unsigned i = 0; i < original_arch.numberOfProcessors(); ++i) { - sub_dag_processor_memory[original_arch.processorType(i)] = - std::min(original_arch.memoryBound(i), sub_dag_processor_memory[original_arch.processorType(i)]); + sub_dag_processor_memory[original_arch.processorType(i)] + = std::min(original_arch.memoryBound(i), sub_dag_processor_memory[original_arch.processorType(i)]); } sub_architecture.SetProcessorsConsequTypes(sub_dag_proc_types, sub_dag_processor_memory); return sub_architecture; @@ -156,10 +154,10 @@ class AbstractWavefrontScheduler : public Scheduler { } if (sum_of_compatible_works_for_rep > total_rep_work + 1e-9) { - if constexpr (enable_debug_prints) - std::cerr << "ERROR: Sum of compatible work (" << sum_of_compatible_works_for_rep - << ") exceeds total work (" << total_rep_work - << ") for a sub-dag. Aborting." << std::endl; + if constexpr (enable_debug_prints) { + std::cerr << "ERROR: Sum of compatible work (" << sum_of_compatible_works_for_rep << ") exceeds total work (" + << total_rep_work << ") for a sub-dag. Aborting." << std::endl; + } return false; } } @@ -167,8 +165,7 @@ class AbstractWavefrontScheduler : public Scheduler { } public: - AbstractWavefrontScheduler(IDagDivider &div, Scheduler &sched) - : divider(&div), scheduler(&sched) {} + AbstractWavefrontScheduler(IDagDivider &div, Scheduler &sched) : divider(&div), scheduler(&sched) {} }; -} // namespace osp +} // namespace osp diff --git a/include/osp/dag_divider/ConnectedComponentDivider.hpp b/include/osp/dag_divider/ConnectedComponentDivider.hpp index 5059d10a..7edc747f 100644 --- a/include/osp/dag_divider/ConnectedComponentDivider.hpp +++ b/include/osp/dag_divider/ConnectedComponentDivider.hpp @@ -27,13 +27,11 @@ limitations under the License. namespace osp { -template +template class ConnectedComponentDivider : public IDagDivider { - static_assert(is_computational_dag_v, "Graph must be a computational DAG"); static_assert(is_computational_dag_v, "Constr_Graph_t must be a computational DAG"); - static_assert(is_constructable_cdag_v, - "Constr_Graph_t must satisfy the constructable_cdag_vertex concept"); + static_assert(is_constructable_cdag_v, "Constr_Graph_t must satisfy the constructable_cdag_vertex concept"); static_assert(std::is_same_v, vertex_idx_t>, "Graph_t and Constr_Graph_t must have the same vertex_idx types"); @@ -53,14 +51,19 @@ class ConnectedComponentDivider : public IDagDivider { public: inline std::vector &get_sub_dags() { return sub_dags; } + inline const std::vector &get_sub_dags() const { return sub_dags; } + inline const std::vector> &get_vertex_mapping() const { return vertex_mapping; } + inline const std::vector &get_component() const { return component; } + inline const std::vector &get_vertex_map() const { return vertex_map; } virtual std::vector>>> divide(const Graph_t &dag) override { - if (dag.num_vertices() == 0) + if (dag.num_vertices() == 0) { return {}; + } bool has_more_than_one_connected_component = compute_connected_components(dag); @@ -68,8 +71,9 @@ class ConnectedComponentDivider : public IDagDivider { if (has_more_than_one_connected_component) { vertex_maps[0].resize(sub_dags.size()); - for (unsigned i = 0; i < sub_dags.size(); ++i) + for (unsigned i = 0; i < sub_dags.size(); ++i) { vertex_maps[0][i].resize(sub_dags[i].num_vertices()); + } for (const auto &v : dag.vertices()) { vertex_maps[0][component[v]][vertex_map[v]] = v; @@ -93,13 +97,13 @@ class ConnectedComponentDivider : public IDagDivider { return vertex_maps; } - std::vector>>> compute_vertex_maps(const Graph_t &dag) { std::vector>>> vertex_maps(1); vertex_maps[0].resize(sub_dags.size()); - for (unsigned i = 0; i < sub_dags.size(); ++i) + for (unsigned i = 0; i < sub_dags.size(); ++i) { vertex_maps[0][i].resize(sub_dags[i].num_vertices()); + } for (const auto &v : dag.vertices()) { vertex_maps[0][component[v]][vertex_map[v]] = v; @@ -168,8 +172,9 @@ class ConnectedComponentDivider : public IDagDivider { vertex_idx local_idx = current_index_in_subdag[comp_id]++; vertex_map[v] = local_idx; - if (vertex_mapping[comp_id].empty()) + if (vertex_mapping[comp_id].empty()) { vertex_mapping[comp_id].resize(sub_dags[comp_id].num_vertices()); + } vertex_mapping[comp_id][local_idx] = v; } @@ -178,4 +183,4 @@ class ConnectedComponentDivider : public IDagDivider { } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/dag_divider/ConnectedComponentScheduler.hpp b/include/osp/dag_divider/ConnectedComponentScheduler.hpp index a25a1ab6..7d6cdece 100644 --- a/include/osp/dag_divider/ConnectedComponentScheduler.hpp +++ b/include/osp/dag_divider/ConnectedComponentScheduler.hpp @@ -22,12 +22,10 @@ limitations under the License. #include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/graph_algorithms/computational_dag_util.hpp" - namespace osp { -template +template class ConnectedComponentScheduler : public Scheduler { - Scheduler *scheduler; public: @@ -36,7 +34,6 @@ class ConnectedComponentScheduler : public Scheduler { std::string getScheduleName() const override { return "SubDagScheduler"; } RETURN_STATUS computeSchedule(BspSchedule &schedule) override { - const auto &instance = schedule.getInstance(); const Graph_t &dag = instance.getComputationalDag(); @@ -57,8 +54,10 @@ class ConnectedComponentScheduler : public Scheduler { BspInstance sub_instance(sub_dag, instance.getArchitecture()); BspArchitecture &sub_architecture = sub_instance.getArchitecture(); - const double sub_dag_work_weight_percent = static_cast(sub_dag_work_weight) / static_cast(total_work_weight); - const unsigned sub_dag_processors = static_cast(sub_dag_work_weight_percent * sub_architecture.numberOfProcessors()); + const double sub_dag_work_weight_percent + = static_cast(sub_dag_work_weight) / static_cast(total_work_weight); + const unsigned sub_dag_processors + = static_cast(sub_dag_work_weight_percent * sub_architecture.numberOfProcessors()); sub_architecture.setNumberOfProcessors(sub_dag_processors); @@ -81,4 +80,4 @@ class ConnectedComponentScheduler : public Scheduler { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/DagDivider.hpp b/include/osp/dag_divider/DagDivider.hpp index ffa4559e..e89c2b01 100644 --- a/include/osp/dag_divider/DagDivider.hpp +++ b/include/osp/dag_divider/DagDivider.hpp @@ -17,9 +17,10 @@ limitations under the License. */ #pragma once +#include + #include "osp/concepts/directed_graph_concept.hpp" #include "osp/concepts/graph_traits.hpp" -#include namespace osp { @@ -28,10 +29,9 @@ namespace osp { * @brief Divides the wavefronts of a computational DAG into consecutive groups or sections. * */ -template +template class IDagDivider { - - static_assert(is_directed_graph_v, "Graph must be directed"); + static_assert(is_directed_graph_v, "Graph must be directed"); public: virtual ~IDagDivider() = default; @@ -50,4 +50,4 @@ class IDagDivider { virtual std::vector>>> divide(const Graph_t &dag) = 0; }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp b/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp index 270b7073..5b61736f 100644 --- a/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp +++ b/include/osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp @@ -25,9 +25,9 @@ namespace osp { * @class IsomorphicWavefrontComponentScheduler * @brief Schedules wavefronts by grouping isomorphic components. */ -template +template class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler { -public: + public: IsomorphicWavefrontComponentScheduler(IDagDivider &div, Scheduler &scheduler) : AbstractWavefrontScheduler(div, scheduler) {} @@ -43,19 +43,24 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< } IsomorphismGroups iso_groups; - std::vector>>> vertex_maps = this->divider->divide(instance.getComputationalDag()); + std::vector>>> vertex_maps + = this->divider->divide(instance.getComputationalDag()); iso_groups.compute_isomorphism_groups(vertex_maps, instance.getComputationalDag()); unsigned superstep_offset = 0; for (std::size_t i = 0; i < vertex_maps.size(); ++i) { - if (this->enable_debug_prints) std::cout << "\n--- Processing Wavefront Set " << i << " ---" << std::endl; - + if (this->enable_debug_prints) { + std::cout << "\n--- Processing Wavefront Set " << i << " ---" << std::endl; + } + unsigned supersteps_in_set = 0; - auto status = process_wavefront_set(schedule, vertex_maps[i], + auto status = process_wavefront_set(schedule, + vertex_maps[i], iso_groups.get_isomorphism_groups()[i], iso_groups.get_isomorphism_groups_subgraphs()[i], global_ids_by_type, - superstep_offset, supersteps_in_set); + superstep_offset, + supersteps_in_set); if (status != RETURN_STATUS::OSP_SUCCESS) { return status; } @@ -64,27 +69,25 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< return RETURN_STATUS::OSP_SUCCESS; } -private: - RETURN_STATUS process_wavefront_set( - BspSchedule& schedule, - const std::vector>>& vertex_map_for_set, - const std::vector>& iso_groups_for_set, - const std::vector& subgraphs_for_set, - const std::vector>& global_ids_by_type, - unsigned superstep_offset, - unsigned& supersteps_in_set) { - + private: + RETURN_STATUS process_wavefront_set(BspSchedule &schedule, + const std::vector>> &vertex_map_for_set, + const std::vector> &iso_groups_for_set, + const std::vector &subgraphs_for_set, + const std::vector> &global_ids_by_type, + unsigned superstep_offset, + unsigned &supersteps_in_set) { const auto &instance = schedule.getInstance(); const auto &original_arch = instance.getArchitecture(); - const auto& original_proc_type_count = original_arch.getProcessorTypeCount(); + const auto &original_proc_type_count = original_arch.getProcessorTypeCount(); if constexpr (this->enable_debug_prints) { std::cout << " Found " << iso_groups_for_set.size() << " isomorphism groups in this wavefront set." << std::endl; } // Calculate work for each isomorphism group - std::vector> group_work_by_type( - iso_groups_for_set.size(), std::vector(original_proc_type_count.size(), 0.0)); + std::vector> group_work_by_type(iso_groups_for_set.size(), + std::vector(original_proc_type_count.size(), 0.0)); for (std::size_t j = 0; j < iso_groups_for_set.size(); ++j) { const constr_graph_t &rep_sub_dag = subgraphs_for_set[j]; @@ -97,26 +100,28 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< assert(this->validateWorkDistribution(subgraphs_for_set, instance)); // Distribute processors among isomorphism groups - std::vector> group_proc_allocations(iso_groups_for_set.size(), std::vector(original_proc_type_count.size())); - - for(unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) { + std::vector> group_proc_allocations(iso_groups_for_set.size(), + std::vector(original_proc_type_count.size())); + + for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) { std::vector work_for_this_type; - for(size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) { + for (size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) { work_for_this_type.push_back(group_work_by_type[group_idx][type_idx]); } - + std::vector type_allocation; - bool starvation_hit = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation); + bool starvation_hit + = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation); if (starvation_hit) { if constexpr (this->enable_debug_prints) { - std::cerr << "ERROR: Processor starvation detected for type " << type_idx + std::cerr << "ERROR: Processor starvation detected for type " << type_idx << ". Not enough processors to assign one to each active isomorphism group." << std::endl; } return RETURN_STATUS::ERROR; } - - for(size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) { + + for (size_t group_idx = 0; group_idx < iso_groups_for_set.size(); ++group_idx) { group_proc_allocations[group_idx][type_idx] = type_allocation[group_idx]; } } @@ -124,15 +129,23 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< // Schedule each group unsigned max_supersteps = 0; std::vector proc_type_offsets(original_arch.getNumberOfProcessorTypes(), 0); - + std::vector num_supersteps_per_iso_group(iso_groups_for_set.size()); for (std::size_t j = 0; j < iso_groups_for_set.size(); ++j) { unsigned supersteps_for_group = 0; - auto status = schedule_isomorphism_group(schedule, vertex_map_for_set, iso_groups_for_set[j], subgraphs_for_set[j], - group_proc_allocations[j], global_ids_by_type, proc_type_offsets, - superstep_offset, supersteps_for_group); - if (status != RETURN_STATUS::OSP_SUCCESS) return status; + auto status = schedule_isomorphism_group(schedule, + vertex_map_for_set, + iso_groups_for_set[j], + subgraphs_for_set[j], + group_proc_allocations[j], + global_ids_by_type, + proc_type_offsets, + superstep_offset, + supersteps_for_group); + if (status != RETURN_STATUS::OSP_SUCCESS) { + return status; + } num_supersteps_per_iso_group[j] = supersteps_for_group; max_supersteps = std::max(max_supersteps, supersteps_for_group); @@ -144,34 +157,31 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< for (std::size_t j = 0; j < iso_groups_for_set.size(); ++j) { num_supersteps_per_iso_group[j] = max_supersteps - num_supersteps_per_iso_group[j]; - - if (num_supersteps_per_iso_group[j] > 0) { // This is the padding - const auto& group_members = iso_groups_for_set[j]; - for (const auto& original_comp_idx : group_members) { - const auto& component_vertices = vertex_map_for_set[original_comp_idx]; - for (const auto& vertex : component_vertices) { + + if (num_supersteps_per_iso_group[j] > 0) { // This is the padding + const auto &group_members = iso_groups_for_set[j]; + for (const auto &original_comp_idx : group_members) { + const auto &component_vertices = vertex_map_for_set[original_comp_idx]; + for (const auto &vertex : component_vertices) { schedule.setAssignedSuperstep(vertex, schedule.assignedSuperstep(vertex) + num_supersteps_per_iso_group[j]); } } } } - supersteps_in_set = max_supersteps; return RETURN_STATUS::OSP_SUCCESS; } - RETURN_STATUS schedule_isomorphism_group( - BspSchedule& schedule, - const std::vector>>& vertex_map_for_set, - const std::vector& group_members, - const constr_graph_t& rep_sub_dag, - const std::vector& procs_for_group, - const std::vector>& global_ids_by_type, - const std::vector& proc_type_offsets, - unsigned superstep_offset, - unsigned& supersteps_for_group) { - + RETURN_STATUS schedule_isomorphism_group(BspSchedule &schedule, + const std::vector>> &vertex_map_for_set, + const std::vector &group_members, + const constr_graph_t &rep_sub_dag, + const std::vector &procs_for_group, + const std::vector> &global_ids_by_type, + const std::vector &proc_type_offsets, + unsigned superstep_offset, + unsigned &supersteps_for_group) { const auto &instance = schedule.getInstance(); const auto &original_arch = instance.getArchitecture(); const size_t num_members = group_members.size(); @@ -189,11 +199,14 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< if (scarcity_found) { // --- SCARCITY/INDIVISIBLE CASE: Schedule sequentially on the shared processor block --- - if constexpr (this->enable_debug_prints) std::cout << " Group with " << num_members << " members: Scarcity/Indivisible case. Scheduling sequentially." << std::endl; - + if constexpr (this->enable_debug_prints) { + std::cout << " Group with " << num_members << " members: Scarcity/Indivisible case. Scheduling sequentially." + << std::endl; + } + BspInstance sub_instance(rep_sub_dag, this->createSubArchitecture(original_arch, procs_for_group)); sub_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); - auto & sub_architecture = sub_instance.getArchitecture(); + auto &sub_architecture = sub_instance.getArchitecture(); if constexpr (this->enable_debug_prints) { std::cout << " Sub-architecture for sequential scheduling: { "; @@ -207,7 +220,9 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< for (const auto &group_member_idx : group_members) { BspSchedule sub_schedule(sub_instance); auto status = this->scheduler->computeSchedule(sub_schedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return status; + if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + return status; + } const auto sub_proc_type_count = sub_architecture.getProcessorTypeCount(); std::vector sub_proc_type_corrections(sub_architecture.getNumberOfProcessorTypes(), 0); @@ -215,18 +230,21 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< sub_proc_type_corrections[k] = sub_proc_type_corrections[k - 1] + sub_proc_type_count[k - 1]; } - std::vector> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(), vertex_map_for_set[group_member_idx].end()); + std::vector> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(), + vertex_map_for_set[group_member_idx].end()); std::sort(sorted_component_vertices.begin(), sorted_component_vertices.end()); - + vertex_idx_t subdag_vertex = 0; for (const auto &vertex : sorted_component_vertices) { const unsigned proc_in_sub_sched = sub_schedule.assignedProcessor(subdag_vertex); const unsigned proc_type = sub_architecture.processorType(proc_in_sub_sched); const unsigned local_proc_id_within_type = proc_in_sub_sched - sub_proc_type_corrections[proc_type]; - unsigned global_proc_id = global_ids_by_type[proc_type][proc_type_offsets[proc_type] + local_proc_id_within_type]; - + unsigned global_proc_id + = global_ids_by_type[proc_type][proc_type_offsets[proc_type] + local_proc_id_within_type]; + schedule.setAssignedProcessor(vertex, global_proc_id); - schedule.setAssignedSuperstep(vertex, superstep_offset + sequential_superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex)); + schedule.setAssignedSuperstep( + vertex, superstep_offset + sequential_superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex)); subdag_vertex++; } sequential_superstep_offset += sub_schedule.numberOfSupersteps(); @@ -235,29 +253,37 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< } else { // --- ABUNDANCE/DIVISIBLE CASE: Replicate Schedule --- - if constexpr (this->enable_debug_prints) std::cout << " Group with " << num_members << " members: Abundance/Divisible case. Replicating schedule." << std::endl; + if constexpr (this->enable_debug_prints) { + std::cout << " Group with " << num_members << " members: Abundance/Divisible case. Replicating schedule." + << std::endl; + } std::vector single_sub_dag_proc_types = procs_for_group; if (num_members > 0) { - for(auto& count : single_sub_dag_proc_types) count /= static_cast(num_members); + for (auto &count : single_sub_dag_proc_types) { + count /= static_cast(num_members); + } } - BspInstance sub_instance(rep_sub_dag, this->createSubArchitecture(original_arch, single_sub_dag_proc_types)); + BspInstance sub_instance(rep_sub_dag, + this->createSubArchitecture(original_arch, single_sub_dag_proc_types)); sub_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); - + if constexpr (this->enable_debug_prints) { - const auto& sub_arch = sub_instance.getArchitecture(); + const auto &sub_arch = sub_instance.getArchitecture(); std::cout << " Sub-architecture for replication (per member): { "; for (unsigned type_idx = 0; type_idx < sub_arch.getNumberOfProcessorTypes(); ++type_idx) { std::cout << "Type " << type_idx << ": " << sub_arch.getProcessorTypeCount()[type_idx] << "; "; } std::cout << "}" << std::endl; } - + BspSchedule sub_schedule(sub_instance); auto status = this->scheduler->computeSchedule(sub_schedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return status; - + if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + return status; + } + const auto sub_proc_type_count = sub_schedule.getInstance().getArchitecture().getProcessorTypeCount(); std::vector sub_proc_type_corrections(sub_proc_type_count.size(), 0); for (std::size_t k = 1; k < sub_proc_type_corrections.size(); ++k) { @@ -266,7 +292,8 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< std::vector current_member_proc_offsets = proc_type_offsets; for (const auto &group_member_idx : group_members) { - std::vector> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(), vertex_map_for_set[group_member_idx].end()); + std::vector> sorted_component_vertices(vertex_map_for_set[group_member_idx].begin(), + vertex_map_for_set[group_member_idx].end()); std::sort(sorted_component_vertices.begin(), sorted_component_vertices.end()); vertex_idx_t subdag_vertex = 0; @@ -274,8 +301,9 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< const unsigned proc_in_sub_sched = sub_schedule.assignedProcessor(subdag_vertex); const unsigned proc_type = sub_schedule.getInstance().getArchitecture().processorType(proc_in_sub_sched); const unsigned local_proc_id_within_type = proc_in_sub_sched - sub_proc_type_corrections[proc_type]; - unsigned global_proc_id = global_ids_by_type[proc_type][current_member_proc_offsets[proc_type] + local_proc_id_within_type]; - + unsigned global_proc_id + = global_ids_by_type[proc_type][current_member_proc_offsets[proc_type] + local_proc_id_within_type]; + schedule.setAssignedProcessor(vertex, global_proc_id); schedule.setAssignedSuperstep(vertex, superstep_offset + sub_schedule.assignedSuperstep(subdag_vertex)); subdag_vertex++; @@ -290,7 +318,7 @@ class IsomorphicWavefrontComponentScheduler : public AbstractWavefrontScheduler< } }; -template +template using IsomorphicWavefrontComponentScheduler_def_int_t = IsomorphicWavefrontComponentScheduler; -} +} // namespace osp diff --git a/include/osp/dag_divider/IsomorphismGroups.hpp b/include/osp/dag_divider/IsomorphismGroups.hpp index c8e1db11..46b91c3d 100644 --- a/include/osp/dag_divider/IsomorphismGroups.hpp +++ b/include/osp/dag_divider/IsomorphismGroups.hpp @@ -22,27 +22,24 @@ limitations under the License. #include #include "osp/concepts/graph_traits.hpp" -#include "osp/graph_algorithms/subgraph_algorithms.hpp" #include "osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp" +#include "osp/graph_algorithms/subgraph_algorithms.hpp" namespace osp { -template +template class IsomorphismGroups { - private: std::vector>> isomorphism_groups; std::vector> isomorphism_groups_subgraphs; void print_isomorphism_groups() const { - std::cout << "Isomorphism groups: " << std::endl; for (std::size_t i = 0; i < isomorphism_groups.size(); i++) { std::cout << "Level " << i << std::endl; for (size_t j = 0; j < isomorphism_groups[i].size(); j++) { - std::cout << "Group " << j << " of size " << isomorphism_groups_subgraphs[i][j].num_vertices() - << " : "; + std::cout << "Group " << j << " of size " << isomorphism_groups_subgraphs[i][j].num_vertices() << " : "; // ComputationalDagWriter writer(isomorphism_groups_subgraphs[i][j]); // writer.write_dot("isomorphism_group_" + std::to_string(i) + "_" + std::to_string(j) + ".dot"); @@ -72,13 +69,9 @@ class IsomorphismGroups { * @return const std::vector>>& * A constant reference to the vertex maps. */ - const std::vector>> &get_isomorphism_groups() const { - return isomorphism_groups; - } + const std::vector>> &get_isomorphism_groups() const { return isomorphism_groups; } - std::vector>> &get_isomorphism_groups() { - return isomorphism_groups; - } + std::vector>> &get_isomorphism_groups() { return isomorphism_groups; } /** * @brief Retrieves the isomorphism groups subgraphs. @@ -105,25 +98,19 @@ class IsomorphismGroups { * * Reqires the dag to be divided before calling this function. */ - void compute_isomorphism_groups(std::vector>>> &vertex_maps, - const Graph_t &dag) { - + void compute_isomorphism_groups(std::vector>>> &vertex_maps, const Graph_t &dag) { isomorphism_groups = std::vector>>(vertex_maps.size()); isomorphism_groups_subgraphs = std::vector>(vertex_maps.size()); for (size_t i = 0; i < vertex_maps.size(); i++) { - for (std::size_t j = 0; j < vertex_maps[i].size(); j++) { - Constr_Graph_t current_subgraph; create_induced_subgraph(dag, current_subgraph, vertex_maps[i][j]); bool isomorphism_group_found = false; for (size_t k = 0; k < isomorphism_groups[i].size(); k++) { - if (are_isomorphic_by_merkle_hash(isomorphism_groups_subgraphs[i][k], current_subgraph)) { - isomorphism_groups[i][k].emplace_back(j); isomorphism_group_found = true; break; @@ -131,7 +118,6 @@ class IsomorphismGroups { } if (!isomorphism_group_found) { - isomorphism_groups[i].emplace_back(std::vector{j}); isomorphism_groups_subgraphs[i].emplace_back(std::move(current_subgraph)); } @@ -148,30 +134,27 @@ class IsomorphismGroups { * @param merge_threshold If a group has more members than this, it will be merged. * @param target_group_count The number of larger groups to create from a single large group. */ - void merge_large_isomorphism_groups( - std::vector>>>& vertex_maps, - const Graph_t& dag, - size_t merge_threshold, - size_t target_group_count = 8) { - + void merge_large_isomorphism_groups(std::vector>>> &vertex_maps, + const Graph_t &dag, + size_t merge_threshold, + size_t target_group_count = 8) { // Ensure the merge logic is sound: the threshold must be larger than the target. assert(merge_threshold > target_group_count); for (size_t i = 0; i < isomorphism_groups.size(); ++i) { - std::vector>> new_vertex_maps_for_level; std::vector> new_iso_groups_for_level; std::vector new_iso_subgraphs_for_level; - + size_t new_component_idx = 0; for (size_t j = 0; j < isomorphism_groups[i].size(); ++j) { - const auto& group = isomorphism_groups[i][j]; - + const auto &group = isomorphism_groups[i][j]; + if (group.size() <= merge_threshold) { // This group is small enough, copy it over as is. std::vector new_group; - for (const auto& original_comp_idx : group) { + for (const auto &original_comp_idx : group) { new_vertex_maps_for_level.push_back(vertex_maps[i][original_comp_idx]); new_group.push_back(new_component_idx++); } @@ -179,11 +162,12 @@ class IsomorphismGroups { new_iso_subgraphs_for_level.push_back(isomorphism_groups_subgraphs[i][j]); } else { // This group is too large and needs to be merged. - std::cout << "Merging iso group of size " << group.size() << " into " << target_group_count << " new groups." << std::endl; - + std::cout << "Merging iso group of size " << group.size() << " into " << target_group_count << " new groups." + << std::endl; + size_t base_mult = group.size() / target_group_count; size_t remainder = group.size() % target_group_count; - + std::vector new_merged_group_indices; size_t current_original_idx = 0; @@ -192,7 +176,7 @@ class IsomorphismGroups { size_t num_to_merge = base_mult + (k < remainder ? 1 : 0); for (size_t m = 0; m < num_to_merge; ++m) { - const auto& original_comp = vertex_maps[i][group[current_original_idx++]]; + const auto &original_comp = vertex_maps[i][group[current_original_idx++]]; merged_component.insert(merged_component.end(), original_comp.begin(), original_comp.end()); } std::sort(merged_component.begin(), merged_component.end()); @@ -215,4 +199,4 @@ class IsomorphismGroups { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/WavefrontComponentScheduler.hpp b/include/osp/dag_divider/WavefrontComponentScheduler.hpp index 8c7e2845..a38d801f 100644 --- a/include/osp/dag_divider/WavefrontComponentScheduler.hpp +++ b/include/osp/dag_divider/WavefrontComponentScheduler.hpp @@ -24,9 +24,9 @@ namespace osp { * @class WavefrontComponentScheduler * @brief Schedules wavefronts by treating each component individually. */ -template +template class WavefrontComponentScheduler : public AbstractWavefrontScheduler { -public: + public: WavefrontComponentScheduler(IDagDivider &div, Scheduler &scheduler_) : AbstractWavefrontScheduler(div, scheduler_) {} @@ -35,8 +35,8 @@ class WavefrontComponentScheduler : public AbstractWavefrontScheduler &schedule) override { const auto &instance = schedule.getInstance(); const auto &original_arch = instance.getArchitecture(); - const auto& original_proc_type_count = original_arch.getProcessorTypeCount(); - const auto& computational_dag = instance.getComputationalDag(); + const auto &original_proc_type_count = original_arch.getProcessorTypeCount(); + const auto &computational_dag = instance.getComputationalDag(); std::vector> global_ids_by_type(original_arch.getNumberOfProcessorTypes()); for (unsigned i = 0; i < original_arch.numberOfProcessors(); ++i) { @@ -46,14 +46,17 @@ class WavefrontComponentScheduler : public AbstractWavefrontSchedulerdivider->divide(computational_dag); unsigned superstep_offset = 0; - for (std::size_t i = 0; i < vertex_maps.size(); ++i) { // For each wavefront set - if (this->enable_debug_prints) std::cout << "\n--- Processing Wavefront Set " << i << " (No Isomorphism) ---" << std::endl; - - const auto& components = vertex_maps[i]; + for (std::size_t i = 0; i < vertex_maps.size(); ++i) { // For each wavefront set + if (this->enable_debug_prints) { + std::cout << "\n--- Processing Wavefront Set " << i << " (No Isomorphism) ---" << std::endl; + } + + const auto &components = vertex_maps[i]; std::vector sub_dags(components.size()); - std::vector> work_by_type(components.size(), std::vector(original_proc_type_count.size(), 0.0)); + std::vector> work_by_type(components.size(), + std::vector(original_proc_type_count.size(), 0.0)); - for(size_t j = 0; j < components.size(); ++j) { + for (size_t j = 0; j < components.size(); ++j) { create_induced_subgraph(computational_dag, sub_dags[j], components[j]); for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) { work_by_type[j][type_idx] = sumOfCompatibleWorkWeights(sub_dags[j], instance, type_idx); @@ -63,25 +66,27 @@ class WavefrontComponentScheduler : public AbstractWavefrontSchedulervalidateWorkDistribution(sub_dags, instance)); // Distribute Processors - std::vector> proc_allocations(components.size(), std::vector(original_proc_type_count.size())); - for(unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) { + std::vector> proc_allocations(components.size(), + std::vector(original_proc_type_count.size())); + for (unsigned type_idx = 0; type_idx < original_proc_type_count.size(); ++type_idx) { std::vector work_for_this_type(components.size()); - for(size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) { + for (size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) { work_for_this_type[comp_idx] = work_by_type[comp_idx][type_idx]; } - + std::vector type_allocation; - bool starvation_hit = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation); + bool starvation_hit + = this->distributeProcessors(original_proc_type_count[type_idx], work_for_this_type, type_allocation); if (starvation_hit) { if constexpr (this->enable_debug_prints) { - std::cerr << "ERROR: Processor starvation detected for type " << type_idx - << " in wavefront set " << i << ". Not enough processors to assign one to each active component." << std::endl; + std::cerr << "ERROR: Processor starvation detected for type " << type_idx << " in wavefront set " << i + << ". Not enough processors to assign one to each active component." << std::endl; } return RETURN_STATUS::ERROR; } - for(size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) { + for (size_t comp_idx = 0; comp_idx < components.size(); ++comp_idx) { proc_allocations[comp_idx][type_idx] = type_allocation[comp_idx]; } } @@ -104,7 +109,9 @@ class WavefrontComponentScheduler : public AbstractWavefrontScheduler sub_schedule(sub_instance); const auto status = this->scheduler->computeSchedule(sub_schedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) return status; + if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { + return status; + } const auto sub_proc_type_count = sub_architecture.getProcessorTypeCount(); std::vector sub_proc_type_corrections(sub_architecture.getNumberOfProcessorTypes(), 0); @@ -120,8 +127,9 @@ class WavefrontComponentScheduler : public AbstractWavefrontScheduler +template using WavefrontComponentScheduler_def_int_t = WavefrontComponentScheduler; -} \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp index 30b878c1..d7a461b0 100644 --- a/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp @@ -18,33 +18,35 @@ limitations under the License. #pragma once - -#include "osp/graph_algorithms/directed_graph_util.hpp" -#include "osp/bsp/model/BspInstance.hpp" -#include -#include -#include -#include #include #include +#include #include +#include #include +#include +#include + +#include "osp/bsp/model/BspInstance.hpp" +#include "osp/graph_algorithms/directed_graph_util.hpp" namespace osp { struct SubgraphSchedule { double makespan; - std::vector> node_assigned_worker_per_type; + std::vector> node_assigned_worker_per_type; std::vector was_trimmed; }; -template +template class EftSubgraphScheduler { -public: - + public: EftSubgraphScheduler() = default; - SubgraphSchedule run(const BspInstance& instance, const std::vector& multiplicities, const std::vector>> & required_proc_types, const std::vector& max_num_procs) { + SubgraphSchedule run(const BspInstance &instance, + const std::vector &multiplicities, + const std::vector>> &required_proc_types, + const std::vector &max_num_procs) { prepare_for_scheduling(instance, multiplicities, required_proc_types, max_num_procs); return execute_schedule(instance); } @@ -53,21 +55,14 @@ class EftSubgraphScheduler { min_work_per_processor_ = min_work_per_processor; } -private: - + private: static constexpr bool verbose = false; using job_id_t = vertex_idx_t; v_workw_t min_work_per_processor_ = 2000; - enum class JobStatus { - WAITING, - READY, - RUNNING, - COMPLETED - }; - + enum class JobStatus { WAITING, READY, RUNNING, COMPLETED }; struct Job { job_id_t id; @@ -78,7 +73,7 @@ class EftSubgraphScheduler { unsigned max_num_procs = 1; job_id_t in_degree_current = 0; - + JobStatus status = JobStatus::WAITING; v_workw_t upward_rank = 0.0; @@ -86,35 +81,39 @@ class EftSubgraphScheduler { std::vector assigned_workers; double start_time = -1.0; double finish_time = -1.0; - }; // Custom comparator for storing Job pointers in the ready set, sorted by rank. struct JobPtrCompare { - bool operator()(const Job* lhs, const Job* rhs) const { + bool operator()(const Job *lhs, const Job *rhs) const { if (lhs->upward_rank != rhs->upward_rank) { return lhs->upward_rank > rhs->upward_rank; } - return lhs->id > rhs->id; // Tie-breaking + return lhs->id > rhs->id; // Tie-breaking } }; std::vector jobs_; - std::set ready_jobs_; + std::set ready_jobs_; - void prepare_for_scheduling(const BspInstance& instance, const std::vector& multiplicities, const std::vector>> & required_proc_types, const std::vector& max_num_procs) { + void prepare_for_scheduling(const BspInstance &instance, + const std::vector &multiplicities, + const std::vector>> &required_proc_types, + const std::vector &max_num_procs) { jobs_.resize(instance.numberOfVertices()); if constexpr (verbose) { std::cout << "--- Preparing for Subgraph Scheduling ---" << std::endl; } - const auto & graph = instance.getComputationalDag(); + const auto &graph = instance.getComputationalDag(); const size_t num_worker_types = instance.getArchitecture().getProcessorTypeCount().size(); calculate_upward_ranks(graph); - if constexpr (verbose) std::cout << "Initializing jobs..." << std::endl; + if constexpr (verbose) { + std::cout << "Initializing jobs..." << std::endl; + } job_id_t idx = 0; - for (auto& job : jobs_) { + for (auto &job : jobs_) { job.id = idx; job.in_degree_current = graph.in_degree(idx); if (job.in_degree_current == 0) { @@ -124,68 +123,74 @@ class EftSubgraphScheduler { job.status = JobStatus::WAITING; } job.total_work = graph.vertex_work_weight(idx); - job.max_num_procs = std::min(max_num_procs[idx], static_cast((job.total_work + min_work_per_processor_ - 1) / min_work_per_processor_)); + job.max_num_procs + = std::min(max_num_procs[idx], + static_cast((job.total_work + min_work_per_processor_ - 1) / min_work_per_processor_)); job.multiplicity = std::min(multiplicities[idx], job.max_num_procs); - job.required_proc_types = required_proc_types[idx]; + job.required_proc_types = required_proc_types[idx]; job.assigned_workers.resize(num_worker_types, 0); job.start_time = -1.0; job.finish_time = -1.0; if constexpr (verbose) { - std::cout << " - Job " << idx << ": rank=" << job.upward_rank << ", mult=" << job.multiplicity << ", max_procs=" << job.max_num_procs - << ", work=" << job.total_work << ", status=" << (job.status == JobStatus::READY ? "READY" : "WAITING") << std::endl; + std::cout << " - Job " << idx << ": rank=" << job.upward_rank << ", mult=" << job.multiplicity + << ", max_procs=" << job.max_num_procs << ", work=" << job.total_work + << ", status=" << (job.status == JobStatus::READY ? "READY" : "WAITING") << std::endl; } idx++; - } + } } - void calculate_upward_ranks(const Graph_t & graph) { + void calculate_upward_ranks(const Graph_t &graph) { const auto reverse_top_order = GetTopOrderReverse(graph); - for (const auto& vertex : reverse_top_order) { + for (const auto &vertex : reverse_top_order) { v_workw_t max_successor_rank = 0.0; - for (const auto& child : graph.children(vertex)) { + for (const auto &child : graph.children(vertex)) { max_successor_rank = std::max(max_successor_rank, jobs_.at(child).upward_rank); } - - Job& job = jobs_.at(vertex); + + Job &job = jobs_.at(vertex); job.upward_rank = graph.vertex_work_weight(vertex) + max_successor_rank; } } - SubgraphSchedule execute_schedule(const BspInstance& instance) { - double current_time = 0.0; + SubgraphSchedule execute_schedule(const BspInstance &instance) { + double current_time = 0.0; std::vector available_workers = instance.getArchitecture().getProcessorTypeCount(); const size_t num_worker_types = available_workers.size(); std::vector running_jobs; unsigned completed_count = 0; - const auto& graph = instance.getComputationalDag(); + const auto &graph = instance.getComputationalDag(); if constexpr (verbose) { std::cout << "\n--- Subgraph Scheduling Execution Started ---" << std::endl; std::cout << "Total jobs: " << jobs_.size() << std::endl; std::cout << "Initial available workers: "; - for(size_t i=0; i jobs_to_start; + std::vector jobs_to_start; v_workw_t total_runnable_priority = 0.0; // Iterate through ready jobs and assign minimum resources if available. - for (const Job* job_ptr : ready_jobs_) { - Job& job = jobs_[job_ptr->id]; + for (const Job *job_ptr : ready_jobs_) { + Job &job = jobs_[job_ptr->id]; bool can_start = true; for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { if (job.required_proc_types[type_idx] > 0 && available_workers[type_idx] < job.multiplicity) { @@ -211,21 +216,31 @@ class EftSubgraphScheduler { std::cout << "Allocating workers to " << jobs_to_start.size() << " runnable jobs..." << std::endl; } - //Distribute remaining workers proportionally among the jobs that just started. + // Distribute remaining workers proportionally among the jobs that just started. const std::vector remaining_workers_pool = available_workers; - for (Job* job_ptr : jobs_to_start) { - Job& job = *job_ptr; + for (Job *job_ptr : jobs_to_start) { + Job &job = *job_ptr; for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { if (job.required_proc_types[type_idx] > 0) { - const unsigned current_total_assigned = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u); - const unsigned max_additional_workers = (job.max_num_procs > current_total_assigned) ? (job.max_num_procs - current_total_assigned) : 0; - - const double proportion = (total_runnable_priority > 0) ? (static_cast(job.upward_rank) / static_cast(total_runnable_priority)) : (1.0 / static_cast(jobs_to_start.size())); - const unsigned proportional_share = static_cast(static_cast(remaining_workers_pool[type_idx]) * proportion); - const unsigned num_proportional_chunks = (job.multiplicity > 0) ? proportional_share / job.multiplicity : 0; - const unsigned num_available_chunks = (job.multiplicity > 0) ? available_workers[type_idx] / job.multiplicity : 0; - const unsigned num_chunks_allowed_by_max = (job.multiplicity > 0) ? max_additional_workers / job.multiplicity : 0; - const unsigned num_chunks_to_assign = std::min({num_proportional_chunks, num_available_chunks, num_chunks_allowed_by_max}); + const unsigned current_total_assigned + = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u); + const unsigned max_additional_workers + = (job.max_num_procs > current_total_assigned) ? (job.max_num_procs - current_total_assigned) : 0; + + const double proportion + = (total_runnable_priority > 0) + ? (static_cast(job.upward_rank) / static_cast(total_runnable_priority)) + : (1.0 / static_cast(jobs_to_start.size())); + const unsigned proportional_share + = static_cast(static_cast(remaining_workers_pool[type_idx]) * proportion); + const unsigned num_proportional_chunks + = (job.multiplicity > 0) ? proportional_share / job.multiplicity : 0; + const unsigned num_available_chunks + = (job.multiplicity > 0) ? available_workers[type_idx] / job.multiplicity : 0; + const unsigned num_chunks_allowed_by_max + = (job.multiplicity > 0) ? max_additional_workers / job.multiplicity : 0; + const unsigned num_chunks_to_assign + = std::min({num_proportional_chunks, num_available_chunks, num_chunks_allowed_by_max}); const unsigned assigned = num_chunks_to_assign * job.multiplicity; job.assigned_workers[type_idx] += assigned; available_workers[type_idx] -= assigned; @@ -233,13 +248,15 @@ class EftSubgraphScheduler { } } - //Greedily assign any remaining workers to the highest-rank jobs that can take them. - for (Job* job_ptr : jobs_to_start) { - Job& job = *job_ptr; + // Greedily assign any remaining workers to the highest-rank jobs that can take them. + for (Job *job_ptr : jobs_to_start) { + Job &job = *job_ptr; for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { if (job.required_proc_types[type_idx] > 0 && job.multiplicity > 0) { - const unsigned current_total_assigned = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u); - const unsigned max_additional_workers = (job.max_num_procs > current_total_assigned) ? (job.max_num_procs - current_total_assigned) : 0; + const unsigned current_total_assigned + = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u); + const unsigned max_additional_workers + = (job.max_num_procs > current_total_assigned) ? (job.max_num_procs - current_total_assigned) : 0; const unsigned num_available_chunks = available_workers[type_idx] / job.multiplicity; const unsigned num_chunks_allowed_by_max = max_additional_workers / job.multiplicity; const unsigned assigned = std::min(num_available_chunks, num_chunks_allowed_by_max) * job.multiplicity; @@ -249,15 +266,17 @@ class EftSubgraphScheduler { } } - for (Job* job_ptr : jobs_to_start) { - Job& job = *job_ptr; + for (Job *job_ptr : jobs_to_start) { + Job &job = *job_ptr; job.status = JobStatus::RUNNING; job.start_time = current_time; // Calculate finish time based on total work and total assigned workers. unsigned total_assigned_workers = std::accumulate(job.assigned_workers.begin(), job.assigned_workers.end(), 0u); - double exec_time = (total_assigned_workers > 0) ? static_cast(job.total_work) / static_cast(total_assigned_workers) : 0.0; + double exec_time = (total_assigned_workers > 0) + ? static_cast(job.total_work) / static_cast(total_assigned_workers) + : 0.0; job.finish_time = current_time + exec_time; running_jobs.push_back(job.id); @@ -267,14 +286,14 @@ class EftSubgraphScheduler { // 2. ADVANCE TIME if (running_jobs.empty() && completed_count < jobs_.size()) { - std::cerr << "Error: Deadlock detected. No running jobs and " - << jobs_.size() - completed_count << " jobs incomplete." << std::endl; + std::cerr << "Error: Deadlock detected. No running jobs and " << jobs_.size() - completed_count + << " jobs incomplete." << std::endl; if constexpr (verbose) { std::cout << "Deadlock! Ready queue:" << std::endl; - for (const auto* ready_job_ptr : ready_jobs_) { - const Job& job = *ready_job_ptr; + for (const auto *ready_job_ptr : ready_jobs_) { + const Job &job = *ready_job_ptr; std::cout << " - Job " << job.id << " (mult " << job.multiplicity << ") needs workers: "; - for(size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { + for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { if (job.required_proc_types[type_idx] > 0) { std::cout << "T" << type_idx << ":" << job.multiplicity << " "; } @@ -282,47 +301,59 @@ class EftSubgraphScheduler { std::cout << std::endl; } std::cout << "Available workers: "; - for(size_t i=0; i::max(); for (job_id_t id : running_jobs) { next_event_time = std::min(next_event_time, jobs_.at(id).finish_time); } - if constexpr (verbose) std::cout << "Advancing time from " << current_time << " to " << next_event_time << std::endl; + if constexpr (verbose) { + std::cout << "Advancing time from " << current_time << " to " << next_event_time << std::endl; + } current_time = next_event_time; // 3. PROCESS COMPLETED JOBS auto it = running_jobs.begin(); while (it != running_jobs.end()) { - Job& job = jobs_.at(*it); + Job &job = jobs_.at(*it); if (job.finish_time <= current_time) { job.status = JobStatus::COMPLETED; - if constexpr (verbose) std::cout << "Job " << job.id << " finished at T=" << current_time << std::endl; + if constexpr (verbose) { + std::cout << "Job " << job.id << " finished at T=" << current_time << std::endl; + } // Release workers - for(size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { + for (size_t type_idx = 0; type_idx < num_worker_types; ++type_idx) { available_workers[type_idx] += job.assigned_workers[type_idx]; } completed_count++; // Update successors - if constexpr (verbose) std::cout << " - Updating successors..." << std::endl; - for (const auto& successor_id : graph.children(job.id)) { - Job& successor_job = jobs_.at(successor_id); + if constexpr (verbose) { + std::cout << " - Updating successors..." << std::endl; + } + for (const auto &successor_id : graph.children(job.id)) { + Job &successor_job = jobs_.at(successor_id); successor_job.in_degree_current--; if (successor_job.in_degree_current == 0) { successor_job.status = JobStatus::READY; ready_jobs_.insert(&successor_job); - if constexpr (verbose) std::cout << " - Successor " << successor_job.id << " is now READY." << std::endl; + if constexpr (verbose) { + std::cout << " - Successor " << successor_job.id << " is now READY." << std::endl; + } } } - it = running_jobs.erase(it); // Remove from running list + it = running_jobs.erase(it); // Remove from running list } else { ++it; } @@ -333,10 +364,12 @@ class EftSubgraphScheduler { std::cout << "\n--- Subgraph Scheduling Finished ---" << std::endl; std::cout << "Final Makespan: " << current_time << std::endl; std::cout << "Job Summary:" << std::endl; - for(const auto& job : jobs_) { - std::cout << " - Job " << job.id << ": Multiplicity=" << job.multiplicity << ", Max Procs=" << job.max_num_procs << ", Work=" << job.total_work << ", Start=" << job.start_time << ", Finish=" << job.finish_time << ", Workers=["; - for(size_t i=0; i #include +#include namespace osp { /** -* @class HashComputer -* @brief Abstract base class for computing and managing hash values and orbits for graph vertices. -* -* This class provides an interface for obtaining hash values for individual vertices, -* the full list of vertex hashes, the number of unique orbits, and the vertices belonging to specific orbits. -* -* @tparam index_type The type used for indexing vertices in the graph. -*/ -template + * @class HashComputer + * @brief Abstract base class for computing and managing hash values and orbits for graph vertices. + * + * This class provides an interface for obtaining hash values for individual vertices, + * the full list of vertex hashes, the number of unique orbits, and the vertices belonging to specific orbits. + * + * @tparam index_type The type used for indexing vertices in the graph. + */ +template class HashComputer { -public: + public: virtual ~HashComputer() = default; virtual std::size_t get_vertex_hash(const index_type &v) const = 0; virtual const std::vector &get_vertex_hashes() const = 0; virtual std::size_t num_orbits() const = 0; - + virtual const std::vector &get_orbit(const index_type &v) const = 0; virtual const std::unordered_map> &get_orbits() const = 0; - virtual const std::vector& get_orbit_from_hash(const std::size_t& hash) const = 0; + virtual const std::vector &get_orbit_from_hash(const std::size_t &hash) const = 0; }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp index 83556089..507fa12e 100644 --- a/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp @@ -16,6 +16,11 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ +#include +#include +#include +#include + #include "EftSubgraphScheduler.hpp" #include "HashComputer.hpp" #include "MerkleHashComputer.hpp" @@ -24,10 +29,6 @@ limitations under the License. #include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/graph_algorithms/subgraph_algorithms.hpp" -#include -#include -#include -#include namespace osp { @@ -49,12 +50,11 @@ namespace osp { * @tparam Graph_t The type of the input computational DAG. * @tparam Constr_Graph_t The type of the constructable computational DAG used for internal representations. */ -template +template class IsomorphicSubgraphScheduler { static_assert(is_computational_dag_v, "Graph must be a computational DAG"); static_assert(is_computational_dag_v, "Constr_Graph_t must be a computational DAG"); - static_assert(is_constructable_cdag_v, - "Constr_Graph_t must satisfy the constructable_cdag_vertex concept"); + static_assert(is_constructable_cdag_v, "Constr_Graph_t must satisfy the constructable_cdag_vertex concept"); static_assert(std::is_same_v, vertex_idx_t>, "Graph_t and Constr_Graph_t must have the same vertex_idx types"); @@ -85,19 +85,34 @@ class IsomorphicSubgraphScheduler { virtual ~IsomorphicSubgraphScheduler() {} void setMergeDifferentTypes(bool flag) { merge_different_node_types = flag; } + void setWorkThreshold(v_workw_t work_threshold) { work_threshold_ = work_threshold; } - void setCriticalPathThreshold(v_workw_t critical_path_threshold) { critical_path_threshold_ = critical_path_threshold; } + + void setCriticalPathThreshold(v_workw_t critical_path_threshold) { + critical_path_threshold_ = critical_path_threshold; + } + void setOrbitLockRatio(double orbit_lock_ratio) { orbit_lock_ratio_ = orbit_lock_ratio; } - void setNaturalBreaksCountPercentage(double natural_breaks_count_percentage) { natural_breaks_count_percentage_ = natural_breaks_count_percentage; } + + void setNaturalBreaksCountPercentage(double natural_breaks_count_percentage) { + natural_breaks_count_percentage_ = natural_breaks_count_percentage; + } + void setAllowTrimmedScheduler(bool flag) { allow_use_trimmed_scheduler = flag; } + void set_plot_dot_graphs(bool plot) { plot_dot_graphs_ = plot; } + void disable_use_max_group_size() { use_max_group_size_ = false; } + void setUseMaxBsp(bool flag) { use_max_bsp = flag; } + void enable_use_max_group_size(const unsigned max_group_size) { use_max_group_size_ = true; max_group_size_ = max_group_size; } + void setEnableAdaptiveSymmetryThreshold() { use_adaptive_symmetry_threshold = true; } + void setUseStaticSymmetryLevel(size_t static_symmetry_level) { use_adaptive_symmetry_threshold = false; symmetry_ = static_symmetry_level; @@ -116,7 +131,8 @@ class IsomorphicSubgraphScheduler { std::unique_ptr>> local_hasher; if (!hash_computer_) { - local_hasher = std::make_unique, true>>(instance.getComputationalDag(), instance.getComputationalDag()); + local_hasher = std::make_unique, true>>( + instance.getComputationalDag(), instance.getComputationalDag()); hash_computer_ = local_hasher.get(); } @@ -125,13 +141,14 @@ class IsomorphicSubgraphScheduler { auto isomorphic_groups = orbit_processor.get_final_groups(); std::vector was_trimmed(isomorphic_groups.size(), false); - trim_subgraph_groups(isomorphic_groups, instance, was_trimmed); // Apply trimming and record which groups were affected + trim_subgraph_groups(isomorphic_groups, instance, was_trimmed); // Apply trimming and record which groups were affected auto input = prepare_subgraph_scheduling_input(instance, isomorphic_groups, was_trimmed); EftSubgraphScheduler etf_scheduler; - SubgraphSchedule subgraph_schedule = etf_scheduler.run(input.instance, input.multiplicities, input.required_proc_types, input.max_num_processors); - subgraph_schedule.was_trimmed = std::move(was_trimmed); // Pass through trimming info + SubgraphSchedule subgraph_schedule + = etf_scheduler.run(input.instance, input.multiplicities, input.required_proc_types, input.max_num_processors); + subgraph_schedule.was_trimmed = std::move(was_trimmed); // Pass through trimming info std::vector> partition(instance.numberOfVertices(), 0); schedule_isomorphic_group(instance, isomorphic_groups, subgraph_schedule, partition); @@ -144,8 +161,10 @@ class IsomorphicSubgraphScheduler { std::string timestamp = ss.str() + "_"; DotFileWriter writer; - writer.write_colored_graph(timestamp + "isomorphic_groups.dot", instance.getComputationalDag(), orbit_processor.get_final_contraction_map()); - writer.write_colored_graph(timestamp + "orbits_colored.dot", instance.getComputationalDag(), orbit_processor.get_contraction_map()); + writer.write_colored_graph( + timestamp + "isomorphic_groups.dot", instance.getComputationalDag(), orbit_processor.get_final_contraction_map()); + writer.write_colored_graph( + timestamp + "orbits_colored.dot", instance.getComputationalDag(), orbit_processor.get_contraction_map()); writer.write_graph(timestamp + "iso_groups_contracted.dot", input.instance.getComputationalDag()); writer.write_colored_graph(timestamp + "graph_partition.dot", instance.getComputationalDag(), partition); Constr_Graph_t corase_graph; @@ -156,7 +175,7 @@ class IsomorphicSubgraphScheduler { } protected: - template + template struct subgraph_scheduler_input { BspInstance instance; std::vector multiplicities; @@ -173,14 +192,16 @@ class IsomorphicSubgraphScheduler { for (size_t group_idx = 0; group_idx < isomorphic_groups.size(); ++group_idx) { auto &group = isomorphic_groups[group_idx]; const unsigned group_size = static_cast(group.size()); - if (group_size <= 1) + if (group_size <= 1) { continue; + } unsigned effective_min_proc_type_count = 0; if (use_max_group_size_) { if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size << "): Using fixed max_group_size_ = " << max_group_size_ << " for trimming." << std::endl; + std::cout << "Group " << group_idx << " (size " << group_size + << "): Using fixed max_group_size_ = " << max_group_size_ << " for trimming." << std::endl; } effective_min_proc_type_count = max_group_size_; } else { @@ -219,14 +240,16 @@ class IsomorphicSubgraphScheduler { } if (found_compatible_processor) { if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" << common_node_type - << "). Min compatible processors: " << min_compatible_processors << "." << std::endl; + std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" + << common_node_type << "). Min compatible processors: " << min_compatible_processors << "." + << std::endl; } effective_min_proc_type_count = min_compatible_processors; } else { if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" << common_node_type - << ") but no compatible processors found. Disabling trimming." << std::endl; + std::cout << "Group " << group_idx << " (size " << group_size << "): Single node type (" + << common_node_type << ") but no compatible processors found. Disabling trimming." + << std::endl; } // If no compatible processors found for this type, effectively disable trimming for this group. effective_min_proc_type_count = 1; @@ -239,7 +262,9 @@ class IsomorphicSubgraphScheduler { } effective_min_proc_type_count = *std::min_element(type_count.begin(), type_count.end()); if constexpr (verbose) { - std::cout << "Group " << group_idx << " (size " << group_size << "): Multi-type or untyped group. Using default min_proc_type_count: " << effective_min_proc_type_count << "." << std::endl; + std::cout << "Group " << group_idx << " (size " << group_size + << "): Multi-type or untyped group. Using default min_proc_type_count: " + << effective_min_proc_type_count << "." << std::endl; } } } @@ -258,12 +283,14 @@ class IsomorphicSubgraphScheduler { if (gcd < group_size) { if constexpr (verbose) { - std::cout << " -> Trimming group " << group_idx << ". GCD(" << group_size << ", " << effective_min_proc_type_count - << ") = " << gcd << ". Merging " << group_size / gcd << " subgraphs at a time." << std::endl; + std::cout << " -> Trimming group " << group_idx << ". GCD(" << group_size << ", " + << effective_min_proc_type_count << ") = " << gcd << ". Merging " << group_size / gcd + << " subgraphs at a time." << std::endl; } - if (allow_use_trimmed_scheduler) + if (allow_use_trimmed_scheduler) { gcd = 1; + } was_trimmed[group_idx] = true; const unsigned merge_size = group_size / gcd; @@ -282,7 +309,8 @@ class IsomorphicSubgraphScheduler { for (unsigned k = 0; k < merge_size; ++k) { const auto &sg_to_merge_vertices = group.subgraphs[original_sg_cursor]; original_sg_cursor++; - merged_sg_vertices.insert(merged_sg_vertices.end(), sg_to_merge_vertices.begin(), sg_to_merge_vertices.end()); + merged_sg_vertices.insert( + merged_sg_vertices.end(), sg_to_merge_vertices.begin(), sg_to_merge_vertices.end()); } new_subgraphs.push_back(std::move(merged_sg_vertices)); } @@ -300,7 +328,6 @@ class IsomorphicSubgraphScheduler { const BspInstance &original_instance, const std::vector::Group> &isomorphic_groups, const std::vector &was_trimmed) { - subgraph_scheduler_input result; result.instance.getArchitecture() = original_instance.getArchitecture(); const unsigned num_proc_types = original_instance.getArchitecture().getNumberOfProcessorTypes(); @@ -312,9 +339,10 @@ class IsomorphicSubgraphScheduler { size_t coarse_node_idx = 0; for (const auto &group : isomorphic_groups) { - result.max_num_processors[coarse_node_idx] = static_cast(group.size() * group.subgraphs[0].size()); - result.multiplicities[coarse_node_idx] = (was_trimmed[coarse_node_idx] && allow_use_trimmed_scheduler) ? 1 : static_cast(group.subgraphs.size()); + result.multiplicities[coarse_node_idx] = (was_trimmed[coarse_node_idx] && allow_use_trimmed_scheduler) + ? 1 + : static_cast(group.subgraphs.size()); result.required_proc_types[coarse_node_idx].assign(num_proc_types, 0); for (const auto &subgraph : group.subgraphs) { @@ -332,8 +360,8 @@ class IsomorphicSubgraphScheduler { ++coarse_node_idx; } - coarser_util::construct_coarse_dag(original_instance.getComputationalDag(), result.instance.getComputationalDag(), - contraction_map); + coarser_util::construct_coarse_dag( + original_instance.getComputationalDag(), result.instance.getComputationalDag(), contraction_map); if constexpr (verbose) { std::cout << "\n--- Preparing Subgraph Scheduling Input ---\n"; @@ -342,7 +370,8 @@ class IsomorphicSubgraphScheduler { std::cout << " - Coarse Node " << j << " (from " << isomorphic_groups[j].subgraphs.size() << " isomorphic subgraphs):\n"; std::cout << " - Multiplicity for scheduling: " << result.multiplicities[j] << "\n"; - std::cout << " - Total Work (in coarse graph): " << result.instance.getComputationalDag().vertex_work_weight(j) << "\n"; + std::cout << " - Total Work (in coarse graph): " << result.instance.getComputationalDag().vertex_work_weight(j) + << "\n"; std::cout << " - Required Processor Types: "; for (unsigned k = 0; k < num_proc_types; ++k) { std::cout << result.required_proc_types[j][k] << " "; @@ -371,13 +400,15 @@ class IsomorphicSubgraphScheduler { std::sort(rep_subgraph_vertices_sorted.begin(), rep_subgraph_vertices_sorted.end()); BspInstance representative_instance; - auto rep_global_to_local_map = create_induced_subgraph_map(instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted); + auto rep_global_to_local_map = create_induced_subgraph_map( + instance.getComputationalDag(), representative_instance.getComputationalDag(), rep_subgraph_vertices_sorted); representative_instance.getArchitecture() = instance.getArchitecture(); const auto &procs_for_group = sub_sched.node_assigned_worker_per_type[group_idx]; std::vector> mem_weights(procs_for_group.size(), 0); for (unsigned proc_type = 0; proc_type < procs_for_group.size(); ++proc_type) { - mem_weights[proc_type] = static_cast>(instance.getArchitecture().maxMemoryBoundProcType(proc_type)); + mem_weights[proc_type] + = static_cast>(instance.getArchitecture().maxMemoryBoundProcType(proc_type)); } representative_instance.getArchitecture().SetProcessorsConsequTypes(procs_for_group, mem_weights); representative_instance.setNodeProcessorCompatibility(instance.getProcessorCompatibilityMatrix()); @@ -395,13 +426,16 @@ class IsomorphicSubgraphScheduler { Scheduler *scheduler_for_group_ptr; std::unique_ptr> trimmed_scheduler_owner; if (use_trimmed_scheduler) { - if constexpr (verbose) + if constexpr (verbose) { std::cout << "Using TrimmedGroupScheduler for group " << group_idx << std::endl; - trimmed_scheduler_owner = std::make_unique>(*bsp_scheduler_, min_non_zero_procs); + } + trimmed_scheduler_owner + = std::make_unique>(*bsp_scheduler_, min_non_zero_procs); scheduler_for_group_ptr = trimmed_scheduler_owner.get(); } else { - if constexpr (verbose) + if constexpr (verbose) { std::cout << "Using standard BSP scheduler for group " << group_idx << std::endl; + } scheduler_for_group_ptr = bsp_scheduler_; } @@ -434,7 +468,8 @@ class IsomorphicSubgraphScheduler { std::cout << "T" << type_idx << ":" << type_counts[type_idx] << " "; } std::cout << std::endl; - std::cout << " Sync cost: " << sub_arch.synchronisationCosts() << ", Comm cost: " << sub_arch.communicationCosts() << std::endl; + std::cout << " Sync cost: " << sub_arch.synchronisationCosts() + << ", Comm cost: " << sub_arch.communicationCosts() << std::endl; } scheduler_for_group_ptr->computeSchedule(bsp_schedule); @@ -470,7 +505,8 @@ class IsomorphicSubgraphScheduler { writer.write_colored_graph(timestamp + "iso_group_rep_" + std::to_string(group_idx) + ".dot", rep_dag, colors); } - const bool max_bsp = use_max_bsp && (representative_instance.getComputationalDag().num_edges() == 0) && (representative_instance.getComputationalDag().vertex_type(0) == 0); + const bool max_bsp = use_max_bsp && (representative_instance.getComputationalDag().num_edges() == 0) + && (representative_instance.getComputationalDag().vertex_type(0) == 0); // Build data structures for applying the pattern --- // Map (superstep, processor) -> relative partition ID @@ -479,8 +515,9 @@ class IsomorphicSubgraphScheduler { for (vertex_idx_t j = 0; j < static_cast>(rep_subgraph_vertices_sorted.size()); ++j) { auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(j), bsp_schedule.assignedProcessor(j)); - if (max_bsp) + if (max_bsp) { sp_pair = std::make_pair(j, 0); + } if (sp_proc_to_relative_partition.find(sp_pair) == sp_proc_to_relative_partition.end()) { sp_proc_to_relative_partition[sp_pair] = num_partitions_per_subgraph++; @@ -498,11 +535,12 @@ class IsomorphicSubgraphScheduler { // Map from a vertex in the current subgraph to its corresponding local index (0, 1, ...) in the representative's schedule std::unordered_map, vertex_idx_t> current_vertex_to_rep_local_idx; - if (i == 0) { // The first subgraph is the representative itself + if (i == 0) { // The first subgraph is the representative itself current_vertex_to_rep_local_idx = std::move(rep_global_to_local_map); - } else { // For other subgraphs, build the isomorphic mapping + } else { // For other subgraphs, build the isomorphic mapping Constr_Graph_t current_subgraph_graph; - create_induced_subgraph(instance.getComputationalDag(), current_subgraph_graph, current_subgraph_vertices_sorted); + create_induced_subgraph( + instance.getComputationalDag(), current_subgraph_graph, current_subgraph_vertices_sorted); MerkleHashComputer current_hasher(current_subgraph_graph); @@ -510,7 +548,8 @@ class IsomorphicSubgraphScheduler { const auto ¤t_orbit_nodes = current_hasher.get_orbit_from_hash(hash); for (size_t k = 0; k < rep_orbit_nodes.size(); ++k) { // Map: current_subgraph_vertex -> representative_subgraph_local_idx - current_vertex_to_rep_local_idx[current_subgraph_vertices_sorted[current_orbit_nodes[k]]] = static_cast>(rep_orbit_nodes[k]); + current_vertex_to_rep_local_idx[current_subgraph_vertices_sorted[current_orbit_nodes[k]]] + = static_cast>(rep_orbit_nodes[k]); } } } @@ -518,10 +557,12 @@ class IsomorphicSubgraphScheduler { // Apply the partition pattern for (const auto ¤t_vertex : current_subgraph_vertices_sorted) { const auto rep_local_idx = current_vertex_to_rep_local_idx.at(current_vertex); - auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(rep_local_idx), bsp_schedule.assignedProcessor(rep_local_idx)); + auto sp_pair = std::make_pair(bsp_schedule.assignedSuperstep(rep_local_idx), + bsp_schedule.assignedProcessor(rep_local_idx)); - if (max_bsp) + if (max_bsp) { sp_pair = std::make_pair(rep_local_idx, 0); + } partition[current_vertex] = current_partition_idx + sp_proc_to_relative_partition.at(sp_pair); } @@ -531,4 +572,4 @@ class IsomorphicSubgraphScheduler { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp b/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp index 07a9d241..9b7c6856 100644 --- a/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp +++ b/include/osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp @@ -17,10 +17,10 @@ limitations under the License. #pragma once #include +#include #include #include #include -#include #include #include "MerkleHashComputer.hpp" @@ -40,11 +40,10 @@ namespace osp { */ template class IsomorphismMapper { + using VertexC = vertex_idx_t; // Local vertex ID + using VertexG = vertex_idx_t; // Global vertex ID - using VertexC = vertex_idx_t; // Local vertex ID - using VertexG = vertex_idx_t; // Global vertex ID - - const Constr_Graph_t& rep_graph; + const Constr_Graph_t &rep_graph; const MerkleHashComputer rep_hasher; public: @@ -52,9 +51,8 @@ class IsomorphismMapper { * @brief Constructs an IsomorphismMapper. * @param representative_graph The subgraph to use as the "pattern". */ - IsomorphismMapper(const Constr_Graph_t& representative_graph) - : rep_graph(representative_graph), rep_hasher(representative_graph), - num_vertices(representative_graph.num_vertices()) {} + IsomorphismMapper(const Constr_Graph_t &representative_graph) + : rep_graph(representative_graph), rep_hasher(representative_graph), num_vertices(representative_graph.num_vertices()) {} virtual ~IsomorphismMapper() = default; @@ -66,7 +64,7 @@ class IsomorphismMapper { * @param current_graph The new isomorphic subgraph. * @return A map from `current_local_vertex_id` -> `representative_local_vertex_id`. */ - std::unordered_map find_mapping(const Constr_Graph_t& current_graph) const { + std::unordered_map find_mapping(const Constr_Graph_t ¤t_graph) const { if (current_graph.num_vertices() != num_vertices) { throw std::runtime_error("IsomorphismMapper: Graph sizes do not match."); } @@ -76,14 +74,14 @@ class IsomorphismMapper { // 1. Compute hashes and orbits for the current graph. MerkleHashComputer current_hasher(current_graph); - const auto& rep_orbits = rep_hasher.get_orbits(); - const auto& current_orbits = current_hasher.get_orbits(); + const auto &rep_orbits = rep_hasher.get_orbits(); + const auto ¤t_orbits = current_hasher.get_orbits(); // 2. Verify that the orbit structures are identical. if (rep_orbits.size() != current_orbits.size()) { throw std::runtime_error("IsomorphismMapper: Graphs have a different number of orbits."); } - for (const auto& [hash, rep_orbit_nodes] : rep_orbits) { + for (const auto &[hash, rep_orbit_nodes] : rep_orbits) { auto it = current_orbits.find(hash); if (it == current_orbits.end() || it->second.size() != rep_orbit_nodes.size()) { throw std::runtime_error("IsomorphismMapper: Mismatched orbit structure between graphs."); @@ -108,12 +106,14 @@ class IsomorphismMapper { } } - if (rep_seed == std::numeric_limits::max()) break; // Should be unreachable if mapped_count < num_vertices + if (rep_seed == std::numeric_limits::max()) { + break; // Should be unreachable if mapped_count < num_vertices + } // Find a corresponding unmapped vertex in the current graph's orbit. - const auto& candidates = current_orbits.at(rep_hasher.get_vertex_hash(rep_seed)); - VertexC current_seed = std::numeric_limits::max(); // Should always be found - for (const auto& candidate : candidates) { + const auto &candidates = current_orbits.at(rep_hasher.get_vertex_hash(rep_seed)); + VertexC current_seed = std::numeric_limits::max(); // Should always be found + for (const auto &candidate : candidates) { if (!current_is_mapped[candidate]) { current_seed = candidate; break; @@ -135,8 +135,26 @@ class IsomorphismMapper { q.pop(); // Match neighbors (both parents and children) - match_neighbors(current_graph, current_hasher, u_rep, u_curr, map_current_to_rep, rep_is_mapped, current_is_mapped, mapped_count, q, true); - match_neighbors(current_graph, current_hasher, u_rep, u_curr, map_current_to_rep, rep_is_mapped, current_is_mapped, mapped_count, q, false); + match_neighbors(current_graph, + current_hasher, + u_rep, + u_curr, + map_current_to_rep, + rep_is_mapped, + current_is_mapped, + mapped_count, + q, + true); + match_neighbors(current_graph, + current_hasher, + u_rep, + u_curr, + map_current_to_rep, + rep_is_mapped, + current_is_mapped, + mapped_count, + q, + false); } } @@ -147,26 +165,37 @@ class IsomorphismMapper { // 4. Return the inverted map. std::unordered_map current_local_to_rep_local; current_local_to_rep_local.reserve(num_vertices); - for (VertexC i = 0; i < num_vertices; ++i) current_local_to_rep_local[map_current_to_rep[i]] = i; + for (VertexC i = 0; i < num_vertices; ++i) { + current_local_to_rep_local[map_current_to_rep[i]] = i; + } return current_local_to_rep_local; } -private: + private: const size_t num_vertices; - void match_neighbors(const Constr_Graph_t& current_graph, const MerkleHashComputer& current_hasher, - VertexC u_rep, VertexC u_curr, std::vector& map_current_to_rep, - std::vector& rep_is_mapped, std::vector& current_is_mapped, - size_t& mapped_count, std::queue>& q, bool match_children) const { - - const auto& rep_neighbors_range = match_children ? rep_graph.children(u_rep) : rep_graph.parents(u_rep); - const auto& curr_neighbors_range = match_children ? current_graph.children(u_curr) : current_graph.parents(u_curr); - - for (const auto& v_rep : rep_neighbors_range) { - if (rep_is_mapped[v_rep]) continue; + void match_neighbors(const Constr_Graph_t ¤t_graph, + const MerkleHashComputer ¤t_hasher, + VertexC u_rep, + VertexC u_curr, + std::vector &map_current_to_rep, + std::vector &rep_is_mapped, + std::vector ¤t_is_mapped, + size_t &mapped_count, + std::queue> &q, + bool match_children) const { + const auto &rep_neighbors_range = match_children ? rep_graph.children(u_rep) : rep_graph.parents(u_rep); + const auto &curr_neighbors_range = match_children ? current_graph.children(u_curr) : current_graph.parents(u_curr); + + for (const auto &v_rep : rep_neighbors_range) { + if (rep_is_mapped[v_rep]) { + continue; + } - for (const auto& v_curr : curr_neighbors_range) { - if (current_is_mapped[v_curr]) continue; + for (const auto &v_curr : curr_neighbors_range) { + if (current_is_mapped[v_curr]) { + continue; + } if (rep_hasher.get_vertex_hash(v_rep) == current_hasher.get_vertex_hash(v_curr)) { map_current_to_rep[v_rep] = v_curr; @@ -174,11 +203,11 @@ class IsomorphismMapper { current_is_mapped[v_curr] = true; mapped_count++; q.push({v_rep, v_curr}); - break; // Found a match for v_rep, move to the next rep neighbor. + break; // Found a match for v_rep, move to the next rep neighbor. } } } } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp b/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp index 61a85e9e..f7bc7106 100644 --- a/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp +++ b/include/osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp @@ -18,20 +18,20 @@ limitations under the License. #pragma once -#include -#include #include -#include +#include +#include +#include + +#include "osp/auxiliary/hash_util.hpp" #include "osp/concepts/computational_dag_concept.hpp" +#include "osp/dag_divider/isomorphism_divider/HashComputer.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" -#include "osp/auxiliary/hash_util.hpp" -#include "osp/dag_divider/isomorphism_divider/HashComputer.hpp" - namespace osp { -/** +/** * @brief Computes Merkle hashes for graph vertices to identify isomorphic orbits. * * The Merkle hash of a vertex is computed recursively based on its own properties @@ -44,91 +44,90 @@ namespace osp { * @tparam forward If true, hashes are computed based on parents (top-down). * If false, hashes are computed based on children (bottom-up). */ -template>, bool forward = true> +template >, bool forward = true> class MerkleHashComputer : public HashComputer> { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - static_assert(std::is_invocable_r>::value, "node_hash_func_t must be invocable with one vertex_idx_t argument and return std::size_t."); + static_assert(std::is_invocable_r>::value, + "node_hash_func_t must be invocable with one vertex_idx_t argument and return std::size_t."); - using VertexType = vertex_idx_t; + using VertexType = vertex_idx_t; std::vector vertex_hashes; std::unordered_map> orbits; node_hash_func_t node_hash_func; - inline void compute_hashes_helper(const VertexType &v, std::vector & parent_child_hashes) { + inline void compute_hashes_helper(const VertexType &v, std::vector &parent_child_hashes) { + std::sort(parent_child_hashes.begin(), parent_child_hashes.end()); - std::sort(parent_child_hashes.begin(),parent_child_hashes.end()); + std::size_t hash = node_hash_func(v); + for (const auto &pc_hash : parent_child_hashes) { + hash_combine(hash, pc_hash); + } - std::size_t hash = node_hash_func(v); - for (const auto& pc_hash : parent_child_hashes) { - hash_combine(hash, pc_hash); - } - - vertex_hashes[v] = hash; + vertex_hashes[v] = hash; - if (orbits.find(hash) == orbits.end()) { - orbits[hash] = {v}; - } else { - orbits[hash].push_back(v); - } + if (orbits.find(hash) == orbits.end()) { + orbits[hash] = {v}; + } else { + orbits[hash].push_back(v); + } } - template - std::enable_if_t compute_hashes(const Graph_t & graph) { - + template + std::enable_if_t compute_hashes(const Graph_t &graph) { vertex_hashes.resize(graph.num_vertices()); - + for (const VertexType &v : top_sort_view(graph)) { std::vector parent_hashes; - for (const VertexType& parent : graph.parents(v)) { + for (const VertexType &parent : graph.parents(v)) { parent_hashes.push_back(vertex_hashes[parent]); } compute_hashes_helper(v, parent_hashes); } } - template - std::enable_if_t compute_hashes(const Graph_t & graph) { - + template + std::enable_if_t compute_hashes(const Graph_t &graph) { vertex_hashes.resize(graph.num_vertices()); - + const auto top_sort = GetTopOrderReverse(graph); for (auto it = top_sort.cbegin(); it != top_sort.cend(); ++it) { const VertexType &v = *it; std::vector child_hashes; - for (const VertexType& child : graph.children(v)) { + for (const VertexType &child : graph.children(v)) { child_hashes.push_back(vertex_hashes[child]); } - compute_hashes_helper(v, child_hashes); - } + compute_hashes_helper(v, child_hashes); + } } - public: - - template - MerkleHashComputer(const Graph_t &graph_, Args &&...args) : HashComputer(), node_hash_func(std::forward(args)...) { - compute_hashes(graph_); + public: + template + MerkleHashComputer(const Graph_t &graph_, Args &&...args) + : HashComputer(), node_hash_func(std::forward(args)...) { + compute_hashes(graph_); } virtual ~MerkleHashComputer() override = default; inline std::size_t get_vertex_hash(const VertexType &v) const override { return vertex_hashes[v]; } + inline const std::vector &get_vertex_hashes() const override { return vertex_hashes; } + inline std::size_t num_orbits() const override { return orbits.size(); } - - inline const std::vector &get_orbit(const VertexType &v) const override { return this->get_orbit_from_hash(this->get_vertex_hash(v)); } - inline const std::unordered_map> &get_orbits() const override { return orbits; } - inline const std::vector& get_orbit_from_hash(const std::size_t& hash) const override { - return orbits.at(hash); + inline const std::vector &get_orbit(const VertexType &v) const override { + return this->get_orbit_from_hash(this->get_vertex_hash(v)); } -}; + inline const std::unordered_map> &get_orbits() const override { return orbits; } + + inline const std::vector &get_orbit_from_hash(const std::size_t &hash) const override { return orbits.at(hash); } +}; -template>, bool Forward = true> -bool are_isomorphic_by_merkle_hash(const Graph_t& g1, const Graph_t& g2) { +template >, bool Forward = true> +bool are_isomorphic_by_merkle_hash(const Graph_t &g1, const Graph_t &g2) { // Basic check: Different numbers of vertices or edges mean they can't be isomorphic. if (g1.num_vertices() != g2.num_vertices() || g1.num_edges() != g2.num_edges()) { return false; @@ -137,48 +136,44 @@ bool are_isomorphic_by_merkle_hash(const Graph_t& g1, const Graph_t& g2) { // --- Compute Hashes in the Specified Direction --- MerkleHashComputer hash1(g1); MerkleHashComputer hash2(g2); - - const auto& orbits1 = hash1.get_orbits(); - const auto& orbits2 = hash2.get_orbits(); + + const auto &orbits1 = hash1.get_orbits(); + const auto &orbits2 = hash2.get_orbits(); if (orbits1.size() != orbits2.size()) { return false; } - for (const auto& pair : orbits1) { + for (const auto &pair : orbits1) { const std::size_t hash = pair.first; - const auto& orbit_vec = pair.second; + const auto &orbit_vec = pair.second; auto it = orbits2.find(hash); if (it == orbits2.end() || it->second.size() != orbit_vec.size()) { return false; } } - + return true; } - -template +template struct bwd_merkle_node_hash_func { MerkleHashComputer>, false> bw_merkle_hash; - - bwd_merkle_node_hash_func(const Graph_t & graph) : bw_merkle_hash(graph) { } - std::size_t operator()(const vertex_idx_t & v) const { - return bw_merkle_hash.get_vertex_hash(v); - } + bwd_merkle_node_hash_func(const Graph_t &graph) : bw_merkle_hash(graph) {} + + std::size_t operator()(const vertex_idx_t &v) const { return bw_merkle_hash.get_vertex_hash(v); } }; -template +template struct precom_bwd_merkle_node_hash_func { MerkleHashComputer>, false> bw_merkle_hash; - - precom_bwd_merkle_node_hash_func(const Graph_t & graph, const std::vector& node_hashes) : bw_merkle_hash(graph, node_hashes) { } - std::size_t operator()(const vertex_idx_t & v) const { - return bw_merkle_hash.get_vertex_hash(v); - } + precom_bwd_merkle_node_hash_func(const Graph_t &graph, const std::vector &node_hashes) + : bw_merkle_hash(graph, node_hashes) {} + + std::size_t operator()(const vertex_idx_t &v) const { return bw_merkle_hash.get_vertex_hash(v); } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp b/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp index ddb99122..03bff72d 100644 --- a/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp +++ b/include/osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp @@ -18,6 +18,10 @@ limitations under the License. #pragma once +#include +#include +#include +#include #include #include "osp/coarser/coarser_util.hpp" @@ -27,10 +31,6 @@ limitations under the License. #include "osp/graph_algorithms/directed_graph_util.hpp" #include "osp/graph_algorithms/subgraph_algorithms.hpp" #include "osp/graph_algorithms/transitive_reduction.hpp" -#include -#include -#include -#include namespace osp { @@ -42,10 +42,9 @@ namespace osp { * It then partitions the DAG by grouping all nodes with the same hash into an "orbit". * A coarse graph is constructed where each node represents one such orbit. */ -template +template class OrbitGraphProcessor { public: - /** * @brief Heuristics for selecting which symmetry levels to test during coarsening. */ @@ -66,8 +65,7 @@ class OrbitGraphProcessor { static_assert(is_computational_dag_v, "Graph must be a computational DAG"); static_assert(is_computational_dag_v, "Constr_Graph_t must be a computational DAG"); - static_assert(is_constructable_cdag_v, - "Constr_Graph_t must satisfy the constructable_cdag_vertex concept"); + static_assert(is_constructable_cdag_v, "Constr_Graph_t must satisfy the constructable_cdag_vertex concept"); static_assert(std::is_same_v, vertex_idx_t>, "Graph_t and Constr_Graph_t must have the same vertex_idx types"); @@ -79,7 +77,7 @@ class OrbitGraphProcessor { struct Group { // Each vector of vertices represents one of the isomorphic subgraphs in this group. std::vector> subgraphs; - + inline size_t size() const { return subgraphs.size(); } }; @@ -92,9 +90,9 @@ class OrbitGraphProcessor { Constr_Graph_t final_coarse_graph_; std::vector final_contraction_map_; std::vector final_groups_; - size_t current_symmetry; + size_t current_symmetry; - size_t min_symmetry_ = 2; // min symmetry threshold + size_t min_symmetry_ = 2; // min symmetry threshold v_workw_t work_threshold_ = 0; v_workw_t critical_path_threshold_ = 0; bool merge_different_node_types_ = true; @@ -102,12 +100,12 @@ class OrbitGraphProcessor { SymmetryLevelHeuristic symmetry_level_heuristic_ = SymmetryLevelHeuristic::NATURAL_BREAKS; std::vector work_percentiles_ = {0.50, 0.75}; - double natural_breaks_count_percentage_ = 0.2; + double natural_breaks_count_percentage_ = 0.2; bool use_adaptive_symmetry_threshold_ = true; struct PairHasher { - template + template std::size_t operator()(const std::pair &p) const { auto h1 = std::hash{}(p.first); auto h2 = std::hash{}(p.second); @@ -121,8 +119,9 @@ class OrbitGraphProcessor { /** * @brief Simulates the merge of node v into u and returns the resulting temporary graph. */ - std::pair> - simulate_merge(VertexType u, VertexType v, const Constr_Graph_t ¤t_coarse_graph) const { + std::pair> simulate_merge(VertexType u, + VertexType v, + const Constr_Graph_t ¤t_coarse_graph) const { std::vector temp_contraction_map(current_coarse_graph.num_vertices()); VertexType new_idx = 0; for (VertexType i = 0; i < static_cast(temp_contraction_map.size()); ++i) { @@ -141,11 +140,14 @@ class OrbitGraphProcessor { /** * @brief Commits a merge operation by updating the graph state. */ - void commit_merge(VertexType u, VertexType v, Constr_Graph_t &&next_coarse_graph, + void commit_merge(VertexType u, + VertexType v, + Constr_Graph_t &&next_coarse_graph, const std::vector &group_remap, - std::vector> &&new_subgraphs, Constr_Graph_t ¤t_coarse_graph, - std::vector ¤t_groups, std::vector ¤t_contraction_map) { - + std::vector> &&new_subgraphs, + Constr_Graph_t ¤t_coarse_graph, + std::vector ¤t_groups, + std::vector ¤t_contraction_map) { current_coarse_graph = std::move(next_coarse_graph); // Update caches for new vertex indices @@ -158,7 +160,7 @@ class OrbitGraphProcessor { if (old_u != v && old_v != v && new_u != new_v) { next_non_viable_edges.insert({new_u, new_v}); - } + } } non_viable_edges_cache_ = std::move(next_non_viable_edges); @@ -172,7 +174,6 @@ class OrbitGraphProcessor { if (old_u != v && old_v != v && new_u != new_v) { next_non_viable_crit_path_edges.insert({new_u, new_v}); } - } non_viable_crit_path_edges_cache_ = std::move(next_non_viable_crit_path_edges); @@ -195,30 +196,28 @@ class OrbitGraphProcessor { /** * @brief Merges small orbits based on work threshold (final cleanup pass). */ - void merge_small_orbits(const Graph_t &original_dag, - Constr_Graph_t& current_coarse_graph, - std::vector& current_groups, - std::vector& current_contraction_map, - const v_workw_t work_threshold, - const v_workw_t path_threshold = 0) { - + void merge_small_orbits(const Graph_t &original_dag, + Constr_Graph_t ¤t_coarse_graph, + std::vector ¤t_groups, + std::vector ¤t_contraction_map, + const v_workw_t work_threshold, + const v_workw_t path_threshold = 0) { bool changed = true; while (changed) { - const std::vector> vertexPoset = - get_top_node_distance>(current_coarse_graph); - const std::vector> vertexBotPoset = - get_bottom_node_distance>(current_coarse_graph); + const std::vector> vertexPoset + = get_top_node_distance>(current_coarse_graph); + const std::vector> vertexBotPoset + = get_bottom_node_distance>(current_coarse_graph); changed = false; for (const auto u : current_coarse_graph.vertices()) { - for (const auto v : current_coarse_graph.children(u)) { - + for (const auto v : current_coarse_graph.children(u)) { if constexpr (has_typed_vertices_v) { if (not merge_different_node_types_) { if (current_coarse_graph.vertex_type(u) != current_coarse_graph.vertex_type(v)) { if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v << " not viable (different node types)\n"; - } + } continue; } } @@ -233,8 +232,10 @@ class OrbitGraphProcessor { const v_workw_t u_work_weight = current_coarse_graph.vertex_work_weight(u); const v_workw_t v_work_weight = current_coarse_graph.vertex_work_weight(v); - const v_workw_t v_threshold = work_threshold * static_cast>(current_groups[v].size()); - const v_workw_t u_threshold = work_threshold * static_cast>(current_groups[u].size()); + const v_workw_t v_threshold + = work_threshold * static_cast>(current_groups[v].size()); + const v_workw_t u_threshold + = work_threshold * static_cast>(current_groups[u].size()); if (u_work_weight > u_threshold && v_work_weight > v_threshold) { if constexpr (verbose) { @@ -246,15 +247,15 @@ class OrbitGraphProcessor { if ((vertexPoset[u] + 1 != vertexPoset[v]) && (vertexBotPoset[u] != 1 + vertexBotPoset[v])) { if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v - << " not viable poset. poste v: " << vertexBotPoset[v] - << " poste u: " << vertexBotPoset[u] << "\n"; + << " not viable poset. poste v: " << vertexBotPoset[v] << " poste u: " << vertexBotPoset[u] + << "\n"; } continue; } std::vector> new_subgraphs; const bool merge_is_valid = is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs); - + if (!merge_is_valid) { if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v << " and " << v @@ -266,10 +267,14 @@ class OrbitGraphProcessor { auto [temp_coarse_graph, temp_contraction_map] = simulate_merge(u, v, current_coarse_graph); - if (critical_path_weight(temp_coarse_graph) > (path_threshold * static_cast>(new_subgraphs.size()) + critical_path_weight(current_coarse_graph))) { + if (critical_path_weight(temp_coarse_graph) + > (path_threshold * static_cast>(new_subgraphs.size()) + + critical_path_weight(current_coarse_graph))) { if constexpr (verbose) { - std::cout << " - Merge of " << u << " and " << v << " increases critical path. Old cirtical path: " << critical_path_weight(current_coarse_graph) - << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + " << path_threshold * static_cast>(new_subgraphs.size()) << "\n"; + std::cout << " - Merge of " << u << " and " << v << " increases critical path. Old cirtical path: " + << critical_path_weight(current_coarse_graph) + << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + " + << path_threshold * static_cast>(new_subgraphs.size()) << "\n"; } non_viable_crit_path_edges_cache_.insert({u, v}); continue; @@ -280,8 +285,14 @@ class OrbitGraphProcessor { << temp_coarse_graph.num_vertices() << " nodes.\n"; } - commit_merge(u, v, std::move(temp_coarse_graph), temp_contraction_map, std::move(new_subgraphs), - current_coarse_graph, current_groups, current_contraction_map); + commit_merge(u, + v, + std::move(temp_coarse_graph), + temp_contraction_map, + std::move(new_subgraphs), + current_coarse_graph, + current_groups, + current_contraction_map); changed = true; break; @@ -296,14 +307,19 @@ class OrbitGraphProcessor { /** * @brief Deprecated non-adaptive merge function. */ - void contract_edges(const Graph_t &original_dag, Constr_Graph_t& current_coarse_graph, std::vector& current_groups, std::vector& current_contraction_map, const bool merge_symmetry_narrowing, const bool merge_different_node_types, const v_workw_t path_threshold = 0) { - + void contract_edges(const Graph_t &original_dag, + Constr_Graph_t ¤t_coarse_graph, + std::vector ¤t_groups, + std::vector ¤t_contraction_map, + const bool merge_symmetry_narrowing, + const bool merge_different_node_types, + const v_workw_t path_threshold = 0) { bool changed = true; while (changed) { - const std::vector> vertexPoset = - get_top_node_distance>(current_coarse_graph); - const std::vector> vertexBotPoset = - get_bottom_node_distance>(current_coarse_graph); + const std::vector> vertexPoset + = get_top_node_distance>(current_coarse_graph); + const std::vector> vertexBotPoset + = get_bottom_node_distance>(current_coarse_graph); changed = false; for (const auto &edge : edges(current_coarse_graph)) { @@ -329,8 +345,8 @@ class OrbitGraphProcessor { const std::size_t v_size = current_groups[v].size(); const bool merge_is_valid = is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs); const std::size_t new_size = new_subgraphs.size(); - - const bool merge_viable = (new_size >= current_symmetry); + + const bool merge_viable = (new_size >= current_symmetry); const bool both_below_symmetry_threshold = (u_size < current_symmetry) && (v_size < current_symmetry); if (!merge_is_valid) { @@ -349,38 +365,44 @@ class OrbitGraphProcessor { auto [temp_coarse_graph, temp_contraction_map] = simulate_merge(u, v, current_coarse_graph); - if (critical_path_weight(temp_coarse_graph) > (path_threshold * static_cast>(new_subgraphs.size()) + critical_path_weight(current_coarse_graph))) { + if (critical_path_weight(temp_coarse_graph) + > (path_threshold * static_cast>(new_subgraphs.size()) + + critical_path_weight(current_coarse_graph))) { non_viable_crit_path_edges_cache_.insert({u, v}); continue; } - commit_merge(u, v, std::move(temp_coarse_graph), temp_contraction_map, std::move(new_subgraphs), - current_coarse_graph, current_groups, current_contraction_map); + commit_merge(u, + v, + std::move(temp_coarse_graph), + temp_contraction_map, + std::move(new_subgraphs), + current_coarse_graph, + current_groups, + current_contraction_map); changed = true; break; } } } - /** * @brief Core adaptive merging function. */ - void contract_edges_adpative_sym(const Graph_t &original_dag, - Constr_Graph_t& current_coarse_graph, - std::vector& current_groups, - std::vector& current_contraction_map, - const bool merge_different_node_types, - const bool merge_below_threshold, - const std::vector>& lock_threshold_per_type, - const v_workw_t path_threshold = 0) { - + void contract_edges_adpative_sym(const Graph_t &original_dag, + Constr_Graph_t ¤t_coarse_graph, + std::vector ¤t_groups, + std::vector ¤t_contraction_map, + const bool merge_different_node_types, + const bool merge_below_threshold, + const std::vector> &lock_threshold_per_type, + const v_workw_t path_threshold = 0) { bool changed = true; while (changed) { - const std::vector> vertexPoset = - get_top_node_distance>(current_coarse_graph); - const std::vector> vertexBotPoset = - get_bottom_node_distance>(current_coarse_graph); + const std::vector> vertexPoset + = get_top_node_distance>(current_coarse_graph); + const std::vector> vertexBotPoset + = get_bottom_node_distance>(current_coarse_graph); changed = false; for (const auto &edge : edges(current_coarse_graph)) { @@ -399,7 +421,7 @@ class OrbitGraphProcessor { if (current_coarse_graph.vertex_type(u) != current_coarse_graph.vertex_type(v)) { if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v << " not viable (different node types)\n"; - } + } continue; } } @@ -407,8 +429,7 @@ class OrbitGraphProcessor { if ((vertexPoset[u] + 1 != vertexPoset[v]) && (vertexBotPoset[u] != 1 + vertexBotPoset[v])) { if constexpr (verbose) { - std::cout << " - Merge of " << u << " and " << v - << " not viable poset. poste v: " << vertexBotPoset[v] + std::cout << " - Merge of " << u << " and " << v << " not viable poset. poste v: " << vertexBotPoset[v] << " poste u: " << vertexBotPoset[u] << "\n"; } continue; @@ -417,10 +438,10 @@ class OrbitGraphProcessor { std::vector> new_subgraphs; const std::size_t u_size = current_groups[u].size(); const std::size_t v_size = current_groups[v].size(); - + const bool merge_is_valid = is_merge_viable(original_dag, current_groups[u], current_groups[v], new_subgraphs); const std::size_t new_size = new_subgraphs.size(); - + if (!merge_is_valid) { if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v << " and " << v @@ -431,14 +452,15 @@ class OrbitGraphProcessor { } const bool merge_viable = (new_size >= current_symmetry); - const bool both_below_minimal_threshold = merge_below_threshold && (u_size < min_symmetry_) && (v_size < min_symmetry_); - + const bool both_below_minimal_threshold = merge_below_threshold && (u_size < min_symmetry_) + && (v_size < min_symmetry_); + if (!merge_viable && !both_below_minimal_threshold) { if constexpr (verbose) { std::cout << " - Merge of " << u << " and " << v << " not viable (Symmetry Threshold)\n"; std::cout << " - u_sym: " << u_size << ", v_sym: " << v_size << " -> new_sym: " << new_size - << " (current_threshold: " << current_symmetry - << ", global_min_threshold: " << min_symmetry_ << ")\n"; + << " (current_threshold: " << current_symmetry << ", global_min_threshold: " << min_symmetry_ + << ")\n"; } non_viable_edges_cache_.insert({u, v}); continue; @@ -446,36 +468,35 @@ class OrbitGraphProcessor { v_type_t u_type = 0; v_type_t v_type = 0; - if (not merge_different_node_types && has_typed_vertices_v ) { + if (not merge_different_node_types && has_typed_vertices_v) { u_type = current_coarse_graph.vertex_type(u); v_type = current_coarse_graph.vertex_type(v); } - const bool u_is_significant = (u_size >= min_symmetry_) && - (current_coarse_graph.vertex_work_weight(u) > lock_threshold_per_type[u_type]); - const bool v_is_significant = (v_size >= min_symmetry_) && - (current_coarse_graph.vertex_work_weight(v) > lock_threshold_per_type[v_type]); + const bool u_is_significant = (u_size >= min_symmetry_) + && (current_coarse_graph.vertex_work_weight(u) > lock_threshold_per_type[u_type]); + const bool v_is_significant = (v_size >= min_symmetry_) + && (current_coarse_graph.vertex_work_weight(v) > lock_threshold_per_type[v_type]); - if (u_is_significant && v_is_significant) - { + if (u_is_significant && v_is_significant) { // Both are significant --- if (new_size < std::min(u_size, v_size)) { if constexpr (verbose) { - std::cout << " - Merge of " << u << " and " << v << " not viable (Symmetry Narrowing below min of two significant nodes)\n"; + std::cout << " - Merge of " << u << " and " << v + << " not viable (Symmetry Narrowing below min of two significant nodes)\n"; std::cout << " - u_sym: " << u_size << ", v_sym: " << v_size << " -> new_sym: " << new_size << "\n"; } non_viable_edges_cache_.insert({u, v}); continue; } - } - else if (u_is_significant || v_is_significant) - { + } else if (u_is_significant || v_is_significant) { // Exactly one is significant --- const std::size_t significant_node_size = u_is_significant ? u_size : v_size; - + if (new_size < significant_node_size) { if constexpr (verbose) { - std::cout << " - Merge of " << u << " and " << v << " not viable (Symmetry Narrowing of a single significant node)\n"; + std::cout << " - Merge of " << u << " and " << v + << " not viable (Symmetry Narrowing of a single significant node)\n"; std::cout << " - u_sym: " << u_size << " (sig: " << u_is_significant << ")" << ", v_sym: " << v_size << " (sig: " << v_is_significant << ")" << " -> new_sym: " << new_size << "\n"; @@ -484,14 +505,18 @@ class OrbitGraphProcessor { continue; } } - + // Critical Path Check auto [temp_coarse_graph, temp_contraction_map] = simulate_merge(u, v, current_coarse_graph); - if (critical_path_weight(temp_coarse_graph) > (path_threshold * static_cast>(new_subgraphs.size()) + critical_path_weight(current_coarse_graph))) { + if (critical_path_weight(temp_coarse_graph) + > (path_threshold * static_cast>(new_subgraphs.size()) + + critical_path_weight(current_coarse_graph))) { if constexpr (verbose) { - std::cout << " - Merge of " << u << " and " << v << " increases critical path. Old cirtical path: " << critical_path_weight(current_coarse_graph) - << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + " << path_threshold * static_cast>(new_subgraphs.size()) << "\n"; + std::cout << " - Merge of " << u << " and " << v + << " increases critical path. Old cirtical path: " << critical_path_weight(current_coarse_graph) + << " new critical path: " << critical_path_weight(temp_coarse_graph) << " + " + << path_threshold * static_cast>(new_subgraphs.size()) << "\n"; } non_viable_crit_path_edges_cache_.insert({u, v}); continue; @@ -503,8 +528,14 @@ class OrbitGraphProcessor { << temp_coarse_graph.num_vertices() << " nodes.\n"; } - commit_merge(u, v, std::move(temp_coarse_graph), temp_contraction_map, std::move(new_subgraphs), - current_coarse_graph, current_groups, current_contraction_map); + commit_merge(u, + v, + std::move(temp_coarse_graph), + temp_contraction_map, + std::move(new_subgraphs), + current_coarse_graph, + current_groups, + current_contraction_map); changed = true; break; @@ -512,31 +543,34 @@ class OrbitGraphProcessor { } } - public: - explicit OrbitGraphProcessor() {} void setMergeDifferentNodeTypes(bool flag) { merge_different_node_types_ = flag; } + void set_work_threshold(v_workw_t work_threshold) { work_threshold_ = work_threshold; } - void setCriticalPathThreshold(v_workw_t critical_path_threshold) { critical_path_threshold_ = critical_path_threshold; } + + void setCriticalPathThreshold(v_workw_t critical_path_threshold) { + critical_path_threshold_ = critical_path_threshold; + } + void setLockRatio(double lock_ratio) { lock_orbit_ratio = lock_ratio; } - + void setSymmetryLevelHeuristic(SymmetryLevelHeuristic heuristic) { symmetry_level_heuristic_ = heuristic; } - void setWorkPercentiles(const std::vector& percentiles) { + + void setWorkPercentiles(const std::vector &percentiles) { work_percentiles_ = percentiles; std::sort(work_percentiles_.begin(), work_percentiles_.end()); } - void setUseStaticSymmetryLevel(size_t static_symmetry_level) { + void setUseStaticSymmetryLevel(size_t static_symmetry_level) { symmetry_level_heuristic_ = SymmetryLevelHeuristic::NATURAL_BREAKS; - use_adaptive_symmetry_threshold_ = false; - current_symmetry = static_symmetry_level; + use_adaptive_symmetry_threshold_ = false; + current_symmetry = static_symmetry_level; } void setNaturalBreaksCountPercentage(double percentage) { natural_breaks_count_percentage_ = percentage; } - /** * @brief Discovers isomorphic groups (orbits) and constructs a coarse graph. */ @@ -565,17 +599,19 @@ class OrbitGraphProcessor { } coarse_node_idx++; } - + std::vector> work_per_vertex_type; work_per_vertex_type.resize(merge_different_node_types_ ? 1U : dag.num_vertex_types(), 0); - + std::map orbit_size_counts; std::map> work_per_orbit_size; v_workw_t total_work = 0; for (const auto &[hash, vertices] : orbits) { const size_t orbit_size = vertices.size(); - - if (orbit_size == 1U) continue; // exclude single node orbits from total work + + if (orbit_size == 1U) { + continue; // exclude single node orbits from total work + } orbit_size_counts[orbit_size]++; @@ -583,7 +619,7 @@ class OrbitGraphProcessor { for (const auto v : vertices) { orbit_work += dag.vertex_work_weight(v); } - + if (not merge_different_node_types_ && has_typed_vertices_v) { work_per_vertex_type[dag.vertex_type(vertices[0])] += orbit_work; } else { @@ -591,46 +627,52 @@ class OrbitGraphProcessor { } work_per_orbit_size[orbit_size] += orbit_work; - total_work += orbit_work; + total_work += orbit_work; } std::vector> lock_threshold_per_type(work_per_vertex_type.size()); for (size_t i = 0; i < work_per_vertex_type.size(); ++i) { lock_threshold_per_type[i] = static_cast>(lock_orbit_ratio * work_per_vertex_type[i]); } - + std::vector rel_acc_work_per_orbit_size; - std::vector symmetry_levels_to_test = compute_symmetry_levels(rel_acc_work_per_orbit_size, work_per_orbit_size, total_work, orbit_size_counts); + std::vector symmetry_levels_to_test + = compute_symmetry_levels(rel_acc_work_per_orbit_size, work_per_orbit_size, total_work, orbit_size_counts); if constexpr (verbose) { std::cout << "\n--- Orbit Analysis ---\n"; - for (auto const& [size, count] : orbit_size_counts) { - if (total_work > 0) - std::cout << " - Orbits of size " << size << ": " << count << " groups, weight: " << 100.0 * static_cast(work_per_orbit_size[size]) / static_cast(total_work) << "%\n"; - else + for (auto const &[size, count] : orbit_size_counts) { + if (total_work > 0) { + std::cout << " - Orbits of size " << size << ": " << count << " groups, weight: " + << 100.0 * static_cast(work_per_orbit_size[size]) / static_cast(total_work) << "%\n"; + } else { std::cout << " - Orbits of size " << size << ": " << count << " groups, weight: 0.0%\n"; + } } std::cout << " Cumulative work distribution by orbit size (largest to smallest):\n"; size_t i = 0; - for (auto it = orbit_size_counts.rbegin(); it != orbit_size_counts.rend() && i < rel_acc_work_per_orbit_size.size(); ++it, ++i) { - std::cout << " - Orbits with size >= " << it->first << ": " - << std::fixed << std::setprecision(2) << rel_acc_work_per_orbit_size[i] * 100 << "%\n"; + for (auto it = orbit_size_counts.rbegin(); it != orbit_size_counts.rend() && i < rel_acc_work_per_orbit_size.size(); + ++it, ++i) { + std::cout << " - Orbits with size >= " << it->first << ": " << std::fixed << std::setprecision(2) + << rel_acc_work_per_orbit_size[i] * 100 << "%\n"; } std::cout << " Work distribution by vertex type:\n"; for (size_t j = 0; j < work_per_vertex_type.size(); ++j) { - if (total_work > 0) - std::cout << " - Vertex type " << j << ": " << 100.0 * static_cast(work_per_vertex_type[j]) / static_cast(total_work) << "%\n"; - else - std::cout << " - Vertex type " << j << ": 0.0%\n"; + if (total_work > 0) { + std::cout << " - Vertex type " << j << ": " + << 100.0 * static_cast(work_per_vertex_type[j]) / static_cast(total_work) << "%\n"; + } else { + std::cout << " - Vertex type " << j << ": 0.0%\n"; + } } - + std::cout << "--------------------------------\n"; std::cout << " Symmetry levels to test: " << "\n"; for (const auto level : symmetry_levels_to_test) { std::cout << " - " << level << "\n"; } - std::cout << "--------------------------------\n"; - } + std::cout << "--------------------------------\n"; + } coarser_util::construct_coarse_dag(dag, coarse_graph_, contraction_map_); @@ -638,17 +680,20 @@ class OrbitGraphProcessor { perform_coarsening_adaptive_symmetry(dag, coarse_graph_, lock_threshold_per_type, symmetry_levels_to_test); } else { size_t total_size_count = 0U; - for (const auto& [size, count] : orbit_size_counts) { + for (const auto &[size, count] : orbit_size_counts) { total_size_count += count; - } + } + + for (const auto &[size, count] : orbit_size_counts) { + if (size == 1U || size > current_symmetry) { + continue; + } - for (const auto& [size, count] : orbit_size_counts) { - if (size == 1U || size > current_symmetry) continue; - if (count > total_size_count / 2) { - if constexpr (verbose) { - std::cout << "Setting current_symmetry to " << size << " because " << count << " orbits of size " << size << " are more than half of the total number of orbits.\n"; - } + if constexpr (verbose) { + std::cout << "Setting current_symmetry to " << size << " because " << count << " orbits of size " << size + << " are more than half of the total number of orbits.\n"; + } current_symmetry = size; } } @@ -658,31 +703,34 @@ class OrbitGraphProcessor { } private: - - std::vector compute_symmetry_levels(std::vector & rel_acc_work_per_orbit_size, const std::map> work_per_orbit_size, const v_workw_t total_work, const std::map orbit_size_counts) { - + std::vector compute_symmetry_levels(std::vector &rel_acc_work_per_orbit_size, + const std::map> work_per_orbit_size, + const v_workw_t total_work, + const std::map orbit_size_counts) { std::vector symmetry_levels_to_test; min_symmetry_ = 2; switch (symmetry_level_heuristic_) { - case SymmetryLevelHeuristic::PERCENTILE_BASED: - { - if constexpr (verbose) { std::cout << "Using PERCENTILE_BASED heuristic for symmetry levels.\n"; } + case SymmetryLevelHeuristic::PERCENTILE_BASED: { + if constexpr (verbose) { + std::cout << "Using PERCENTILE_BASED heuristic for symmetry levels.\n"; + } size_t percentile_idx = 0; v_workw_t cumulative_work = 0; - for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) - { + for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) { cumulative_work += it->second; - if (total_work == 0) continue; // Avoid division by zero + if (total_work == 0) { + continue; // Avoid division by zero + } double current_work_ratio = static_cast(cumulative_work) / static_cast(total_work); - rel_acc_work_per_orbit_size.push_back(current_work_ratio); // For printing + rel_acc_work_per_orbit_size.push_back(current_work_ratio); // For printing if (percentile_idx < work_percentiles_.size() && current_work_ratio >= work_percentiles_[percentile_idx]) { if (it->first > min_symmetry_) { symmetry_levels_to_test.push_back(it->first); } - while (percentile_idx < work_percentiles_.size() && - current_work_ratio >= work_percentiles_[percentile_idx]) { + while (percentile_idx < work_percentiles_.size() + && current_work_ratio >= work_percentiles_[percentile_idx]) { percentile_idx++; } } @@ -690,36 +738,43 @@ class OrbitGraphProcessor { break; } - case SymmetryLevelHeuristic::NATURAL_BREAKS: - { - if constexpr (verbose) { std::cout << "Using NATURAL_BREAKS heuristic for symmetry levels.\n"; } + case SymmetryLevelHeuristic::NATURAL_BREAKS: { + if constexpr (verbose) { + std::cout << "Using NATURAL_BREAKS heuristic for symmetry levels.\n"; + } size_t total_orbit_groups = 0; - for (const auto& [size, count] : orbit_size_counts) { + for (const auto &[size, count] : orbit_size_counts) { total_orbit_groups += count; } - size_t count_threshold = static_cast(static_cast(total_orbit_groups) * natural_breaks_count_percentage_); + size_t count_threshold + = static_cast(static_cast(total_orbit_groups) * natural_breaks_count_percentage_); if (count_threshold == 0 && total_orbit_groups > 0) { - count_threshold = 1; // Ensure threshold is at least 1 if possible + count_threshold = 1; // Ensure threshold is at least 1 if possible + } + if constexpr (verbose) { + std::cout << " - Total orbit groups: " << total_orbit_groups << ", count threshold: " << count_threshold + << "\n"; } - if constexpr (verbose) { std::cout << " - Total orbit groups: " << total_orbit_groups << ", count threshold: " << count_threshold << "\n"; } std::vector sorted_sizes; sorted_sizes.reserve(orbit_size_counts.size()); - for (const auto& [size, count] : orbit_size_counts) { + for (const auto &[size, count] : orbit_size_counts) { sorted_sizes.push_back(size); } - std::sort(sorted_sizes.rbegin(), sorted_sizes.rend()); // Sort descending + std::sort(sorted_sizes.rbegin(), sorted_sizes.rend()); // Sort descending if (!sorted_sizes.empty()) { for (size_t i = 0; i < sorted_sizes.size(); ++i) { const size_t current_size = sorted_sizes[i]; - if (current_size < min_symmetry_) continue; + if (current_size < min_symmetry_) { + continue; + } // Add if this size's count is significant const size_t current_count = orbit_size_counts.at(current_size); bool count_significant = (current_count >= count_threshold); - + if (count_significant) { symmetry_levels_to_test.push_back(current_size); continue; @@ -730,7 +785,7 @@ class OrbitGraphProcessor { if (symmetry_levels_to_test.empty()) { size_t max_count = 0; size_t size_with_max_count = 0; - for (const auto& [size, count] : orbit_size_counts) { + for (const auto &[size, count] : orbit_size_counts) { if (count > max_count) { max_count = count; size_with_max_count = size; @@ -745,23 +800,27 @@ class OrbitGraphProcessor { v_workw_t cumulative_work = 0; for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) { cumulative_work += it->second; - if (total_work > 0) - rel_acc_work_per_orbit_size.push_back(static_cast(cumulative_work) / static_cast(total_work)); + if (total_work > 0) { + rel_acc_work_per_orbit_size.push_back(static_cast(cumulative_work) + / static_cast(total_work)); + } } break; } case SymmetryLevelHeuristic::CURRENT_DEFAULT: - default: - { - if constexpr (verbose) { std::cout << "Using CURRENT_DEFAULT heuristic for symmetry levels.\n"; } + default: { + if constexpr (verbose) { + std::cout << "Using CURRENT_DEFAULT heuristic for symmetry levels.\n"; + } double threshold = lock_orbit_ratio; v_workw_t cumulative_work = 0; for (auto it = work_per_orbit_size.rbegin(); it != work_per_orbit_size.rend(); ++it) { cumulative_work += it->second; - const double rel_work = (total_work == 0) ? 0 : static_cast(cumulative_work) / static_cast(total_work); - rel_acc_work_per_orbit_size.push_back(rel_work); // For printing - + const double rel_work + = (total_work == 0) ? 0 : static_cast(cumulative_work) / static_cast(total_work); + rel_acc_work_per_orbit_size.push_back(rel_work); // For printing + if (rel_work >= threshold && it->first > min_symmetry_) { symmetry_levels_to_test.push_back(it->first); threshold += lock_orbit_ratio * 0.5; @@ -770,12 +829,13 @@ class OrbitGraphProcessor { break; } } - - if (symmetry_levels_to_test.empty()) + + if (symmetry_levels_to_test.empty()) { symmetry_levels_to_test.push_back(2); + } min_symmetry_ = symmetry_levels_to_test.back(); - + // De-duplicate and sort descending std::sort(symmetry_levels_to_test.rbegin(), symmetry_levels_to_test.rend()); auto last = std::unique(symmetry_levels_to_test.begin(), symmetry_levels_to_test.end()); @@ -784,7 +844,6 @@ class OrbitGraphProcessor { return symmetry_levels_to_test; } - /** * @brief Non-adaptive coarsening (deprecated). */ @@ -806,7 +865,6 @@ class OrbitGraphProcessor { current_groups[coarse_node].subgraphs.push_back({i}); } - if constexpr (has_typed_vertices_v) { if constexpr (verbose) { std::cout << "Attempting to merge same node types.\n"; @@ -815,13 +873,13 @@ class OrbitGraphProcessor { contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, true, false); } - if constexpr (verbose) { std::cout << "Attempting to merge different node types.\n"; } - contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, false, merge_different_node_types_); - contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, true, merge_different_node_types_); - + contract_edges( + original_dag, current_coarse_graph, current_groups, current_contraction_map, false, merge_different_node_types_); + contract_edges( + original_dag, current_coarse_graph, current_groups, current_contraction_map, true, merge_different_node_types_); if constexpr (verbose) { std::cout << "Attempting to merge small orbits.\n"; @@ -831,8 +889,14 @@ class OrbitGraphProcessor { non_viable_crit_path_edges_cache_.clear(); non_viable_edges_cache_.clear(); - contract_edges(original_dag, current_coarse_graph, current_groups, current_contraction_map, true, merge_different_node_types_, work_threshold_); - + contract_edges(original_dag, + current_coarse_graph, + current_groups, + current_contraction_map, + true, + merge_different_node_types_, + work_threshold_); + final_coarse_graph_ = std::move(current_coarse_graph); final_contraction_map_ = std::move(current_contraction_map); final_groups_ = std::move(current_groups); @@ -842,7 +906,10 @@ class OrbitGraphProcessor { } } - void perform_coarsening_adaptive_symmetry(const Graph_t &original_dag, const Constr_Graph_t &initial_coarse_graph, const std::vector>& lock_threshold_per_type, const std::vector& symmetry_levels_to_test) { + void perform_coarsening_adaptive_symmetry(const Graph_t &original_dag, + const Constr_Graph_t &initial_coarse_graph, + const std::vector> &lock_threshold_per_type, + const std::vector &symmetry_levels_to_test) { final_coarse_graph_ = Constr_Graph_t(); final_contraction_map_.clear(); @@ -858,9 +925,10 @@ class OrbitGraphProcessor { const VertexType coarse_node = contraction_map_[i]; current_groups[coarse_node].subgraphs.push_back({i}); } - + if constexpr (verbose) { - std::cout << " Starting adaptive symmetry coarsening with critical_path_threshold: " << critical_path_threshold_ << "\n"; + std::cout << " Starting adaptive symmetry coarsening with critical_path_threshold: " << critical_path_threshold_ + << "\n"; } for (const auto sym : symmetry_levels_to_test) { @@ -872,22 +940,41 @@ class OrbitGraphProcessor { non_viable_edges_cache_.clear(); - contract_edges_adpative_sym(original_dag, current_coarse_graph, current_groups, current_contraction_map, false, is_last_loop, lock_threshold_per_type); - - if (merge_different_node_types_) - contract_edges_adpative_sym(original_dag, current_coarse_graph, current_groups, current_contraction_map, merge_different_node_types_, is_last_loop, lock_threshold_per_type); - - non_viable_crit_path_edges_cache_.clear(); - contract_edges_adpative_sym(original_dag, current_coarse_graph, current_groups, current_contraction_map, merge_different_node_types_, is_last_loop, lock_threshold_per_type, critical_path_threshold_); + contract_edges_adpative_sym(original_dag, + current_coarse_graph, + current_groups, + current_contraction_map, + false, + is_last_loop, + lock_threshold_per_type); + + if (merge_different_node_types_) { + contract_edges_adpative_sym(original_dag, + current_coarse_graph, + current_groups, + current_contraction_map, + merge_different_node_types_, + is_last_loop, + lock_threshold_per_type); + } + non_viable_crit_path_edges_cache_.clear(); + contract_edges_adpative_sym(original_dag, + current_coarse_graph, + current_groups, + current_contraction_map, + merge_different_node_types_, + is_last_loop, + lock_threshold_per_type, + critical_path_threshold_); } - + if constexpr (verbose) { std::cout << " Merging small orbits with work threshold: " << work_threshold_ << "\n"; } non_viable_edges_cache_.clear(); merge_small_orbits(original_dag, current_coarse_graph, current_groups, current_contraction_map, work_threshold_); - + final_coarse_graph_ = std::move(current_coarse_graph); final_contraction_map_ = std::move(current_contraction_map); final_groups_ = std::move(current_groups); @@ -913,12 +1000,13 @@ class OrbitGraphProcessor { /** * @brief Checks if merging two groups is structurally viable. */ - bool is_merge_viable(const Graph_t &original_dag, const Group &group_u, const Group &group_v, + bool is_merge_viable(const Graph_t &original_dag, + const Group &group_u, + const Group &group_v, std::vector> &out_new_subgraphs) const { - std::vector all_nodes; - all_nodes.reserve(group_u.subgraphs.size() * (group_u.subgraphs.empty() ? 0 : group_u.subgraphs[0].size()) + - group_v.subgraphs.size() * (group_v.subgraphs.empty() ? 0 : group_v.subgraphs[0].size())); + all_nodes.reserve(group_u.subgraphs.size() * (group_u.subgraphs.empty() ? 0 : group_u.subgraphs[0].size()) + + group_v.subgraphs.size() * (group_v.subgraphs.empty() ? 0 : group_v.subgraphs[0].size())); for (const auto &sg : group_u.subgraphs) { all_nodes.insert(all_nodes.end(), sg.begin(), sg.end()); } @@ -937,12 +1025,12 @@ class OrbitGraphProcessor { Constr_Graph_t induced_subgraph; auto map = create_induced_subgraph_map(original_dag, induced_subgraph, all_nodes); - std::vector components; // local -> component_id + std::vector components; // local -> component_id size_t num_components = compute_weakly_connected_components(induced_subgraph, components); out_new_subgraphs.assign(num_components, std::vector()); - - if (all_nodes.empty()) { // Handle empty graph case - return true; + + if (all_nodes.empty()) { // Handle empty graph case + return true; } for (const auto &node : all_nodes) { @@ -971,10 +1059,14 @@ class OrbitGraphProcessor { public: const Graph_t &get_coarse_graph() const { return coarse_graph_; } + const std::vector &get_contraction_map() const { return contraction_map_; } + const Graph_t &get_final_coarse_graph() const { return final_coarse_graph_; } + const std::vector &get_final_contraction_map() const { return final_contraction_map_; } + const std::vector &get_final_groups() const { return final_groups_; } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp b/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp index c179161d..391c5819 100644 --- a/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp +++ b/include/osp/dag_divider/isomorphism_divider/PrecomputedHashComputer.hpp @@ -18,8 +18,9 @@ limitations under the License. #pragma once -#include #include +#include + #include "osp/dag_divider/isomorphism_divider/HashComputer.hpp" namespace osp { @@ -33,9 +34,8 @@ namespace osp { * * @tparam IndexType The type used for indexing the objects */ -template +template class PrecomputedHashComputer : public HashComputer { - std::vector vertex_hashes; std::unordered_map> orbits; @@ -45,9 +45,9 @@ class PrecomputedHashComputer : public HashComputer { * * @param precomputed_hashes A vector of hash values for objects 0 to n-1. */ - PrecomputedHashComputer(const std::vector& precomputed_hashes) : vertex_hashes(precomputed_hashes) { + PrecomputedHashComputer(const std::vector &precomputed_hashes) : vertex_hashes(precomputed_hashes) { for (std::size_t i = 0; i < vertex_hashes.size(); ++i) { - const auto& hash = vertex_hashes[i]; + const auto &hash = vertex_hashes[i]; orbits[hash].push_back(static_cast(i)); } } @@ -55,15 +55,18 @@ class PrecomputedHashComputer : public HashComputer { virtual ~PrecomputedHashComputer() override = default; inline std::size_t get_vertex_hash(const IndexType &v) const override { return vertex_hashes[v]; } + inline const std::vector &get_vertex_hashes() const override { return vertex_hashes; } + inline std::size_t num_orbits() const override { return orbits.size(); } - - inline const std::vector &get_orbit(const IndexType &v) const override { return this->get_orbit_from_hash(this->get_vertex_hash(v)); } - inline const std::unordered_map> &get_orbits() const override { return orbits; } - inline const std::vector& get_orbit_from_hash(const std::size_t& hash) const override { - return orbits.at(hash); + inline const std::vector &get_orbit(const IndexType &v) const override { + return this->get_orbit_from_hash(this->get_vertex_hash(v)); } + + inline const std::unordered_map> &get_orbits() const override { return orbits; } + + inline const std::vector &get_orbit_from_hash(const std::size_t &hash) const override { return orbits.at(hash); } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp index 97fa53a5..4b52b935 100644 --- a/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp +++ b/include/osp/dag_divider/isomorphism_divider/TrimmedGroupScheduler.hpp @@ -18,11 +18,12 @@ limitations under the License. #pragma once +#include +#include + #include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/graph_algorithms/computational_dag_util.hpp" #include "osp/graph_algorithms/subgraph_algorithms.hpp" -#include -#include namespace osp { @@ -35,9 +36,8 @@ namespace osp { * potentially disconnected, subgraph that resulted from merging smaller isomorphic subgraphs. It divides * the input graph into its weakly connected components and schedules them on proportionally allocated processors. */ -template +template class TrimmedGroupScheduler : public Scheduler { - Scheduler *sub_scheduler; unsigned min_non_zero_procs_; @@ -122,30 +122,33 @@ class TrimmedGroupScheduler : public Scheduler { std::vector sub_arch_proc_type_offsets(sub_arch.getNumberOfProcessorTypes(), 0); const auto &sub_arch_proc_type_counts = sub_arch.getProcessorTypeCount(); for (unsigned type_idx = 1; type_idx < sub_arch.getNumberOfProcessorTypes(); ++type_idx) { - sub_arch_proc_type_offsets[type_idx] = sub_arch_proc_type_offsets[type_idx - 1] + sub_arch_proc_type_counts[type_idx - 1]; + sub_arch_proc_type_offsets[type_idx] + = sub_arch_proc_type_offsets[type_idx - 1] + sub_arch_proc_type_counts[type_idx - 1]; } unsigned max_supersteps = 0; for (unsigned i = 0; i < min_non_zero_procs_; ++i) { - std::vector> group_vertices; for (unsigned comp_idx : component_indices_per_group[i]) { - group_vertices.insert(group_vertices.end(), components_vertices[comp_idx].begin(), components_vertices[comp_idx].end()); + group_vertices.insert( + group_vertices.end(), components_vertices[comp_idx].begin(), components_vertices[comp_idx].end()); } std::sort(group_vertices.begin(), group_vertices.end()); BspInstance sub_instanc; sub_instanc.getArchitecture() = sub_arch; - sub_instanc.setNodeProcessorCompatibility(instance.getNodeProcessorCompatibilityMatrix()); // Inherit compatibility - auto global_to_local_map = create_induced_subgraph_map(dag, sub_instanc.getComputationalDag(), group_vertices); // Create induced subgraph + sub_instanc.setNodeProcessorCompatibility(instance.getNodeProcessorCompatibilityMatrix()); // Inherit compatibility + auto global_to_local_map = create_induced_subgraph_map( + dag, sub_instanc.getComputationalDag(), group_vertices); // Create induced subgraph // Create a schedule object for the sub-problem BspSchedule sub_schedule(sub_instanc); // Call the sub-scheduler to compute the schedule for this group of components auto status = sub_scheduler->computeSchedule(sub_schedule); - if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) + if (status != RETURN_STATUS::OSP_SUCCESS && status != RETURN_STATUS::BEST_FOUND) { return status; + } // Map the sub-schedule back to the main schedule. for (const auto &v_global : group_vertices) { @@ -161,9 +164,8 @@ class TrimmedGroupScheduler : public Scheduler { // The base offset of this processor type in the main 'arch'. // The offset for the current 'i'-th block of processors of this type. // The local index within that type block. - const unsigned global_proc = arch_proc_type_offsets[proc_type] + - (i * sub_proc_counts[proc_type]) + - local_idx_within_type; + const unsigned global_proc + = arch_proc_type_offsets[proc_type] + (i * sub_proc_counts[proc_type]) + local_idx_within_type; schedule.setAssignedProcessor(v_global, global_proc); schedule.setAssignedSuperstep(v_global, sub_superstep); } @@ -175,4 +177,4 @@ class TrimmedGroupScheduler : public Scheduler { } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp b/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp index eb484409..c916b55c 100644 --- a/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp +++ b/include/osp/dag_divider/wavefront_divider/AbstractWavefrontDivider.hpp @@ -17,15 +17,16 @@ limitations under the License. */ #pragma once -#include "osp/concepts/computational_dag_concept.hpp" -#include #include #include #include #include -#include "osp/auxiliary/datastructures/union_find.hpp" -#include "SequenceSplitter.hpp" +#include + #include "SequenceGenerator.hpp" +#include "SequenceSplitter.hpp" +#include "osp/auxiliary/datastructures/union_find.hpp" +#include "osp/concepts/computational_dag_concept.hpp" #include "osp/dag_divider/DagDivider.hpp" namespace osp { @@ -34,35 +35,37 @@ namespace osp { * @class AbstractWavefrontDivider * @brief Base class for wavefront-based DAG dividers. */ -template +template class AbstractWavefrontDivider : public IDagDivider { - static_assert(is_computational_dag_v, - "AbstractWavefrontDivider can only be used with computational DAGs."); + static_assert(is_computational_dag_v, "AbstractWavefrontDivider can only be used with computational DAGs."); -protected: + protected: using VertexType = vertex_idx_t; - const Graph_t* dag_ptr_ = nullptr; + const Graph_t *dag_ptr_ = nullptr; /** * @brief Helper to get connected components for a specific range of levels. * This method is now const-correct. */ - std::vector> get_components_for_range( - size_t start_level, size_t end_level, - const std::vector>& level_sets) const { - + std::vector> get_components_for_range(size_t start_level, + size_t end_level, + const std::vector> &level_sets) const { union_find_universe_t uf; for (size_t i = start_level; i < end_level; ++i) { for (const auto vertex : level_sets[i]) { uf.add_object(vertex, dag_ptr_->vertex_work_weight(vertex), dag_ptr_->vertex_mem_weight(vertex)); } - for (const auto& node : level_sets[i]) { - for (const auto& child : dag_ptr_->children(node)) { - if (uf.is_in_universe(child)) uf.join_by_name(node, child); + for (const auto &node : level_sets[i]) { + for (const auto &child : dag_ptr_->children(node)) { + if (uf.is_in_universe(child)) { + uf.join_by_name(node, child); + } } - for (const auto& parent : dag_ptr_->parents(node)) { - if (uf.is_in_universe(parent)) uf.join_by_name(parent, node); + for (const auto &parent : dag_ptr_->parents(node)) { + if (uf.is_in_universe(parent)) { + uf.join_by_name(parent, node); + } } } } @@ -83,19 +86,19 @@ class AbstractWavefrontDivider : public IDagDivider { * @brief Computes wavefronts for a specific subset of vertices. * This method is now const. */ - std::vector> compute_wavefronts_for_subgraph( - const std::vector& vertices) const { - - if (vertices.empty()) return {}; + std::vector> compute_wavefronts_for_subgraph(const std::vector &vertices) const { + if (vertices.empty()) { + return {}; + } std::vector> level_sets; std::unordered_set vertex_set(vertices.begin(), vertices.end()); std::unordered_map in_degree; std::queue q; - for (const auto& v : vertices) { + for (const auto &v : vertices) { in_degree[v] = 0; - for (const auto& p : dag_ptr_->parents(v)) { + for (const auto &p : dag_ptr_->parents(v)) { if (vertex_set.count(p)) { in_degree[v]++; } @@ -112,7 +115,7 @@ class AbstractWavefrontDivider : public IDagDivider { VertexType u = q.front(); q.pop(); current_level.push_back(u); - for (const auto& v : dag_ptr_->children(u)) { + for (const auto &v : dag_ptr_->children(u)) { if (vertex_set.count(v)) { in_degree[v]--; if (in_degree[v] == 0) { @@ -127,4 +130,4 @@ class AbstractWavefrontDivider : public IDagDivider { } }; -} // end namespace osp \ No newline at end of file +} // end namespace osp diff --git a/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp b/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp index 7eb60b46..c382169b 100644 --- a/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp +++ b/include/osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp @@ -17,14 +17,15 @@ limitations under the License. */ #pragma once -#include #include #include -#include #include +#include +#include + #include "AbstractWavefrontDivider.hpp" -#include "SequenceSplitter.hpp" #include "SequenceGenerator.hpp" +#include "SequenceSplitter.hpp" namespace osp { @@ -37,9 +38,9 @@ namespace osp { * section, it recursively repeats the process, allowing for a hierarchical * division of the DAG. */ -template +template class RecursiveWavefrontDivider : public AbstractWavefrontDivider { -public: + public: constexpr static bool enable_debug_print = true; RecursiveWavefrontDivider() { @@ -63,51 +64,49 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider { return all_sections; } - RecursiveWavefrontDivider& set_metric(SequenceMetric metric) { + RecursiveWavefrontDivider &set_metric(SequenceMetric metric) { sequence_metric_ = metric; return *this; } - RecursiveWavefrontDivider& use_variance_splitter(double mult, double threshold, size_t min_len = 1) { + RecursiveWavefrontDivider &use_variance_splitter(double mult, double threshold, size_t min_len = 1) { splitter_ = std::make_unique(mult, threshold, min_len); min_subseq_len_ = min_len; return *this; } - RecursiveWavefrontDivider& use_largest_step_splitter(double threshold, size_t min_len) { + RecursiveWavefrontDivider &use_largest_step_splitter(double threshold, size_t min_len) { splitter_ = std::make_unique(threshold, min_len); min_subseq_len_ = min_len; return *this; } - RecursiveWavefrontDivider& use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) { + RecursiveWavefrontDivider &use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) { splitter_ = std::make_unique(diff_threshold, abs_threshold, min_len); min_subseq_len_ = min_len; return *this; } - - RecursiveWavefrontDivider& set_max_depth(size_t max_depth) { + + RecursiveWavefrontDivider &set_max_depth(size_t max_depth) { max_depth_ = max_depth; return *this; } -private: + private: using VertexType = vertex_idx_t; using LevelSetConstIterator = typename std::vector>::const_iterator; using DifferenceType = typename std::iterator_traits::difference_type; - SequenceMetric sequence_metric_ = SequenceMetric::COMPONENT_COUNT; std::unique_ptr splitter_; size_t min_subseq_len_ = 4; size_t max_depth_ = std::numeric_limits::max(); - void divide_recursive( - LevelSetConstIterator level_begin, LevelSetConstIterator level_end, - const std::vector>& global_level_sets, - std::vector>>& all_sections, - size_t current_depth) const { - + void divide_recursive(LevelSetConstIterator level_begin, + LevelSetConstIterator level_end, + const std::vector> &global_level_sets, + std::vector>> &all_sections, + size_t current_depth) const { const auto current_range_size = static_cast(std::distance(level_begin, level_end)); size_t start_level_idx = static_cast(std::distance(global_level_sets.cbegin(), level_begin)); size_t end_level_idx = static_cast(std::distance(global_level_sets.cbegin(), level_end)); @@ -115,7 +114,7 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider { // --- Base Cases for Recursion --- if (current_depth >= max_depth_ || current_range_size < min_subseq_len_) { if constexpr (enable_debug_print) { - std::cout << "[DEBUG depth " << current_depth << "] Base case reached. Creating section from levels " + std::cout << "[DEBUG depth " << current_depth << "] Base case reached. Creating section from levels " << start_level_idx << " to " << end_level_idx << "." << std::endl; } // Ensure the section is not empty before adding @@ -133,7 +132,9 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider { if constexpr (enable_debug_print) { std::cout << "[DEBUG depth " << current_depth << "] Analyzing sequence: "; - for(const auto& val : sequence) std::cout << val << " "; + for (const auto &val : sequence) { + std::cout << val << " "; + } std::cout << std::endl; } @@ -142,7 +143,7 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider { // --- Base Case: No further cuts found --- if (local_cuts.empty()) { if constexpr (enable_debug_print) { - std::cout << "[DEBUG depth " << current_depth << "] No cuts found. Creating section from levels " + std::cout << "[DEBUG depth " << current_depth << "] No cuts found. Creating section from levels " << start_level_idx << " to " << end_level_idx << "." << std::endl; } all_sections.push_back(this->get_components_for_range(start_level_idx, end_level_idx, global_level_sets)); @@ -153,9 +154,8 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider { std::cout << "[DEBUG depth " << current_depth << "] Found " << local_cuts.size() << " cuts: "; for (const auto c : local_cuts) { std::cout << c << ", "; - } - std::cout << "in level range [" - << start_level_idx << ", " << end_level_idx << "). Recursing." << std::endl; + } + std::cout << "in level range [" << start_level_idx << ", " << end_level_idx << "). Recursing." << std::endl; } // --- Recurse on the new, smaller sub-problems --- @@ -163,20 +163,18 @@ class RecursiveWavefrontDivider : public AbstractWavefrontDivider { local_cuts.erase(std::unique(local_cuts.begin(), local_cuts.end()), local_cuts.end()); auto current_sub_begin = level_begin; - for (const auto& local_cut_idx : local_cuts) { + for (const auto &local_cut_idx : local_cuts) { auto cut_iterator = level_begin + static_cast(local_cut_idx); if (cut_iterator > current_sub_begin) { - divide_recursive(current_sub_begin, cut_iterator, - global_level_sets, all_sections, current_depth + 1); + divide_recursive(current_sub_begin, cut_iterator, global_level_sets, all_sections, current_depth + 1); } current_sub_begin = cut_iterator; } // Recurse on the final segment from the last cut to the end. if (current_sub_begin < level_end) { - divide_recursive(current_sub_begin, level_end, - global_level_sets, all_sections, current_depth + 1); + divide_recursive(current_sub_begin, level_end, global_level_sets, all_sections, current_depth + 1); } } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp b/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp index e32be944..c815b615 100644 --- a/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp +++ b/include/osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp @@ -17,13 +17,14 @@ limitations under the License. */ #pragma once -#include #include #include #include +#include + #include "AbstractWavefrontDivider.hpp" -#include "SequenceSplitter.hpp" #include "SequenceGenerator.hpp" +#include "SequenceSplitter.hpp" namespace osp { @@ -32,14 +33,12 @@ namespace osp { * @brief Divides a DAG by scanning all wavefronts and applying a splitting algorithm. * This revised version uses a fluent API for safer and clearer algorithm configuration. */ -template +template class ScanWavefrontDivider : public AbstractWavefrontDivider { -public: + public: constexpr static bool enable_debug_print = true; - ScanWavefrontDivider() { - use_largest_step_splitter(3.0, 4); - } + ScanWavefrontDivider() { use_largest_step_splitter(3.0, 4); } std::vector>>> divide(const Graph_t &dag) override { this->dag_ptr_ = &dag; @@ -54,67 +53,69 @@ class ScanWavefrontDivider : public AbstractWavefrontDivider { SequenceGenerator generator(dag, level_sets); std::vector sequence = generator.generate(sequence_metric_); - + if constexpr (enable_debug_print) { std::cout << "[DEBUG] Metric: " << static_cast(sequence_metric_) << std::endl; std::cout << "[DEBUG] Generated sequence: "; - for(const auto& val : sequence) std::cout << val << " "; + for (const auto &val : sequence) { + std::cout << val << " "; + } std::cout << std::endl; } - + std::vector cut_levels = splitter_->split(sequence); std::sort(cut_levels.begin(), cut_levels.end()); cut_levels.erase(std::unique(cut_levels.begin(), cut_levels.end()), cut_levels.end()); - + if constexpr (enable_debug_print) { std::cout << "[DEBUG] Final cut levels: "; - for(const auto& level : cut_levels) std::cout << level << " "; + for (const auto &level : cut_levels) { + std::cout << level << " "; + } std::cout << std::endl; } - + return create_vertex_maps_from_cuts(cut_levels, level_sets); } - ScanWavefrontDivider& set_metric(SequenceMetric metric) { + ScanWavefrontDivider &set_metric(SequenceMetric metric) { sequence_metric_ = metric; return *this; } - ScanWavefrontDivider& use_variance_splitter(double mult, double threshold, size_t min_len = 1) { + ScanWavefrontDivider &use_variance_splitter(double mult, double threshold, size_t min_len = 1) { splitter_ = std::make_unique(mult, threshold, min_len); return *this; } - ScanWavefrontDivider& use_largest_step_splitter(double threshold, size_t min_len) { + ScanWavefrontDivider &use_largest_step_splitter(double threshold, size_t min_len) { splitter_ = std::make_unique(threshold, min_len); return *this; } - ScanWavefrontDivider& use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) { + ScanWavefrontDivider &use_threshold_scan_splitter(double diff_threshold, double abs_threshold, size_t min_len = 1) { splitter_ = std::make_unique(diff_threshold, abs_threshold, min_len); return *this; } -private: + private: using VertexType = vertex_idx_t; SequenceMetric sequence_metric_ = SequenceMetric::COMPONENT_COUNT; std::unique_ptr splitter_; std::vector>> create_vertex_maps_from_cuts( - const std::vector& cut_levels, - const std::vector>& level_sets) const { - + const std::vector &cut_levels, const std::vector> &level_sets) const { if (cut_levels.empty()) { // If there are no cuts, return a single section with all components. - return { this->get_components_for_range(0, level_sets.size(), level_sets) }; + return {this->get_components_for_range(0, level_sets.size(), level_sets)}; } std::vector>> vertex_maps; size_t start_level = 0; - for (const auto& cut_level : cut_levels) { - if (start_level < cut_level) { // Avoid creating empty sections + for (const auto &cut_level : cut_levels) { + if (start_level < cut_level) { // Avoid creating empty sections vertex_maps.push_back(this->get_components_for_range(start_level, cut_level, level_sets)); } start_level = cut_level; @@ -128,4 +129,4 @@ class ScanWavefrontDivider : public AbstractWavefrontDivider { } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp b/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp index 73c978e0..9dd925ac 100644 --- a/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp +++ b/include/osp/dag_divider/wavefront_divider/SequenceGenerator.hpp @@ -17,8 +17,9 @@ limitations under the License. */ #pragma once -#include #include +#include + #include "WavefrontStatisticsCollector.hpp" namespace osp { @@ -29,12 +30,12 @@ enum class SequenceMetric { COMPONENT_COUNT, AVAILABLE_PARALLELISM }; * @class SequenceGenerator * @brief Helper to generate a numerical sequence based on a chosen metric. */ -template +template class SequenceGenerator { using VertexType = vertex_idx_t; -public: - SequenceGenerator(const Graph_t& dag, const std::vector>& level_sets) + public: + SequenceGenerator(const Graph_t &dag, const std::vector> &level_sets) : dag_(dag), level_sets_(level_sets) {} std::vector generate(SequenceMetric metric) const { @@ -47,13 +48,13 @@ class SequenceGenerator { } } -private: + private: std::vector generate_component_count() const { WavefrontStatisticsCollector collector(dag_, level_sets_); auto fwd_stats = collector.compute_forward(); std::vector seq; seq.reserve(fwd_stats.size()); - for (const auto& stat : fwd_stats) { + for (const auto &stat : fwd_stats) { seq.push_back(static_cast(stat.connected_components_vertices.size())); } return seq; @@ -65,7 +66,7 @@ class SequenceGenerator { double cumulative_work = 0.0; for (size_t i = 0; i < level_sets_.size(); ++i) { double level_work = 0.0; - for (const auto& vertex : level_sets_[i]) { + for (const auto &vertex : level_sets_[i]) { level_work += dag_.vertex_work_weight(vertex); } cumulative_work += level_work; @@ -74,8 +75,8 @@ class SequenceGenerator { return seq; } - const Graph_t& dag_; - const std::vector>& level_sets_; + const Graph_t &dag_; + const std::vector> &level_sets_; }; -} // end namespace osp +} // end namespace osp diff --git a/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp b/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp index 1cfc7018..2cde1ad0 100644 --- a/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp +++ b/include/osp/dag_divider/wavefront_divider/SequenceSplitter.hpp @@ -16,12 +16,12 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once -#include -#include -#include #include #include -#include // Required for std::distance and std::iterator_traits +#include // Required for std::distance and std::iterator_traits +#include +#include +#include namespace osp { @@ -32,7 +32,7 @@ enum class SplitAlgorithm { LARGEST_STEP, VARIANCE, THRESHOLD_SCAN }; * @brief Abstract base class for algorithms that split a sequence of numbers. */ class SequenceSplitter { -public: + public: virtual ~SequenceSplitter() = default; /** @@ -40,10 +40,9 @@ class SequenceSplitter { * @param seq The sequence of numbers to split. * @return A vector of indices where the sequence is split. */ - virtual std::vector split(const std::vector& seq) = 0; + virtual std::vector split(const std::vector &seq) = 0; }; - /** * @class VarianceSplitter * @brief Splits a sequence recursively based on variance reduction. @@ -51,17 +50,17 @@ class SequenceSplitter { * sub-sequences by a factor (var_mult_) and if the original variance is above a threshold. */ class VarianceSplitter : public SequenceSplitter { -public: - VarianceSplitter(double var_mult, double var_threshold, + public: + VarianceSplitter(double var_mult, + double var_threshold, size_t min_subseq_len = 1, size_t max_depth = std::numeric_limits::max()) - : var_mult_(var_mult), - var_threshold_(var_threshold), - min_subseq_len_(min_subseq_len), - max_depth_(max_depth) {} + : var_mult_(var_mult), var_threshold_(var_threshold), min_subseq_len_(min_subseq_len), max_depth_(max_depth) {} - std::vector split(const std::vector& seq) override { - if (seq.empty()) return {}; + std::vector split(const std::vector &seq) override { + if (seq.empty()) { + return {}; + } // Precompute prefix sums for the entire sequence prefix_sum_.assign(seq.size() + 1, 0.0); @@ -78,9 +77,9 @@ class VarianceSplitter : public SequenceSplitter { return splits; } -private: + private: // Compute mean & variance in [l, r) in O(1) - void compute_variance(size_t l, size_t r, double& mean, double& variance) const { + void compute_variance(size_t l, size_t r, double &mean, double &variance) const { size_t n = r - l; if (n <= 1) { mean = (n == 1) ? (prefix_sum_[r] - prefix_sum_[l]) : 0.0; @@ -93,8 +92,10 @@ class VarianceSplitter : public SequenceSplitter { variance = sq_sum / static_cast(n) - mean * mean; } - void split_recursive(size_t l, size_t r, std::vector& splits, size_t depth) { - if (depth >= max_depth_ || r - l < 2 * min_subseq_len_) return; + void split_recursive(size_t l, size_t r, std::vector &splits, size_t depth) { + if (depth >= max_depth_ || r - l < 2 * min_subseq_len_) { + return; + } double mean, variance; compute_variance(l, r, mean, variance); @@ -112,9 +113,11 @@ class VarianceSplitter : public SequenceSplitter { } } - bool compute_best_split(size_t l, size_t r, size_t& best_split, double original_variance) const { + bool compute_best_split(size_t l, size_t r, size_t &best_split, double original_variance) const { size_t n = r - l; - if (n < 2) return false; + if (n < 2) { + return false; + } double min_weighted_variance_sum = std::numeric_limits::max(); best_split = 0; @@ -124,8 +127,7 @@ class VarianceSplitter : public SequenceSplitter { compute_variance(l, i, left_mean, left_var); compute_variance(i, r, right_mean, right_var); - double weighted_sum = static_cast(i - l) * left_var + - static_cast(r - i) * right_var; + double weighted_sum = static_cast(i - l) * left_var + static_cast(r - i) * right_var; if (weighted_sum < min_weighted_variance_sum) { min_weighted_variance_sum = weighted_sum; @@ -134,8 +136,7 @@ class VarianceSplitter : public SequenceSplitter { } double total_original_variance = original_variance * static_cast(n); - return best_split > l && - min_weighted_variance_sum < var_mult_ * total_original_variance; + return best_split > l && min_weighted_variance_sum < var_mult_ * total_original_variance; } double var_mult_; @@ -146,7 +147,6 @@ class VarianceSplitter : public SequenceSplitter { std::vector prefix_sq_sum_; }; - /** * @class LargestStepSplitter * @brief Splits a monotonic sequence recursively at the point of the largest change. @@ -154,32 +154,31 @@ class VarianceSplitter : public SequenceSplitter { * exceeds a given threshold. */ class LargestStepSplitter : public SequenceSplitter { -private: + private: using ConstIterator = std::vector::const_iterator; using difference_type = typename std::iterator_traits::difference_type; -public: - LargestStepSplitter(double diff_threshold, - size_t min_subseq_len, - size_t max_depth = std::numeric_limits::max()) - : diff_threshold_(diff_threshold), - min_subseq_len_(min_subseq_len), - max_depth_(max_depth) {} + public: + LargestStepSplitter(double diff_threshold, size_t min_subseq_len, size_t max_depth = std::numeric_limits::max()) + : diff_threshold_(diff_threshold), min_subseq_len_(min_subseq_len), max_depth_(max_depth) {} - std::vector split(const std::vector& seq) override { + std::vector split(const std::vector &seq) override { std::vector splits; split_recursive(seq.begin(), seq.end(), splits, 0, 0); std::sort(splits.begin(), splits.end()); return splits; } -private: - void split_recursive(ConstIterator begin, ConstIterator end, - std::vector& splits, size_t offset, size_t current_depth) { - if (current_depth >= max_depth_) return; + private: + void split_recursive(ConstIterator begin, ConstIterator end, std::vector &splits, size_t offset, size_t current_depth) { + if (current_depth >= max_depth_) { + return; + } const difference_type size = std::distance(begin, end); - if (static_cast(size) < 2 * min_subseq_len_) return; + if (static_cast(size) < 2 * min_subseq_len_) { + return; + } double max_diff = 0.0; difference_type split_point_local = 0; @@ -197,8 +196,8 @@ class LargestStepSplitter : public SequenceSplitter { if (max_diff > diff_threshold_ && split_point_local > 0) { size_t split_point_global = static_cast(split_point_local) + offset; - if ((split_point_local >= static_cast(min_subseq_len_)) && - ((size - split_point_local) >= static_cast(min_subseq_len_))) { + if ((split_point_local >= static_cast(min_subseq_len_)) + && ((size - split_point_local) >= static_cast(min_subseq_len_))) { splits.push_back(split_point_global); ConstIterator split_it = begin + split_point_local; @@ -213,47 +212,41 @@ class LargestStepSplitter : public SequenceSplitter { size_t max_depth_; }; - /** * @class ThresholdScanSplitter * @brief Splits a sequence by scanning for significant changes or crossing an absolute threshold. * This is a non-recursive splitter that performs a single pass. */ class ThresholdScanSplitter : public SequenceSplitter { -public: - ThresholdScanSplitter(double diff_threshold, - double absolute_threshold, - size_t min_subseq_len = 1) - : diff_threshold_(diff_threshold), - absolute_threshold_(absolute_threshold), - min_subseq_len_(min_subseq_len) {} - - std::vector split(const std::vector& seq) override { + public: + ThresholdScanSplitter(double diff_threshold, double absolute_threshold, size_t min_subseq_len = 1) + : diff_threshold_(diff_threshold), absolute_threshold_(absolute_threshold), min_subseq_len_(min_subseq_len) {} + + std::vector split(const std::vector &seq) override { std::vector splits; - if (seq.size() < 2) return splits; + if (seq.size() < 2) { + return splits; + } size_t last_cut = 0; for (size_t i = 0; i < seq.size() - 1; ++i) { bool should_cut = false; double current = seq[i]; - double next = seq[i+1]; + double next = seq[i + 1]; // A split is triggered by a significant change OR by crossing the absolute threshold. - if (current > next) { // Dropping - if ((current - next) > diff_threshold_ || - (next < absolute_threshold_ && current >= absolute_threshold_)) { + if (current > next) { // Dropping + if ((current - next) > diff_threshold_ || (next < absolute_threshold_ && current >= absolute_threshold_)) { should_cut = true; } - } else if (current < next) { // Rising - if ((next - current) > diff_threshold_ || - (next > absolute_threshold_ && current <= absolute_threshold_)) { + } else if (current < next) { // Rising + if ((next - current) > diff_threshold_ || (next > absolute_threshold_ && current <= absolute_threshold_)) { should_cut = true; } } - + if (should_cut) { - if ((i + 1 - last_cut) >= min_subseq_len_ && - (seq.size() - (i + 1)) >= min_subseq_len_) { + if ((i + 1 - last_cut) >= min_subseq_len_ && (seq.size() - (i + 1)) >= min_subseq_len_) { splits.push_back(i + 1); last_cut = i + 1; } @@ -262,10 +255,10 @@ class ThresholdScanSplitter : public SequenceSplitter { return splits; } -private: + private: double diff_threshold_; double absolute_threshold_; size_t min_subseq_len_; }; -} // namespace osp +} // namespace osp diff --git a/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp b/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp index 77622b38..65f7d3e5 100644 --- a/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp +++ b/include/osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp @@ -16,8 +16,9 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once +#include // for std::reverse #include -#include // for std::reverse + #include "osp/auxiliary/datastructures/union_find.hpp" namespace osp { @@ -26,7 +27,7 @@ namespace osp { * @struct WavefrontStatistics * @brief Holds statistical data for a single wavefront. */ -template +template struct WavefrontStatistics { using VertexType = vertex_idx_t; @@ -39,12 +40,12 @@ struct WavefrontStatistics { * @class WavefrontStatisticsCollector * @brief Computes forward and backward wavefront statistics for a given DAG. */ -template +template class WavefrontStatisticsCollector { using VertexType = vertex_idx_t; using UnionFind = union_find_universe_t; -public: + public: WavefrontStatisticsCollector(const Graph_t &dag, const std::vector> &level_sets) : dag_(dag), level_sets_(level_sets) {} @@ -79,9 +80,8 @@ class WavefrontStatisticsCollector { return stats; } - -private: - void update_union_find(UnionFind& uf, size_t level_idx) const { + private: + void update_union_find(UnionFind &uf, size_t level_idx) const { // Add all vertices from the current level to the universe for (const auto vertex : level_sets_[level_idx]) { uf.add_object(vertex, dag_.vertex_work_weight(vertex), dag_.vertex_mem_weight(vertex)); @@ -101,14 +101,14 @@ class WavefrontStatisticsCollector { } } - void collect_stats_for_level(WavefrontStatistics& stats, UnionFind& uf) const { + void collect_stats_for_level(WavefrontStatistics &stats, UnionFind &uf) const { const auto components = uf.get_connected_components_weights_and_memories(); stats.connected_components_vertices.reserve(components.size()); stats.connected_components_weights.reserve(components.size()); stats.connected_components_memories.reserve(components.size()); - for (const auto& comp : components) { - auto& [vertices, weight, memory] = comp; + for (const auto &comp : components) { + auto &[vertices, weight, memory] = comp; stats.connected_components_vertices.emplace_back(vertices); stats.connected_components_weights.emplace_back(weight); stats.connected_components_memories.emplace_back(memory); @@ -119,4 +119,4 @@ class WavefrontStatisticsCollector { const std::vector> &level_sets_; }; -} // end namespace osp +} // end namespace osp diff --git a/include/osp/graph_algorithms/computational_dag_construction_util.hpp b/include/osp/graph_algorithms/computational_dag_construction_util.hpp index 553996a6..597b7dc1 100644 --- a/include/osp/graph_algorithms/computational_dag_construction_util.hpp +++ b/include/osp/graph_algorithms/computational_dag_construction_util.hpp @@ -27,15 +27,16 @@ namespace osp { * @brief Constructs a computational DAG from another graph. * * This function copies the structure and properties of a source graph into a target graph structure. - * Assumes that the vertices of the source graph are indexed from 0 to N-1. If the target graph is empty, indices are sequentially assigned starting from 0. - * If the target graph is not empty, new vertices will be added to the target graph and their indices will be sequentially assigned starting from the index N. + * Assumes that the vertices of the source graph are indexed from 0 to N-1. If the target graph is empty, indices are sequentially + * assigned starting from 0. If the target graph is not empty, new vertices will be added to the target graph and their indices + * will be sequentially assigned starting from the index N. * * @tparam Graph_from The type of the source graph. Must satisfy `is_computational_dag`. * @tparam Graph_to The type of the target graph. Must satisfy `is_constructable_cdag_vertex`. * @param from The source graph. * @param to The target graph. */ -template +template void constructComputationalDag(const Graph_from &from, Graph_to &to) { static_assert(is_computational_dag_v, "Graph_from must satisfy the computational_dag concept"); static_assert(is_constructable_cdag_vertex_v, "Graph_to must satisfy the constructable_cdag_vertex concept"); @@ -45,11 +46,13 @@ void constructComputationalDag(const Graph_from &from, Graph_to &to) { for (const auto &v_idx : from.vertices()) { if constexpr (has_typed_vertices_v and has_typed_vertices_v) { - vertex_map.push_back(to.add_vertex(from.vertex_work_weight(v_idx), from.vertex_comm_weight(v_idx), - from.vertex_mem_weight(v_idx), from.vertex_type(v_idx))); + vertex_map.push_back(to.add_vertex(from.vertex_work_weight(v_idx), + from.vertex_comm_weight(v_idx), + from.vertex_mem_weight(v_idx), + from.vertex_type(v_idx))); } else { - vertex_map.push_back(to.add_vertex(from.vertex_work_weight(v_idx), from.vertex_comm_weight(v_idx), - from.vertex_mem_weight(v_idx))); + vertex_map.push_back( + to.add_vertex(from.vertex_work_weight(v_idx), from.vertex_comm_weight(v_idx), from.vertex_mem_weight(v_idx))); } } @@ -66,4 +69,4 @@ void constructComputationalDag(const Graph_from &from, Graph_to &to) { } } -} // namespace osp +} // namespace osp diff --git a/include/osp/graph_algorithms/computational_dag_util.hpp b/include/osp/graph_algorithms/computational_dag_util.hpp index 5fba1c8a..3c8a339b 100644 --- a/include/osp/graph_algorithms/computational_dag_util.hpp +++ b/include/osp/graph_algorithms/computational_dag_util.hpp @@ -20,14 +20,13 @@ limitations under the License. #include -#include "osp/concepts/computational_dag_concept.hpp" #include "directed_graph_top_sort.hpp" +#include "osp/concepts/computational_dag_concept.hpp" namespace osp { -template +template v_memw_t max_memory_weight(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); @@ -39,9 +38,8 @@ v_memw_t max_memory_weight(const Graph_t &graph) { return max_memory_weight; } -template +template v_memw_t max_memory_weight(const v_type_t &nodeType_, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); static_assert(has_typed_vertices_v, "Graph_t must have typed vertices"); @@ -56,36 +54,35 @@ v_memw_t max_memory_weight(const v_type_t &nodeType_, const Gr return max_memory_weight; } -template +template v_workw_t sumOfVerticesWorkWeights(VertexIterator begin, VertexIterator end, const Graph_t &graph) { static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); - return std::accumulate(begin, end, 0, [&](const auto sum, const vertex_idx_t &v) { - return sum + graph.vertex_work_weight(v); - }); + return std::accumulate( + begin, end, 0, [&](const auto sum, const vertex_idx_t &v) { return sum + graph.vertex_work_weight(v); }); } -template +template v_workw_t sumOfVerticesWorkWeights(const Graph_t &graph) { static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); - return std::accumulate(graph.vertices().begin(), graph.vertices().end(), static_cast>(0), [&](const v_workw_t sum, const vertex_idx_t &v) { - return sum + graph.vertex_work_weight(v); - }); + return std::accumulate( + graph.vertices().begin(), + graph.vertices().end(), + static_cast>(0), + [&](const v_workw_t sum, const vertex_idx_t &v) { return sum + graph.vertex_work_weight(v); }); } -template -v_workw_t sumOfVerticesWorkWeights(const std::initializer_list> vertices_, - const Graph_t &graph) { +template +v_workw_t sumOfVerticesWorkWeights(const std::initializer_list> vertices_, const Graph_t &graph) { return sumOfVerticesWorkWeights(vertices_.begin(), vertices_.end(), graph); } -template +template v_commw_t sumOfVerticesCommunicationWeights(VertexIterator begin, VertexIterator end, const Graph_t &graph) { static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); - return std::accumulate(begin, end, 0, [&](const auto sum, const vertex_idx_t &v) { - return sum + graph.vertex_comm_weight(v); - }); + return std::accumulate( + begin, end, 0, [&](const auto sum, const vertex_idx_t &v) { return sum + graph.vertex_comm_weight(v); }); } /** @@ -94,12 +91,12 @@ v_commw_t sumOfVerticesCommunicationWeights(VertexIterator begin, Verte * @tparam Instance_t The type of the instance object (e.g., BspInstance) used for compatibility checks. * @tparam VertexIterator An iterator over vertex indices of the subgraph. */ -template -v_workw_t sumOfCompatibleWorkWeights(VertexIterator begin, VertexIterator end, const SubGraph_t &graph, - const Instance_t& main_instance, unsigned processorType) { +template +v_workw_t sumOfCompatibleWorkWeights( + VertexIterator begin, VertexIterator end, const SubGraph_t &graph, const Instance_t &main_instance, unsigned processorType) { static_assert(has_vertex_weights_v, "SubGraph_t must have vertex weights"); - return std::accumulate(begin, end, static_cast>(0), - [&](const v_workw_t sum, const vertex_idx_t &v) { + return std::accumulate( + begin, end, static_cast>(0), [&](const v_workw_t sum, const vertex_idx_t &v) { if (main_instance.isCompatibleType(graph.vertex_type(v), processorType)) { return sum + graph.vertex_work_weight(v); } @@ -110,44 +107,42 @@ v_workw_t sumOfCompatibleWorkWeights(VertexIterator begin, VertexIte /** * @brief Overload to calculate compatible work weight for all vertices in a graph. */ -template -v_workw_t sumOfCompatibleWorkWeights(const SubGraph_t &graph, const Instance_t& main_instance, - unsigned processorType) { +template +v_workw_t sumOfCompatibleWorkWeights(const SubGraph_t &graph, const Instance_t &main_instance, unsigned processorType) { return sumOfCompatibleWorkWeights(graph.vertices().begin(), graph.vertices().end(), graph, main_instance, processorType); } -template +template v_commw_t sumOfVerticesCommunicationWeights(const Graph_t &graph) { static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); - return std::accumulate(graph.vertices().begin(), graph.vertices().end(), static_cast>(0), [&](const v_commw_t sum, const vertex_idx_t &v) { - return sum + graph.vertex_comm_weight(v); - }); + return std::accumulate( + graph.vertices().begin(), + graph.vertices().end(), + static_cast>(0), + [&](const v_commw_t sum, const vertex_idx_t &v) { return sum + graph.vertex_comm_weight(v); }); } -template +template v_commw_t sumOfVerticesCommunicationWeights(const std::initializer_list> &vertices_, const Graph_t &graph) { return sumOfVerticesCommunicationWeights(vertices_.begin(), vertices_.end(), graph); } -template +template e_commw_t sumOfEdgesCommunicationWeights(EdgeIterator begin, EdgeIterator end, const Graph_t &graph) { - static_assert(has_edge_weights_v, "Graph_t must have edge weights"); return std::accumulate( begin, end, 0, [&](const auto sum, const edge_desc_t &e) { return sum + graph.edge_comm_weight(e); }); } -template -e_commw_t sumOfEdgesCommunicationWeights(const std::initializer_list> &edges_, - const Graph_t &graph) { +template +e_commw_t sumOfEdgesCommunicationWeights(const std::initializer_list> &edges_, const Graph_t &graph) { return sumOfEdgesCommunicationWeights(edges_.begin(), edges_.end(), graph); } -template +template v_workw_t critical_path_weight(const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph concept"); static_assert(has_vertex_weights_v, "Graph_t must have vertex weights"); @@ -160,7 +155,6 @@ v_workw_t critical_path_weight(const Graph_t &graph) { // calculating lenght of longest path for (const auto &node : GetTopOrder(graph)) { - v_workw_t max_temp = 0; for (const auto &parent : graph.parents(node)) { max_temp = std::max(max_temp, top_length[parent]); @@ -169,7 +163,6 @@ v_workw_t critical_path_weight(const Graph_t &graph) { top_length[node] = max_temp + graph.vertex_work_weight(node); if (top_length[node] > critical_path_weight) { - critical_path_weight = top_length[node]; } } @@ -177,4 +170,4 @@ v_workw_t critical_path_weight(const Graph_t &graph) { return critical_path_weight; } -} // namespace osp +} // namespace osp diff --git a/include/osp/graph_algorithms/cuthill_mckee.hpp b/include/osp/graph_algorithms/cuthill_mckee.hpp index f99fc19a..848330d4 100644 --- a/include/osp/graph_algorithms/cuthill_mckee.hpp +++ b/include/osp/graph_algorithms/cuthill_mckee.hpp @@ -23,13 +23,13 @@ limitations under the License. #include #include "osp/concepts/computational_dag_concept.hpp" -#include "osp/graph_algorithms/directed_graph_util.hpp" #include "osp/graph_algorithms/directed_graph_path_util.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" +#include "osp/graph_algorithms/directed_graph_util.hpp" namespace osp { -template +template struct cm_vertex { using VertexType = vertex_idx_t; VertexType vertex; @@ -39,19 +39,18 @@ struct cm_vertex { VertexType degree; cm_vertex() : vertex(0), parent_position(0), degree(0) {} + cm_vertex(VertexType vertex_, VertexType degree_, VertexType parent_position_) : vertex(vertex_), parent_position(parent_position_), degree(degree_) {} bool operator<(cm_vertex const &rhs) const { - return (parent_position < rhs.parent_position) || - (parent_position == rhs.parent_position and degree < rhs.degree) || - (parent_position == rhs.parent_position and degree == rhs.degree and vertex < rhs.vertex); + return (parent_position < rhs.parent_position) || (parent_position == rhs.parent_position and degree < rhs.degree) + || (parent_position == rhs.parent_position and degree == rhs.degree and vertex < rhs.vertex); } }; -template +template std::vector> cuthill_mckee_wavefront(const Graph_t &dag, bool permutation = false) { - using VertexType = vertex_idx_t; using cm_vertex = cm_vertex; @@ -67,29 +66,25 @@ std::vector> cuthill_mckee_wavefront(const Graph_t &dag, b std::vector new_wavefront; VertexType node_counter = 0; while (node_counter < dag.num_vertices()) { - new_wavefront.clear(); std::sort(current_wavefront.begin(), current_wavefront.end()); if (permutation) { for (VertexType i = 0; i < static_cast(current_wavefront.size()); i++) { - result[current_wavefront[i].vertex] = node_counter + i; } } else { for (size_t i = 0; i < current_wavefront.size(); i++) { - result[node_counter + i] = current_wavefront[i].vertex; } } - if (node_counter + static_cast(current_wavefront.size()) == dag.num_vertices()) + if (node_counter + static_cast(current_wavefront.size()) == dag.num_vertices()) { break; + } for (VertexType i = 0; i < static_cast(current_wavefront.size()); i++) { - for (const auto &child : dag.children(current_wavefront[i].vertex)) { - predecessors_count[child]++; predecessors_position[child] = std::min(predecessors_position[child], node_counter + i); @@ -107,10 +102,8 @@ std::vector> cuthill_mckee_wavefront(const Graph_t &dag, b return result; } - -template +template std::vector> cuthill_mckee_undirected(const Graph_t &dag, bool start_at_sink, bool perm = false) { - using VertexType = vertex_idx_t; using cm_vertex = cm_vertex; @@ -125,7 +118,6 @@ std::vector> cuthill_mckee_undirected(const Graph_t &dag, const std::vector top_node_distance = get_top_node_distance(dag); for (const auto &i : dag.vertices()) { if (is_sink(i, dag)) { - max_node_distances[i] = top_node_distance[i]; if (top_node_distance[i] > max_distance) { @@ -139,7 +131,6 @@ std::vector> cuthill_mckee_undirected(const Graph_t &dag, const std::vector bottom_node_distance = get_bottom_node_distance(dag); for (const auto &i : dag.vertices()) { if (is_source(i, dag)) { - max_node_distances[i] = bottom_node_distance[i]; if (bottom_node_distance[i] > max_distance) { @@ -174,7 +165,6 @@ std::vector> cuthill_mckee_undirected(const Graph_t &dag, VertexType node_counter = 1; while (node_counter < dag.num_vertices()) { - std::sort(current_level.begin(), current_level.end()); if (perm) { @@ -191,14 +181,11 @@ std::vector> cuthill_mckee_undirected(const Graph_t &dag, break; } - std::unordered_map node_priority; + std::unordered_map node_priority; for (VertexType i = 0; i < current_level.size(); i++) { - for (const auto &child : dag.children(current_level[i].vertex)) { - if (visited.find(child) == visited.end()) { - if (node_priority.find(child) == node_priority.end()) { node_priority[child] = node_counter + i; } else { @@ -208,7 +195,6 @@ std::vector> cuthill_mckee_undirected(const Graph_t &dag, } for (const auto &parent : dag.parents(current_level[i].vertex)) { - if (visited.find(parent) == visited.end()) { if (node_priority.find(parent) == node_priority.end()) { node_priority[parent] = node_counter + i; @@ -221,11 +207,10 @@ std::vector> cuthill_mckee_undirected(const Graph_t &dag, node_counter += current_level.size(); - if (node_priority.empty()) { // the dag has more than one connected components + if (node_priority.empty()) { // the dag has more than one connected components unsigned max_distance = 0; for (const auto [node, distance] : max_node_distances) { - if (visited.find(node) == visited.end() and distance > max_distance) { max_distance = distance; first_node = node; @@ -243,30 +228,23 @@ std::vector> cuthill_mckee_undirected(const Graph_t &dag, current_level.reserve(dag.in_degree(first_node) + dag.out_degree(first_node)); for (const auto &child : dag.children(first_node)) { - - current_level.push_back( - cm_vertex(child, dag.in_degree(child) + dag.out_degree(child), node_counter)); + current_level.push_back(cm_vertex(child, dag.in_degree(child) + dag.out_degree(child), node_counter)); visited.insert(child); } for (const auto &parent : dag.parents(first_node)) { - - current_level.push_back( - cm_vertex(parent, dag.in_degree(parent) + dag.out_degree(parent), node_counter)); + current_level.push_back(cm_vertex(parent, dag.in_degree(parent) + dag.out_degree(parent), node_counter)); visited.insert(parent); } node_counter++; } else { - current_level.clear(); current_level.reserve(node_priority.size()); for (const auto &[node, priority] : node_priority) { - - current_level.push_back( - cm_vertex(node, dag.in_degree(node) + dag.out_degree(node), priority)); + current_level.push_back(cm_vertex(node, dag.in_degree(node) + dag.out_degree(node), priority)); visited.insert(node); } } @@ -276,7 +254,7 @@ std::vector> cuthill_mckee_undirected(const Graph_t &dag, } // Cuthill-McKee Wavefront -template +template inline std::vector> GetTopOrderCuthillMcKeeWavefront(const Graph_t &dag) { std::vector> order; if (dag.num_vertices() > 0) { @@ -290,7 +268,7 @@ inline std::vector> GetTopOrderCuthillMcKeeWavefront(const } // Cuthill-McKee Undirected -template +template inline std::vector> GetTopOrderCuthillMcKeeUndirected(const Graph_t &dag) { std::vector> order; if (dag.num_vertices() > 0) { @@ -303,5 +281,4 @@ inline std::vector> GetTopOrderCuthillMcKeeUndirected(cons return order; } - -} // namespace osp +} // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp b/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp index c8eb6975..845cc27d 100644 --- a/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp +++ b/include/osp/graph_algorithms/directed_graph_coarsen_util.hpp @@ -25,17 +25,16 @@ limitations under the License. #include #include -#include "osp/auxiliary/Balanced_Coin_Flips.hpp" -#include "osp/concepts/directed_graph_concept.hpp" #include "directed_graph_top_sort.hpp" #include "directed_graph_util.hpp" +#include "osp/auxiliary/Balanced_Coin_Flips.hpp" +#include "osp/concepts/directed_graph_concept.hpp" namespace osp { - - -template -std::vector> get_contractable_edges_from_poset_int_map(const std::vector &poset_int_map, const Graph_t &graph) { +template +std::vector> get_contractable_edges_from_poset_int_map(const std::vector &poset_int_map, + const Graph_t &graph) { static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph_edge_desc concept"); std::vector> output; @@ -43,13 +42,13 @@ std::vector> get_contractable_edges_from_poset_int_map(cons for (const auto &edge : edges(graph)) { vertex_idx_t src = source(edge, graph); vertex_idx_t tgt = target(edge, graph); - + if (poset_int_map[tgt] == poset_int_map[src] + 1) { output.emplace_back(edge); } } - + return output; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp b/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp index 286fcc59..ff0ff192 100644 --- a/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp +++ b/include/osp/graph_algorithms/directed_graph_edge_desc_util.hpp @@ -22,14 +22,14 @@ limitations under the License. #include #include -#include "osp/concepts/directed_graph_concept.hpp" +#include "osp/concepts/directed_graph_edge_desc_concept.hpp" namespace osp { -template -std::pair, bool> edge_desc(const vertex_idx_t &src, const vertex_idx_t &dest, +template +std::pair, bool> edge_desc(const vertex_idx_t &src, + const vertex_idx_t &dest, const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph edge desc concept"); for (const auto &edge : out_edges(src, graph)) { @@ -40,16 +40,14 @@ std::pair, bool> edge_desc(const vertex_idx_t &src return {edge_desc_t(), false}; } -template +template std::unordered_set> long_edges_in_triangles(const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph edge desc concept"); static_assert(has_hashable_edge_desc_v, "Graph_t must satisfy the has_hashable_edge_desc concept"); std::unordered_set> long_edges; for (const auto &vertex : graph.vertices()) { - std::unordered_set> children_set; for (const auto &v : graph.children(vertex)) { @@ -57,11 +55,9 @@ std::unordered_set> long_edges_in_triangles(const Graph_t & } for (const auto &edge : out_edges(vertex, graph)) { - const auto &child = target(edge, graph); for (const auto &parent : graph.parents(child)) { - if (children_set.find(parent) != children_set.cend()) { long_edges.emplace(edge); break; @@ -73,4 +69,4 @@ std::unordered_set> long_edges_in_triangles(const Graph_t & return long_edges; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp b/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp index b05e54c4..af73869b 100644 --- a/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp +++ b/include/osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp @@ -19,18 +19,18 @@ limitations under the License. #pragma once #include + #include #include #include -#include "osp/concepts/directed_graph_concept.hpp" #include "directed_graph_edge_desc_util.hpp" +#include "osp/concepts/directed_graph_concept.hpp" namespace osp { -template +template std::unordered_set> long_edges_in_triangles_parallel(const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph edge desc concept"); static_assert(has_hashable_edge_desc_v, "Graph_t must satisfy the has_hashable_edge_desc concept"); @@ -43,7 +43,7 @@ std::unordered_set> long_edges_in_triangles_parallel(const #pragma omp parallel for schedule(dynamic, 4) for (vertex_idx_t vertex = 0; vertex < graph.num_vertices(); ++vertex) { - // for (const auto &vertex : graph.vertices()) { + // for (const auto &vertex : graph.vertices()) { const unsigned int proc = static_cast(omp_get_thread_num()); @@ -53,11 +53,9 @@ std::unordered_set> long_edges_in_triangles_parallel(const } for (const auto &edge : out_edges(vertex, graph)) { - const auto &child = target(edge, graph); for (const auto &parent : graph.parents(child)) { - if (children_set.find(parent) != children_set.cend()) { deleted_edges_thread[proc].emplace_back(edge); break; @@ -75,4 +73,4 @@ std::unordered_set> long_edges_in_triangles_parallel(const return long_edges; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_edge_view.hpp b/include/osp/graph_algorithms/directed_graph_edge_view.hpp index cf2829a5..e66cfa71 100644 --- a/include/osp/graph_algorithms/directed_graph_edge_view.hpp +++ b/include/osp/graph_algorithms/directed_graph_edge_view.hpp @@ -17,9 +17,10 @@ limitations under the License. */ #pragma once -#include "osp/concepts/directed_graph_concept.hpp" #include +#include "osp/concepts/directed_graph_concept.hpp" + namespace osp { /** @@ -31,14 +32,14 @@ namespace osp { * * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph_v` concept. */ -template +template class edge_view { private: static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph_; - template + template class DirectedEdgeIterator { public: using iterator_category = std::forward_iterator_tag; @@ -49,14 +50,15 @@ class edge_view { struct arrow_proxy { value_type value; + const value_type *operator->() const noexcept { return &value; } }; private: - const Graph_t *graph_; // Pointer to the graph - vertex_idx_t currentVertex_; // Current source vertex - child_iterator_t currentChild_; // Iterator to the current target vertex in current_vertex's adjacency list - vertex_idx_t currentEdgeIdx_; // Global index of the current edge in the traversal order + const Graph_t *graph_; // Pointer to the graph + vertex_idx_t currentVertex_; // Current source vertex + child_iterator_t currentChild_; // Iterator to the current target vertex in current_vertex's adjacency list + vertex_idx_t currentEdgeIdx_; // Global index of the current edge in the traversal order void advanceToValid() { while (currentVertex_ != graph_->num_vertices()) { @@ -83,7 +85,6 @@ class edge_view { DirectedEdgeIterator(const vertex_idx_t edge_idx, const Graph_t &graph) : graph_(&graph), currentVertex_(0), currentEdgeIdx_(edge_idx) { - if (currentEdgeIdx_ >= graph_->num_edges()) { currentEdgeIdx_ = graph_->num_edges(); currentVertex_ = graph_->num_vertices(); @@ -110,6 +111,7 @@ class edge_view { } [[nodiscard]] value_type operator*() const { return {currentVertex_, *currentChild_}; } + [[nodiscard]] arrow_proxy operator->() const { return {operator*()}; } DirectedEdgeIterator &operator++() { @@ -137,16 +139,19 @@ class edge_view { }; public: - using DirEdgeIterator = DirectedEdgeIterator().children(std::declval>()).begin())>; + using DirEdgeIterator + = DirectedEdgeIterator().children(std::declval>()).begin())>; using iterator = DirEdgeIterator; using constIterator = DirEdgeIterator; explicit edge_view(const Graph_t &graph) : graph_(graph) {} [[nodiscard]] auto begin() const { return DirEdgeIterator(graph_); } + [[nodiscard]] auto cbegin() const { return DirEdgeIterator(graph_); } [[nodiscard]] auto end() const { return DirEdgeIterator(graph_.num_edges(), graph_); } + [[nodiscard]] auto cend() const { return DirEdgeIterator(graph_.num_edges(), graph_); } [[nodiscard]] auto size() const { return graph_.num_edges(); } @@ -163,7 +168,7 @@ class edge_view { * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph_v` concept. * @tparam IsOutgoing If true, iterates over outgoing edges; otherwise, incoming edges. */ -template +template class IncidentEdgeView { private: static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); @@ -171,7 +176,7 @@ class IncidentEdgeView { const Graph_t &graph_; vertex_idx_t anchorVertex_; - template + template class IncidentEdgeIterator { public: using iterator_category = typename std::iterator_traits::iterator_category; @@ -182,6 +187,7 @@ class IncidentEdgeView { struct arrow_proxy { value_type value; + const value_type *operator->() const noexcept { return &value; } }; @@ -191,6 +197,7 @@ class IncidentEdgeView { public: IncidentEdgeIterator() = default; + IncidentEdgeIterator(vertex_idx_t u, child_iterator_t it) : anchorVertex_(u), currentIt_(it) {} [[nodiscard]] value_type operator*() const { @@ -200,6 +207,7 @@ class IncidentEdgeView { return {*currentIt_, anchorVertex_}; } } + [[nodiscard]] arrow_proxy operator->() const { return {operator*()}; } IncidentEdgeIterator &operator++() { @@ -224,17 +232,16 @@ class IncidentEdgeView { return temp; } - [[nodiscard]] bool operator==(const IncidentEdgeIterator &other) const noexcept { - return currentIt_ == other.currentIt_; - } + [[nodiscard]] bool operator==(const IncidentEdgeIterator &other) const noexcept { return currentIt_ == other.currentIt_; } [[nodiscard]] bool operator!=(const IncidentEdgeIterator &other) const noexcept { return !(*this == other); } }; // Helper to deduce iterator type based on direction - using base_iterator_type = - std::conditional_t().children(std::declval>()).begin()), - decltype(std::declval().parents(std::declval>()).begin())>; + using base_iterator_type + = std::conditional_t().children(std::declval>()).begin()), + decltype(std::declval().parents(std::declval>()).begin())>; public: using iterator = IncidentEdgeIterator; @@ -249,6 +256,7 @@ class IncidentEdgeView { return iterator(anchorVertex_, graph_.parents(anchorVertex_).begin()); } } + [[nodiscard]] auto cbegin() const { return begin(); } [[nodiscard]] auto end() const { @@ -258,6 +266,7 @@ class IncidentEdgeView { return iterator(anchorVertex_, graph_.parents(anchorVertex_).end()); } } + [[nodiscard]] auto cend() const { return end(); } [[nodiscard]] auto size() const { @@ -267,6 +276,7 @@ class IncidentEdgeView { return graph_.in_degree(anchorVertex_); } } + [[nodiscard]] bool empty() const { if constexpr (IsOutgoing) { return graph_.out_degree(anchorVertex_) == 0; @@ -279,13 +289,13 @@ class IncidentEdgeView { /** * @brief A view over the outgoing edges of a specific vertex in a directed graph. */ -template +template using OutEdgeView = IncidentEdgeView; /** * @brief A view over the incoming edges of a specific vertex in a directed graph. */ -template +template using InEdgeView = IncidentEdgeView; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_path_util.hpp b/include/osp/graph_algorithms/directed_graph_path_util.hpp index ee6e972d..a1675964 100644 --- a/include/osp/graph_algorithms/directed_graph_path_util.hpp +++ b/include/osp/graph_algorithms/directed_graph_path_util.hpp @@ -26,12 +26,13 @@ limitations under the License. #include #include -#include "osp/auxiliary/Balanced_Coin_Flips.hpp" -#include "osp/concepts/directed_graph_edge_desc_concept.hpp" #include "directed_graph_top_sort.hpp" #include "directed_graph_util.hpp" +#include "osp/auxiliary/Balanced_Coin_Flips.hpp" +#include "osp/concepts/directed_graph_edge_desc_concept.hpp" namespace osp { + /** * @brief Checks if a path exists between two vertices in a directed graph. * @@ -44,9 +45,8 @@ namespace osp { * @param graph The graph to search in. * @return true if a path exists from src to dest, false otherwise. */ -template +template bool has_path(const vertex_idx_t src, const vertex_idx_t dest, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); for (const auto &child : bfs_view(graph, src)) { @@ -58,9 +58,8 @@ bool has_path(const vertex_idx_t src, const vertex_idx_t dest, return false; } -template +template std::size_t longestPath(const std::set> &vertices, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); using VertexType = vertex_idx_t; @@ -71,9 +70,11 @@ std::size_t longestPath(const std::set> &vertices, const G // Find source nodes for (const VertexType &node : vertices) { unsigned indeg = 0; - for (const VertexType &parent : graph.parents(node)) - if (vertices.count(parent) == 1) + for (const VertexType &parent : graph.parents(node)) { + if (vertices.count(parent) == 1) { ++indeg; + } + } if (indeg == 0) { bfs_queue.push(node); @@ -89,8 +90,9 @@ std::size_t longestPath(const std::set> &vertices, const G bfs_queue.pop(); for (const VertexType &child : graph.children(current)) { - if (vertices.count(child) == 0) + if (vertices.count(child) == 0) { continue; + } ++visit_counter[child]; if (visit_counter[child] == in_degrees[child]) { @@ -100,13 +102,13 @@ std::size_t longestPath(const std::set> &vertices, const G } } - return std::accumulate(vertices.cbegin(), vertices.cend(), 0u, - [&](const std::size_t mx, const VertexType &node) { return std::max(mx, distances[node]); }); + return std::accumulate(vertices.cbegin(), vertices.cend(), 0u, [&](const std::size_t mx, const VertexType &node) { + return std::max(mx, distances[node]); + }); } -template +template std::size_t longestPath(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); using VertexType = vertex_idx_t; @@ -126,7 +128,6 @@ std::size_t longestPath(const Graph_t &graph) { bfs_queue.pop(); for (const VertexType &child : graph.children(current)) { - ++visit_counter[child]; if (visit_counter[child] == graph.in_degree(child)) { bfs_queue.push(child); @@ -139,9 +140,8 @@ std::size_t longestPath(const Graph_t &graph) { return max_edgecount; } -template +template std::vector> longestChain(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); using VertexType = vertex_idx_t; @@ -159,7 +159,6 @@ std::vector> longestChain(const Graph_t &graph) { // calculating lenght of longest path for (const VertexType &node : top_sort_view(graph)) { - unsigned max_temp = 0; for (const auto &parent : graph.parents(node)) { max_temp = std::max(max_temp, top_length[parent]); @@ -175,7 +174,6 @@ std::vector> longestChain(const Graph_t &graph) { // reconstructing longest path chain.push_back(end_longest_chain); while (graph.in_degree(end_longest_chain) != 0) { - for (const VertexType &in_node : graph.parents(end_longest_chain)) { if (top_length[in_node] != top_length[end_longest_chain] - 1) { continue; @@ -191,7 +189,7 @@ std::vector> longestChain(const Graph_t &graph) { return chain; } -template +template std::vector get_bottom_node_distance(const Graph_t &graph) { static_assert(std::is_integral_v, "T must be of integral type"); @@ -201,7 +199,6 @@ std::vector get_bottom_node_distance(const Graph_t &graph) { const auto top_order = GetTopOrder(graph); for (std::size_t i = top_order.size() - 1; i < top_order.size(); i--) { - T max_temp = 0; for (const auto &j : graph.children(top_order[i])) { max_temp = std::max(max_temp, bottom_distance[j]); @@ -211,7 +208,7 @@ std::vector get_bottom_node_distance(const Graph_t &graph) { return bottom_distance; } -template +template std::vector get_top_node_distance(const Graph_t &graph) { static_assert(std::is_integral_v, "T must be of integral type"); @@ -229,9 +226,8 @@ std::vector get_top_node_distance(const Graph_t &graph) { return top_distance; } -template +template std::vector>> compute_wavefronts(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); std::vector>> wavefronts; @@ -239,7 +235,6 @@ std::vector>> compute_wavefronts(const Graph_t wavefronts.push_back(std::vector>()); for (const auto &vertex : graph.vertices()) { - if (graph.in_degree(vertex) == 0) { wavefronts.back().push_back(vertex); } else { @@ -250,12 +245,9 @@ std::vector>> compute_wavefronts(const Graph_t vertex_idx_t counter = static_cast>(wavefronts.back().size()); while (counter < graph.num_vertices()) { - std::vector> next_wavefront; for (const auto &v_prev_wavefront : wavefronts.back()) { - for (const auto &child : graph.children(v_prev_wavefront)) { - parents_visited[child]--; if (parents_visited[child] == 0) { next_wavefront.push_back(child); @@ -270,9 +262,8 @@ std::vector>> compute_wavefronts(const Graph_t return wavefronts; } -template +template std::vector get_strict_poset_integer_map(unsigned const noise, double const poisson_param, const Graph_t &graph) { - static_assert(is_directed_graph_edge_desc_v, "Graph_t must satisfy the directed_graph_edge_desc concept"); if (noise > static_cast(std::numeric_limits::max())) { @@ -307,32 +298,27 @@ std::vector get_strict_poset_integer_map(unsigned const noise, double const } for (const auto &source : source_vertices_view(graph)) { - - if (max_path - bot_distance[source] + 1U + 2U * noise > - static_cast(std::numeric_limits::max())) { + if (max_path - bot_distance[source] + 1U + 2U * noise > static_cast(std::numeric_limits::max())) { throw std::overflow_error("Overflow in get_strict_poset_integer_map"); } - new_top[source] = - randInt(static_cast(max_path - bot_distance[source] + 1 + 2 * noise)) - static_cast(noise); + new_top[source] = randInt(static_cast(max_path - bot_distance[source] + 1 + 2 * noise)) - static_cast(noise); } for (const auto &sink : sink_vertices_view(graph)) { if (max_path - top_distance[sink] + 1U + 2U * noise > static_cast(std::numeric_limits::max())) { throw std::overflow_error("Overflow in get_strict_poset_integer_map"); } - new_bot[sink] = - randInt(static_cast(max_path - top_distance[sink] + 1U + 2U * noise)) - static_cast(noise); + new_bot[sink] = randInt(static_cast(max_path - top_distance[sink] + 1U + 2U * noise)) - static_cast(noise); } for (const auto &vertex : top_order) { - - if (is_source(vertex, graph)) + if (is_source(vertex, graph)) { continue; + } int max_temp = std::numeric_limits::min(); for (const auto &edge : in_edges(vertex, graph)) { - int temp = new_top[source(edge, graph)]; if (up_or_down.at(edge)) { if (poisson_param <= 0.0) { @@ -347,9 +333,9 @@ std::vector get_strict_poset_integer_map(unsigned const noise, double const } for (std::reverse_iterator iter = top_order.crbegin(); iter != top_order.crend(); ++iter) { - - if (is_sink(*iter, graph)) + if (is_sink(*iter, graph)) { continue; + } int max_temp = std::numeric_limits::min(); @@ -370,4 +356,4 @@ std::vector get_strict_poset_integer_map(unsigned const noise, double const return output; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_top_sort.hpp b/include/osp/graph_algorithms/directed_graph_top_sort.hpp index 492bc37e..f8d24124 100644 --- a/include/osp/graph_algorithms/directed_graph_top_sort.hpp +++ b/include/osp/graph_algorithms/directed_graph_top_sort.hpp @@ -23,10 +23,10 @@ limitations under the License. #include #include +#include "directed_graph_util.hpp" #include "osp/auxiliary/math/math_helper.hpp" #include "osp/auxiliary/misc.hpp" #include "osp/concepts/directed_graph_concept.hpp" -#include "directed_graph_util.hpp" /** * @file directed_graph_top_sort.hpp @@ -55,9 +55,8 @@ namespace osp { * @param graph The graph to check. * @return true if the vertices are in topological order, false otherwise. */ -template +template bool checkNodesInTopologicalOrder(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); for (const auto &node : graph.vertices()) { @@ -71,17 +70,14 @@ bool checkNodesInTopologicalOrder(const Graph_t &graph) { return true; } -template +template std::vector> GetTopOrder(const Graph_t &graph) { - if constexpr (has_vertices_in_top_order_v) { - std::vector> topOrd(graph.num_vertices()); std::iota(topOrd.begin(), topOrd.end(), static_cast>(0)); return topOrd; } else { - using VertexType = vertex_idx_t; std::vector predecessors_count(graph.num_vertices(), 0); @@ -91,8 +87,9 @@ std::vector> GetTopOrder(const Graph_t &graph) { std::queue next; // Find source nodes - for (const VertexType &v : source_vertices_view(graph)) + for (const VertexType &v : source_vertices_view(graph)) { next.push(v); + } // Execute BFS while (!next.empty()) { @@ -102,31 +99,30 @@ std::vector> GetTopOrder(const Graph_t &graph) { for (const VertexType ¤t : graph.children(node)) { ++predecessors_count[current]; - if (predecessors_count[current] == graph.in_degree(current)) + if (predecessors_count[current] == graph.in_degree(current)) { next.push(current); + } } } - if (static_cast(TopOrder.size()) != graph.num_vertices()) - throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + - std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + - "]"); + if (static_cast(TopOrder.size()) != graph.num_vertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + } return TopOrder; } } -template +template std::vector> GetTopOrderReverse(const Graph_t &graph) { - std::vector> TopOrder = GetTopOrder(graph); std::reverse(TopOrder.begin(), TopOrder.end()); return TopOrder; } -template +template std::vector> GetTopOrderGorder(const Graph_t &graph) { - // Generating modified Gorder topological order cf. "Speedup Graph Processing by Graph Ordering" by Hao Wei, Jeffrey // Xu Yu, Can Lu, and Xuemin Lin @@ -143,10 +139,9 @@ std::vector> GetTopOrderGorder(const Graph_t &graph) { std::vector priorities(graph.num_vertices(), 0.0); auto v_cmp = [&priorities, &graph](const VertexType &lhs, const VertexType &rhs) { - return (priorities[lhs] < priorities[rhs]) || - ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) < graph.out_degree(rhs))) || - ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) == graph.out_degree(rhs)) && - (lhs > rhs)); + return (priorities[lhs] < priorities[rhs]) + || ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) < graph.out_degree(rhs))) + || ((priorities[lhs] <= priorities[rhs]) && (graph.out_degree(lhs) == graph.out_degree(rhs)) && (lhs > rhs)); }; std::priority_queue, decltype(v_cmp)> ready_q(v_cmp); @@ -187,22 +182,24 @@ std::vector> GetTopOrderGorder(const Graph_t &graph) { } } - if (TopOrder.size() != graph.num_vertices()) - throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + - std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + if (TopOrder.size() != graph.num_vertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + } return TopOrder; } -template +template std::vector> GetFilteredTopOrder(const std::vector &valid, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); std::vector> filteredOrder; - for (const auto &node : GetTopOrder(graph)) - if (valid[node]) + for (const auto &node : GetTopOrder(graph)) { + if (valid[node]) { filteredOrder.push_back(node); + } + } return filteredOrder; } @@ -216,26 +213,27 @@ std::vector> GetFilteredTopOrder(const std::vector & * @tparam T The type of the container wrapper. * @tparam Graph_t The type of the graph. */ -template +template struct is_container_wrapper { private: - template + template static auto test(int) -> decltype(std::declval().push(std::declval>()), - std::declval().pop_next(), std::declval().empty(), std::true_type()); + std::declval().pop_next(), + std::declval().empty(), + std::true_type()); - template + template static std::false_type test(...); public: static constexpr bool value = decltype(test(0))::value; }; -template +template inline constexpr bool is_container_wrapper_v = is_container_wrapper::value; -template +template struct top_sort_iterator { - static_assert(is_container_wrapper_v, "container_wrapper must satisfy the container wrapper concept"); @@ -255,7 +253,6 @@ struct top_sort_iterator { top_sort_iterator(const Graph_t &graph_, container_wrapper &next_, vertex_idx_t start) : graph(graph_), next(next_), current_vertex(start), predecessors_count(graph_.num_vertices(), 0) { - if (current_vertex == graph.num_vertices()) { return; } @@ -264,7 +261,7 @@ struct top_sort_iterator { if (is_source(v, graph)) { next.push(v); } else { - predecessors_count[v] = static_cast>( graph.in_degree(v) ); + predecessors_count[v] = static_cast>(graph.in_degree(v)); } } current_vertex = next.pop_next(); @@ -281,7 +278,6 @@ struct top_sort_iterator { // Prefix increment top_sort_iterator &operator++() { - if (next.empty()) { current_vertex = graph.num_vertices(); return *this; @@ -308,6 +304,7 @@ struct top_sort_iterator { friend bool operator==(const top_sort_iterator &one, const top_sort_iterator &other) { return one.current_vertex == other.current_vertex; }; + friend bool operator!=(const top_sort_iterator &one, const top_sort_iterator &other) { return one.current_vertex != other.current_vertex; }; @@ -325,9 +322,8 @@ struct top_sort_iterator { * @tparam Graph_t The type of the directed graph. Must satisfy the `is_directed_graph` concept. * */ -template +template class top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; @@ -365,9 +361,8 @@ class top_sort_view { * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph` concept. * */ -template +template class dfs_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; @@ -393,9 +388,8 @@ class dfs_top_sort_view { * @tparam Graph_t The type of the graph, which must satisfy the `is_directed_graph` concept. * */ -template +template class bfs_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; @@ -411,9 +405,8 @@ class bfs_top_sort_view { auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } }; -template +template std::vector> bfs_top_sort(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); std::vector> top_sort; @@ -423,9 +416,8 @@ std::vector> bfs_top_sort(const Graph_t &graph) { return top_sort; } -template +template std::vector> dfs_top_sort(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); std::vector> top_sort; @@ -435,18 +427,17 @@ std::vector> dfs_top_sort(const Graph_t &graph) { return top_sort; } -template +template struct priority_queue_wrapper { - priority_eval_f prio_f; struct heap_node { - vertex_idx_t node; T priority; heap_node() : node(0), priority(0) {} + heap_node(vertex_idx_t n, T p) : node(n), priority(p) {} bool operator<(heap_node const &rhs) const { @@ -457,7 +448,7 @@ struct priority_queue_wrapper { std::vector heap; public: - template + template priority_queue_wrapper(Args &&...args) : prio_f(std::forward(args)...) {} void push(const vertex_idx_t &v) { @@ -475,9 +466,8 @@ struct priority_queue_wrapper { bool empty() const { return heap.empty(); } }; -template +template class priority_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; @@ -487,7 +477,7 @@ class priority_top_sort_view { using ts_iterator = top_sort_iterator; public: - template + template priority_top_sort_view(const Graph_t &graph_, Args &&...args) : graph(graph_), vertex_container(std::forward(args)...) {} @@ -496,9 +486,8 @@ class priority_top_sort_view { auto end() const { return ts_iterator(graph, vertex_container, graph.num_vertices()); } }; -template +template class locality_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; @@ -519,9 +508,8 @@ class locality_top_sort_view { auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } }; -template +template std::vector> GetTopOrderMinIndex(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); using VertexType = vertex_idx_t; @@ -533,22 +521,21 @@ std::vector> GetTopOrderMinIndex(const Graph_t &graph) { TopOrder.push_back(vert); } - if (TopOrder.size() != graph.num_vertices()) - throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + - std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + if (TopOrder.size() != graph.num_vertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + } return TopOrder; } -template +template class max_children_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; struct max_children_eval_f { - const Graph_t &graph; max_children_eval_f(const Graph_t &g) : graph(g) {} @@ -558,8 +545,7 @@ class max_children_top_sort_view { priority_queue_wrapper> vertex_container; - using ts_iterator = - top_sort_iterator>>; + using ts_iterator = top_sort_iterator>>; public: max_children_top_sort_view(const Graph_t &graph_) : graph(graph_), vertex_container(graph_) {} @@ -569,9 +555,8 @@ class max_children_top_sort_view { auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } }; -template +template std::vector> GetTopOrderMaxChildren(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); using VertexType = vertex_idx_t; @@ -583,26 +568,24 @@ std::vector> GetTopOrderMaxChildren(const Graph_t &graph) TopOrder.push_back(vert); } - if (TopOrder.size() != graph.num_vertices()) - throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + - std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + if (TopOrder.size() != graph.num_vertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + } return TopOrder; } -template +template class random_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; struct random_eval_f { - std::vector> priority; random_eval_f(const std::size_t num) : priority(num, 0) { - std::iota(priority.begin(), priority.end(), 0); std::random_device rd; std::mt19937 g(rd()); @@ -614,8 +597,7 @@ class random_top_sort_view { priority_queue_wrapper> vertex_container; - using ts_iterator = - top_sort_iterator>>; + using ts_iterator = top_sort_iterator>>; public: random_top_sort_view(const Graph_t &graph_) : graph(graph_), vertex_container(graph.num_vertices()) {} @@ -625,9 +607,8 @@ class random_top_sort_view { auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } }; -template +template std::vector> GetTopOrderRandom(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); using VertexType = vertex_idx_t; @@ -639,22 +620,21 @@ std::vector> GetTopOrderRandom(const Graph_t &graph) { TopOrder.push_back(vert); } - if (TopOrder.size() != graph.num_vertices()) - throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + - std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + if (TopOrder.size() != graph.num_vertices()) { + throw std::runtime_error("Error during topological ordering: TopOrder.size() != graph.num_vertices() [" + + std::to_string(TopOrder.size()) + " != " + std::to_string(graph.num_vertices()) + "]"); + } return TopOrder; } -template +template class priority_vec_top_sort_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; struct priority_eval_f { - const std::vector &priority; priority_eval_f(const std::vector &p) : priority(p) {} @@ -675,4 +655,4 @@ class priority_vec_top_sort_view { auto end() { return ts_iterator(graph, vertex_container, graph.num_vertices()); } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_algorithms/directed_graph_util.hpp b/include/osp/graph_algorithms/directed_graph_util.hpp index ffd6b482..8e373acd 100644 --- a/include/osp/graph_algorithms/directed_graph_util.hpp +++ b/include/osp/graph_algorithms/directed_graph_util.hpp @@ -18,6 +18,7 @@ limitations under the License. #pragma once +#include #include #include #include @@ -45,9 +46,8 @@ namespace osp { * @param graph The graph to check. * @return true if there is an edge from src to dest, false otherwise. */ -template +template bool edge(const vertex_idx_t &src, const vertex_idx_t &dest, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); for (const auto &child : graph.children(src)) { if (child == dest) { @@ -65,7 +65,7 @@ bool edge(const vertex_idx_t &src, const vertex_idx_t &dest, c * @param graph The graph to check. * @return true if the vertex is a sink, false otherwise. */ -template +template bool is_sink(const vertex_idx_t &v, const Graph_t &graph) { static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); return graph.out_degree(v) == 0u; @@ -79,7 +79,7 @@ bool is_sink(const vertex_idx_t &v, const Graph_t &graph) { * @param graph The graph to check. * @return true if the vertex is a source, false otherwise. */ -template +template bool is_source(const vertex_idx_t &v, const Graph_t &graph) { static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); return graph.in_degree(v) == 0u; @@ -92,9 +92,8 @@ bool is_source(const vertex_idx_t &v, const Graph_t &graph) { * It is used to create views for source and sink vertices in a directed graph. * */ -template +template struct vertex_cond_iterator { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); // TODO static_assert(is_callabl_v; @@ -110,7 +109,6 @@ struct vertex_cond_iterator { using reference = const value_type &; vertex_cond_iterator(const Graph_t &graph_, const iterator_t &start) : graph(graph_), current_vertex(start) { - while (current_vertex != graph.vertices().end()) { // if (cond.eval(graph, *current_vertex)) { if (cond(graph, *current_vertex)) { @@ -144,6 +142,7 @@ struct vertex_cond_iterator { } inline bool operator==(const vertex_cond_iterator &other) { return current_vertex == other.current_vertex; }; + inline bool operator!=(const vertex_cond_iterator &other) { return current_vertex != other.current_vertex; }; }; @@ -153,12 +152,12 @@ struct vertex_cond_iterator { * These classes provide iterators to traverse the source and sink vertices * of a directed graph. */ -template +template class source_vertices_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; + struct source_eval { // static bool eval(const Graph_t &graph, const vertex_idx_t &v) { return graph.in_degree(v) == 0; } bool operator()(const Graph_t &graph, const vertex_idx_t &v) const { return graph.in_degree(v) == 0; } @@ -182,12 +181,12 @@ class source_vertices_view { * These classes provide iterators to traverse the source and sink vertices * of a directed graph. */ -template +template class sink_vertices_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; + struct sink_eval { // static bool eval(const Graph_t &graph, const vertex_idx_t &v) { return graph.out_degree(v) == 0; } bool operator()(const Graph_t &graph, const vertex_idx_t &v) { return graph.out_degree(v) == 0; } @@ -212,9 +211,8 @@ class sink_vertices_view { * @param graph The graph to check. * @return A vector containing the indices of the source vertices. */ -template +template std::vector> source_vertices(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); std::vector> vec; for (const auto &source : source_vertices_view(graph)) { @@ -230,9 +228,8 @@ std::vector> source_vertices(const Graph_t &graph) { * @param graph The graph to check. * @return A vector containing the indices of the sink vertices. */ -template +template std::vector> sink_vertices(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); std::vector> vec; @@ -249,9 +246,8 @@ std::vector> sink_vertices(const Graph_t &graph) { * It uses a container wrapper to manage the traversal order. * The adj_iterator can be used to setup the traversal along children or parents. */ -template +template struct traversal_iterator { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; @@ -272,7 +268,6 @@ struct traversal_iterator { traversal_iterator(const Graph_t &graph_, const vertex_idx_t &start) : graph(graph_), adj_iter(graph_), current_vertex(start) { - if (graph.num_vertices() == start) { return; } @@ -289,7 +284,6 @@ struct traversal_iterator { // Prefix increment traversal_iterator &operator++() { - if (vertex_container.empty()) { current_vertex = graph.num_vertices(); return *this; @@ -315,10 +309,11 @@ struct traversal_iterator { } inline bool operator==(const traversal_iterator &other) { return current_vertex == other.current_vertex; }; + inline bool operator!=(const traversal_iterator &other) { return current_vertex != other.current_vertex; }; }; -template +template struct child_iterator { const Graph_t &graph; @@ -327,7 +322,7 @@ struct child_iterator { inline auto iterate(const vertex_idx_t &v) const { return graph.children(v); } }; -template +template struct bfs_queue_wrapper { std::queue> queue; @@ -348,9 +343,8 @@ struct bfs_queue_wrapper { * These classes provide iterators to traverse the vertices of a directed graph strating from a given vertex * using breadth-first search (BFS). */ -template +template class bfs_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; @@ -368,7 +362,7 @@ class bfs_view { auto size() const { return graph.num_vertices(); } }; -template +template struct dfs_stack_wrapper { std::vector> stack; @@ -389,9 +383,8 @@ struct dfs_stack_wrapper { * These classes provide iterators to traverse the vertices of a directed graph strating from a given vertex * using depth-first search (DFS). */ -template +template class dfs_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; @@ -409,7 +402,7 @@ class dfs_view { auto size() const { return graph.num_vertices(); } }; -template +template struct parents_iterator { const Graph_t &graph; @@ -424,9 +417,8 @@ struct parents_iterator { * These classes provide iterators to traverse the vertices of a directed graph strating from a given vertex * using breadth-first search (BFS) in reverse order. */ -template +template class bfs_reverse_view { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); const Graph_t &graph; @@ -452,9 +444,8 @@ class bfs_reverse_view { * @param graph The graph to check. * @return A vector containing the indices of the successors of the vertex. */ -template +template std::vector> successors(const vertex_idx_t &v, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); std::vector> vec; for (const auto &suc : bfs_view(graph, v)) { @@ -471,9 +462,8 @@ std::vector> successors(const vertex_idx_t &v, co * @param graph The graph to check. * @return A vector containing the indices of the ancestors of the vertex. */ -template +template std::vector> ancestors(const vertex_idx_t &v, const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); std::vector> vec; for (const auto &anc : bfs_reverse_view(graph, v)) { @@ -482,23 +472,24 @@ std::vector> ancestors(const vertex_idx_t &v, con return vec; } -template +template bool is_acyclic(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); using VertexType = vertex_idx_t; - if (graph.num_vertices() < 2) + if (graph.num_vertices() < 2) { return true; + } std::vector predecessors_count(graph.num_vertices(), 0); std::queue next; // Find source nodes - for (const VertexType &v : source_vertices_view(graph)) + for (const VertexType &v : source_vertices_view(graph)) { next.push(v); + } VertexType node_count = 0; while (!next.empty()) { @@ -508,23 +499,24 @@ bool is_acyclic(const Graph_t &graph) { for (const VertexType ¤t : graph.children(node)) { ++predecessors_count[current]; - if (predecessors_count[current] == graph.in_degree(current)) + if (predecessors_count[current] == graph.in_degree(current)) { next.push(current); + } } } return node_count == graph.num_vertices(); } -template +template bool is_connected(const Graph_t &graph) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); using VertexType = vertex_idx_t; - if (graph.num_vertices() < 2) + if (graph.num_vertices() < 2) { return true; + } std::unordered_set visited; @@ -549,19 +541,19 @@ bool is_connected(const Graph_t &graph) { return node_count == graph.num_vertices(); } -template +template std::size_t num_common_parents(const Graph_t &graph, vertex_idx_t v1, vertex_idx_t v2) { static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - - std::unordered_set> parents; + + std::unordered_set> parents; parents.reserve(graph.in_degree(v1)); for (const auto &par : graph.parents(v1)) { parents.emplace(par); } - + std::size_t num = 0; for (const auto &par : graph.parents(v2)) { - if(parents.find(par) != parents.end()) { + if (parents.find(par) != parents.end()) { ++num; } } @@ -569,7 +561,7 @@ std::size_t num_common_parents(const Graph_t &graph, vertex_idx_t v1, v return num; } -template +template std::size_t num_common_children(const Graph_t &graph, vertex_idx_t v1, vertex_idx_t v2) { static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); @@ -578,10 +570,10 @@ std::size_t num_common_children(const Graph_t &graph, vertex_idx_t v1, for (const auto &chld : graph.children(v1)) { childrn.emplace(chld); } - + std::size_t num = 0; for (const auto &chld : graph.children(v2)) { - if(childrn.find(chld) != childrn.end()) { + if (childrn.find(chld) != childrn.end()) { ++num; } } @@ -601,8 +593,8 @@ std::size_t num_common_children(const Graph_t &graph, vertex_idx_t v1, * @param[out] components A vector where `components[i]` will be the component ID for vertex `i`. * @return The total number of weakly connected components. */ -template -std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vector>& components) { +template +std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vector> &components) { static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); using VertexType = vertex_idx_t; @@ -614,22 +606,22 @@ std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vecto components.assign(graph.num_vertices(), std::numeric_limits::max()); VertexType component_id = 0; - for (const auto& v : graph.vertices()) { + for (const auto &v : graph.vertices()) { if (components[v] == std::numeric_limits::max()) { std::vector q; q.push_back(v); components[v] = component_id; size_t head = 0; - while(head < q.size()) { + while (head < q.size()) { VertexType u = q[head++]; - for (const auto& neighbor : graph.parents(u)) { + for (const auto &neighbor : graph.parents(u)) { if (components[neighbor] == std::numeric_limits::max()) { components[neighbor] = component_id; q.push_back(neighbor); } } - for (const auto& neighbor : graph.children(u)) { + for (const auto &neighbor : graph.children(u)) { if (components[neighbor] == std::numeric_limits::max()) { components[neighbor] = component_id; q.push_back(neighbor); @@ -647,10 +639,10 @@ std::size_t compute_weakly_connected_components(const Graph_t &graph, std::vecto * @param graph The input directed graph. * @return The number of weakly connected components. */ -template +template std::size_t count_weakly_connected_components(const Graph_t &graph) { std::vector> components; return compute_weakly_connected_components(graph, components); } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_algorithms/strongly_connected_components.hpp b/include/osp/graph_algorithms/strongly_connected_components.hpp index 78c43817..7de49e29 100644 --- a/include/osp/graph_algorithms/strongly_connected_components.hpp +++ b/include/osp/graph_algorithms/strongly_connected_components.hpp @@ -18,12 +18,13 @@ limitations under the License. #pragma once -#include "osp/concepts/directed_graph_concept.hpp" #include #include #include #include +#include "osp/concepts/directed_graph_concept.hpp" + namespace osp { /** @@ -76,8 +77,7 @@ std::vector>> strongly_connected_components(co ++child_iter; if (ids[to] == unvisited) { - dfs_stack.emplace_back( - to, std::make_pair(graph.children(to).begin(), graph.children(to).end())); + dfs_stack.emplace_back(to, std::make_pair(graph.children(to).begin(), graph.children(to).end())); s.push(to); on_stack[to] = true; ids[to] = low[to] = id_counter++; @@ -92,8 +92,9 @@ std::vector>> strongly_connected_components(co s.pop(); on_stack[node] = false; scc.push_back(node); - if (node == at) + if (node == at) { break; + } } sccs.emplace_back(std::move(scc)); } @@ -112,4 +113,4 @@ std::vector>> strongly_connected_components(co return sccs; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_algorithms/subgraph_algorithms.hpp b/include/osp/graph_algorithms/subgraph_algorithms.hpp index 193dcaa1..44c0a3a8 100644 --- a/include/osp/graph_algorithms/subgraph_algorithms.hpp +++ b/include/osp/graph_algorithms/subgraph_algorithms.hpp @@ -18,27 +18,26 @@ limitations under the License. #pragma once -#include "osp/concepts/constructable_computational_dag_concept.hpp" -#include "osp/concepts/directed_graph_concept.hpp" #include #include #include +#include "osp/concepts/constructable_computational_dag_concept.hpp" +#include "osp/concepts/directed_graph_concept.hpp" + namespace osp { -template -void create_induced_subgraph(const Graph_t_in &dag, Graph_t_out &dag_out, +template +void create_induced_subgraph(const Graph_t_in &dag, + Graph_t_out &dag_out, const std::set> &selected_nodes, const std::set> &extra_sources = {}) { - static_assert(std::is_same_v, vertex_idx_t>, "Graph_t_in and out must have the same vertex_idx types"); - static_assert(is_constructable_cdag_vertex_v, - "Graph_t_out must satisfy the constructable_cdag_vertex concept"); + static_assert(is_constructable_cdag_vertex_v, "Graph_t_out must satisfy the constructable_cdag_vertex concept"); - static_assert(is_constructable_cdag_edge_v, - "Graph_t_out must satisfy the constructable_cdag_edge concept"); + static_assert(is_constructable_cdag_edge_v, "Graph_t_out must satisfy the constructable_cdag_edge concept"); assert(dag_out.num_vertices() == 0); @@ -60,8 +59,8 @@ void create_induced_subgraph(const Graph_t_in &dag, Graph_t_out &dag_out, if constexpr (is_constructable_cdag_typed_vertex_v and has_typed_vertices_v) { // add vertex with type - dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), - dag.vertex_type(node)); + dag_out.add_vertex( + dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), dag.vertex_type(node)); } else { // add vertex without type dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node)); @@ -69,85 +68,90 @@ void create_induced_subgraph(const Graph_t_in &dag, Graph_t_out &dag_out, } if constexpr (has_edge_weights_v and has_edge_weights_v) { - // add edges with edge comm weights - for (const auto &node : selected_nodes) + for (const auto &node : selected_nodes) { for (const auto &in_edge : in_edges(node, dag)) { const auto &pred = source(in_edge, dag); - if (selected_nodes.find(pred) != selected_nodes.end() || extra_sources.find(pred) != extra_sources.end()) + if (selected_nodes.find(pred) != selected_nodes.end() || extra_sources.find(pred) != extra_sources.end()) { dag_out.add_edge(local_idx[pred], local_idx[node], dag.edge_comm_weight(in_edge)); + } } + } } else { - // add edges without edge comm weights - for (const auto &node : selected_nodes) + for (const auto &node : selected_nodes) { for (const auto &pred : dag.parents(node)) { - - if (selected_nodes.find(pred) != selected_nodes.end() || - extra_sources.find(pred) != extra_sources.end()) + if (selected_nodes.find(pred) != selected_nodes.end() || extra_sources.find(pred) != extra_sources.end()) { dag_out.add_edge(local_idx[pred], local_idx[node]); + } } + } } } - -template -void create_induced_subgraph(const Graph_t_in &dag, Graph_t_out &dag_out, +template +void create_induced_subgraph(const Graph_t_in &dag, + Graph_t_out &dag_out, const std::vector> &selected_nodes) { return create_induced_subgraph(dag, dag_out, std::set>(selected_nodes.begin(), selected_nodes.end())); } - -template +template bool checkOrderedIsomorphism(const Graph_t &first, const Graph_t &second) { - static_assert(is_directed_graph_v, "Graph_t must satisfy the directed_graph concept"); - if (first.num_vertices() != second.num_vertices() || first.num_edges() != second.num_edges()) + if (first.num_vertices() != second.num_vertices() || first.num_edges() != second.num_edges()) { return false; + } for (const auto &node : first.vertices()) { - if (first.vertex_work_weight(node) != second.vertex_work_weight(node) || - first.vertex_mem_weight(node) != second.vertex_mem_weight(node) || - first.vertex_comm_weight(node) != second.vertex_comm_weight(node) || - first.vertex_type(node) != second.vertex_type(node)) + if (first.vertex_work_weight(node) != second.vertex_work_weight(node) + || first.vertex_mem_weight(node) != second.vertex_mem_weight(node) + || first.vertex_comm_weight(node) != second.vertex_comm_weight(node) + || first.vertex_type(node) != second.vertex_type(node)) { return false; + } - if (first.in_degree(node) != second.in_degree(node) || first.out_degree(node) != second.out_degree(node)) + if (first.in_degree(node) != second.in_degree(node) || first.out_degree(node) != second.out_degree(node)) { return false; + } if constexpr (has_edge_weights_v) { - std::set, e_commw_t>> first_children, second_children; - for (const auto &out_edge : out_edges(node, first)) + for (const auto &out_edge : out_edges(node, first)) { first_children.emplace(target(out_edge, first), first.edge_comm_weight(out_edge)); + } - for (const auto &out_edge : out_edges(node, second)) + for (const auto &out_edge : out_edges(node, second)) { second_children.emplace(target(out_edge, second), second.edge_comm_weight(out_edge)); + } auto itr = first_children.begin(), second_itr = second_children.begin(); for (; itr != first_children.end() && second_itr != second_children.end(); ++itr) { - if (*itr != *second_itr) + if (*itr != *second_itr) { return false; + } ++second_itr; } } else { - std::set> first_children, second_children; - for (const auto &child : first.children(node)) + for (const auto &child : first.children(node)) { first_children.emplace(child); + } - for (const auto &child : second.children(node)) + for (const auto &child : second.children(node)) { second_children.emplace(child); + } auto itr = first_children.begin(), second_itr = second_children.begin(); for (; itr != first_children.end() && second_itr != second_children.end(); ++itr) { - if (*itr != *second_itr) + if (*itr != *second_itr) { return false; + } ++second_itr; } } @@ -156,25 +160,21 @@ bool checkOrderedIsomorphism(const Graph_t &first, const Graph_t &second) { return true; } -template -std::vector create_induced_subgraphs(const Graph_t_in &dag_in, - const std::vector &partition_IDs) { +template +std::vector create_induced_subgraphs(const Graph_t_in &dag_in, const std::vector &partition_IDs) { // assumes that input partition IDs are consecutive and starting from 0 static_assert(std::is_same_v, vertex_idx_t>, "Graph_t_in and out must have the same vertex_idx types"); - static_assert(is_constructable_cdag_vertex_v, - "Graph_t_out must satisfy the constructable_cdag_vertex concept"); - - static_assert(is_constructable_cdag_edge_v, - "Graph_t_out must satisfy the constructable_cdag_edge concept"); - + static_assert(is_constructable_cdag_vertex_v, "Graph_t_out must satisfy the constructable_cdag_vertex concept"); + static_assert(is_constructable_cdag_edge_v, "Graph_t_out must satisfy the constructable_cdag_edge concept"); unsigned number_of_parts = 0; - for (const auto id : partition_IDs) + for (const auto id : partition_IDs) { number_of_parts = std::max(number_of_parts, id + 1); + } std::vector split_dags(number_of_parts); @@ -184,33 +184,32 @@ std::vector create_induced_subgraphs(const Graph_t_in &dag_in, local_idx[node] = split_dags[partition_IDs[node]].num_vertices(); if constexpr (is_constructable_cdag_typed_vertex_v and has_typed_vertices_v) { - - split_dags[partition_IDs[node]].add_vertex(dag_in.vertex_work_weight(node), dag_in.vertex_comm_weight(node), - dag_in.vertex_mem_weight(node), dag_in.vertex_type(node)); + split_dags[partition_IDs[node]].add_vertex(dag_in.vertex_work_weight(node), + dag_in.vertex_comm_weight(node), + dag_in.vertex_mem_weight(node), + dag_in.vertex_type(node)); } else { - split_dags[partition_IDs[node]].add_vertex(dag_in.vertex_work_weight(node), dag_in.vertex_comm_weight(node), - dag_in.vertex_mem_weight(node)); + split_dags[partition_IDs[node]].add_vertex( + dag_in.vertex_work_weight(node), dag_in.vertex_comm_weight(node), dag_in.vertex_mem_weight(node)); } } if constexpr (has_edge_weights_v and has_edge_weights_v) { - for (const auto node : dag_in.vertices()) { for (const auto &out_edge : out_edges(node, dag_in)) { - auto succ = target(out_edge, dag_in); - if (partition_IDs[node] == partition_IDs[succ]) - split_dags[partition_IDs[node]].add_edge(local_idx[node], local_idx[succ], - dag_in.edge_comm_weight(out_edge)); + if (partition_IDs[node] == partition_IDs[succ]) { + split_dags[partition_IDs[node]].add_edge(local_idx[node], local_idx[succ], dag_in.edge_comm_weight(out_edge)); + } } } } else { - for (const auto node : dag_in.vertices()) { for (const auto &child : dag_in.children(node)) { - if (partition_IDs[node] == partition_IDs[child]) + if (partition_IDs[node] == partition_IDs[child]) { split_dags[partition_IDs[node]].add_edge(local_idx[node], local_idx[child]); + } } } } @@ -218,18 +217,15 @@ std::vector create_induced_subgraphs(const Graph_t_in &dag_in, return split_dags; } -template -std::unordered_map, vertex_idx_t> create_induced_subgraph_map(const Graph_t_in &dag, Graph_t_out &dag_out, - const std::vector> &selected_nodes) { - +template +std::unordered_map, vertex_idx_t> create_induced_subgraph_map( + const Graph_t_in &dag, Graph_t_out &dag_out, const std::vector> &selected_nodes) { static_assert(std::is_same_v, vertex_idx_t>, "Graph_t_in and out must have the same vertex_idx types"); - static_assert(is_constructable_cdag_vertex_v, - "Graph_t_out must satisfy the constructable_cdag_vertex concept"); + static_assert(is_constructable_cdag_vertex_v, "Graph_t_out must satisfy the constructable_cdag_vertex concept"); - static_assert(is_constructable_cdag_edge_v, - "Graph_t_out must satisfy the constructable_cdag_edge concept"); + static_assert(is_constructable_cdag_edge_v, "Graph_t_out must satisfy the constructable_cdag_edge concept"); assert(dag_out.num_vertices() == 0); @@ -241,8 +237,8 @@ std::unordered_map, vertex_idx_t> create_in if constexpr (is_constructable_cdag_typed_vertex_v and has_typed_vertices_v) { // add vertex with type - dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), - dag.vertex_type(node)); + dag_out.add_vertex( + dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node), dag.vertex_type(node)); } else { // add vertex without type dag_out.add_vertex(dag.vertex_work_weight(node), dag.vertex_comm_weight(node), dag.vertex_mem_weight(node)); @@ -250,26 +246,28 @@ std::unordered_map, vertex_idx_t> create_in } if constexpr (has_edge_weights_v and has_edge_weights_v) { - // add edges with edge comm weights - for (const auto &node : selected_nodes) + for (const auto &node : selected_nodes) { for (const auto &in_edge : in_edges(node, dag)) { const auto &pred = source(in_edge, dag); - if (local_idx.count(pred)) + if (local_idx.count(pred)) { dag_out.add_edge(local_idx[pred], local_idx[node], dag.edge_comm_weight(in_edge)); + } } + } } else { - // add edges without edge comm weights - for (const auto &node : selected_nodes) + for (const auto &node : selected_nodes) { for (const auto &pred : dag.parents(node)) { - if (local_idx.count(pred)) + if (local_idx.count(pred)) { dag_out.add_edge(local_idx[pred], local_idx[node]); + } } + } } return local_idx; } -} // end namespace osp +} // end namespace osp diff --git a/include/osp/graph_algorithms/transitive_reduction.hpp b/include/osp/graph_algorithms/transitive_reduction.hpp index d6b5767a..a5ec6772 100644 --- a/include/osp/graph_algorithms/transitive_reduction.hpp +++ b/include/osp/graph_algorithms/transitive_reduction.hpp @@ -18,12 +18,13 @@ limitations under the License. #pragma once +#include + #include "osp/concepts/constructable_computational_dag_concept.hpp" #include "osp/concepts/directed_graph_edge_desc_concept.hpp" #include "osp/graph_algorithms/directed_graph_path_util.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" -#include namespace osp { /** @@ -58,11 +59,13 @@ void transitive_reduction_sparse(const Graph_t_in &graph_in, Graph_t_out &graph_ // 1. Copy vertices and their properties from graph_in to graph_out. for (const auto &v_idx : graph_in.vertices()) { if constexpr (has_typed_vertices_v && is_constructable_cdag_typed_vertex_v) { - graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), - graph_in.vertex_mem_weight(v_idx), graph_in.vertex_type(v_idx)); + graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), + graph_in.vertex_comm_weight(v_idx), + graph_in.vertex_mem_weight(v_idx), + graph_in.vertex_type(v_idx)); } else { - graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), - graph_in.vertex_mem_weight(v_idx)); + graph_out.add_vertex( + graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), graph_in.vertex_mem_weight(v_idx)); } } @@ -121,11 +124,13 @@ void transitive_reduction_dense(const Graph_t_in &graph_in, Graph_t_out &graph_o // 1. Copy vertices and their properties from graph_in to graph_out. for (const auto &v_idx : graph_in.vertices()) { if constexpr (has_typed_vertices_v && is_constructable_cdag_typed_vertex_v) { - graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), - graph_in.vertex_mem_weight(v_idx), graph_in.vertex_type(v_idx)); + graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), + graph_in.vertex_comm_weight(v_idx), + graph_in.vertex_mem_weight(v_idx), + graph_in.vertex_type(v_idx)); } else { - graph_out.add_vertex(graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), - graph_in.vertex_mem_weight(v_idx)); + graph_out.add_vertex( + graph_in.vertex_work_weight(v_idx), graph_in.vertex_comm_weight(v_idx), graph_in.vertex_mem_weight(v_idx)); } } @@ -169,4 +174,4 @@ void transitive_reduction_dense(const Graph_t_in &graph_in, Graph_t_out &graph_o } } -} // namespace osp +} // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp b/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp index 616aea6b..fd950cc3 100644 --- a/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp +++ b/include/osp/graph_implementations/adj_list_impl/cdag_vertex_impl.hpp @@ -17,7 +17,7 @@ limitations under the License. */ #pragma once -#include // for std::size_t +#include // for std::size_t namespace osp { @@ -33,9 +33,8 @@ namespace osp { * @tparam memw_t Type for memory weights. * @tparam vertex_type_t Type for vertex types. */ -template +template struct cdag_vertex_impl { - using vertex_idx_type = vertex_idx_t; using work_weight_type = workw_t; using comm_weight_type = commw_t; @@ -58,10 +57,8 @@ struct cdag_vertex_impl { * @param mem_w The memory weight. * @param vertex_t The type of the vertex. */ - cdag_vertex_impl(vertex_idx_t vertex_idx_, workw_t work_w, commw_t comm_w, memw_t mem_w, - vertex_type_t vertex_t) - : id(vertex_idx_), work_weight(work_w), comm_weight(comm_w), mem_weight(mem_w), - vertex_type(vertex_t) {} + cdag_vertex_impl(vertex_idx_t vertex_idx_, workw_t work_w, commw_t comm_w, memw_t mem_w, vertex_type_t vertex_t) + : id(vertex_idx_), work_weight(work_w), comm_weight(comm_w), mem_weight(mem_w), vertex_type(vertex_t) {} vertex_idx_t id = 0; @@ -86,4 +83,4 @@ using cdag_vertex_impl_int = cdag_vertex_impl; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp index 6ef6007c..f933a7b6 100644 --- a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp +++ b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp @@ -18,6 +18,7 @@ limitations under the License. #pragma once #include +#include #include #include #include @@ -27,862 +28,1041 @@ limitations under the License. #include "osp/auxiliary/math/math_helper.hpp" #include "osp/concepts/computational_dag_concept.hpp" -#include "osp/concepts/directed_graph_edge_desc_concept.hpp" #include "osp/concepts/constructable_computational_dag_concept.hpp" +#include "osp/concepts/directed_graph_edge_desc_concept.hpp" #include "osp/concepts/specific_graph_impl.hpp" #include "osp/graph_algorithms/directed_graph_edge_view.hpp" #include "osp/graph_implementations/integral_range.hpp" namespace osp { -template +template class Compact_Sparse_Graph { - static_assert(std::is_integral::value && std::is_integral::value, "Vertex and edge type must be of integral nature."); + static_assert(std::is_integral::value && std::is_integral::value, + "Vertex and edge type must be of integral nature."); static_assert(std::is_arithmetic_v && "Work weight must be of arithmetic type."); static_assert(std::is_arithmetic_v && "Communication weight must be of arithmetic type."); static_assert(std::is_arithmetic_v && "Memory weight must be of arithmetic type."); static_assert(std::is_integral_v && "Vertex type type must be of integral type."); - public: - using vertex_idx = vert_t; - - using vertex_work_weight_type = std::conditional_t; - using vertex_comm_weight_type = comm_weight_type; - using vertex_mem_weight_type = mem_weight_type; - using vertex_type_type = vertex_type_template_type; - - static bool constexpr vertices_in_top_order = true; - static bool constexpr children_in_top_order = true; - static bool constexpr children_in_vertex_order = true; - static bool constexpr parents_in_top_order = true; - static bool constexpr parents_in_vertex_order = true; - - private: - using ThisT = Compact_Sparse_Graph; - - protected: - class Compact_Parent_Edges { - private: - // Compressed Sparse Row (CSR) - std::vector csr_edge_parents; - std::vector csr_target_ptr; - - public: - Compact_Parent_Edges() = default; - Compact_Parent_Edges(const Compact_Parent_Edges &other) = default; - Compact_Parent_Edges(Compact_Parent_Edges &&other) = default; - Compact_Parent_Edges &operator=(const Compact_Parent_Edges &other) = default; - Compact_Parent_Edges &operator=(Compact_Parent_Edges &&other) = default; - virtual ~Compact_Parent_Edges() = default; - - Compact_Parent_Edges(const std::vector &csr_edge_parents_, const std::vector &csr_target_ptr_) : csr_edge_parents(csr_edge_parents_), csr_target_ptr(csr_target_ptr_) {}; - Compact_Parent_Edges(std::vector &&csr_edge_parents_, std::vector &&csr_target_ptr_) : csr_edge_parents(std::move(csr_edge_parents_)), csr_target_ptr(std::move(csr_target_ptr_)) {}; - - inline edge_t number_of_parents(const vertex_idx v) const { - return csr_target_ptr[v + 1] - csr_target_ptr[v]; - } + public: + using vertex_idx = vert_t; + + using vertex_work_weight_type = std::conditional_t; + using vertex_comm_weight_type = comm_weight_type; + using vertex_mem_weight_type = mem_weight_type; + using vertex_type_type = vertex_type_template_type; + + static bool constexpr vertices_in_top_order = true; + static bool constexpr children_in_top_order = true; + static bool constexpr children_in_vertex_order = true; + static bool constexpr parents_in_top_order = true; + static bool constexpr parents_in_vertex_order = true; + + private: + using ThisT = Compact_Sparse_Graph; + + protected: + class Compact_Parent_Edges { + private: + // Compressed Sparse Row (CSR) + std::vector csr_edge_parents; + std::vector csr_target_ptr; + + public: + Compact_Parent_Edges() = default; + Compact_Parent_Edges(const Compact_Parent_Edges &other) = default; + Compact_Parent_Edges(Compact_Parent_Edges &&other) = default; + Compact_Parent_Edges &operator=(const Compact_Parent_Edges &other) = default; + Compact_Parent_Edges &operator=(Compact_Parent_Edges &&other) = default; + virtual ~Compact_Parent_Edges() = default; + + Compact_Parent_Edges(const std::vector &csr_edge_parents_, const std::vector &csr_target_ptr_) + : csr_edge_parents(csr_edge_parents_), csr_target_ptr(csr_target_ptr_) {}; + Compact_Parent_Edges(std::vector &&csr_edge_parents_, std::vector &&csr_target_ptr_) + : csr_edge_parents(std::move(csr_edge_parents_)), csr_target_ptr(std::move(csr_target_ptr_)) {}; + + inline edge_t number_of_parents(const vertex_idx v) const { return csr_target_ptr[v + 1] - csr_target_ptr[v]; } + + class Parent_range { + private: + const std::vector &_csr_edge_parents; + const std::vector &_csr_target_ptr; + const vertex_idx _vert; + + public: + Parent_range(const std::vector &csr_edge_parents, + const std::vector &csr_target_ptr, + const vertex_idx vert) + : _csr_edge_parents(csr_edge_parents), _csr_target_ptr(csr_target_ptr), _vert(vert) {}; + + inline auto cbegin() const { + auto it = _csr_edge_parents.cbegin(); + std::advance(it, _csr_target_ptr[_vert]); + return it; + } + + inline auto cend() const { + auto it = _csr_edge_parents.cbegin(); + std::advance(it, _csr_target_ptr[_vert + 1]); + return it; + } - class Parent_range { - private: - const std::vector &_csr_edge_parents; - const std::vector &_csr_target_ptr; - const vertex_idx _vert; + inline auto begin() const { return cbegin(); } - public: - Parent_range (const std::vector &csr_edge_parents, const std::vector &csr_target_ptr, const vertex_idx vert) : _csr_edge_parents(csr_edge_parents), _csr_target_ptr(csr_target_ptr), _vert(vert) {}; + inline auto end() const { return cend(); } - inline auto cbegin() const { auto it = _csr_edge_parents.cbegin(); std::advance(it, _csr_target_ptr[_vert]); return it; } - inline auto cend() const { auto it = _csr_edge_parents.cbegin(); std::advance(it, _csr_target_ptr[_vert + 1]); return it; } - - inline auto begin() const { return cbegin(); } - inline auto end() const { return cend(); } + inline auto crbegin() const { + auto it = _csr_edge_parents.crbegin(); + std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert + 1]); + return it; + }; - inline auto crbegin() const { auto it = _csr_edge_parents.crbegin(); std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert + 1]); return it; }; - inline auto crend() const { auto it = _csr_edge_parents.crbegin(); std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert]); return it; }; + inline auto crend() const { + auto it = _csr_edge_parents.crbegin(); + std::advance(it, _csr_target_ptr[_csr_target_ptr.size() - 1] - _csr_target_ptr[_vert]); + return it; + }; - inline auto rbegin() const { return crbegin(); }; - inline auto rend() const { return crend(); }; - }; + inline auto rbegin() const { return crbegin(); }; - inline Parent_range parents(const vertex_idx vert) const { return Parent_range(csr_edge_parents, csr_target_ptr, vert); } + inline auto rend() const { return crend(); }; }; - class Compact_Children_Edges { - private: - // Compressed Sparse Column (CSC) - std::vector csc_edge_children; - std::vector csc_source_ptr; - - public: - Compact_Children_Edges() = default; - Compact_Children_Edges(const Compact_Children_Edges &other) = default; - Compact_Children_Edges(Compact_Children_Edges &&other) = default; - Compact_Children_Edges &operator=(const Compact_Children_Edges &other) = default; - Compact_Children_Edges &operator=(Compact_Children_Edges &&other) = default; - virtual ~Compact_Children_Edges() = default; - - Compact_Children_Edges(const std::vector &csc_edge_children_, const std::vector &csc_source_ptr_) : csc_edge_children(csc_edge_children_), csc_source_ptr(csc_source_ptr_) {}; - Compact_Children_Edges(std::vector &&csc_edge_children_, std::vector &&csc_source_ptr_) : csc_edge_children(std::move(csc_edge_children_)), csc_source_ptr(std::move(csc_source_ptr_)) {}; - - inline edge_t number_of_children(const vertex_idx v) const { - return csc_source_ptr[v + 1] - csc_source_ptr[v]; - } + inline Parent_range parents(const vertex_idx vert) const { return Parent_range(csr_edge_parents, csr_target_ptr, vert); } + }; + + class Compact_Children_Edges { + private: + // Compressed Sparse Column (CSC) + std::vector csc_edge_children; + std::vector csc_source_ptr; + + public: + Compact_Children_Edges() = default; + Compact_Children_Edges(const Compact_Children_Edges &other) = default; + Compact_Children_Edges(Compact_Children_Edges &&other) = default; + Compact_Children_Edges &operator=(const Compact_Children_Edges &other) = default; + Compact_Children_Edges &operator=(Compact_Children_Edges &&other) = default; + virtual ~Compact_Children_Edges() = default; + + Compact_Children_Edges(const std::vector &csc_edge_children_, const std::vector &csc_source_ptr_) + : csc_edge_children(csc_edge_children_), csc_source_ptr(csc_source_ptr_) {}; + Compact_Children_Edges(std::vector &&csc_edge_children_, std::vector &&csc_source_ptr_) + : csc_edge_children(std::move(csc_edge_children_)), csc_source_ptr(std::move(csc_source_ptr_)) {}; + + inline edge_t number_of_children(const vertex_idx v) const { return csc_source_ptr[v + 1] - csc_source_ptr[v]; } + + inline vertex_idx source(const edge_t &indx) const { + auto it = std::upper_bound(csc_source_ptr.cbegin(), csc_source_ptr.cend(), indx); + vertex_idx src = static_cast(std::distance(csc_source_ptr.cbegin(), it) - 1); + return src; + }; + + inline vertex_idx target(const edge_t &indx) const { return csc_edge_children[indx]; }; - inline vertex_idx source(const edge_t &indx) const { auto it = std::upper_bound(csc_source_ptr.cbegin(), csc_source_ptr.cend(), indx); vertex_idx src = static_cast(std::distance(csc_source_ptr.cbegin(), it) - 1); return src;}; - inline vertex_idx target(const edge_t &indx) const { return csc_edge_children[indx]; }; + inline edge_t children_indx_begin(const vertex_idx &vert) const { return csc_source_ptr[vert]; }; - inline edge_t children_indx_begin(const vertex_idx &vert) const { return csc_source_ptr[vert]; }; + class Children_range { + private: + const std::vector &_csc_edge_children; + const std::vector &_csc_source_ptr; + const vertex_idx _vert; - class Children_range { - private: - const std::vector &_csc_edge_children; - const std::vector &_csc_source_ptr; - const vertex_idx _vert; + public: + Children_range(const std::vector &csc_edge_children, + const std::vector &csc_source_ptr, + const vertex_idx vert) + : _csc_edge_children(csc_edge_children), _csc_source_ptr(csc_source_ptr), _vert(vert) {}; - public: - Children_range (const std::vector &csc_edge_children, const std::vector &csc_source_ptr, const vertex_idx vert) : _csc_edge_children(csc_edge_children), _csc_source_ptr(csc_source_ptr), _vert(vert) {}; + inline auto cbegin() const { + auto it = _csc_edge_children.cbegin(); + std::advance(it, _csc_source_ptr[_vert]); + return it; + }; - inline auto cbegin() const { auto it = _csc_edge_children.cbegin(); std::advance(it, _csc_source_ptr[_vert]); return it; }; - inline auto cend() const { auto it = _csc_edge_children.cbegin(); std::advance(it, _csc_source_ptr[_vert + 1]); return it; }; + inline auto cend() const { + auto it = _csc_edge_children.cbegin(); + std::advance(it, _csc_source_ptr[_vert + 1]); + return it; + }; - inline auto begin() const { return cbegin(); }; - inline auto end() const { return cend(); }; + inline auto begin() const { return cbegin(); }; - inline auto crbegin() const { auto it = _csc_edge_children.crbegin(); std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert + 1]); return it; }; - inline auto crend() const { auto it = _csc_edge_children.crbegin(); std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert]); return it; }; + inline auto end() const { return cend(); }; - inline auto rbegin() const { return crbegin(); }; - inline auto rend() const { return crend(); }; - }; + inline auto crbegin() const { + auto it = _csc_edge_children.crbegin(); + std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert + 1]); + return it; + }; - inline Children_range children(const vertex_idx vert) const { return Children_range(csc_edge_children, csc_source_ptr, vert); } + inline auto crend() const { + auto it = _csc_edge_children.crbegin(); + std::advance(it, _csc_source_ptr[_csc_source_ptr.size() - 1] - _csc_source_ptr[_vert]); + return it; + }; + + inline auto rbegin() const { return crbegin(); }; + + inline auto rend() const { return crend(); }; }; + inline Children_range children(const vertex_idx vert) const { + return Children_range(csc_edge_children, csc_source_ptr, vert); + } + }; + vertex_idx number_of_vertices = static_cast(0); + edge_t number_of_edges = static_cast(0); - vertex_idx number_of_vertices = static_cast(0); - edge_t number_of_edges = static_cast(0); + Compact_Parent_Edges csr_in_edges; + Compact_Children_Edges csc_out_edges; - Compact_Parent_Edges csr_in_edges; - Compact_Children_Edges csc_out_edges; + vertex_type_type number_of_vertex_types = static_cast(1); - vertex_type_type number_of_vertex_types = static_cast(1); + std::vector vert_work_weights; + std::vector vert_comm_weights; + std::vector vert_mem_weights; + std::vector vert_types; - std::vector vert_work_weights; - std::vector vert_comm_weights; - std::vector vert_mem_weights; - std::vector vert_types; + std::vector vertex_permutation_from_internal_to_original; + std::vector vertex_permutation_from_original_to_internal; - std::vector vertex_permutation_from_internal_to_original; - std::vector vertex_permutation_from_original_to_internal; + template + std::enable_if_t _update_num_vertex_types() { + number_of_vertex_types = static_cast(1); + } - template - std::enable_if_t _update_num_vertex_types() { - number_of_vertex_types = static_cast(1); + template + std::enable_if_t _update_num_vertex_types() { + number_of_vertex_types = static_cast(1); + for (const auto vt : vert_types) { + number_of_vertex_types = std::max(number_of_vertex_types, vt); + } + } + + public: + Compact_Sparse_Graph() = default; + Compact_Sparse_Graph(const Compact_Sparse_Graph &other) = default; + Compact_Sparse_Graph(Compact_Sparse_Graph &&other) = default; + Compact_Sparse_Graph &operator=(const Compact_Sparse_Graph &other) = default; + Compact_Sparse_Graph &operator=(Compact_Sparse_Graph &&other) = default; + virtual ~Compact_Sparse_Graph() = default; + + template + Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type &edges) + : number_of_vertices(num_vertices_), number_of_edges(static_cast(edges.size())) { + static_assert(is_container_of>::value + || is_edge_list_type::value); + + assert((0 <= num_vertices_) && "Number of vertices must be non-negative."); + assert((edges.size() < static_cast(std::numeric_limits::max())) + && "Number of edges must be strictly smaller than the maximally representable number."); + + if constexpr (is_container_of>::value) { + assert(std::all_of(edges.begin(), + edges.end(), + [num_vertices_](const auto &edge) { + return (0 <= edge.first) && (edge.first < num_vertices_) && (0 <= edge.second) + && (edge.second < num_vertices_); + }) + && "Source and target of edges must be non-negative and less than the number of vertices."); + } + + if constexpr (is_edge_list_type_v) { + assert(std::all_of(edges.begin(), + edges.end(), + [num_vertices_](const auto &edge) { + return (0 <= edge.source) && (edge.source < num_vertices_) && (0 <= edge.target) + && (edge.target < num_vertices_); + }) + && "Source and target of edges must be non-negative and less than the number of vertices."); } - template - std::enable_if_t _update_num_vertex_types() { - number_of_vertex_types = static_cast(1); - for (const auto vt : vert_types) { - number_of_vertex_types = std::max(number_of_vertex_types, vt); + if constexpr (keep_vertex_order) { + if constexpr (is_container_of>::value) { + assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.first < edge.second; }) + && "Vertex order must be a topological order."); + } + if constexpr (is_edge_list_type_v) { + assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.source < edge.target; }) + && "Vertex order must be a topological order."); } } - - public: - Compact_Sparse_Graph() = default; - Compact_Sparse_Graph(const Compact_Sparse_Graph &other) = default; - Compact_Sparse_Graph(Compact_Sparse_Graph &&other) = default; - Compact_Sparse_Graph &operator=(const Compact_Sparse_Graph &other) = default; - Compact_Sparse_Graph &operator=(Compact_Sparse_Graph &&other) = default; - virtual ~Compact_Sparse_Graph() = default; + if constexpr (use_work_weights) { + vert_work_weights = std::vector(num_vertices(), 1); + } + if constexpr (use_comm_weights) { + vert_comm_weights = std::vector(num_vertices(), 0); + } + if constexpr (use_mem_weights) { + vert_mem_weights = std::vector(num_vertices(), 0); + } + if constexpr (use_vert_types) { + number_of_vertex_types = 1; + vert_types = std::vector(num_vertices(), 0); + } + if constexpr (!keep_vertex_order) { + vertex_permutation_from_internal_to_original.reserve(num_vertices()); + vertex_permutation_from_original_to_internal.reserve(num_vertices()); + } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges) : number_of_vertices(num_vertices_), number_of_edges(static_cast(edges.size())) { - static_assert( is_container_of>::value - || is_edge_list_type::value); - - assert((0 <= num_vertices_) && "Number of vertices must be non-negative."); - assert((edges.size() < static_cast(std::numeric_limits::max())) && "Number of edges must be strictly smaller than the maximally representable number."); - - if constexpr ( is_container_of>::value ) { - assert(std::all_of(edges.begin(), edges.end(), [num_vertices_](const auto &edge) { return (0 <= edge.first) && (edge.first < num_vertices_) && (0 <= edge.second) && (edge.second < num_vertices_); } ) && "Source and target of edges must be non-negative and less than the number of vertices."); - } + // Construction + std::vector> children_tmp(num_vertices()); + std::vector num_parents_tmp(num_vertices(), 0); - if constexpr ( is_edge_list_type_v ) { - assert(std::all_of(edges.begin(), edges.end(), [num_vertices_](const auto &edge) { return (0 <= edge.source) && (edge.source < num_vertices_) && (0 <= edge.target) && (edge.target < num_vertices_); } ) && "Source and target of edges must be non-negative and less than the number of vertices."); + if constexpr (is_container_of>::value) { + for (const auto &edge : edges) { + children_tmp[edge.first].push_back(edge.second); + num_parents_tmp[edge.second]++; + } + } + if constexpr (is_edge_list_type_v) { + for (const auto &edge : edges) { + children_tmp[edge.source].push_back(edge.target); + num_parents_tmp[edge.target]++; } + } - if constexpr (keep_vertex_order) { - if constexpr ( is_container_of>::value ) { - assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.first < edge.second; } ) && "Vertex order must be a topological order."); - } - if constexpr ( is_edge_list_type_v ) { - assert(std::all_of(edges.begin(), edges.end(), [](const auto &edge) { return edge.source < edge.target; } ) && "Vertex order must be a topological order."); + std::vector csc_edge_children; + csc_edge_children.reserve(num_edges()); + std::vector csc_source_ptr(num_vertices() + 1); + std::vector csr_edge_parents(num_edges()); + std::vector csr_target_ptr; + csr_target_ptr.reserve(num_vertices() + 1); + + if constexpr (keep_vertex_order) { + for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { + csc_source_ptr[vert] = static_cast(csc_edge_children.size()); + + std::sort(children_tmp[vert].begin(), children_tmp[vert].end()); + for (const auto &chld : children_tmp[vert]) { + csc_edge_children.emplace_back(chld); } } + csc_source_ptr[num_vertices()] = static_cast(csc_edge_children.size()); - if constexpr (use_work_weights) { - vert_work_weights = std::vector(num_vertices(), 1); - } - if constexpr (use_comm_weights) { - vert_comm_weights = std::vector(num_vertices(), 0); + csr_target_ptr = std::vector(num_vertices() + 1, 0); + for (std::size_t i = 0U; i < num_parents_tmp.size(); ++i) { + csr_target_ptr[i + 1] = csr_target_ptr[i] + num_parents_tmp[i]; } - if constexpr (use_mem_weights) { - vert_mem_weights = std::vector(num_vertices(), 0); - } - if constexpr (use_vert_types) { - number_of_vertex_types = 1; - vert_types = std::vector(num_vertices(), 0); - } - if constexpr (!keep_vertex_order) { - vertex_permutation_from_internal_to_original.reserve(num_vertices()); - vertex_permutation_from_original_to_internal.reserve(num_vertices()); + + std::vector offset = csr_target_ptr; + for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { + for (const auto &chld : children_tmp[vert]) { + csr_edge_parents[offset[chld]++] = vert; + } } - // Construction - std::vector> children_tmp(num_vertices()); - std::vector num_parents_tmp(num_vertices(), 0); + } else { + std::vector> parents_tmp(num_vertices()); - if constexpr ( is_container_of>::value ) { + if constexpr (is_container_of>::value) { for (const auto &edge : edges) { - children_tmp[edge.first].push_back(edge.second); - num_parents_tmp[edge.second]++; + parents_tmp[edge.second].push_back(edge.first); } } - if constexpr ( is_edge_list_type_v ) { + if constexpr (is_edge_list_type_v) { for (const auto &edge : edges) { - children_tmp[edge.source].push_back(edge.target); - num_parents_tmp[edge.target]++; + parents_tmp[edge.target].push_back(edge.source); } } - std::vector csc_edge_children; - csc_edge_children.reserve(num_edges()); - std::vector csc_source_ptr(num_vertices() + 1); - std::vector csr_edge_parents(num_edges()); - std::vector csr_target_ptr; - csr_target_ptr.reserve(num_vertices() + 1); - - if constexpr (keep_vertex_order) { - for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { - csc_source_ptr[vert] = static_cast( csc_edge_children.size() ); - - std::sort(children_tmp[vert].begin(), children_tmp[vert].end()); - for (const auto &chld : children_tmp[vert]) { - csc_edge_children.emplace_back(chld); - } - } - csc_source_ptr[num_vertices()] = static_cast( csc_edge_children.size() ); + // Generating modified Gorder topological order cf. "Speedup Graph Processing by Graph Ordering" by Hao Wei, Jeffrey + // Xu Yu, Can Lu, and Xuemin Lin + const double decay = 8.0; - csr_target_ptr = std::vector(num_vertices() + 1, 0); - for (std::size_t i = 0U; i < num_parents_tmp.size(); ++i) { - csr_target_ptr[i + 1] = csr_target_ptr[i] + num_parents_tmp[i]; - } + std::vector prec_remaining = num_parents_tmp; + std::vector priorities(num_vertices(), 0.0); - std::vector offset = csr_target_ptr; - for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { - for (const auto &chld : children_tmp[vert]) { - csr_edge_parents[offset[chld]++] = vert; - } - } - - } else { - std::vector> parents_tmp(num_vertices()); + auto v_cmp = [&priorities, &children_tmp](const vertex_idx &lhs, const vertex_idx &rhs) { + return (priorities[lhs] < priorities[rhs]) + || ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() < children_tmp[rhs].size())) + || ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() == children_tmp[rhs].size()) + && (lhs > rhs)); + }; - if constexpr ( is_container_of>::value ) { - for (const auto &edge : edges) { - parents_tmp[edge.second].push_back(edge.first); - } - } - if constexpr ( is_edge_list_type_v ) { - for (const auto &edge : edges) { - parents_tmp[edge.target].push_back(edge.source); - } + std::priority_queue, decltype(v_cmp)> ready_q(v_cmp); + for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { + if (prec_remaining[vert] == 0) { + ready_q.push(vert); } + } - // Generating modified Gorder topological order cf. "Speedup Graph Processing by Graph Ordering" by Hao Wei, Jeffrey Xu Yu, Can Lu, and Xuemin Lin - const double decay = 8.0; + while (!ready_q.empty()) { + vertex_idx vert = ready_q.top(); + ready_q.pop(); - std::vector prec_remaining = num_parents_tmp; - std::vector priorities(num_vertices(), 0.0); + double pos = static_cast(vertex_permutation_from_internal_to_original.size()); + pos /= decay; - auto v_cmp = [&priorities, &children_tmp] (const vertex_idx &lhs, const vertex_idx &rhs) { - return (priorities[lhs] < priorities[rhs]) || - ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() < children_tmp[rhs].size())) || - ((priorities[lhs] <= priorities[rhs]) && (children_tmp[lhs].size() == children_tmp[rhs].size()) && (lhs > rhs)); - }; + vertex_permutation_from_internal_to_original.push_back(vert); - std::priority_queue, decltype(v_cmp)> ready_q(v_cmp); - for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { - if (prec_remaining[vert] == 0) { - ready_q.push(vert); - } + // update priorities + for (vertex_idx chld : children_tmp[vert]) { + priorities[chld] = log_sum_exp(priorities[chld], pos); } - - while (!ready_q.empty()) { - vertex_idx vert = ready_q.top(); - ready_q.pop(); - - double pos = static_cast(vertex_permutation_from_internal_to_original.size()); - pos /= decay; - - vertex_permutation_from_internal_to_original.push_back(vert); - - // update priorities - for (vertex_idx chld : children_tmp[vert]) { - priorities[chld] = log_sum_exp(priorities[chld], pos); + for (vertex_idx par : parents_tmp[vert]) { + for (vertex_idx sibling : children_tmp[par]) { + priorities[sibling] = log_sum_exp(priorities[sibling], pos); } - for (vertex_idx par : parents_tmp[vert]) { - for (vertex_idx sibling : children_tmp[par]) { - priorities[sibling] = log_sum_exp(priorities[sibling], pos); - } - } - for (vertex_idx chld : children_tmp[vert]) { - for (vertex_idx couple : parents_tmp[chld]) { - priorities[couple] = log_sum_exp(priorities[couple], pos); - } - } - - // update constraints and push to queue - for (vertex_idx chld : children_tmp[vert]) { - --prec_remaining[chld]; - if (prec_remaining[chld] == 0) { - ready_q.push(chld); - } - } - } - - assert(vertex_permutation_from_internal_to_original.size() == static_cast(num_vertices())); - - - // constructing the csr and csc - vertex_permutation_from_original_to_internal = std::vector(num_vertices(), 0); - for (vertex_idx new_pos = 0; new_pos < num_vertices(); ++new_pos) { - vertex_permutation_from_original_to_internal[vertex_permutation_from_internal_to_original[new_pos]] = new_pos; } - - for (vertex_idx vert_new_pos = 0; vert_new_pos < num_vertices(); ++vert_new_pos) { - csc_source_ptr[vert_new_pos] = static_cast( csc_edge_children.size() ); - - vertex_idx vert_old_name = vertex_permutation_from_internal_to_original[vert_new_pos]; - - std::vector children_new_name; - children_new_name.reserve( children_tmp[vert_old_name].size() ); - - for (vertex_idx chld_old_name : children_tmp[vert_old_name]) { - children_new_name.push_back( vertex_permutation_from_original_to_internal[chld_old_name] ); + for (vertex_idx chld : children_tmp[vert]) { + for (vertex_idx couple : parents_tmp[chld]) { + priorities[couple] = log_sum_exp(priorities[couple], pos); } - - - std::sort(children_new_name.begin(), children_new_name.end()); - for (const auto &chld : children_new_name) { - csc_edge_children.emplace_back(chld); - } - } - csc_source_ptr[num_vertices()] = static_cast( csc_edge_children.size() ); - - edge_t acc = 0; - for (vertex_idx vert_old_name : vertex_permutation_from_internal_to_original) { - csr_target_ptr.push_back(acc); - acc += num_parents_tmp[vert_old_name]; } - csr_target_ptr.push_back(acc); - std::vector offset = csr_target_ptr; - for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { - for (edge_t indx = csc_source_ptr[vert]; indx < csc_source_ptr[vert + 1]; ++indx) { - const vertex_idx chld = csc_edge_children[indx]; - csr_edge_parents[offset[chld]++] = vert; + // update constraints and push to queue + for (vertex_idx chld : children_tmp[vert]) { + --prec_remaining[chld]; + if (prec_remaining[chld] == 0) { + ready_q.push(chld); } } } - csc_out_edges = Compact_Children_Edges(std::move(csc_edge_children), std::move(csc_source_ptr)); - csr_in_edges = Compact_Parent_Edges(std::move(csr_edge_parents), std::move(csr_target_ptr)); - } + assert(vertex_permutation_from_internal_to_original.size() == static_cast(num_vertices())); - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &ww) : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - assert((ww.size() == static_cast(num_vertices())) && "Work weights vector must have the same length as the number of vertices."); - - if constexpr (keep_vertex_order) { - vert_work_weights = ww; - } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; - } + // constructing the csr and csc + vertex_permutation_from_original_to_internal = std::vector(num_vertices(), 0); + for (vertex_idx new_pos = 0; new_pos < num_vertices(); ++new_pos) { + vertex_permutation_from_original_to_internal[vertex_permutation_from_internal_to_original[new_pos]] = new_pos; } - } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, edge_list_type & edges, const std::vector &&ww) : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - assert((ww.size() == static_cast(num_vertices())) && "Work weights vector must have the same length as the number of vertices."); + for (vertex_idx vert_new_pos = 0; vert_new_pos < num_vertices(); ++vert_new_pos) { + csc_source_ptr[vert_new_pos] = static_cast(csc_edge_children.size()); - if constexpr (keep_vertex_order) { - vert_work_weights = std::move(ww); - } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; - } - } - } + vertex_idx vert_old_name = vertex_permutation_from_internal_to_original[vert_new_pos]; - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &ww, const std::vector &cw) : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - assert((ww.size() == static_cast(num_vertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) && "Communication weights vector must have the same length as the number of vertices."); + std::vector children_new_name; + children_new_name.reserve(children_tmp[vert_old_name].size()); - if constexpr (keep_vertex_order) { - vert_work_weights = ww; - } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + for (vertex_idx chld_old_name : children_tmp[vert_old_name]) { + children_new_name.push_back(vertex_permutation_from_original_to_internal[chld_old_name]); } - } - if constexpr (keep_vertex_order) { - vert_comm_weights = cw; - } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; + std::sort(children_new_name.begin(), children_new_name.end()); + for (const auto &chld : children_new_name) { + csc_edge_children.emplace_back(chld); } } - } - - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, std::vector &&ww, std::vector &&cw) : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - assert((ww.size() == static_cast(num_vertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) && "Communication weights vector must have the same length as the number of vertices."); + csc_source_ptr[num_vertices()] = static_cast(csc_edge_children.size()); - if constexpr (keep_vertex_order) { - vert_work_weights = std::move(ww); - } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; - } + edge_t acc = 0; + for (vertex_idx vert_old_name : vertex_permutation_from_internal_to_original) { + csr_target_ptr.push_back(acc); + acc += num_parents_tmp[vert_old_name]; } + csr_target_ptr.push_back(acc); - if constexpr (keep_vertex_order) { - vert_comm_weights = std::move(cw); - } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; + std::vector offset = csr_target_ptr; + for (vertex_idx vert = 0; vert < num_vertices(); ++vert) { + for (edge_t indx = csc_source_ptr[vert]; indx < csc_source_ptr[vert + 1]; ++indx) { + const vertex_idx chld = csc_edge_children[indx]; + csr_edge_parents[offset[chld]++] = vert; } } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &ww, const std::vector &cw, const std::vector &mw) : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); - assert((ww.size() == static_cast(num_vertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) && "Communication weights vector must have the same length as the number of vertices."); - assert((mw.size() == static_cast(num_vertices())) && "Memory weights vector must have the same length as the number of vertices."); - - if constexpr (keep_vertex_order) { - vert_work_weights = ww; - } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; - } - } - - if constexpr (keep_vertex_order) { - vert_comm_weights = cw; - } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; - } - } - - if constexpr (keep_vertex_order) { - vert_mem_weights = mw; - } else { - for (auto vert : vertices()) { - vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; - } + csc_out_edges = Compact_Children_Edges(std::move(csc_edge_children), std::move(csc_source_ptr)); + csr_in_edges = Compact_Parent_Edges(std::move(csr_edge_parents), std::move(csr_target_ptr)); + } + + template + Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type &edges, const std::vector &ww) + : Compact_Sparse_Graph(num_vertices_, edges) { + static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + assert((ww.size() == static_cast(num_vertices())) + && "Work weights vector must have the same length as the number of vertices."); + + if constexpr (keep_vertex_order) { + vert_work_weights = ww; + } else { + for (auto vert : vertices()) { + vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; } } - - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, std::vector &&ww, std::vector &&cw, std::vector &&mw) : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); - assert((ww.size() == static_cast(num_vertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) && "Communication weights vector must have the same length as the number of vertices."); - assert((mw.size() == static_cast(num_vertices())) && "Memory weights vector must have the same length as the number of vertices."); - - if constexpr (keep_vertex_order) { - vert_work_weights = std::move(ww); - } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; - } - } - - if constexpr (keep_vertex_order) { - vert_comm_weights = std::move(cw); - } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; - } + } + + template + Compact_Sparse_Graph(vertex_idx num_vertices_, edge_list_type &edges, const std::vector &&ww) + : Compact_Sparse_Graph(num_vertices_, edges) { + static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + assert((ww.size() == static_cast(num_vertices())) + && "Work weights vector must have the same length as the number of vertices."); + + if constexpr (keep_vertex_order) { + vert_work_weights = std::move(ww); + } else { + for (auto vert : vertices()) { + vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; } - - if constexpr (keep_vertex_order) { - vert_mem_weights = std::move(mw); - } else { - for (auto vert : vertices()) { - vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; - } + } + } + + template + Compact_Sparse_Graph(vertex_idx num_vertices_, + const edge_list_type &edges, + const std::vector &ww, + const std::vector &cw) + : Compact_Sparse_Graph(num_vertices_, edges) { + static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); + assert((ww.size() == static_cast(num_vertices())) + && "Work weights vector must have the same length as the number of vertices."); + assert((cw.size() == static_cast(num_vertices())) + && "Communication weights vector must have the same length as the number of vertices."); + + if constexpr (keep_vertex_order) { + vert_work_weights = ww; + } else { + for (auto vert : vertices()) { + vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &ww, const std::vector &cw, const std::vector &mw, const std::vector &vt) : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); - static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types."); - assert((ww.size() == static_cast(num_vertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) && "Communication weights vector must have the same length as the number of vertices."); - assert((mw.size() == static_cast(num_vertices())) && "Memory weights vector must have the same length as the number of vertices."); - assert((vt.size() == static_cast(num_vertices())) && "Vertex type vector must have the same length as the number of vertices."); - - if constexpr (keep_vertex_order) { - vert_work_weights = ww; - } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; - } + if constexpr (keep_vertex_order) { + vert_comm_weights = cw; + } else { + for (auto vert : vertices()) { + vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; } - - if constexpr (keep_vertex_order) { - vert_comm_weights = cw; - } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; - } + } + } + + template + Compact_Sparse_Graph(vertex_idx num_vertices_, + const edge_list_type &edges, + std::vector &&ww, + std::vector &&cw) + : Compact_Sparse_Graph(num_vertices_, edges) { + static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); + assert((ww.size() == static_cast(num_vertices())) + && "Work weights vector must have the same length as the number of vertices."); + assert((cw.size() == static_cast(num_vertices())) + && "Communication weights vector must have the same length as the number of vertices."); + + if constexpr (keep_vertex_order) { + vert_work_weights = std::move(ww); + } else { + for (auto vert : vertices()) { + vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; } + } - if constexpr (keep_vertex_order) { - vert_mem_weights = mw; - } else { - for (auto vert : vertices()) { - vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; - } + if constexpr (keep_vertex_order) { + vert_comm_weights = std::move(cw); + } else { + for (auto vert : vertices()) { + vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; } - - if constexpr (keep_vertex_order) { - vert_types = vt; - } else { - for (auto vert : vertices()) { - vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]]; - } + } + } + + template + Compact_Sparse_Graph(vertex_idx num_vertices_, + const edge_list_type &edges, + const std::vector &ww, + const std::vector &cw, + const std::vector &mw) + : Compact_Sparse_Graph(num_vertices_, edges) { + static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); + static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); + assert((ww.size() == static_cast(num_vertices())) + && "Work weights vector must have the same length as the number of vertices."); + assert((cw.size() == static_cast(num_vertices())) + && "Communication weights vector must have the same length as the number of vertices."); + assert((mw.size() == static_cast(num_vertices())) + && "Memory weights vector must have the same length as the number of vertices."); + + if constexpr (keep_vertex_order) { + vert_work_weights = ww; + } else { + for (auto vert : vertices()) { + vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; } } - template - Compact_Sparse_Graph(vertex_idx num_vertices_, const edge_list_type & edges, std::vector &&ww, std::vector &&cw, std::vector &&mw, std::vector &&vt) : Compact_Sparse_Graph(num_vertices_, edges) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); - static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); - static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types."); - assert((ww.size() == static_cast(num_vertices())) && "Work weights vector must have the same length as the number of vertices."); - assert((cw.size() == static_cast(num_vertices())) && "Communication weights vector must have the same length as the number of vertices."); - assert((mw.size() == static_cast(num_vertices())) && "Memory weights vector must have the same length as the number of vertices."); - assert((vt.size() == static_cast(num_vertices())) && "Vertex type vector must have the same length as the number of vertices."); - - if constexpr (keep_vertex_order) { - vert_work_weights = std::move(ww); - } else { - for (auto vert : vertices()) { - vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; - } + if constexpr (keep_vertex_order) { + vert_comm_weights = cw; + } else { + for (auto vert : vertices()) { + vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; } + } - if constexpr (keep_vertex_order) { - vert_comm_weights = std::move(cw); - } else { - for (auto vert : vertices()) { - vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; - } + if constexpr (keep_vertex_order) { + vert_mem_weights = mw; + } else { + for (auto vert : vertices()) { + vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; } - - if constexpr (keep_vertex_order) { - vert_mem_weights = std::move(mw); - } else { - for (auto vert : vertices()) { - vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; - } + } + } + + template + Compact_Sparse_Graph(vertex_idx num_vertices_, + const edge_list_type &edges, + std::vector &&ww, + std::vector &&cw, + std::vector &&mw) + : Compact_Sparse_Graph(num_vertices_, edges) { + static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); + static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); + assert((ww.size() == static_cast(num_vertices())) + && "Work weights vector must have the same length as the number of vertices."); + assert((cw.size() == static_cast(num_vertices())) + && "Communication weights vector must have the same length as the number of vertices."); + assert((mw.size() == static_cast(num_vertices())) + && "Memory weights vector must have the same length as the number of vertices."); + + if constexpr (keep_vertex_order) { + vert_work_weights = std::move(ww); + } else { + for (auto vert : vertices()) { + vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; } + } - if constexpr (keep_vertex_order) { - vert_types = std::move(vt); - } else { - for (auto vert : vertices()) { - vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]]; - } + if constexpr (keep_vertex_order) { + vert_comm_weights = std::move(cw); + } else { + for (auto vert : vertices()) { + vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; } } - template - Compact_Sparse_Graph(const Graph_type & graph) : Compact_Sparse_Graph(graph.num_vertices(), edge_view(graph)) { - static_assert(is_directed_graph_v); - - if constexpr (is_computational_dag_v && use_work_weights) { - for (const auto &vert : graph.vertices()) { - set_vertex_work_weight(vert, graph.vertex_work_weight(vert)); - } + if constexpr (keep_vertex_order) { + vert_mem_weights = std::move(mw); + } else { + for (auto vert : vertices()) { + vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; } - - if constexpr (is_computational_dag_v && use_comm_weights) { - for (const auto &vert : graph.vertices()) { - set_vertex_comm_weight(vert, graph.vertex_comm_weight(vert)); - } + } + } + + template + Compact_Sparse_Graph(vertex_idx num_vertices_, + const edge_list_type &edges, + const std::vector &ww, + const std::vector &cw, + const std::vector &mw, + const std::vector &vt) + : Compact_Sparse_Graph(num_vertices_, edges) { + static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); + static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); + static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types."); + assert((ww.size() == static_cast(num_vertices())) + && "Work weights vector must have the same length as the number of vertices."); + assert((cw.size() == static_cast(num_vertices())) + && "Communication weights vector must have the same length as the number of vertices."); + assert((mw.size() == static_cast(num_vertices())) + && "Memory weights vector must have the same length as the number of vertices."); + assert((vt.size() == static_cast(num_vertices())) + && "Vertex type vector must have the same length as the number of vertices."); + + if constexpr (keep_vertex_order) { + vert_work_weights = ww; + } else { + for (auto vert : vertices()) { + vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; } + } - if constexpr (is_computational_dag_v && use_mem_weights) { - for (const auto &vert : graph.vertices()) { - set_vertex_mem_weight(vert, graph.vertex_mem_weight(vert)); - } + if constexpr (keep_vertex_order) { + vert_comm_weights = cw; + } else { + for (auto vert : vertices()) { + vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; } + } - if constexpr (is_computational_dag_typed_vertices_v && use_vert_types) { - for (const auto &vert : graph.vertices()) { - set_vertex_type(vert, graph.vertex_type(vert)); - } + if constexpr (keep_vertex_order) { + vert_mem_weights = mw; + } else { + for (auto vert : vertices()) { + vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; } } - inline auto vertices() const { return integral_range(number_of_vertices); }; - - inline vert_t num_vertices() const { return number_of_vertices; }; - inline edge_t num_edges() const { return number_of_edges; } - - inline auto parents(const vertex_idx &v) const { return csr_in_edges.parents(v); }; - inline auto children(const vertex_idx &v) const { return csc_out_edges.children(v); }; - - inline edge_t in_degree(const vertex_idx &v) const { - return csr_in_edges.number_of_parents(v); - }; - inline edge_t out_degree(const vertex_idx &v) const { - return csc_out_edges.number_of_children(v); - }; - - template - inline std::enable_if_t vertex_work_weight(const vertex_idx &v) const { - return vert_work_weights[v]; + if constexpr (keep_vertex_order) { + vert_types = vt; + } else { + for (auto vert : vertices()) { + vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]]; + } } - template - inline std::enable_if_t vertex_work_weight(const vertex_idx &v) const { - return static_cast(1) + in_degree(v); + } + + template + Compact_Sparse_Graph(vertex_idx num_vertices_, + const edge_list_type &edges, + std::vector &&ww, + std::vector &&cw, + std::vector &&mw, + std::vector &&vt) + : Compact_Sparse_Graph(num_vertices_, edges) { + static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + static_assert(use_comm_weights, "To set communication weight, graph type must allow communication weights."); + static_assert(use_mem_weights, "To set memory weight, graph type must allow memory weights."); + static_assert(use_vert_types, "To set vertex types, graph type must allow vertex types."); + assert((ww.size() == static_cast(num_vertices())) + && "Work weights vector must have the same length as the number of vertices."); + assert((cw.size() == static_cast(num_vertices())) + && "Communication weights vector must have the same length as the number of vertices."); + assert((mw.size() == static_cast(num_vertices())) + && "Memory weights vector must have the same length as the number of vertices."); + assert((vt.size() == static_cast(num_vertices())) + && "Vertex type vector must have the same length as the number of vertices."); + + if constexpr (keep_vertex_order) { + vert_work_weights = std::move(ww); + } else { + for (auto vert : vertices()) { + vert_work_weights[vert] = ww[vertex_permutation_from_internal_to_original[vert]]; + } } - template - inline std::enable_if_t vertex_comm_weight(const vertex_idx &v) const { - return vert_comm_weights[v]; - } - template - inline std::enable_if_t vertex_comm_weight(const vertex_idx) const { - return static_cast(0); + if constexpr (keep_vertex_order) { + vert_comm_weights = std::move(cw); + } else { + for (auto vert : vertices()) { + vert_comm_weights[vert] = cw[vertex_permutation_from_internal_to_original[vert]]; + } } - template - inline std::enable_if_t vertex_mem_weight(const vertex_idx &v) const { - return vert_mem_weights[v]; - } - template - inline std::enable_if_t vertex_mem_weight(const vertex_idx) const { - return static_cast(0); + if constexpr (keep_vertex_order) { + vert_mem_weights = std::move(mw); + } else { + for (auto vert : vertices()) { + vert_mem_weights[vert] = mw[vertex_permutation_from_internal_to_original[vert]]; + } } - template - inline std::enable_if_t vertex_type(const vertex_idx &v) const { - return vert_types[v]; - } - template - inline std::enable_if_t vertex_type(const vertex_idx) const { - return static_cast(0); + if constexpr (keep_vertex_order) { + vert_types = std::move(vt); + } else { + for (auto vert : vertices()) { + vert_types[vert] = vt[vertex_permutation_from_internal_to_original[vert]]; + } } + } - inline vertex_type_type num_vertex_types() const { return number_of_vertex_types; }; + template + Compact_Sparse_Graph(const Graph_type &graph) : Compact_Sparse_Graph(graph.num_vertices(), edge_view(graph)) { + static_assert(is_directed_graph_v); - template - inline std::enable_if_t set_vertex_work_weight(const vertex_idx &v, const vertex_work_weight_type work_weight) { - if constexpr (keep_vertex_order) { - vert_work_weights[v] = work_weight; - } else { - vert_work_weights[vertex_permutation_from_original_to_internal[v]] = work_weight; + if constexpr (is_computational_dag_v && use_work_weights) { + for (const auto &vert : graph.vertices()) { + set_vertex_work_weight(vert, graph.vertex_work_weight(vert)); } } - template - inline std::enable_if_t set_vertex_work_weight(const vertex_idx &v, const vertex_work_weight_type work_weight) { - static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); - } - template - inline std::enable_if_t set_vertex_comm_weight(const vertex_idx &v, const vertex_comm_weight_type comm_weight) { - if constexpr (keep_vertex_order) { - vert_comm_weights[v] = comm_weight; - } else { - vert_comm_weights[vertex_permutation_from_original_to_internal[v]] = comm_weight; + if constexpr (is_computational_dag_v && use_comm_weights) { + for (const auto &vert : graph.vertices()) { + set_vertex_comm_weight(vert, graph.vertex_comm_weight(vert)); } } - template - inline std::enable_if_t set_vertex_comm_weight(const vertex_idx &v, const vertex_comm_weight_type comm_weight) { - static_assert(use_comm_weights, "To set comm weight, graph type must allow comm weights."); - } - - template - inline std::enable_if_t set_vertex_mem_weight(const vertex_idx &v, const vertex_mem_weight_type mem_weight) { - if constexpr (keep_vertex_order) { - vert_mem_weights[v] = mem_weight; - } else { - vert_mem_weights[vertex_permutation_from_original_to_internal[v]] = mem_weight; + + if constexpr (is_computational_dag_v && use_mem_weights) { + for (const auto &vert : graph.vertices()) { + set_vertex_mem_weight(vert, graph.vertex_mem_weight(vert)); } } - template - inline std::enable_if_t set_vertex_mem_weight(const vertex_idx &v, const vertex_mem_weight_type mem_weight) { - static_assert(use_mem_weights, "To set mem weight, graph type must allow mem weights."); - } - - template - inline std::enable_if_t set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) { - if constexpr (keep_vertex_order) { - vert_types[v] = vertex_type_; - } else { - vert_types[vertex_permutation_from_original_to_internal[v]] = vertex_type_; + + if constexpr (is_computational_dag_typed_vertices_v && use_vert_types) { + for (const auto &vert : graph.vertices()) { + set_vertex_type(vert, graph.vertex_type(vert)); } - number_of_vertex_types = std::max(number_of_vertex_types, vertex_type_); - } - template - inline std::enable_if_t set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) { - static_assert(use_vert_types, "To set vert type, graph type must allow vertex types."); } + } - template &> - inline std::enable_if_t get_pullback_permutation() const { - static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function."); - return {}; - } + inline auto vertices() const { return integral_range(number_of_vertices); }; - template &> - inline std::enable_if_t get_pullback_permutation() const { - return vertex_permutation_from_internal_to_original; - } + inline vert_t num_vertices() const { return number_of_vertices; }; - template &> - inline std::enable_if_t get_pushforward_permutation() const { - static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function."); - return {}; - } + inline edge_t num_edges() const { return number_of_edges; } - template &> - inline std::enable_if_t get_pushforward_permutation() const { - return vertex_permutation_from_original_to_internal; - } -}; + inline auto parents(const vertex_idx &v) const { return csr_in_edges.parents(v); }; + + inline auto children(const vertex_idx &v) const { return csc_out_edges.children(v); }; + inline edge_t in_degree(const vertex_idx &v) const { return csr_in_edges.number_of_parents(v); }; + inline edge_t out_degree(const vertex_idx &v) const { return csc_out_edges.number_of_children(v); }; + template + inline std::enable_if_t vertex_work_weight(const vertex_idx &v) const { + return vert_work_weights[v]; + } + template + inline std::enable_if_t vertex_work_weight(const vertex_idx &v) const { + return static_cast(1) + in_degree(v); + } -template -struct is_Compact_Sparse_Graph, void> : std::true_type {}; + template + inline std::enable_if_t vertex_comm_weight(const vertex_idx &v) const { + return vert_comm_weights[v]; + } -template -struct is_Compact_Sparse_Graph_reorder, void> : std::true_type {}; + template + inline std::enable_if_t vertex_comm_weight(const vertex_idx) const { + return static_cast(0); + } + template + inline std::enable_if_t vertex_mem_weight(const vertex_idx &v) const { + return vert_mem_weights[v]; + } + template + inline std::enable_if_t vertex_mem_weight(const vertex_idx) const { + return static_cast(0); + } + template + inline std::enable_if_t vertex_type(const vertex_idx &v) const { + return vert_types[v]; + } + template + inline std::enable_if_t vertex_type(const vertex_idx) const { + return static_cast(0); + } + inline vertex_type_type num_vertex_types() const { return number_of_vertex_types; }; + + template + inline std::enable_if_t set_vertex_work_weight(const vertex_idx &v, + const vertex_work_weight_type work_weight) { + if constexpr (keep_vertex_order) { + vert_work_weights[v] = work_weight; + } else { + vert_work_weights[vertex_permutation_from_original_to_internal[v]] = work_weight; + } + } + + template + inline std::enable_if_t set_vertex_work_weight(const vertex_idx &v, + const vertex_work_weight_type work_weight) { + static_assert(use_work_weights, "To set work weight, graph type must allow work weights."); + } + + template + inline std::enable_if_t set_vertex_comm_weight(const vertex_idx &v, + const vertex_comm_weight_type comm_weight) { + if constexpr (keep_vertex_order) { + vert_comm_weights[v] = comm_weight; + } else { + vert_comm_weights[vertex_permutation_from_original_to_internal[v]] = comm_weight; + } + } + + template + inline std::enable_if_t set_vertex_comm_weight(const vertex_idx &v, + const vertex_comm_weight_type comm_weight) { + static_assert(use_comm_weights, "To set comm weight, graph type must allow comm weights."); + } + + template + inline std::enable_if_t set_vertex_mem_weight(const vertex_idx &v, + const vertex_mem_weight_type mem_weight) { + if constexpr (keep_vertex_order) { + vert_mem_weights[v] = mem_weight; + } else { + vert_mem_weights[vertex_permutation_from_original_to_internal[v]] = mem_weight; + } + } + + template + inline std::enable_if_t set_vertex_mem_weight(const vertex_idx &v, + const vertex_mem_weight_type mem_weight) { + static_assert(use_mem_weights, "To set mem weight, graph type must allow mem weights."); + } + + template + inline std::enable_if_t set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) { + if constexpr (keep_vertex_order) { + vert_types[v] = vertex_type_; + } else { + vert_types[vertex_permutation_from_original_to_internal[v]] = vertex_type_; + } + number_of_vertex_types = std::max(number_of_vertex_types, vertex_type_); + } + + template + inline std::enable_if_t set_vertex_type(const vertex_idx &v, const vertex_type_type vertex_type_) { + static_assert(use_vert_types, "To set vert type, graph type must allow vertex types."); + } + + template &> + inline std::enable_if_t get_pullback_permutation() const { + static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function."); + return {}; + } + + template &> + inline std::enable_if_t get_pullback_permutation() const { + return vertex_permutation_from_internal_to_original; + } + + template &> + inline std::enable_if_t get_pushforward_permutation() const { + static_assert(!keep_vertex_order, "No permutation was applied. This is a deleted function."); + return {}; + } + + template &> + inline std::enable_if_t get_pushforward_permutation() const { + return vertex_permutation_from_original_to_internal; + } +}; +template +struct is_Compact_Sparse_Graph, + void> : std::true_type {}; + +template +struct is_Compact_Sparse_Graph_reorder, + void> : std::true_type {}; static_assert(is_Compact_Sparse_Graph_v>); static_assert(is_Compact_Sparse_Graph_v>); static_assert(!is_Compact_Sparse_Graph_reorder_v>); static_assert(is_Compact_Sparse_Graph_reorder_v>); +static_assert(has_vertex_weights_v>, + "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); +static_assert(has_vertex_weights_v>, + "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); +static_assert(is_directed_graph_v>, + "Compact_Sparse_Graph must satisfy the directed_graph concept"); -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); +static_assert(is_directed_graph_v>, + "Compact_Sparse_Graph must satisfy the directed_graph concept"); -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); +static_assert(is_directed_graph_v>, + "Compact_Sparse_Graph must satisfy the directed_graph concept"); -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph must satisfy the directed_graph concept"); +static_assert(is_directed_graph_v>, + "Compact_Sparse_Graph must satisfy the directed_graph concept"); -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph must satisfy the directed_graph concept"); +static_assert(is_computational_dag_v>, + "Compact_Sparse_Graph must satisfy the is_computation_dag concept"); -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph must satisfy the directed_graph concept"); - -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph must satisfy the directed_graph concept"); - -static_assert(is_computational_dag_v>, - "Compact_Sparse_Graph must satisfy the is_computation_dag concept"); - -static_assert(is_computational_dag_v>, - "Compact_Sparse_Graph must satisfy the is_computation_dag concept"); +static_assert(is_computational_dag_v>, + "Compact_Sparse_Graph must satisfy the is_computation_dag concept"); static_assert(is_computational_dag_typed_vertices_v>, - "Compact_Sparse_Graph must satisfy the is_computation_dag with types concept"); + "Compact_Sparse_Graph must satisfy the is_computation_dag with types concept"); static_assert(is_computational_dag_typed_vertices_v>, - "Compact_Sparse_Graph must satisfy the is_computation_dag with types concept"); + "Compact_Sparse_Graph must satisfy the is_computation_dag with types concept"); -static_assert(is_direct_constructable_cdag_v>, - "Compact_Sparse_Graph must be directly constructable"); +static_assert(is_direct_constructable_cdag_v>, + "Compact_Sparse_Graph must be directly constructable"); -static_assert(is_direct_constructable_cdag_v>, - "Compact_Sparse_Graph must be directly constructable"); +static_assert(is_direct_constructable_cdag_v>, + "Compact_Sparse_Graph must be directly constructable"); using CSG = Compact_Sparse_Graph; -static_assert(is_directed_graph_edge_desc_v, - "CSG must satisfy the directed_graph_edge_desc concept"); - - - - - - +static_assert(is_directed_graph_edge_desc_v, "CSG must satisfy the directed_graph_edge_desc concept"); // // Graph specific implementations -// template +// template // bool coarser_util::construct_coarse_dag( // const Graph_t_in &dag_in, -// Compact_Sparse_Graph &coarsened_dag, -// std::vector>> &vertex_contraction_map) { +// Compact_Sparse_Graph &coarsened_dag, +// std::vector>> +// &vertex_contraction_map) { -// using Graph_out_type = Compact_Sparse_Graph; +// using Graph_out_type = Compact_Sparse_Graph; -// static_assert(is_directed_graph_v && is_directed_graph_v, "Graph types need to satisfy the is_directed_graph concept."); -// static_assert(is_computational_dag_v, "Graph_t_in must be a computational DAG"); -// static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, "Graph_out_type must be a (direct) constructable computational DAG"); +// static_assert(is_directed_graph_v && is_directed_graph_v, "Graph types need to satisfy the +// is_directed_graph concept."); static_assert(is_computational_dag_v, "Graph_t_in must be a computational DAG"); +// static_assert(is_constructable_cdag_v || is_direct_constructable_cdag_v, "Graph_out_type +// must be a (direct) constructable computational DAG"); // assert(check_valid_contraction_map(vertex_contraction_map)); - // const vertex_idx_t num_vert_quotient = // (*std::max_element(vertex_contraction_map.cbegin(), vertex_contraction_map.cend())) + 1; @@ -906,9 +1086,10 @@ static_assert(is_directed_graph_edge_desc_v, // } // if constexpr (has_vertex_weights_v && is_modifiable_cdag_vertex_v) { -// static_assert(std::is_same_v, v_workw_t>, "Work weight types of in-graph and out-graph must be the same."); -// static_assert(std::is_same_v, v_commw_t>, "Vertex communication types of in-graph and out-graph must be the same."); -// static_assert(std::is_same_v, v_memw_t>, "Memory weight types of in-graph and out-graph must be the same."); +// static_assert(std::is_same_v, v_workw_t>, "Work weight types of in-graph and +// out-graph must be the same."); static_assert(std::is_same_v, v_commw_t>, "Vertex +// communication types of in-graph and out-graph must be the same."); static_assert(std::is_same_v, +// v_memw_t>, "Memory weight types of in-graph and out-graph must be the same."); // for (const vertex_idx_t &vert : coarsened_dag.vertices()) { // coarsened_dag.set_vertex_work_weight(vert, 0); @@ -951,31 +1132,7 @@ static_assert(is_directed_graph_edge_desc_v, // std::cout << "Specific Template construct coarsen dag" << std::endl; - // return true; // }; - - - - - - - - - - - - - - - - - - - - - - - -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp index c455b582..68b8b967 100644 --- a/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp +++ b/include/osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp @@ -17,361 +17,698 @@ limitations under the License. */ #pragma once -#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" #include "osp/concepts/directed_graph_edge_desc_concept.hpp" +#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" namespace osp { -template -class Compact_Sparse_Graph_EdgeDesc : public Compact_Sparse_Graph { - private: - using ThisT = Compact_Sparse_Graph_EdgeDesc; - using BaseT = Compact_Sparse_Graph; - - public: - using vertex_idx = typename BaseT::vertex_idx; - - using vertex_work_weight_type = typename BaseT::vertex_work_weight_type; - using vertex_comm_weight_type = typename BaseT::vertex_comm_weight_type; - using vertex_mem_weight_type = typename BaseT::vertex_mem_weight_type; - using vertex_type_type = typename BaseT::vertex_type_type; - - using directed_edge_descriptor = edge_t; - using edge_comm_weight_type = e_comm_weight_type; - - protected: - std::vector edge_comm_weights; - - class In_Edges_range { - private: - const vertex_idx tgt_vert; - const typename BaseT::Compact_Parent_Edges::Parent_range par_range; - const typename BaseT::Compact_Children_Edges &csc_out_edges; - - class In_Edges_iterator { - public: - using iterator_category = std::bidirectional_iterator_tag; - using difference_type = std::ptrdiff_t; - using value_type = edge_t; - using pointer = vertex_idx *; - using reference = edge_t &; - - private: - const vertex_idx target_vert; - const typename BaseT::Compact_Children_Edges &csc_out_edges; - - typename std::vector::const_iterator current; - - public: - In_Edges_iterator(const vertex_idx &target_vert_, const typename BaseT::Compact_Children_Edges &csc_out_edges_, const typename std::vector::const_iterator start_) : target_vert(target_vert_), csc_out_edges(csc_out_edges_), current(start_) { }; - In_Edges_iterator(const In_Edges_iterator &other) : target_vert(other.target_vert), csc_out_edges(other.csc_out_edges), current(other.current) { }; - In_Edges_iterator &operator=(const In_Edges_iterator &other) { - if (this != &other) { - target_vert = other.target_vert; - csc_out_edges = other.csc_out_edges; - current = other.current; - } - return *this; - }; - - inline value_type operator*() const { - const vertex_idx src_vert = *current; - typename BaseT::Compact_Children_Edges::Children_range range = csc_out_edges.children(src_vert); - - assert(std::binary_search(range.cbegin(), range.cend(), target_vert)); - auto it = std::lower_bound(range.cbegin(), range.cend(), target_vert); - - edge_t diff = static_cast( std::distance(range.cbegin(), it) ); - edge_t edge_desc_val = csc_out_edges.children_indx_begin(src_vert) + diff; - - return edge_desc_val; - }; - - inline In_Edges_iterator &operator++() { - ++current; - return *this; - }; - - inline In_Edges_iterator operator++(int) { - In_Edges_iterator temp = *this; - ++(*this); - return temp; - }; - - inline In_Edges_iterator &operator--() { - --current; - return *this; - }; - - inline In_Edges_iterator operator--(int) { - In_Edges_iterator temp = *this; - --(*this); - return temp; - }; - - inline bool operator==(const In_Edges_iterator &other) const { return current == other.current; }; - inline bool operator!=(const In_Edges_iterator &other) const { return !(*this == other); }; - - inline bool operator<=(const In_Edges_iterator &other) const { return current <= other.current; }; - inline bool operator<(const In_Edges_iterator &other) const { return (*this <= other) && (*this != other); }; - inline bool operator>=(const In_Edges_iterator &other) const { return (!(*this <= other)) || (*this == other); }; - inline bool operator>(const In_Edges_iterator &other) const { return !(*this <= other); }; - }; - - public: - In_Edges_range() = default; - In_Edges_range(const In_Edges_range &other) = default; - In_Edges_range(In_Edges_range &&other) = default; - In_Edges_range &operator=(const In_Edges_range &other) = default; - In_Edges_range &operator=(In_Edges_range &&other) = default; - virtual ~In_Edges_range() = default; - - In_Edges_range(const vertex_idx &tgt_vert_, const ThisT &graph, const typename BaseT::Compact_Children_Edges &csc_out_edges_) : tgt_vert(tgt_vert_), par_range(graph.parents(tgt_vert_)), csc_out_edges(csc_out_edges_) { }; - - inline auto cbegin() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cbegin()); }; - inline auto cend() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cend()); }; - - inline auto begin() const { return cbegin(); }; - inline auto end() const { return cend(); }; +template +class Compact_Sparse_Graph_EdgeDesc : public Compact_Sparse_Graph { + private: + using ThisT = Compact_Sparse_Graph_EdgeDesc; + using BaseT = Compact_Sparse_Graph; + + public: + using vertex_idx = typename BaseT::vertex_idx; + + using vertex_work_weight_type = typename BaseT::vertex_work_weight_type; + using vertex_comm_weight_type = typename BaseT::vertex_comm_weight_type; + using vertex_mem_weight_type = typename BaseT::vertex_mem_weight_type; + using vertex_type_type = typename BaseT::vertex_type_type; + + using directed_edge_descriptor = edge_t; + using edge_comm_weight_type = e_comm_weight_type; + + protected: + std::vector edge_comm_weights; + + class In_Edges_range { + private: + const vertex_idx tgt_vert; + const typename BaseT::Compact_Parent_Edges::Parent_range par_range; + const typename BaseT::Compact_Children_Edges &csc_out_edges; + + class In_Edges_iterator { + public: + using iterator_category = std::bidirectional_iterator_tag; + using difference_type = std::ptrdiff_t; + using value_type = edge_t; + using pointer = vertex_idx *; + using reference = edge_t &; + + private: + const vertex_idx target_vert; + const typename BaseT::Compact_Children_Edges &csc_out_edges; + + typename std::vector::const_iterator current; + + public: + In_Edges_iterator(const vertex_idx &target_vert_, + const typename BaseT::Compact_Children_Edges &csc_out_edges_, + const typename std::vector::const_iterator start_) + : target_vert(target_vert_), csc_out_edges(csc_out_edges_), current(start_) {}; + In_Edges_iterator(const In_Edges_iterator &other) + : target_vert(other.target_vert), csc_out_edges(other.csc_out_edges), current(other.current) {}; + + In_Edges_iterator &operator=(const In_Edges_iterator &other) { + if (this != &other) { + target_vert = other.target_vert; + csc_out_edges = other.csc_out_edges; + current = other.current; + } + return *this; + }; + + inline value_type operator*() const { + const vertex_idx src_vert = *current; + typename BaseT::Compact_Children_Edges::Children_range range = csc_out_edges.children(src_vert); + + assert(std::binary_search(range.cbegin(), range.cend(), target_vert)); + auto it = std::lower_bound(range.cbegin(), range.cend(), target_vert); + + edge_t diff = static_cast(std::distance(range.cbegin(), it)); + edge_t edge_desc_val = csc_out_edges.children_indx_begin(src_vert) + diff; + + return edge_desc_val; + }; + + inline In_Edges_iterator &operator++() { + ++current; + return *this; + }; + + inline In_Edges_iterator operator++(int) { + In_Edges_iterator temp = *this; + ++(*this); + return temp; + }; + + inline In_Edges_iterator &operator--() { + --current; + return *this; + }; + + inline In_Edges_iterator operator--(int) { + In_Edges_iterator temp = *this; + --(*this); + return temp; + }; + + inline bool operator==(const In_Edges_iterator &other) const { return current == other.current; }; + + inline bool operator!=(const In_Edges_iterator &other) const { return !(*this == other); }; + + inline bool operator<=(const In_Edges_iterator &other) const { return current <= other.current; }; + + inline bool operator<(const In_Edges_iterator &other) const { return (*this <= other) && (*this != other); }; + + inline bool operator>=(const In_Edges_iterator &other) const { return (!(*this <= other)) || (*this == other); }; + inline bool operator>(const In_Edges_iterator &other) const { return !(*this <= other); }; }; - public: - Compact_Sparse_Graph_EdgeDesc() = default; - Compact_Sparse_Graph_EdgeDesc(const Compact_Sparse_Graph_EdgeDesc &other) = default; - Compact_Sparse_Graph_EdgeDesc(Compact_Sparse_Graph_EdgeDesc &&other) = default; - Compact_Sparse_Graph_EdgeDesc &operator=(const Compact_Sparse_Graph_EdgeDesc &other) = default; - Compact_Sparse_Graph_EdgeDesc &operator=(Compact_Sparse_Graph_EdgeDesc &&other) = default; - virtual ~Compact_Sparse_Graph_EdgeDesc() = default; - - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges) : BaseT(num_vertices_, edges) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } - } + public: + In_Edges_range() = default; + In_Edges_range(const In_Edges_range &other) = default; + In_Edges_range(In_Edges_range &&other) = default; + In_Edges_range &operator=(const In_Edges_range &other) = default; + In_Edges_range &operator=(In_Edges_range &&other) = default; + virtual ~In_Edges_range() = default; - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &ww) : BaseT(num_vertices_, edges, ww) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } - } + In_Edges_range(const vertex_idx &tgt_vert_, const ThisT &graph, const typename BaseT::Compact_Children_Edges &csc_out_edges_) + : tgt_vert(tgt_vert_), par_range(graph.parents(tgt_vert_)), csc_out_edges(csc_out_edges_) {}; - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &&ww) : BaseT(num_vertices_, edges, std::move(ww)) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } - } + inline auto cbegin() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cbegin()); }; - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, std::vector &ww, std::vector &cw) : BaseT(num_vertices_, edges, ww, cw) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } + inline auto cend() const { return In_Edges_iterator(tgt_vert, csc_out_edges, par_range.cend()); }; + + inline auto begin() const { return cbegin(); }; + + inline auto end() const { return cend(); }; + }; + + public: + Compact_Sparse_Graph_EdgeDesc() = default; + Compact_Sparse_Graph_EdgeDesc(const Compact_Sparse_Graph_EdgeDesc &other) = default; + Compact_Sparse_Graph_EdgeDesc(Compact_Sparse_Graph_EdgeDesc &&other) = default; + Compact_Sparse_Graph_EdgeDesc &operator=(const Compact_Sparse_Graph_EdgeDesc &other) = default; + Compact_Sparse_Graph_EdgeDesc &operator=(Compact_Sparse_Graph_EdgeDesc &&other) = default; + virtual ~Compact_Sparse_Graph_EdgeDesc() = default; + + template + Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type &edges) : BaseT(num_vertices_, edges) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); } - - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, std::vector &&ww, std::vector &&cw) : BaseT(num_vertices_, edges, std::move(ww), std::move(cw)) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } + } + + template + Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, + const edge_list_type &edges, + const std::vector &ww) + : BaseT(num_vertices_, edges, ww) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); } - - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &ww, const std::vector &cw, const std::vector &mw) : BaseT(num_vertices_, edges, ww, cw, mw) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } + } + + template + Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, + const edge_list_type &edges, + const std::vector &&ww) + : BaseT(num_vertices_, edges, std::move(ww)) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); } - - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &&ww, const std::vector &&cw, const std::vector &&mw) : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw)) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } + } + + template + Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, + const edge_list_type &edges, + std::vector &ww, + std::vector &cw) + : BaseT(num_vertices_, edges, ww, cw) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); } - - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &ww, const std::vector &cw, const std::vector &mw, const std::vector &vt) : BaseT(num_vertices_, edges, ww, cw, mw, vt) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } + } + + template + Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, + const edge_list_type &edges, + std::vector &&ww, + std::vector &&cw) + : BaseT(num_vertices_, edges, std::move(ww), std::move(cw)) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); } - - template - Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, const edge_list_type & edges, const std::vector &&ww, const std::vector &&cw, const std::vector &&mw, const std::vector &&vt) : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw), std::move(vt)) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } + } + + template + Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, + const edge_list_type &edges, + const std::vector &ww, + const std::vector &cw, + const std::vector &mw) + : BaseT(num_vertices_, edges, ww, cw, mw) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + } + } + + template + Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, + const edge_list_type &edges, + const std::vector &&ww, + const std::vector &&cw, + const std::vector &&mw) + : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw)) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + } + } + + template + Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, + const edge_list_type &edges, + const std::vector &ww, + const std::vector &cw, + const std::vector &mw, + const std::vector &vt) + : BaseT(num_vertices_, edges, ww, cw, mw, vt) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); } + } + + template + Compact_Sparse_Graph_EdgeDesc(vertex_idx num_vertices_, + const edge_list_type &edges, + const std::vector &&ww, + const std::vector &&cw, + const std::vector &&mw, + const std::vector &&vt) + : BaseT(num_vertices_, edges, std::move(ww), std::move(cw), std::move(mw), std::move(vt)) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + } + } - template - Compact_Sparse_Graph_EdgeDesc(const Graph_type & graph) : BaseT(graph) { - if constexpr (use_edge_comm_weights) { - edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); - } + template + Compact_Sparse_Graph_EdgeDesc(const Graph_type &graph) : BaseT(graph) { + if constexpr (use_edge_comm_weights) { + edge_comm_weights = std::vector(BaseT::num_edges(), static_cast(0)); + } - if constexpr (has_edge_weights_v && use_edge_comm_weights) { - for (const auto& edge : edges(graph)) { - const auto src = source(edge, graph); - const auto tgt = target(edge, graph); - set_edge_comm_weight(src, tgt, graph.edge_comm_weight(edge)); - } + if constexpr (has_edge_weights_v && use_edge_comm_weights) { + for (const auto &edge : edges(graph)) { + const auto src = source(edge, graph); + const auto tgt = target(edge, graph); + set_edge_comm_weight(src, tgt, graph.edge_comm_weight(edge)); } } + } + inline auto edges() const { return integral_range(BaseT::number_of_edges); }; + inline directed_edge_descriptor edge(const vertex_idx &src, const vertex_idx &tgt) const { + typename BaseT::Compact_Children_Edges::Children_range range = BaseT::csc_out_edges.children(src); - inline auto edges() const { return integral_range(BaseT::number_of_edges); }; - inline directed_edge_descriptor edge(const vertex_idx &src, const vertex_idx &tgt) const { - typename BaseT::Compact_Children_Edges::Children_range range = BaseT::csc_out_edges.children(src); + assert(std::binary_search(range.cbegin(), range.cend(), tgt)); + auto it = std::lower_bound(range.cbegin(), range.cend(), tgt); - assert(std::binary_search(range.cbegin(), range.cend(), tgt)); - auto it = std::lower_bound(range.cbegin(), range.cend(), tgt); - - directed_edge_descriptor diff = static_cast( std::distance(range.cbegin(), it) ); - directed_edge_descriptor edge_desc_val = BaseT::csc_out_edges.children_indx_begin(src) + diff; + directed_edge_descriptor diff = static_cast(std::distance(range.cbegin(), it)); + directed_edge_descriptor edge_desc_val = BaseT::csc_out_edges.children_indx_begin(src) + diff; - return edge_desc_val; - }; - - inline vertex_idx source(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.source(edge); }; - inline vertex_idx target(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.target(edge); }; - - inline auto out_edges(const vertex_idx &vert) const { return integral_range(BaseT::csc_out_edges.children_indx_begin(vert), BaseT::csc_out_edges.children_indx_begin(vert + 1)); }; - inline auto in_edges(const vertex_idx &vert) const { return In_Edges_range(vert, *this, BaseT::csc_out_edges); }; - - template - inline std::enable_if_t edge_comm_weight(const directed_edge_descriptor &edge) const { - return edge_comm_weights[edge]; - } - template - inline std::enable_if_t edge_comm_weight(const directed_edge_descriptor &edge) const { - return static_cast(1); - } + return edge_desc_val; + }; - template - inline std::enable_if_t set_edge_comm_weight(const vertex_idx &src, const vertex_idx &tgt, const edge_comm_weight_type e_comm_weight) { - if constexpr (keep_vertex_order) { - edge_comm_weights[edge(src, tgt)] = e_comm_weight; - } else { - const vertex_idx internal_src = BaseT::vertex_permutation_from_original_to_internal[src]; - const vertex_idx internal_tgt = BaseT::vertex_permutation_from_original_to_internal[tgt]; - edge_comm_weights[edge(internal_src, internal_tgt)] = e_comm_weight; - } - } - template - inline std::enable_if_t set_edge_comm_weight(const vertex_idx &src, const vertex_idx &tgt, const edge_comm_weight_type e_comm_weight) { - static_assert(use_edge_comm_weights, "To set edge communication weight, graph type must allow edge communication weights."); + inline vertex_idx source(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.source(edge); }; + + inline vertex_idx target(const directed_edge_descriptor &edge) const { return BaseT::csc_out_edges.target(edge); }; + + inline auto out_edges(const vertex_idx &vert) const { + return integral_range(BaseT::csc_out_edges.children_indx_begin(vert), + BaseT::csc_out_edges.children_indx_begin(vert + 1)); + }; + + inline auto in_edges(const vertex_idx &vert) const { return In_Edges_range(vert, *this, BaseT::csc_out_edges); }; + + template + inline std::enable_if_t edge_comm_weight(const directed_edge_descriptor &edge) const { + return edge_comm_weights[edge]; + } + + template + inline std::enable_if_t edge_comm_weight(const directed_edge_descriptor &edge) const { + return static_cast(1); + } + + template + inline std::enable_if_t set_edge_comm_weight(const vertex_idx &src, + const vertex_idx &tgt, + const edge_comm_weight_type e_comm_weight) { + if constexpr (keep_vertex_order) { + edge_comm_weights[edge(src, tgt)] = e_comm_weight; + } else { + const vertex_idx internal_src = BaseT::vertex_permutation_from_original_to_internal[src]; + const vertex_idx internal_tgt = BaseT::vertex_permutation_from_original_to_internal[tgt]; + edge_comm_weights[edge(internal_src, internal_tgt)] = e_comm_weight; } + } + + template + inline std::enable_if_t set_edge_comm_weight(const vertex_idx &src, + const vertex_idx &tgt, + const edge_comm_weight_type e_comm_weight) { + static_assert(use_edge_comm_weights, "To set edge communication weight, graph type must allow edge communication weights."); + } }; - -template -inline auto edges(const Compact_Sparse_Graph_EdgeDesc &graph) { +template +inline auto edges(const Compact_Sparse_Graph_EdgeDesc &graph) { return graph.edges(); } -template -inline auto out_edges(vertex_idx_t> v, - const Compact_Sparse_Graph_EdgeDesc &graph) { +template +inline auto out_edges(vertex_idx_t> v, + const Compact_Sparse_Graph_EdgeDesc &graph) { return graph.out_edges(v); } -template -inline auto in_edges(vertex_idx_t> v, - const Compact_Sparse_Graph_EdgeDesc &graph) { +template +inline auto in_edges(vertex_idx_t> v, + const Compact_Sparse_Graph_EdgeDesc &graph) { return graph.in_edges(v); } -template -inline vertex_idx_t> source(const edge_desc_t> &edge, const Compact_Sparse_Graph_EdgeDesc &graph) { +template +inline vertex_idx_t> +source(const edge_desc_t> &edge, + const Compact_Sparse_Graph_EdgeDesc &graph) { return graph.source(edge); } -template -inline vertex_idx_t> target(const edge_desc_t> &edge, const Compact_Sparse_Graph_EdgeDesc &graph) { +template +inline vertex_idx_t> +target(const edge_desc_t> &edge, + const Compact_Sparse_Graph_EdgeDesc &graph) { return graph.target(edge); } - -template -struct is_Compact_Sparse_Graph, void> : std::true_type {}; - -template -struct is_Compact_Sparse_Graph_reorder, void> : std::true_type {}; - - - - - - - - +template +struct is_Compact_Sparse_Graph, + void> : std::true_type {}; + +template +struct is_Compact_Sparse_Graph_reorder, + void> : std::true_type {}; static_assert(is_Compact_Sparse_Graph_v>); static_assert(is_Compact_Sparse_Graph_v>); static_assert(!is_Compact_Sparse_Graph_reorder_v>); static_assert(is_Compact_Sparse_Graph_reorder_v>); +static_assert(has_vertex_weights_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept"); -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept"); +static_assert(has_vertex_weights_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept"); -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_vertex_weights concept"); +static_assert(is_directed_graph_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); +static_assert(is_directed_graph_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); +static_assert(is_directed_graph_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); +static_assert(is_directed_graph_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); -static_assert(is_directed_graph_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed_graph concept"); +static_assert(is_computational_dag_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept"); -static_assert(is_computational_dag_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept"); - -static_assert(is_computational_dag_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept"); +static_assert(is_computational_dag_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag concept"); static_assert(is_computational_dag_typed_vertices_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept"); + "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept"); static_assert(is_computational_dag_typed_vertices_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept"); + "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computation_dag with types concept"); static_assert(is_directed_graph_edge_desc_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept."); + "Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept."); static_assert(is_directed_graph_edge_desc_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept."); + "Compact_Sparse_Graph_EdgeDesc must satisfy the directed graph edge descriptor concept."); -static_assert(is_computational_dag_typed_vertices_edge_desc_v>, +static_assert( + is_computational_dag_typed_vertices_edge_desc_v>, "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computational_dag_typed_vertices_edge_desc_v with types concept"); -static_assert(is_computational_dag_typed_vertices_edge_desc_v>, +static_assert( + is_computational_dag_typed_vertices_edge_desc_v>, "Compact_Sparse_Graph_EdgeDesc must satisfy the is_computational_dag_typed_vertices_edge_desc_v with types concept"); static_assert(has_edge_weights_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept"); + "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept"); static_assert(has_edge_weights_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept"); - -static_assert(has_hashable_edge_desc_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept"); + "Compact_Sparse_Graph_EdgeDesc must satisfy the has_edge_weights concept"); -static_assert(has_hashable_edge_desc_v>, - "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept"); +static_assert(has_hashable_edge_desc_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept"); -using CSGE = Compact_Sparse_Graph_EdgeDesc; +static_assert(has_hashable_edge_desc_v>, + "Compact_Sparse_Graph_EdgeDesc must satisfy the has_hashable_edge_desc concept"); +using CSGE + = Compact_Sparse_Graph_EdgeDesc; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp b/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp index 5a439664..f4445622 100644 --- a/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp +++ b/include/osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp @@ -17,18 +17,18 @@ limitations under the License. */ #pragma once -#include "osp/auxiliary/hash_util.hpp" +#include + #include "cdag_vertex_impl.hpp" #include "edge_iterator.hpp" -#include "osp/graph_implementations/integral_range.hpp" +#include "osp/auxiliary/hash_util.hpp" #include "osp/graph_algorithms/computational_dag_construction_util.hpp" -#include +#include "osp/graph_implementations/integral_range.hpp" namespace osp { -template +template struct directed_edge_descriptor_impl { - using vertex_idx = typename v_impl::vertex_idx_type; vertex_idx idx; @@ -37,12 +37,15 @@ struct directed_edge_descriptor_impl { vertex_idx target; directed_edge_descriptor_impl() : idx(0), source(0), target(0) {} + directed_edge_descriptor_impl(const directed_edge_descriptor_impl &other) = default; directed_edge_descriptor_impl(directed_edge_descriptor_impl &&other) = default; directed_edge_descriptor_impl &operator=(const directed_edge_descriptor_impl &other) = default; directed_edge_descriptor_impl &operator=(directed_edge_descriptor_impl &&other) = default; + directed_edge_descriptor_impl(vertex_idx source_arg, vertex_idx target_arg, vertex_idx idx_arg) : idx(idx_arg), source(source_arg), target(target_arg) {} + ~directed_edge_descriptor_impl() = default; bool operator==(const directed_edge_descriptor_impl &other) const { @@ -52,19 +55,19 @@ struct directed_edge_descriptor_impl { bool operator!=(const directed_edge_descriptor_impl &other) const { return !(*this == other); } }; -template +template struct cdag_edge_impl { - using cdag_edge_comm_weight_type = edge_comm_weight_t; cdag_edge_impl(edge_comm_weight_t comm_weight_arg = 1) : comm_weight(comm_weight_arg) {} + edge_comm_weight_t comm_weight; }; using cdag_edge_impl_int = cdag_edge_impl; using cdag_edge_impl_unsigned = cdag_edge_impl; -template +template class computational_dag_edge_idx_vector_impl { public: // graph_traits specialization @@ -119,9 +122,8 @@ class computational_dag_edge_idx_vector_impl { computational_dag_edge_idx_vector_impl(const computational_dag_edge_idx_vector_impl &other) = default; - template + template computational_dag_edge_idx_vector_impl(const Graph_t &other) { - static_assert(is_computational_dag_v, "Graph_t must satisfy the is_computation_dag concept"); constructComputationalDag(other, *this); @@ -130,10 +132,11 @@ class computational_dag_edge_idx_vector_impl { computational_dag_edge_idx_vector_impl &operator=(const computational_dag_edge_idx_vector_impl &other) = default; computational_dag_edge_idx_vector_impl(computational_dag_edge_idx_vector_impl &&other) - : vertices_(std::move(other.vertices_)), edges_(std::move(other.edges_)), - num_vertex_types_(other.num_vertex_types_), out_edges_(std::move(other.out_edges_)), + : vertices_(std::move(other.vertices_)), + edges_(std::move(other.edges_)), + num_vertex_types_(other.num_vertex_types_), + out_edges_(std::move(other.out_edges_)), in_edges_(std::move(other.in_edges_)) { - other.num_vertex_types_ = 0; } @@ -152,38 +155,45 @@ class computational_dag_edge_idx_vector_impl { virtual ~computational_dag_edge_idx_vector_impl() = default; inline vertex_idx num_edges() const { return static_cast(edges_.size()); } + inline vertex_idx num_vertices() const { return static_cast(vertices_.size()); } inline auto edges() const { return edge_range_vector_impl(*this); } inline auto parents(vertex_idx v) const { return edge_source_range(in_edges_[v], *this); } + inline auto children(vertex_idx v) const { return edge_target_range(out_edges_[v], *this); } inline auto vertices() const { return integral_range(static_cast(vertices_.size())); } inline const std::vector &in_edges(vertex_idx v) const { return in_edges_[v]; } + inline const std::vector &out_edges(vertex_idx v) const { return out_edges_[v]; } inline vertex_idx in_degree(vertex_idx v) const { return static_cast(in_edges_[v].size()); } + inline vertex_idx out_degree(vertex_idx v) const { return static_cast(out_edges_[v].size()); } - inline edge_comm_weight_type edge_comm_weight(directed_edge_descriptor e) const { - return edges_[e.idx].comm_weight; - } + inline edge_comm_weight_type edge_comm_weight(directed_edge_descriptor e) const { return edges_[e.idx].comm_weight; } inline vertex_work_weight_type vertex_work_weight(vertex_idx v) const { return vertices_[v].work_weight; } + inline vertex_comm_weight_type vertex_comm_weight(vertex_idx v) const { return vertices_[v].comm_weight; } + inline vertex_mem_weight_type vertex_mem_weight(vertex_idx v) const { return vertices_[v].mem_weight; } inline unsigned num_vertex_types() const { return num_vertex_types_; } + inline vertex_type_type vertex_type(vertex_idx v) const { return vertices_[v].vertex_type; } inline vertex_idx source(const directed_edge_descriptor &e) const { return e.source; } - inline vertex_idx target(const directed_edge_descriptor &e) const { return e.target; } - vertex_idx add_vertex(vertex_work_weight_type work_weight, vertex_comm_weight_type comm_weight, - vertex_mem_weight_type mem_weight, vertex_type_type vertex_type = 0) { + inline vertex_idx target(const directed_edge_descriptor &e) const { return e.target; } + vertex_idx add_vertex(vertex_work_weight_type work_weight, + vertex_comm_weight_type comm_weight, + vertex_mem_weight_type mem_weight, + vertex_type_type vertex_type = 0) { vertices_.emplace_back(vertices_.size(), work_weight, comm_weight, mem_weight, vertex_type); out_edges_.push_back({}); @@ -194,9 +204,7 @@ class computational_dag_edge_idx_vector_impl { return vertices_.back().id; } - std::pair add_edge(vertex_idx source, vertex_idx target, - edge_comm_weight_type comm_weight = 1) { - + std::pair add_edge(vertex_idx source, vertex_idx target, edge_comm_weight_type comm_weight = 1) { if (source == target) { return {directed_edge_descriptor{}, false}; } @@ -222,12 +230,13 @@ class computational_dag_edge_idx_vector_impl { inline void set_vertex_work_weight(vertex_idx v, vertex_work_weight_type work_weight) { vertices_[v].work_weight = work_weight; } + inline void set_vertex_comm_weight(vertex_idx v, vertex_comm_weight_type comm_weight) { vertices_[v].comm_weight = comm_weight; } - inline void set_vertex_mem_weight(vertex_idx v, vertex_mem_weight_type mem_weight) { - vertices_[v].mem_weight = mem_weight; - } + + inline void set_vertex_mem_weight(vertex_idx v, vertex_mem_weight_type mem_weight) { vertices_[v].mem_weight = mem_weight; } + inline void set_vertex_type(vertex_idx v, vertex_type_type vertex_type) { vertices_[v].vertex_type = vertex_type; num_vertex_types_ = std::max(num_vertex_types_, vertex_type + 1); @@ -238,58 +247,59 @@ class computational_dag_edge_idx_vector_impl { } inline const v_impl &get_vertex_impl(vertex_idx v) const { return vertices_[v]; } + inline const e_impl &get_edge_impl(directed_edge_descriptor e) const { return edges_[e.idx]; } }; -template +template inline auto edges(const computational_dag_edge_idx_vector_impl &graph) { return graph.edges(); } -template +template inline auto out_edges(vertex_idx_t> v, const computational_dag_edge_idx_vector_impl &graph) { return graph.out_edges(v); } -template +template inline auto in_edges(vertex_idx_t> v, const computational_dag_edge_idx_vector_impl &graph) { return graph.in_edges(v); } - // default implementation to get the source of an edge -template -inline vertex_idx_t> source(const edge_desc_t> &edge, const computational_dag_edge_idx_vector_impl &graph) { +template +inline vertex_idx_t> source( + const edge_desc_t> &edge, + const computational_dag_edge_idx_vector_impl &graph) { return graph.source(edge); } // default implementation to get the target of an edge -template -inline vertex_idx_t> target(const edge_desc_t> &edge, const computational_dag_edge_idx_vector_impl &graph) { +template +inline vertex_idx_t> target( + const edge_desc_t> &edge, + const computational_dag_edge_idx_vector_impl &graph) { return graph.target(edge); } - // default template specialization -using computational_dag_edge_idx_vector_impl_def_t = - computational_dag_edge_idx_vector_impl; - -using computational_dag_edge_idx_vector_impl_def_int_t = - computational_dag_edge_idx_vector_impl; +using computational_dag_edge_idx_vector_impl_def_t + = computational_dag_edge_idx_vector_impl; +using computational_dag_edge_idx_vector_impl_def_int_t + = computational_dag_edge_idx_vector_impl; static_assert(is_directed_graph_edge_desc_v, "computational_dag_edge_idx_vector_impl must satisfy the directed_graph_edge_desc concept"); -static_assert( - is_computational_dag_typed_vertices_edge_desc_v, - "computational_dag_edge_idx_vector_impl must satisfy the computation_dag_typed_vertices_edge_desc concept"); +static_assert(is_computational_dag_typed_vertices_edge_desc_v, + "computational_dag_edge_idx_vector_impl must satisfy the computation_dag_typed_vertices_edge_desc concept"); -} // namespace osp +} // namespace osp -template +template struct std::hash> { using vertex_idx = typename v_impl::vertex_idx_type; @@ -299,4 +309,4 @@ struct std::hash> { return h1; } -}; \ No newline at end of file +}; diff --git a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp index 0a1b676a..b6621760 100644 --- a/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp +++ b/include/osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp @@ -17,14 +17,14 @@ limitations under the License. */ #pragma once +#include +#include + #include "cdag_vertex_impl.hpp" #include "osp/concepts/computational_dag_concept.hpp" #include "osp/concepts/directed_graph_edge_desc_concept.hpp" #include "osp/graph_algorithms/computational_dag_construction_util.hpp" #include "osp/graph_implementations/integral_range.hpp" -#include - -#include namespace osp { @@ -54,11 +54,12 @@ namespace osp { * - `mem_weight`: Of type `mem_weight_type`. * - `vertex_type`: Of type `cdag_vertex_type_type`. * - It must be constructible with the signature: - * `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight, cdag_vertex_type_type vertex_type)` + * `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight, + * cdag_vertex_type_type vertex_type)` * * @see cdag_vertex_impl for a reference implementation of the vertex type. */ -template +template class computational_dag_vector_impl { public: using vertex_idx = typename v_impl::vertex_idx_type; @@ -76,8 +77,7 @@ class computational_dag_vector_impl { * @param num_vertices The number of vertices to initialize. */ explicit computational_dag_vector_impl(const vertex_idx num_vertices) - : vertices_(num_vertices), out_neigbors(num_vertices), in_neigbors(num_vertices), num_edges_(0), - num_vertex_types_(0) { + : vertices_(num_vertices), out_neigbors(num_vertices), in_neigbors(num_vertices), num_edges_(0), num_vertex_types_(0) { for (vertex_idx i = 0; i < num_vertices; ++i) { vertices_[i].id = i; } @@ -95,17 +95,18 @@ class computational_dag_vector_impl { * @tparam Graph_t The type of the source graph. Must satisfy `is_computational_dag_v`. * @param other The source graph to copy from. */ - template + template explicit computational_dag_vector_impl(const Graph_t &other) { static_assert(is_computational_dag_v, "Graph_t must satisfy the is_computation_dag concept"); constructComputationalDag(other, *this); } computational_dag_vector_impl(computational_dag_vector_impl &&other) noexcept - : vertices_(std::move(other.vertices_)), out_neigbors(std::move(other.out_neigbors)), - in_neigbors(std::move(other.in_neigbors)), num_edges_(other.num_edges_), + : vertices_(std::move(other.vertices_)), + out_neigbors(std::move(other.out_neigbors)), + in_neigbors(std::move(other.in_neigbors)), + num_edges_(other.num_edges_), num_vertex_types_(other.num_vertex_types_) { - other.num_edges_ = 0; other.num_vertex_types_ = 0; }; @@ -191,8 +192,10 @@ class computational_dag_vector_impl { * @param vertex_type Type of the vertex. * @return The index of the newly added vertex. */ - vertex_idx add_vertex(const vertex_work_weight_type work_weight, const vertex_comm_weight_type comm_weight, - const vertex_mem_weight_type mem_weight, const vertex_type_type vertex_type = 0) { + vertex_idx add_vertex(const vertex_work_weight_type work_weight, + const vertex_comm_weight_type comm_weight, + const vertex_mem_weight_type mem_weight, + const vertex_type_type vertex_type = 0) { vertices_.emplace_back(vertices_.size(), work_weight, comm_weight, mem_weight, vertex_type); out_neigbors.push_back({}); in_neigbors.push_back({}); @@ -227,8 +230,10 @@ class computational_dag_vector_impl { * @return True if the edge was added, false if it already exists or vertices are invalid. */ bool add_edge(const vertex_idx source, const vertex_idx target) { - if (source >= static_cast(vertices_.size()) || target >= static_cast(vertices_.size()) || source == target) + if (source >= static_cast(vertices_.size()) || target >= static_cast(vertices_.size()) + || source == target) { return false; + } const auto &out = out_neigbors.at(source); if (std::find(out.begin(), out.end(), target) != out.end()) { @@ -274,4 +279,4 @@ static_assert(is_directed_graph_v>, "computational_dag_vector_impl must satisfy the is_computation_dag concept"); -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp b/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp index 3ab94872..56b9ac38 100644 --- a/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp +++ b/include/osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp @@ -17,11 +17,12 @@ limitations under the License. */ #pragma once +#include + #include "cdag_vertex_impl.hpp" #include "osp/concepts/computational_dag_concept.hpp" #include "osp/graph_implementations/integral_range.hpp" #include "vector_cast_view.hpp" -#include namespace osp { @@ -52,13 +53,13 @@ namespace osp { * - `mem_weight`: Of type `mem_weight_type`. * - `vertex_type`: Of type `cdag_vertex_type_type`. * - It must be constructible with the signature: - * `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight, cdag_vertex_type_type vertex_type)` + * `v_impl(vertex_idx_type id, work_weight_type work_weight, comm_weight_type comm_weight, mem_weight_type mem_weight, + * cdag_vertex_type_type vertex_type)` * * @tparam index_t The type used for vertex indices in the adjacency lists. */ -template +template class dag_vector_adapter { - public: using vertex_idx = typename v_impl::vertex_idx_type; @@ -78,7 +79,12 @@ class dag_vector_adapter { * @warning The adapter stores pointers to these vectors. They must remain valid for the lifetime of the adapter. */ dag_vector_adapter(const std::vector> &out_neigbors_, - const std::vector> &in_neigbors_) : vertices_(out_neigbors_.size()), out_neigbors(&out_neigbors_), in_neigbors(&in_neigbors_), num_edges_(0), num_vertex_types_(1) { + const std::vector> &in_neigbors_) + : vertices_(out_neigbors_.size()), + out_neigbors(&out_neigbors_), + in_neigbors(&in_neigbors_), + num_edges_(0), + num_vertex_types_(1) { for (vertex_idx i = 0; i < static_cast(out_neigbors_.size()); ++i) { vertices_[i].id = i; num_edges_ += out_neigbors_[i].size(); @@ -99,7 +105,8 @@ class dag_vector_adapter { * @param in_neigbors_ New in-neighbors adjacency list. * @param out_neigbors_ New out-neighbors adjacency list. */ - void set_in_out_neighbors(const std::vector> &in_neigbors_, const std::vector> &out_neigbors_) { + void set_in_out_neighbors(const std::vector> &in_neigbors_, + const std::vector> &out_neigbors_) { out_neigbors = &out_neigbors_; in_neigbors = &in_neigbors_; @@ -204,4 +211,4 @@ static_assert(is_directed_graph_v>, "dag_vector_adapter must satisfy the is_computation_dag concept"); -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp b/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp index aed4fe18..56cce059 100644 --- a/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp +++ b/include/osp/graph_implementations/adj_list_impl/edge_iterator.hpp @@ -24,16 +24,14 @@ limitations under the License. namespace osp { -template +template class edge_range_vector_impl { - using directed_edge_descriptor = typename directed_graph_edge_desc_traits::directed_edge_descriptor; using vertex_idx = typename directed_graph_traits::vertex_idx; using iter = typename Graph_t::out_edges_iterator_t; const Graph_t &graph; struct edge_iterator { - vertex_idx current_vertex; std::size_t current_edge_idx; iter current_edge; @@ -48,6 +46,7 @@ class edge_range_vector_impl { using reference = const value_type &; edge_iterator() : current_vertex(0u), current_edge_idx(0u), graph(nullptr) {} + edge_iterator(const edge_iterator &other) : current_vertex(other.current_vertex), current_edge_idx(other.current_edge_idx), graph(other.graph) {} @@ -61,7 +60,6 @@ class edge_range_vector_impl { } edge_iterator(const Graph_t &graph_) : current_vertex(0u), current_edge_idx(0u), graph(&graph_) { - while (current_vertex != graph->num_vertices()) { if (graph->out_edges(current_vertex).begin() != graph->out_edges(current_vertex).end()) { current_edge = graph->out_edges(current_vertex).begin(); @@ -73,19 +71,14 @@ class edge_range_vector_impl { edge_iterator(std::size_t current_edge_idx_, const Graph_t &graph_) : current_vertex(0u), current_edge_idx(current_edge_idx_), graph(&graph_) { - if (current_edge_idx < graph->num_edges()) { - std::size_t tmp = 0u; if (tmp < current_edge_idx) { - while (current_vertex != graph->num_vertices()) { - current_edge = graph->out_edges(current_vertex).begin(); while (current_edge != graph->out_edges(current_vertex).end()) { - if (tmp == current_edge_idx) { break; } @@ -105,20 +98,18 @@ class edge_range_vector_impl { } const value_type &operator*() const { return *current_edge; } + const value_type *operator->() const { return &(*current_edge); } // Prefix increment edge_iterator &operator++() { - current_edge++; current_edge_idx++; if (current_edge == graph->out_edges(current_vertex).end()) { - current_vertex++; while (current_vertex != graph->num_vertices()) { - if (graph->out_edges(current_vertex).begin() != graph->out_edges(current_vertex).end()) { current_edge = graph->out_edges(current_vertex).begin(); break; @@ -139,6 +130,7 @@ class edge_range_vector_impl { } inline bool operator==(const edge_iterator &other) const { return current_edge_idx == other.current_edge_idx; } + inline bool operator!=(const edge_iterator &other) const { return current_edge_idx != other.current_edge_idx; } }; @@ -152,9 +144,8 @@ class edge_range_vector_impl { auto size() const { return graph.num_edges(); } }; -template +template class edge_source_range { - using directed_edge_descriptor = typename directed_graph_edge_desc_traits::directed_edge_descriptor; using vertex_idx = typename directed_graph_traits::vertex_idx; using iter = typename Graph_t::in_edges_iterator_t; @@ -163,7 +154,6 @@ class edge_source_range { const std::vector &edges; struct source_iterator { - const Graph_t *graph; iter current_edge; @@ -204,6 +194,7 @@ class edge_source_range { } inline bool operator==(const source_iterator &other) const { return current_edge == other.current_edge; } + inline bool operator!=(const source_iterator &other) const { return current_edge != other.current_edge; } }; @@ -218,9 +209,8 @@ class edge_source_range { auto size() const { return edges.size(); } }; -template +template class edge_target_range { - using directed_edge_descriptor = typename directed_graph_edge_desc_traits::directed_edge_descriptor; using vertex_idx = typename directed_graph_traits::vertex_idx; using iter = typename Graph_t::out_edges_iterator_t; @@ -228,7 +218,6 @@ class edge_target_range { const std::vector &edges; struct target_iterator { - const Graph_t *graph; iter current_edge; @@ -240,6 +229,7 @@ class edge_target_range { using reference = const value_type &; target_iterator() : graph(nullptr) {} + target_iterator(const target_iterator &other) : graph(other.graph), current_edge(other.current_edge) {} target_iterator &operator=(const target_iterator &other) { @@ -268,6 +258,7 @@ class edge_target_range { } inline bool operator==(const target_iterator &other) const { return current_edge == other.current_edge; } + inline bool operator!=(const target_iterator &other) const { return current_edge != other.current_edge; } }; @@ -282,4 +273,4 @@ class edge_target_range { auto size() const { return edges.size(); } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp b/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp index b42ea17d..4de995e9 100644 --- a/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp +++ b/include/osp/graph_implementations/adj_list_impl/vector_cast_view.hpp @@ -31,9 +31,8 @@ namespace osp { * @tparam from_t The original type of elements in the vector. * @tparam to_t The target type to cast elements to. */ -template +template class vector_cast_view { - using iter = typename std::vector::const_iterator; const std::vector &vec; @@ -53,6 +52,7 @@ class vector_cast_view { iter current_edge; cast_iterator() = default; + explicit cast_iterator(iter current_edge_) : current_edge(current_edge_) {} value_type operator*() const { return static_cast(*current_edge); } @@ -96,10 +96,15 @@ class vector_cast_view { difference_type operator-(const cast_iterator &other) const { return current_edge - other.current_edge; } bool operator==(const cast_iterator &other) const { return current_edge == other.current_edge; } + bool operator!=(const cast_iterator &other) const { return current_edge != other.current_edge; } + bool operator<(const cast_iterator &other) const { return current_edge < other.current_edge; } + bool operator>(const cast_iterator &other) const { return current_edge > other.current_edge; } + bool operator<=(const cast_iterator &other) const { return current_edge <= other.current_edge; } + bool operator>=(const cast_iterator &other) const { return current_edge >= other.current_edge; } }; @@ -144,4 +149,4 @@ class vector_cast_view { [[nodiscard]] auto operator[](std::size_t i) const { return static_cast(vec[i]); } }; -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/graph_implementations/boost_graphs/boost_graph.hpp b/include/osp/graph_implementations/boost_graphs/boost_graph.hpp index 35360fdd..8dcb069d 100644 --- a/include/osp/graph_implementations/boost_graphs/boost_graph.hpp +++ b/include/osp/graph_implementations/boost_graphs/boost_graph.hpp @@ -17,27 +17,27 @@ limitations under the License. */ #pragma once -#include -#include - #include #include +#include +#include #include "osp/auxiliary/hash_util.hpp" #include "osp/concepts/computational_dag_concept.hpp" -#include "osp/concepts/directed_graph_edge_desc_concept.hpp" #include "osp/concepts/constructable_computational_dag_concept.hpp" +#include "osp/concepts/directed_graph_edge_desc_concept.hpp" #include "osp/graph_algorithms/computational_dag_construction_util.hpp" #include "source_iterator_range.hpp" -template +template struct boost_vertex { - boost_vertex() : workWeight(0), communicationWeight(0), memoryWeight(0), nodeType(0) {} - boost_vertex(vertex_workw_t workWeight_, vertex_commw_t communicationWeight_, vertex_memw_t memoryWeight_, + + boost_vertex(vertex_workw_t workWeight_, + vertex_commw_t communicationWeight_, + vertex_memw_t memoryWeight_, vertex_type_t nodeType_ = 0) - : workWeight(workWeight_), communicationWeight(communicationWeight_), memoryWeight(memoryWeight_), - nodeType(nodeType_) {} + : workWeight(workWeight_), communicationWeight(communicationWeight_), memoryWeight(memoryWeight_), nodeType(nodeType_) {} vertex_workw_t workWeight; vertex_commw_t communicationWeight; @@ -48,9 +48,10 @@ struct boost_vertex { using boost_vertex_def_int = boost_vertex; using boost_vertex_def_uint = boost_vertex; -template +template struct boost_edge { boost_edge() : communicationWeight(0) {} + boost_edge(edge_commw_t communicationWeight_) : communicationWeight(communicationWeight_) {} edge_commw_t communicationWeight; @@ -59,17 +60,17 @@ struct boost_edge { using boost_edge_def_int = boost_edge; using boost_edge_def_uint = boost_edge; -template -using boost_graph_impl = - boost::adjacency_list, - boost_edge>; +template +using boost_graph_impl = boost::adjacency_list, + boost_edge>; -using boost_edge_desc = typename boost::graph_traits< - boost::adjacency_list>::edge_descriptor; +using boost_edge_desc = + typename boost::graph_traits>::edge_descriptor; -template<> +template <> struct std::hash { std::size_t operator()(const boost_edge_desc &p) const noexcept { auto h1 = std::hash{}(p.m_source); @@ -89,12 +90,9 @@ struct std::hash { * calculating the longest path, and retrieving topological order of vertices. */ -template +template class boost_graph { - - using boost_graph_impl_t = - boost_graph_impl; + using boost_graph_impl_t = boost_graph_impl; public: // graph_traits specialization @@ -108,10 +106,10 @@ class boost_graph { using vertex_type_type = vertex_type_t; using edge_comm_weight_type = edge_commw_t; - boost_graph( - const std::vector> &out_, const std::vector &workW_, - const std::vector &commW_, - const std::unordered_map, edge_comm_weight_type, osp::pair_hash> &comm_edge_W) + boost_graph(const std::vector> &out_, + const std::vector &workW_, + const std::vector &commW_, + const std::unordered_map, edge_comm_weight_type, osp::pair_hash> &comm_edge_W) : number_of_vertex_types(0) { graph.m_vertices.reserve(out_.size()); @@ -122,7 +120,6 @@ class boost_graph { add_vertex(workW_[i], commW_[i]); } for (size_t i = 0; i < out_.size(); ++i) { - for (const auto &j : out_[i]) { assert(comm_edge_W.find(std::make_pair(i, j)) != comm_edge_W.cend()); add_edge(i, j, comm_edge_W.at(std::make_pair(i, j))); @@ -131,7 +128,8 @@ class boost_graph { updateNumberOfVertexTypes(); } - boost_graph(const std::vector> &out_, const std::vector &workW_, + boost_graph(const std::vector> &out_, + const std::vector &workW_, const std::vector &commW_) : number_of_vertex_types(0) { graph.m_vertices.reserve(out_.size()); @@ -143,7 +141,6 @@ class boost_graph { add_vertex(workW_[i], commW_[i]); } for (size_t i = 0; i < out_.size(); ++i) { - for (const auto &j : out_[i]) { add_edge(i, j); } @@ -151,8 +148,10 @@ class boost_graph { updateNumberOfVertexTypes(); } - boost_graph(const std::vector> &out_, const std::vector &workW_, - const std::vector &commW_, const std::vector &nodeType_) + boost_graph(const std::vector> &out_, + const std::vector &workW_, + const std::vector &commW_, + const std::vector &nodeType_) : number_of_vertex_types(0) { graph.m_vertices.reserve(out_.size()); @@ -164,7 +163,6 @@ class boost_graph { add_vertex(workW_[i], commW_[i], 0, nodeType_[i]); } for (size_t i = 0; i < out_.size(); ++i) { - for (const auto &j : out_[i]) { add_edge(i, j); } @@ -176,9 +174,10 @@ class boost_graph { * @brief Default constructor for the ComputationalDag class. */ explicit boost_graph() : graph(0), number_of_vertex_types(0) {} + boost_graph(vertex_idx number_of_nodes) : graph(number_of_nodes), number_of_vertex_types(0) {} - boost_graph(unsigned number_of_nodes) - : graph(static_cast(number_of_nodes)), number_of_vertex_types(0) {} + + boost_graph(unsigned number_of_nodes) : graph(static_cast(number_of_nodes)), number_of_vertex_types(0) {} boost_graph(const boost_graph &other) = default; @@ -201,9 +200,8 @@ class boost_graph { virtual ~boost_graph() = default; - template + template boost_graph(const Graph_t &other) : number_of_vertex_types(0) { - static_assert(osp::is_computational_dag_v, "Graph_t must satisfy the is_computation_dag concept"); graph.m_vertices.reserve(other.num_vertices()); @@ -212,13 +210,14 @@ class boost_graph { } inline const boost_graph_impl_t &get_boost_graph() const { return graph; } + inline boost_graph_impl_t &get_boost_graph() { return graph; } inline size_t num_vertices() const { return boost::num_vertices(graph); } + inline size_t num_edges() const { return boost::num_edges(graph); } void updateNumberOfVertexTypes() { - number_of_vertex_types = 0; for (const auto &v : vertices()) { if (vertex_type(v) >= number_of_vertex_types) { @@ -230,6 +229,7 @@ class boost_graph { inline unsigned num_vertex_types() const { return number_of_vertex_types; }; auto vertices() const { return boost::make_iterator_range(boost::vertices(graph)); } + auto vertices() { return boost::make_iterator_range(boost::vertices(graph)); } // template @@ -262,43 +262,42 @@ class boost_graph { auto edges() { return boost::extensions::make_source_iterator_range(boost::edges(graph)); } - auto in_edges(const vertex_idx &v) const { - return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph)); - } + auto in_edges(const vertex_idx &v) const { return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph)); } - auto in_edges(const vertex_idx &v) { - return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph)); - } + auto in_edges(const vertex_idx &v) { return boost::extensions::make_source_iterator_range(boost::in_edges(v, graph)); } auto out_edges(const vertex_idx &v) const { return boost::extensions::make_source_iterator_range(boost::out_edges(v, graph)); } - auto out_edges(const vertex_idx &v) { - return boost::extensions::make_source_iterator_range(boost::out_edges(v, graph)); - } + auto out_edges(const vertex_idx &v) { return boost::extensions::make_source_iterator_range(boost::out_edges(v, graph)); } vertex_idx source(const directed_edge_descriptor &e) const { return boost::source(e, graph); } + vertex_idx target(const directed_edge_descriptor &e) const { return boost::target(e, graph); } inline size_t out_degree(const vertex_idx &v) const { return boost::out_degree(v, graph); } + inline size_t in_degree(const vertex_idx &v) const { return boost::in_degree(v, graph); } vertex_work_weight_type vertex_work_weight(const vertex_idx &v) const { return graph[v].workWeight; } + vertex_comm_weight_type vertex_comm_weight(const vertex_idx &v) const { return graph[v].communicationWeight; } + vertex_mem_weight_type vertex_mem_weight(const vertex_idx &v) const { return graph[v].memoryWeight; } + vertex_type_type vertex_type(const vertex_idx &v) const { return graph[v].nodeType; } - edge_comm_weight_type edge_comm_weight(const directed_edge_descriptor &e) const { - return graph[e].communicationWeight; - } + edge_comm_weight_type edge_comm_weight(const directed_edge_descriptor &e) const { return graph[e].communicationWeight; } void set_vertex_mem_weight(const vertex_idx &v, const vertex_mem_weight_type memory_weight) { graph[v].memoryWeight = memory_weight; } + void set_vertex_work_weight(const vertex_idx &v, const vertex_work_weight_type work_weight) { graph[v].workWeight = work_weight; } + void set_vertex_type(const vertex_idx &v, const vertex_type_type node_type) { graph[v].nodeType = node_type; number_of_vertex_types = std::max(number_of_vertex_types, node_type + 1); @@ -307,22 +306,24 @@ class boost_graph { void set_vertex_comm_weight(const vertex_idx &v, const vertex_comm_weight_type comm_weight) { graph[v].communicationWeight = comm_weight; } + void set_edge_comm_weight(const directed_edge_descriptor &e, const edge_comm_weight_type comm_weight) { graph[e].communicationWeight = comm_weight; } - vertex_idx add_vertex(const vertex_work_weight_type work_weight, const vertex_comm_weight_type comm_weight, - const vertex_mem_weight_type memory_weight = 0, const vertex_type_type node_type = 0) { + vertex_idx add_vertex(const vertex_work_weight_type work_weight, + const vertex_comm_weight_type comm_weight, + const vertex_mem_weight_type memory_weight = 0, + const vertex_type_type node_type = 0) { number_of_vertex_types = std::max(number_of_vertex_types, node_type + 1); return boost::add_vertex(boost_vertex{work_weight, comm_weight, memory_weight, node_type}, graph); } - std::pair, bool> - add_edge(const vertex_idx &src, const vertex_idx &tar, edge_commw_t comm_weight = DEFAULT_EDGE_COMM_WEIGHT) { - + std::pair, bool> add_edge( + const vertex_idx &src, const vertex_idx &tar, edge_commw_t comm_weight = DEFAULT_EDGE_COMM_WEIGHT) { const auto pair = boost::add_edge(src, tar, {comm_weight}, graph); - number_of_vertex_types = std::max(number_of_vertex_types, 1u); // in case adding edges adds vertices + number_of_vertex_types = std::max(number_of_vertex_types, 1u); // in case adding edges adds vertices return pair; } @@ -343,37 +344,40 @@ class boost_graph { static constexpr edge_comm_weight_type DEFAULT_EDGE_COMM_WEIGHT = 1; }; -template +template inline auto edges(const boost_graph &graph) { return graph.edges(); } -template +template inline auto out_edges(osp::vertex_idx_t> v, const boost_graph &graph) { return graph.out_edges(v); } -template +template inline auto in_edges(osp::vertex_idx_t> v, const boost_graph &graph) { return graph.in_edges(v); } -template -inline osp::vertex_idx_t> source(const osp::edge_desc_t> &edge, const boost_graph &graph) { +template +inline osp::vertex_idx_t> source( + const osp::edge_desc_t> &edge, + const boost_graph &graph) { return graph.source(edge); } -template -inline osp::vertex_idx_t> target(const osp::edge_desc_t> &edge, const boost_graph &graph) { +template +inline osp::vertex_idx_t> target( + const osp::edge_desc_t> &edge, + const boost_graph &graph) { return graph.target(edge); } using boost_graph_int_t = boost_graph; using boost_graph_uint_t = boost_graph; - static_assert(osp::is_directed_graph_edge_desc_v, "boost_graph_adapter does not satisfy the directed_graph_edge_desc concept"); @@ -390,4 +394,4 @@ static_assert(osp::is_constructable_cdag_edge_v, "boost_graph_adapter must satisfy the is_constructable_cdag_edge concept"); static_assert(osp::is_constructable_cdag_comm_edge_v, - "boost_graph_adapter must satisfy the is_constructable_cdag_comm_edge concept"); \ No newline at end of file + "boost_graph_adapter must satisfy the is_constructable_cdag_comm_edge concept"); diff --git a/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp b/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp index a1ab3634..17fcf796 100644 --- a/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp +++ b/include/osp/graph_implementations/boost_graphs/inv_breadth_first_search.hpp @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once @@ -28,14 +28,15 @@ limitations under the License. #include namespace boost::extensions { -template -void inv_breadth_first_visit(const IncidenceGraph &g, SourceIterator sources_begin, SourceIterator sources_end, - Buffer &Q, BFSVisitor vis, ColorMap color) { - BOOST_CONCEPT_ASSERT((IncidenceGraphConcept)); + +template +void inv_breadth_first_visit( + const IncidenceGraph &g, SourceIterator sources_begin, SourceIterator sources_end, Buffer &Q, BFSVisitor vis, ColorMap color) { + BOOST_CONCEPT_ASSERT((IncidenceGraphConcept) ); typedef graph_traits GTraits; typedef typename graph_traits::vertex_descriptor Vertex; - BOOST_CONCEPT_ASSERT((BFSVisitorConcept)); - BOOST_CONCEPT_ASSERT((ReadWritePropertyMapConcept)); + BOOST_CONCEPT_ASSERT((BFSVisitorConcept) ); + BOOST_CONCEPT_ASSERT((ReadWritePropertyMapConcept) ); typedef typename property_traits::value_type ColorValue; typedef color_traits Color; typename GTraits::in_edge_iterator ei, ei_end; @@ -61,19 +62,19 @@ void inv_breadth_first_visit(const IncidenceGraph &g, SourceIterator sources_beg Q.push(v); } else { vis.non_tree_edge(*ei, g); - if (v_color == Color::gray()) + if (v_color == Color::gray()) { vis.gray_target(*ei, g); - else + } else { vis.black_target(*ei, g); + } } - } // end for + } // end for put(color, u, Color::black()); vis.finish_vertex(u, g); - } // end while + } // end while } - -template +template void inv_breadth_first_search(const IncidenceGraph &graph, SourceVertex source, BFSVisitor vis) { const std::array sources = {source}; typedef typename graph_traits::vertex_descriptor VertexT; @@ -82,4 +83,4 @@ void inv_breadth_first_search(const IncidenceGraph &graph, SourceVertex source, inv_breadth_first_visit(graph, sources.begin(), sources.end(), q, vis, boost::associative_property_map(color_map)); } -} \ No newline at end of file +} // namespace boost::extensions diff --git a/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp b/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp index f025c28c..868a34f4 100644 --- a/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp +++ b/include/osp/graph_implementations/boost_graphs/source_iterator_range.hpp @@ -22,37 +22,38 @@ limitations under the License. namespace boost::extensions { -template +template struct source_iterator_range { public: - IteratorType begin() { return _begin; } + IteratorType begin() const { return _begin; } + IteratorType cbegin() const { return _begin; } IteratorType end() { return _end; } + IteratorType end() const { return _end; } + IteratorType cend() const { return _end; } std::size_t size() const { return std::distance(cbegin(), cend()); } - template + template explicit source_iterator_range(RangeType &r) : _begin(boost::begin(r)), _end(boost::end(r)) {} private: IteratorType _begin, _end; }; -template -source_iterator_range()))> -make_source_iterator_range(const ForwardRange &r) { +template +source_iterator_range()))> make_source_iterator_range(const ForwardRange &r) { return source_iterator_range()))>(r); } -template -source_iterator_range()))> -make_source_iterator_range(ForwardRange &r) { +template +source_iterator_range()))> make_source_iterator_range(ForwardRange &r) { return source_iterator_range()))>(r); } -} // namespace boost::extensions +} // namespace boost::extensions diff --git a/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp b/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp index 4e3f8c70..616b750c 100644 --- a/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp +++ b/include/osp/graph_implementations/eigen_matrix_adapter/eigen_sparse_iterator.hpp @@ -16,23 +16,22 @@ limitations under the License. @author Christos Matzoros, Toni Boehnlein, Pal Andras Papp, Raphael S. Steiner */ - - #pragma once #ifdef EIGEN_FOUND -#include -#include "osp/concepts/graph_traits.hpp" +# include + +# include "osp/concepts/graph_traits.hpp" namespace osp { -template +template class EigenCSRRange { - const Graph& graph_; + const Graph &graph_; eigen_idx_type index_; -public: + public: using CSRMatrix = Eigen::SparseMatrix; using Inner = typename CSRMatrix::InnerIterator; @@ -42,12 +41,12 @@ class EigenCSRRange { bool at_end_; void skip_diagonal() { - while ( ((!at_end_) && (it_.row() == skip_ )) & (it_.col() == skip_)) { + while (((!at_end_) && (it_.row() == skip_)) & (it_.col() == skip_)) { ++(*this); } } - public: + public: using value_type = std::size_t; using reference = value_type; using pointer = void; @@ -55,17 +54,17 @@ class EigenCSRRange { using iterator_category = std::input_iterator_tag; iterator() = default; - iterator(const iterator& other) : it_(other.it_), skip_(other.skip_), at_end_(other.at_end_) {} - iterator& operator=(const iterator& other) { + + iterator(const iterator &other) : it_(other.it_), skip_(other.skip_), at_end_(other.at_end_) {} + + iterator &operator=(const iterator &other) { it_ = other.it_; skip_ = other.skip_; at_end_ = other.at_end_; return *this; } - - iterator(const CSRMatrix& mat, eigen_idx_type idx, bool end = false) - : skip_(idx), at_end_(end) { + iterator(const CSRMatrix &mat, eigen_idx_type idx, bool end = false) : skip_(idx), at_end_(end) { if (!end) { it_ = Inner(mat, idx); at_end_ = !it_; @@ -74,7 +73,8 @@ class EigenCSRRange { } reference operator*() const { return static_cast(it_.col()); } - iterator& operator++() { + + iterator &operator++() { ++it_; at_end_ = !it_; skip_diagonal(); @@ -86,30 +86,25 @@ class EigenCSRRange { ++(*this); return temp; } - - bool operator==(const iterator&) const { return at_end_; } - bool operator!=(const iterator&) const { return !at_end_; } + + bool operator==(const iterator &) const { return at_end_; } + + bool operator!=(const iterator &) const { return !at_end_; } }; - EigenCSRRange(const Graph& graph, eigen_idx_type idx) - : graph_(graph), index_(idx) {} + EigenCSRRange(const Graph &graph, eigen_idx_type idx) : graph_(graph), index_(idx) {} - iterator begin() const { - return iterator(*graph_.getCSR(), index_); - } + iterator begin() const { return iterator(*graph_.getCSR(), index_); } - iterator end() const { - return iterator(*graph_.getCSR(), index_, true); - } + iterator end() const { return iterator(*graph_.getCSR(), index_, true); } }; - -template +template class EigenCSCRange { - const Graph& graph_; + const Graph &graph_; eigen_idx_type index_; -public: + public: using CSCMatrix = Eigen::SparseMatrix; using Inner = typename CSCMatrix::InnerIterator; @@ -123,8 +118,8 @@ class EigenCSCRange { ++(*this); } } - - public: + + public: using value_type = std::size_t; using reference = value_type; using pointer = void; @@ -132,17 +127,17 @@ class EigenCSCRange { using iterator_category = std::input_iterator_tag; iterator() = default; - iterator(const iterator& other) : it_(other.it_), skip_(other.skip_), at_end_(other.at_end_) {} - iterator& operator=(const iterator& other) { + + iterator(const iterator &other) : it_(other.it_), skip_(other.skip_), at_end_(other.at_end_) {} + + iterator &operator=(const iterator &other) { it_ = other.it_; skip_ = other.skip_; at_end_ = other.at_end_; return *this; } - - iterator(const CSCMatrix& mat, eigen_idx_type idx, bool end = false) - : skip_(idx), at_end_(end) { + iterator(const CSCMatrix &mat, eigen_idx_type idx, bool end = false) : skip_(idx), at_end_(end) { if (!end) { it_ = Inner(mat, idx); at_end_ = !it_; @@ -151,7 +146,8 @@ class EigenCSCRange { } reference operator*() const { return static_cast(it_.row()); } - iterator& operator++() { + + iterator &operator++() { ++it_; at_end_ = !it_; skip_diagonal(); @@ -163,23 +159,19 @@ class EigenCSCRange { ++(*this); return temp; } - - bool operator==(const iterator&) const { return at_end_; } - bool operator!=(const iterator&) const { return !at_end_; } + bool operator==(const iterator &) const { return at_end_; } + + bool operator!=(const iterator &) const { return !at_end_; } }; - EigenCSCRange(const Graph& graph, eigen_idx_type idx) - : graph_(graph), index_(idx) {} + EigenCSCRange(const Graph &graph, eigen_idx_type idx) : graph_(graph), index_(idx) {} - iterator begin() const { - return iterator(*graph_.getCSC(), index_); - } + iterator begin() const { return iterator(*graph_.getCSC(), index_); } - iterator end() const { - return iterator(*graph_.getCSC(), index_, true); - } + iterator end() const { return iterator(*graph_.getCSC(), index_, true); } }; -} // namespace osp + +} // namespace osp #endif diff --git a/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp b/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp index 341389aa..7d989f69 100644 --- a/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp +++ b/include/osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp @@ -20,30 +20,32 @@ limitations under the License. #ifdef EIGEN_FOUND -#include -#include "osp/concepts/directed_graph_concept.hpp" -#include "osp/concepts/directed_graph_edge_desc_concept.hpp" -#include "osp/concepts/computational_dag_concept.hpp" -#include "osp/graph_implementations/integral_range.hpp" -#include "eigen_sparse_iterator.hpp" +# include + +# include "eigen_sparse_iterator.hpp" +# include "osp/concepts/computational_dag_concept.hpp" +# include "osp/concepts/directed_graph_concept.hpp" +# include "osp/concepts/directed_graph_edge_desc_concept.hpp" +# include "osp/graph_implementations/integral_range.hpp" namespace osp { /// @brief Implementation of a lower‐triangular sparse matrix as a directed acyclic graph. /// Wraps Eigen's sparse matrix and exposes graph-like methods for scheduling and analysis. -template +template class SparseMatrixImp { static_assert(std::is_integral_v, "Eigen index type must be integral"); -private: + + private: // Define Eigen-compatible matrix types using eigen_idx_type as the index type - using MatrixCSR = Eigen::SparseMatrix; // For parents - using MatrixCSC = Eigen::SparseMatrix; // For children + using MatrixCSR = Eigen::SparseMatrix; // For parents + using MatrixCSC = Eigen::SparseMatrix; // For children // Internal pointers to the sparse matrices (not owning) - MatrixCSR* L_csr_p = nullptr; - MatrixCSC* L_csc_p = nullptr; + MatrixCSR *L_csr_p = nullptr; + MatrixCSC *L_csc_p = nullptr; -public: + public: // Vertex index type must match Eigen's StorageIndex (signed 32-bit) using vertex_idx = size_t; @@ -53,32 +55,28 @@ class SparseMatrixImp { using vertex_mem_weight_type = int; using vertex_type_type = unsigned; - using eigen_idx_t = eigen_idx_type; + using eigen_idx_t = eigen_idx_type; SparseMatrixImp() = default; // Setters for the internal CSR and CSC matrix pointers - void setCSR(MatrixCSR* mat) { L_csr_p = mat; } - void setCSC(MatrixCSC* mat) { L_csc_p = mat; } + void setCSR(MatrixCSR *mat) { L_csr_p = mat; } + + void setCSC(MatrixCSC *mat) { L_csc_p = mat; } // Getters for internal matrices (used by EigenSparseRange) - const MatrixCSR* getCSR() const { return L_csr_p; } - const MatrixCSC* getCSC() const { return L_csc_p; } + const MatrixCSR *getCSR() const { return L_csr_p; } + + const MatrixCSC *getCSC() const { return L_csc_p; } /// @brief Number of vertices = number of rows in the matrix - size_t num_vertices() const noexcept { - return static_cast(L_csr_p->rows()); - } + size_t num_vertices() const noexcept { return static_cast(L_csr_p->rows()); } /// @brief Return a range over all vertices [0, num_vertices) - auto vertices() const { - return osp::integral_range(num_vertices()); - } + auto vertices() const { return osp::integral_range(num_vertices()); } /// @brief Number of edges = total non-zeros minus diagonal elements - vertex_idx num_edges() const noexcept { - return static_cast(L_csr_p->nonZeros() - L_csr_p->rows()); - } + vertex_idx num_edges() const noexcept { return static_cast(L_csr_p->nonZeros() - L_csr_p->rows()); } /// @brief In-degree = non-zero off-diagonal entries in row v (CSR) vertex_idx in_degree(vertex_idx v) const noexcept { @@ -107,38 +105,32 @@ class SparseMatrixImp { // Default zero weights (placeholders, extend as needed) vertex_comm_weight_type vertex_comm_weight(vertex_idx) const noexcept { return 0; } - vertex_mem_weight_type vertex_mem_weight(vertex_idx) const noexcept { return 0; } + + vertex_mem_weight_type vertex_mem_weight(vertex_idx) const noexcept { return 0; } inline unsigned num_vertex_types() const { return 1; }; - inline vertex_type_type vertex_type(const vertex_idx ) const { return 0; } + + inline vertex_type_type vertex_type(const vertex_idx) const { return 0; } }; using sparse_matrix_graph_int32_t = SparseMatrixImp; using sparse_matrix_graph_int64_t = SparseMatrixImp; - static_assert(is_directed_graph_edge_desc_v>, "SparseMatrix must satisfy the directed_graph_edge_desc concept"); // Verify that SparseMatrixImp satisfies the directed graph concept -static_assert(is_directed_graph_v>, - "SparseMatrix must satisfy directed_graph_concept"); +static_assert(is_directed_graph_v>, "SparseMatrix must satisfy directed_graph_concept"); -static_assert(is_directed_graph_v>, - "SparseMatrix must satisfy directed_graph_concept"); +static_assert(is_directed_graph_v>, "SparseMatrix must satisfy directed_graph_concept"); -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); +static_assert(has_vertex_weights_v>, "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); -static_assert(has_vertex_weights_v>, - "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); +static_assert(has_vertex_weights_v>, "Compact_Sparse_Graph must satisfy the has_vertex_weights concept"); static_assert(is_computational_dag_typed_vertices_v>, "Compact_Sparse_Graph must satisfy the is_computation_dag concept"); +} // namespace osp - -} // namespace osp - - -#endif \ No newline at end of file +#endif diff --git a/include/osp/graph_implementations/integral_range.hpp b/include/osp/graph_implementations/integral_range.hpp index 92611da7..0217c64f 100644 --- a/include/osp/graph_implementations/integral_range.hpp +++ b/include/osp/graph_implementations/integral_range.hpp @@ -44,20 +44,21 @@ class integral_range { * * This iterator satisfies the RandomAccessIterator concept. */ - class integral_iterator { // public for std::reverse_iterator + class integral_iterator { // public for std::reverse_iterator public: using iterator_category = std::random_access_iterator_tag; using difference_type = std::ptrdiff_t; using value_type = T; - using pointer = void; // Not a real pointer - using reference = T; // Not a real reference + using pointer = void; // Not a real pointer + using reference = T; // Not a real reference /** * @brief Proxy object to support operator-> for integral types. */ struct arrow_proxy { T value; - constexpr const T* operator->() const noexcept { return &value; } + + constexpr const T *operator->() const noexcept { return &value; } }; private: @@ -113,17 +114,22 @@ class integral_range { return temp; } - [[nodiscard]] constexpr bool operator==(const integral_iterator &other) const noexcept { return current == other.current; } + [[nodiscard]] constexpr bool operator==(const integral_iterator &other) const noexcept { + return current == other.current; + } + [[nodiscard]] constexpr bool operator!=(const integral_iterator &other) const noexcept { return !(*this == other); } constexpr integral_iterator &operator+=(difference_type n) noexcept { current = static_cast(current + n); return *this; } + [[nodiscard]] constexpr integral_iterator operator+(difference_type n) const noexcept { integral_iterator temp = *this; return temp += n; } + [[nodiscard]] friend constexpr integral_iterator operator+(difference_type n, const integral_iterator &it) noexcept { return it + n; } @@ -132,10 +138,12 @@ class integral_range { current = static_cast(current - n); return *this; } + [[nodiscard]] constexpr integral_iterator operator-(difference_type n) const noexcept { integral_iterator temp = *this; return temp -= n; } + [[nodiscard]] constexpr difference_type operator-(const integral_iterator &other) const noexcept { return static_cast(current) - static_cast(other.current); } @@ -143,9 +151,16 @@ class integral_range { [[nodiscard]] constexpr value_type operator[](difference_type n) const noexcept { return *(*this + n); } [[nodiscard]] constexpr bool operator<(const integral_iterator &other) const noexcept { return current < other.current; } + [[nodiscard]] constexpr bool operator>(const integral_iterator &other) const noexcept { return current > other.current; } - [[nodiscard]] constexpr bool operator<=(const integral_iterator &other) const noexcept { return current <= other.current; } - [[nodiscard]] constexpr bool operator>=(const integral_iterator &other) const noexcept { return current >= other.current; } + + [[nodiscard]] constexpr bool operator<=(const integral_iterator &other) const noexcept { + return current <= other.current; + } + + [[nodiscard]] constexpr bool operator>=(const integral_iterator &other) const noexcept { + return current >= other.current; + } }; using reverse_integral_iterator = std::reverse_iterator; @@ -165,15 +180,19 @@ class integral_range { constexpr integral_range(T start_, T end_) noexcept : start(start_), finish(end_) {} [[nodiscard]] constexpr integral_iterator begin() const noexcept { return integral_iterator(start); } + [[nodiscard]] constexpr integral_iterator cbegin() const noexcept { return integral_iterator(start); } [[nodiscard]] constexpr integral_iterator end() const noexcept { return integral_iterator(finish); } + [[nodiscard]] constexpr integral_iterator cend() const noexcept { return integral_iterator(finish); } [[nodiscard]] constexpr reverse_integral_iterator rbegin() const noexcept { return reverse_integral_iterator(end()); } + [[nodiscard]] constexpr reverse_integral_iterator crbegin() const noexcept { return reverse_integral_iterator(cend()); } [[nodiscard]] constexpr reverse_integral_iterator rend() const noexcept { return reverse_integral_iterator(begin()); } + [[nodiscard]] constexpr reverse_integral_iterator crend() const noexcept { return reverse_integral_iterator(cbegin()); } /** @@ -189,4 +208,4 @@ class integral_range { [[nodiscard]] constexpr bool empty() const noexcept { return start == finish; } }; -} // namespace osp +} // namespace osp diff --git a/include/osp/partitioning/model/hypergraph.hpp b/include/osp/partitioning/model/hypergraph.hpp index 39ca79b2..9c36beb6 100644 --- a/include/osp/partitioning/model/hypergraph.hpp +++ b/include/osp/partitioning/model/hypergraph.hpp @@ -17,31 +17,34 @@ limitations under the License. */ #pragma once -#include #include +#include + #include "osp/concepts/computational_dag_concept.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" namespace osp { -template +template class Hypergraph { - using this_t = Hypergraph; public: - using vertex_idx = index_type; - using vertex_work_weight_type = workw_type; - using vertex_mem_weight_type = memw_type; - using vertex_comm_weight_type = commw_type; + using vertex_work_weight_type = workw_type; + using vertex_mem_weight_type = memw_type; + using vertex_comm_weight_type = commw_type; Hypergraph() = default; Hypergraph(index_type num_vertices_, index_type num_hyperedges_) - : Num_vertices(num_vertices_), Num_hyperedges(num_hyperedges_), vertex_work_weights(num_vertices_, 1), - vertex_memory_weights(num_vertices_, 1), hyperedge_weights(num_hyperedges_, 1), - incident_hyperedges_to_vertex(num_vertices_), vertices_in_hyperedge(num_hyperedges_){} + : Num_vertices(num_vertices_), + Num_hyperedges(num_hyperedges_), + vertex_work_weights(num_vertices_, 1), + vertex_memory_weights(num_vertices_, 1), + hyperedge_weights(num_hyperedges_, 1), + incident_hyperedges_to_vertex(num_vertices_), + vertices_in_hyperedge(num_hyperedges_) {} Hypergraph(const this_t &other) = default; Hypergraph &operator=(const this_t &other) = default; @@ -49,27 +52,35 @@ class Hypergraph { virtual ~Hypergraph() = default; inline index_type num_vertices() const { return Num_vertices; } + inline index_type num_hyperedges() const { return Num_hyperedges; } + inline index_type num_pins() const { return Num_pins; } + inline workw_type get_vertex_work_weight(index_type node) const { return vertex_work_weights[node]; } + inline memw_type get_vertex_memory_weight(index_type node) const { return vertex_memory_weights[node]; } + inline commw_type get_hyperedge_weight(index_type hyperedge) const { return hyperedge_weights[hyperedge]; } void add_pin(index_type vertex_idx, index_type hyperedge_idx); void add_vertex(workw_type work_weight = 1, memw_type memory_weight = 1); void add_empty_hyperedge(commw_type weight = 1); - void add_hyperedge(const std::vector& pins, commw_type weight = 1); + void add_hyperedge(const std::vector &pins, commw_type weight = 1); void set_vertex_work_weight(index_type vertex_idx, workw_type weight); void set_vertex_memory_weight(index_type vertex_idx, memw_type weight); void set_hyperedge_weight(index_type hyperedge_idx, commw_type weight); - void clear(); void reset(index_type num_vertices_, index_type num_hyperedges_); - inline const std::vector &get_incident_hyperedges(index_type vertex) const { return incident_hyperedges_to_vertex[vertex]; } - inline const std::vector &get_vertices_in_hyperedge(index_type hyperedge) const { return vertices_in_hyperedge[hyperedge]; } + inline const std::vector &get_incident_hyperedges(index_type vertex) const { + return incident_hyperedges_to_vertex[vertex]; + } + inline const std::vector &get_vertices_in_hyperedge(index_type hyperedge) const { + return vertices_in_hyperedge[hyperedge]; + } private: index_type Num_vertices = 0, Num_hyperedges = 0, Num_pins = 0; @@ -84,83 +95,75 @@ class Hypergraph { using Hypergraph_def_t = Hypergraph; -template -void Hypergraph::add_pin(index_type vertex, index_type hyperedge_idx) -{ - if(vertex >= Num_vertices) - { +template +void Hypergraph::add_pin(index_type vertex, index_type hyperedge_idx) { + if (vertex >= Num_vertices) { throw std::invalid_argument("Invalid Argument while adding pin: vertex index out of range."); - } - else if(hyperedge_idx >= Num_hyperedges) - { + } else if (hyperedge_idx >= Num_hyperedges) { throw std::invalid_argument("Invalid Argument while adding pin: hyperedge index out of range."); - } - else{ + } else { incident_hyperedges_to_vertex[vertex].push_back(hyperedge_idx); vertices_in_hyperedge[hyperedge_idx].push_back(vertex); ++Num_pins; } } -template -void Hypergraph::add_vertex(workw_type work_weight, memw_type memory_weight) -{ +template +void Hypergraph::add_vertex(workw_type work_weight, memw_type memory_weight) { vertex_work_weights.push_back(work_weight); vertex_memory_weights.push_back(memory_weight); incident_hyperedges_to_vertex.emplace_back(); ++Num_vertices; } -template -void Hypergraph::add_empty_hyperedge(commw_type weight) -{ +template +void Hypergraph::add_empty_hyperedge(commw_type weight) { vertices_in_hyperedge.emplace_back(); hyperedge_weights.push_back(weight); ++Num_hyperedges; } -template -void Hypergraph::add_hyperedge(const std::vector& pins, commw_type weight) -{ +template +void Hypergraph::add_hyperedge(const std::vector &pins, + commw_type weight) { vertices_in_hyperedge.emplace_back(pins); hyperedge_weights.push_back(weight); - for(index_type vertex : pins) + for (index_type vertex : pins) { incident_hyperedges_to_vertex[vertex].push_back(Num_hyperedges); + } ++Num_hyperedges; Num_pins += static_cast(pins.size()); } -template -void Hypergraph::set_vertex_work_weight(index_type vertex, workw_type weight) -{ - if(vertex >= Num_vertices) +template +void Hypergraph::set_vertex_work_weight(index_type vertex, workw_type weight) { + if (vertex >= Num_vertices) { throw std::invalid_argument("Invalid Argument while setting vertex weight: vertex index out of range."); - else + } else { vertex_work_weights[vertex] = weight; + } } -template -void Hypergraph::set_vertex_memory_weight(index_type vertex, memw_type weight) -{ - if(vertex >= Num_vertices) +template +void Hypergraph::set_vertex_memory_weight(index_type vertex, memw_type weight) { + if (vertex >= Num_vertices) { throw std::invalid_argument("Invalid Argument while setting vertex weight: vertex index out of range."); - else + } else { vertex_memory_weights[vertex] = weight; + } } -template -void Hypergraph::set_hyperedge_weight(index_type hyperedge_idx, commw_type weight) -{ - if(hyperedge_idx >= Num_hyperedges) +template +void Hypergraph::set_hyperedge_weight(index_type hyperedge_idx, commw_type weight) { + if (hyperedge_idx >= Num_hyperedges) { throw std::invalid_argument("Invalid Argument while setting hyperedge weight: hyepredge index out of range."); - else + } else { hyperedge_weights[hyperedge_idx] = weight; + } } - -template -void Hypergraph::clear() -{ +template +void Hypergraph::clear() { Num_vertices = 0; Num_hyperedges = 0; Num_pins = 0; @@ -172,9 +175,8 @@ void Hypergraph::clear() vertices_in_hyperedge.clear(); } -template -void Hypergraph::reset(index_type num_vertices_, index_type num_hyperedges_) -{ +template +void Hypergraph::reset(index_type num_vertices_, index_type num_hyperedges_) { clear(); Num_vertices = num_vertices_; @@ -187,7 +189,4 @@ void Hypergraph::reset(index_type vertices_in_hyperedge.resize(num_hyperedges_); } - - - -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/partitioning/model/hypergraph_utility.hpp b/include/osp/partitioning/model/hypergraph_utility.hpp index 46e698b4..a641519f 100644 --- a/include/osp/partitioning/model/hypergraph_utility.hpp +++ b/include/osp/partitioning/model/hypergraph_utility.hpp @@ -33,82 +33,79 @@ limitations under the License. namespace osp { - // summing up weights -template -typename hypergraph_t::vertex_work_weight_type compute_total_vertex_work_weight(const hypergraph_t& hgraph) -{ +template +typename hypergraph_t::vertex_work_weight_type compute_total_vertex_work_weight(const hypergraph_t &hgraph) { using index_type = typename hypergraph_t::vertex_idx; using workw_type = typename hypergraph_t::vertex_work_weight_type; workw_type total = 0; - for(index_type node = 0; node < hgraph.num_vertices(); ++node) + for (index_type node = 0; node < hgraph.num_vertices(); ++node) { total += hgraph.get_vertex_work_weight(node); + } return total; } -template -typename hypergraph_t::vertex_mem_weight_type compute_total_vertex_memory_weight(const hypergraph_t& hgraph) -{ +template +typename hypergraph_t::vertex_mem_weight_type compute_total_vertex_memory_weight(const hypergraph_t &hgraph) { using index_type = typename hypergraph_t::vertex_idx; using memw_type = typename hypergraph_t::vertex_mem_weight_type; memw_type total = 0; - for(index_type node = 0; node < hgraph.num_vertices(); ++node) + for (index_type node = 0; node < hgraph.num_vertices(); ++node) { total += hgraph.get_vertex_memory_weight(node); + } return total; } - // get induced subhypergraph -template -hypergraph_t create_induced_hypergraph(const hypergraph_t& hgraph, const std::vector& include) -{ - if(include.size() != hgraph.num_vertices()) +template +hypergraph_t create_induced_hypergraph(const hypergraph_t &hgraph, const std::vector &include) { + if (include.size() != hgraph.num_vertices()) { throw std::invalid_argument("Invalid Argument while extracting induced hypergraph: input bool array has incorrect size."); + } using index_type = typename hypergraph_t::vertex_idx; - std::vector new_index(hgraph.num_vertices()); unsigned current_index = 0; - for(index_type node = 0; node < hgraph.num_vertices(); ++node) - if(include[node]) + for (index_type node = 0; node < hgraph.num_vertices(); ++node) { + if (include[node]) { new_index[node] = current_index++; - + } + } + hypergraph_t new_hgraph(current_index, 0); - for(index_type node = 0; node < hgraph.num_vertices(); ++node) - if(include[node]) - { + for (index_type node = 0; node < hgraph.num_vertices(); ++node) { + if (include[node]) { new_hgraph.set_vertex_work_weight(new_index[node], hgraph.get_vertex_work_weight(node)); new_hgraph.set_vertex_memory_weight(new_index[node], hgraph.get_vertex_memory_weight(node)); } + } - for(index_type hyperedge = 0; hyperedge < hgraph.num_hyperedges(); ++hyperedge) - { + for (index_type hyperedge = 0; hyperedge < hgraph.num_hyperedges(); ++hyperedge) { unsigned nr_induced_pins = 0; std::vector induced_hyperedge; - for(index_type node : hgraph.get_vertices_in_hyperedge(hyperedge)) - if(include[node]) - { + for (index_type node : hgraph.get_vertices_in_hyperedge(hyperedge)) { + if (include[node]) { induced_hyperedge.push_back(new_index[node]); ++nr_induced_pins; } - - if(nr_induced_pins >= 2) + } + + if (nr_induced_pins >= 2) { new_hgraph.add_hyperedge(induced_hyperedge, hgraph.get_hyperedge_weight(hyperedge)); + } } return new_hgraph; } - // conversion -template -hypergraph_t convert_from_cdag_as_dag(const Graph_t& dag) -{ +template +hypergraph_t convert_from_cdag_as_dag(const Graph_t &dag) { using index_type = typename hypergraph_t::vertex_idx; using workw_type = typename hypergraph_t::vertex_work_weight_type; using memw_type = typename hypergraph_t::vertex_mem_weight_type; @@ -117,25 +114,26 @@ hypergraph_t convert_from_cdag_as_dag(const Graph_t& dag) static_assert(std::is_same_v, index_type>, "Index type mismatch, cannot convert DAG to hypergraph."); static_assert(std::is_same_v, workw_type>, "Work weight type mismatch, cannot convert DAG to hypergraph."); static_assert(std::is_same_v, memw_type>, "Memory weight type mismatch, cannot convert DAG to hypergraph."); - static_assert(!has_edge_weights_v || std::is_same_v, commw_type>, "Communication weight type mismatch, cannot convert DAG to hypergraph."); + static_assert(!has_edge_weights_v || std::is_same_v, commw_type>, + "Communication weight type mismatch, cannot convert DAG to hypergraph."); hypergraph_t hgraph(dag.num_vertices(), 0); - for(const auto &node : dag.vertices()) - { + for (const auto &node : dag.vertices()) { hgraph.set_vertex_work_weight(node, dag.vertex_work_weight(node)); hgraph.set_vertex_memory_weight(node, dag.vertex_mem_weight(node)); - for (const auto &child : dag.children(node)) - if constexpr(has_edge_weights_v) + for (const auto &child : dag.children(node)) { + if constexpr (has_edge_weights_v) { hgraph.add_hyperedge({node, child}, dag.edge_comm_weight(edge_desc(node, child, dag).first)); - else + } else { hgraph.add_hyperedge({node, child}); + } + } } return hgraph; } -template -hypergraph_t convert_from_cdag_as_hyperdag(const Graph_t& dag) -{ +template +hypergraph_t convert_from_cdag_as_hyperdag(const Graph_t &dag) { using index_type = typename hypergraph_t::vertex_idx; using workw_type = typename hypergraph_t::vertex_work_weight_type; using memw_type = typename hypergraph_t::vertex_mem_weight_type; @@ -144,21 +142,23 @@ hypergraph_t convert_from_cdag_as_hyperdag(const Graph_t& dag) static_assert(std::is_same_v, index_type>, "Index type mismatch, cannot convert DAG to hypergraph."); static_assert(std::is_same_v, workw_type>, "Work weight type mismatch, cannot convert DAG to hypergraph."); static_assert(std::is_same_v, memw_type>, "Memory weight type mismatch, cannot convert DAG to hypergraph."); - static_assert(std::is_same_v, commw_type>, "Communication weight type mismatch, cannot convert DAG to hypergraph."); + static_assert(std::is_same_v, commw_type>, + "Communication weight type mismatch, cannot convert DAG to hypergraph."); hypergraph_t hgraph(dag.num_vertices(), 0); - for(const auto &node : dag.vertices()) - { + for (const auto &node : dag.vertices()) { hgraph.set_vertex_work_weight(node, dag.vertex_work_weight(node)); hgraph.set_vertex_memory_weight(node, dag.vertex_mem_weight(node)); - if(dag.out_degree(node) == 0) + if (dag.out_degree(node) == 0) { continue; + } std::vector new_hyperedge({node}); - for (const auto &child : dag.children(node)) + for (const auto &child : dag.children(node)) { new_hyperedge.push_back(child); + } hgraph.add_hyperedge(new_hyperedge, dag.vertex_comm_weight(node)); } return hgraph; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/partitioning/model/partitioning.hpp b/include/osp/partitioning/model/partitioning.hpp index e39328a1..ae4ee277 100644 --- a/include/osp/partitioning/model/partitioning.hpp +++ b/include/osp/partitioning/model/partitioning.hpp @@ -26,11 +26,9 @@ namespace osp { // Represents a partitioning where each vertex of a hypergraph is assigned to a specifc partition -template +template class Partitioning { - private: - using index_type = typename hypergraph_t::vertex_idx; using workw_type = typename hypergraph_t::vertex_work_weight_type; using memw_type = typename hypergraph_t::vertex_mem_weight_type; @@ -41,7 +39,6 @@ class Partitioning { std::vector node_to_partition_assignment; public: - Partitioning() = delete; Partitioning(const PartitioningProblem &inst) @@ -57,40 +54,37 @@ class Partitioning { virtual ~Partitioning() = default; - // getters and setters inline const PartitioningProblem &getInstance() const { return *instance; } inline unsigned assignedPartition(index_type node) const { return node_to_partition_assignment[node]; } + inline const std::vector &assignedPartitions() const { return node_to_partition_assignment; } + inline std::vector &assignedPartitions() { return node_to_partition_assignment; } inline void setAssignedPartition(index_type node, unsigned part) { node_to_partition_assignment.at(node) = part; } - void setAssignedPartitions(const std::vector &vec) { - if (vec.size() == static_cast(instance->getHypergraph().num_vertices()) ) { + void setAssignedPartitions(const std::vector &vec) { + if (vec.size() == static_cast(instance->getHypergraph().num_vertices())) { node_to_partition_assignment = vec; } else { - throw std::invalid_argument( - "Invalid Argument while assigning processors: size does not match number of nodes."); + throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } - void setAssignedPartitions(std::vector &&vec) { - if (vec.size() == static_cast(instance->getHypergraph().num_vertices()) ) { + void setAssignedPartitions(std::vector &&vec) { + if (vec.size() == static_cast(instance->getHypergraph().num_vertices())) { node_to_partition_assignment = vec; } else { - throw std::invalid_argument( - "Invalid Argument while assigning processors: size does not match number of nodes."); + throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } std::vector getPartitionContent(unsigned part) const { - std::vector content; for (index_type node = 0; node < node_to_partition_assignment.size(); ++node) { - if (node_to_partition_assignment[node] == part) { content.push_back(node); } @@ -111,76 +105,79 @@ class Partitioning { commw_type computeCutNetCost() const; bool satisfiesBalanceConstraint() const; - }; -template -std::vector Partitioning::computeLambdaForHyperedges() const -{ +template +std::vector Partitioning::computeLambdaForHyperedges() const { std::vector lambda(instance->getHypergraph().num_hyperedges(), 0); - for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) - { + for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { const std::vector &hyperedge = instance->getHypergraph().get_vertices_in_hyperedge(edge_idx); - if(hyperedge.empty()) + if (hyperedge.empty()) { continue; + } std::vector intersects_part(instance->getNumberOfPartitions(), false); - for(const index_type& node : hyperedge) + for (const index_type &node : hyperedge) { intersects_part[node_to_partition_assignment[node]] = true; - for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) - if(intersects_part[part]) + } + for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { + if (intersects_part[part]) { ++lambda[edge_idx]; + } + } } return lambda; } -template +template typename hypergraph_t::vertex_comm_weight_type Partitioning::computeConnectivityCost() const { - commw_type total = 0; std::vector lambda = computeLambdaForHyperedges(); - - for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) - if(lambda[edge_idx] >= 1) - total += (static_cast(lambda[edge_idx])-1) * instance->getHypergraph().get_hyperedge_weight(edge_idx); - + + for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { + if (lambda[edge_idx] >= 1) { + total += (static_cast(lambda[edge_idx]) - 1) * instance->getHypergraph().get_hyperedge_weight(edge_idx); + } + } + return total; } -template +template typename hypergraph_t::vertex_comm_weight_type Partitioning::computeCutNetCost() const { - commw_type total = 0; std::vector lambda = computeLambdaForHyperedges(); - for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) - if(lambda[edge_idx] > 1) + for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { + if (lambda[edge_idx] > 1) { total += instance->getHypergraph().get_hyperedge_weight(edge_idx); - + } + } + return total; } -template +template bool Partitioning::satisfiesBalanceConstraint() const { std::vector work_weight(instance->getNumberOfPartitions(), 0); std::vector memory_weight(instance->getNumberOfPartitions(), 0); for (index_type node = 0; node < node_to_partition_assignment.size(); ++node) { - if (node_to_partition_assignment[node] > instance->getNumberOfPartitions()) + if (node_to_partition_assignment[node] > instance->getNumberOfPartitions()) { throw std::invalid_argument("Invalid Argument while checking balance constraint: partition ID out of range."); - else - { + } else { work_weight[node_to_partition_assignment[node]] += instance->getHypergraph().get_vertex_work_weight(node); memory_weight[node_to_partition_assignment[node]] += instance->getHypergraph().get_vertex_memory_weight(node); } } - for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) - { - if(work_weight[part] > instance->getMaxWorkWeightPerPartition()) + for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { + if (work_weight[part] > instance->getMaxWorkWeightPerPartition()) { return false; - if(memory_weight[part] > instance->getMaxMemoryWeightPerPartition()) + } + if (memory_weight[part] > instance->getMaxMemoryWeightPerPartition()) { return false; + } } return true; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/partitioning/model/partitioning_problem.hpp b/include/osp/partitioning/model/partitioning_problem.hpp index b121ddd9..90ae57c6 100644 --- a/include/osp/partitioning/model/partitioning_problem.hpp +++ b/include/osp/partitioning/model/partitioning_problem.hpp @@ -18,19 +18,17 @@ limitations under the License. #pragma once -#include #include +#include #include "osp/partitioning/model/hypergraph_utility.hpp" namespace osp { // represents a hypergraph partitioning problem into a fixed number of parts with a balance constraint -template +template class PartitioningProblem { - private: - using this_t = PartitioningProblem; using index_type = typename hypergraph_t::vertex_idx; @@ -47,20 +45,25 @@ class PartitioningProblem { bool allows_replication = false; public: - PartitioningProblem() = default; - PartitioningProblem(const hypergraph_t &hgraph_, unsigned nr_parts_ = 2, + PartitioningProblem(const hypergraph_t &hgraph_, + unsigned nr_parts_ = 2, workw_type max_work_weight_ = std::numeric_limits::max(), - memw_type max_memory_weight_ = std::numeric_limits::max()) : - hgraph(hgraph_), nr_of_partitions(nr_parts_), - max_work_weight_per_partition(max_work_weight_), max_memory_weight_per_partition(max_memory_weight_) {} - - PartitioningProblem(const hypergraph_t &&hgraph_, unsigned nr_parts_ = 2, + memw_type max_memory_weight_ = std::numeric_limits::max()) + : hgraph(hgraph_), + nr_of_partitions(nr_parts_), + max_work_weight_per_partition(max_work_weight_), + max_memory_weight_per_partition(max_memory_weight_) {} + + PartitioningProblem(const hypergraph_t &&hgraph_, + unsigned nr_parts_ = 2, workw_type max_work_weight_ = std::numeric_limits::max(), - memw_type max_memory_weight_ = std::numeric_limits::max()) : - hgraph(hgraph_), nr_of_partitions(nr_parts_), - max_work_weight_per_partition(max_work_weight_), max_memory_weight_per_partition(max_memory_weight_) {} + memw_type max_memory_weight_ = std::numeric_limits::max()) + : hgraph(hgraph_), + nr_of_partitions(nr_parts_), + max_work_weight_per_partition(max_work_weight_), + max_memory_weight_per_partition(max_memory_weight_) {} PartitioningProblem(const this_t &other) = default; PartitioningProblem(this_t &&other) = default; @@ -70,34 +73,45 @@ class PartitioningProblem { // getters inline const hypergraph_t &getHypergraph() const { return hgraph; } + inline hypergraph_t &getHypergraph() { return hgraph; } inline unsigned getNumberOfPartitions() const { return nr_of_partitions; } + inline workw_type getMaxWorkWeightPerPartition() const { return max_work_weight_per_partition; } + inline memw_type getMaxMemoryWeightPerPartition() const { return max_memory_weight_per_partition; } + inline bool getAllowsReplication() const { return allows_replication; } // setters inline void setHypergraph(const hypergraph_t &hgraph_) { hgraph = hgraph_; } - + inline void setNumberOfPartitions(unsigned nr_parts_) { nr_of_partitions = nr_parts_; } + inline void setAllowsReplication(bool allowed_) { allows_replication = allowed_; } inline void setMaxWorkWeightExplicitly(workw_type max_weight_) { max_work_weight_per_partition = max_weight_; } - void setMaxWorkWeightViaImbalanceFactor(double imbalance){ - if(imbalance < 0 ) + + void setMaxWorkWeightViaImbalanceFactor(double imbalance) { + if (imbalance < 0) { throw std::invalid_argument("Invalid Argument while setting imbalance parameter: parameter is negative."); - else - max_work_weight_per_partition = static_cast(ceil(compute_total_vertex_work_weight(hgraph)/ static_cast(nr_of_partitions) * (1.0+imbalance))); + } else { + max_work_weight_per_partition = static_cast( + ceil(compute_total_vertex_work_weight(hgraph) / static_cast(nr_of_partitions) * (1.0 + imbalance))); + } } + inline void setMaxMemoryWeightExplicitly(memw_type max_weight_) { max_memory_weight_per_partition = max_weight_; } - void setMaxMemoryWeightViaImbalanceFactor(double imbalance){ - if(imbalance < 0 ) + + void setMaxMemoryWeightViaImbalanceFactor(double imbalance) { + if (imbalance < 0) { throw std::invalid_argument("Invalid Argument while setting imbalance parameter: parameter is negative."); - else - max_memory_weight_per_partition = static_cast(ceil(compute_total_vertex_memory_weight(hgraph)/ static_cast(nr_of_partitions) * (1.0+imbalance))); + } else { + max_memory_weight_per_partition = static_cast( + ceil(compute_total_vertex_memory_weight(hgraph) / static_cast(nr_of_partitions) * (1.0 + imbalance))); + } } }; - -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/partitioning/model/partitioning_replication.hpp b/include/osp/partitioning/model/partitioning_replication.hpp index eae0b65e..698db721 100644 --- a/include/osp/partitioning/model/partitioning_replication.hpp +++ b/include/osp/partitioning/model/partitioning_replication.hpp @@ -26,29 +26,27 @@ namespace osp { // Represents a partitioning where each vertex of a hypergraph can be assinged to one or more partitions -template +template class PartitioningWithReplication { - private: - using index_type = typename hypergraph_t::vertex_idx; using workw_type = typename hypergraph_t::vertex_work_weight_type; using memw_type = typename hypergraph_t::vertex_mem_weight_type; using commw_type = typename hypergraph_t::vertex_comm_weight_type; - const PartitioningProblem *instance; - std::vector > node_to_partitions_assignment; + std::vector> node_to_partitions_assignment; public: - PartitioningWithReplication() = delete; PartitioningWithReplication(const PartitioningProblem &inst) - : instance(&inst), node_to_partitions_assignment(std::vector>(inst.getHypergraph().num_vertices(), {0})) {} + : instance(&inst), + node_to_partitions_assignment(std::vector>(inst.getHypergraph().num_vertices(), {0})) {} - PartitioningWithReplication(const PartitioningProblem &inst, const std::vector > &partition_assignment_) + PartitioningWithReplication(const PartitioningProblem &inst, + const std::vector> &partition_assignment_) : instance(&inst), node_to_partitions_assignment(partition_assignment_) {} PartitioningWithReplication(const PartitioningWithReplication &partitioning_) = default; @@ -58,41 +56,43 @@ class PartitioningWithReplication { virtual ~PartitioningWithReplication() = default; - // getters and setters inline const PartitioningProblem &getInstance() const { return *instance; } inline std::vector assignedPartitions(index_type node) const { return node_to_partitions_assignment[node]; } - inline const std::vector > &assignedPartitions() const { return node_to_partitions_assignment; } - inline std::vector > &assignedPartitions() { return node_to_partitions_assignment; } - inline void setAssignedPartitions(index_type node, const std::vector& parts) { node_to_partitions_assignment.at(node) = parts; } - void setAssignedPartitionVectors(const std::vector > &vec) { + inline const std::vector> &assignedPartitions() const { return node_to_partitions_assignment; } + + inline std::vector> &assignedPartitions() { return node_to_partitions_assignment; } - if (vec.size() == static_cast(instance->getHypergraph().num_vertices()) ) { + inline void setAssignedPartitions(index_type node, const std::vector &parts) { + node_to_partitions_assignment.at(node) = parts; + } + + void setAssignedPartitionVectors(const std::vector> &vec) { + if (vec.size() == static_cast(instance->getHypergraph().num_vertices())) { node_to_partitions_assignment = vec; } else { - throw std::invalid_argument( - "Invalid Argument while assigning processors: size does not match number of nodes."); + throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } - void setAssignedPartitionVectors(std::vector > &&vec) { - if (vec.size() == static_cast(instance->getHypergraph().num_vertices()) ) { + void setAssignedPartitionVectors(std::vector> &&vec) { + if (vec.size() == static_cast(instance->getHypergraph().num_vertices())) { node_to_partitions_assignment = vec; } else { - throw std::invalid_argument( - "Invalid Argument while assigning processors: size does not match number of nodes."); + throw std::invalid_argument("Invalid Argument while assigning processors: size does not match number of nodes."); } } - std::vector > getPartitionContents() const { - - std::vector > content(instance->getNumberOfPartitions()); - for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node) - for(unsigned part : node_to_partitions_assignment[node]) + std::vector> getPartitionContents() const { + std::vector> content(instance->getNumberOfPartitions()); + for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node) { + for (unsigned part : node_to_partitions_assignment[node]) { content[part].push_back(node); + } + } return content; } @@ -108,121 +108,125 @@ class PartitioningWithReplication { commw_type computeCutNetCost() const; bool satisfiesBalanceConstraint() const; - }; -template +template typename hypergraph_t::vertex_comm_weight_type PartitioningWithReplication::computeConnectivityCost() const { - // naive implementation. in the worst-case this is exponential in the number of parts - if(instance->getNumberOfPartitions() > 16) + if (instance->getNumberOfPartitions() > 16) { throw std::invalid_argument("Computing connectivity cost is not supported for more than 16 partitions."); + } commw_type total = 0; std::vector part_used(instance->getNumberOfPartitions(), false); - for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) - { + for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { const std::vector &hyperedge = instance->getHypergraph().get_vertices_in_hyperedge(edge_idx); - if(hyperedge.empty()) + if (hyperedge.empty()) { continue; + } unsigned long mask = 0UL; std::vector nr_nodes_covered_by_part(instance->getNumberOfPartitions(), 0); - for(const index_type& node : hyperedge) - if(node_to_partitions_assignment[node].size() == 1) + for (const index_type &node : hyperedge) { + if (node_to_partitions_assignment[node].size() == 1) { mask = mask | (1UL << node_to_partitions_assignment[node].front()); + } + } unsigned min_parts_to_cover = instance->getNumberOfPartitions(); unsigned long mask_limit = 1UL << instance->getNumberOfPartitions(); - for(unsigned long subset_mask = 1UL; subset_mask < mask_limit; ++subset_mask) - { - if((subset_mask & mask)!= mask) + for (unsigned long subset_mask = 1UL; subset_mask < mask_limit; ++subset_mask) { + if ((subset_mask & mask) != mask) { continue; - + } + unsigned nr_parts_used = 0; - for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) - { + for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { part_used[part] = (((1UL << part) & subset_mask) > 0); nr_parts_used += static_cast(part_used[part]); } - + bool all_nodes_covered = true; - for(const index_type& node : hyperedge) - { - bool node_covered=false; - for(unsigned part : node_to_partitions_assignment[node]) - if(part_used[part]) - { + for (const index_type &node : hyperedge) { + bool node_covered = false; + for (unsigned part : node_to_partitions_assignment[node]) { + if (part_used[part]) { node_covered = true; break; } - if(!node_covered) - { + } + if (!node_covered) { all_nodes_covered = false; break; } } - if(all_nodes_covered) + if (all_nodes_covered) { min_parts_to_cover = std::min(min_parts_to_cover, nr_parts_used); + } } - - total += static_cast(min_parts_to_cover-1) * instance->getHypergraph().get_hyperedge_weight(edge_idx); + + total += static_cast(min_parts_to_cover - 1) * instance->getHypergraph().get_hyperedge_weight(edge_idx); } return total; } -template +template typename hypergraph_t::vertex_comm_weight_type PartitioningWithReplication::computeCutNetCost() const { - commw_type total = 0; - for(index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) - { + for (index_type edge_idx = 0; edge_idx < instance->getHypergraph().num_hyperedges(); ++edge_idx) { const std::vector &hyperedge = instance->getHypergraph().get_vertices_in_hyperedge(edge_idx); - if(hyperedge.empty()) + if (hyperedge.empty()) { continue; + } std::vector nr_nodes_covered_by_part(instance->getNumberOfPartitions(), 0); - for(const index_type& node : hyperedge) - for(unsigned part : node_to_partitions_assignment[node]) + for (const index_type &node : hyperedge) { + for (unsigned part : node_to_partitions_assignment[node]) { ++nr_nodes_covered_by_part[part]; - + } + } + bool covers_all = false; - for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) - if(nr_nodes_covered_by_part[part] == hyperedge.size()) + for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { + if (nr_nodes_covered_by_part[part] == hyperedge.size()) { covers_all = true; - - if(!covers_all) + } + } + + if (!covers_all) { total += instance->getHypergraph().get_hyperedge_weight(edge_idx); + } } return total; } -template +template bool PartitioningWithReplication::satisfiesBalanceConstraint() const { std::vector work_weight(instance->getNumberOfPartitions(), 0); std::vector memory_weight(instance->getNumberOfPartitions(), 0); - for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node) - for(unsigned part : node_to_partitions_assignment[node]){ - if (part > instance->getNumberOfPartitions()) + for (index_type node = 0; node < node_to_partitions_assignment.size(); ++node) { + for (unsigned part : node_to_partitions_assignment[node]) { + if (part > instance->getNumberOfPartitions()) { throw std::invalid_argument("Invalid Argument while checking balance constraint: partition ID out of range."); - else - { + } else { work_weight[part] += instance->getHypergraph().get_vertex_work_weight(node); memory_weight[part] += instance->getHypergraph().get_vertex_memory_weight(node); } } + } - for(unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) - { - if(work_weight[part] > instance->getMaxWorkWeightPerPartition()) + for (unsigned part = 0; part < instance->getNumberOfPartitions(); ++part) { + if (work_weight[part] > instance->getMaxWorkWeightPerPartition()) { return false; - if(memory_weight[part] > instance->getMaxMemoryWeightPerPartition()) + } + if (memory_weight[part] > instance->getMaxMemoryWeightPerPartition()) { return false; + } } return true; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/partitioning/partitioners/generic_FM.hpp b/include/osp/partitioning/partitioners/generic_FM.hpp index a7df5bec..c17c90f4 100644 --- a/include/osp/partitioning/partitioners/generic_FM.hpp +++ b/include/osp/partitioning/partitioners/generic_FM.hpp @@ -13,27 +13,25 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once -#include "osp/partitioning/model/partitioning.hpp" -#include #include +#include -namespace osp{ +#include "osp/partitioning/model/partitioning.hpp" -template -class GenericFM { +namespace osp { +template +class GenericFM { using index_type = typename hypergraph_t::vertex_idx; using workw_type = typename hypergraph_t::vertex_work_weight_type; using memw_type = typename hypergraph_t::vertex_mem_weight_type; using commw_type = typename hypergraph_t::vertex_comm_weight_type; - - protected: unsigned max_number_of_passes = 10; index_type max_nodes_in_part = 0; @@ -42,46 +40,48 @@ class GenericFM { std::vector getMaxNodesOnLevel(index_type nr_nodes, unsigned nr_parts) const; public: + void ImprovePartitioning(Partitioning &partition); - void ImprovePartitioning(Partitioning& partition); - - void RecursiveFM(Partitioning& partition); + void RecursiveFM(Partitioning &partition); inline unsigned getMaxNumberOfPasses() const { return max_number_of_passes; } + inline void setMaxNumberOfPasses(unsigned passes_) { max_number_of_passes = passes_; } + inline index_type getMaxNodesInPart() const { return max_nodes_in_part; } + inline void setMaxNodesInPart(index_type max_nodes_) { max_nodes_in_part = max_nodes_; } }; -template -void GenericFM::ImprovePartitioning(Partitioning& partition) -{ +template +void GenericFM::ImprovePartitioning(Partitioning &partition) { // Note: this algorithm disregards hyperedge weights, in order to keep the size of the gain bucket array bounded! - if(partition.getInstance().getNumberOfPartitions() != 2) - { + if (partition.getInstance().getNumberOfPartitions() != 2) { std::cout << "Error: FM can only be used for 2 partitions." << std::endl; return; } - - if(!partition.satisfiesBalanceConstraint()) - { + + if (!partition.satisfiesBalanceConstraint()) { std::cout << "Error: initial partition to FM does not satisfy balance constraint." << std::endl; return; } - const Hypergraph& Hgraph = partition.getInstance().getHypergraph(); + const Hypergraph &Hgraph = partition.getInstance().getHypergraph(); index_type max_degree = 0; - for(index_type node = 0; node < Hgraph.num_vertices(); ++node) + for (index_type node = 0; node < Hgraph.num_vertices(); ++node) { max_degree = std::max(max_degree, static_cast(Hgraph.get_incident_hyperedges(node).size())); + } - if(max_nodes_in_part == 0) // if not initialized - max_nodes_in_part = static_cast(ceil(static_cast(Hgraph.num_vertices()) * static_cast(partition.getInstance().getMaxWorkWeightPerPartition()) - / static_cast(compute_total_vertex_work_weight(Hgraph)) )); + if (max_nodes_in_part == 0) { // if not initialized + max_nodes_in_part + = static_cast(ceil(static_cast(Hgraph.num_vertices()) + * static_cast(partition.getInstance().getMaxWorkWeightPerPartition()) + / static_cast(compute_total_vertex_work_weight(Hgraph)))); + } - for(unsigned pass_idx = 0; pass_idx < max_number_of_passes; ++pass_idx) - { + for (unsigned pass_idx = 0; pass_idx < max_number_of_passes; ++pass_idx) { std::vector node_to_new_part = partition.assignedPartitions(); std::vector locked(Hgraph.num_vertices(), false); std::vector gain(Hgraph.num_vertices(), 0); @@ -89,53 +89,57 @@ void GenericFM::ImprovePartitioning(Partitioning& pa int cost = 0; index_type left_side = 0; - for(index_type node = 0; node < Hgraph.num_vertices(); ++node) - if(partition.assignedPartition(node) == 0) + for (index_type node = 0; node < Hgraph.num_vertices(); ++node) { + if (partition.assignedPartition(node) == 0) { ++left_side; + } + } - if(left_side > max_nodes_in_part || Hgraph.num_vertices() - left_side > max_nodes_in_part) - { - if(pass_idx == 0) - { - std::cout<<"Error: initial partitioning of FM is not balanced."< max_nodes_in_part || Hgraph.num_vertices() - left_side > max_nodes_in_part) { + if (pass_idx == 0) { + std::cout << "Error: initial partitioning of FM is not balanced." << std::endl; return; - } - else - { - std::cout<<"Error during FM: partitionming somehow became imbalanced."< max_gain(2, -static_cast(max_degree)-1); - std::vector > > gain_bucket_array(2, std::vector >(2*max_degree+1)); - for(index_type node = 0; node < Hgraph.num_vertices(); ++node) - { - const unsigned& part = partition.assignedPartition(node); + std::vector max_gain(2, -static_cast(max_degree) - 1); + std::vector > > gain_bucket_array( + 2, std::vector >(2 * max_degree + 1)); + for (index_type node = 0; node < Hgraph.num_vertices(); ++node) { + const unsigned &part = partition.assignedPartition(node); gain_bucket_array[part][static_cast(gain[node] + static_cast(max_degree))].push_back(node); max_gain[part] = std::max(max_gain[part], gain[node]); } @@ -145,27 +149,26 @@ void GenericFM::ImprovePartitioning(Partitioning& pa std::vector moved_nodes; // the pass itself: make moves - while(moved_nodes.size() < Hgraph.num_vertices()) - { + while (moved_nodes.size() < Hgraph.num_vertices()) { // select move index_type to_move = std::numeric_limits::max(); unsigned chosen_part = std::numeric_limits::max(); unsigned gain_index = static_cast(std::max(max_gain[0], max_gain[1]) + static_cast(max_degree)); - while(gain_index < std::numeric_limits::max()) - { - bool can_choose_left = (Hgraph.num_vertices() - left_side < max_nodes_in_part) && !gain_bucket_array[0][gain_index].empty(); + while (gain_index < std::numeric_limits::max()) { + bool can_choose_left = (Hgraph.num_vertices() - left_side < max_nodes_in_part) + && !gain_bucket_array[0][gain_index].empty(); bool can_choose_right = (left_side < max_nodes_in_part) && !gain_bucket_array[1][gain_index].empty(); - if(can_choose_left && can_choose_right) + if (can_choose_left && can_choose_right) { chosen_part = (left_side >= Hgraph.num_vertices() / 2) ? 1 : 0; - else if(can_choose_left) + } else if (can_choose_left) { chosen_part = 0; - else if(can_choose_right) + } else if (can_choose_right) { chosen_part = 1; + } - if(chosen_part < 2) - { + if (chosen_part < 2) { to_move = gain_bucket_array[chosen_part][gain_index].back(); gain_bucket_array[chosen_part][gain_index].pop_back(); break; @@ -173,84 +176,82 @@ void GenericFM::ImprovePartitioning(Partitioning& pa --gain_index; } - if(to_move == std::numeric_limits::max()) + if (to_move == std::numeric_limits::max()) { break; - + } + // make move moved_nodes.push_back(to_move); cost -= gain[to_move]; - if(cost < best_cost) - { + if (cost < best_cost) { best_cost = cost; best_index = static_cast(moved_nodes.size()) + 1; } locked[to_move] = true; node_to_new_part[to_move] = 1 - node_to_new_part[to_move]; - if(chosen_part == 0) + if (chosen_part == 0) { --left_side; - else + } else { ++left_side; + } - unsigned other_part = 1-chosen_part; + unsigned other_part = 1 - chosen_part; // update gain values - for(index_type hyperedge : Hgraph.get_incident_hyperedges(to_move)) - { - if(nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 1) - { - for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) - { - if(locked[node]) + for (index_type hyperedge : Hgraph.get_incident_hyperedges(to_move)) { + if (nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 1) { + for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { + if (locked[node]) { continue; + } - std::vector& vec = gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))]; + std::vector &vec + = gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))]; vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end()); --gain[node]; - gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))].push_back(node); + gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))].push_back( + node); } - } - else if(nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 2) - { - for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) - { - if(node_to_new_part[node] == chosen_part && !locked[node]) - { - std::vector& vec = gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))]; + } else if (nr_nodes_in_hyperedge_on_side[hyperedge][chosen_part] == 2) { + for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { + if (node_to_new_part[node] == chosen_part && !locked[node]) { + std::vector &vec + = gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))]; vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end()); ++gain[node]; - gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))].push_back(node); + gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))] + .push_back(node); max_gain[chosen_part] = std::max(max_gain[chosen_part], gain[node]); break; } } } - if(nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 1) - { - for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) - { - if(node_to_new_part[node] == other_part && !locked[node]) - { - std::vector& vec = gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))]; + if (nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 1) { + for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { + if (node_to_new_part[node] == other_part && !locked[node]) { + std::vector &vec + = gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))]; vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end()); --gain[node]; - gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))].push_back(node); + gain_bucket_array[other_part][static_cast(gain[node] + static_cast(max_degree))].push_back( + node); break; } } - } - else if(nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 0) - { - for(index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) - { - if(locked[node]) + } else if (nr_nodes_in_hyperedge_on_side[hyperedge][other_part] == 0) { + for (index_type node : Hgraph.get_vertices_in_hyperedge(hyperedge)) { + if (locked[node]) { continue; + } - std::vector& vec = gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))]; + std::vector &vec + = gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))]; vec.erase(std::remove(vec.begin(), vec.end(), node), vec.end()); ++gain[node]; - gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))].push_back(node); + gain_bucket_array[chosen_part][static_cast(gain[node] + static_cast(max_degree))].push_back( + node); max_gain[chosen_part] = std::max(max_gain[chosen_part], gain[node]); } } @@ -260,39 +261,41 @@ void GenericFM::ImprovePartitioning(Partitioning& pa } // apply best configuration seen - if(best_index == 0) + if (best_index == 0) { break; + } - for(index_type node_idx = 0; node_idx < best_index && node_idx < static_cast(moved_nodes.size()); ++node_idx) - partition.setAssignedPartition(moved_nodes[node_idx], 1U-partition.assignedPartition(moved_nodes[node_idx])); - + for (index_type node_idx = 0; node_idx < best_index && node_idx < static_cast(moved_nodes.size()); ++node_idx) { + partition.setAssignedPartition(moved_nodes[node_idx], 1U - partition.assignedPartition(moved_nodes[node_idx])); + } } } -template -void GenericFM::RecursiveFM(Partitioning& partition) -{ - const unsigned& nr_parts = partition.getInstance().getNumberOfPartitions(); - const index_type& nr_nodes = partition.getInstance().getHypergraph().num_vertices(); +template +void GenericFM::RecursiveFM(Partitioning &partition) { + const unsigned &nr_parts = partition.getInstance().getNumberOfPartitions(); + const index_type &nr_nodes = partition.getInstance().getHypergraph().num_vertices(); using Hgraph = Hypergraph; // Note: this is just a simple recursive heuristic for the case when the partitions are a small power of 2 - if(nr_parts != 4 && nr_parts != 8 && nr_parts != 16 && nr_parts != 32) - { + if (nr_parts != 4 && nr_parts != 8 && nr_parts != 16 && nr_parts != 32) { std::cout << "Error: Recursive FM can only be used for 4, 8, 16 or 32 partitions currently." << std::endl; return; } - for(index_type node = 0; node < nr_nodes; ++node) + for (index_type node = 0; node < nr_nodes; ++node) { partition.setAssignedPartition(node, static_cast(node % 2)); + } - if(max_nodes_in_part == 0) // if not initialized - max_nodes_in_part = static_cast(ceil(static_cast(nr_nodes) * static_cast(partition.getInstance().getMaxWorkWeightPerPartition()) - / static_cast(compute_total_vertex_work_weight(partition.getInstance().getHypergraph())) )); + if (max_nodes_in_part == 0) { // if not initialized + max_nodes_in_part = static_cast( + ceil(static_cast(nr_nodes) * static_cast(partition.getInstance().getMaxWorkWeightPerPartition()) + / static_cast(compute_total_vertex_work_weight(partition.getInstance().getHypergraph())))); + } const std::vector max_nodes_on_level = getMaxNodesOnLevel(nr_nodes, nr_parts); - + unsigned parts = 1; unsigned level = 0; std::vector sub_hgraphs({partition.getInstance().getHypergraph()}); @@ -300,42 +303,40 @@ void GenericFM::RecursiveFM(Partitioning& partition) std::map > node_to_new_hgraph_and_id; std::map, index_type> hgraph_and_id_to_old_idx; - for(index_type node = 0; node < nr_nodes; ++node) - { + for (index_type node = 0; node < nr_nodes; ++node) { node_to_new_hgraph_and_id[node] = std::make_pair(0, node); hgraph_and_id_to_old_idx[std::make_pair(0, node)] = node; } - while(parts < nr_parts) - { + while (parts < nr_parts) { unsigned end_idx = static_cast(sub_hgraphs.size()); - for(unsigned sub_hgraph_index = start_index; sub_hgraph_index < end_idx; ++sub_hgraph_index) - { - const Hgraph& hgraph = sub_hgraphs[sub_hgraph_index]; + for (unsigned sub_hgraph_index = start_index; sub_hgraph_index < end_idx; ++sub_hgraph_index) { + const Hgraph &hgraph = sub_hgraphs[sub_hgraph_index]; PartitioningProblem instance(hgraph, 2); Partitioning sub_partition(instance); - for(index_type node = 0; node < hgraph.num_vertices(); ++node) - sub_partition.setAssignedPartition(node, node%2); - + for (index_type node = 0; node < hgraph.num_vertices(); ++node) { + sub_partition.setAssignedPartition(node, node % 2); + } + GenericFM sub_fm; sub_fm.setMaxNodesInPart(max_nodes_on_level[level]); - //std::cout<<"Hgraph of size "< current_idx(2, 0); std::vector > part_indicator(2, std::vector(hgraph.num_vertices(), false)); - for(index_type node = 0; node < hgraph.num_vertices(); ++node) - { + for (index_type node = 0; node < hgraph.num_vertices(); ++node) { const unsigned part_id = sub_partition.assignedPartition(node); const index_type original_id = hgraph_and_id_to_old_idx[std::make_pair(sub_hgraph_index, node)]; - node_to_new_hgraph_and_id[original_id] = std::make_pair(sub_hgraphs.size()+part_id, current_idx[part_id]); - hgraph_and_id_to_old_idx[std::make_pair(sub_hgraphs.size()+part_id, current_idx[part_id])] = original_id; + node_to_new_hgraph_and_id[original_id] = std::make_pair(sub_hgraphs.size() + part_id, current_idx[part_id]); + hgraph_and_id_to_old_idx[std::make_pair(sub_hgraphs.size() + part_id, current_idx[part_id])] = original_id; ++current_idx[part_id]; part_indicator[part_id][node] = true; } - for(unsigned part = 0; part < 2; ++part) + for (unsigned part = 0; part < 2; ++part) { sub_hgraphs.push_back(create_induced_hypergraph(sub_hgraphs[sub_hgraph_index], part_indicator[part])); + } ++start_index; } @@ -343,33 +344,35 @@ void GenericFM::RecursiveFM(Partitioning& partition) parts *= 2; ++level; } - - for(index_type node = 0; node < nr_nodes; ++node) - partition.setAssignedPartition(node, node_to_new_hgraph_and_id[node].first - (static_cast(sub_hgraphs.size())-nr_parts)); - + + for (index_type node = 0; node < nr_nodes; ++node) { + partition.setAssignedPartition( + node, node_to_new_hgraph_and_id[node].first - (static_cast(sub_hgraphs.size()) - nr_parts)); + } } -template -std::vector GenericFM::getMaxNodesOnLevel(typename hypergraph_t::vertex_idx nr_nodes, unsigned nr_parts) const -{ +template +std::vector GenericFM::getMaxNodesOnLevel( + typename hypergraph_t::vertex_idx nr_nodes, unsigned nr_parts) const { std::vector max_nodes_on_level; std::vector limit_per_level({static_cast(ceil(static_cast(nr_nodes) / 2.0))}); - for(unsigned parts = nr_parts / 4; parts > 0; parts /= 2) + for (unsigned parts = nr_parts / 4; parts > 0; parts /= 2) { limit_per_level.push_back(static_cast(ceil(static_cast(limit_per_level.back()) / 2.0))); + } max_nodes_on_level.push_back(max_nodes_in_part); - for(unsigned parts = 2; parts < nr_parts; parts *= 2) - { - index_type next_limit = max_nodes_on_level.back()*2; - if(next_limit > limit_per_level.back()) + for (unsigned parts = 2; parts < nr_parts; parts *= 2) { + index_type next_limit = max_nodes_on_level.back() * 2; + if (next_limit > limit_per_level.back()) { --next_limit; - + } + limit_per_level.pop_back(); max_nodes_on_level.push_back(next_limit); } - std::reverse(max_nodes_on_level.begin(),max_nodes_on_level.end()); + std::reverse(max_nodes_on_level.begin(), max_nodes_on_level.end()); return max_nodes_on_level; } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/partitioning/partitioners/partitioning_ILP.hpp b/include/osp/partitioning/partitioners/partitioning_ILP.hpp index 2e6c4e0e..5623d581 100644 --- a/include/osp/partitioning/partitioners/partitioning_ILP.hpp +++ b/include/osp/partitioning/partitioners/partitioning_ILP.hpp @@ -27,9 +27,8 @@ limitations under the License. namespace osp { -template +template class HypergraphPartitioningILP : public HypergraphPartitioningILPBase { - protected: std::vector readCoptAssignment(const PartitioningProblem &instance, Model &model); @@ -45,7 +44,7 @@ class HypergraphPartitioningILP : public HypergraphPartitioningILPBase +template RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Partitioning &result) { Envr env; Model model = env.CreateModel("HypergraphPart"); @@ -53,24 +52,21 @@ RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Parti this->setupFundamentalVariablesConstraintsObjective(result.getInstance(), model); setupExtraVariablesConstraints(result.getInstance(), model); - if (this->use_initial_solution) + if (this->use_initial_solution) { setInitialSolution(result, model); + } this->solveILP(model); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - result.setAssignedPartitions(readCoptAssignment(result.getInstance(), model)); return RETURN_STATUS::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - result.setAssignedPartitions(readCoptAssignment(result.getInstance(), model)); return RETURN_STATUS::OSP_SUCCESS; @@ -80,9 +76,9 @@ RETURN_STATUS HypergraphPartitioningILP::computePartitioning(Parti } } -template -void HypergraphPartitioningILP::setupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model) { - +template +void HypergraphPartitioningILP::setupExtraVariablesConstraints(const PartitioningProblem &instance, + Model &model) { using index_type = typename hypergraph_t::vertex_idx; const index_type numberOfParts = instance.getNumberOfPartitions(); @@ -92,52 +88,61 @@ void HypergraphPartitioningILP::setupExtraVariablesConstraints(con // each node assigned to exactly one partition for (index_type node = 0; node < numberOfVertices; node++) { - Expr expr; - for (unsigned part = 0; part < numberOfParts; part++) + for (unsigned part = 0; part < numberOfParts; part++) { expr += this->node_in_partition[node][static_cast(part)]; + } model.AddConstr(expr == 1); } // hyperedge indicators match node variables - for (unsigned part = 0; part < numberOfParts; part++) - for (index_type node = 0; node < numberOfVertices; node++) - for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) - model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part)] >= this->node_in_partition[node][static_cast(part)]); + for (unsigned part = 0; part < numberOfParts; part++) { + for (index_type node = 0; node < numberOfVertices; node++) { + for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { + model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part)] + >= this->node_in_partition[node][static_cast(part)]); + } + } + } } // convert generic one-to-many assingment (of base class function) to one-to-one -template -std::vector HypergraphPartitioningILP::readCoptAssignment(const PartitioningProblem &instance, Model &model) { +template +std::vector HypergraphPartitioningILP::readCoptAssignment(const PartitioningProblem &instance, + Model &model) { using index_type = typename hypergraph_t::vertex_idx; std::vector node_to_partition(instance.getHypergraph().num_vertices(), std::numeric_limits::max()); std::vector> assignmentsGenericForm = this->readAllCoptAssignments(instance, model); - for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) + for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) { node_to_partition[node] = assignmentsGenericForm[node].front(); + } return node_to_partition; } -template +template void HypergraphPartitioningILP::setInitialSolution(const Partitioning &partition, Model &model) { using index_type = typename hypergraph_t::vertex_idx; const std::vector &assignment = partition.assignedPartitions(); const unsigned &numPartitions = partition.getInstance().getNumberOfPartitions(); - if (assignment.size() != partition.getInstance().getHypergraph().num_vertices()) + if (assignment.size() != partition.getInstance().getHypergraph().num_vertices()) { return; + } for (index_type node = 0; node < assignment.size(); ++node) { - if (assignment[node] >= numPartitions) + if (assignment[node] >= numPartitions) { continue; + } - for (unsigned part = 0; part < numPartitions; ++part) + for (unsigned part = 0; part < numPartitions; ++part) { model.SetMipStart(this->node_in_partition[node][static_cast(part)], static_cast(assignment[node] == part)); + } } model.LoadMipStart(); } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp b/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp index b5df2fb4..23e12d5b 100644 --- a/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp +++ b/include/osp/partitioning/partitioners/partitioning_ILP_base.hpp @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once @@ -21,14 +21,13 @@ limitations under the License. #include #include +#include "osp/bsp/model/BspInstance.hpp" // for return statuses (stati?) #include "osp/partitioning/model/partitioning_problem.hpp" -#include "osp/bsp/model/BspInstance.hpp" // for return statuses (stati?) -namespace osp{ +namespace osp { -template +template class HypergraphPartitioningILPBase { - protected: std::vector node_in_partition; std::vector hyperedge_uses_partition; @@ -36,26 +35,26 @@ class HypergraphPartitioningILPBase { unsigned time_limit_seconds = 3600; bool use_initial_solution = false; - std::vector > readAllCoptAssignments(const PartitioningProblem &instance, Model& model); + std::vector > readAllCoptAssignments(const PartitioningProblem &instance, Model &model); - void setupFundamentalVariablesConstraintsObjective(const PartitioningProblem &instance, Model& model); + void setupFundamentalVariablesConstraintsObjective(const PartitioningProblem &instance, Model &model); - void solveILP(Model& model); + void solveILP(Model &model); public: - virtual std::string getAlgorithmName() const = 0; inline unsigned getTimeLimitSeconds() const { return time_limit_seconds; } + inline void setTimeLimitSeconds(unsigned limit_) { time_limit_seconds = limit_; } + inline void setUseInitialSolution(bool use_) { use_initial_solution = use_; } virtual ~HypergraphPartitioningILPBase() = default; }; -template -void HypergraphPartitioningILPBase::solveILP(Model& model) { - +template +void HypergraphPartitioningILPBase::solveILP(Model &model) { model.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0); model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, time_limit_seconds); @@ -74,9 +73,9 @@ void HypergraphPartitioningILPBase::solveILP(Model& model) { model.Solve(); } -template -void HypergraphPartitioningILPBase::setupFundamentalVariablesConstraintsObjective(const PartitioningProblem &instance, Model& model) { - +template +void HypergraphPartitioningILPBase::setupFundamentalVariablesConstraintsObjective( + const PartitioningProblem &instance, Model &model) { using index_type = typename hypergraph_t::vertex_idx; using workw_type = typename hypergraph_t::vertex_work_weight_type; using memw_type = typename hypergraph_t::vertex_mem_weight_type; @@ -89,90 +88,93 @@ void HypergraphPartitioningILPBase::setupFundamentalVariablesConst node_in_partition = std::vector(numberOfVertices); - for (index_type node = 0; node < numberOfVertices; node++) + for (index_type node = 0; node < numberOfVertices; node++) { node_in_partition[node] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "node_in_partition"); + } hyperedge_uses_partition = std::vector(numberOfHyperedges); - for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) - hyperedge_uses_partition[hyperedge] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "hyperedge_uses_partition"); - + for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) { + hyperedge_uses_partition[hyperedge] + = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "hyperedge_uses_partition"); + } + // partition size constraints - if(instance.getMaxWorkWeightPerPartition() < std::numeric_limits::max()) - { - for (unsigned part = 0; part < numberOfParts; part++) - { + if (instance.getMaxWorkWeightPerPartition() < std::numeric_limits::max()) { + for (unsigned part = 0; part < numberOfParts; part++) { Expr expr; - for (index_type node = 0; node < numberOfVertices; node++) + for (index_type node = 0; node < numberOfVertices; node++) { expr += instance.getHypergraph().get_vertex_work_weight(node) * node_in_partition[node][static_cast(part)]; + } model.AddConstr(expr <= instance.getMaxWorkWeightPerPartition()); } - } - if(instance.getMaxMemoryWeightPerPartition() < std::numeric_limits::max()) - { - for (unsigned part = 0; part < numberOfParts; part++) - { + } + if (instance.getMaxMemoryWeightPerPartition() < std::numeric_limits::max()) { + for (unsigned part = 0; part < numberOfParts; part++) { Expr expr; - for (index_type node = 0; node < numberOfVertices; node++) + for (index_type node = 0; node < numberOfVertices; node++) { expr += instance.getHypergraph().get_vertex_memory_weight(node) * node_in_partition[node][static_cast(part)]; + } model.AddConstr(expr <= instance.getMaxMemoryWeightPerPartition()); } - } + } // set objective Expr expr; - for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) - { + for (index_type hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) { expr -= instance.getHypergraph().get_hyperedge_weight(hyperedge); - for (unsigned part = 0; part < numberOfParts; part++) - expr += instance.getHypergraph().get_hyperedge_weight(hyperedge) * hyperedge_uses_partition[hyperedge][static_cast(part)]; + for (unsigned part = 0; part < numberOfParts; part++) { + expr += instance.getHypergraph().get_hyperedge_weight(hyperedge) + * hyperedge_uses_partition[hyperedge][static_cast(part)]; + } } model.SetObjective(expr, COPT_MINIMIZE); - } -template -std::vector > HypergraphPartitioningILPBase::readAllCoptAssignments(const PartitioningProblem &instance, Model& model) -{ +template +std::vector > HypergraphPartitioningILPBase::readAllCoptAssignments( + const PartitioningProblem &instance, Model &model) { using index_type = typename hypergraph_t::vertex_idx; - std::vector > node_to_partitions(instance.getHypergraph().num_vertices()); std::set nonempty_partition_ids; - for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) - for(unsigned part = 0; part < instance.getNumberOfPartitions(); part++) - if(node_in_partition[node][static_cast(part)].Get(COPT_DBLINFO_VALUE) >= .99) - { + for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) { + for (unsigned part = 0; part < instance.getNumberOfPartitions(); part++) { + if (node_in_partition[node][static_cast(part)].Get(COPT_DBLINFO_VALUE) >= .99) { node_to_partitions[node].push_back(part); nonempty_partition_ids.insert(part); } + } + } - for(std::vector& chosen_partitions : node_to_partitions) - if(chosen_partitions.empty()) - { - std::cout<<"Error: partitioning returned by ILP seems incomplete!"< &chosen_partitions : node_to_partitions) { + if (chosen_partitions.empty()) { + std::cout << "Error: partitioning returned by ILP seems incomplete!" << std::endl; chosen_partitions.push_back(std::numeric_limits::max()); } - + } + unsigned current_index = 0; std::map new_part_index; - for(unsigned part_index : nonempty_partition_ids) - { + for (unsigned part_index : nonempty_partition_ids) { new_part_index[part_index] = current_index; ++current_index; } - for(index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) - for(unsigned entry_idx = 0; entry_idx < node_to_partitions[node].size(); ++entry_idx) + for (index_type node = 0; node < instance.getHypergraph().num_vertices(); node++) { + for (unsigned entry_idx = 0; entry_idx < node_to_partitions[node].size(); ++entry_idx) { node_to_partitions[node][entry_idx] = new_part_index[node_to_partitions[node][entry_idx]]; + } + } - std::cout<<"Hypergraph partitioning ILP best solution value: "< #include -#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp" #include "osp/partitioning/model/partitioning_replication.hpp" +#include "osp/partitioning/partitioners/partitioning_ILP_base.hpp" -namespace osp{ +namespace osp { -template +template class HypergraphPartitioningILPWithReplication : public HypergraphPartitioningILPBase { - public: enum class REPLICATION_MODEL_IN_ILP { ONLY_TWICE, GENERAL }; protected: - void setupExtraVariablesConstraints(const PartitioningProblem &instance, Model& model); + void setupExtraVariablesConstraints(const PartitioningProblem &instance, Model &model); - void setInitialSolution(const PartitioningWithReplication &partition, Model& model); + void setInitialSolution(const PartitioningWithReplication &partition, Model &model); REPLICATION_MODEL_IN_ILP replication_model = REPLICATION_MODEL_IN_ILP::ONLY_TWICE; public: - virtual ~HypergraphPartitioningILPWithReplication() override = default; - RETURN_STATUS computePartitioning(PartitioningWithReplication& result); + RETURN_STATUS computePartitioning(PartitioningWithReplication &result); virtual std::string getAlgorithmName() const override { return "HypergraphPartitioningILPWithReplication"; } void setReplicationModel(REPLICATION_MODEL_IN_ILP replication_model_) { replication_model = replication_model_; } }; -template -RETURN_STATUS HypergraphPartitioningILPWithReplication::computePartitioning(PartitioningWithReplication& result) -{ +template +RETURN_STATUS HypergraphPartitioningILPWithReplication::computePartitioning( + PartitioningWithReplication &result) { Envr env; Model model = env.CreateModel("HypergraphPartRepl"); this->setupFundamentalVariablesConstraintsObjective(result.getInstance(), model); setupExtraVariablesConstraints(result.getInstance(), model); - if(this->use_initial_solution) + if (this->use_initial_solution) { setInitialSolution(result, model); + } this->solveILP(model); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - result.setAssignedPartitionVectors(this->readAllCoptAssignments(result.getInstance(), model)); return RETURN_STATUS::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - result.setAssignedPartitionVectors(this->readAllCoptAssignments(result.getInstance(), model)); return RETURN_STATUS::OSP_SUCCESS; @@ -86,104 +81,126 @@ RETURN_STATUS HypergraphPartitioningILPWithReplication::computePar } } -template -void HypergraphPartitioningILPWithReplication::setupExtraVariablesConstraints(const PartitioningProblem &instance, Model& model) { - +template +void HypergraphPartitioningILPWithReplication::setupExtraVariablesConstraints( + const PartitioningProblem &instance, Model &model) { using index_type = typename hypergraph_t::vertex_idx; - const index_type numberOfParts = instance.getNumberOfPartitions(); const index_type numberOfVertices = instance.getHypergraph().num_vertices(); - if(replication_model == REPLICATION_MODEL_IN_ILP::GENERAL) - { + if (replication_model == REPLICATION_MODEL_IN_ILP::GENERAL) { // create variables for each pin+partition combination std::map, index_type> pin_ID_map; index_type nr_of_pins = 0; - for (index_type node = 0; node < numberOfVertices; node++) - for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) + for (index_type node = 0; node < numberOfVertices; node++) { + for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { pin_ID_map[std::make_pair(node, hyperedge)] = nr_of_pins++; - + } + } + std::vector pin_covered_by_partition = std::vector(nr_of_pins); - for (index_type pin = 0; pin < nr_of_pins; pin++) + for (index_type pin = 0; pin < nr_of_pins; pin++) { pin_covered_by_partition[pin] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "pin_covered_by_partition"); + } // each pin covered exactly once for (index_type pin = 0; pin < nr_of_pins; pin++) { - Expr expr; - for (unsigned part = 0; part < numberOfParts; part++) + for (unsigned part = 0; part < numberOfParts; part++) { expr += pin_covered_by_partition[pin][static_cast(part)]; + } model.AddConstr(expr == 1); } // pin covering requires node assignment - for (unsigned part = 0; part < numberOfParts; part++) - for (index_type node = 0; node < numberOfVertices; node++) - for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) - model.AddConstr(this->node_in_partition[node][static_cast(part)] >= pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast(part)]); + for (unsigned part = 0; part < numberOfParts; part++) { + for (index_type node = 0; node < numberOfVertices; node++) { + for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { + model.AddConstr( + this->node_in_partition[node][static_cast(part)] + >= pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast(part)]); + } + } + } // pin covering requires hyperedge use - for (unsigned part = 0; part < numberOfParts; part++) - for (index_type node = 0; node < numberOfVertices; node++) - for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) - model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part)] >= pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast(part)]); + for (unsigned part = 0; part < numberOfParts; part++) { + for (index_type node = 0; node < numberOfVertices; node++) { + for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { + model.AddConstr( + this->hyperedge_uses_partition[hyperedge][static_cast(part)] + >= pin_covered_by_partition[pin_ID_map[std::make_pair(node, hyperedge)]][static_cast(part)]); + } + } + } - } - else if(replication_model == REPLICATION_MODEL_IN_ILP::ONLY_TWICE) - { + } else if (replication_model == REPLICATION_MODEL_IN_ILP::ONLY_TWICE) { // each node has one or two copies VarArray node_replicated = model.AddVars(static_cast(numberOfVertices), COPT_BINARY, "node_replicated"); - - for (index_type node = 0; node < numberOfVertices; node++) { + for (index_type node = 0; node < numberOfVertices; node++) { Expr expr = -1; - for (unsigned part = 0; part < numberOfParts; part++) + for (unsigned part = 0; part < numberOfParts; part++) { expr += this->node_in_partition[node][static_cast(part)]; + } model.AddConstr(expr == node_replicated[static_cast(node)]); } // hyperedge indicators if node is not replicated - for (unsigned part = 0; part < numberOfParts; part++) - for (index_type node = 0; node < numberOfVertices; node++) - for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) - model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part)] >= this->node_in_partition[node][static_cast(part)] - node_replicated[static_cast(node)]); - + for (unsigned part = 0; part < numberOfParts; part++) { + for (index_type node = 0; node < numberOfVertices; node++) { + for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { + model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part)] + >= this->node_in_partition[node][static_cast(part)] + - node_replicated[static_cast(node)]); + } + } + } + // hyperedge indicators if node is replicated - for (index_type node = 0; node < numberOfVertices; node++) - for (const index_type& hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) - for (unsigned part1 = 0; part1 < numberOfParts; part1++) - for (unsigned part2 = part1+1; part2 < numberOfParts; part2++) - model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part1)] + this->hyperedge_uses_partition[hyperedge][static_cast(part2)] >= - this->node_in_partition[node][static_cast(part1)] + this->node_in_partition[node][static_cast(part2)] - 1); + for (index_type node = 0; node < numberOfVertices; node++) { + for (const index_type &hyperedge : instance.getHypergraph().get_incident_hyperedges(node)) { + for (unsigned part1 = 0; part1 < numberOfParts; part1++) { + for (unsigned part2 = part1 + 1; part2 < numberOfParts; part2++) { + model.AddConstr(this->hyperedge_uses_partition[hyperedge][static_cast(part1)] + + this->hyperedge_uses_partition[hyperedge][static_cast(part2)] + >= this->node_in_partition[node][static_cast(part1)] + + this->node_in_partition[node][static_cast(part2)] - 1); + } + } + } + } } - } -template -void HypergraphPartitioningILPWithReplication::setInitialSolution(const PartitioningWithReplication &partition, Model& model) -{ +template +void HypergraphPartitioningILPWithReplication::setInitialSolution( + const PartitioningWithReplication &partition, Model &model) { using index_type = typename hypergraph_t::vertex_idx; - const std::vector >& assignments = partition.assignedPartitions(); - const unsigned& numPartitions = partition.getInstance().getNumberOfPartitions(); - if(assignments.size() != partition.getInstance().getHypergraph().num_vertices()) + const std::vector > &assignments = partition.assignedPartitions(); + const unsigned &numPartitions = partition.getInstance().getNumberOfPartitions(); + if (assignments.size() != partition.getInstance().getHypergraph().num_vertices()) { return; + } - for(index_type node = 0; node < assignments.size(); ++node) - { + for (index_type node = 0; node < assignments.size(); ++node) { std::vector assingedToPart(numPartitions, false); - for(unsigned part : assignments[node]) - if(part < numPartitions) + for (unsigned part : assignments[node]) { + if (part < numPartitions) { assingedToPart[part] = true; - - for(unsigned part = 0; part < numPartitions; ++part) + } + } + + for (unsigned part = 0; part < numPartitions; ++part) { model.SetMipStart(this->node_in_partition[node][static_cast(part)], static_cast(assingedToPart[part])); + } } model.LoadMipStart(); } -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/include/osp/pebbling/PebblingSchedule.hpp b/include/osp/pebbling/PebblingSchedule.hpp index 008e3fa8..440d6254 100644 --- a/include/osp/pebbling/PebblingSchedule.hpp +++ b/include/osp/pebbling/PebblingSchedule.hpp @@ -13,17 +13,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once +#include +#include #include #include #include #include -#include -#include #include "osp/bsp/model/BspSchedule.hpp" #include "osp/concepts/computational_dag_concept.hpp" @@ -37,27 +37,27 @@ typedef std::tuple KeyTriple; * @brief Represents a schedule for 2-level MultiBSP model with memory constraints. * * Alternatively, can be understood as the generalization of multiprocessor red-blue pebble game with node weights. - * The synchronous interpretation is essentially a 2-level Multi-BSP, while the asynchronous interpretation is + * The synchronous interpretation is essentially a 2-level Multi-BSP, while the asynchronous interpretation is * closer to makespan metrics in classical schedules. - * - * Besides basic utility such as validity check, cost computation and conversion from a Bsp Schedule, it also allows + * + * Besides basic utility such as validity check, cost computation and conversion from a Bsp Schedule, it also allows * conversions to/from several MultiProcessorPebbling ILP methods that address this problem. * * Works with a `BspInstance` object, which represents the instance of the scheduling problem being solved. * * @see BspInstance */ -template +template class PebblingSchedule { - - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); + static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); private: using vertex_idx = vertex_idx_t; using cost_type = v_workw_t; using memweight_type = v_memw_t; - static_assert(std::is_same_v, v_commw_t>, "PebblingSchedule requires work and comm. weights to have the same type."); + static_assert(std::is_same_v, v_commw_t>, + "PebblingSchedule requires work and comm. weights to have the same type."); const BspInstance *instance; @@ -65,92 +65,95 @@ class PebblingSchedule { bool need_to_load_inputs = true; - struct compute_step - { - vertex_idx node; - std::vector nodes_evicted_after; + struct compute_step { + vertex_idx node; + std::vector nodes_evicted_after; - compute_step() {} - compute_step(vertex_idx node_) : node(node_) {} - compute_step(vertex_idx node_, const std::vector& evicted_) : node(node_), nodes_evicted_after(evicted_) {} + compute_step() {} + + compute_step(vertex_idx node_) : node(node_) {} + + compute_step(vertex_idx node_, const std::vector &evicted_) : node(node_), nodes_evicted_after(evicted_) {} }; // executed nodes in order in a computation phase, for processor p and superstep s - std::vector > > compute_steps_for_proc_superstep; + std::vector>> compute_steps_for_proc_superstep; // nodes evicted from cache in a given superstep's comm phase - std::vector > > nodes_evicted_in_comm; + std::vector>> nodes_evicted_in_comm; // nodes sent down to processor p in superstep s - std::vector > > nodes_sent_down; + std::vector>> nodes_sent_down; // nodes sent up from processor p in superstep s - std::vector > > nodes_sent_up; + std::vector>> nodes_sent_up; // set of nodes that need to have blue pebble at end, sinks by default, and // set of nodes on each processor that begin with red pebble, nothing by default // (TODO: maybe move to problem definition classes instead?) std::set needs_blue_at_end; - std::vector > has_red_in_beginning; + std::vector> has_red_in_beginning; // nodes that are from a previous part of a larger DAG, handled differently in conversion std::set external_sources; public: - - enum CACHE_EVICTION_STRATEGY - { - FORESIGHT, - LEAST_RECENTLY_USED, - LARGEST_ID - }; + enum CACHE_EVICTION_STRATEGY { FORESIGHT, LEAST_RECENTLY_USED, LARGEST_ID }; /** * @brief Default constructor for the PebblingSchedule class. */ PebblingSchedule() : instance(nullptr), number_of_supersteps(0) {} - PebblingSchedule(const BspInstance &inst) : instance(&inst) - { - BspSchedule schedule(inst, std::vector(inst.numberOfVertices(), 0), std::vector(inst.numberOfVertices(), 0)); - ConvertFromBsp(schedule); + PebblingSchedule(const BspInstance &inst) : instance(&inst) { + BspSchedule schedule( + inst, std::vector(inst.numberOfVertices(), 0), std::vector(inst.numberOfVertices(), 0)); + ConvertFromBsp(schedule); } - PebblingSchedule(const BspInstance &inst, const std::vector& processor_assignment_, - const std::vector& superstep_assignment_) : instance(&inst) - { - BspSchedule schedule(inst, processor_assignment_, superstep_assignment_); - ConvertFromBsp(schedule); + PebblingSchedule(const BspInstance &inst, + const std::vector &processor_assignment_, + const std::vector &superstep_assignment_) + : instance(&inst) { + BspSchedule schedule(inst, processor_assignment_, superstep_assignment_); + ConvertFromBsp(schedule); } PebblingSchedule(const BspInstance &inst, - const std::vector > >& compute_steps, - const std::vector > > >& nodes_evicted_after_compute, - const std::vector > >& nodes_sent_up_, - const std::vector > >& nodes_sent_down_, - const std::vector > >& nodes_evicted_in_comm_, - const std::set& needs_blue_at_end_ = std::set(), - const std::vector >& has_red_in_beginning_ = std::vector >(), - const bool need_to_load_inputs_ = false) : - instance(&inst), number_of_supersteps(0), - need_to_load_inputs (need_to_load_inputs_), - nodes_evicted_in_comm(nodes_evicted_in_comm_), nodes_sent_down(nodes_sent_down_), nodes_sent_up(nodes_sent_up_), - needs_blue_at_end(needs_blue_at_end_), has_red_in_beginning(has_red_in_beginning_) - - { - compute_steps_for_proc_superstep.resize(compute_steps.size(), std::vector >(compute_steps[0].size())); - for(unsigned proc = 0; proc < compute_steps.size(); ++proc) - { - number_of_supersteps = std::max(number_of_supersteps, static_cast(compute_steps[proc].size())); - for(unsigned supstep = 0; supstep < static_cast(compute_steps[proc].size()); ++supstep) - for(unsigned step_index = 0; step_index < static_cast(compute_steps[proc][supstep].size()); ++step_index) - compute_steps_for_proc_superstep[proc][supstep].emplace_back(compute_steps[proc][supstep][step_index], - nodes_evicted_after_compute[proc][supstep][step_index]); - } + const std::vector>> &compute_steps, + const std::vector>>> &nodes_evicted_after_compute, + const std::vector>> &nodes_sent_up_, + const std::vector>> &nodes_sent_down_, + const std::vector>> &nodes_evicted_in_comm_, + const std::set &needs_blue_at_end_ = std::set(), + const std::vector> &has_red_in_beginning_ = std::vector>(), + const bool need_to_load_inputs_ = false) + : instance(&inst), + number_of_supersteps(0), + need_to_load_inputs(need_to_load_inputs_), + nodes_evicted_in_comm(nodes_evicted_in_comm_), + nodes_sent_down(nodes_sent_down_), + nodes_sent_up(nodes_sent_up_), + needs_blue_at_end(needs_blue_at_end_), + has_red_in_beginning(has_red_in_beginning_) { + compute_steps_for_proc_superstep.resize(compute_steps.size(), + std::vector>(compute_steps[0].size())); + for (unsigned proc = 0; proc < compute_steps.size(); ++proc) { + number_of_supersteps = std::max(number_of_supersteps, static_cast(compute_steps[proc].size())); + for (unsigned supstep = 0; supstep < static_cast(compute_steps[proc].size()); ++supstep) { + for (unsigned step_index = 0; step_index < static_cast(compute_steps[proc][supstep].size()); + ++step_index) { + compute_steps_for_proc_superstep[proc][supstep].emplace_back( + compute_steps[proc][supstep][step_index], nodes_evicted_after_compute[proc][supstep][step_index]); + } + } + } } PebblingSchedule(const BspSchedule &schedule, CACHE_EVICTION_STRATEGY evict_rule = LARGEST_ID) - : instance(&schedule.getInstance()) { ConvertFromBsp(schedule, evict_rule); } + : instance(&schedule.getInstance()) { + ConvertFromBsp(schedule, evict_rule); + } virtual ~PebblingSchedule() = default; @@ -164,9 +167,10 @@ class PebblingSchedule { // convert from unconstrained schedule void ConvertFromBsp(const BspSchedule &schedule, CACHE_EVICTION_STRATEGY evict_rule = LARGEST_ID); - //auxiliary for conversion - std::vector > > computeTopOrdersDFS(const BspSchedule &schedule) const; - static bool hasValidSolution(const BspInstance &instance, const std::set& external_sources = std::set()); + // auxiliary for conversion + std::vector>> computeTopOrdersDFS(const BspSchedule &schedule) const; + static bool hasValidSolution(const BspInstance &instance, + const std::set &external_sources = std::set()); void SplitSupersteps(const BspSchedule &schedule); void SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_rule = LARGEST_ID); @@ -175,10 +179,13 @@ class PebblingSchedule { // other basic operations bool isValid() const; - static std::vector minimumMemoryRequiredPerNodeType(const BspInstance& instance, const std::set& external_sources = std::set()); + static std::vector minimumMemoryRequiredPerNodeType(const BspInstance &instance, + const std::set &external_sources + = std::set()); // expand a MemSchedule from a coarsened DAG to the original DAG - PebblingSchedule ExpandMemSchedule(const BspInstance& original_instance, const std::vector mapping_to_coarse) const; + PebblingSchedule ExpandMemSchedule(const BspInstance &original_instance, + const std::vector mapping_to_coarse) const; // convert to BSP (ignores vertical I/O and recomputation) BspSchedule ConvertToBsp() const; @@ -190,7 +197,6 @@ class PebblingSchedule { */ const BspInstance &getInstance() const { return *instance; } - /** * @brief Returns the number of supersteps in the schedule. * @@ -201,272 +207,310 @@ class PebblingSchedule { void updateNumberOfSupersteps(unsigned new_number_of_supersteps); inline bool needsToLoadInputs() const { return need_to_load_inputs; } - inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_;} - void getDataForMultiprocessorPebbling(std::vector > >& computeSteps, - std::vector > >& sendUpSteps, - std::vector > >& sendDownSteps, - std::vector > >& nodesEvictedAfterStep) const; + inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_; } + void getDataForMultiprocessorPebbling(std::vector>> &computeSteps, + std::vector>> &sendUpSteps, + std::vector>> &sendDownSteps, + std::vector>> &nodesEvictedAfterStep) const; // utility for partial ILPs - std::vector > getMemContentAtEnd() const; + std::vector> getMemContentAtEnd() const; void removeEvictStepsFromEnd(); void CreateFromPartialPebblings(const BspInstance &bsp_instance, - const std::vector >& pebblings, - const std::vector >& processors_to_parts, - const std::vector >& original_node_id, - const std::vector >& original_proc_id, - const std::vector > >& has_reds_in_beginning); + const std::vector> &pebblings, + const std::vector> &processors_to_parts, + const std::vector> &original_node_id, + const std::vector> &original_proc_id, + const std::vector>> &has_reds_in_beginning); - // auxiliary function to remove some unnecessary communications after assembling from partial pebblings - void FixForceEvicts(const std::vector > force_evict_node_proc_step); + void FixForceEvicts(const std::vector> force_evict_node_proc_step); // auxiliary after partial pebblings: try to merge supersteps void TryToMergeSupersteps(); - const std::vector& GetComputeStepsForProcSuperstep(unsigned proc, unsigned supstep) const {return compute_steps_for_proc_superstep[proc][supstep];} - const std::vector& GetNodesEvictedInComm(unsigned proc, unsigned supstep) const {return nodes_evicted_in_comm[proc][supstep];} - const std::vector& GetNodesSentDown(unsigned proc, unsigned supstep) const {return nodes_sent_down[proc][supstep];} - const std::vector& GetNodesSentUp(unsigned proc, unsigned supstep) const {return nodes_sent_up[proc][supstep];} + const std::vector &GetComputeStepsForProcSuperstep(unsigned proc, unsigned supstep) const { + return compute_steps_for_proc_superstep[proc][supstep]; + } + + const std::vector &GetNodesEvictedInComm(unsigned proc, unsigned supstep) const { + return nodes_evicted_in_comm[proc][supstep]; + } + + const std::vector &GetNodesSentDown(unsigned proc, unsigned supstep) const { + return nodes_sent_down[proc][supstep]; + } + + const std::vector &GetNodesSentUp(unsigned proc, unsigned supstep) const { return nodes_sent_up[proc][supstep]; } - void SetNeedsBlueAtEnd(const std::set& nodes_) {needs_blue_at_end = nodes_;} - void SetExternalSources(const std::set& nodes_) {external_sources = nodes_;} - void SetHasRedInBeginning(const std::vector >& nodes_) {has_red_in_beginning = nodes_;} + void SetNeedsBlueAtEnd(const std::set &nodes_) { needs_blue_at_end = nodes_; } + void SetExternalSources(const std::set &nodes_) { external_sources = nodes_; } + + void SetHasRedInBeginning(const std::vector> &nodes_) { has_red_in_beginning = nodes_; } }; -template -void PebblingSchedule::updateNumberOfSupersteps(unsigned new_number_of_supersteps) -{ +template +void PebblingSchedule::updateNumberOfSupersteps(unsigned new_number_of_supersteps) { number_of_supersteps = new_number_of_supersteps; compute_steps_for_proc_superstep.clear(); - compute_steps_for_proc_superstep.resize(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); + compute_steps_for_proc_superstep.resize(instance->numberOfProcessors(), + std::vector>(number_of_supersteps)); nodes_evicted_in_comm.clear(); - nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); + nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); nodes_sent_down.clear(); - nodes_sent_down.resize(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); + nodes_sent_down.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); nodes_sent_up.clear(); - nodes_sent_up.resize(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); + nodes_sent_up.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); } -template -v_workw_t PebblingSchedule::computeCost() const -{ +template +v_workw_t PebblingSchedule::computeCost() const { cost_type total_costs = 0; - for(unsigned step = 0; step < number_of_supersteps; ++step) - { + for (unsigned step = 0; step < number_of_supersteps; ++step) { // compute phase cost_type max_work = std::numeric_limits::min(); - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { cost_type work = 0; - for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step]) + for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { work += instance->getComputationalDag().vertex_work_weight(computeStep.node); + } - if(work > max_work) + if (work > max_work) { max_work = work; + } } total_costs += max_work; // communication phase cost_type max_send_up = std::numeric_limits::min(); - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { cost_type send_up = 0; - for(vertex_idx node : nodes_sent_up[proc][step]) - send_up += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts(); + for (vertex_idx node : nodes_sent_up[proc][step]) { + send_up += instance->getComputationalDag().vertex_comm_weight(node) + * instance->getArchitecture().communicationCosts(); + } - if(send_up > max_send_up) + if (send_up > max_send_up) { max_send_up = send_up; + } } total_costs += max_send_up; total_costs += static_cast(instance->getArchitecture().synchronisationCosts()); cost_type max_send_down = std::numeric_limits::min(); - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { cost_type send_down = 0; - for(vertex_idx node : nodes_sent_down[proc][step]) - send_down += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts(); + for (vertex_idx node : nodes_sent_down[proc][step]) { + send_down += instance->getComputationalDag().vertex_comm_weight(node) + * instance->getArchitecture().communicationCosts(); + } - if(send_down > max_send_down) + if (send_down > max_send_down) { max_send_down = send_down; + } } total_costs += max_send_down; - } return total_costs; } -template -v_workw_t PebblingSchedule::computeAsynchronousCost() const{ - +template +v_workw_t PebblingSchedule::computeAsynchronousCost() const { std::vector current_time_at_processor(instance->getArchitecture().numberOfProcessors(), 0); - std::vector time_when_node_gets_blue(instance->getComputationalDag().num_vertices(), std::numeric_limits::max()); - if(need_to_load_inputs) - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - if(instance->getComputationalDag().in_degree(node) == 0) + std::vector time_when_node_gets_blue(instance->getComputationalDag().num_vertices(), + std::numeric_limits::max()); + if (need_to_load_inputs) { + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + if (instance->getComputationalDag().in_degree(node) == 0) { time_when_node_gets_blue[node] = 0; + } + } + } - for(unsigned step = 0; step < number_of_supersteps; ++step) - { + for (unsigned step = 0; step < number_of_supersteps; ++step) { // compute phase - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step]) + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { current_time_at_processor[proc] += instance->getComputationalDag().vertex_work_weight(computeStep.node); + } + } // communication phase - send up - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - for(vertex_idx node : nodes_sent_up[proc][step]) - { - current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts(); - if(time_when_node_gets_blue[node] > current_time_at_processor[proc]) + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (vertex_idx node : nodes_sent_up[proc][step]) { + current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) + * instance->getArchitecture().communicationCosts(); + if (time_when_node_gets_blue[node] > current_time_at_processor[proc]) { time_when_node_gets_blue[node] = current_time_at_processor[proc]; + } } + } // communication phase - send down - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - for(vertex_idx node : nodes_sent_down[proc][step]) - { - if(current_time_at_processor[proc] < time_when_node_gets_blue[node]) + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (vertex_idx node : nodes_sent_down[proc][step]) { + if (current_time_at_processor[proc] < time_when_node_gets_blue[node]) { current_time_at_processor[proc] = time_when_node_gets_blue[node]; - current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts(); + } + current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) + * instance->getArchitecture().communicationCosts(); } - + } } cost_type makespan = 0; - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - if(current_time_at_processor[proc] > makespan) + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + if (current_time_at_processor[proc] > makespan) { makespan = current_time_at_processor[proc]; + } + } return makespan; } -template +template void PebblingSchedule::cleanSchedule() { - - if(!isValid()) + if (!isValid()) { return; + } // NOTE - this function removes unnecessary steps in most cases, but not all (some require e.g. multiple iterations) - std::vector > > needed(instance->numberOfVertices(), std::vector >(instance->numberOfProcessors())); - std::vector > keep_false(instance->numberOfVertices(), std::vector(instance->numberOfProcessors(), false)); - std::vector > has_red_after_cleaning(instance->numberOfVertices(), std::vector(instance->numberOfProcessors(), false)); - + std::vector>> needed(instance->numberOfVertices(), + std::vector>(instance->numberOfProcessors())); + std::vector> keep_false(instance->numberOfVertices(), + std::vector(instance->numberOfProcessors(), false)); + std::vector> has_red_after_cleaning(instance->numberOfVertices(), + std::vector(instance->numberOfProcessors(), false)); + std::vector ever_needed_as_blue(instance->numberOfVertices(), false); - if(needs_blue_at_end.empty()) - { - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - if(instance->getComputationalDag().out_degree(node) == 0) + if (needs_blue_at_end.empty()) { + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + if (instance->getComputationalDag().out_degree(node) == 0) { ever_needed_as_blue[node] = true; - } - else - { - for(vertex_idx node : needs_blue_at_end) + } + } + } else { + for (vertex_idx node : needs_blue_at_end) { ever_needed_as_blue[node] = true; + } } - for(unsigned step = 0; step < number_of_supersteps; ++step) - for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) - for(vertex_idx node : nodes_sent_down[proc][step]) + for (unsigned step = 0; step < number_of_supersteps; ++step) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (vertex_idx node : nodes_sent_down[proc][step]) { ever_needed_as_blue[node] = true; + } + } + } - if(!has_red_in_beginning.empty()) - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - for(vertex_idx node : has_red_in_beginning[proc]) + if (!has_red_in_beginning.empty()) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (vertex_idx node : has_red_in_beginning[proc]) { has_red_after_cleaning[node][proc] = true; - - for(unsigned step = 0; step < number_of_supersteps; ++step) - { + } + } + } + + for (unsigned step = 0; step < number_of_supersteps; ++step) { // compute phase - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step]) - { + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { vertex_idx node = computeStep.node; needed[node][proc].emplace_back(false); keep_false[node][proc] = has_red_after_cleaning[node][proc]; - for(vertex_idx pred : instance->getComputationalDag().parents(node)) - { + for (vertex_idx pred : instance->getComputationalDag().parents(node)) { has_red_after_cleaning[pred][proc] = true; - if(!keep_false[pred][proc]) + if (!keep_false[pred][proc]) { needed[pred][proc].back() = true; + } } - for(vertex_idx to_evict : computeStep.nodes_evicted_after) + for (vertex_idx to_evict : computeStep.nodes_evicted_after) { has_red_after_cleaning[to_evict][proc] = false; + } } + } // send up phase - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - for(vertex_idx node : nodes_sent_up[proc][step]) - if(ever_needed_as_blue[node]) - { + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (vertex_idx node : nodes_sent_up[proc][step]) { + if (ever_needed_as_blue[node]) { has_red_after_cleaning[node][proc] = true; - if(!keep_false[node][proc]) + if (!keep_false[node][proc]) { needed[node][proc].back() = true; + } } + } + } - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - for(vertex_idx node : nodes_evicted_in_comm[proc][step]) + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { has_red_after_cleaning[node][proc] = false; + } + } - //send down phase - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - for(vertex_idx node : nodes_sent_down[proc][step]) - { + // send down phase + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (vertex_idx node : nodes_sent_down[proc][step]) { needed[node][proc].emplace_back(false); keep_false[node][proc] = has_red_after_cleaning[node][proc]; } + } } - std::vector > > new_compute_steps_for_proc_superstep(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); - std::vector > > new_nodes_evicted_in_comm(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); - std::vector > > new_nodes_sent_down(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); - std::vector > > new_nodes_sent_up(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); - - std::vector > has_red(instance->numberOfVertices(), std::vector(instance->numberOfProcessors(), false)); - if(!has_red_in_beginning.empty()) - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - for(vertex_idx node : has_red_in_beginning[proc]) + std::vector>> new_compute_steps_for_proc_superstep( + instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + std::vector>> new_nodes_evicted_in_comm( + instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + std::vector>> new_nodes_sent_down( + instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + std::vector>> new_nodes_sent_up( + instance->numberOfProcessors(), std::vector>(number_of_supersteps)); + + std::vector> has_red(instance->numberOfVertices(), std::vector(instance->numberOfProcessors(), false)); + if (!has_red_in_beginning.empty()) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (vertex_idx node : has_red_in_beginning[proc]) { has_red[node][proc] = true; - + } + } + } + std::vector has_blue(instance->numberOfVertices()); - std::vector time_when_node_gets_blue(instance->getComputationalDag().num_vertices(), std::numeric_limits::max()); - if(need_to_load_inputs) - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - if(instance->getComputationalDag().in_degree(node) == 0) - { + std::vector time_when_node_gets_blue(instance->getComputationalDag().num_vertices(), + std::numeric_limits::max()); + if (need_to_load_inputs) { + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + if (instance->getComputationalDag().in_degree(node) == 0) { has_blue[node] = true; time_when_node_gets_blue[node] = 0; } + } + } std::vector current_time_at_processor(instance->getArchitecture().numberOfProcessors(), 0); - for(unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) - { + for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { // compute phase - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { std::vector step_remains(compute_steps_for_proc_superstep[proc][superstep].size(), false); - std::vector > new_evict_after(compute_steps_for_proc_superstep[proc][superstep].size()); - + std::vector> new_evict_after(compute_steps_for_proc_superstep[proc][superstep].size()); + unsigned new_stepIndex = 0; - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) - { + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - if(needed[node][proc].front()) - { + if (needed[node][proc].front()) { new_compute_steps_for_proc_superstep[proc][superstep].emplace_back(node, std::vector()); step_remains[stepIndex] = true; has_red[node][proc] = true; @@ -476,85 +520,88 @@ void PebblingSchedule::cleanSchedule() { needed[node][proc].pop_front(); - for(vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after) - { - if(has_red[to_evict][proc]) + for (vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after) { + if (has_red[to_evict][proc]) { new_evict_after[stepIndex].push_back(to_evict); + } has_red[to_evict][proc] = false; } } // go backwards to fix cache eviction steps std::vector to_evict; - for (size_t stepIndex = compute_steps_for_proc_superstep[proc][superstep].size() - 1; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); --stepIndex) - { - for(vertex_idx node : new_evict_after[stepIndex]) + for (size_t stepIndex = compute_steps_for_proc_superstep[proc][superstep].size() - 1; + stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); + --stepIndex) { + for (vertex_idx node : new_evict_after[stepIndex]) { to_evict.push_back(node); + } - if(step_remains[stepIndex]) - { - new_compute_steps_for_proc_superstep[proc][superstep][new_stepIndex-1].nodes_evicted_after = to_evict; + if (step_remains[stepIndex]) { + new_compute_steps_for_proc_superstep[proc][superstep][new_stepIndex - 1].nodes_evicted_after = to_evict; to_evict.clear(); --new_stepIndex; } } - if(!to_evict.empty() && superstep>=1) - for(vertex_idx node : to_evict) - { - auto itr = std::find(new_nodes_sent_down[proc][superstep-1].begin(), new_nodes_sent_down[proc][superstep-1].end(), node); - if(itr == new_nodes_sent_down[proc][superstep-1].end()) - new_nodes_evicted_in_comm[proc][superstep-1].push_back(node); - else - new_nodes_sent_down[proc][superstep-1].erase(itr); + if (!to_evict.empty() && superstep >= 1) { + for (vertex_idx node : to_evict) { + auto itr = std::find( + new_nodes_sent_down[proc][superstep - 1].begin(), new_nodes_sent_down[proc][superstep - 1].end(), node); + if (itr == new_nodes_sent_down[proc][superstep - 1].end()) { + new_nodes_evicted_in_comm[proc][superstep - 1].push_back(node); + } else { + new_nodes_sent_down[proc][superstep - 1].erase(itr); + } } + } } - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { // send up phase - for(vertex_idx node : nodes_sent_up[proc][superstep]) - { - if(!ever_needed_as_blue[node]) + for (vertex_idx node : nodes_sent_up[proc][superstep]) { + if (!ever_needed_as_blue[node]) { continue; + } - cost_type new_time_at_processor = current_time_at_processor[proc] + instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts(); + cost_type new_time_at_processor = current_time_at_processor[proc] + + instance->getComputationalDag().vertex_comm_weight(node) + * instance->getArchitecture().communicationCosts(); // only copy send up step if it is not obsolete in at least one of the two cases (sync or async schedule) - if(!has_blue[node] || new_time_at_processor < time_when_node_gets_blue[node]) - { + if (!has_blue[node] || new_time_at_processor < time_when_node_gets_blue[node]) { new_nodes_sent_up[proc][superstep].push_back(node); has_blue[node] = true; current_time_at_processor[proc] = new_time_at_processor; - if(time_when_node_gets_blue[node] > new_time_at_processor) + if (time_when_node_gets_blue[node] > new_time_at_processor) { time_when_node_gets_blue[node] = new_time_at_processor; + } } } } // comm phase evict - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - for(vertex_idx node : nodes_evicted_in_comm[proc][superstep]) - if(has_red[node][proc]) - { + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + for (vertex_idx node : nodes_evicted_in_comm[proc][superstep]) { + if (has_red[node][proc]) { new_nodes_evicted_in_comm[proc][superstep].push_back(node); has_red[node][proc] = false; } + } + } - for(unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) - { - //send down phase - for(vertex_idx node : nodes_sent_down[proc][superstep]) - { - if(needed[node][proc].front()) - { + for (unsigned proc = 0; proc < instance->getArchitecture().numberOfProcessors(); ++proc) { + // send down phase + for (vertex_idx node : nodes_sent_down[proc][superstep]) { + if (needed[node][proc].front()) { new_nodes_sent_down[proc][superstep].push_back(node); has_red[node][proc] = true; - if(current_time_at_processor[proc] < time_when_node_gets_blue[node]) + if (current_time_at_processor[proc] < time_when_node_gets_blue[node]) { current_time_at_processor[proc] = time_when_node_gets_blue[node]; - current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) * instance->getArchitecture().communicationCosts(); + } + current_time_at_processor[proc] += instance->getComputationalDag().vertex_comm_weight(node) + * instance->getArchitecture().communicationCosts(); } needed[node][proc].pop_front(); } - } } @@ -564,15 +611,13 @@ void PebblingSchedule::cleanSchedule() { nodes_sent_up = new_nodes_sent_up; } -template -void PebblingSchedule::ConvertFromBsp(const BspSchedule &schedule, CACHE_EVICTION_STRATEGY evict_rule) -{ +template +void PebblingSchedule::ConvertFromBsp(const BspSchedule &schedule, CACHE_EVICTION_STRATEGY evict_rule) { instance = &schedule.getInstance(); // check if conversion possible at all - if(!hasValidSolution(schedule.getInstance(), external_sources)) - { - std::cout<<"Conversion failed."<::ConvertFromBsp(const BspSchedule &sched SplitSupersteps(schedule); // track memory - SetMemoryMovement(evict_rule); + SetMemoryMovement(evict_rule); } -template -bool PebblingSchedule::hasValidSolution(const BspInstance &instance, const std::set& external_sources) -{ +template +bool PebblingSchedule::hasValidSolution(const BspInstance &instance, + const std::set &external_sources) { std::vector memory_required = minimumMemoryRequiredPerNodeType(instance); std::vector has_enough_memory(instance.getComputationalDag().num_vertex_types(), true); - for(vertex_idx node = 0; node < instance.numberOfVertices(); ++node) - if(external_sources.find(node) == external_sources.end()) + for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) { + if (external_sources.find(node) == external_sources.end()) { has_enough_memory[instance.getComputationalDag().vertex_type(node)] = false; + } + } - for(v_type_t node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) - for(unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) - if(instance.isCompatibleType(node_type, instance.getArchitecture().processorType(proc)) && - instance.getArchitecture().memoryBound(proc) >= memory_required[node_type]) - { + for (v_type_t node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + if (instance.isCompatibleType(node_type, instance.getArchitecture().processorType(proc)) + && instance.getArchitecture().memoryBound(proc) >= memory_required[node_type]) { has_enough_memory[node_type] = true; break; } + } + } - for(v_type_t node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) - if(!has_enough_memory[node_type]) - { - std::cout<<"No valid solution exists. Minimum memory required for node type "< node_type = 0; node_type < instance.getComputationalDag().num_vertex_types(); ++node_type) { + if (!has_enough_memory[node_type]) { + std::cout << "No valid solution exists. Minimum memory required for node type " << node_type << " is " + << memory_required[node_type] << std::endl; return false; } + } return true; } -template -void PebblingSchedule::SplitSupersteps(const BspSchedule &schedule) -{ +template +void PebblingSchedule::SplitSupersteps(const BspSchedule &schedule) { // get DFS topological order in each superstep - std::vector > > top_orders = computeTopOrdersDFS(schedule); + std::vector>> top_orders = computeTopOrdersDFS(schedule); std::vector top_order_idx(instance->getComputationalDag().num_vertices(), 0); - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - for(unsigned step=0; stepnumberOfProcessors(); ++proc) { + for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) { + for (unsigned idx = 0; idx < top_orders[proc][step].size(); ++idx) { top_order_idx[top_orders[proc][step][idx]] = idx; + } + } + } // split supersteps as needed std::vector new_superstep_ID(instance->getComputationalDag().num_vertices()); unsigned superstep_index = 0; - for(unsigned step=0; stepnumberOfProcessors(); ++proc) - { - if(top_orders[proc][step].empty()) + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (top_orders[proc][step].empty()) { continue; + } // the superstep will be split into smaller segments - std::vector > segments; + std::vector> segments; unsigned start_idx = 0; - while(start_idx < top_orders[proc][step].size()) - { + while (start_idx < top_orders[proc][step].size()) { // binary search for largest segment that still statisfies mem constraint bool doubling_phase = true; - unsigned end_lower_bound = start_idx, end_upper_bound = static_cast(top_orders[proc][step].size()-1); - while(end_lower_bound < end_upper_bound) - { + unsigned end_lower_bound = start_idx, end_upper_bound = static_cast(top_orders[proc][step].size() - 1); + while (end_lower_bound < end_upper_bound) { unsigned end_current; - - if(doubling_phase) - { - if(end_lower_bound == start_idx) + + if (doubling_phase) { + if (end_lower_bound == start_idx) { end_current = start_idx + 1; - else - end_current = std::min(start_idx + 2* (end_lower_bound - start_idx), - static_cast( top_orders[proc][step].size() ) - 1); - } - else + } else { + end_current = std::min(start_idx + 2 * (end_lower_bound - start_idx), + static_cast(top_orders[proc][step].size()) - 1); + } + } else { end_current = end_lower_bound + (end_upper_bound - end_lower_bound + 1) / 2; + } // check if this segment is valid bool valid = true; std::map neededAfter; - for(unsigned idx = start_idx; idx <= end_current; ++idx) - { + for (unsigned idx = start_idx; idx <= end_current; ++idx) { vertex_idx node = top_orders[proc][step][idx]; neededAfter[node] = false; - if(needs_blue_at_end.empty()) + if (needs_blue_at_end.empty()) { neededAfter[node] = (instance->getComputationalDag().out_degree(node) == 0); - else + } else { neededAfter[node] = (needs_blue_at_end.find(node) != needs_blue_at_end.end()); - for(vertex_idx succ : instance->getComputationalDag().children(node)) - { - if(schedule.assignedSuperstep(succ)>step) + } + for (vertex_idx succ : instance->getComputationalDag().children(node)) { + if (schedule.assignedSuperstep(succ) > step) { neededAfter[node] = true; - if(schedule.assignedSuperstep(succ) == step && top_order_idx[succ] <= end_current) + } + if (schedule.assignedSuperstep(succ) == step && top_order_idx[succ] <= end_current) { neededAfter[node] = true; + } } - } std::map lastUsedBy; std::set values_needed; - for(unsigned idx = start_idx; idx <= end_current; ++idx) - { + for (unsigned idx = start_idx; idx <= end_current; ++idx) { vertex_idx node = top_orders[proc][step][idx]; - for(vertex_idx pred : instance->getComputationalDag().parents(node)) - { - if(schedule.assignedSuperstep(pred)getComputationalDag().parents(node)) { + if (schedule.assignedSuperstep(pred) < step + || (schedule.assignedSuperstep(pred) == step && !neededAfter[pred])) { lastUsedBy[pred] = node; - if(schedule.assignedSuperstep(pred)getComputationalDag().in_degree(pred)==0) - || external_sources.find(pred) != external_sources.end() ) + } + if (schedule.assignedSuperstep(pred) < step + || (schedule.assignedSuperstep(pred) == step && top_order_idx[pred] < start_idx) + || (need_to_load_inputs && instance->getComputationalDag().in_degree(pred) == 0) + || external_sources.find(pred) != external_sources.end()) { values_needed.insert(pred); + } } } memweight_type mem_needed = 0; - for(vertex_idx node : values_needed) + for (vertex_idx node : values_needed) { mem_needed += instance->getComputationalDag().vertex_mem_weight(node); + } - - for(unsigned idx = start_idx; idx <= end_current; ++idx) - { + for (unsigned idx = start_idx; idx <= end_current; ++idx) { vertex_idx node = top_orders[proc][step][idx]; - if(need_to_load_inputs && instance->getComputationalDag().in_degree(node) == 0) + if (need_to_load_inputs && instance->getComputationalDag().in_degree(node) == 0) { continue; + } mem_needed += instance->getComputationalDag().vertex_mem_weight(node); - if(mem_needed > instance->getArchitecture().memoryBound(proc)) - { + if (mem_needed > instance->getArchitecture().memoryBound(proc)) { valid = false; break; } - for(vertex_idx pred : instance->getComputationalDag().parents(node)) - if(lastUsedBy[pred] == node) + for (vertex_idx pred : instance->getComputationalDag().parents(node)) { + if (lastUsedBy[pred] == node) { mem_needed -= instance->getComputationalDag().vertex_mem_weight(pred); + } + } } - if(valid) - { + if (valid) { end_lower_bound = end_current; - if(end_current == top_orders[proc][step].size()-1) - { + if (end_current == top_orders[proc][step].size() - 1) { doubling_phase = false; end_upper_bound = end_current; } - } - else - { + } else { doubling_phase = false; end_upper_bound = end_current - 1; } - } segments.emplace_back(start_idx, end_lower_bound); start_idx = end_lower_bound + 1; } unsigned step_idx = 0; - for(auto segment : segments) - { - for(unsigned idx = segment.first; idx <= segment.second; ++idx) + for (auto segment : segments) { + for (unsigned idx = segment.first; idx <= segment.second; ++idx) { new_superstep_ID[top_orders[proc][step][idx]] = superstep_index + step_idx; + } ++step_idx; } - if(step_idx>max_segments_in_superstep) + if (step_idx > max_segments_in_superstep) { max_segments_in_superstep = step_idx; + } } superstep_index += max_segments_in_superstep; } std::vector reindex_to_shrink(superstep_index); std::vector has_compute(superstep_index, false); - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - if(!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0) + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + if (!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0) { has_compute[new_superstep_ID[node]] = true; - + } + } + unsigned current_index = 0; - for(unsigned superstep = 0; superstep < superstep_index; ++superstep) - if(has_compute[superstep]) - { + for (unsigned superstep = 0; superstep < superstep_index; ++superstep) { + if (has_compute[superstep]) { reindex_to_shrink[superstep] = current_index; ++current_index; } + } unsigned offset = need_to_load_inputs ? 1 : 0; - updateNumberOfSupersteps(current_index+offset); - std::cout< "< " << number_of_supersteps << std::endl; // TODO: might not need offset for first step when beginning with red pebbles - for(unsigned step=0; stepnumberOfProcessors(); ++proc) - for(vertex_idx node : top_orders[proc][step]) - if(!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0) - compute_steps_for_proc_superstep[proc][reindex_to_shrink[new_superstep_ID[node]]+offset].emplace_back(node); - + for (unsigned step = 0; step < schedule.numberOfSupersteps(); ++step) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (vertex_idx node : top_orders[proc][step]) { + if (!need_to_load_inputs || instance->getComputationalDag().in_degree(node) > 0) { + compute_steps_for_proc_superstep[proc][reindex_to_shrink[new_superstep_ID[node]] + offset].emplace_back(node); + } + } + } + } } -template -void PebblingSchedule::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_rule) -{ +template +void PebblingSchedule::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_rule) { const size_t N = instance->getComputationalDag().num_vertices(); std::vector mem_used(instance->numberOfProcessors(), 0); - std::vector > in_mem(instance->numberOfProcessors()); + std::vector> in_mem(instance->numberOfProcessors()); std::vector in_slow_mem(N, false); - if(need_to_load_inputs) - for(vertex_idx node=0; nodegetComputationalDag().in_degree(node) == 0) + if (need_to_load_inputs) { + for (vertex_idx node = 0; node < N; ++node) { + if (instance->getComputationalDag().in_degree(node) == 0) { in_slow_mem[node] = true; + } + } + } + + std::vector, vertex_idx>>> evictable(instance->numberOfProcessors()); + std::vector> non_evictable(instance->numberOfProcessors()); - std::vector, vertex_idx>> > evictable(instance->numberOfProcessors()); - std::vector > non_evictable(instance->numberOfProcessors()); - // iterator to its position in "evictable" - for efficient delete - std::vector > place_in_evictable(N, - std::vector(instance->numberOfProcessors())); - for(vertex_idx node=0; nodenumberOfProcessors(); ++proc) + std::vector> place_in_evictable( + N, std::vector(instance->numberOfProcessors())); + for (vertex_idx node = 0; node < N; ++node) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { place_in_evictable[node][proc] = evictable[proc].end(); + } + } // utility for LRU eviction strategy - std::vector > node_last_used_on_proc; - if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) + std::vector> node_last_used_on_proc; + if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) { node_last_used_on_proc.resize(N, std::vector(instance->numberOfProcessors(), 0)); + } std::vector total_step_count_on_proc(instance->numberOfProcessors(), 0); // select a representative compute step for each node, in case of being computed multiple times @@ -816,135 +873,139 @@ void PebblingSchedule::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_ // its representative step, if the value in question is ever needed on another processor/superster // without being recomputed there - otherwise, it would be even hard to decide whether a solution exists) std::vector selected_processor(N); - std::vector > selected_step(N, std::make_pair(number_of_supersteps, 0)); - for(unsigned superstep=0; superstepnumberOfProcessors(); ++proc) - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) - { + std::vector> selected_step(N, std::make_pair(number_of_supersteps, 0)); + for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - if(selected_step[node].first > superstep || (selected_step[node].first == superstep && selected_step[node].second < stepIndex)) - { + if (selected_step[node].first > superstep + || (selected_step[node].first == superstep && selected_step[node].second < stepIndex)) { selected_processor[node] = proc; selected_step[node] = std::make_pair(superstep, stepIndex); } } + } + } // check if the node needs to be kept until the end of its representative superstep std::vector must_be_preserved(N, false); std::vector computed_in_current_superstep(N, false); - for(unsigned superstep=0; superstepnumberOfProcessors(); ++proc) - { - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) - { + for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; computed_in_current_superstep[node] = true; - for(vertex_idx pred : instance->getComputationalDag().parents(node)) - if(!computed_in_current_superstep[pred]) + for (vertex_idx pred : instance->getComputationalDag().parents(node)) { + if (!computed_in_current_superstep[pred]) { must_be_preserved[pred] = true; + } + } } - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { computed_in_current_superstep[compute_steps_for_proc_superstep[proc][superstep][stepIndex].node] = false; + } } - if(needs_blue_at_end.empty()) - { - for(vertex_idx node = 0; node < N; ++node) - if(instance->getComputationalDag().out_degree(node) == 0) - must_be_preserved[node] = true; } - else - { - for(vertex_idx node : needs_blue_at_end) + if (needs_blue_at_end.empty()) { + for (vertex_idx node = 0; node < N; ++node) { + if (instance->getComputationalDag().out_degree(node) == 0) { + must_be_preserved[node] = true; + } + } + } else { + for (vertex_idx node : needs_blue_at_end) { must_be_preserved[node] = true; + } } // superstep-step pairs where a node is required (on a given proc) - opening a separate queue after each time it's recomputed - std::vector > > > > node_used_at_proc_lists(N, std::vector > > >(instance->numberOfProcessors(), std::deque > >(1))); - for(unsigned superstep=0; superstepnumberOfProcessors(); ++proc) - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) - { + std::vector>>>> node_used_at_proc_lists( + N, + std::vector>>>( + instance->numberOfProcessors(), std::deque>>(1))); + for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - for(vertex_idx pred : instance->getComputationalDag().parents(node)) + for (vertex_idx pred : instance->getComputationalDag().parents(node)) { node_used_at_proc_lists[pred][proc].back().emplace_back(superstep, stepIndex); - + } + node_used_at_proc_lists[node][proc].emplace_back(); } + } + } // set up initial content of fast memories - if(!has_red_in_beginning.empty()) - { - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - { + if (!has_red_in_beginning.empty()) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { in_mem = has_red_in_beginning; - for(vertex_idx node : in_mem[proc]) - { + for (vertex_idx node : in_mem[proc]) { mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); std::pair prio; - if(evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) + if (evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) { prio = node_used_at_proc_lists[node][proc].front().front(); - else if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) + } else if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) { prio = std::make_pair(UINT_MAX - node_last_used_on_proc[node][proc], static_cast(node)); - else if(evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) + } else if (evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) { prio = std::make_pair(static_cast(node), 0); + } place_in_evictable[node][proc] = evictable[proc].emplace(prio, node).first; } } } - + // iterate through schedule - for(unsigned superstep=0; superstepnumberOfProcessors(); ++proc) - { - if(compute_steps_for_proc_superstep[proc][superstep].empty()) + for (unsigned superstep = 0; superstep < number_of_supersteps; ++superstep) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (compute_steps_for_proc_superstep[proc][superstep].empty()) { continue; + } // before compute phase, evict data in comm phase of previous superstep std::set new_values_needed; - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) - { + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; computed_in_current_superstep[node] = true; - for(vertex_idx pred : instance->getComputationalDag().parents(node)) - if(!computed_in_current_superstep[pred]) - { + for (vertex_idx pred : instance->getComputationalDag().parents(node)) { + if (!computed_in_current_superstep[pred]) { non_evictable[proc].insert(pred); - if(place_in_evictable[pred][proc] != evictable[proc].end()) - { + if (place_in_evictable[pred][proc] != evictable[proc].end()) { evictable[proc].erase(place_in_evictable[pred][proc]); place_in_evictable[pred][proc] = evictable[proc].end(); } - if(in_mem[proc].find(pred) == in_mem[proc].end()) + if (in_mem[proc].find(pred) == in_mem[proc].end()) { new_values_needed.insert(pred); + } } + } } - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { computed_in_current_superstep[compute_steps_for_proc_superstep[proc][superstep][stepIndex].node] = false; - - for(vertex_idx node : new_values_needed) - { + } + + for (vertex_idx node : new_values_needed) { in_mem[proc].insert(node); mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); - nodes_sent_down[proc][superstep-1].push_back(node); - if(!in_slow_mem[node]) - { + nodes_sent_down[proc][superstep - 1].push_back(node); + if (!in_slow_mem[node]) { in_slow_mem[node] = true; nodes_sent_up[selected_processor[node]][selected_step[node].first].push_back(node); } } - memweight_type first_node_weight = instance->getComputationalDag().vertex_mem_weight(compute_steps_for_proc_superstep[proc][superstep][0].node); + memweight_type first_node_weight + = instance->getComputationalDag().vertex_mem_weight(compute_steps_for_proc_superstep[proc][superstep][0].node); - while(mem_used[proc] + first_node_weight > instance->getArchitecture().memoryBound(proc)) // no sliding pebbles for now + while (mem_used[proc] + first_node_weight + > instance->getArchitecture().memoryBound(proc)) // no sliding pebbles for now { - if(evictable[proc].empty()) - { - std::cout<<"ERROR: Cannot create valid memory movement for these superstep lists."<second; @@ -954,26 +1015,22 @@ void PebblingSchedule::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_ mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(evicted); in_mem[proc].erase(evicted); - nodes_evicted_in_comm[proc][superstep-1].push_back(evicted); + nodes_evicted_in_comm[proc][superstep - 1].push_back(evicted); } // indicates if the node will be needed after (and thus cannot be deleted during) this compute phase - std::map needed_after; + std::map needed_after; // during compute phase - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) - { + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; memweight_type node_weight = instance->getComputationalDag().vertex_mem_weight(node); - if(stepIndex > 0) - { - //evict nodes to make space - while(mem_used[proc] + node_weight > instance->getArchitecture().memoryBound(proc)) - { - if(evictable[proc].empty()) - { - std::cout<<"ERROR: Cannot create valid memory movement for these superstep lists."< 0) { + // evict nodes to make space + while (mem_used[proc] + node_weight > instance->getArchitecture().memoryBound(proc)) { + if (evictable[proc].empty()) { + std::cout << "ERROR: Cannot create valid memory movement for these superstep lists." << std::endl; return; } vertex_idx evicted = (--evictable[proc].end())->second; @@ -981,9 +1038,9 @@ void PebblingSchedule::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_ place_in_evictable[evicted][proc] = evictable[proc].end(); mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(evicted); - in_mem[proc].erase(evicted); + in_mem[proc].erase(evicted); - compute_steps_for_proc_superstep[proc][superstep][stepIndex-1].nodes_evicted_after.push_back(evicted); + compute_steps_for_proc_superstep[proc][superstep][stepIndex - 1].nodes_evicted_after.push_back(evicted); } } @@ -992,83 +1049,79 @@ void PebblingSchedule::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_ non_evictable[proc].insert(node); - if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) // update usage times for LRU strategy + if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) // update usage times for LRU strategy { ++total_step_count_on_proc[proc]; node_last_used_on_proc[node][proc] = total_step_count_on_proc[proc]; - for(vertex_idx pred : instance->getComputationalDag().parents(node)) + for (vertex_idx pred : instance->getComputationalDag().parents(node)) { node_last_used_on_proc[pred][proc] = total_step_count_on_proc[proc]; + } } - if(selected_processor[node] == proc && selected_step[node] == std::make_pair(superstep, stepIndex) && must_be_preserved[node]) + if (selected_processor[node] == proc && selected_step[node] == std::make_pair(superstep, stepIndex) + && must_be_preserved[node]) { needed_after[node] = true; - else + } else { needed_after[node] = false; + } node_used_at_proc_lists[node][proc].pop_front(); - - for(vertex_idx pred : instance->getComputationalDag().parents(node)) - { + + for (vertex_idx pred : instance->getComputationalDag().parents(node)) { node_used_at_proc_lists[pred][proc].front().pop_front(); - if(needed_after[pred]) + if (needed_after[pred]) { continue; + } // autoevict - if(node_used_at_proc_lists[pred][proc].front().empty()) - { + if (node_used_at_proc_lists[pred][proc].front().empty()) { in_mem[proc].erase(pred); non_evictable[proc].erase(pred); mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(pred); - compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after.push_back(pred); - } - else if(node_used_at_proc_lists[pred][proc].front().front().first > superstep) - { + compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after.push_back(pred); + } else if (node_used_at_proc_lists[pred][proc].front().front().first > superstep) { non_evictable[proc].erase(pred); std::pair prio; - if(evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) + if (evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) { prio = node_used_at_proc_lists[pred][proc].front().front(); - else if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) + } else if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) { prio = std::make_pair(UINT_MAX - node_last_used_on_proc[pred][proc], static_cast(pred)); - else if(evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) + } else if (evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) { prio = std::make_pair(static_cast(pred), 0); + } place_in_evictable[pred][proc] = evictable[proc].emplace(prio, pred).first; } } - } // after compute phase - for(vertex_idx node : non_evictable[proc]) - { - if(node_used_at_proc_lists[node][proc].front().empty()) - { + for (vertex_idx node : non_evictable[proc]) { + if (node_used_at_proc_lists[node][proc].front().empty()) { mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(node); in_mem[proc].erase(node); nodes_evicted_in_comm[proc][superstep].push_back(node); - if((instance->getComputationalDag().out_degree(node) == 0 || needs_blue_at_end.find(node) != needs_blue_at_end.end()) - && !in_slow_mem[node]) - { + if ((instance->getComputationalDag().out_degree(node) == 0 + || needs_blue_at_end.find(node) != needs_blue_at_end.end()) + && !in_slow_mem[node]) { in_slow_mem[node] = true; nodes_sent_up[proc][superstep].push_back(node); } - } - else - { + } else { std::pair prio; - if(evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) + if (evict_rule == CACHE_EVICTION_STRATEGY::FORESIGHT) { prio = node_used_at_proc_lists[node][proc].front().front(); - else if(evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) + } else if (evict_rule == CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED) { prio = std::make_pair(UINT_MAX - node_last_used_on_proc[node][proc], static_cast(node)); - else if(evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) + } else if (evict_rule == CACHE_EVICTION_STRATEGY::LARGEST_ID) { prio = std::make_pair(static_cast(node), 0); + } place_in_evictable[node][proc] = evictable[proc].emplace(prio, node).first; - if(needs_blue_at_end.find(node) != needs_blue_at_end.end() && !in_slow_mem[node]) - { + if (needs_blue_at_end.find(node) != needs_blue_at_end.end() && !in_slow_mem[node]) { in_slow_mem[node] = true; nodes_sent_up[proc][superstep].push_back(node); } @@ -1077,94 +1130,95 @@ void PebblingSchedule::SetMemoryMovement(CACHE_EVICTION_STRATEGY evict_ non_evictable[proc].clear(); } } - } -template -void PebblingSchedule::ResetToForesight() -{ +template +void PebblingSchedule::ResetToForesight() { nodes_evicted_in_comm.clear(); - nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); + nodes_evicted_in_comm.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); nodes_sent_down.clear(); - nodes_sent_down.resize(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); + nodes_sent_down.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); nodes_sent_up.clear(); - nodes_sent_up.resize(instance->numberOfProcessors(), std::vector >(number_of_supersteps)); + nodes_sent_up.resize(instance->numberOfProcessors(), std::vector>(number_of_supersteps)); SetMemoryMovement(CACHE_EVICTION_STRATEGY::FORESIGHT); } -template -bool PebblingSchedule::isValid() const -{ +template +bool PebblingSchedule::isValid() const { std::vector mem_used(instance->numberOfProcessors(), 0); - std::vector > in_fast_mem(instance->getComputationalDag().num_vertices(), - std::vector(instance->numberOfProcessors(), false)); + std::vector> in_fast_mem(instance->getComputationalDag().num_vertices(), + std::vector(instance->numberOfProcessors(), false)); std::vector in_slow_mem(instance->getComputationalDag().num_vertices(), false); - if(need_to_load_inputs) - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - if(instance->getComputationalDag().in_degree(node) == 0) + if (need_to_load_inputs) { + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + if (instance->getComputationalDag().in_degree(node) == 0) { in_slow_mem[node] = true; - - if(!has_red_in_beginning.empty()) - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - for(vertex_idx node : has_red_in_beginning[proc]) - { + } + } + } + + if (!has_red_in_beginning.empty()) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (vertex_idx node : has_red_in_beginning[proc]) { mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); in_fast_mem[node][proc] = true; } + } + } - for(unsigned step=0; stepnumberOfProcessors(); ++proc) - { + for (unsigned step = 0; step < number_of_supersteps; ++step) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { // computation phase - for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step]) - { - if(!instance->isCompatible(computeStep.node, proc)) + for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { + if (!instance->isCompatible(computeStep.node, proc)) { return false; + } - for(vertex_idx pred : instance->getComputationalDag().parents(computeStep.node)) - if(!in_fast_mem[pred][proc]) + for (vertex_idx pred : instance->getComputationalDag().parents(computeStep.node)) { + if (!in_fast_mem[pred][proc]) { return false; + } + } - if(need_to_load_inputs && instance->getComputationalDag().in_degree(computeStep.node) == 0) + if (need_to_load_inputs && instance->getComputationalDag().in_degree(computeStep.node) == 0) { return false; - - if(!in_fast_mem[computeStep.node][proc]) - { + } + + if (!in_fast_mem[computeStep.node][proc]) { in_fast_mem[computeStep.node][proc] = true; mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(computeStep.node); } - if(mem_used[proc] > instance->getArchitecture().memoryBound(proc)) + if (mem_used[proc] > instance->getArchitecture().memoryBound(proc)) { return false; + } - for(vertex_idx to_remove : computeStep.nodes_evicted_after) - { - if(!in_fast_mem[to_remove][proc]) + for (vertex_idx to_remove : computeStep.nodes_evicted_after) { + if (!in_fast_mem[to_remove][proc]) { return false; + } in_fast_mem[to_remove][proc] = false; mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_remove); - } } - //communication phase - sendup and eviction - for(vertex_idx node : nodes_sent_up[proc][step]) - { - if(!in_fast_mem[node][proc]) + // communication phase - sendup and eviction + for (vertex_idx node : nodes_sent_up[proc][step]) { + if (!in_fast_mem[node][proc]) { return false; - + } + in_slow_mem[node] = true; } - for(vertex_idx node : nodes_evicted_in_comm[proc][step]) - { - if(!in_fast_mem[node][proc]) + for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { + if (!in_fast_mem[node][proc]) { return false; + } in_fast_mem[node][proc] = false; mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(node); @@ -1172,115 +1226,120 @@ bool PebblingSchedule::isValid() const } // communication phase - senddown - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - { - for(vertex_idx node : nodes_sent_down[proc][step]) - { - if(!in_slow_mem[node]) + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (vertex_idx node : nodes_sent_down[proc][step]) { + if (!in_slow_mem[node]) { return false; + } - if(!in_fast_mem[node][proc]) - { + if (!in_fast_mem[node][proc]) { in_fast_mem[node][proc] = true; mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); } } } - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - if(mem_used[proc] > instance->getArchitecture().memoryBound(proc)) + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (mem_used[proc] > instance->getArchitecture().memoryBound(proc)) { return false; + } + } } - if(needs_blue_at_end.empty()) - { - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - if(instance->getComputationalDag().out_degree(node) == 0 && !in_slow_mem[node]) + if (needs_blue_at_end.empty()) { + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + if (instance->getComputationalDag().out_degree(node) == 0 && !in_slow_mem[node]) { return false; - } - else - { - for(vertex_idx node : needs_blue_at_end) - if(!in_slow_mem[node]) + } + } + } else { + for (vertex_idx node : needs_blue_at_end) { + if (!in_slow_mem[node]) { return false; + } + } } return true; } -template -std::vector > PebblingSchedule::minimumMemoryRequiredPerNodeType(const BspInstance& instance, const std::set& external_sources) -{ - std::vector > max_needed(instance.getComputationalDag().num_vertex_types(), 0); - for(vertex_idx_t node=0; node +std::vector> PebblingSchedule::minimumMemoryRequiredPerNodeType( + const BspInstance &instance, const std::set &external_sources) { + std::vector> max_needed(instance.getComputationalDag().num_vertex_types(), 0); + for (vertex_idx_t node = 0; node < instance.getComputationalDag().num_vertices(); ++node) { + if (external_sources.find(node) != external_sources.end()) { continue; + } v_memw_t needed = instance.getComputationalDag().vertex_mem_weight(node); const v_type_t type = instance.getComputationalDag().vertex_type(node); - for(vertex_idx_t pred : instance.getComputationalDag().parents(node)) + for (vertex_idx_t pred : instance.getComputationalDag().parents(node)) { needed += instance.getComputationalDag().vertex_mem_weight(pred); - - if(needed>max_needed[type]) - max_needed[type]=needed; + } + + if (needed > max_needed[type]) { + max_needed[type] = needed; + } } return max_needed; } -template -std::vector > > > PebblingSchedule::computeTopOrdersDFS(const BspSchedule &schedule) const -{ +template +std::vector>>> PebblingSchedule::computeTopOrdersDFS( + const BspSchedule &schedule) const { size_t n = schedule.getInstance().getComputationalDag().num_vertices(); unsigned num_procs = schedule.getInstance().numberOfProcessors(); unsigned num_supsteps = schedule.numberOfSupersteps(); - std::vector > > top_orders(num_procs, std::vector >(num_supsteps)); + std::vector>> top_orders(num_procs, std::vector>(num_supsteps)); - std::vector > > Q(num_procs, std::vector >(num_supsteps)); - std::vector > > nodesUpdated(num_procs, std::vector >(num_supsteps)); + std::vector>> Q(num_procs, std::vector>(num_supsteps)); + std::vector>> nodesUpdated(num_procs, std::vector>(num_supsteps)); std::vector nr_pred(n); std::vector pred_done(n, 0); - for(vertex_idx node=0; node -void PebblingSchedule::getDataForMultiprocessorPebbling(std::vector > >& computeSteps, - std::vector > >& sendUpSteps, - std::vector > >& sendDownSteps, - std::vector > >& nodesEvictedAfterStep) const -{ +template +void PebblingSchedule::getDataForMultiprocessorPebbling( + std::vector>> &computeSteps, + std::vector>> &sendUpSteps, + std::vector>> &sendDownSteps, + std::vector>> &nodesEvictedAfterStep) const { computeSteps.clear(); computeSteps.resize(instance->numberOfProcessors()); sendUpSteps.clear(); @@ -1291,48 +1350,49 @@ void PebblingSchedule::getDataForMultiprocessorPebbling(std::vectornumberOfProcessors()); std::vector mem_used(instance->numberOfProcessors(), 0); - std::vector > in_mem(instance->numberOfProcessors()); - if(!has_red_in_beginning.empty()) - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - for(vertex_idx node : has_red_in_beginning[proc]) - { + std::vector> in_mem(instance->numberOfProcessors()); + if (!has_red_in_beginning.empty()) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (vertex_idx node : has_red_in_beginning[proc]) { in_mem[proc].insert(node); mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); } + } + } unsigned step = 0; - for(unsigned superstep=0; superstep step_on_proc(instance->numberOfProcessors(), step); bool any_compute = false; - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - if(!compute_steps_for_proc_superstep[proc][superstep].empty()) + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (!compute_steps_for_proc_superstep[proc][superstep].empty()) { any_compute = true; - - if(any_compute) - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - { + } + } + + if (any_compute) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { computeSteps[proc].emplace_back(); sendUpSteps[proc].emplace_back(); sendDownSteps[proc].emplace_back(); nodesEvictedAfterStep[proc].emplace_back(); } + } - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { std::vector evict_list; - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) - { + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][superstep].size(); ++stepIndex) { vertex_idx node = compute_steps_for_proc_superstep[proc][superstep][stepIndex].node; - if(mem_used[proc] + instance->getComputationalDag().vertex_mem_weight(node) > instance->getArchitecture().memoryBound(proc)) - { - //open new step + if (mem_used[proc] + instance->getComputationalDag().vertex_mem_weight(node) + > instance->getArchitecture().memoryBound(proc)) { + // open new step nodesEvictedAfterStep[proc][step_on_proc[proc]] = evict_list; ++step_on_proc[proc]; - for(vertex_idx to_evict : evict_list) + for (vertex_idx to_evict : evict_list) { mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_evict); - + } + evict_list.clear(); computeSteps[proc].emplace_back(); sendUpSteps[proc].emplace_back(); @@ -1342,193 +1402,192 @@ void PebblingSchedule::getDataForMultiprocessorPebbling(std::vectorgetComputationalDag().vertex_mem_weight(node); - for(vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after) + for (vertex_idx to_evict : compute_steps_for_proc_superstep[proc][superstep][stepIndex].nodes_evicted_after) { evict_list.emplace_back(to_evict); - + } } - if(!evict_list.empty()) - { + if (!evict_list.empty()) { nodesEvictedAfterStep[proc][step_on_proc[proc]] = evict_list; - for(vertex_idx to_evict : evict_list) + for (vertex_idx to_evict : evict_list) { mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_evict); + } } - } - if(any_compute) - for(unsigned proc=0; procnumberOfProcessors(); ++proc) + if (any_compute) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { ++step_on_proc[proc]; + } + } - for(unsigned proc=0; procnumberOfProcessors(); ++proc) + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { step = std::max(step, step_on_proc[proc]); - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - for(; step_on_proc[proc]numberOfProcessors(); ++proc) { + for (; step_on_proc[proc] < step; ++step_on_proc[proc]) { computeSteps[proc].emplace_back(); sendUpSteps[proc].emplace_back(); sendDownSteps[proc].emplace_back(); nodesEvictedAfterStep[proc].emplace_back(); } - + } + bool any_send_up = false; - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - if(!nodes_sent_up[proc][superstep].empty() || !nodes_evicted_in_comm[proc][superstep].empty()) + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (!nodes_sent_up[proc][superstep].empty() || !nodes_evicted_in_comm[proc][superstep].empty()) { any_send_up = true; - - if(any_send_up) - { - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - { + } + } + + if (any_send_up) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { computeSteps[proc].emplace_back(); sendUpSteps[proc].emplace_back(nodes_sent_up[proc][superstep]); sendDownSteps[proc].emplace_back(); nodesEvictedAfterStep[proc].emplace_back(nodes_evicted_in_comm[proc][superstep]); - for(vertex_idx to_evict : nodes_evicted_in_comm[proc][superstep]) + for (vertex_idx to_evict : nodes_evicted_in_comm[proc][superstep]) { mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(to_evict); + } ++step_on_proc[proc]; } ++step; } bool any_send_down = false; - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - if(!nodes_sent_down[proc][superstep].empty()) + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + if (!nodes_sent_down[proc][superstep].empty()) { any_send_down = true; + } + } - if(any_send_down) - { - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - { + if (any_send_down) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { computeSteps[proc].emplace_back(); sendUpSteps[proc].emplace_back(); sendDownSteps[proc].emplace_back(nodes_sent_down[proc][superstep]); - for(vertex_idx send_down : nodes_sent_down[proc][superstep]) + for (vertex_idx send_down : nodes_sent_down[proc][superstep]) { mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(send_down); + } nodesEvictedAfterStep[proc].emplace_back(); ++step_on_proc[proc]; } ++step; } - } } -template -std::vector > > PebblingSchedule::getMemContentAtEnd() const -{ - std::vector > mem_content(instance->numberOfProcessors()); - if(!has_red_in_beginning.empty()) +template +std::vector>> PebblingSchedule::getMemContentAtEnd() const { + std::vector> mem_content(instance->numberOfProcessors()); + if (!has_red_in_beginning.empty()) { mem_content = has_red_in_beginning; + } - for(unsigned step=0; stepnumberOfProcessors(); ++proc) - { + for (unsigned step = 0; step < number_of_supersteps; ++step) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { // computation phase - for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step]) - { + for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { mem_content[proc].insert(computeStep.node); - for(vertex_idx to_remove : computeStep.nodes_evicted_after) + for (vertex_idx to_remove : computeStep.nodes_evicted_after) { mem_content[proc].erase(to_remove); + } } - //communication phase - eviction - for(vertex_idx node : nodes_evicted_in_comm[proc][step]) + // communication phase - eviction + for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { mem_content[proc].erase(node); + } // communication phase - senddown - for(vertex_idx node : nodes_sent_down[proc][step]) + for (vertex_idx node : nodes_sent_down[proc][step]) { mem_content[proc].insert(node); + } } + } return mem_content; } -template -void PebblingSchedule::removeEvictStepsFromEnd() -{ +template +void PebblingSchedule::removeEvictStepsFromEnd() { std::vector mem_used(instance->numberOfProcessors(), 0); std::vector bottleneck(instance->numberOfProcessors(), 0); - std::vector > fast_mem_end = getMemContentAtEnd(); - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - { - for(vertex_idx node : fast_mem_end[proc]) + std::vector> fast_mem_end = getMemContentAtEnd(); + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (vertex_idx node : fast_mem_end[proc]) { mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); + } bottleneck[proc] = instance->getArchitecture().memoryBound(proc) - mem_used[proc]; } - for(unsigned step=number_of_supersteps; step>0;) - { + for (unsigned step = number_of_supersteps; step > 0;) { --step; - for(unsigned proc=0; procnumberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { // communication phase - senddown - for(vertex_idx node : nodes_sent_down[proc][step]) + for (vertex_idx node : nodes_sent_down[proc][step]) { mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(node); + } - //communication phase - eviction + // communication phase - eviction std::vector remaining; - for(vertex_idx node : nodes_evicted_in_comm[proc][step]) - { + for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(node); - if(instance->getComputationalDag().vertex_mem_weight(node) <= bottleneck[proc] - && fast_mem_end[proc].find(node) == fast_mem_end[proc].end()) - { + if (instance->getComputationalDag().vertex_mem_weight(node) <= bottleneck[proc] + && fast_mem_end[proc].find(node) == fast_mem_end[proc].end()) { fast_mem_end[proc].insert(node); bottleneck[proc] -= instance->getComputationalDag().vertex_mem_weight(node); - } - else + } else { remaining.push_back(node); + } } nodes_evicted_in_comm[proc][step] = remaining; bottleneck[proc] = std::min(bottleneck[proc], instance->getArchitecture().memoryBound(proc) - mem_used[proc]); // computation phase - for(unsigned stepIndex = static_cast(compute_steps_for_proc_superstep[proc][step].size()); stepIndex > 0;) - { + for (unsigned stepIndex = static_cast(compute_steps_for_proc_superstep[proc][step].size()); stepIndex > 0;) { --stepIndex; auto &computeStep = compute_steps_for_proc_superstep[proc][step][stepIndex]; std::vector remaining_2; - for(vertex_idx to_remove : computeStep.nodes_evicted_after) - { + for (vertex_idx to_remove : computeStep.nodes_evicted_after) { mem_used[proc] += instance->getComputationalDag().vertex_mem_weight(to_remove); - if( instance->getComputationalDag().vertex_mem_weight(to_remove) <= bottleneck[proc] - && fast_mem_end[proc].find(to_remove) == fast_mem_end[proc].end()) - { + if (instance->getComputationalDag().vertex_mem_weight(to_remove) <= bottleneck[proc] + && fast_mem_end[proc].find(to_remove) == fast_mem_end[proc].end()) { fast_mem_end[proc].insert(to_remove); bottleneck[proc] -= instance->getComputationalDag().vertex_mem_weight(to_remove); - } - else + } else { remaining_2.push_back(to_remove); + } } computeStep.nodes_evicted_after = remaining_2; bottleneck[proc] = std::min(bottleneck[proc], instance->getArchitecture().memoryBound(proc) - mem_used[proc]); - + mem_used[proc] -= instance->getComputationalDag().vertex_mem_weight(computeStep.node); } } } - if(!isValid()) - std::cout<<"ERROR: eviction removal process created an invalid schedule."< -void PebblingSchedule::CreateFromPartialPebblings(const BspInstance &bsp_instance, - const std::vector >& pebblings, - const std::vector >& processors_to_parts, - const std::vector >& original_node_id, - const std::vector >& original_proc_id, - const std::vector > >& has_reds_in_beginning) -{ +template +void PebblingSchedule::CreateFromPartialPebblings( + const BspInstance &bsp_instance, + const std::vector> &pebblings, + const std::vector> &processors_to_parts, + const std::vector> &original_node_id, + const std::vector> &original_proc_id, + const std::vector>> &has_reds_in_beginning) { instance = &bsp_instance; unsigned nr_parts = static_cast(processors_to_parts.size()); - std::vector > in_mem(instance->numberOfProcessors()); - std::vector > force_evicts; + std::vector> in_mem(instance->numberOfProcessors()); + std::vector> force_evicts; compute_steps_for_proc_superstep.clear(); nodes_sent_up.clear(); @@ -1542,50 +1601,54 @@ void PebblingSchedule::CreateFromPartialPebblings(const BspInstance supstep_idx(instance->numberOfProcessors(), 0); std::vector gets_blue_in_superstep(instance->numberOfVertices(), UINT_MAX); - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - if(instance->getComputationalDag().in_degree(node) == 0) + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + if (instance->getComputationalDag().in_degree(node) == 0) { gets_blue_in_superstep[node] = 0; + } + } - for(unsigned part = 0; part < nr_parts; ++part) - { + for (unsigned part = 0; part < nr_parts; ++part) { unsigned starting_step_index = 0; // find dependencies on previous subschedules - for(vertex_idx node = 0; node < pebblings[part].instance->numberOfVertices(); ++node) - if(pebblings[part].instance->getComputationalDag().in_degree(node) == 0) + for (vertex_idx node = 0; node < pebblings[part].instance->numberOfVertices(); ++node) { + if (pebblings[part].instance->getComputationalDag().in_degree(node) == 0) { starting_step_index = std::max(starting_step_index, gets_blue_in_superstep[original_node_id[part].at(node)]); + } + } // sync starting points for the subset of processors - for(unsigned proc : processors_to_parts[part]) + for (unsigned proc : processors_to_parts[part]) { starting_step_index = std::max(starting_step_index, supstep_idx[proc]); - for(unsigned proc : processors_to_parts[part]) - while(supstep_idx[proc] < starting_step_index) - { + } + for (unsigned proc : processors_to_parts[part]) { + while (supstep_idx[proc] < starting_step_index) { compute_steps_for_proc_superstep[proc].emplace_back(); nodes_sent_up[proc].emplace_back(); nodes_sent_down[proc].emplace_back(); nodes_evicted_in_comm[proc].emplace_back(); ++supstep_idx[proc]; } - + } + // check and update according to initial states of red pebbles - for(unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) - { + for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) { unsigned proc_id = original_proc_id[part].at(proc); std::set needed_in_red, add_before, remove_before; - for(vertex_idx node : has_reds_in_beginning[part][proc]) - { + for (vertex_idx node : has_reds_in_beginning[part][proc]) { vertex_idx node_id = original_node_id[part].at(node); needed_in_red.insert(node_id); - if(in_mem[proc_id].find(node_id) == in_mem[proc_id].end()) + if (in_mem[proc_id].find(node_id) == in_mem[proc_id].end()) { add_before.insert(node_id); + } } - for(vertex_idx node : in_mem[proc_id]) - if(needed_in_red.find(node) == needed_in_red.end()) + for (vertex_idx node : in_mem[proc_id]) { + if (needed_in_red.find(node) == needed_in_red.end()) { remove_before.insert(node); + } + } - if((!add_before.empty() || !remove_before.empty()) && supstep_idx[proc_id] == 0) - { + if ((!add_before.empty() || !remove_before.empty()) && supstep_idx[proc_id] == 0) { // this code is added just in case - this shouldn't happen in normal schedules compute_steps_for_proc_superstep[proc_id].emplace_back(); nodes_sent_up[proc_id].emplace_back(); @@ -1594,22 +1657,19 @@ void PebblingSchedule::CreateFromPartialPebblings(const BspInstance::CreateFromPartialPebblings(const BspInstancenumberOfProcessors(); ++proc) + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { max_step_index = std::max(max_step_index, supstep_idx[proc]); - for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) - while(supstep_idx[proc] < max_step_index) - { + } + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + while (supstep_idx[proc] < max_step_index) { compute_steps_for_proc_superstep[proc].emplace_back(); nodes_sent_up[proc].emplace_back(); nodes_sent_down[proc].emplace_back(); nodes_evicted_in_comm[proc].emplace_back(); ++supstep_idx[proc]; } + } number_of_supersteps = max_step_index; need_to_load_inputs = true; @@ -1670,12 +1728,11 @@ void PebblingSchedule::CreateFromPartialPebblings(const BspInstance -void PebblingSchedule::FixForceEvicts(const std::vector > force_evict_node_proc_step) -{ +template +void PebblingSchedule::FixForceEvicts( + const std::vector> force_evict_node_proc_step) { // Some values were evicted only because they weren't present in the next part - see if we can undo those evictions - for(auto force_evict : force_evict_node_proc_step) - { + for (auto force_evict : force_evict_node_proc_step) { vertex_idx node = std::get<0>(force_evict); unsigned proc = std::get<1>(force_evict); unsigned superstep = std::get<2>(force_evict); @@ -1684,79 +1741,82 @@ void PebblingSchedule::FixForceEvicts(const std::vector where; - for(unsigned find_supstep = superstep + 1; find_supstep < numberOfSupersteps(); ++find_supstep) - { - for(unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][find_supstep].size(); ++stepIndex) - if(compute_steps_for_proc_superstep[proc][find_supstep][stepIndex].node == node) - { + for (unsigned find_supstep = superstep + 1; find_supstep < numberOfSupersteps(); ++find_supstep) { + for (unsigned stepIndex = 0; stepIndex < compute_steps_for_proc_superstep[proc][find_supstep].size(); ++stepIndex) { + if (compute_steps_for_proc_superstep[proc][find_supstep][stepIndex].node == node) { next_in_comp = true; where = std::make_pair(find_supstep, stepIndex); break; } - if(next_in_comp) + } + if (next_in_comp) { break; - for(vertex_idx send_down : nodes_sent_down[proc][find_supstep]) - if(send_down == node) - { + } + for (vertex_idx send_down : nodes_sent_down[proc][find_supstep]) { + if (send_down == node) { next_in_comm = true; where = std::make_pair(find_supstep, 0); break; } - if(next_in_comm) + } + if (next_in_comm) { break; + } } // check new schedule for validity - if(!next_in_comp && !next_in_comm) + if (!next_in_comp && !next_in_comm) { continue; - + } + PebblingSchedule test_schedule = *this; - for(auto itr = test_schedule.nodes_evicted_in_comm[proc][superstep].begin(); itr != test_schedule.nodes_evicted_in_comm[proc][superstep].end(); ++itr) - if(*itr == node) - { + for (auto itr = test_schedule.nodes_evicted_in_comm[proc][superstep].begin(); + itr != test_schedule.nodes_evicted_in_comm[proc][superstep].end(); + ++itr) { + if (*itr == node) { test_schedule.nodes_evicted_in_comm[proc][superstep].erase(itr); break; } + } - if(next_in_comp) - { - for(auto itr = test_schedule.compute_steps_for_proc_superstep[proc][where.first].begin(); itr != test_schedule.compute_steps_for_proc_superstep[proc][where.first].end(); ++itr) - if(itr->node == node) - { - if(where.second > 0) - { + if (next_in_comp) { + for (auto itr = test_schedule.compute_steps_for_proc_superstep[proc][where.first].begin(); + itr != test_schedule.compute_steps_for_proc_superstep[proc][where.first].end(); + ++itr) { + if (itr->node == node) { + if (where.second > 0) { auto previous_step = itr; --previous_step; - for(vertex_idx to_evict : itr->nodes_evicted_after) + for (vertex_idx to_evict : itr->nodes_evicted_after) { previous_step->nodes_evicted_after.push_back(to_evict); - } - else - { - for(vertex_idx to_evict : itr->nodes_evicted_after) - test_schedule.nodes_evicted_in_comm[proc][where.first-1].push_back(to_evict); + } + } else { + for (vertex_idx to_evict : itr->nodes_evicted_after) { + test_schedule.nodes_evicted_in_comm[proc][where.first - 1].push_back(to_evict); + } } test_schedule.compute_steps_for_proc_superstep[proc][where.first].erase(itr); break; } + } - if(test_schedule.isValid()) - { + if (test_schedule.isValid()) { nodes_evicted_in_comm[proc][superstep] = test_schedule.nodes_evicted_in_comm[proc][superstep]; - compute_steps_for_proc_superstep[proc][where.first] = test_schedule.compute_steps_for_proc_superstep[proc][where.first]; - nodes_evicted_in_comm[proc][where.first-1] = test_schedule.nodes_evicted_in_comm[proc][where.first-1]; + compute_steps_for_proc_superstep[proc][where.first] + = test_schedule.compute_steps_for_proc_superstep[proc][where.first]; + nodes_evicted_in_comm[proc][where.first - 1] = test_schedule.nodes_evicted_in_comm[proc][where.first - 1]; } - } - else if(next_in_comm) - { - for(auto itr = test_schedule.nodes_sent_down[proc][where.first].begin(); itr != test_schedule.nodes_sent_down[proc][where.first].end(); ++itr) - if(*itr == node) - { + } else if (next_in_comm) { + for (auto itr = test_schedule.nodes_sent_down[proc][where.first].begin(); + itr != test_schedule.nodes_sent_down[proc][where.first].end(); + ++itr) { + if (*itr == node) { test_schedule.nodes_sent_down[proc][where.first].erase(itr); break; } - - if(test_schedule.isValid()) - { + } + + if (test_schedule.isValid()) { nodes_evicted_in_comm[proc][superstep] = test_schedule.nodes_evicted_in_comm[proc][superstep]; nodes_sent_down[proc][where.first] = test_schedule.nodes_sent_down[proc][where.first]; } @@ -1764,61 +1824,54 @@ void PebblingSchedule::FixForceEvicts(const std::vector -void PebblingSchedule::TryToMergeSupersteps() -{ +template +void PebblingSchedule::TryToMergeSupersteps() { std::vector is_removed(number_of_supersteps, false); - for(unsigned step = 1; step < number_of_supersteps; ++step) - { - if(is_removed[step]) + for (unsigned step = 1; step < number_of_supersteps; ++step) { + if (is_removed[step]) { continue; + } unsigned prev_step = step - 1; - while(is_removed[prev_step]) + while (is_removed[prev_step]) { --prev_step; + } - for(unsigned next_step = step + 1; next_step < number_of_supersteps; ++next_step) - { + for (unsigned next_step = step + 1; next_step < number_of_supersteps; ++next_step) { // Try to merge step and next_step PebblingSchedule test_schedule = *this; - for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { test_schedule.compute_steps_for_proc_superstep[proc][step].insert( - test_schedule.compute_steps_for_proc_superstep[proc][step].end(), - test_schedule.compute_steps_for_proc_superstep[proc][next_step].begin(), - test_schedule.compute_steps_for_proc_superstep[proc][next_step].end()); + test_schedule.compute_steps_for_proc_superstep[proc][step].end(), + test_schedule.compute_steps_for_proc_superstep[proc][next_step].begin(), + test_schedule.compute_steps_for_proc_superstep[proc][next_step].end()); test_schedule.compute_steps_for_proc_superstep[proc][next_step].clear(); - - test_schedule.nodes_sent_up[proc][step].insert( - test_schedule.nodes_sent_up[proc][step].end(), - test_schedule.nodes_sent_up[proc][next_step].begin(), - test_schedule.nodes_sent_up[proc][next_step].end()); + + test_schedule.nodes_sent_up[proc][step].insert(test_schedule.nodes_sent_up[proc][step].end(), + test_schedule.nodes_sent_up[proc][next_step].begin(), + test_schedule.nodes_sent_up[proc][next_step].end()); test_schedule.nodes_sent_up[proc][next_step].clear(); - test_schedule.nodes_sent_down[proc][prev_step].insert( - test_schedule.nodes_sent_down[proc][prev_step].end(), - test_schedule.nodes_sent_down[proc][step].begin(), - test_schedule.nodes_sent_down[proc][step].end()); + test_schedule.nodes_sent_down[proc][prev_step].insert(test_schedule.nodes_sent_down[proc][prev_step].end(), + test_schedule.nodes_sent_down[proc][step].begin(), + test_schedule.nodes_sent_down[proc][step].end()); test_schedule.nodes_sent_down[proc][step].clear(); test_schedule.nodes_evicted_in_comm[proc][step].insert( - test_schedule.nodes_evicted_in_comm[proc][step].end(), - test_schedule.nodes_evicted_in_comm[proc][next_step].begin(), - test_schedule.nodes_evicted_in_comm[proc][next_step].end()); + test_schedule.nodes_evicted_in_comm[proc][step].end(), + test_schedule.nodes_evicted_in_comm[proc][next_step].begin(), + test_schedule.nodes_evicted_in_comm[proc][next_step].end()); test_schedule.nodes_evicted_in_comm[proc][next_step].clear(); - } - if(test_schedule.isValid()) - { + if (test_schedule.isValid()) { is_removed[next_step] = true; - for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { compute_steps_for_proc_superstep[proc][step] = test_schedule.compute_steps_for_proc_superstep[proc][step]; compute_steps_for_proc_superstep[proc][next_step].clear(); - + nodes_sent_up[proc][step] = test_schedule.nodes_sent_up[proc][step]; nodes_sent_up[proc][next_step].clear(); @@ -1829,31 +1882,33 @@ void PebblingSchedule::TryToMergeSupersteps() nodes_evicted_in_comm[proc][step] = test_schedule.nodes_evicted_in_comm[proc][step]; nodes_evicted_in_comm[proc][next_step].clear(); } - } - else + } else { break; + } } } unsigned new_nr_supersteps = 0; - for(unsigned step = 0; step < number_of_supersteps; ++step) - if(!is_removed[step]) + for (unsigned step = 0; step < number_of_supersteps; ++step) { + if (!is_removed[step]) { ++new_nr_supersteps; - - if(new_nr_supersteps == number_of_supersteps) + } + } + + if (new_nr_supersteps == number_of_supersteps) { return; + } PebblingSchedule shortened_schedule = *this; shortened_schedule.updateNumberOfSupersteps(new_nr_supersteps); unsigned new_index = 0; - for(unsigned step = 0; step < number_of_supersteps; ++step) - { - if(is_removed[step]) + for (unsigned step = 0; step < number_of_supersteps; ++step) { + if (is_removed[step]) { continue; + } - for(unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) - { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { shortened_schedule.compute_steps_for_proc_superstep[proc][new_index] = compute_steps_for_proc_superstep[proc][step]; shortened_schedule.nodes_sent_up[proc][new_index] = nodes_sent_up[proc][step]; shortened_schedule.nodes_sent_down[proc][new_index] = nodes_sent_down[proc][step]; @@ -1862,53 +1917,61 @@ void PebblingSchedule::TryToMergeSupersteps() ++new_index; } - - *this = shortened_schedule; - if(!isValid()) - std::cout<<"ERROR: schedule is not valid after superstep merging."< -PebblingSchedule PebblingSchedule::ExpandMemSchedule(const BspInstance& original_instance, const std::vector mapping_to_coarse) const -{ - std::map > original_vertices_for_coarse_ID; - for(vertex_idx node = 0; node < original_instance.numberOfVertices(); ++node) +template +PebblingSchedule PebblingSchedule::ExpandMemSchedule(const BspInstance &original_instance, + const std::vector mapping_to_coarse) const { + std::map> original_vertices_for_coarse_ID; + for (vertex_idx node = 0; node < original_instance.numberOfVertices(); ++node) { original_vertices_for_coarse_ID[mapping_to_coarse[node]].insert(node); + } PebblingSchedule fine_schedule; fine_schedule.instance = &original_instance; fine_schedule.updateNumberOfSupersteps(number_of_supersteps); - for(unsigned step=0; stepnumberOfProcessors(); ++proc) - { + for (unsigned step = 0; step < number_of_supersteps; ++step) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { // computation phase - for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step]) - { + for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { vertex_idx node = computeStep.node; - for(vertex_idx original_node : original_vertices_for_coarse_ID[node]) + for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) { fine_schedule.compute_steps_for_proc_superstep[proc][step].emplace_back(original_node); + } - for(vertex_idx to_remove : computeStep.nodes_evicted_after) - for(vertex_idx original_node : original_vertices_for_coarse_ID[to_remove]) - fine_schedule.compute_steps_for_proc_superstep[proc][step].back().nodes_evicted_after.push_back(original_node); + for (vertex_idx to_remove : computeStep.nodes_evicted_after) { + for (vertex_idx original_node : original_vertices_for_coarse_ID[to_remove]) { + fine_schedule.compute_steps_for_proc_superstep[proc][step].back().nodes_evicted_after.push_back( + original_node); + } + } } - //communication phase - for(vertex_idx node : nodes_sent_up[proc][step]) - for(vertex_idx original_node : original_vertices_for_coarse_ID[node]) + // communication phase + for (vertex_idx node : nodes_sent_up[proc][step]) { + for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) { fine_schedule.nodes_sent_up[proc][step].push_back(original_node); - - for(vertex_idx node : nodes_evicted_in_comm[proc][step]) - for(vertex_idx original_node : original_vertices_for_coarse_ID[node]) + } + } + + for (vertex_idx node : nodes_evicted_in_comm[proc][step]) { + for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) { fine_schedule.nodes_evicted_in_comm[proc][step].push_back(original_node); + } + } - for(vertex_idx node : nodes_sent_down[proc][step]) - for(vertex_idx original_node : original_vertices_for_coarse_ID[node]) + for (vertex_idx node : nodes_sent_down[proc][step]) { + for (vertex_idx original_node : original_vertices_for_coarse_ID[node]) { fine_schedule.nodes_sent_down[proc][step].push_back(original_node); + } + } } } @@ -1916,49 +1979,46 @@ PebblingSchedule PebblingSchedule::ExpandMemSchedule(const Bsp return fine_schedule; } -template -BspSchedule PebblingSchedule::ConvertToBsp() const -{ - std::vector node_to_proc(instance->numberOfVertices(), UINT_MAX), node_to_supstep(instance->numberOfVertices(), UINT_MAX); +template +BspSchedule PebblingSchedule::ConvertToBsp() const { + std::vector node_to_proc(instance->numberOfVertices(), UINT_MAX), + node_to_supstep(instance->numberOfVertices(), UINT_MAX); - for(unsigned step=0; stepnumberOfProcessors(); ++proc) - for(const auto& computeStep : compute_steps_for_proc_superstep[proc][step]) - { - const vertex_idx& node = computeStep.node; - if(node_to_proc[node] == UINT_MAX) - { + for (unsigned step = 0; step < number_of_supersteps; ++step) { + for (unsigned proc = 0; proc < instance->numberOfProcessors(); ++proc) { + for (const auto &computeStep : compute_steps_for_proc_superstep[proc][step]) { + const vertex_idx &node = computeStep.node; + if (node_to_proc[node] == UINT_MAX) { node_to_proc[node] = proc; node_to_supstep[node] = step; } } - if(need_to_load_inputs) - for(vertex_idx node = 0; node < instance->numberOfVertices(); ++node) - if(instance->getComputationalDag().in_degree(node) == 0) - { + } + } + if (need_to_load_inputs) { + for (vertex_idx node = 0; node < instance->numberOfVertices(); ++node) { + if (instance->getComputationalDag().in_degree(node) == 0) { unsigned min_superstep = UINT_MAX, proc_chosen = 0; - for(vertex_idx succ : instance->getComputationalDag().children(node)) - if(node_to_supstep[succ] < min_superstep) - { + for (vertex_idx succ : instance->getComputationalDag().children(node)) { + if (node_to_supstep[succ] < min_superstep) { min_superstep = node_to_supstep[succ]; proc_chosen = node_to_proc[succ]; } + } node_to_supstep[node] = min_superstep; node_to_proc[node] = proc_chosen; } + } + } BspSchedule schedule(*instance, node_to_proc, node_to_supstep); - if(schedule.satisfiesPrecedenceConstraints() && schedule.satisfiesNodeTypeConstraints()) - { + if (schedule.satisfiesPrecedenceConstraints() && schedule.satisfiesNodeTypeConstraints()) { schedule.setAutoCommunicationSchedule(); return schedule; - } - else - { - std::cout<<"ERROR: no direct conversion to Bsp schedule exists, using dummy schedule instead."<(*instance); } } -} - +} // namespace osp diff --git a/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp b/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp index 04931cec..71044414 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp @@ -13,17 +13,17 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once -#include #include +#include struct COPTEnv { static Envr getInstance() { static Envr env; return env; } -}; \ No newline at end of file +}; diff --git a/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp b/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp index bb40a004..dce14592 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp @@ -13,24 +13,23 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once -#include "osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp" #include "callbackbase.h" #include "coptcpp_pch.h" #include "osp/bsp/scheduler/Scheduler.hpp" -#include "osp/pebbling/PebblingSchedule.hpp" #include "osp/graph_algorithms/computational_dag_util.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" +#include "osp/pebbling/PebblingSchedule.hpp" +#include "osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp" -namespace osp{ +namespace osp { -template +template class MultiProcessorPebbling : public Scheduler { - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); private: @@ -44,7 +43,6 @@ class MultiProcessorPebbling : public Scheduler { bool write_solutions_found; class WriteSolutionCallback : public CallbackBase { - private: unsigned counter; unsigned max_number_solution; @@ -53,14 +51,16 @@ class MultiProcessorPebbling : public Scheduler { public: WriteSolutionCallback() - : counter(0), max_number_solution(500), best_obj(COPT_INFINITY), write_solutions_path_cb(""), + : counter(0), + max_number_solution(500), + best_obj(COPT_INFINITY), + write_solutions_path_cb(""), solution_file_prefix_cb("") {} std::string write_solutions_path_cb; std::string solution_file_prefix_cb; void callback() override; - }; WriteSolutionCallback solution_callback; @@ -98,21 +98,21 @@ class MultiProcessorPebbling : public Scheduler { unsigned compute_steps_per_cycle = 3; bool need_to_load_inputs = true; std::set needs_blue_at_end; - std::vector > has_red_in_beginning; + std::vector> has_red_in_beginning; bool verbose = false; void constructPebblingScheduleFromSolution(PebblingSchedule &schedule); void setInitialSolution(const BspInstance &instance, - const std::vector > >& computeSteps, - const std::vector > >& sendUpSteps, - const std::vector > >& sendDownSteps, - const std::vector > >& nodesEvictedAfterStep); + const std::vector>> &computeSteps, + const std::vector>> &sendUpSteps, + const std::vector>> &sendDownSteps, + const std::vector>> &nodesEvictedAfterStep); unsigned computeMaxTimeForInitialSolution(const BspInstance &instance, - const std::vector > >& computeSteps, - const std::vector > >& sendUpSteps, - const std::vector > >& sendDownSteps) const; + const std::vector>> &computeSteps, + const std::vector>> &sendUpSteps, + const std::vector>> &sendDownSteps) const; void setupBaseVariablesConstraints(const BspInstance &instance); @@ -135,7 +135,9 @@ class MultiProcessorPebbling : public Scheduler { virtual RETURN_STATUS computePebbling(PebblingSchedule &schedule, bool use_async = false); - virtual RETURN_STATUS computePebblingWithInitialSolution(const PebblingSchedule& initial_solution, PebblingSchedule &out_schedule, bool use_async = false); + virtual RETURN_STATUS computePebblingWithInitialSolution(const PebblingSchedule &initial_solution, + PebblingSchedule &out_schedule, + bool use_async = false); /** * @brief Enables writing intermediate solutions. @@ -193,25 +195,48 @@ class MultiProcessorPebbling : public Scheduler { // getters and setters for problem parameters inline bool allowsSlidingPebbles() const { return slidingPebbles; } + inline bool allowsMergingSteps() const { return mergeSteps; } + inline bool isUpAndDownCostSummed() const { return up_and_down_cost_summed; } + inline bool allowsRecomputation() const { return allows_recomputation; } + inline bool hasRestrictedStepTypes() const { return restrict_step_types; } + inline bool needsToLoadInputs() const { return need_to_load_inputs; } + inline unsigned getComputeStepsPerCycle() const { return compute_steps_per_cycle; } + inline unsigned getMaxTime() const { return max_time; } - inline void setSlidingPebbles (const bool slidingPebbles_) {slidingPebbles = slidingPebbles_; } - inline void setMergingSteps (const bool mergeSteps_) {mergeSteps = mergeSteps_; } - inline void setUpAndDownCostSummed (const bool is_summed_) {up_and_down_cost_summed = is_summed_; } + inline void setSlidingPebbles(const bool slidingPebbles_) { slidingPebbles = slidingPebbles_; } + + inline void setMergingSteps(const bool mergeSteps_) { mergeSteps = mergeSteps_; } + + inline void setUpAndDownCostSummed(const bool is_summed_) { up_and_down_cost_summed = is_summed_; } + inline void setRecomputation(const bool allow_recompute_) { allows_recomputation = allow_recompute_; } - inline void setRestrictStepTypes(const bool restrict_) { restrict_step_types = restrict_; if(restrict_){mergeSteps = true;} } - inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_;} - inline void setComputeStepsPerCycle (const unsigned steps_per_cycle_) {compute_steps_per_cycle = steps_per_cycle_; } - inline void setMaxTime (const unsigned max_time_) {max_time = max_time_; } - inline void setNeedsBlueAtEnd (const std::set& needs_blue_) {needs_blue_at_end = needs_blue_; } - inline void setHasRedInBeginning (const std::vector >& has_red_) {has_red_in_beginning = has_red_; } - inline void setVerbose (const bool verbose_) {verbose = verbose_; } + + inline void setRestrictStepTypes(const bool restrict_) { + restrict_step_types = restrict_; + if (restrict_) { + mergeSteps = true; + } + } + + inline void setNeedToLoadInputs(const bool load_inputs_) { need_to_load_inputs = load_inputs_; } + + inline void setComputeStepsPerCycle(const unsigned steps_per_cycle_) { compute_steps_per_cycle = steps_per_cycle_; } + + inline void setMaxTime(const unsigned max_time_) { max_time = max_time_; } + + inline void setNeedsBlueAtEnd(const std::set &needs_blue_) { needs_blue_at_end = needs_blue_; } + + inline void setHasRedInBeginning(const std::vector> &has_red_) { has_red_in_beginning = has_red_; } + + inline void setVerbose(const bool verbose_) { verbose = verbose_; } + inline void setTimeLimitSeconds(unsigned time_limit_seconds_) { time_limit_seconds = time_limit_seconds_; } bool hasEmptyStep(const BspInstance &instance); @@ -219,11 +244,11 @@ class MultiProcessorPebbling : public Scheduler { // implementation -template +template void MultiProcessorPebbling::solveILP() { - - if(!verbose) + if (!verbose) { model.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0); + } model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, time_limit_seconds); model.SetIntParam(COPT_INTPARAM_THREADS, 128); @@ -241,11 +266,11 @@ void MultiProcessorPebbling::solveILP() { model.Solve(); } -template +template RETURN_STATUS MultiProcessorPebbling::computeSchedule(BspSchedule &schedule) { - - if(max_time == 0) + if (max_time == 0) { max_time = 2 * static_cast(schedule.getInstance().numberOfVertices()); + } setupBaseVariablesConstraints(schedule.getInstance()); setupSyncPhaseVariablesConstraints(schedule.getInstance()); @@ -254,17 +279,13 @@ RETURN_STATUS MultiProcessorPebbling::computeSchedule(BspSchedule::computeSchedule(BspSchedule +template RETURN_STATUS MultiProcessorPebbling::computeSynchPebbling(PebblingSchedule &schedule) { + const BspInstance &instance = schedule.getInstance(); - const BspInstance& instance = schedule.getInstance(); - - if(max_time == 0) + if (max_time == 0) { max_time = 2 * static_cast(instance.numberOfVertices()); - + } + mergeSteps = false; setupBaseVariablesConstraints(instance); @@ -290,18 +311,14 @@ RETURN_STATUS MultiProcessorPebbling::computeSynchPebbling(PebblingSche solveILP(); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - constructPebblingScheduleFromSolution(schedule); return RETURN_STATUS::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - constructPebblingScheduleFromSolution(schedule); return RETURN_STATUS::OSP_SUCCESS; @@ -311,40 +328,35 @@ RETURN_STATUS MultiProcessorPebbling::computeSynchPebbling(PebblingSche } } -template +template RETURN_STATUS MultiProcessorPebbling::computePebbling(PebblingSchedule &schedule, bool use_async) { + const BspInstance &instance = schedule.getInstance(); - const BspInstance& instance = schedule.getInstance(); - - if(max_time == 0) + if (max_time == 0) { max_time = 2 * static_cast(instance.numberOfVertices()); + } synchronous = !use_async; setupBaseVariablesConstraints(instance); - if(synchronous) - { + if (synchronous) { setupSyncPhaseVariablesConstraints(instance); setupBspVariablesConstraintsObjective(instance); - } - else + } else { setupAsyncVariablesConstraintsObjective(instance); + } solveILP(); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - constructPebblingScheduleFromSolution(schedule); return schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - return RETURN_STATUS::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - constructPebblingScheduleFromSolution(schedule); return schedule.isValid() ? RETURN_STATUS::OSP_SUCCESS : RETURN_STATUS::ERROR; @@ -354,54 +366,52 @@ RETURN_STATUS MultiProcessorPebbling::computePebbling(PebblingSchedule< } } -template -RETURN_STATUS MultiProcessorPebbling::computePebblingWithInitialSolution(const PebblingSchedule& initial_solution, PebblingSchedule &out_schedule, bool use_async) -{ - const BspInstance& instance = initial_solution.getInstance(); +template +RETURN_STATUS MultiProcessorPebbling::computePebblingWithInitialSolution(const PebblingSchedule &initial_solution, + PebblingSchedule &out_schedule, + bool use_async) { + const BspInstance &instance = initial_solution.getInstance(); - std::vector > > computeSteps; - std::vector > > sendUpSteps; - std::vector > > sendDownSteps; - std::vector > > nodesEvictedAfterStep; + std::vector>> computeSteps; + std::vector>> sendUpSteps; + std::vector>> sendDownSteps; + std::vector>> nodesEvictedAfterStep; synchronous = !use_async; - + initial_solution.getDataForMultiprocessorPebbling(computeSteps, sendUpSteps, sendDownSteps, nodesEvictedAfterStep); max_time = computeMaxTimeForInitialSolution(instance, computeSteps, sendUpSteps, sendDownSteps); - if(verbose) - std::cout<<"Max time set at "<::computePebblingWithInitialSolutio } } -template +template void MultiProcessorPebbling::setupBaseVariablesConstraints(const BspInstance &instance) { - /* Variables */ - compute = std::vector>(instance.numberOfVertices(), - std::vector(instance.numberOfProcessors())); + compute = std::vector>(instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - compute[node][processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "node_processor_time"); } } compute_exists.resize(instance.numberOfVertices(), - std::vector>(instance.numberOfProcessors(), std::vector(max_time, true))); + std::vector>(instance.numberOfProcessors(), std::vector(max_time, true))); - send_up = std::vector>(instance.numberOfVertices(), - std::vector(instance.numberOfProcessors())); + send_up = std::vector>(instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - send_up[node][processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "send_up"); } } send_up_exists.resize(instance.numberOfVertices(), - std::vector>(instance.numberOfProcessors(), std::vector(max_time, true))); + std::vector>(instance.numberOfProcessors(), std::vector(max_time, true))); - send_down = std::vector>(instance.numberOfVertices(), - std::vector(instance.numberOfProcessors())); + send_down + = std::vector>(instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - send_down[node][processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "send_down"); } } @@ -462,18 +463,15 @@ void MultiProcessorPebbling::setupBaseVariablesConstraints(const BspIns has_blue = std::vector(instance.numberOfVertices()); for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - has_blue[node] = model.AddVars(static_cast(max_time), COPT_BINARY, "blue_pebble"); } has_blue_exists.resize(instance.numberOfVertices(), std::vector(max_time, true)); - has_red = std::vector>(instance.numberOfVertices(), - std::vector(instance.numberOfProcessors())); + has_red = std::vector>(instance.numberOfVertices(), std::vector(instance.numberOfProcessors())); for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - has_red[node][processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "red_pebble"); } } @@ -482,199 +480,202 @@ void MultiProcessorPebbling::setupBaseVariablesConstraints(const BspIns Invalidate variables based on various factors (node types, input loading, step type restriction) */ - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) - if(!instance.isCompatible(node, processor)) - for (unsigned t = 0; t < max_time; t++) - { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { + if (!instance.isCompatible(node, processor)) { + for (unsigned t = 0; t < max_time; t++) { compute_exists[node][processor][t] = false; send_up_exists[node][processor][t] = false; } - + } + } + } + // restrict source nodes if they need to be loaded - if(need_to_load_inputs) - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - if (instance.getComputationalDag().in_degree(node) == 0) - for (unsigned t = 0; t < max_time; t++) - { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) - { + if (need_to_load_inputs) { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (instance.getComputationalDag().in_degree(node) == 0) { + for (unsigned t = 0; t < max_time; t++) { + for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { compute_exists[node][processor][t] = false; send_up_exists[node][processor][t] = false; } has_blue_exists[node][t] = false; } + } + } + } // restrict step types for simpler ILP - if(restrict_step_types) - for (unsigned t = 0; t < max_time; t++) - { + if (restrict_step_types) { + for (unsigned t = 0; t < max_time; t++) { bool this_is_a_comm_step = (t % (compute_steps_per_cycle + 2) == compute_steps_per_cycle + 1); - if(!need_to_load_inputs && t % (compute_steps_per_cycle + 2) == compute_steps_per_cycle) + if (!need_to_load_inputs && t % (compute_steps_per_cycle + 2) == compute_steps_per_cycle) { this_is_a_comm_step = true; - if(need_to_load_inputs && t % (compute_steps_per_cycle + 2) == 0) + } + if (need_to_load_inputs && t % (compute_steps_per_cycle + 2) == 0) { this_is_a_comm_step = true; - if(this_is_a_comm_step) - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) + } + if (this_is_a_comm_step) { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { compute_exists[node][processor][t] = false; - else - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) - { + } + } + } else { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { send_up_exists[node][processor][t] = false; send_down_exists[node][processor][t] = false; } + } + } } + } /* Constraints */ - if(!mergeSteps) - { + if (!mergeSteps) { for (unsigned t = 0; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr; for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - - if(compute_exists[node][processor][t]) + if (compute_exists[node][processor][t]) { expr += compute[node][processor][static_cast(t)]; - if(send_up_exists[node][processor][t]) + } + if (send_up_exists[node][processor][t]) { expr += send_up[node][processor][static_cast(t)]; - if(send_down_exists[node][processor][t]) + } + if (send_down_exists[node][processor][t]) { expr += send_down[node][processor][static_cast(t)]; + } } model.AddConstr(expr <= 1); } } - } - else - { - //extra variables to indicate step types in step merging + } else { + // extra variables to indicate step types in step merging std::vector comp_step_on_proc = std::vector(instance.numberOfProcessors()); std::vector comm_step_on_proc = std::vector(instance.numberOfProcessors()); for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - comp_step_on_proc[processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "comp_step_on_proc"); comm_step_on_proc[processor] = model.AddVars(static_cast(max_time), COPT_BINARY, "comm_step_on_proc"); } const unsigned M = static_cast(instance.numberOfVertices()); - for (unsigned t = 0; t < max_time; t++) - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) - { + for (unsigned t = 0; t < max_time; t++) { + for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { Expr expr_comp, expr_comm; for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - - if(compute_exists[node][processor][t]) + if (compute_exists[node][processor][t]) { expr_comp += compute[node][processor][static_cast(t)]; - if(send_up_exists[node][processor][t]) + } + if (send_up_exists[node][processor][t]) { expr_comm += send_up[node][processor][static_cast(t)]; - if(send_down_exists[node][processor][t]) + } + if (send_down_exists[node][processor][t]) { expr_comm += send_down[node][processor][static_cast(t)]; + } } model.AddConstr(M * comp_step_on_proc[processor][static_cast(t)] >= expr_comp); model.AddConstr(2 * M * comm_step_on_proc[processor][static_cast(t)] >= expr_comm); - model.AddConstr(comp_step_on_proc[processor][static_cast(t)] + comm_step_on_proc[processor][static_cast(t)] <= 1); + model.AddConstr( + comp_step_on_proc[processor][static_cast(t)] + comm_step_on_proc[processor][static_cast(t)] <= 1); } + } } for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned t = 1; t < max_time; t++) { - - if(!has_blue_exists[node][t]) + if (!has_blue_exists[node][t]) { continue; + } Expr expr; for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - - if(send_up_exists[node][processor][t-1]) + if (send_up_exists[node][processor][t - 1]) { expr += send_up[node][processor][static_cast(t) - 1]; + } } model.AddConstr(has_blue[node][static_cast(t)] <= has_blue[node][static_cast(t) - 1] + expr); } } for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 1; t < max_time; t++) { - Expr expr; - if(compute_exists[node][processor][t-1]) + if (compute_exists[node][processor][t - 1]) { expr += compute[node][processor][static_cast(t) - 1]; + } - if(send_down_exists[node][processor][t-1]) + if (send_down_exists[node][processor][t - 1]) { expr += send_down[node][processor][static_cast(t) - 1]; + } - model.AddConstr(has_red[node][processor][static_cast(t)] <= has_red[node][processor][static_cast(t) - 1] + expr); + model.AddConstr(has_red[node][processor][static_cast(t)] + <= has_red[node][processor][static_cast(t) - 1] + expr); } } } for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 0; t < max_time; t++) { - - if(!compute_exists[node][processor][t]) + if (!compute_exists[node][processor][t]) { continue; + } for (const auto &source : instance.getComputationalDag().parents(node)) { - - if(!mergeSteps || !compute_exists[source][processor][t]) - model.AddConstr(compute[node][processor][static_cast(t)] <= has_red[source][processor][static_cast(t)]); - else - model.AddConstr(compute[node][processor][static_cast(t)] <= has_red[source][processor][static_cast(t)] + compute[source][processor][static_cast(t)]); + if (!mergeSteps || !compute_exists[source][processor][t]) { + model.AddConstr(compute[node][processor][static_cast(t)] + <= has_red[source][processor][static_cast(t)]); + } else { + model.AddConstr(compute[node][processor][static_cast(t)] + <= has_red[source][processor][static_cast(t)] + + compute[source][processor][static_cast(t)]); + } } } } } for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 0; t < max_time; t++) { - - if(send_up_exists[node][processor][t]) + if (send_up_exists[node][processor][t]) { model.AddConstr(send_up[node][processor][static_cast(t)] <= has_red[node][processor][static_cast(t)]); + } } } } for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 0; t < max_time; t++) { - - if(send_down_exists[node][processor][t] && has_blue_exists[node][t]) + if (send_down_exists[node][processor][t] && has_blue_exists[node][t]) { model.AddConstr(send_down[node][processor][static_cast(t)] <= has_blue[node][static_cast(t)]); + } } } } for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (unsigned t = 0; t < max_time; t++) { Expr expr; for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { expr += has_red[node][processor][static_cast(t)] * instance.getComputationalDag().vertex_mem_weight(node); - if(!slidingPebbles && compute_exists[node][processor][t]) + if (!slidingPebbles && compute_exists[node][processor][t]) { expr += compute[node][processor][static_cast(t)] * instance.getComputationalDag().vertex_mem_weight(node); + } } model.AddConstr(expr <= instance.getArchitecture().memoryBound(processor)); @@ -682,58 +683,62 @@ void MultiProcessorPebbling::setupBaseVariablesConstraints(const BspIns } for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - - if(has_red_in_beginning.empty() || has_red_in_beginning[processor].find(node) == has_red_in_beginning[processor].end()) + if (has_red_in_beginning.empty() + || has_red_in_beginning[processor].find(node) == has_red_in_beginning[processor].end()) { model.AddConstr(has_red[node][processor][0] == 0); - else + } else { model.AddConstr(has_red[node][processor][0] == 1); + } } } for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if(!need_to_load_inputs || instance.getComputationalDag().in_degree(node) > 0) + if (!need_to_load_inputs || instance.getComputationalDag().in_degree(node) > 0) { model.AddConstr(has_blue[node][0] == 0); + } } - if(needs_blue_at_end.empty()) // default case: blue pebbles required on sinks at the end + if (needs_blue_at_end.empty()) // default case: blue pebbles required on sinks at the end { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - if (instance.getComputationalDag().out_degree(node) == 0 && has_blue_exists[node][max_time-1]) - model.AddConstr(has_blue[node][static_cast(max_time)-1] == 1); - } - else // otherwise: specified set of nodes that need blue at the end + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (instance.getComputationalDag().out_degree(node) == 0 && has_blue_exists[node][max_time - 1]) { + model.AddConstr(has_blue[node][static_cast(max_time) - 1] == 1); + } + } + } else // otherwise: specified set of nodes that need blue at the end { - for (vertex_idx node : needs_blue_at_end) - if(has_blue_exists[node][max_time-1]) - model.AddConstr(has_blue[node][static_cast(max_time)-1] == 1); + for (vertex_idx node : needs_blue_at_end) { + if (has_blue_exists[node][max_time - 1]) { + model.AddConstr(has_blue[node][static_cast(max_time) - 1] == 1); + } + } } - + // disable recomputation if needed - if(!allows_recomputation) - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - { + if (!allows_recomputation) { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { Expr expr; - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) - for (unsigned t = 0; t < max_time; t++) - if(compute_exists[node][processor][t]) + for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { + for (unsigned t = 0; t < max_time; t++) { + if (compute_exists[node][processor][t]) { expr += compute[node][processor][static_cast(t)]; + } + } + } model.AddConstr(expr <= 1); } - + } }; -template +template void MultiProcessorPebbling::setupSyncPhaseVariablesConstraints(const BspInstance &instance) { - comp_phase = model.AddVars(static_cast(max_time), COPT_BINARY, "comp_phase"); - if(mergeSteps) + if (mergeSteps) { comm_phase = model.AddVars(static_cast(max_time), COPT_BINARY, "comm_phase"); - else - { + } else { send_up_phase = model.AddVars(static_cast(max_time), COPT_BINARY, "send_up_phase"); send_down_phase = model.AddVars(static_cast(max_time), COPT_BINARY, "send_down_phase"); } @@ -741,50 +746,47 @@ void MultiProcessorPebbling::setupSyncPhaseVariablesConstraints(const B const unsigned M = static_cast(instance.numberOfProcessors() * instance.numberOfVertices()); for (unsigned t = 0; t < max_time; t++) { - Expr expr_comp, expr_comm, expr_send_up, expr_send_down; for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - if(compute_exists[node][processor][t]) + if (compute_exists[node][processor][t]) { expr_comp += compute[node][processor][static_cast(t)]; - if(mergeSteps) - { - if(send_up_exists[node][processor][t]) + } + if (mergeSteps) { + if (send_up_exists[node][processor][t]) { expr_comm += send_up[node][processor][static_cast(t)]; - - if(send_down_exists[node][processor][t]) + } + + if (send_down_exists[node][processor][t]) { expr_comm += send_down[node][processor][static_cast(t)]; - } - else - { - if(send_up_exists[node][processor][t]) + } + } else { + if (send_up_exists[node][processor][t]) { expr_send_up += send_up[node][processor][static_cast(t)]; + } - if(send_down_exists[node][processor][t]) + if (send_down_exists[node][processor][t]) { expr_send_down += send_down[node][processor][static_cast(t)]; + } } } } model.AddConstr(M * comp_phase[static_cast(t)] >= expr_comp); - if(mergeSteps) - { + if (mergeSteps) { model.AddConstr(2 * M * comm_phase[static_cast(t)] >= expr_comm); model.AddConstr(comp_phase[static_cast(t)] + comm_phase[static_cast(t)] <= 1); - } - else - { + } else { model.AddConstr(M * send_up_phase[static_cast(t)] >= expr_send_up); model.AddConstr(M * send_down_phase[static_cast(t)] >= expr_send_down); - model.AddConstr(comp_phase[static_cast(t)] + send_up_phase[static_cast(t)] + send_down_phase[static_cast(t)] <= 1); + model.AddConstr( + comp_phase[static_cast(t)] + send_up_phase[static_cast(t)] + send_down_phase[static_cast(t)] <= 1); } } }; -template +template void MultiProcessorPebbling::setupBspVariablesConstraintsObjective(const BspInstance &instance) { - comp_phase_ends = model.AddVars(static_cast(max_time), COPT_BINARY, "comp_phase_ends"); comm_phase_ends = model.AddVars(static_cast(max_time), COPT_BINARY, "comm_phase_ends"); @@ -799,8 +801,7 @@ void MultiProcessorPebbling::setupBspVariablesConstraintsObjective(cons VarArray send_up_induced; VarArray send_down_induced; - if(up_and_down_cost_summed) - { + if (up_and_down_cost_summed) { send_up_induced = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "send_up_induced"); send_down_induced = model.AddVars(static_cast(max_time), COPT_CONTINUOUS, "send_down_induced"); } @@ -812,85 +813,101 @@ void MultiProcessorPebbling::setupBspVariablesConstraintsObjective(cons } for (unsigned t = 0; t < max_time; t++) { - model.AddConstr(comp_phase[static_cast(t)] >= comp_phase_ends[static_cast(t)]); - if(mergeSteps) + if (mergeSteps) { model.AddConstr(comm_phase[static_cast(t)] >= comm_phase_ends[static_cast(t)]); - else - model.AddConstr(send_down_phase[static_cast(t)] + send_up_phase[static_cast(t)] >= comm_phase_ends[static_cast(t)]); + } else { + model.AddConstr(send_down_phase[static_cast(t)] + send_up_phase[static_cast(t)] + >= comm_phase_ends[static_cast(t)]); + } } for (unsigned t = 0; t < max_time - 1; t++) { - - model.AddConstr(comp_phase_ends[static_cast(t)] >= comp_phase[static_cast(t)] - comp_phase[static_cast(t)+1]); - if(mergeSteps) - model.AddConstr(comm_phase_ends[static_cast(t)] >= comm_phase[static_cast(t)] - comm_phase[static_cast(t)+1]); - else - model.AddConstr(comm_phase_ends[static_cast(t)] >= send_down_phase[static_cast(t)] + send_up_phase[static_cast(t)] - send_down_phase[static_cast(t)+1] - send_up_phase[static_cast(t)+1]); + model.AddConstr(comp_phase_ends[static_cast(t)] + >= comp_phase[static_cast(t)] - comp_phase[static_cast(t) + 1]); + if (mergeSteps) { + model.AddConstr(comm_phase_ends[static_cast(t)] + >= comm_phase[static_cast(t)] - comm_phase[static_cast(t) + 1]); + } else { + model.AddConstr(comm_phase_ends[static_cast(t)] + >= send_down_phase[static_cast(t)] + send_up_phase[static_cast(t)] + - send_down_phase[static_cast(t) + 1] - send_up_phase[static_cast(t) + 1]); + } } - model.AddConstr(comp_phase_ends[static_cast(max_time)-1] >= comp_phase[static_cast(max_time)-1]); - if(mergeSteps) - model.AddConstr(comm_phase_ends[static_cast(max_time)-1] >= comm_phase[static_cast(max_time)-1]); - else - model.AddConstr(comm_phase_ends[static_cast(max_time)-1] >= send_down_phase[static_cast(max_time)-1] + send_up_phase[static_cast(max_time)-1]); + model.AddConstr(comp_phase_ends[static_cast(max_time) - 1] >= comp_phase[static_cast(max_time) - 1]); + if (mergeSteps) { + model.AddConstr(comm_phase_ends[static_cast(max_time) - 1] >= comm_phase[static_cast(max_time) - 1]); + } else { + model.AddConstr(comm_phase_ends[static_cast(max_time) - 1] + >= send_down_phase[static_cast(max_time) - 1] + send_up_phase[static_cast(max_time) - 1]); + } - const unsigned M = static_cast(instance.numberOfProcessors() * ( - sumOfVerticesWorkWeights(instance.getComputationalDag()) + - sumOfVerticesCommunicationWeights(instance.getComputationalDag()) ) ); + const unsigned M = static_cast(instance.numberOfProcessors() + * (sumOfVerticesWorkWeights(instance.getComputationalDag()) + + sumOfVerticesCommunicationWeights(instance.getComputationalDag()))); for (unsigned t = 1; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr_work; Expr expr_send_up; Expr expr_send_down; for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if(compute_exists[node][processor][t]) - expr_work += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][static_cast(t)]; - if(send_up_exists[node][processor][t]) - expr_send_up += instance.getComputationalDag().vertex_comm_weight(node) * send_up[node][processor][static_cast(t)]; - if(send_down_exists[node][processor][t]) - expr_send_down += instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][static_cast(t)]; - } - - model.AddConstr(M * comm_phase_ends[static_cast(t)] + work_step_until[processor][static_cast(t)] >= - work_step_until[processor][static_cast(t) - 1] + expr_work); - - model.AddConstr(M * comp_phase_ends[static_cast(t)] + send_up_step_until[processor][static_cast(t)] >= - send_up_step_until[processor][static_cast(t) - 1] + expr_send_up); - - model.AddConstr(M * comp_phase_ends[static_cast(t)] + send_down_step_until[processor][static_cast(t)] >= - send_down_step_until[processor][static_cast(t) - 1] + expr_send_down); - - model.AddConstr(work_induced[static_cast(t)] >= work_step_until[processor][static_cast(t)] - M * (1 - comp_phase_ends[static_cast(t)])); - if(up_and_down_cost_summed) - { - model.AddConstr(send_up_induced[static_cast(t)] >= send_up_step_until[processor][static_cast(t)] - M * (1 - comm_phase_ends[static_cast(t)])); - model.AddConstr(send_down_induced[static_cast(t)] >= send_down_step_until[processor][static_cast(t)] - M * (1 - comm_phase_ends[static_cast(t)])); - model.AddConstr(comm_induced[static_cast(t)] >= send_up_induced[static_cast(t)] + send_down_induced[static_cast(t)]); - } - else - { - model.AddConstr(comm_induced[static_cast(t)] >= send_down_step_until[processor][static_cast(t)] - M * (1 - comm_phase_ends[static_cast(t)])); - model.AddConstr(comm_induced[static_cast(t)] >= send_up_step_until[processor][static_cast(t)] - M * (1 - comm_phase_ends[static_cast(t)])); + if (compute_exists[node][processor][t]) { + expr_work += instance.getComputationalDag().vertex_work_weight(node) + * compute[node][processor][static_cast(t)]; + } + if (send_up_exists[node][processor][t]) { + expr_send_up += instance.getComputationalDag().vertex_comm_weight(node) + * send_up[node][processor][static_cast(t)]; + } + if (send_down_exists[node][processor][t]) { + expr_send_down += instance.getComputationalDag().vertex_comm_weight(node) + * send_down[node][processor][static_cast(t)]; + } + } + + model.AddConstr(M * comm_phase_ends[static_cast(t)] + work_step_until[processor][static_cast(t)] + >= work_step_until[processor][static_cast(t) - 1] + expr_work); + + model.AddConstr(M * comp_phase_ends[static_cast(t)] + send_up_step_until[processor][static_cast(t)] + >= send_up_step_until[processor][static_cast(t) - 1] + expr_send_up); + + model.AddConstr(M * comp_phase_ends[static_cast(t)] + send_down_step_until[processor][static_cast(t)] + >= send_down_step_until[processor][static_cast(t) - 1] + expr_send_down); + + model.AddConstr(work_induced[static_cast(t)] + >= work_step_until[processor][static_cast(t)] - M * (1 - comp_phase_ends[static_cast(t)])); + if (up_and_down_cost_summed) { + model.AddConstr(send_up_induced[static_cast(t)] >= send_up_step_until[processor][static_cast(t)] + - M * (1 - comm_phase_ends[static_cast(t)])); + model.AddConstr(send_down_induced[static_cast(t)] >= send_down_step_until[processor][static_cast(t)] + - M * (1 - comm_phase_ends[static_cast(t)])); + model.AddConstr(comm_induced[static_cast(t)] + >= send_up_induced[static_cast(t)] + send_down_induced[static_cast(t)]); + } else { + model.AddConstr(comm_induced[static_cast(t)] >= send_down_step_until[processor][static_cast(t)] + - M * (1 - comm_phase_ends[static_cast(t)])); + model.AddConstr(comm_induced[static_cast(t)] >= send_up_step_until[processor][static_cast(t)] + - M * (1 - comm_phase_ends[static_cast(t)])); } } } // t = 0 for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr_work; Expr expr_send_up; Expr expr_send_down; for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - if(compute_exists[node][processor][0]) + if (compute_exists[node][processor][0]) { expr_work += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][0]; - if(send_up_exists[node][processor][0]) + } + if (send_up_exists[node][processor][0]) { expr_send_up += instance.getComputationalDag().vertex_comm_weight(node) * send_up[node][processor][0]; - if(send_down_exists[node][processor][0]) + } + if (send_down_exists[node][processor][0]) { expr_send_down += instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][0]; + } } model.AddConstr(M * comm_phase_ends[0] + work_step_until[processor][0] >= expr_work); @@ -900,14 +917,11 @@ void MultiProcessorPebbling::setupBspVariablesConstraintsObjective(cons model.AddConstr(M * comp_phase_ends[0] + send_down_step_until[processor][0] >= expr_send_down); model.AddConstr(work_induced[0] >= work_step_until[processor][0] - M * (1 - comp_phase_ends[0])); - if(up_and_down_cost_summed) - { + if (up_and_down_cost_summed) { model.AddConstr(send_up_induced[0] >= send_up_step_until[processor][0] - M * (1 - comm_phase_ends[0])); model.AddConstr(send_down_induced[0] >= send_down_step_until[processor][0] - M * (1 - comm_phase_ends[0])); model.AddConstr(comm_induced[0] >= send_up_induced[0] + send_down_induced[0]); - } - else - { + } else { model.AddConstr(comm_induced[0] >= send_down_step_until[processor][0] - M * (1 - comm_phase_ends[0])); model.AddConstr(comm_induced[0] >= send_up_step_until[processor][0] - M * (1 - comm_phase_ends[0])); } @@ -919,25 +933,21 @@ void MultiProcessorPebbling::setupBspVariablesConstraintsObjective(cons Expr expr; for (unsigned t = 0; t < max_time; t++) { - expr += work_induced[static_cast(t)] + instance.synchronisationCosts() * comm_phase_ends[static_cast(t)] + - instance.communicationCosts() * comm_induced[static_cast(t)]; + expr += work_induced[static_cast(t)] + instance.synchronisationCosts() * comm_phase_ends[static_cast(t)] + + instance.communicationCosts() * comm_induced[static_cast(t)]; } model.SetObjective(expr, COPT_MINIMIZE); }; -template +template void MultiProcessorPebbling::setupSyncObjective(const BspInstance &instance) { - Expr expr; for (unsigned t = 0; t < max_time; t++) { - if(!mergeSteps) - { - expr += comp_phase[static_cast(t)] + instance.communicationCosts() * send_up_phase[static_cast(t)] + - instance.communicationCosts() * send_down_phase[static_cast(t)]; - } - else - { + if (!mergeSteps) { + expr += comp_phase[static_cast(t)] + instance.communicationCosts() * send_up_phase[static_cast(t)] + + instance.communicationCosts() * send_down_phase[static_cast(t)]; + } else { // this objective+parameter combination is not very meaningful, but still defined here to avoid a segfault otherwise expr += comp_phase[static_cast(t)] + instance.communicationCosts() * comm_phase[static_cast(t)]; } @@ -946,9 +956,8 @@ void MultiProcessorPebbling::setupSyncObjective(const BspInstance +template void MultiProcessorPebbling::setupAsyncVariablesConstraintsObjective(const BspInstance &instance) { - std::vector finish_times(instance.numberOfProcessors()); for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { @@ -959,77 +968,86 @@ void MultiProcessorPebbling::setupAsyncVariablesConstraintsObjective(co VarArray gets_blue = model.AddVars(static_cast(instance.numberOfVertices()), COPT_CONTINUOUS, "gets_blue"); - const unsigned M = static_cast(instance.numberOfProcessors() * ( - sumOfVerticesWorkWeights(instance.getComputationalDag()) + - sumOfVerticesCommunicationWeights(instance.getComputationalDag()) ) ); + const unsigned M = static_cast(instance.numberOfProcessors() + * (sumOfVerticesWorkWeights(instance.getComputationalDag()) + + sumOfVerticesCommunicationWeights(instance.getComputationalDag()))); for (unsigned t = 0; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr send_down_step_length; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - if(send_down_exists[node][processor][t]) - send_down_step_length += instance.communicationCosts() * - instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][static_cast(t)]; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (send_down_exists[node][processor][t]) { + send_down_step_length += instance.communicationCosts() + * instance.getComputationalDag().vertex_comm_weight(node) + * send_down[node][processor][static_cast(t)]; + } + } - if(send_up_exists[node][processor][t]) - model.AddConstr(gets_blue[static_cast(node)] >= finish_times[processor][static_cast(t)] - (1 - send_up[node][processor][static_cast(t)]) * M); - if(send_down_exists[node][processor][t]) - model.AddConstr(gets_blue[static_cast(node)] <= - finish_times[processor][static_cast(t)] + (1 - send_down[node][processor][static_cast(t)]) * M - send_down_step_length); + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (send_up_exists[node][processor][t]) { + model.AddConstr(gets_blue[static_cast(node)] + >= finish_times[processor][static_cast(t)] + - (1 - send_up[node][processor][static_cast(t)]) * M); + } + if (send_down_exists[node][processor][t]) { + model.AddConstr(gets_blue[static_cast(node)] + <= finish_times[processor][static_cast(t)] + + (1 - send_down[node][processor][static_cast(t)]) * M - send_down_step_length); + } } } } // makespan constraint for (unsigned t = 0; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - model.AddConstr(makespan >= finish_times[processor][static_cast(t)]); } } // t = 0 for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr; for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - - if(compute_exists[node][processor][0]) + if (compute_exists[node][processor][0]) { expr += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][0]; + } - if(send_up_exists[node][processor][0]) - expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) * send_up[node][processor][0]; + if (send_up_exists[node][processor][0]) { + expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) + * send_up[node][processor][0]; + } - if(send_down_exists[node][processor][0]) - expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][0]; + if (send_down_exists[node][processor][0]) { + expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) + * send_down[node][processor][0]; + } } model.AddConstr(finish_times[processor][0] >= expr); } for (unsigned t = 1; t < max_time; t++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - Expr expr; for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - - if(compute_exists[node][processor][t]) + if (compute_exists[node][processor][t]) { expr += instance.getComputationalDag().vertex_work_weight(node) * compute[node][processor][static_cast(t)]; + } - if(send_up_exists[node][processor][t]) - expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) * send_up[node][processor][static_cast(t)]; + if (send_up_exists[node][processor][t]) { + expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) + * send_up[node][processor][static_cast(t)]; + } - if(send_down_exists[node][processor][t]) - expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) * send_down[node][processor][static_cast(t)]; + if (send_down_exists[node][processor][t]) { + expr += instance.communicationCosts() * instance.getComputationalDag().vertex_comm_weight(node) + * send_down[node][processor][static_cast(t)]; + } } - model.AddConstr(finish_times[processor][static_cast(t)] >= finish_times[processor][static_cast(t) - 1] + expr); + model.AddConstr(finish_times[processor][static_cast(t)] + >= finish_times[processor][static_cast(t) - 1] + expr); } } @@ -1040,15 +1058,11 @@ void MultiProcessorPebbling::setupAsyncVariablesConstraintsObjective(co model.SetObjective(makespan, COPT_MINIMIZE); } -template +template void MultiProcessorPebbling::WriteSolutionCallback::callback() { - if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { - try { - if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { - best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); // auto sched = constructBspScheduleFromCallback(); @@ -1059,94 +1073,105 @@ void MultiProcessorPebbling::WriteSolutionCallback::callback() { counter++; } - } catch (const std::exception &e) { - } + } catch (const std::exception &e) {} } }; -template -void MultiProcessorPebbling::constructPebblingScheduleFromSolution(PebblingSchedule &schedule) -{ - const BspInstance& instance = schedule.getInstance(); +template +void MultiProcessorPebbling::constructPebblingScheduleFromSolution(PebblingSchedule &schedule) { + const BspInstance &instance = schedule.getInstance(); - std::vector > > > nodes_computed(instance.numberOfProcessors(), std::vector > >(max_time)); - std::vector > > nodes_sent_up(instance.numberOfProcessors(), std::vector >(max_time)); - std::vector > > nodes_sent_down(instance.numberOfProcessors(), std::vector >(max_time)); - std::vector > > evicted_after(instance.numberOfProcessors(), std::vector >(max_time)); + std::vector>>> nodes_computed( + instance.numberOfProcessors(), std::vector>>(max_time)); + std::vector>> nodes_sent_up(instance.numberOfProcessors(), + std::vector>(max_time)); + std::vector>> nodes_sent_down(instance.numberOfProcessors(), + std::vector>(max_time)); + std::vector>> evicted_after(instance.numberOfProcessors(), + std::vector>(max_time)); // used to remove unneeded steps when a node is sent down and then up (which becomes invalid after reordering the comm phases) - std::vector > sent_down_already(instance.numberOfVertices(), std::vector(instance.numberOfProcessors(), false)); - std::vector > ignore_red(instance.numberOfVertices(), std::vector(instance.numberOfProcessors(), false)); + std::vector> sent_down_already(instance.numberOfVertices(), + std::vector(instance.numberOfProcessors(), false)); + std::vector> ignore_red(instance.numberOfVertices(), std::vector(instance.numberOfProcessors(), false)); std::vector topOrder = GetTopOrder(instance.getComputationalDag()); std::vector topOrderPosition(instance.numberOfVertices()); - for(unsigned index = 0; index < instance.numberOfVertices(); ++index) + for (unsigned index = 0; index < instance.numberOfVertices(); ++index) { topOrderPosition[topOrder[index]] = index; + } std::vector empty_step(max_time, true); - std::vector > step_type_on_proc(instance.numberOfProcessors(), std::vector(max_time, 0)); - - for (unsigned step = 0; step < max_time; step++) - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - if (compute_exists[node][processor][step] && compute[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) - step_type_on_proc[processor][step] = 1; - + std::vector> step_type_on_proc(instance.numberOfProcessors(), std::vector(max_time, 0)); for (unsigned step = 0; step < max_time; step++) { - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (compute_exists[node][processor][step] + && compute[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { + step_type_on_proc[processor][step] = 1; + } + } + } + } - if (step>0 && has_red[node][processor][static_cast(step)-1].Get(COPT_DBLINFO_VALUE) >= .99 && has_red[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) <= .01 && !ignore_red[node][processor]) - { - for(size_t previous_step = step - 1; previous_step < step; --previous_step) - if(!nodes_computed[processor][previous_step].empty() || !nodes_sent_up[processor][previous_step].empty() || !nodes_sent_down[processor][previous_step].empty() || previous_step == 0) - { + for (unsigned step = 0; step < max_time; step++) { + for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (step > 0 && has_red[node][processor][static_cast(step) - 1].Get(COPT_DBLINFO_VALUE) >= .99 + && has_red[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) <= .01 + && !ignore_red[node][processor]) { + for (size_t previous_step = step - 1; previous_step < step; --previous_step) { + if (!nodes_computed[processor][previous_step].empty() || !nodes_sent_up[processor][previous_step].empty() + || !nodes_sent_down[processor][previous_step].empty() || previous_step == 0) { evicted_after[processor][previous_step].insert(node); empty_step[previous_step] = false; break; } + } } - - if (compute_exists[node][processor][step] && compute[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) - { + + if (compute_exists[node][processor][step] + && compute[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { nodes_computed[processor][step].emplace(topOrderPosition[node], node); empty_step[step] = false; ignore_red[node][processor] = false; - //implicit eviction in case of mergesteps - never having "has_red=1" - if(step + 1 < max_time && has_red[node][processor][static_cast(step)+1].Get(COPT_DBLINFO_VALUE) <= .01) + // implicit eviction in case of mergesteps - never having "has_red=1" + if (step + 1 < max_time && has_red[node][processor][static_cast(step) + 1].Get(COPT_DBLINFO_VALUE) <= .01) { evicted_after[processor][step].insert(node); + } } - if (send_down_exists[node][processor][step] && send_down[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) - { + if (send_down_exists[node][processor][step] + && send_down[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99) { bool keep_step = false; - for(unsigned next_step = step+1; next_step < max_time && has_red[node][processor][static_cast(next_step)].Get(COPT_DBLINFO_VALUE) >= .99 ; ++next_step) - if(step_type_on_proc[processor][next_step] == 1) - { + for (unsigned next_step = step + 1; + next_step < max_time + && has_red[node][processor][static_cast(next_step)].Get(COPT_DBLINFO_VALUE) >= .99; + ++next_step) { + if (step_type_on_proc[processor][next_step] == 1) { keep_step = true; break; } + } - if(keep_step) - { + if (keep_step) { nodes_sent_down[processor][step].push_back(node); empty_step[step] = false; step_type_on_proc[processor][step] = 3; ignore_red[node][processor] = false; - } - else + } else { ignore_red[node][processor] = true; + } sent_down_already[node][processor] = true; } - if (send_up_exists[node][processor][step] && send_up[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99 && !sent_down_already[node][processor]) - { + if (send_up_exists[node][processor][step] + && send_up[node][processor][static_cast(step)].Get(COPT_DBLINFO_VALUE) >= .99 + && !sent_down_already[node][processor]) { nodes_sent_up[processor][step].push_back(node); empty_step[step] = false; step_type_on_proc[processor][step] = 2; @@ -1156,60 +1181,57 @@ void MultiProcessorPebbling::constructPebblingScheduleFromSolution(Pebb } // components of the final PebblingSchedule - the first two dimensions are always processor and superstep - std::vector > > compute_steps_per_supstep(instance.numberOfProcessors()); - std::vector > > > nodes_evicted_after_compute(instance.numberOfProcessors()); - std::vector > > nodes_sent_up_in_supstep(instance.numberOfProcessors()); - std::vector > > nodes_sent_down_in_supstep(instance.numberOfProcessors()); - std::vector > > nodes_evicted_in_comm_phase(instance.numberOfProcessors()); + std::vector>> compute_steps_per_supstep(instance.numberOfProcessors()); + std::vector>>> nodes_evicted_after_compute(instance.numberOfProcessors()); + std::vector>> nodes_sent_up_in_supstep(instance.numberOfProcessors()); + std::vector>> nodes_sent_down_in_supstep(instance.numberOfProcessors()); + std::vector>> nodes_evicted_in_comm_phase(instance.numberOfProcessors()); // edge case: check if an extra superstep must be added in the beginning to evict values that are initially in cache bool needs_evict_step_in_beginning = false; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - for (unsigned step = 0; step < max_time; step++) - { - if(step_type_on_proc[proc][step] == 0 && !evicted_after[proc][step].empty()) - { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { + for (unsigned step = 0; step < max_time; step++) { + if (step_type_on_proc[proc][step] == 0 && !evicted_after[proc][step].empty()) { needs_evict_step_in_beginning = true; break; - } - else if(step_type_on_proc[proc][step]>0) + } else if (step_type_on_proc[proc][step] > 0) { break; + } } + } // create the actual PebblingSchedule - iterating over the steps unsigned superstepIndex = 0; - if(synchronous) - { + if (synchronous) { bool in_comm = true; superstepIndex = UINT_MAX; - if(needs_evict_step_in_beginning) - { + if (needs_evict_step_in_beginning) { // artificially insert comm step in beginning, if it would start with compute otherwise bool begins_with_compute = false; - for (unsigned step = 0; step < max_time; step++) - { + for (unsigned step = 0; step < max_time; step++) { bool is_comp = false, is_comm = false; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - { - if(step_type_on_proc[proc][step] == 1) + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { + if (step_type_on_proc[proc][step] == 1) { is_comp = true; - if(step_type_on_proc[proc][step] > 1) + } + if (step_type_on_proc[proc][step] > 1) { is_comm = true; + } } - if(is_comp) + if (is_comp) { begins_with_compute = true; - if(is_comp || is_comm) + } + if (is_comp || is_comm) { break; + } } - - if(begins_with_compute) - { + + if (begins_with_compute) { superstepIndex = 0; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { compute_steps_per_supstep[proc].push_back(std::vector()); - nodes_evicted_after_compute[proc].push_back(std::vector >()); + nodes_evicted_after_compute[proc].push_back(std::vector>()); nodes_sent_up_in_supstep[proc].push_back(std::vector()); nodes_sent_down_in_supstep[proc].push_back(std::vector()); nodes_evicted_in_comm_phase[proc].push_back(std::vector()); @@ -1218,23 +1240,21 @@ void MultiProcessorPebbling::constructPebblingScheduleFromSolution(Pebb } // process steps - for (unsigned step = 0; step < max_time; step++) - { - if(empty_step[step]) + for (unsigned step = 0; step < max_time; step++) { + if (empty_step[step]) { continue; + } unsigned step_type = 0; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { step_type = std::max(step_type, step_type_on_proc[proc][step]); + } - if (step_type == 1) - { - if(in_comm) - { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - { + if (step_type == 1) { + if (in_comm) { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { compute_steps_per_supstep[proc].push_back(std::vector()); - nodes_evicted_after_compute[proc].push_back(std::vector >()); + nodes_evicted_after_compute[proc].push_back(std::vector>()); nodes_sent_up_in_supstep[proc].push_back(std::vector()); nodes_sent_down_in_supstep[proc].push_back(std::vector()); nodes_evicted_in_comm_phase[proc].push_back(std::vector()); @@ -1242,34 +1262,27 @@ void MultiProcessorPebbling::constructPebblingScheduleFromSolution(Pebb ++superstepIndex; in_comm = false; } - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - { - for(auto index_and_node : nodes_computed[proc][step]) - { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { + for (auto index_and_node : nodes_computed[proc][step]) { compute_steps_per_supstep[proc][superstepIndex].push_back(index_and_node.second); nodes_evicted_after_compute[proc][superstepIndex].push_back(std::vector()); } - for(vertex_idx node : evicted_after[proc][step]) - { - if(!nodes_evicted_after_compute[proc][superstepIndex].empty()) + for (vertex_idx node : evicted_after[proc][step]) { + if (!nodes_evicted_after_compute[proc][superstepIndex].empty()) { nodes_evicted_after_compute[proc][superstepIndex].back().push_back(node); - else - { + } else { // can only happen in special case: eviction in the very beginning nodes_evicted_in_comm_phase[proc][0].push_back(node); } } } } - - if (step_type == 2 || step_type == 3) - { - if(superstepIndex == UINT_MAX) - { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - { + + if (step_type == 2 || step_type == 3) { + if (superstepIndex == UINT_MAX) { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { compute_steps_per_supstep[proc].push_back(std::vector()); - nodes_evicted_after_compute[proc].push_back(std::vector >()); + nodes_evicted_after_compute[proc].push_back(std::vector>()); nodes_sent_up_in_supstep[proc].push_back(std::vector()); nodes_sent_down_in_supstep[proc].push_back(std::vector()); nodes_evicted_in_comm_phase[proc].push_back(std::vector()); @@ -1278,91 +1291,93 @@ void MultiProcessorPebbling::constructPebblingScheduleFromSolution(Pebb } in_comm = true; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - { - for(vertex_idx node : nodes_sent_up[proc][step]) + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { + for (vertex_idx node : nodes_sent_up[proc][step]) { nodes_sent_up_in_supstep[proc][superstepIndex].push_back(node); - for(vertex_idx node : evicted_after[proc][step]) + } + for (vertex_idx node : evicted_after[proc][step]) { nodes_evicted_in_comm_phase[proc][superstepIndex].push_back(node); - for(vertex_idx node : nodes_sent_down[proc][step]) + } + for (vertex_idx node : nodes_sent_down[proc][step]) { nodes_sent_down_in_supstep[proc][superstepIndex].push_back(node); + } } } } - } - else - { + } else { std::vector step_idx_on_proc(instance.numberOfProcessors(), 0); std::vector already_has_blue(instance.numberOfVertices(), false); - if(need_to_load_inputs) - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - if(instance.getComputationalDag().in_degree(node) == 0) + if (need_to_load_inputs) { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (instance.getComputationalDag().in_degree(node) == 0) { already_has_blue[node] = true; + } + } + } std::vector proc_finished(instance.numberOfProcessors(), false); unsigned nr_proc_finished = 0; - while(nr_proc_finished < instance.numberOfProcessors()) - { + while (nr_proc_finished < instance.numberOfProcessors()) { // preliminary sweep of superstep, to see if we need to wait for other processors std::vector idx_limit_on_proc = step_idx_on_proc; // first add compute steps - if(!needs_evict_step_in_beginning || superstepIndex > 0) - { - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - while(idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] <= 1) + if (!needs_evict_step_in_beginning || superstepIndex > 0) { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { + while (idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] <= 1) { ++idx_limit_on_proc[proc]; + } + } } // then add communications step until possible (note - they might not be valid if all put into a single superstep!) std::set new_blues; bool still_making_progress = true; - while(still_making_progress) - { + while (still_making_progress) { still_making_progress = false; - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - while(idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] != 1) - { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { + while (idx_limit_on_proc[proc] < max_time && step_type_on_proc[proc][idx_limit_on_proc[proc]] != 1) { bool accept_step = true; - for(vertex_idx node : nodes_sent_down[proc][idx_limit_on_proc[proc]]) - if(!already_has_blue[node] && new_blues.find(node) == new_blues.end()) + for (vertex_idx node : nodes_sent_down[proc][idx_limit_on_proc[proc]]) { + if (!already_has_blue[node] && new_blues.find(node) == new_blues.end()) { accept_step = false; - - if(!accept_step) + } + } + + if (!accept_step) { break; + } - for(vertex_idx node : nodes_sent_up[proc][idx_limit_on_proc[proc]]) - if(!already_has_blue[node]) + for (vertex_idx node : nodes_sent_up[proc][idx_limit_on_proc[proc]]) { + if (!already_has_blue[node]) { new_blues.insert(node); - + } + } + still_making_progress = true; ++idx_limit_on_proc[proc]; - } + } + } } // actually process the superstep - for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) - { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); proc++) { compute_steps_per_supstep[proc].push_back(std::vector()); - nodes_evicted_after_compute[proc].push_back(std::vector >()); + nodes_evicted_after_compute[proc].push_back(std::vector>()); nodes_sent_up_in_supstep[proc].push_back(std::vector()); nodes_sent_down_in_supstep[proc].push_back(std::vector()); nodes_evicted_in_comm_phase[proc].push_back(std::vector()); - while(step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] <= 1) - { - for(auto index_and_node : nodes_computed[proc][step_idx_on_proc[proc]]) - { + while (step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] <= 1) { + for (auto index_and_node : nodes_computed[proc][step_idx_on_proc[proc]]) { compute_steps_per_supstep[proc][superstepIndex].push_back(index_and_node.second); nodes_evicted_after_compute[proc][superstepIndex].push_back(std::vector()); } - for(vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]]) - { - if(!nodes_evicted_after_compute[proc][superstepIndex].empty()) + for (vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]]) { + if (!nodes_evicted_after_compute[proc][superstepIndex].empty()) { nodes_evicted_after_compute[proc][superstepIndex].back().push_back(node); - else - { + } else { // can only happen in special case: eviction in the very beginning nodes_evicted_in_comm_phase[proc][superstepIndex].push_back(node); } @@ -1370,22 +1385,21 @@ void MultiProcessorPebbling::constructPebblingScheduleFromSolution(Pebb ++step_idx_on_proc[proc]; } - while(step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] != 1) - { - for(vertex_idx node : nodes_sent_up[proc][step_idx_on_proc[proc]]) - { + while (step_idx_on_proc[proc] < idx_limit_on_proc[proc] && step_type_on_proc[proc][step_idx_on_proc[proc]] != 1) { + for (vertex_idx node : nodes_sent_up[proc][step_idx_on_proc[proc]]) { nodes_sent_up_in_supstep[proc][superstepIndex].push_back(node); already_has_blue[node] = true; } - for(vertex_idx node : nodes_sent_down[proc][step_idx_on_proc[proc]]) + for (vertex_idx node : nodes_sent_down[proc][step_idx_on_proc[proc]]) { nodes_sent_down_in_supstep[proc][superstepIndex].push_back(node); - for(vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]]) + } + for (vertex_idx node : evicted_after[proc][step_idx_on_proc[proc]]) { nodes_evicted_in_comm_phase[proc][superstepIndex].push_back(node); + } ++step_idx_on_proc[proc]; } - if(step_idx_on_proc[proc] == max_time && !proc_finished[proc]) - { + if (step_idx_on_proc[proc] == max_time && !proc_finished[proc]) { proc_finished[proc] = true; ++nr_proc_finished; } @@ -1394,217 +1408,240 @@ void MultiProcessorPebbling::constructPebblingScheduleFromSolution(Pebb } } - std::cout<<"MPP ILP best solution value: "<(instance, compute_steps_per_supstep, nodes_evicted_after_compute, - nodes_sent_up_in_supstep, nodes_sent_down_in_supstep, nodes_evicted_in_comm_phase, needs_blue_at_end, has_red_in_beginning, need_to_load_inputs); + std::cout << "MPP ILP best solution value: " << model.GetDblAttr(COPT_DBLATTR_BESTOBJ) + << ", best lower bound: " << model.GetDblAttr(COPT_DBLATTR_BESTBND) << std::endl; + + schedule = PebblingSchedule(instance, + compute_steps_per_supstep, + nodes_evicted_after_compute, + nodes_sent_up_in_supstep, + nodes_sent_down_in_supstep, + nodes_evicted_in_comm_phase, + needs_blue_at_end, + has_red_in_beginning, + need_to_load_inputs); } -template -void MultiProcessorPebbling::setInitialSolution(const BspInstance &instance, - const std::vector > >& computeSteps, - const std::vector > >& sendUpSteps, - const std::vector > >& sendDownSteps, - const std::vector > >& nodesEvictedAfterStep) -{ +template +void MultiProcessorPebbling::setInitialSolution( + const BspInstance &instance, + const std::vector>> &computeSteps, + const std::vector>> &sendUpSteps, + const std::vector>> &sendDownSteps, + const std::vector>> &nodesEvictedAfterStep) { const unsigned N = static_cast(instance.numberOfVertices()); std::vector in_slow_mem(N, false); - if(need_to_load_inputs) - for(vertex_idx node=0; node < N; ++node) - if(instance.getComputationalDag().in_degree(node) == 0) + if (need_to_load_inputs) { + for (vertex_idx node = 0; node < N; ++node) { + if (instance.getComputationalDag().in_degree(node) == 0) { in_slow_mem[node] = true; + } + } + } - std::vector > in_fast_mem(N, std::vector(instance.numberOfProcessors(), false)); - if(!has_red_in_beginning.empty()) - for(unsigned proc=0; proc> in_fast_mem(N, std::vector(instance.numberOfProcessors(), false)); + if (!has_red_in_beginning.empty()) { + for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + for (vertex_idx node : has_red_in_beginning[proc]) { + in_fast_mem[node][proc] = true; + } + } + } unsigned step = 0, new_step_idx = 0; - for(; step < computeSteps[0].size(); ++step) - { - for(vertex_idx node=0; node < N; ++node) - { - if(has_blue_exists[node][new_step_idx]) + for (; step < computeSteps[0].size(); ++step) { + for (vertex_idx node = 0; node < N; ++node) { + if (has_blue_exists[node][new_step_idx]) { model.SetMipStart(has_blue[node][static_cast(new_step_idx)], static_cast(in_slow_mem[node])); - for(unsigned proc=0; proc(new_step_idx)], static_cast(in_fast_mem[node][proc])); + } } - if(restrict_step_types) - { + if (restrict_step_types) { // align step number with step type cycle's phase, if needed bool skip_step = true; - while(skip_step) - { + while (skip_step) { skip_step = false; bool is_compute = false, is_send_up = false, is_send_down = false; - for(unsigned proc=0; proc(new_step_idx)], static_cast(in_slow_mem[node])); - for(unsigned proc=0; proc(new_step_idx)], static_cast(in_fast_mem[node][proc])); + } + for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { + model.SetMipStart(has_red[node][proc][static_cast(new_step_idx)], + static_cast(in_fast_mem[node][proc])); + } } } } } - for(unsigned proc=0; proc value_of_node(N, false); - for(vertex_idx node : computeSteps[proc][step]) - { + for (vertex_idx node : computeSteps[proc][step]) { value_of_node[node] = true; - if(compute_exists[node][proc][new_step_idx]) + if (compute_exists[node][proc][new_step_idx]) { model.SetMipStart(compute[node][proc][static_cast(new_step_idx)], 1); + } in_fast_mem[node][proc] = true; } - for(vertex_idx node : computeSteps[proc][step]) - { - if(!value_of_node[node]) - { - if(compute_exists[node][proc][new_step_idx]) + for (vertex_idx node : computeSteps[proc][step]) { + if (!value_of_node[node]) { + if (compute_exists[node][proc][new_step_idx]) { model.SetMipStart(compute[node][proc][static_cast(new_step_idx)], 0); - } - else + } + } else { value_of_node[node] = false; + } } - for(vertex_idx node : sendUpSteps[proc][step]) - { + for (vertex_idx node : sendUpSteps[proc][step]) { value_of_node[node] = true; - if(send_up_exists[node][proc][new_step_idx]) + if (send_up_exists[node][proc][new_step_idx]) { model.SetMipStart(send_up[node][proc][static_cast(new_step_idx)], 1); + } in_slow_mem[node] = true; } - for(vertex_idx node : sendUpSteps[proc][step]) - { - if(!value_of_node[node]) - { - if(send_up_exists[node][proc][new_step_idx]) + for (vertex_idx node : sendUpSteps[proc][step]) { + if (!value_of_node[node]) { + if (send_up_exists[node][proc][new_step_idx]) { model.SetMipStart(send_up[node][proc][static_cast(new_step_idx)], 0); - } - else + } + } else { value_of_node[node] = false; + } } - for(vertex_idx node : sendDownSteps[proc][step]) - { + for (vertex_idx node : sendDownSteps[proc][step]) { value_of_node[node] = true; - if(send_down_exists[node][proc][new_step_idx]) + if (send_down_exists[node][proc][new_step_idx]) { model.SetMipStart(send_down[node][proc][static_cast(new_step_idx)], 1); + } in_fast_mem[node][proc] = true; } - for(vertex_idx node : sendDownSteps[proc][step]) - { - if(!value_of_node[node]) - { - if(send_down_exists[node][proc][new_step_idx]) + for (vertex_idx node : sendDownSteps[proc][step]) { + if (!value_of_node[node]) { + if (send_down_exists[node][proc][new_step_idx]) { model.SetMipStart(send_down[node][proc][static_cast(new_step_idx)], 0); - } - else + } + } else { value_of_node[node] = false; + } } - for(vertex_idx node : nodesEvictedAfterStep[proc][step]) + for (vertex_idx node : nodesEvictedAfterStep[proc][step]) { in_fast_mem[node][proc] = false; - + } } ++new_step_idx; } - for(; new_step_idx < max_time; ++new_step_idx) - { - for(vertex_idx node=0; node < N; ++node) - { - if(has_blue_exists[node][new_step_idx]) + for (; new_step_idx < max_time; ++new_step_idx) { + for (vertex_idx node = 0; node < N; ++node) { + if (has_blue_exists[node][new_step_idx]) { model.SetMipStart(has_blue[node][static_cast(new_step_idx)], static_cast(in_slow_mem[node])); - for(unsigned proc=0; proc < instance.numberOfProcessors(); ++proc) - { + } + for (unsigned proc = 0; proc < instance.numberOfProcessors(); ++proc) { model.SetMipStart(has_red[node][proc][static_cast(new_step_idx)], 0); - if(compute_exists[node][proc][new_step_idx]) + if (compute_exists[node][proc][new_step_idx]) { model.SetMipStart(compute[node][proc][static_cast(new_step_idx)], 0); - if(send_up_exists[node][proc][new_step_idx]) + } + if (send_up_exists[node][proc][new_step_idx]) { model.SetMipStart(send_up[node][proc][static_cast(new_step_idx)], 0); - if(send_down_exists[node][proc][new_step_idx]) + } + if (send_down_exists[node][proc][new_step_idx]) { model.SetMipStart(send_down[node][proc][static_cast(new_step_idx)], 0); + } } } } model.LoadMipStart(); } -template -unsigned MultiProcessorPebbling::computeMaxTimeForInitialSolution(const BspInstance &instance, - const std::vector > >& computeSteps, - const std::vector > >& sendUpSteps, - const std::vector > >& sendDownSteps) const -{ - if(!restrict_step_types) +template +unsigned MultiProcessorPebbling::computeMaxTimeForInitialSolution( + const BspInstance &instance, + const std::vector>> &computeSteps, + const std::vector>> &sendUpSteps, + const std::vector>> &sendDownSteps) const { + if (!restrict_step_types) { return static_cast(computeSteps[0].size()) + 3; - + } + unsigned step = 0, new_step_idx = 0; - for(; step < computeSteps[0].size(); ++step) - { + for (; step < computeSteps[0].size(); ++step) { // align step number with step type cycle's phase, if needed bool skip_step = true; - while(skip_step) - { + while (skip_step) { skip_step = false; bool is_compute = false, is_send_up = false, is_send_down = false; - for(unsigned proc=0; proc::computeMaxTimeForInitialSolution(const return new_step_idx; } -template -bool MultiProcessorPebbling::hasEmptyStep(const BspInstance &instance) -{ - for (unsigned step = 0; step < max_time; ++step) - { +template +bool MultiProcessorPebbling::hasEmptyStep(const BspInstance &instance) { + for (unsigned step = 0; step < max_time; ++step) { bool empty = true; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) - { - if((compute_exists[node][processor][step] && compute[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) || - (send_up_exists[node][processor][step] && send_up[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) || - (send_down_exists[node][processor][step] && send_down[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99 )) + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + for (unsigned processor = 0; processor < instance.numberOfProcessors(); processor++) { + if ((compute_exists[node][processor][step] && compute[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) + || (send_up_exists[node][processor][step] && send_up[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99) + || (send_down_exists[node][processor][step] + && send_down[node][processor][step].Get(COPT_DBLINFO_VALUE) >= .99)) { empty = false; + } } - if(empty) + } + if (empty) { return true; + } } return false; } -} \ No newline at end of file +} // namespace osp diff --git a/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp b/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp index 1ac8561b..0544eaab 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp @@ -13,27 +13,27 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once -#include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/bsp/model/BspInstance.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/graph_algorithms/subgraph_algorithms.hpp" #include "osp/pebbling/PebblingSchedule.hpp" +#include "osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp" #include "osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp" #include "osp/pebbling/pebblers/pebblingILP/partialILP/SubproblemMultiScheduling.hpp" -#include "osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/graph_algorithms/subgraph_algorithms.hpp" -namespace osp{ +namespace osp { -template +template class PebblingPartialILP : public Scheduler { - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); - static_assert(std::is_same_v, v_commw_t>, "PebblingSchedule requires work and comm. weights to have the same type."); + static_assert(std::is_same_v, v_commw_t>, + "PebblingSchedule requires work and comm. weights to have the same type."); using vertex_idx = vertex_idx_t; using cost_type = v_workw_t; @@ -67,21 +67,32 @@ class PebblingPartialILP : public Scheduler { // getters and setters for problem parameters inline std::pair getMinAndMaxSize() const { return std::make_pair(minPartitionSize, maxPartitionSize); } - inline void setMinSize(const unsigned min_size) {minPartitionSize = min_size; maxPartitionSize = 2*min_size; } - inline void setMinAndMaxSize(const std::pair min_and_max) {minPartitionSize = min_and_max.first; maxPartitionSize = min_and_max.second; } - inline void setAsync(const bool async_) {asynchronous = async_; } - inline void setSecondsForSubILP(const unsigned seconds_) {time_seconds_for_subILPs = seconds_; } - inline void setVerbose(const bool verbose_) {verbose = verbose_; } -}; -template -RETURN_STATUS PebblingPartialILP::computePebbling(PebblingSchedule &schedule){ + inline void setMinSize(const unsigned min_size) { + minPartitionSize = min_size; + maxPartitionSize = 2 * min_size; + } + + inline void setMinAndMaxSize(const std::pair min_and_max) { + minPartitionSize = min_and_max.first; + maxPartitionSize = min_and_max.second; + } + + inline void setAsync(const bool async_) { asynchronous = async_; } - const BspInstance& instance = schedule.getInstance(); + inline void setSecondsForSubILP(const unsigned seconds_) { time_seconds_for_subILPs = seconds_; } - if(!PebblingSchedule::hasValidSolution(instance)) + inline void setVerbose(const bool verbose_) { verbose = verbose_; } +}; + +template +RETURN_STATUS PebblingPartialILP::computePebbling(PebblingSchedule &schedule) { + const BspInstance &instance = schedule.getInstance(); + + if (!PebblingSchedule::hasValidSolution(instance)) { return RETURN_STATUS::ERROR; - + } + // STEP 1: divide DAG acyclicly with partitioning ILP AcyclicDagDivider dag_divider; @@ -94,130 +105,141 @@ RETURN_STATUS PebblingPartialILP::computePebbling(PebblingSchedule contracted_instance(contracted_dag, instance.getArchitecture(), instance.getNodeProcessorCompatibilityMatrix()); + BspInstance contracted_instance( + contracted_dag, instance.getArchitecture(), instance.getNodeProcessorCompatibilityMatrix()); SubproblemMultiScheduling multi_scheduler; - std::vector > processors_to_parts_and_types; + std::vector> processors_to_parts_and_types; multi_scheduler.computeMultiSchedule(contracted_instance, processors_to_parts_and_types); - std::vector > processors_to_parts(nr_parts); - for(unsigned part = 0; part < nr_parts; ++part) - for(unsigned type = 0; type < instance.getComputationalDag().num_vertex_types(); ++type) - if(part_and_nodetype_to_new_index.find({part, type}) != part_and_nodetype_to_new_index.end()) - { + std::vector> processors_to_parts(nr_parts); + for (unsigned part = 0; part < nr_parts; ++part) { + for (unsigned type = 0; type < instance.getComputationalDag().num_vertex_types(); ++type) { + if (part_and_nodetype_to_new_index.find({part, type}) != part_and_nodetype_to_new_index.end()) { unsigned new_index = part_and_nodetype_to_new_index[{part, type}]; - for(unsigned proc : processors_to_parts_and_types[new_index]) + for (unsigned proc : processors_to_parts_and_types[new_index]) { processors_to_parts[part].insert(proc); + } } + } + } // AUX: check for isomorphism // create set of nodes & external sources for all parts, and the nodes that need to have blue pebble at the end - std::vector > nodes_in_part(nr_parts), extra_sources(nr_parts); - std::vector > original_node_id(nr_parts); - std::vector > original_proc_id(nr_parts); - for(vertex_idx node = 0; node < instance.numberOfVertices(); ++node) - { - if(instance.getComputationalDag().in_degree(node) > 0) + std::vector> nodes_in_part(nr_parts), extra_sources(nr_parts); + std::vector> original_node_id(nr_parts); + std::vector> original_proc_id(nr_parts); + for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) { + if (instance.getComputationalDag().in_degree(node) > 0) { nodes_in_part[assignment_to_parts[node]].insert(node); - else + } else { extra_sources[assignment_to_parts[node]].insert(node); - for (const vertex_idx &pred : instance.getComputationalDag().parents(node)) - if(assignment_to_parts[node] != assignment_to_parts[pred]) + } + for (const vertex_idx &pred : instance.getComputationalDag().parents(node)) { + if (assignment_to_parts[node] != assignment_to_parts[pred]) { extra_sources[assignment_to_parts[node]].insert(pred); + } + } } std::vector subDags; - for(unsigned part = 0; part < nr_parts; ++part) - { + for (unsigned part = 0; part < nr_parts; ++part) { Graph_t dag; create_induced_subgraph(instance.getComputationalDag(), dag, nodes_in_part[part], extra_sources[part]); subDags.push_back(dag); - + // set source nodes to a new type, so that they are compatible with any processor unsigned artificial_type_for_sources = subDags.back().num_vertex_types(); - for(vertex_idx node_idx = 0; node_idx < extra_sources[part].size(); ++node_idx) + for (vertex_idx node_idx = 0; node_idx < extra_sources[part].size(); ++node_idx) { subDags.back().set_vertex_type(node_idx, artificial_type_for_sources); + } } std::vector isomorphicTo(nr_parts, UINT_MAX); - std::cout<<"Number of parts: "< proc_assigned_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); std::vector other_proc_assigned_per_type(instance.getArchitecture().getNumberOfProcessorTypes(), 0); - for(unsigned proc : processors_to_parts[part]) + for (unsigned proc : processors_to_parts[part]) { ++proc_assigned_per_type[instance.getArchitecture().processorType(proc)]; - for(unsigned proc : processors_to_parts[other_part]) + } + for (unsigned proc : processors_to_parts[other_part]) { ++other_proc_assigned_per_type[instance.getArchitecture().processorType(proc)]; + } - for(unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) - if(proc_assigned_per_type[proc_type] != other_proc_assigned_per_type[proc_type]) + for (unsigned proc_type = 0; proc_type < instance.getArchitecture().getNumberOfProcessorTypes(); ++proc_type) { + if (proc_assigned_per_type[proc_type] != other_proc_assigned_per_type[proc_type]) { isomorphic = false; - - if(!isomorphic) + } + } + + if (!isomorphic) { continue; + } isomorphicTo[other_part] = part; - std::cout<<"Part "< > in_fast_mem(instance.numberOfProcessors()); - std::vector > pebbling(nr_parts); - std::vector > subArch(nr_parts); - std::vector > subInstance(nr_parts); + std::vector> in_fast_mem(instance.numberOfProcessors()); + std::vector> pebbling(nr_parts); + std::vector> subArch(nr_parts); + std::vector> subInstance(nr_parts); // to handle the initial memory content for isomorphic parts - std::vector > > has_reds_in_beginning(nr_parts, std::vector >(instance.numberOfProcessors())); + std::vector>> has_reds_in_beginning( + nr_parts, std::vector>(instance.numberOfProcessors())); - for(unsigned part = 0; part < nr_parts; ++part) - { - std::cout<<"part "< local_id; vertex_idx node_idx = 0; - for(vertex_idx node : extra_sources[part]) - { + for (vertex_idx node : extra_sources[part]) { local_id[node] = node_idx; original_node_id[part][node_idx] = node; ++node_idx; } - for(vertex_idx node : nodes_in_part[part]) - { + for (vertex_idx node : nodes_in_part[part]) { local_id[node] = node_idx; original_node_id[part][node_idx] = node; ++node_idx; } - + std::set needs_blue_at_end; - for(vertex_idx node : nodes_in_part[part]) - { - for (const vertex_idx &succ : instance.getComputationalDag().children(node)) - if(assignment_to_parts[node] != assignment_to_parts[succ]) + for (vertex_idx node : nodes_in_part[part]) { + for (const vertex_idx &succ : instance.getComputationalDag().children(node)) { + if (assignment_to_parts[node] != assignment_to_parts[succ]) { needs_blue_at_end.insert(local_id[node]); - - if(instance.getComputationalDag().out_degree(node) == 0) + } + } + + if (instance.getComputationalDag().out_degree(node) == 0) { needs_blue_at_end.insert(local_id[node]); + } } // set up sub-architecture subArch[part].setNumberOfProcessors(static_cast(processors_to_parts[part].size())); unsigned proc_index = 0; - for(unsigned proc : processors_to_parts[part]) - { + for (unsigned proc : processors_to_parts[part]) { subArch[part].setProcessorType(proc_index, instance.getArchitecture().processorType(proc)); subArch[part].setMemoryBound(instance.getArchitecture().memoryBound(proc), proc_index); original_proc_id[part][proc_index] = proc; @@ -228,32 +250,30 @@ RETURN_STATUS PebblingPartialILP::computePebbling(PebblingSchedule > comp_matrix = instance.getNodeProcessorCompatibilityMatrix(); + std::vector> comp_matrix = instance.getNodeProcessorCompatibilityMatrix(); comp_matrix.emplace_back(instance.getArchitecture().getNumberOfProcessorTypes(), true); subInstance[part] = BspInstance(subDag, subArch[part], comp_matrix); - + // currently we only allow the input laoding scenario - the case where this is false is unmaintained/untested bool need_to_load_inputs = true; // keep in fast memory what's relevant, remove the rest - for(unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) - { + for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) { has_reds_in_beginning[part][proc].clear(); std::set new_content_fast_mem; - for(vertex_idx node : in_fast_mem[original_proc_id[part][proc]]) - if(local_id.find(node) != local_id.end()) - { + for (vertex_idx node : in_fast_mem[original_proc_id[part][proc]]) { + if (local_id.find(node) != local_id.end()) { has_reds_in_beginning[part][proc].insert(local_id[node]); new_content_fast_mem.insert(node); } + } in_fast_mem[original_proc_id[part][proc]] = new_content_fast_mem; } @@ -263,122 +283,124 @@ RETURN_STATUS PebblingPartialILP::computePebbling(PebblingSchedule greedy_scheduler; BspSchedule bsp_heuristic(subInstance[part]); greedy_scheduler.computeSchedule(bsp_heuristic); - + std::set extra_source_ids; - for(vertex_idx idx = 0; idx < extra_sources[part].size(); ++idx) + for (vertex_idx idx = 0; idx < extra_sources[part].size(); ++idx) { extra_source_ids.insert(idx); + } heuristic_pebbling.setNeedToLoadInputs(true); heuristic_pebbling.SetExternalSources(extra_source_ids); heuristic_pebbling.SetNeedsBlueAtEnd(needs_blue_at_end); heuristic_pebbling.SetHasRedInBeginning(has_reds_in_beginning[part]); - heuristic_pebbling.ConvertFromBsp(bsp_heuristic, PebblingSchedule::CACHE_EVICTION_STRATEGY::FORESIGHT); + heuristic_pebbling.ConvertFromBsp(bsp_heuristic, PebblingSchedule::CACHE_EVICTION_STRATEGY::FORESIGHT); heuristic_pebbling.removeEvictStepsFromEnd(); pebbling[part] = heuristic_pebbling; cost_type heuristicCost = asynchronous ? heuristic_pebbling.computeAsynchronousCost() : heuristic_pebbling.computeCost(); - if(!heuristic_pebbling.isValid()) - std::cout<<"ERROR: Pebbling heuristic INVALID!"< mpp; mpp.setVerbose(verbose); mpp.setTimeLimitSeconds(time_seconds_for_subILPs); - mpp.setMaxTime(2*maxPartitionSize); // just a heuristic choice, does not guarantee feasibility! + mpp.setMaxTime(2 * maxPartitionSize); // just a heuristic choice, does not guarantee feasibility! mpp.setNeedsBlueAtEnd(needs_blue_at_end); mpp.setNeedToLoadInputs(need_to_load_inputs); mpp.setHasRedInBeginning(has_reds_in_beginning[part]); PebblingSchedule pebblingILP(subInstance[part]); RETURN_STATUS status = mpp.computePebblingWithInitialSolution(heuristic_pebbling, pebblingILP, asynchronous); - if(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) - { - if(!pebblingILP.isValid()) - std::cout<<"ERROR: Pebbling ILP INVALID!"< > fast_mem_content_at_end = pebbling[part].getMemContentAtEnd(); - for(unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) - { + std::vector> fast_mem_content_at_end = pebbling[part].getMemContentAtEnd(); + for (unsigned proc = 0; proc < processors_to_parts[part].size(); ++proc) { in_fast_mem[original_proc_id[part][proc]].clear(); - for(vertex_idx node : fast_mem_content_at_end[proc]) + for (vertex_idx node : fast_mem_content_at_end[proc]) { in_fast_mem[original_proc_id[part][proc]].insert(original_node_id[part][node]); + } } + } else { + std::cout << "ILP found no solution; using greedy instead (cost = " << heuristicCost << ")." << std::endl; } - else - std::cout<<"ILP found no solution; using greedy instead (cost = "< -Graph_t PebblingPartialILP::contractByPartition(const BspInstance &instance, const std::vector &node_to_part_assignment) -{ +template +Graph_t PebblingPartialILP::contractByPartition(const BspInstance &instance, + const std::vector &node_to_part_assignment) { const auto &G = instance.getComputationalDag(); part_and_nodetype_to_new_index.clear(); unsigned nr_new_nodes = 0; - for(vertex_idx node = 0; node < instance.numberOfVertices(); ++node) - { - if(part_and_nodetype_to_new_index.find({node_to_part_assignment[node], G.vertex_type(node)}) == part_and_nodetype_to_new_index.end()) - { + for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) { + if (part_and_nodetype_to_new_index.find({node_to_part_assignment[node], G.vertex_type(node)}) + == part_and_nodetype_to_new_index.end()) { part_and_nodetype_to_new_index[{node_to_part_assignment[node], G.vertex_type(node)}] = nr_new_nodes; ++nr_new_nodes; } } Graph_t contracted; - for(vertex_idx node = 0; node < nr_new_nodes; ++node) - contracted.add_vertex(0,0,0); + for (vertex_idx node = 0; node < nr_new_nodes; ++node) { + contracted.add_vertex(0, 0, 0); + } - std::set > edges; + std::set> edges; - for(vertex_idx node = 0; node < instance.numberOfVertices(); ++node) - { + for (vertex_idx node = 0; node < instance.numberOfVertices(); ++node) { vertex_idx node_new_index = part_and_nodetype_to_new_index[{node_to_part_assignment[node], G.vertex_type(node)}]; - for (const vertex_idx &succ : instance.getComputationalDag().children(node)) - if(node_to_part_assignment[node] != node_to_part_assignment[succ]) + for (const vertex_idx &succ : instance.getComputationalDag().children(node)) { + if (node_to_part_assignment[node] != node_to_part_assignment[succ]) { edges.emplace(node_new_index, part_and_nodetype_to_new_index[{node_to_part_assignment[succ], G.vertex_type(succ)}]); + } + } - contracted.set_vertex_work_weight(node_new_index, contracted.vertex_work_weight(node_new_index) + G.vertex_work_weight(node)); - contracted.set_vertex_comm_weight(node_new_index, contracted.vertex_comm_weight(node_new_index) + G.vertex_comm_weight(node)); + contracted.set_vertex_work_weight(node_new_index, + contracted.vertex_work_weight(node_new_index) + G.vertex_work_weight(node)); + contracted.set_vertex_comm_weight(node_new_index, + contracted.vertex_comm_weight(node_new_index) + G.vertex_comm_weight(node)); contracted.set_vertex_mem_weight(node_new_index, contracted.vertex_mem_weight(node_new_index) + G.vertex_mem_weight(node)); contracted.set_vertex_type(node_new_index, G.vertex_type(node)); } - for(auto edge : edges) { - + for (auto edge : edges) { if constexpr (has_edge_weights_v) { contracted.add_edge(edge.first, edge.second, 1); } else { contracted.add_edge(edge.first, edge.second); } - } return contracted; } -template -RETURN_STATUS PebblingPartialILP::computeSchedule(BspSchedule&) { +template +RETURN_STATUS PebblingPartialILP::computeSchedule(BspSchedule &) { return RETURN_STATUS::ERROR; } -} \ No newline at end of file +} // namespace osp diff --git a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp index 0fb97201..f969be72 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicDagDivider.hpp @@ -13,7 +13,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once @@ -21,15 +21,14 @@ limitations under the License. #include "osp/bsp/model/BspInstance.hpp" #include "osp/concepts/computational_dag_concept.hpp" #include "osp/dag_divider/ConnectedComponentDivider.hpp" -#include "osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp" #include "osp/graph_algorithms/subgraph_algorithms.hpp" +#include "osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp" -namespace osp{ +namespace osp { -template +template class AcyclicDagDivider { - - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); + static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); protected: using vertex_idx = vertex_idx_t; @@ -37,9 +36,11 @@ class AcyclicDagDivider { unsigned minPartitionSize = 40, maxPartitionSize = 80; bool ignore_sources_in_size = true; - std::vector getTopologicalSplit(const Graph_t &G, std::pair min_and_max, const std::vector& is_original_source) const; + std::vector getTopologicalSplit(const Graph_t &G, + std::pair min_and_max, + const std::vector &is_original_source) const; - v_commw_t static getSplitCost(const Graph_t &G, const std::vector& node_to_part); + v_commw_t static getSplitCost(const Graph_t &G, const std::vector &node_to_part); public: AcyclicDagDivider() {} @@ -50,13 +51,17 @@ class AcyclicDagDivider { // getters and setters for problem parameters inline std::pair getMinAndMaxSize() const { return std::make_pair(minPartitionSize, maxPartitionSize); } - inline void setMinAndMaxSize(const std::pair min_and_max) {minPartitionSize = min_and_max.first; maxPartitionSize = min_and_max.second; } - inline void setIgnoreSources(const bool ignore_) {ignore_sources_in_size = ignore_; } + + inline void setMinAndMaxSize(const std::pair min_and_max) { + minPartitionSize = min_and_max.first; + maxPartitionSize = min_and_max.second; + } + + inline void setIgnoreSources(const bool ignore_) { ignore_sources_in_size = ignore_; } }; -template -std::vector AcyclicDagDivider::computePartitioning(const BspInstance &instance) -{ +template +std::vector AcyclicDagDivider::computePartitioning(const BspInstance &instance) { const unsigned N = static_cast(instance.numberOfVertices()); // split to connected components first @@ -64,77 +69,69 @@ std::vector AcyclicDagDivider::computePartitioning(const BspI connected_comp.divide(instance.getComputationalDag()); std::vector subDags = connected_comp.get_sub_dags(); - std::vector > node_to_subdag_and_index(N); - std::vector > original_id(subDags.size()); - for(vertex_idx node = 0; node < N; ++node) - { + std::vector> node_to_subdag_and_index(N); + std::vector> original_id(subDags.size()); + for (vertex_idx node = 0; node < N; ++node) { node_to_subdag_and_index[node] = {connected_comp.get_component()[node], connected_comp.get_vertex_map()[node]}; original_id[connected_comp.get_component()[node]].push_back(node); } - + // TODO extend with splits at directed articulation points in future? // split components further with ILPs or heuristics - while(true) - { + while (true) { bool exists_too_large = false; std::vector dag_is_too_large(subDags.size(), false); std::vector dag_real_size(subDags.size(), 0); - for(unsigned idx = 0; idx < subDags.size(); ++idx) - { - const Graph_t& dag = subDags[idx]; - if(!ignore_sources_in_size) - { + for (unsigned idx = 0; idx < subDags.size(); ++idx) { + const Graph_t &dag = subDags[idx]; + if (!ignore_sources_in_size) { dag_real_size[idx] = static_cast(dag.num_vertices()); - if(dag.num_vertices() > maxPartitionSize) - { + if (dag.num_vertices() > maxPartitionSize) { dag_is_too_large[idx] = true; exists_too_large = true; } + } else { + for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { + if (instance.getComputationalDag().in_degree(original_id[idx][local_ID]) > 0) { + ++dag_real_size[idx]; + } + } } - else - { - for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) - if(instance.getComputationalDag().in_degree(original_id[idx][local_ID]) > 0) - ++dag_real_size[idx]; - } - if(dag_real_size[idx] > maxPartitionSize) - { + if (dag_real_size[idx] > maxPartitionSize) { dag_is_too_large[idx] = true; exists_too_large = true; } - } - - if(!exists_too_large) + + if (!exists_too_large) { break; - - std::vector newDagList; - std::vector > original_id_updated; - - for(unsigned idx = 0; idx < subDags.size(); ++idx) - { - const Graph_t& dag = subDags[idx]; - if(!dag_is_too_large[idx]) - { - for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) + } + + std::vector newDagList; + std::vector> original_id_updated; + + for (unsigned idx = 0; idx < subDags.size(); ++idx) { + const Graph_t &dag = subDags[idx]; + if (!dag_is_too_large[idx]) { + for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { node_to_subdag_and_index[original_id[idx][local_ID]].first = static_cast(newDagList.size()); + } original_id_updated.push_back(original_id[idx]); newDagList.push_back(dag); - } - else - { + } else { std::vector ILP_assignment; - //unsigned newMin = dag_real_size[idx]/3, minPartitionSize); minimum condition removed - it can cause very strict bisections - unsigned newMin = dag_real_size[idx]/3; - unsigned newMax = dag_real_size[idx] - newMin; + // unsigned newMin = dag_real_size[idx]/3, minPartitionSize); minimum condition removed - it can cause very strict bisections + unsigned newMin = dag_real_size[idx] / 3; + unsigned newMax = dag_real_size[idx] - newMin; // mark the source nodes of the original DAG std::vector is_original_source(dag.num_vertices()); - for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) + for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { is_original_source[local_ID] = (instance.getComputationalDag().in_degree(original_id[idx][local_ID]) == 0); + } // heuristic splitting std::vector heuristic_assignment = getTopologicalSplit(dag, {newMin, newMax}, is_original_source); @@ -146,11 +143,12 @@ std::vector AcyclicDagDivider::computePartitioning(const BspI partitioner.setTimeLimitSeconds(120); partitioner.setMinAndMaxSize({newMin, newMax}); partitioner.setIsOriginalSource(is_original_source); - partitioner.setNumberOfParts(2); // note - if set to more than 2, ILP is MUCH more inefficient + partitioner.setNumberOfParts(2); // note - if set to more than 2, ILP is MUCH more inefficient BspInstance partial_instance(dag, instance.getArchitecture(), instance.getNodeProcessorCompatibilityMatrix()); RETURN_STATUS status = partitioner.computePartitioning(partial_instance, ILP_assignment); - if(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) + if (status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND) { ILPCost = getSplitCost(dag, ILP_assignment); + } std::vector assignment = ILPCost < heuristicCost ? ILP_assignment : heuristic_assignment; @@ -161,26 +159,27 @@ std::vector AcyclicDagDivider::computePartitioning(const BspI std::cout< node_idx_in_new_subDag(dag.num_vertices()); std::vector nr_nodes_in_new_subDag(splitDags.size(), 0); - for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) - { + for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { node_idx_in_new_subDag[local_ID] = nr_nodes_in_new_subDag[assignment[local_ID]]; ++nr_nodes_in_new_subDag[assignment[local_ID]]; } - - for(auto next_dag : splitDags) + + for (auto next_dag : splitDags) { original_id_updated.emplace_back(next_dag.num_vertices()); + } - for(vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) - { - node_to_subdag_and_index[original_id[idx][local_ID]] = {newDagList.size() + assignment[local_ID], node_idx_in_new_subDag[local_ID]}; - original_id_updated[newDagList.size() + assignment[local_ID]][node_idx_in_new_subDag[local_ID]] = original_id[idx][local_ID]; + for (vertex_idx local_ID = 0; local_ID < dag.num_vertices(); ++local_ID) { + node_to_subdag_and_index[original_id[idx][local_ID]] + = {newDagList.size() + assignment[local_ID], node_idx_in_new_subDag[local_ID]}; + original_id_updated[newDagList.size() + assignment[local_ID]][node_idx_in_new_subDag[local_ID]] + = original_id[idx][local_ID]; } - for(auto next_dag : splitDags) + for (auto next_dag : splitDags) { newDagList.push_back(next_dag); + } } } @@ -190,114 +189,121 @@ std::vector AcyclicDagDivider::computePartitioning(const BspI // output final cost std::vector final_assignment(N); - for(vertex_idx node = 0; node < N; ++node) + for (vertex_idx node = 0; node < N; ++node) { final_assignment[node] = node_to_subdag_and_index[node].first; - std::cout<<"Final cut cost of acyclic DAG divider is "< -std::vector AcyclicDagDivider::getTopologicalSplit(const Graph_t &G, std::pair min_and_max, const std::vector& is_original_source) const -{ +template +std::vector AcyclicDagDivider::getTopologicalSplit(const Graph_t &G, + std::pair min_and_max, + const std::vector &is_original_source) const { std::vector node_to_part(G.num_vertices()); std::vector top_order = GetTopOrder(G); std::vector top_order_idx(G.num_vertices()); - for(unsigned idx = 0; idx < G.num_vertices(); ++idx) + for (unsigned idx = 0; idx < G.num_vertices(); ++idx) { top_order_idx[top_order[idx]] = idx; + } std::vector last_node_idx_in_hyperedge(G.num_vertices()); - for(unsigned node = 0; node < G.num_vertices(); ++node) - { + for (unsigned node = 0; node < G.num_vertices(); ++node) { last_node_idx_in_hyperedge[node] = top_order_idx[node]; - for (const auto &succ : G.children(node)) + for (const auto &succ : G.children(node)) { last_node_idx_in_hyperedge[node] = std::max(last_node_idx_in_hyperedge[node], top_order_idx[succ]); + } } unsigned index = 0; unsigned current_part_id = 0; unsigned nodes_remaining = static_cast(G.num_vertices()); - if(ignore_sources_in_size) - { + if (ignore_sources_in_size) { nodes_remaining = 0; - for(unsigned node = 0; node < G.num_vertices(); ++node) - if(!is_original_source[node]) + for (unsigned node = 0; node < G.num_vertices(); ++node) { + if (!is_original_source[node]) { ++nodes_remaining; + } + } } - while(nodes_remaining > min_and_max.second) - { + while (nodes_remaining > min_and_max.second) { unsigned best_cost = UINT_MAX; unsigned best_end = index; unsigned end; unsigned newly_added_nodes = 0; - for(end = index + 1; index < G.num_vertices() && newly_added_nodes < min_and_max.first; ++end) - if(!ignore_sources_in_size || !is_original_source[end]) + for (end = index + 1; index < G.num_vertices() && newly_added_nodes < min_and_max.first; ++end) { + if (!ignore_sources_in_size || !is_original_source[end]) { ++newly_added_nodes; + } + } - while(end < G.num_vertices() && newly_added_nodes < min_and_max.second) - { + while (end < G.num_vertices() && newly_added_nodes < min_and_max.second) { unsigned extra_cost = 0; // check the extra cut cost of the potential endpoint - for(unsigned top_order_pos = index; top_order_pos <= end; ++top_order_pos) - { + for (unsigned top_order_pos = index; top_order_pos <= end; ++top_order_pos) { vertex_idx node = top_order[top_order_pos]; - if(last_node_idx_in_hyperedge[node] > end) + if (last_node_idx_in_hyperedge[node] > end) { extra_cost += G.vertex_comm_weight(node); - - for (const auto &pred : G.parents(node)) - if(last_node_idx_in_hyperedge[pred] > end) - extra_cost += G.vertex_comm_weight(pred); + } + + for (const auto &pred : G.parents(node)) { + if (last_node_idx_in_hyperedge[pred] > end) { + extra_cost += G.vertex_comm_weight(pred); + } + } } - if(extra_cost < best_cost) - { + if (extra_cost < best_cost) { best_cost = extra_cost; best_end = end; } ++end; - if(!ignore_sources_in_size || !is_original_source[end]) + if (!ignore_sources_in_size || !is_original_source[end]) { ++newly_added_nodes; + } } - for(vertex_idx idx = index; idx <= best_end; ++idx) - { + for (vertex_idx idx = index; idx <= best_end; ++idx) { node_to_part[top_order[idx]] = current_part_id; - if(!ignore_sources_in_size || !is_original_source[idx]) + if (!ignore_sources_in_size || !is_original_source[idx]) { --nodes_remaining; + } } index = best_end + 1; ++current_part_id; } // remaining nodes go into last part - for(vertex_idx idx = index; idx < G.num_vertices(); ++idx) + for (vertex_idx idx = index; idx < G.num_vertices(); ++idx) { node_to_part[top_order[idx]] = current_part_id; + } return node_to_part; } -template -v_commw_t AcyclicDagDivider::getSplitCost(const Graph_t &G, const std::vector& node_to_part) -{ +template +v_commw_t AcyclicDagDivider::getSplitCost(const Graph_t &G, const std::vector &node_to_part) { v_commw_t cost = 0; - for(vertex_idx node = 0; node < G.num_vertices(); ++node) - { + for (vertex_idx node = 0; node < G.num_vertices(); ++node) { std::set parts_included; parts_included.insert(node_to_part[node]); - for (const auto &succ : G.children(node)) + for (const auto &succ : G.children(node)) { parts_included.insert(node_to_part[succ]); - - cost += static_cast>(parts_included.size() -1) * G.vertex_comm_weight(node); + } + + cost += static_cast>(parts_included.size() - 1) * G.vertex_comm_weight(node); } return cost; } -} \ No newline at end of file +} // namespace osp diff --git a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp index de849a0b..2faaeb81 100644 --- a/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp +++ b/include/osp/pebbling/pebblers/pebblingILP/partialILP/AcyclicPartitioningILP.hpp @@ -13,21 +13,20 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. -@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner +@author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ #pragma once -#include "osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp" -#include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/bsp/model/BspInstance.hpp" +#include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/pebbling/pebblers/pebblingILP/COPTEnv.hpp" -namespace osp{ +namespace osp { -template +template class AcyclicPartitioningILP { - - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); + static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); private: using vertex_idx = vertex_idx_t; @@ -39,7 +38,6 @@ class AcyclicPartitioningILP { bool ignore_sources_for_constraint = true; class WriteSolutionCallback : public CallbackBase { - private: unsigned counter; unsigned max_number_solution; @@ -48,14 +46,16 @@ class AcyclicPartitioningILP { public: WriteSolutionCallback() - : counter(0), max_number_solution(500), best_obj(COPT_INFINITY), write_solutions_path_cb(""), + : counter(0), + max_number_solution(500), + best_obj(COPT_INFINITY), + write_solutions_path_cb(""), solution_file_prefix_cb("") {} std::string write_solutions_path_cb; std::string solution_file_prefix_cb; void callback() override; - }; WriteSolutionCallback solution_callback; @@ -79,12 +79,11 @@ class AcyclicPartitioningILP { void solveILP(); public: - AcyclicPartitioningILP() - : model(COPTEnv::getInstance().CreateModel("AsyncPart")), write_solutions_found(false) {} + AcyclicPartitioningILP() : model(COPTEnv::getInstance().CreateModel("AsyncPart")), write_solutions_found(false) {} virtual ~AcyclicPartitioningILP() = default; - RETURN_STATUS computePartitioning(const BspInstance &instance, std::vector& partitioning); + RETURN_STATUS computePartitioning(const BspInstance &instance, std::vector &partitioning); /** * @brief Enables writing intermediate solutions. @@ -132,7 +131,7 @@ class AcyclicPartitioningILP { * @return The best bound found by the solver. */ inline double bestBound() { return model.GetDblAttr(COPT_DBLATTR_BESTBND); } - + /** * @brief Get the name of the schedule. * @@ -142,18 +141,25 @@ class AcyclicPartitioningILP { // getters and setters for problem parameters inline std::pair getMinAndMaxSize() const { return std::make_pair(minPartitionSize, maxPartitionSize); } - inline void setMinAndMaxSize(const std::pair min_and_max) {minPartitionSize = min_and_max.first; maxPartitionSize = min_and_max.second; } + + inline void setMinAndMaxSize(const std::pair min_and_max) { + minPartitionSize = min_and_max.first; + maxPartitionSize = min_and_max.second; + } inline unsigned getNumberOfParts() const { return numberOfParts; } - inline void setNumberOfParts(const unsigned number_of_parts) {numberOfParts = number_of_parts; } - inline void setIgnoreSourceForConstraint(const bool ignore_) {ignore_sources_for_constraint = ignore_; } - inline void setIsOriginalSource(const std::vector& is_original_source_) {is_original_source = is_original_source_; } + + inline void setNumberOfParts(const unsigned number_of_parts) { numberOfParts = number_of_parts; } + + inline void setIgnoreSourceForConstraint(const bool ignore_) { ignore_sources_for_constraint = ignore_; } + + inline void setIsOriginalSource(const std::vector &is_original_source_) { is_original_source = is_original_source_; } + void setTimeLimitSeconds(unsigned time_limit_seconds_) { time_limit_seconds = time_limit_seconds_; } }; -template +template void AcyclicPartitioningILP::solveILP() { - model.SetIntParam(COPT_INTPARAM_LOGTOCONSOLE, 0); model.SetDblParam(COPT_DBLPARAM_TIMELIMIT, time_limit_seconds); @@ -172,15 +178,15 @@ void AcyclicPartitioningILP::solveILP() { model.Solve(); } -template -RETURN_STATUS AcyclicPartitioningILP::computePartitioning(const BspInstance &instance, std::vector& partitioning) -{ +template +RETURN_STATUS AcyclicPartitioningILP::computePartitioning(const BspInstance &instance, + std::vector &partitioning) { partitioning.clear(); - if(numberOfParts == 0) - { - numberOfParts = static_cast(std::floor(static_cast(instance.numberOfVertices()) / static_cast(minPartitionSize))); - std::cout<<"ILP nr parts: "<( + std::floor(static_cast(instance.numberOfVertices()) / static_cast(minPartitionSize))); + std::cout << "ILP nr parts: " << numberOfParts << std::endl; } setupVariablesConstraintsObjective(instance); @@ -188,19 +194,15 @@ RETURN_STATUS AcyclicPartitioningILP::computePartitioning(const BspInst solveILP(); if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_OPTIMAL) { - partitioning = returnAssignment(instance); return RETURN_STATUS::OSP_SUCCESS; } else if (model.GetIntAttr(COPT_INTATTR_MIPSTATUS) == COPT_MIPSTATUS_INF_OR_UNB) { - partitioning.resize(instance.numberOfVertices(), UINT_MAX); return RETURN_STATUS::ERROR; } else { - if (model.GetIntAttr(COPT_INTATTR_HASMIPSOL)) { - partitioning = returnAssignment(instance); return RETURN_STATUS::OSP_SUCCESS; @@ -211,140 +213,148 @@ RETURN_STATUS AcyclicPartitioningILP::computePartitioning(const BspInst } } -template +template void AcyclicPartitioningILP::setupVariablesConstraintsObjective(const BspInstance &instance) { - // Variables node_in_partition = std::vector(instance.numberOfVertices()); - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { node_in_partition[node] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "node_in_partition"); + } - std::map node_to_hyperedge_index; unsigned numberOfHyperedges = 0; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - if(instance.getComputationalDag().out_degree(node) > 0) - { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (instance.getComputationalDag().out_degree(node) > 0) { node_to_hyperedge_index[node] = numberOfHyperedges; ++numberOfHyperedges; } + } hyperedge_intersects_partition = std::vector(numberOfHyperedges); - for (unsigned hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) - hyperedge_intersects_partition[hyperedge] = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "hyperedge_intersects_partition"); + for (unsigned hyperedge = 0; hyperedge < numberOfHyperedges; hyperedge++) { + hyperedge_intersects_partition[hyperedge] + = model.AddVars(static_cast(numberOfParts), COPT_BINARY, "hyperedge_intersects_partition"); + } // Constraints // each node assigned to exactly one partition for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { - Expr expr; for (unsigned part = 0; part < numberOfParts; part++) { - expr += node_in_partition[node][static_cast(part)]; } model.AddConstr(expr == 1); } // hyperedge indicators match node variables - for (unsigned part = 0; part < numberOfParts; part++) - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - { - if(instance.getComputationalDag().out_degree(node) == 0) + for (unsigned part = 0; part < numberOfParts; part++) { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (instance.getComputationalDag().out_degree(node) == 0) { continue; + } - model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast(part)] >= node_in_partition[node][static_cast(part)]); - for (const auto &succ : instance.getComputationalDag().children(node)) - model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast(part)] >= node_in_partition[succ][static_cast(part)]); + model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast(part)] + >= node_in_partition[node][static_cast(part)]); + for (const auto &succ : instance.getComputationalDag().children(node)) { + model.AddConstr(hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast(part)] + >= node_in_partition[succ][static_cast(part)]); + } } - + } + // partition size constraints - for (unsigned part = 0; part < numberOfParts; part++) - { + for (unsigned part = 0; part < numberOfParts; part++) { Expr expr; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - if(!ignore_sources_for_constraint || is_original_source.empty() || !is_original_source[node]) + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (!ignore_sources_for_constraint || is_original_source.empty() || !is_original_source[node]) { expr += node_in_partition[node][static_cast(part)]; + } + } model.AddConstr(expr <= maxPartitionSize); model.AddConstr(expr >= minPartitionSize); } // acyclicity constraints - for (unsigned from_part = 0; from_part < numberOfParts; from_part++) - for (unsigned to_part = 0; to_part < from_part; to_part++) - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - for (const auto &succ : instance.getComputationalDag().children(node)) - model.AddConstr(node_in_partition[node][static_cast(from_part)] + node_in_partition[succ][static_cast(to_part)] <= 1); - + for (unsigned from_part = 0; from_part < numberOfParts; from_part++) { + for (unsigned to_part = 0; to_part < from_part; to_part++) { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + for (const auto &succ : instance.getComputationalDag().children(node)) { + model.AddConstr(node_in_partition[node][static_cast(from_part)] + + node_in_partition[succ][static_cast(to_part)] + <= 1); + } + } + } + } // set objective Expr expr; - for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) - if(instance.getComputationalDag().out_degree(node) > 0) - { + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { + if (instance.getComputationalDag().out_degree(node) > 0) { expr -= instance.getComputationalDag().vertex_comm_weight(node); - for (unsigned part = 0; part < numberOfParts; part++) - expr += instance.getComputationalDag().vertex_comm_weight(node) * hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast(part)]; + for (unsigned part = 0; part < numberOfParts; part++) { + expr += instance.getComputationalDag().vertex_comm_weight(node) + * hyperedge_intersects_partition[node_to_hyperedge_index[node]][static_cast(part)]; + } } + } model.SetObjective(expr, COPT_MINIMIZE); - }; -template +template void AcyclicPartitioningILP::WriteSolutionCallback::callback() { - if (Where() == COPT_CBCONTEXT_MIPSOL && counter < max_number_solution && GetIntInfo(COPT_CBINFO_HASINCUMBENT)) { - try { - if (GetDblInfo(COPT_CBINFO_BESTOBJ) < best_obj && 0.0 < GetDblInfo(COPT_CBINFO_BESTBND)) { - best_obj = GetDblInfo(COPT_CBINFO_BESTOBJ); counter++; } - } catch (const std::exception &e) { - } + } catch (const std::exception &e) {} } }; -template -std::vector AcyclicPartitioningILP::returnAssignment(const BspInstance &instance) -{ +template +std::vector AcyclicPartitioningILP::returnAssignment(const BspInstance &instance) { std::vector node_to_partition(instance.numberOfVertices(), UINT_MAX); std::set nonempty_partition_ids; - for (unsigned node = 0; node < instance.numberOfVertices(); node++) - for(unsigned part = 0; part < numberOfParts; part++) - if(node_in_partition[node][static_cast(part)].Get(COPT_DBLINFO_VALUE) >= .99) - { + for (unsigned node = 0; node < instance.numberOfVertices(); node++) { + for (unsigned part = 0; part < numberOfParts; part++) { + if (node_in_partition[node][static_cast(part)].Get(COPT_DBLINFO_VALUE) >= .99) { node_to_partition[node] = part; nonempty_partition_ids.insert(part); } + } + } + + for (unsigned chosen_partition : node_to_partition) { + if (chosen_partition == UINT_MAX) { + std::cout << "Error: partitioning returned by ILP seems incomplete!" << std::endl; + } + } - for(unsigned chosen_partition : node_to_partition) - if(chosen_partition == UINT_MAX) - std::cout<<"Error: partitioning returned by ILP seems incomplete!"< new_index; - for(unsigned part_index : nonempty_partition_ids) - { + for (unsigned part_index : nonempty_partition_ids) { new_index[part_index] = current_index; ++current_index; } - for(vertex_idx node = 0; node < instance.numberOfVertices(); node++) + for (vertex_idx node = 0; node < instance.numberOfVertices(); node++) { node_to_partition[node] = new_index[node_to_partition[node]]; + } - std::cout<<"Acyclic partitioning ILP best solution value: "< +template class SubproblemMultiScheduling : public Scheduler { - - static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); + static_assert(is_computational_dag_v, "PebblingSchedule can only be used with computational DAGs."); private: using vertex_idx = vertex_idx_t; @@ -35,7 +34,7 @@ class SubproblemMultiScheduling : public Scheduler { using workweight_type = v_workw_t; std::vector last_node_on_proc; - std::vector > proc_task_lists; + std::vector> proc_task_lists; std::vector longest_outgoing_path; public: @@ -43,17 +42,17 @@ class SubproblemMultiScheduling : public Scheduler { virtual ~SubproblemMultiScheduling() = default; - RETURN_STATUS computeMultiSchedule(const BspInstance &instance, std::vector >& processors_to_node); + RETURN_STATUS computeMultiSchedule(const BspInstance &instance, std::vector> &processors_to_node); - std::vector > makeAssignment(const BspInstance &instance, - const std::set > &nodes_available, - const std::set &procs_available) const; + std::vector> makeAssignment(const BspInstance &instance, + const std::set> &nodes_available, + const std::set &procs_available) const; std::vector static get_longest_path(const Graph_t &graph); // not used, only here for using scheduler class base functionality (status enums, timelimits, etc) RETURN_STATUS computeSchedule(BspSchedule &schedule) override; - + /** * @brief Get the name of the schedule. * @@ -61,13 +60,12 @@ class SubproblemMultiScheduling : public Scheduler { */ virtual std::string getScheduleName() const override { return "SubproblemMultiScheduling"; } - inline const std::vector >& getProcTaskLists() const { return proc_task_lists; } - + inline const std::vector> &getProcTaskLists() const { return proc_task_lists; } }; // currently duplicated from BSP locking scheduler's code -template -std::vector > SubproblemMultiScheduling::get_longest_path(const Graph_t &graph) { +template +std::vector> SubproblemMultiScheduling::get_longest_path(const Graph_t &graph) { std::vector longest_path(graph.num_vertices(), 0); std::vector top_order = GetTopOrder(graph); @@ -77,8 +75,9 @@ std::vector > SubproblemMultiScheduling::get_longest if (graph.out_degree(*r_iter) > 0) { workweight_type max = 0; for (const auto &child : graph.children(*r_iter)) { - if (max <= longest_path[child]) + if (max <= longest_path[child]) { max = longest_path[child]; + } } longest_path[*r_iter] += max; } @@ -87,9 +86,9 @@ std::vector > SubproblemMultiScheduling::get_longest return longest_path; } -template -RETURN_STATUS SubproblemMultiScheduling::computeMultiSchedule(const BspInstance &instance, std::vector >& processors_to_node) -{ +template +RETURN_STATUS SubproblemMultiScheduling::computeMultiSchedule(const BspInstance &instance, + std::vector> &processors_to_node) { const unsigned &N = static_cast(instance.numberOfVertices()); const unsigned &P = instance.numberOfProcessors(); const auto &G = instance.getComputationalDag(); @@ -105,7 +104,7 @@ RETURN_STATUS SubproblemMultiScheduling::computeMultiSchedule(const Bsp longest_outgoing_path = get_longest_path(G); - std::set > readySet; + std::set> readySet; std::vector nrPredecRemain(N); for (vertex_idx node = 0; node < N; node++) { @@ -116,8 +115,9 @@ RETURN_STATUS SubproblemMultiScheduling::computeMultiSchedule(const Bsp } std::set free_procs; - for(unsigned proc = 0; proc < P; ++proc) + for (unsigned proc = 0; proc < P; ++proc) { free_procs.insert(proc); + } std::vector node_finish_time(N, 0); @@ -125,35 +125,32 @@ RETURN_STATUS SubproblemMultiScheduling::computeMultiSchedule(const Bsp finishTimes.emplace(0, std::numeric_limits::max()); while (!readySet.empty() || !finishTimes.empty()) { - const double time = finishTimes.begin()->first; // Find new ready jobs - while (!finishTimes.empty() && fabs(finishTimes.begin()->first - time) < 0.0001 ) { - + while (!finishTimes.empty() && fabs(finishTimes.begin()->first - time) < 0.0001) { const vertex_idx node = finishTimes.begin()->second; finishTimes.erase(finishTimes.begin()); - if (node != std::numeric_limits::max()) - { - for (const vertex_idx &succ : G.children(node)) - { + if (node != std::numeric_limits::max()) { + for (const vertex_idx &succ : G.children(node)) { nrPredecRemain[succ]--; - if (nrPredecRemain[succ] == 0) + if (nrPredecRemain[succ] == 0) { readySet.emplace(-longest_outgoing_path[succ], succ); + } } - for(unsigned proc : processors_to_node[node]) + for (unsigned proc : processors_to_node[node]) { free_procs.insert(proc); + } } } // Assign new jobs to idle processors // first assign free processors to ready nodes - std::vector > new_assingments = makeAssignment(instance, readySet, free_procs); + std::vector> new_assingments = makeAssignment(instance, readySet, free_procs); - for(auto entry : new_assingments) - { + for (auto entry : new_assingments) { vertex_idx node = entry.first; unsigned proc = entry.second; @@ -168,92 +165,95 @@ RETURN_STATUS SubproblemMultiScheduling::computeMultiSchedule(const Bsp // assign remaining free processors to already started nodes, if it helps decltype(finishTimes.rbegin()) itr = finishTimes.rbegin(); - while(!free_procs.empty() && itr != finishTimes.rend()) - { + while (!free_procs.empty() && itr != finishTimes.rend()) { double last_finish_time = itr->first; decltype(finishTimes.rbegin()) itr_latest = itr; - std::set > possible_nodes; - while(itr_latest !=finishTimes.rend() && itr_latest->first + 0.0001 > last_finish_time) - { + std::set> possible_nodes; + while (itr_latest != finishTimes.rend() && itr_latest->first + 0.0001 > last_finish_time) { vertex_idx node = itr_latest->second; - double new_finish_time = time + static_cast(G.vertex_work_weight(node)) / (static_cast(processors_to_node[node].size()) + 1); - if(new_finish_time + 0.0001 < itr_latest->first) + double new_finish_time = time + + static_cast(G.vertex_work_weight(node)) + / (static_cast(processors_to_node[node].size()) + 1); + if (new_finish_time + 0.0001 < itr_latest->first) { possible_nodes.emplace(-longest_outgoing_path[node], node); - + } + ++itr_latest; } new_assingments = makeAssignment(instance, possible_nodes, free_procs); - for(auto entry : new_assingments) - { + for (auto entry : new_assingments) { vertex_idx node = entry.first; unsigned proc = entry.second; processors_to_node[node].insert(proc); proc_task_lists[proc].push_back(node); finishTimes.erase({node_finish_time[node], node}); - double new_finish_time = time + static_cast(G.vertex_work_weight(node)) / (static_cast(processors_to_node[node].size())); + double new_finish_time + = time + + static_cast(G.vertex_work_weight(node)) / (static_cast(processors_to_node[node].size())); finishTimes.emplace(new_finish_time, node); node_finish_time[node] = new_finish_time; last_node_on_proc[proc] = node; free_procs.erase(proc); } - if(new_assingments.empty()) + if (new_assingments.empty()) { itr = itr_latest; + } } - } return RETURN_STATUS::OSP_SUCCESS; } -template -std::vector, unsigned> > SubproblemMultiScheduling::makeAssignment(const BspInstance &instance, - const std::set > &nodes_available, - const std::set &procs_available) const -{ - std::vector > assignments; - if(nodes_available.empty() || procs_available.empty()) +template +std::vector, unsigned>> SubproblemMultiScheduling::makeAssignment( + const BspInstance &instance, + const std::set> &nodes_available, + const std::set &procs_available) const { + std::vector> assignments; + if (nodes_available.empty() || procs_available.empty()) { return assignments; + } std::set assigned_nodes; std::vector assigned_procs(instance.numberOfProcessors(), false); - for(unsigned proc : procs_available) - { - if(last_node_on_proc[proc] == UINT_MAX) + for (unsigned proc : procs_available) { + if (last_node_on_proc[proc] == UINT_MAX) { continue; + } - for (const auto &succ : instance.getComputationalDag().children(last_node_on_proc[proc])) - if(nodes_available.find({-longest_outgoing_path[succ], succ}) != nodes_available.end() && instance.isCompatible(succ, proc) - && assigned_nodes.find(succ) == assigned_nodes.end()) - { + for (const auto &succ : instance.getComputationalDag().children(last_node_on_proc[proc])) { + if (nodes_available.find({-longest_outgoing_path[succ], succ}) != nodes_available.end() + && instance.isCompatible(succ, proc) && assigned_nodes.find(succ) == assigned_nodes.end()) { assignments.emplace_back(succ, proc); assigned_nodes.insert(succ); assigned_procs[proc] = true; break; } + } } - - for(unsigned proc : procs_available) - if(!assigned_procs[proc]) - for(auto itr = nodes_available.begin(); itr != nodes_available.end(); ++itr) - { + + for (unsigned proc : procs_available) { + if (!assigned_procs[proc]) { + for (auto itr = nodes_available.begin(); itr != nodes_available.end(); ++itr) { vertex_idx node = itr->second; - if(instance.isCompatible(node, proc) && assigned_nodes.find(node) == assigned_nodes.end()) - { + if (instance.isCompatible(node, proc) && assigned_nodes.find(node) == assigned_nodes.end()) { assignments.emplace_back(node, proc); assigned_nodes.insert(node); break; } } + } + } return assignments; } -template +template RETURN_STATUS SubproblemMultiScheduling::computeSchedule(BspSchedule &) { return RETURN_STATUS::ERROR; } -} \ No newline at end of file +} // namespace osp diff --git a/tests/balanced_coin_flips.cpp b/tests/balanced_coin_flips.cpp index dd4a2e47..f30b64f0 100644 --- a/tests/balanced_coin_flips.cpp +++ b/tests/balanced_coin_flips.cpp @@ -17,12 +17,11 @@ limitations under the License. */ #define BOOST_TEST_MODULE Balanced_Coin_Flips -#include +#include "osp/auxiliary/Balanced_Coin_Flips.hpp" #include +#include #include -#include "osp/auxiliary/Balanced_Coin_Flips.hpp" - using namespace osp; @@ -30,17 +29,17 @@ bool thue_morse_gen(long unsigned int n) { // std::bitset bits(n); unsigned long int bin_sum = 0; while (n != 0) { - bin_sum += n%2; + bin_sum += n % 2; n /= 2; } - return bool(bin_sum%2); // (bits.count()%2); + return bool(bin_sum % 2); // (bits.count()%2); } BOOST_AUTO_TEST_CASE(Random_Biased_Coin) { - std::cout << "True: " << true << " False: " << false << std::endl; + std::cout << "True: " << true << " False: " << false << std::endl; Biased_Random Coin; std::cout << "Biased Coin: "; - for (int i = 0 ; i < 200 ; i++) { + for (int i = 0; i < 200; i++) { std::cout << Coin.get_flip(); } std::cout << std::endl << std::endl; @@ -49,25 +48,24 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin) { BOOST_AUTO_TEST_CASE(Thue__Morse) { Thue_Morse_Sequence Coin(0); - std::vector beginning({0,1,1,0,1,0,0,1,1,0,0,1,0,1,1,0,1,0,0,1,0,1,1,0,0,1,1,0,1}); + std::vector beginning({0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 1, 0, 1}); std::vector generated; - for (long unsigned i = 0 ; i(randInt(1048575)); Thue_Morse_Sequence Test_Coin_random(ind); BOOST_CHECK_EQUAL(Test_Coin_random.get_flip(), thue_morse_gen(ind)); @@ -75,27 +73,25 @@ BOOST_AUTO_TEST_CASE(Thue__Morse) { } } - BOOST_AUTO_TEST_CASE(Repeater_Coin) { Repeat_Chance Coin; std::cout << "Repeater Coin: "; - for (int i = 0 ; i < 200 ; i++) { + for (int i = 0; i < 200; i++) { std::cout << Coin.get_flip(); } std::cout << std::endl << std::endl; } BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_1) { - Biased_Random_with_side_bias Coin({1,1}); + Biased_Random_with_side_bias Coin({1, 1}); int true_count = 0; int false_count = 0; std::cout << "Biased Coin with side bias 1:1 : "; - for (int i = 0 ; i < 200 ; i++) { + for (int i = 0; i < 200; i++) { bool flip = Coin.get_flip(); if (flip) { true_count++; - } - else { + } else { false_count++; } std::cout << flip; @@ -106,16 +102,15 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_1) { } BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_0) { - Biased_Random_with_side_bias Coin({1,0}); + Biased_Random_with_side_bias Coin({1, 0}); int true_count = 0; int false_count = 0; std::cout << "Biased Coin with side bias 1:0 : "; - for (int i = 0 ; i < 200 ; i++) { + for (int i = 0; i < 200; i++) { bool flip = Coin.get_flip(); if (flip) { true_count++; - } - else { + } else { false_count++; } std::cout << flip; @@ -125,18 +120,16 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_1_0) { std::cout << std::endl; } - BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_0_1) { - Biased_Random_with_side_bias Coin({0,1}); + Biased_Random_with_side_bias Coin({0, 1}); int true_count = 0; int false_count = 0; std::cout << "Biased Coin with side bias 0:1 : "; - for (int i = 0 ; i < 200 ; i++) { + for (int i = 0; i < 200; i++) { bool flip = Coin.get_flip(); if (flip) { true_count++; - } - else { + } else { false_count++; } std::cout << flip; @@ -146,18 +139,16 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_0_1) { std::cout << std::endl; } - BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_2) { - Biased_Random_with_side_bias Coin({3,2}); + Biased_Random_with_side_bias Coin({3, 2}); int true_count = 0; int false_count = 0; std::cout << "Biased Coin with side bias 3:2 : "; - for (int i = 0 ; i < 200 ; i++) { + for (int i = 0; i < 200; i++) { bool flip = Coin.get_flip(); if (flip) { true_count++; - } - else { + } else { false_count++; } std::cout << flip; @@ -168,16 +159,15 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_2) { } BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_1) { - Biased_Random_with_side_bias Coin({3,1}); + Biased_Random_with_side_bias Coin({3, 1}); int true_count = 0; int false_count = 0; std::cout << "Biased Coin with side bias 3:1 : "; - for (int i = 0 ; i < 200 ; i++) { + for (int i = 0; i < 200; i++) { bool flip = Coin.get_flip(); if (flip) { true_count++; - } - else { + } else { false_count++; } std::cout << flip; @@ -185,4 +175,4 @@ BOOST_AUTO_TEST_CASE(Random_Biased_Coin_with_side_bias_3_1) { std::cout << std::endl; std::cout << "True count: " << true_count << " False count: " << false_count << std::endl; std::cout << std::endl; -} \ No newline at end of file +} diff --git a/tests/bit_mask.cpp b/tests/bit_mask.cpp index a80e45b4..5ba648b5 100644 --- a/tests/bit_mask.cpp +++ b/tests/bit_mask.cpp @@ -17,10 +17,10 @@ limitations under the License. */ #define BOOST_TEST_MODULE BitMasks -#include - #include "osp/auxiliary/datastructures/bit_mask.hpp" +#include + using namespace osp; BOOST_AUTO_TEST_CASE(BitMaskTest_1) { @@ -29,13 +29,12 @@ BOOST_AUTO_TEST_CASE(BitMaskTest_1) { for (unsigned i = 0; i < 25U; ++i) { for (std::size_t j = 0; j < num_flags; ++j) { - BOOST_CHECK_EQUAL( mask.mask[j], bool(i & (1U << j)) ); + BOOST_CHECK_EQUAL(mask.mask[j], bool(i & (1U << j))); } ++mask; } } - BOOST_AUTO_TEST_CASE(BitMaskTest_2) { const std::size_t num_flags = 6U; BitMask mask(num_flags); @@ -44,7 +43,7 @@ BOOST_AUTO_TEST_CASE(BitMaskTest_2) { BitMask tmp = mask; BitMask post = mask++; for (std::size_t j = 0; j < num_flags; ++j) { - BOOST_CHECK_EQUAL( tmp.mask[j], post.mask[j] ); + BOOST_CHECK_EQUAL(tmp.mask[j], post.mask[j]); } } } @@ -57,7 +56,7 @@ BOOST_AUTO_TEST_CASE(BitMaskTest_3) { BitMask tmp = mask++; ++tmp; for (std::size_t j = 0; j < num_flags; ++j) { - BOOST_CHECK_EQUAL( tmp.mask[j], mask.mask[j] ); + BOOST_CHECK_EQUAL(tmp.mask[j], mask.mask[j]); } } -} \ No newline at end of file +} diff --git a/tests/boost_graph_adaptor.cpp b/tests/boost_graph_adaptor.cpp index 5a1ea402..401cf216 100644 --- a/tests/boost_graph_adaptor.cpp +++ b/tests/boost_graph_adaptor.cpp @@ -30,7 +30,6 @@ limitations under the License. using namespace osp; boost_graph_int_t constr_graph_1() { - boost_graph_int_t graph; using vertex_idx = boost_graph_int_t::vertex_idx; @@ -78,14 +77,12 @@ boost_graph_int_t constr_graph_1() { } BOOST_AUTO_TEST_CASE(test_empty_dag_boost_graph_adapter) { - boost_graph_int_t graph; BOOST_CHECK_EQUAL(graph.num_edges(), 0); BOOST_CHECK_EQUAL(graph.num_vertices(), 0); } BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) { - boost_graph_int_t graph = constr_graph_1(); using vertex_idx = boost_graph_int_t::vertex_idx; @@ -95,7 +92,6 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) { size_t edge_idx = 0; for (const auto &edge : graph.edges()) { - BOOST_CHECK_EQUAL(source(edge, graph), edge_sources[edge_idx]); BOOST_CHECK_EQUAL(target(edge, graph), edge_targets[edge_idx]); edge_idx++; @@ -103,23 +99,38 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) { edge_idx = 0; for (const auto &edge : edges(graph)) { - BOOST_CHECK_EQUAL(source(edge, graph), edge_sources[edge_idx]); BOOST_CHECK_EQUAL(target(edge, graph), edge_targets[edge_idx]); edge_idx++; } - std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; - std::vector> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}}; - - std::vector> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {3, 4}}; + std::vector> out_neighbors{ + {1, 2, 3}, + {4, 6}, + {4, 5}, + {7}, + {7}, + {}, + {}, + {} + }; + + std::vector> in_neighbors{ + {}, + {0}, + {0}, + {0}, + {1, 2}, + {2}, + {1}, + {3, 4} + }; size_t idx = 0; for (const auto &v : graph.vertices()) { - BOOST_CHECK_EQUAL(v, vertices[idx++]); size_t i = 0; @@ -168,7 +179,6 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_adapter_1) { } BOOST_AUTO_TEST_CASE(test_util_1) { - const boost_graph_int_t graph = constr_graph_1(); BOOST_CHECK_EQUAL(graph.num_edges(), 9); @@ -242,7 +252,6 @@ BOOST_AUTO_TEST_CASE(test_util_1) { } BOOST_AUTO_TEST_CASE(test_constr_dag) { - boost_graph_int_t graph; graph.add_vertex(1, 2, 3); @@ -289,14 +298,12 @@ BOOST_AUTO_TEST_CASE(test_constr_dag) { } BOOST_AUTO_TEST_CASE(test_boost_graph_const_1) { - boost_graph_int_t graph(10u); BOOST_CHECK_EQUAL(graph.num_edges(), 0); BOOST_CHECK_EQUAL(graph.num_vertices(), 10); } BOOST_AUTO_TEST_CASE(test_boost_graph_const_2) { - boost_graph_int_t graph_1 = constr_graph_1(); boost_graph_int_t graph_copy(graph_1); @@ -360,4 +367,4 @@ BOOST_AUTO_TEST_CASE(test_boost_graph_const_2) { BOOST_CHECK_EQUAL(has_path(1, 2, graph_move_2), false); BOOST_CHECK_EQUAL(has_path(1, 3, graph_move_2), false); BOOST_CHECK_EQUAL(has_path(2, 1, graph_move_2), false); -} \ No newline at end of file +} diff --git a/tests/bsp_architecture.cpp b/tests/bsp_architecture.cpp index d803bb56..16e221bc 100644 --- a/tests/bsp_architecture.cpp +++ b/tests/bsp_architecture.cpp @@ -25,8 +25,12 @@ limitations under the License. using namespace osp; BOOST_AUTO_TEST_CASE(ParameterizedConstructorTest) { - - std::vector> uniform_sent_costs = {{0, 1, 1, 1}, {1, 0, 1, 1}, {1, 1, 0, 1}, {1, 1, 1, 0}}; + std::vector> uniform_sent_costs = { + {0, 1, 1, 1}, + {1, 0, 1, 1}, + {1, 1, 0, 1}, + {1, 1, 1, 0} + }; BspArchitecture architecture(4, 2, 3); BOOST_TEST(architecture.numberOfProcessors() == 4); @@ -63,7 +67,12 @@ BOOST_AUTO_TEST_CASE(ParameterizedConstructorTest) { BOOST_TEST(architecture.sendCost() == uniform_sent_costs); - std::vector> expectedSendCosts = {{0, 2, 2, 2}, {2, 0, 2, 2}, {2, 2, 0, 2}, {2, 2, 2, 0}}; + std::vector> expectedSendCosts = { + {0, 2, 2, 2}, + {2, 0, 2, 2}, + {2, 2, 0, 2}, + {2, 2, 2, 0} + }; architecture.SetSendCosts(expectedSendCosts); BOOST_TEST(architecture.sendCost() == expectedSendCosts); @@ -79,7 +88,6 @@ BOOST_AUTO_TEST_CASE(ParameterizedConstructorTest) { } BOOST_AUTO_TEST_CASE(Architecture) { - // default constructor BspArchitecture test; BOOST_CHECK_EQUAL(test.numberOfProcessors(), 2); @@ -141,7 +149,14 @@ BOOST_AUTO_TEST_CASE(Architecture) { } // constructor - std::vector> send_costs = {{0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, {1, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}}; + std::vector> send_costs = { + {0, 1, 1, 1, 1, 1}, + {1, 0, 1, 1, 1, 1}, + {1, 1, 0, 1, 1, 1}, + {1, 1, 1, 0, 1, 1}, + {1, 1, 1, 1, 0, 1}, + {1, 1, 1, 1, 1, 0} + }; BOOST_CHECK_THROW(BspArchitecture test31(7, 42942, 0, send_costs), std::invalid_argument); @@ -168,8 +183,22 @@ BOOST_AUTO_TEST_CASE(Architecture) { } // constructor - std::vector> send_costs2 = {{0, 1, 2, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, {1, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}}; - std::vector> send_costs3 = {{0, 1, 1, 1, 1, 1}, {1, 0, 1, 1, 1, 1}, {1, 1, 0, 1, 1, 1}, {3, 1, 1, 0, 1, 1}, {1, 1, 1, 1, 0, 1}, {1, 1, 1, 1, 1, 0}}; + std::vector> send_costs2 = { + {0, 1, 2, 1, 1, 1}, + {1, 0, 1, 1, 1, 1}, + {1, 1, 0, 1, 1, 1}, + {1, 1, 1, 0, 1, 1}, + {1, 1, 1, 1, 0, 1}, + {1, 1, 1, 1, 1, 0} + }; + std::vector> send_costs3 = { + {0, 1, 1, 1, 1, 1}, + {1, 0, 1, 1, 1, 1}, + {1, 1, 0, 1, 1, 1}, + {3, 1, 1, 0, 1, 1}, + {1, 1, 1, 1, 0, 1}, + {1, 1, 1, 1, 1, 0} + }; BspArchitecture test4(6, 0, 4294965, send_costs2); BOOST_CHECK_EQUAL(test4.numberOfProcessors(), 6); diff --git a/tests/bsp_greedy_recomputer.cpp b/tests/bsp_greedy_recomputer.cpp index d411a2b5..cd3ab446 100644 --- a/tests/bsp_greedy_recomputer.cpp +++ b/tests/bsp_greedy_recomputer.cpp @@ -18,18 +18,17 @@ limitations under the License. #define BOOST_TEST_MODULE BSP_GREEDY_RECOMPUTER #include +#include +#include +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyRecomputer.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include -#include using namespace osp; BOOST_AUTO_TEST_CASE(test_recomputer) { - using graph = computational_dag_vector_impl_def_t; BspInstance instance1; @@ -59,8 +58,8 @@ BOOST_AUTO_TEST_CASE(test_recomputer) { scheduler.computeRecompSchedule(schedule_init_cs1, schedule); BOOST_CHECK(schedule.satisfiesConstraints()); BOOST_CHECK(schedule.computeCosts() < schedule_init_cs1.computeCosts()); - std::cout<<"Cost decrease by greedy recomp: "< "< " << schedule.computeCosts() + << std::endl; // non-toy instance @@ -77,8 +76,8 @@ BOOST_AUTO_TEST_CASE(test_recomputer) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance2.getComputationalDag()); + bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), + instance2.getComputationalDag()); BOOST_CHECK(status); @@ -92,5 +91,6 @@ BOOST_AUTO_TEST_CASE(test_recomputer) { scheduler.computeRecompSchedule(schedule_init_cs2, schedule); BOOST_CHECK(schedule.satisfiesConstraints()); BOOST_CHECK(schedule.computeCosts() < schedule_init_cs2.computeCosts()); - std::cout<<"Cost decrease by greedy recomp: "< "< " << schedule.computeCosts() + << std::endl; +} diff --git a/tests/bsp_improvementschedulers.cpp b/tests/bsp_improvementschedulers.cpp index e117bf2d..f1695297 100644 --- a/tests/bsp_improvementschedulers.cpp +++ b/tests/bsp_improvementschedulers.cpp @@ -18,14 +18,12 @@ limitations under the License. #define BOOST_TEST_MODULE BSP_IMPROVEMENTSCHEDULERS #include - #include #include #include #include "osp/bsp/scheduler/ImprovementScheduler.hpp" - // std::vector test_graphs() { // return {"data/spaa/small/instance_exp_N20_K4_nzP0d2.txt", "data/spaa/small/instance_kNN_N20_K5_nzP0d2.txt", // "data/spaa/small/instance_exp_N10_K8_nzP0d2.txt"}; //, "data/spaa/medium/instance_kNN_N50_K4_nzP0d18.txt"}; //, @@ -94,7 +92,6 @@ limitations under the License. // BspInstance instance(graph, architecture); - // RandomBadGreedy test0; // std::pair result0 = test0.computeSchedule(instance); @@ -107,7 +104,6 @@ limitations under the License. // BOOST_CHECK(result0.second.satisfiesPrecedenceConstraints()); // BOOST_CHECK(result0.second.hasValidCommSchedule()); - // BalDMixR test1; // std::pair result1 = test1.computeSchedule(instance); @@ -120,7 +116,6 @@ limitations under the License. // BOOST_CHECK(result1.second.satisfiesPrecedenceConstraints()); // BOOST_CHECK(result1.second.hasValidCommSchedule()); - // HDagg_simple test2; // std::pair result2 = test2.computeSchedule(instance); @@ -137,8 +132,6 @@ limitations under the License. // } // }; - - BOOST_AUTO_TEST_CASE(Hungarian_alg_process_permuter_test) { // Hungarian_alg_process_permuter test; // run_test(&test); @@ -151,12 +144,8 @@ BOOST_AUTO_TEST_CASE(Hungarian_alg_process_permuter_test) { // BOOST_AUTO_TEST_CASE(LKTotalCommScheduler_test) { // kl_total_comm test; - + // test.setTimeLimitSeconds(10); // test.set_compute_with_time_limit(true); // run_test(&test); // } - - - - diff --git a/tests/bsp_instance.cpp b/tests/bsp_instance.cpp index 101e4b2f..d87d8259 100644 --- a/tests/bsp_instance.cpp +++ b/tests/bsp_instance.cpp @@ -18,6 +18,8 @@ limitations under the License. #define BOOST_TEST_MODULE Bsp_Architecture #include +#include +#include #include "osp/auxiliary/io/arch_file_reader.hpp" #include "osp/auxiliary/io/hdag_graph_file_reader.hpp" @@ -26,8 +28,6 @@ limitations under the License. #include "osp/bsp/model/util/CompatibleProcessorRange.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include -#include using namespace osp; @@ -53,7 +53,6 @@ BOOST_AUTO_TEST_CASE(test_1) { } BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { - BspInstance instance; instance.setNumberOfProcessors(4); instance.setCommunicationCosts(2); @@ -67,8 +66,8 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), + instance.getComputationalDag()); BOOST_CHECK(status); BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54); @@ -117,63 +116,47 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { BspInstance instance_t2(instance); BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(), - instance.getComputationalDag().num_vertex_types()); + BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); - BOOST_CHECK_EQUAL(instance_t2.getArchitecture().numberOfProcessors(), - instance.getArchitecture().numberOfProcessors()); + BOOST_CHECK_EQUAL(instance_t2.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors()); BOOST_CHECK_EQUAL(instance_t2.getArchitecture().getNumberOfProcessorTypes(), instance.getArchitecture().getNumberOfProcessorTypes()); - BOOST_CHECK_EQUAL(instance_t2.getArchitecture().communicationCosts(), - instance.getArchitecture().communicationCosts()); - BOOST_CHECK_EQUAL(instance_t2.getArchitecture().synchronisationCosts(), - instance.getArchitecture().synchronisationCosts()); + BOOST_CHECK_EQUAL(instance_t2.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts()); + BOOST_CHECK_EQUAL(instance_t2.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts()); BspInstance instance_t3; instance_t3 = instance; BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_vertex_types(), - instance.getComputationalDag().num_vertex_types()); + BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); BOOST_CHECK_EQUAL(instance_t3.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); - BOOST_CHECK_EQUAL(instance_t3.getArchitecture().numberOfProcessors(), - instance.getArchitecture().numberOfProcessors()); + BOOST_CHECK_EQUAL(instance_t3.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors()); BOOST_CHECK_EQUAL(instance_t3.getArchitecture().getNumberOfProcessorTypes(), instance.getArchitecture().getNumberOfProcessorTypes()); - BOOST_CHECK_EQUAL(instance_t3.getArchitecture().communicationCosts(), - instance.getArchitecture().communicationCosts()); - BOOST_CHECK_EQUAL(instance_t3.getArchitecture().synchronisationCosts(), - instance.getArchitecture().synchronisationCosts()); + BOOST_CHECK_EQUAL(instance_t3.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts()); + BOOST_CHECK_EQUAL(instance_t3.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts()); BspInstance instance_t4(std::move(instance_t3)); BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_vertex_types(), - instance.getComputationalDag().num_vertex_types()); + BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); BOOST_CHECK_EQUAL(instance_t4.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); - BOOST_CHECK_EQUAL(instance_t4.getArchitecture().numberOfProcessors(), - instance.getArchitecture().numberOfProcessors()); + BOOST_CHECK_EQUAL(instance_t4.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors()); BOOST_CHECK_EQUAL(instance_t4.getArchitecture().getNumberOfProcessorTypes(), instance.getArchitecture().getNumberOfProcessorTypes()); - BOOST_CHECK_EQUAL(instance_t4.getArchitecture().communicationCosts(), - instance.getArchitecture().communicationCosts()); - BOOST_CHECK_EQUAL(instance_t4.getArchitecture().synchronisationCosts(), - instance.getArchitecture().synchronisationCosts()); + BOOST_CHECK_EQUAL(instance_t4.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts()); + BOOST_CHECK_EQUAL(instance_t4.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts()); BspInstance instance_t5; instance_t5 = std::move(instance_t4); BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_vertex_types(), - instance.getComputationalDag().num_vertex_types()); + BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); BOOST_CHECK_EQUAL(instance_t5.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); - BOOST_CHECK_EQUAL(instance_t5.getArchitecture().numberOfProcessors(), - instance.getArchitecture().numberOfProcessors()); + BOOST_CHECK_EQUAL(instance_t5.getArchitecture().numberOfProcessors(), instance.getArchitecture().numberOfProcessors()); BOOST_CHECK_EQUAL(instance_t5.getArchitecture().getNumberOfProcessorTypes(), instance.getArchitecture().getNumberOfProcessorTypes()); - BOOST_CHECK_EQUAL(instance_t5.getArchitecture().communicationCosts(), - instance.getArchitecture().communicationCosts()); - BOOST_CHECK_EQUAL(instance_t5.getArchitecture().synchronisationCosts(), - instance.getArchitecture().synchronisationCosts()); -} \ No newline at end of file + BOOST_CHECK_EQUAL(instance_t5.getArchitecture().communicationCosts(), instance.getArchitecture().communicationCosts()); + BOOST_CHECK_EQUAL(instance_t5.getArchitecture().synchronisationCosts(), instance.getArchitecture().synchronisationCosts()); +} diff --git a/tests/bsp_schedule.cpp b/tests/bsp_schedule.cpp index 0b587266..60cdf53e 100644 --- a/tests/bsp_schedule.cpp +++ b/tests/bsp_schedule.cpp @@ -18,6 +18,8 @@ limitations under the License. #define BOOST_TEST_MODULE Bsp_Architecture #include +#include +#include #include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/auxiliary/io/arch_file_reader.hpp" @@ -29,11 +31,6 @@ limitations under the License. #include "osp/bsp/model/BspScheduleRecomp.hpp" #include "osp/bsp/model/MaxBspSchedule.hpp" #include "osp/bsp/model/MaxBspScheduleCS.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include -#include - #include "osp/bsp/model/cost/BufferedSendingCost.hpp" #include "osp/bsp/model/cost/LazyCommunicationCost.hpp" #include "osp/bsp/model/cost/TotalCommunicationCost.hpp" @@ -47,11 +44,12 @@ limitations under the License. #include "osp/bsp/scheduler/GreedySchedulers/RandomGreedy.hpp" #include "osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp" #include "osp/bsp/scheduler/Serial.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; BspInstance instance; @@ -67,16 +65,18 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { std::cout << cwd << std::endl; } - bool status = file_reader::readGraph( - (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); BOOST_CHECK(status); BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54); BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertex_types(), 1); - std::vector *> schedulers = {new BspLocking(), new EtfScheduler(), - new GreedyBspScheduler(), new GreedyChildren(), - new GrowLocalAutoCores(), new VarianceFillup()}; + std::vector *> schedulers = {new BspLocking(), + new EtfScheduler(), + new GreedyBspScheduler(), + new GreedyChildren(), + new GrowLocalAutoCores(), + new VarianceFillup()}; std::vector expected_bsp_costs = {92, 108, 100, 108, 102, 110}; std::vector expected_total_costs = {74, 87, 84.25, 80.25, 91.25, 86.75}; @@ -87,7 +87,6 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { size_t i = 0; for (auto &scheduler : schedulers) { - BspSchedule schedule(instance); const auto result = scheduler->computeSchedule(schedule); @@ -125,7 +124,6 @@ BOOST_AUTO_TEST_CASE(test_instance_bicgstab) { } BOOST_AUTO_TEST_CASE(test_schedule_writer) { - using graph_t1 = computational_dag_edge_idx_vector_impl_def_int_t; using graph_t2 = computational_dag_vector_impl_def_int_t; @@ -142,8 +140,7 @@ BOOST_AUTO_TEST_CASE(test_schedule_writer) { std::cout << cwd << std::endl; } - bool status = file_reader::readGraph( - (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); BOOST_CHECK(status); BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54); @@ -171,12 +168,10 @@ BOOST_AUTO_TEST_CASE(test_schedule_writer) { BOOST_CHECK(schedule_t2.satisfiesPrecedenceConstraints()); BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertices(), instance.getComputationalDag().num_vertices()); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(), - instance.getComputationalDag().num_vertex_types()); + BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_vertex_types(), instance.getComputationalDag().num_vertex_types()); BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().num_edges(), instance.getComputationalDag().num_edges()); for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_work_weight(v), instance.getComputationalDag().vertex_work_weight(v)); BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_comm_weight(v), @@ -185,11 +180,9 @@ BOOST_AUTO_TEST_CASE(test_schedule_writer) { BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_mem_weight(v), instance.getComputationalDag().vertex_mem_weight(v)); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_type(v), - instance.getComputationalDag().vertex_type(v)); + BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().vertex_type(v), instance.getComputationalDag().vertex_type(v)); - BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().out_degree(v), - instance.getComputationalDag().out_degree(v)); + BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().out_degree(v), instance.getComputationalDag().out_degree(v)); BOOST_CHECK_EQUAL(instance_t2.getComputationalDag().in_degree(v), instance.getComputationalDag().in_degree(v)); } @@ -216,7 +209,6 @@ BOOST_AUTO_TEST_CASE(test_schedule_writer) { } BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; BspInstance instance; @@ -232,8 +224,7 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { std::cout << cwd << std::endl; } - file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), - instance.getComputationalDag()); + file_reader::readGraph((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); BspSchedule schedule(instance); BspLocking scheduler; @@ -252,7 +243,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { BOOST_CHECK_EQUAL(schedule_t2.numberOfSupersteps(), schedule.numberOfSupersteps()); for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t2.assignedSuperstep(v), schedule.assignedSuperstep(v)); BOOST_CHECK_EQUAL(schedule_t2.assignedProcessor(v), schedule.assignedProcessor(v)); } @@ -265,7 +255,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { BOOST_CHECK_EQUAL(schedule_t3.numberOfSupersteps(), schedule.numberOfSupersteps()); for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t3.assignedSuperstep(v), schedule.assignedSuperstep(v)); BOOST_CHECK_EQUAL(schedule_t3.assignedProcessor(v), schedule.assignedProcessor(v)); } @@ -278,7 +267,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { BOOST_CHECK(schedule_t4.satisfiesPrecedenceConstraints()); BOOST_CHECK_EQUAL(schedule_t4.numberOfSupersteps(), schedule.numberOfSupersteps()); for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t4.assignedSuperstep(v), schedule.assignedSuperstep(v)); BOOST_CHECK_EQUAL(schedule_t4.assignedProcessor(v), schedule.assignedProcessor(v)); } @@ -290,7 +278,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { BOOST_CHECK_EQUAL(schedule_t5.numberOfSupersteps(), schedule.numberOfSupersteps()); for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t5.assignedSuperstep(v), schedule.assignedSuperstep(v)); BOOST_CHECK_EQUAL(schedule_t5.assignedProcessor(v), schedule.assignedProcessor(v)); } @@ -303,7 +290,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { BOOST_CHECK_EQUAL(schedule_cs.numberOfSupersteps(), schedule.numberOfSupersteps()); for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_cs.assignedSuperstep(v), schedule.assignedSuperstep(v)); BOOST_CHECK_EQUAL(schedule_cs.assignedProcessor(v), schedule.assignedProcessor(v)); } @@ -315,7 +301,6 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { BOOST_CHECK_EQUAL(schedule_t5.numberOfSupersteps(), schedule.numberOfSupersteps()); for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_t5.assignedSuperstep(v), schedule.assignedSuperstep(v)); BOOST_CHECK_EQUAL(schedule_t5.assignedProcessor(v), schedule.assignedProcessor(v)); } @@ -328,26 +313,24 @@ BOOST_AUTO_TEST_CASE(test_bsp_schedule_cs) { BOOST_CHECK_EQUAL(schedule_cs_t2.numberOfSupersteps(), schedule.numberOfSupersteps()); for (const auto &v : instance.getComputationalDag().vertices()) { - BOOST_CHECK_EQUAL(schedule_cs_t2.assignedSuperstep(v), schedule.assignedSuperstep(v)); BOOST_CHECK_EQUAL(schedule_cs_t2.assignedProcessor(v), schedule.assignedProcessor(v)); } } BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; BspInstance instance; instance.setNumberOfProcessors(2); - instance.setCommunicationCosts(10); // g=10 - instance.setSynchronisationCosts(100); // l=100 (not used in MaxBspSchedule cost model) + instance.setCommunicationCosts(10); // g=10 + instance.setSynchronisationCosts(100); // l=100 (not used in MaxBspSchedule cost model) auto &dag = instance.getComputationalDag(); - dag.add_vertex(10, 1, 0); // Node 0 - dag.add_vertex(5, 2, 0); // Node 1 - dag.add_vertex(5, 3, 0); // Node 2 - dag.add_vertex(10, 4, 0); // Node 3 + dag.add_vertex(10, 1, 0); // Node 0 + dag.add_vertex(5, 2, 0); // Node 1 + dag.add_vertex(5, 3, 0); // Node 2 + dag.add_vertex(10, 4, 0); // Node 3 dag.add_edge(0, 1); dag.add_edge(0, 2); dag.add_edge(1, 3); @@ -361,18 +344,18 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) { schedule.setAssignedProcessor(1, 0); schedule.setAssignedSuperstep(1, 1); schedule.setAssignedProcessor(2, 1); - schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 + schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 schedule.setAssignedProcessor(3, 0); - schedule.setAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2 + schedule.setAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2 schedule.updateNumberOfSupersteps(); BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); // Manual cost calculation: // Superstep 0: work = {10, 0} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10. - // Superstep 1: work = {5, 0} -> max_work = 5. comm from SS0: 0->2 (P0->P1) needed at SS2, comm sent in SS0. comm=1*10=10. Cost = max(5,l+10) = 110. - // Superstep 2: work = {0, 5} -> max_work = 5. comm = 0. Cost = max(5, 0) = 5. - // Superstep 3: work = {0, 0} -> max_work = 0. comm from SS2: 2->3 (P1->P0) needed at SS4, comm sent in SS2. comm=3*10=30. Cost = max(0,l+30) = 130. + // Superstep 1: work = {5, 0} -> max_work = 5. comm from SS0: 0->2 (P0->P1) needed at SS2, comm sent in SS0. comm=1*10=10. + // Cost = max(5,l+10) = 110. Superstep 2: work = {0, 5} -> max_work = 5. comm = 0. Cost = max(5, 0) = 5. Superstep 3: work + // = {0, 0} -> max_work = 0. comm from SS2: 2->3 (P1->P0) needed at SS4, comm sent in SS2. comm=3*10=30. Cost = max(0,l+30) = 130. // Superstep 4: work = {10, 0} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10. // Total cost = 10 + 110 + 5 + 130 + 10 = 265 BOOST_CHECK_EQUAL(schedule.computeCosts(), 265); @@ -384,22 +367,22 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) { schedule.setAssignedProcessor(0, 0); schedule.setAssignedSuperstep(0, 0); schedule.setAssignedProcessor(1, 1); - schedule.setAssignedSuperstep(1, 2); // 0->1 is cross-proc, 2 >= 0+2 + schedule.setAssignedSuperstep(1, 2); // 0->1 is cross-proc, 2 >= 0+2 schedule.setAssignedProcessor(2, 1); - schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 + schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 schedule.setAssignedProcessor(3, 0); - schedule.setAssignedSuperstep(3, 4); // 1->3, 2->3 are cross-proc, 4 >= 2+2 + schedule.setAssignedSuperstep(3, 4); // 1->3, 2->3 are cross-proc, 4 >= 2+2 schedule.updateNumberOfSupersteps(); BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); // Manual cost calculation: // Superstep 0: work = {10, 0} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10. - // Superstep 1: work = {0, 0} -> max_work = 0. comm from SS0: 0->1, 0->2 (P0->P1) needed at SS2, comm sent in SS0. comm=1*10=10. Cost = max(0,l+10)=110. - // Superstep 2: work = {0, 10} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10. - // Superstep 3: work = {0, 0} -> max_work = 0. comm from SS2: 1->3, 2->3 (P1->P0) needed at SS4, comm sent in SS2. comm=(2+3)*10=50. Cost = max(0,l+50)=150. - // Superstep 4: work = {10, 0} -> max_work = 10. Cost = max(10, 0) = 10. - // Total cost = 10 + 110 + 10 + 150 + 10 = 290 + // Superstep 1: work = {0, 0} -> max_work = 0. comm from SS0: 0->1, 0->2 (P0->P1) needed at SS2, comm sent in SS0. + // comm=1*10=10. Cost = max(0,l+10)=110. Superstep 2: work = {0, 10} -> max_work = 10. comm = 0. Cost = max(10, 0) = 10. + // Superstep 3: work = {0, 0} -> max_work = 0. comm from SS2: 1->3, 2->3 (P1->P0) needed at SS4, comm sent in SS2. + // comm=(2+3)*10=50. Cost = max(0,l+50)=150. Superstep 4: work = {10, 0} -> max_work = 10. Cost = max(10, 0) = 10. Total + // cost = 10 + 110 + 10 + 150 + 10 = 290 BOOST_CHECK_EQUAL(schedule.computeCosts(), 290); } @@ -408,8 +391,8 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) { MaxBspSchedule schedule(instance); schedule.setAssignedProcessor(0, 0); schedule.setAssignedSuperstep(0, 0); - schedule.setAssignedProcessor(1, 1); // 0->1 on different procs - schedule.setAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1) + schedule.setAssignedProcessor(1, 1); // 0->1 on different procs + schedule.setAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1) schedule.updateNumberOfSupersteps(); BOOST_CHECK(!schedule.satisfiesPrecedenceConstraints()); @@ -417,19 +400,18 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule) { } BOOST_AUTO_TEST_CASE(test_max_bsp_schedule_cs) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; BspInstance instance; instance.setNumberOfProcessors(2); - instance.setCommunicationCosts(10); // g=10 - instance.setSynchronisationCosts(100); // l=100 + instance.setCommunicationCosts(10); // g=10 + instance.setSynchronisationCosts(100); // l=100 auto &dag = instance.getComputationalDag(); - dag.add_vertex(10, 1, 0); // Node 0 - dag.add_vertex(5, 2, 0); // Node 1 - dag.add_vertex(5, 3, 0); // Node 2 - dag.add_vertex(10, 4, 0); // Node 3 + dag.add_vertex(10, 1, 0); // Node 0 + dag.add_vertex(5, 2, 0); // Node 1 + dag.add_vertex(5, 3, 0); // Node 2 + dag.add_vertex(10, 4, 0); // Node 3 dag.add_edge(0, 1); dag.add_edge(0, 2); dag.add_edge(1, 3); @@ -443,16 +425,16 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule_cs) { schedule.setAssignedProcessor(1, 0); schedule.setAssignedSuperstep(1, 1); schedule.setAssignedProcessor(2, 1); - schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 + schedule.setAssignedSuperstep(2, 2); // 0->2 is cross-proc, 2 >= 0+2 schedule.setAssignedProcessor(3, 0); - schedule.setAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2 + schedule.setAssignedSuperstep(3, 4); // 2->3 is cross-proc, 4 >= 2+2 schedule.updateNumberOfSupersteps(); BOOST_CHECK(schedule.satisfiesPrecedenceConstraints()); // Set communication schedule (eager) - schedule.addCommunicationScheduleEntry(0, 0, 1, 0); // 0->2 (P0->P1) sent in SS0 - schedule.addCommunicationScheduleEntry(2, 1, 0, 2); // 2->3 (P1->P0) sent in SS2 + schedule.addCommunicationScheduleEntry(0, 0, 1, 0); // 0->2 (P0->P1) sent in SS0 + schedule.addCommunicationScheduleEntry(2, 1, 0, 2); // 2->3 (P1->P0) sent in SS2 BOOST_CHECK(schedule.hasValidCommSchedule()); @@ -471,10 +453,10 @@ BOOST_AUTO_TEST_CASE(test_max_bsp_schedule_cs) { MaxBspScheduleCS schedule(instance); schedule.setAssignedProcessor(0, 0); schedule.setAssignedSuperstep(0, 0); - schedule.setAssignedProcessor(1, 1); // 0->1 on different procs - schedule.setAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1) + schedule.setAssignedProcessor(1, 1); // 0->1 on different procs + schedule.setAssignedSuperstep(1, 1); // step(0)+2 > step(1) is FALSE (0+2 > 1) schedule.updateNumberOfSupersteps(); BOOST_CHECK(!schedule.satisfiesPrecedenceConstraints()); } -} \ No newline at end of file +} diff --git a/tests/bsp_schedule_recomp.cpp b/tests/bsp_schedule_recomp.cpp index 13fa648b..6d1ce9f1 100644 --- a/tests/bsp_schedule_recomp.cpp +++ b/tests/bsp_schedule_recomp.cpp @@ -18,20 +18,17 @@ limitations under the License. #define BOOST_TEST_MODULE BSP_SCHEDULE_RECOMP #include +#include +#include +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/model/BspScheduleRecomp.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" - #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include -#include using namespace osp; -BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) -{ - +BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) { using graph = computational_dag_vector_impl_def_t; BspInstance instance; @@ -48,7 +45,7 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) } file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), - instance.getComputationalDag()); + instance.getComputationalDag()); BspSchedule schedule(instance); GreedyBspScheduler scheduler; @@ -70,5 +67,4 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) BspScheduleRecomp schedule_recomp_from_cs(scheduleCS); BOOST_CHECK(schedule_recomp_from_cs.satisfiesConstraints()); BOOST_CHECK_EQUAL(schedule_recomp_from_cs.computeCosts(), scheduleCS.computeCosts()); - -} \ No newline at end of file +} diff --git a/tests/bsp_schedulers.cpp b/tests/bsp_schedulers.cpp index d1fdbe4d..d9f16d41 100644 --- a/tests/bsp_schedulers.cpp +++ b/tests/bsp_schedulers.cpp @@ -18,12 +18,13 @@ limitations under the License. #define BOOST_TEST_MODULE BSP_SCHEDULERS #include - #include #include #include -#include "osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp" +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp" #include "osp/bsp/scheduler/GreedySchedulers/CilkScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp" @@ -36,22 +37,20 @@ limitations under the License. #include "osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" +#include "osp/bsp/scheduler/MultilevelCoarseAndSchedule.hpp" #include "osp/bsp/scheduler/Serial.hpp" #include "osp/coarser/Sarkar/SarkarMul.hpp" #include "osp/coarser/SquashA/SquashAMul.hpp" #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "osp/auxiliary/io/general_file_reader.hpp" #include "test_graphs.hpp" using namespace osp; std::vector test_architectures() { return {"data/machine_params/p3.arch"}; } -template +template void run_test(Scheduler *test_scheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = tiny_spaa_graphs(); @@ -78,13 +77,11 @@ void run_test(Scheduler *test_scheduler) { BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -98,7 +95,7 @@ void run_test(Scheduler *test_scheduler) { } } -template +template void run_test_2(Scheduler *test_scheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = tiny_spaa_graphs(); @@ -127,11 +124,9 @@ void run_test_2(Scheduler *test_scheduler) { BspArchitecture arch; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), graph); - bool status_architecture = - file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch); + bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), arch); if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -148,75 +143,63 @@ void run_test_2(Scheduler *test_scheduler) { } BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test) { - GreedyBspScheduler test; run_test(&test); } BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test_2) { - GreedyBspScheduler test; run_test(&test); } BOOST_AUTO_TEST_CASE(Serial_test) { - Serial test; run_test(&test); } BOOST_AUTO_TEST_CASE(cilk_test_1) { - CilkScheduler test; test.setMode(CILK); run_test(&test); } BOOST_AUTO_TEST_CASE(cilk_test_2) { - CilkScheduler test; test.setMode(SJF); run_test(&test); } BOOST_AUTO_TEST_CASE(etf_test) { - EtfScheduler test; run_test(&test); } BOOST_AUTO_TEST_CASE(random_test) { - RandomGreedy test; run_test(&test); } BOOST_AUTO_TEST_CASE(children_test) { - GreedyChildren test; run_test(&test); } BOOST_AUTO_TEST_CASE(locking_test) { - BspLocking test; run_test(&test); } BOOST_AUTO_TEST_CASE(variancefillup_test) { - VarianceFillup test; run_test(&test); } BOOST_AUTO_TEST_CASE(etf_test_edge_desc_impl) { - EtfScheduler test; run_test(&test); } BOOST_AUTO_TEST_CASE(grow_local_auto_test_edge_desc_impl) { - GrowLocalAutoCores test; run_test(&test); } @@ -314,8 +297,7 @@ BOOST_AUTO_TEST_CASE(LightEdgeVariancePartitioner_test) { LightEdgeVariancePartitioner test_flat; run_test(&test_flat); - LightEdgeVariancePartitioner - test_superstep; + LightEdgeVariancePartitioner test_superstep; run_test(&test_superstep); LightEdgeVariancePartitioner test_global; @@ -326,8 +308,9 @@ BOOST_AUTO_TEST_CASE(SquashAMul_test) { GreedyBspScheduler sched; SquashAMul ml_coarsen; - MultilevelCoarseAndSchedule coarsen_test(sched, ml_coarsen); - + MultilevelCoarseAndSchedule + coarsen_test(sched, ml_coarsen); + run_test(&coarsen_test); } @@ -336,19 +319,19 @@ BOOST_AUTO_TEST_CASE(SquashAMul_improver_test) { HillClimbingScheduler improver; SquashAMul ml_coarsen; - MultilevelCoarseAndSchedule coarsen_test(sched, improver, ml_coarsen); - - + MultilevelCoarseAndSchedule + coarsen_test(sched, improver, ml_coarsen); + run_test(&coarsen_test); } - BOOST_AUTO_TEST_CASE(SarkarMul_test) { GreedyBspScheduler sched; SarkarMul ml_coarsen; - MultilevelCoarseAndSchedule coarsen_test(sched, ml_coarsen); - + MultilevelCoarseAndSchedule + coarsen_test(sched, ml_coarsen); + run_test(&coarsen_test); } @@ -357,7 +340,8 @@ BOOST_AUTO_TEST_CASE(SarkarMul_improver_test) { HillClimbingScheduler improver; SarkarMul ml_coarsen; - MultilevelCoarseAndSchedule coarsen_test(sched, improver, ml_coarsen); - + MultilevelCoarseAndSchedule + coarsen_test(sched, improver, ml_coarsen); + run_test(&coarsen_test); -} \ No newline at end of file +} diff --git a/tests/bsp_schedulers_mem_const.cpp b/tests/bsp_schedulers_mem_const.cpp index 7e3dead3..f1f39a34 100644 --- a/tests/bsp_schedulers_mem_const.cpp +++ b/tests/bsp_schedulers_mem_const.cpp @@ -18,45 +18,42 @@ limitations under the License. #define BOOST_TEST_MODULE BSP_SCHEDULERS #include - #include #include #include -#include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp" +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp" #include "osp/bsp/scheduler/GreedySchedulers/EtfScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp" #include "osp/bsp/scheduler/GreedySchedulers/VarianceFillup.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/LightEdgeVariancePartitioner.hpp" +#include "osp/bsp/scheduler/LoadBalanceScheduler/VariancePartitioner.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" #include "osp/bsp/scheduler/Serial.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" #include "test_graphs.hpp" using namespace osp; std::vector test_architectures() { return {"data/machine_params/p3.arch"}; } -template +template void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; int comm_weight = 1; for (const auto &v : dag.vertices()) { - dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 3 + 1)); dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 3 + 1)); } } -template +template void run_test_local_memory(Scheduler *test_scheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = test_graphs(); @@ -84,16 +81,15 @@ void run_test_local_memory(Scheduler *test_scheduler) { BspInstance instance; bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + instance.getComputationalDag()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); add_mem_weights(instance.getComputationalDag()); instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL); std::cout << "Memory constraint type: LOCAL" << std::endl; if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -101,7 +97,6 @@ void run_test_local_memory(Scheduler *test_scheduler) { const std::vector> bounds_to_test = {10, 20, 50, 100}; for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); BspSchedule schedule(instance); @@ -115,7 +110,7 @@ void run_test_local_memory(Scheduler *test_scheduler) { } } -template +template void run_test_persistent_transient_memory(Scheduler *test_scheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = test_graphs(); @@ -143,16 +138,15 @@ void run_test_persistent_transient_memory(Scheduler *test_scheduler) { BspInstance instance; bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + instance.getComputationalDag()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); add_mem_weights(instance.getComputationalDag()); instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::PERSISTENT_AND_TRANSIENT); std::cout << "Memory constraint type: PERSISTENT_AND_TRANSIENT" << std::endl; if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -160,7 +154,6 @@ void run_test_persistent_transient_memory(Scheduler *test_scheduler) { const std::vector> bounds_to_test = {50, 100}; for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); BspSchedule schedule(instance); @@ -174,7 +167,7 @@ void run_test_persistent_transient_memory(Scheduler *test_scheduler) { } } -template +template void run_test_local_in_out_memory(Scheduler *test_scheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = test_graphs(); @@ -202,16 +195,15 @@ void run_test_local_in_out_memory(Scheduler *test_scheduler) { BspInstance instance; bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + instance.getComputationalDag()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); add_mem_weights(instance.getComputationalDag()); instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL_IN_OUT); std::cout << "Memory constraint type: LOCAL_IN_OUT" << std::endl; if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -219,7 +211,6 @@ void run_test_local_in_out_memory(Scheduler *test_scheduler) { const std::vector> bounds_to_test = {10, 20, 50, 100}; for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); BspSchedule schedule(instance); @@ -233,7 +224,7 @@ void run_test_local_in_out_memory(Scheduler *test_scheduler) { } } -template +template void run_test_local_inc_edges_memory(Scheduler *test_scheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = test_graphs(); @@ -261,16 +252,15 @@ void run_test_local_inc_edges_memory(Scheduler *test_scheduler) { BspInstance instance; bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + instance.getComputationalDag()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); add_mem_weights(instance.getComputationalDag()); instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL_INC_EDGES); std::cout << "Memory constraint type: LOCAL_INC_EDGES" << std::endl; if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -278,7 +268,6 @@ void run_test_local_inc_edges_memory(Scheduler *test_scheduler) { const std::vector> bounds_to_test = {50, 100}; for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); BspSchedule schedule(instance); @@ -292,7 +281,7 @@ void run_test_local_inc_edges_memory(Scheduler *test_scheduler) { } } -template +template void run_test_local_inc_edges_2_memory(Scheduler *test_scheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = test_graphs(); @@ -320,16 +309,15 @@ void run_test_local_inc_edges_2_memory(Scheduler *test_scheduler) { BspInstance instance; bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + instance.getComputationalDag()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); add_mem_weights(instance.getComputationalDag()); instance.getArchitecture().setMemoryConstraintType(MEMORY_CONSTRAINT_TYPE::LOCAL_SOURCES_INC_EDGES); std::cout << "Memory constraint type: LOCAL_SOURCES_INC_EDGES" << std::endl; if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -337,7 +325,6 @@ void run_test_local_inc_edges_2_memory(Scheduler *test_scheduler) { const std::vector> bounds_to_test = {20, 50, 100}; for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); BspSchedule schedule(instance); @@ -352,7 +339,6 @@ void run_test_local_inc_edges_2_memory(Scheduler *test_scheduler) { } BOOST_AUTO_TEST_CASE(GreedyBspScheduler_local_test) { - using graph_impl_t = computational_dag_edge_idx_vector_impl_def_int_t; GreedyBspScheduler> test_1; @@ -369,7 +355,6 @@ BOOST_AUTO_TEST_CASE(GreedyBspScheduler_local_test) { } BOOST_AUTO_TEST_CASE(GrowLocalAutoCores_local_test) { - using graph_impl_t = computational_dag_edge_idx_vector_impl_def_int_t; GrowLocalAutoCores> test_1; @@ -386,7 +371,6 @@ BOOST_AUTO_TEST_CASE(GrowLocalAutoCores_local_test) { } BOOST_AUTO_TEST_CASE(BspLocking_local_test) { - using graph_impl_t = computational_dag_edge_idx_vector_impl_def_t; BspLocking> test_1; @@ -403,9 +387,7 @@ BOOST_AUTO_TEST_CASE(BspLocking_local_test) { } BOOST_AUTO_TEST_CASE(variance_local_test) { - - VarianceFillup> + VarianceFillup> test; run_test_local_memory(&test); } @@ -415,17 +397,16 @@ BOOST_AUTO_TEST_CASE(variance_local_test) { // VarianceFillup> // test; - -// kl_total_comm> kl; - + +// kl_total_comm> kl; + // ComboScheduler combo_test(test, kl); - + // run_test_local_memory(&combo_test); // }; - BOOST_AUTO_TEST_CASE(GreedyBspScheduler_persistent_transient_test) { - GreedyBspScheduler> test; @@ -433,65 +414,108 @@ BOOST_AUTO_TEST_CASE(GreedyBspScheduler_persistent_transient_test) { } BOOST_AUTO_TEST_CASE(EtfScheduler_persistent_transient_test) { - EtfScheduler> test; run_test_persistent_transient_memory(&test); } - BOOST_AUTO_TEST_CASE(VariancePartitioner_test) { - VariancePartitioner> test_linear; + VariancePartitioner> + test_linear; run_test_local_memory(&test_linear); - VariancePartitioner> test_flat; + VariancePartitioner> + test_flat; run_test_local_memory(&test_flat); - VariancePartitioner> test_superstep; + VariancePartitioner> + test_superstep; run_test_local_memory(&test_superstep); - VariancePartitioner> test_global; + VariancePartitioner> + test_global; run_test_local_memory(&test_global); - VariancePartitioner> test_linear_tp; + VariancePartitioner> + test_linear_tp; run_test_persistent_transient_memory(&test_linear_tp); - VariancePartitioner> test_flat_tp; + VariancePartitioner> + test_flat_tp; run_test_persistent_transient_memory(&test_flat_tp); - VariancePartitioner> test_superstep_tp; + VariancePartitioner> + test_superstep_tp; run_test_persistent_transient_memory(&test_superstep_tp); - VariancePartitioner> test_global_tp; + VariancePartitioner> + test_global_tp; run_test_persistent_transient_memory(&test_global_tp); - } - BOOST_AUTO_TEST_CASE(LightEdgeVariancePartitioner_test) { - LightEdgeVariancePartitioner> test_linear; + LightEdgeVariancePartitioner> + test_linear; run_test_local_memory(&test_linear); - LightEdgeVariancePartitioner> test_flat; + LightEdgeVariancePartitioner> + test_flat; run_test_local_memory(&test_flat); - LightEdgeVariancePartitioner> test_superstep; + LightEdgeVariancePartitioner> + test_superstep; run_test_local_memory(&test_superstep); - LightEdgeVariancePartitioner> test_global; + LightEdgeVariancePartitioner> + test_global; run_test_local_memory(&test_global); - LightEdgeVariancePartitioner> test_linear_tp; + LightEdgeVariancePartitioner> + test_linear_tp; run_test_persistent_transient_memory(&test_linear_tp); - LightEdgeVariancePartitioner> test_flat_tp; + LightEdgeVariancePartitioner> + test_flat_tp; run_test_persistent_transient_memory(&test_flat_tp); - LightEdgeVariancePartitioner> test_superstep_tp; + LightEdgeVariancePartitioner> + test_superstep_tp; run_test_persistent_transient_memory(&test_superstep_tp); - LightEdgeVariancePartitioner> test_global_tp; + LightEdgeVariancePartitioner> + test_global_tp; run_test_persistent_transient_memory(&test_global_tp); - } diff --git a/tests/coarser.cpp b/tests/coarser.cpp index 9c77703d..d0882512 100644 --- a/tests/coarser.cpp +++ b/tests/coarser.cpp @@ -18,7 +18,6 @@ limitations under the License. #define BOOST_TEST_MODULE COARSER_TEST #include - #include #include #include @@ -48,7 +47,6 @@ using namespace osp; using VertexType = vertex_idx_t; bool check_vertex_map(std::vector> &map, std::size_t size) { - std::unordered_set vertices; for (auto &v : map) { @@ -63,17 +61,16 @@ bool check_vertex_map(std::vector> &map, std::size_t siz return vertices.size() == size; } -template -bool check_vertex_map_constraints(std::vector> &map, ComputationalDag &dag, +template +bool check_vertex_map_constraints(std::vector> &map, + ComputationalDag &dag, v_type_t size_threshold, v_memw_t memory_threshold, v_workw_t work_threshold, v_commw_t communication_threshold) { - std::unordered_set vertices; for (auto &super_node : map) { - v_memw_t memory = 0; v_workw_t work = 0; v_commw_t communication = 0; @@ -87,7 +84,6 @@ bool check_vertex_map_constraints(std::vector> &map, Com } for (auto &v : super_node) { - memory += dag.vertex_mem_weight(v); work += dag.vertex_work_weight(v); communication += dag.vertex_comm_weight(v); @@ -117,25 +113,21 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test) { } for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl - << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << name_graph << std::endl; using graph_t = computational_dag_edge_idx_vector_impl_def_t; BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -190,26 +182,22 @@ BOOST_AUTO_TEST_CASE(coarser_hdagg_test_diff_graph_impl) { } for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl - << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << name_graph << std::endl; using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; using graph_t2 = computational_dag_vector_impl_def_t; BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -263,25 +251,21 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { } for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl - << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << name_graph << std::endl; using graph_t = computational_dag_edge_idx_vector_impl_def_t; BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -328,9 +312,8 @@ BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { } } -template +template void test_coarser_same_graph(Coarser &coarser) { - // BOOST_AUTO_TEST_CASE(coarser_bspschedule_test) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = tiny_spaa_graphs(); @@ -344,23 +327,19 @@ void test_coarser_same_graph(Coarser &coarser) { } for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl - << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << name_graph << std::endl; BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -372,7 +351,8 @@ void test_coarser_same_graph(Coarser &coarser) { GreedyBspScheduler scheduler; - bool coarse_success = coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); + bool coarse_success + = coarser.coarsenDag(instance.getComputationalDag(), coarse_instance.getComputationalDag(), reverse_vertex_map); BOOST_CHECK(coarse_success); vertex_map = coarser_util::invert_vertex_contraction_map(reverse_vertex_map); @@ -401,7 +381,6 @@ void test_coarser_same_graph(Coarser &coarser) { } BOOST_AUTO_TEST_CASE(coarser_funndel_bfs_test) { - using graph_t = computational_dag_edge_idx_vector_impl_def_t; FunnelBfs coarser; @@ -409,7 +388,9 @@ BOOST_AUTO_TEST_CASE(coarser_funndel_bfs_test) { FunnelBfs::FunnelBfs_parameters params{std::numeric_limits>::max(), std::numeric_limits>::max(), - std::numeric_limits::max(), false, true}; + std::numeric_limits::max(), + false, + true}; FunnelBfs coarser_params(params); @@ -422,7 +403,6 @@ BOOST_AUTO_TEST_CASE(coarser_funndel_bfs_test) { } BOOST_AUTO_TEST_CASE(coarser_top_sort_test) { - using graph_t = computational_dag_edge_idx_vector_impl_def_t; top_order_coarser coarser; @@ -475,26 +455,22 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSG) { } for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl - << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << name_graph << std::endl; using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; using graph_t2 = CSG; BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -552,26 +528,22 @@ BOOST_AUTO_TEST_CASE(coarser_SquashA_test_diff_graph_impl_CSGE) { } for (auto &filename_graph : filenames_graph) { - std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); - std::cout << std::endl - << "Graph: " << name_graph << std::endl; + std::cout << std::endl << "Graph: " << name_graph << std::endl; using graph_t1 = computational_dag_edge_idx_vector_impl_def_t; using graph_t2 = CSGE; BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -704,4 +676,4 @@ BOOST_AUTO_TEST_CASE(SquashAML_test) { SquashAMul coarser; test_coarser_same_graph(coarser); -} \ No newline at end of file +} diff --git a/tests/coarser_util.cpp b/tests/coarser_util.cpp index 47ac1738..61b418d7 100644 --- a/tests/coarser_util.cpp +++ b/tests/coarser_util.cpp @@ -17,11 +17,11 @@ limitations under the License. */ #define BOOST_TEST_MODULE COARSER_UTIL_TEST -#include +#include "osp/coarser/coarser_util.hpp" +#include #include -#include "osp/coarser/coarser_util.hpp" #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" using namespace osp; @@ -53,10 +53,16 @@ BOOST_AUTO_TEST_CASE(ExpansionMapValidity) { const std::vector>> expansionmap2 = {{0}, {2}, {3}}; BOOST_CHECK(not check_valid_expansion_map(expansionmap2)); - const std::vector>> expansionmap3 = {{0, 3}}; + const std::vector>> expansionmap3 = { + {0, 3} + }; BOOST_CHECK(not check_valid_expansion_map(expansionmap3)); - const std::vector>> expansionmap4 = {{0, 3}, {2, 1, 4}, {5}}; + const std::vector>> expansionmap4 = { + {0, 3}, + {2, 1, 4}, + {5} + }; BOOST_CHECK(check_valid_expansion_map(expansionmap4)); const std::vector>> expansionmap5 = {{0}, {}, {2}, {3}, {1}}; @@ -64,7 +70,10 @@ BOOST_AUTO_TEST_CASE(ExpansionMapValidity) { } BOOST_AUTO_TEST_CASE(ContractionMapCoarsening) { - std::set, vertex_idx_t>> edges({{0, 1}, {1, 2}}); + std::set, vertex_idx_t>> edges({ + {0, 1}, + {1, 2} + }); GraphType graph(6, edges); GraphType coarseGraph1; @@ -91,4 +100,4 @@ BOOST_AUTO_TEST_CASE(ContractionMapCoarsening) { for (const auto &vert : coarseGraph1.parents(1)) { BOOST_CHECK_EQUAL(vert, 0); } -} \ No newline at end of file +} diff --git a/tests/compact_sparse_graph.cpp b/tests/compact_sparse_graph.cpp index f567827d..ba191d70 100644 --- a/tests/compact_sparse_graph.cpp +++ b/tests/compact_sparse_graph.cpp @@ -17,9 +17,10 @@ limitations under the License. */ #define BOOST_TEST_MODULE Sparse_Compact_Graph +#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" + #include -#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" using namespace osp; @@ -62,7 +63,15 @@ BOOST_AUTO_TEST_CASE(No_Edges_Graph_reorder) { } BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { - const std::set> edges({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}}); + const std::set> edges({ + {0, 1}, + {1, 2}, + {2, 3}, + {3, 4}, + {4, 5}, + {5, 6}, + {6, 7} + }); Compact_Sparse_Graph graph(8, edges); @@ -141,9 +150,16 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { } } - BOOST_AUTO_TEST_CASE(LineGraph_reorder) { - const std::vector> edges({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}}); + const std::vector> edges({ + {0, 1}, + {1, 2}, + {2, 3}, + {3, 4}, + {4, 5}, + {5, 6}, + {6, 7} + }); Compact_Sparse_Graph graph(8, edges); @@ -230,9 +246,20 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) { } } - BOOST_AUTO_TEST_CASE(Graph1_keep_order) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); Compact_Sparse_Graph graph(11, edges); @@ -276,9 +303,7 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { cntr = 0; for (const auto &e : osp::out_edges(vert, graph)) { - BOOST_CHECK_EQUAL(target(e, graph), out_edges[vert][cntr++]); - } } @@ -312,15 +337,12 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { cntr = 0; for (const auto &e : osp::in_edges(vert, graph)) { - BOOST_CHECK_EQUAL(source(e, graph), in_edges[vert][cntr++]); - } } - + unsigned count = 0; - for (const auto & e: osp::edges(graph)) { - + for (const auto &e : osp::edges(graph)) { std::cout << e.source << " -> " << e.target << std::endl; count++; } @@ -337,7 +359,19 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { } BOOST_AUTO_TEST_CASE(Graph1_reorder) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); Compact_Sparse_Graph graph(11, edges); @@ -371,9 +405,9 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { }); for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_chld = 0; std::size_t cntr = 0; for (const auto &chld : graph.children(vert)) { @@ -381,7 +415,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { BOOST_CHECK_LE(previous_chld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) + != out_edges[ori_vert].cend()); previous_chld = chld; ++cntr; @@ -394,7 +429,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { } --cntr; - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[*it]) != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[*it]) + != out_edges[ori_vert].cend()); previous_chld = *it; } @@ -415,9 +451,9 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { }); for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_par = 0; std::size_t cntr = 0; for (const auto &par : graph.parents(vert)) { @@ -425,7 +461,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { BOOST_CHECK_LE(previous_par, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) + != in_edges[ori_vert].cend()); previous_par = par; ++cntr; @@ -438,12 +475,13 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { } --cntr; - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it]) != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it]) + != in_edges[ori_vert].cend()); previous_par = *it; } } - + for (const auto &vert : graph.vertices()) { BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size()); } @@ -454,7 +492,6 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { } BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { - computational_dag_edge_idx_vector_impl_def_t graph; using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx; @@ -498,12 +535,10 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { BOOST_CHECK_EQUAL(graph.num_edges(), 9); BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - Compact_Sparse_Graph copy_graph(graph.num_vertices(), edge_view(graph)); BOOST_CHECK_EQUAL(copy_graph.num_vertices(), 8); BOOST_CHECK_EQUAL(copy_graph.num_edges(), 9); - std::vector> out_edges({ {1, 2, 3}, {4, 6}, @@ -554,9 +589,9 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); for (const auto &vert : reorder_graph.vertices()) { - BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_chld = 0; std::size_t cntr = 0; for (const auto &chld : reorder_graph.children(vert)) { @@ -564,7 +599,8 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { BOOST_CHECK_LE(previous_chld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) + != out_edges[ori_vert].cend()); previous_chld = chld; ++cntr; @@ -572,9 +608,9 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { } for (const auto &vert : reorder_graph.vertices()) { - BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_par = 0; std::size_t cntr = 0; for (const auto &par : reorder_graph.parents(vert)) { @@ -582,7 +618,8 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { BOOST_CHECK_LE(previous_par, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) + != in_edges[ori_vert].cend()); previous_par = par; ++cntr; @@ -591,7 +628,19 @@ BOOST_AUTO_TEST_CASE(Graph_edge_contruction) { } BOOST_AUTO_TEST_CASE(Graph_work_weights_keep_order) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); @@ -599,16 +648,28 @@ BOOST_AUTO_TEST_CASE(Graph_work_weights_keep_order) { Compact_Sparse_Graph graph(11, edges, ww); for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[vert]); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]); const unsigned wt = static_cast(rand()); graph.set_vertex_work_weight(vert, wt); - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), wt); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), wt); } } BOOST_AUTO_TEST_CASE(Graph_work_weights_reorder) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); @@ -618,16 +679,28 @@ BOOST_AUTO_TEST_CASE(Graph_work_weights_reorder) { const std::vector &graph_perm = graph.get_pullback_permutation(); for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]); const unsigned wt = static_cast(rand()); graph.set_vertex_work_weight(graph_perm[vert], wt); - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), wt); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), wt); } } BOOST_AUTO_TEST_CASE(Graph_comm_weights_keep_order) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); @@ -638,20 +711,32 @@ BOOST_AUTO_TEST_CASE(Graph_comm_weights_keep_order) { Compact_Sparse_Graph graph(11, edges, ww, cw); for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[vert]); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]); } for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[vert]); - + BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[vert]); + const unsigned wt = static_cast(rand()); graph.set_vertex_comm_weight(vert, wt); - BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), wt); + BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), wt); } } BOOST_AUTO_TEST_CASE(Graph_comm_weights_reorder) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); @@ -664,20 +749,32 @@ BOOST_AUTO_TEST_CASE(Graph_comm_weights_reorder) { const std::vector &graph_perm = graph.get_pullback_permutation(); for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]); } for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[graph_perm[vert]]); const unsigned wt = static_cast(rand()); graph.set_vertex_comm_weight(graph_perm[vert], wt); - BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), wt); + BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), wt); } } BOOST_AUTO_TEST_CASE(Graph_mem_weights_keep_order) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); @@ -691,21 +788,33 @@ BOOST_AUTO_TEST_CASE(Graph_mem_weights_keep_order) { Compact_Sparse_Graph graph(11, edges, ww, cw, mw); for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[vert]); - BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[vert]); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]); + BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[vert]); } for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), mw[vert]); - + BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[vert]); + const unsigned wt = static_cast(rand()); graph.set_vertex_mem_weight(vert, wt); - BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), wt); + BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), wt); } } BOOST_AUTO_TEST_CASE(Graph_mem_weights_reorder) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); @@ -721,21 +830,33 @@ BOOST_AUTO_TEST_CASE(Graph_mem_weights_reorder) { const std::vector &graph_perm = graph.get_pullback_permutation(); for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[graph_perm[vert]]); - BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[graph_perm[vert]]); } for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), mw[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[graph_perm[vert]]); const unsigned wt = static_cast(rand()); graph.set_vertex_mem_weight(graph_perm[vert], wt); - BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), wt); + BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), wt); } } BOOST_AUTO_TEST_CASE(Graph_vtype_keep_order) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); @@ -752,22 +873,34 @@ BOOST_AUTO_TEST_CASE(Graph_vtype_keep_order) { Compact_Sparse_Graph graph(11, edges, ww, cw, mw, vt); for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[vert]); - BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[vert]); - BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), mw[vert]); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[vert]); + BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[vert]); + BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[vert]); } for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_type(vert), vt[vert]); - + BOOST_CHECK_EQUAL(graph.vertex_type(vert), vt[vert]); + const unsigned wt = static_cast(rand()); graph.set_vertex_type(vert, wt); - BOOST_CHECK_EQUAL( graph.vertex_type(vert), wt); + BOOST_CHECK_EQUAL(graph.vertex_type(vert), wt); } } BOOST_AUTO_TEST_CASE(Graph_vtype_reorder) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); std::vector ww(11); std::iota(ww.begin(), ww.end(), 0); @@ -786,26 +919,21 @@ BOOST_AUTO_TEST_CASE(Graph_vtype_reorder) { const std::vector &graph_perm = graph.get_pullback_permutation(); for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_work_weight(vert), ww[graph_perm[vert]]); - BOOST_CHECK_EQUAL( graph.vertex_comm_weight(vert), cw[graph_perm[vert]]); - BOOST_CHECK_EQUAL( graph.vertex_mem_weight(vert), mw[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), ww[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_comm_weight(vert), cw[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_mem_weight(vert), mw[graph_perm[vert]]); } for (auto vert : graph.vertices()) { - BOOST_CHECK_EQUAL( graph.vertex_type(vert), vt[graph_perm[vert]]); + BOOST_CHECK_EQUAL(graph.vertex_type(vert), vt[graph_perm[vert]]); const unsigned wt = static_cast(rand()); graph.set_vertex_type(graph_perm[vert], wt); - BOOST_CHECK_EQUAL( graph.vertex_type(vert), wt); + BOOST_CHECK_EQUAL(graph.vertex_type(vert), wt); } } - - - - BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { - computational_dag_edge_idx_vector_impl_def_t graph; using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx; @@ -849,12 +977,21 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { BOOST_CHECK_EQUAL(graph.num_edges(), 9); BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - - Compact_Sparse_Graph copy_graph(graph); + Compact_Sparse_Graph + copy_graph(graph); BOOST_CHECK_EQUAL(copy_graph.num_vertices(), 8); BOOST_CHECK_EQUAL(copy_graph.num_edges(), 9); - std::vector> out_edges({ {1, 2, 3}, {4, 6}, @@ -902,7 +1039,18 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { } } - Compact_Sparse_Graph reorder_graph(graph); + Compact_Sparse_Graph + reorder_graph(graph); BOOST_CHECK_EQUAL(reorder_graph.num_vertices(), 8); BOOST_CHECK_EQUAL(reorder_graph.num_edges(), 9); @@ -919,9 +1067,9 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { } for (const auto &vert : reorder_graph.vertices()) { - BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(reorder_graph.out_degree(vert), out_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_chld = 0; std::size_t cntr = 0; for (const auto &chld : reorder_graph.children(vert)) { @@ -929,7 +1077,8 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { BOOST_CHECK_LE(previous_chld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) + != out_edges[ori_vert].cend()); previous_chld = chld; ++cntr; @@ -937,9 +1086,9 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { } for (const auto &vert : reorder_graph.vertices()) { - BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(reorder_graph.in_degree(vert), in_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_par = 0; std::size_t cntr = 0; for (const auto &par : reorder_graph.parents(vert)) { @@ -947,7 +1096,8 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { BOOST_CHECK_LE(previous_par, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) + != in_edges[ori_vert].cend()); previous_par = par; ++cntr; @@ -955,10 +1105,20 @@ BOOST_AUTO_TEST_CASE(Graph_type_copy_contruction) { } } - - BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); Compact_Sparse_Graph graph(11, edges); Compact_Sparse_Graph copy_graph(graph); @@ -1018,7 +1178,7 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) { ++cntr; } } - + for (const auto &vert : copy_graph.vertices()) { BOOST_CHECK_EQUAL(copy_graph.vertex_work_weight(vert), 1 + in_edges[vert].size()); } @@ -1029,7 +1189,19 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_keep_order) { } BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); Compact_Sparse_Graph graph(11, edges); Compact_Sparse_Graph copy_graph(std::move(graph)); @@ -1089,7 +1261,7 @@ BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) { ++cntr; } } - + for (const auto &vert : copy_graph.vertices()) { BOOST_CHECK_EQUAL(copy_graph.vertex_work_weight(vert), 1 + in_edges[vert].size()); } @@ -1099,9 +1271,20 @@ BOOST_AUTO_TEST_CASE(Graph1_move_keep_order) { } } - BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); Compact_Sparse_Graph ori_graph(11, edges); Compact_Sparse_Graph graph(ori_graph); @@ -1136,9 +1319,9 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { }); for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_chld = 0; std::size_t cntr = 0; for (const auto &chld : graph.children(vert)) { @@ -1146,7 +1329,8 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { BOOST_CHECK_LE(previous_chld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) + != out_edges[ori_vert].cend()); previous_chld = chld; ++cntr; @@ -1168,9 +1352,9 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { }); for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_par = 0; std::size_t cntr = 0; for (const auto &par : graph.parents(vert)) { @@ -1178,13 +1362,14 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { BOOST_CHECK_LE(previous_par, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) + != in_edges[ori_vert].cend()); previous_par = par; ++cntr; } } - + for (const auto &vert : graph.vertices()) { BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size()); } @@ -1194,9 +1379,20 @@ BOOST_AUTO_TEST_CASE(Graph1_copy_reorder) { } } - BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); Compact_Sparse_Graph ori_graph(11, edges); Compact_Sparse_Graph graph(std::move(ori_graph)); @@ -1231,9 +1427,9 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { }); for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_chld = 0; std::size_t cntr = 0; for (const auto &chld : graph.children(vert)) { @@ -1241,7 +1437,8 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { BOOST_CHECK_LE(previous_chld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) + != out_edges[ori_vert].cend()); previous_chld = chld; ++cntr; @@ -1263,9 +1460,9 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { }); for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_par = 0; std::size_t cntr = 0; for (const auto &par : graph.parents(vert)) { @@ -1273,13 +1470,14 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { BOOST_CHECK_LE(previous_par, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) + != in_edges[ori_vert].cend()); previous_par = par; ++cntr; } } - + for (const auto &vert : graph.vertices()) { BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size()); } @@ -1287,4 +1485,4 @@ BOOST_AUTO_TEST_CASE(Graph1_move_reorder) { for (const auto &vert : graph.vertices()) { BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); } -} \ No newline at end of file +} diff --git a/tests/compact_sparse_graph_edge_desc.cpp b/tests/compact_sparse_graph_edge_desc.cpp index 1bf8d9bb..a8a4957b 100644 --- a/tests/compact_sparse_graph_edge_desc.cpp +++ b/tests/compact_sparse_graph_edge_desc.cpp @@ -17,10 +17,10 @@ limitations under the License. */ #define BOOST_TEST_MODULE Sparse_Compact_Graph_Edge_Desc -#include - #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph_edge_desc.hpp" +#include + using namespace osp; BOOST_AUTO_TEST_CASE(Empty_Graph_keep_order) { @@ -135,7 +135,6 @@ BOOST_AUTO_TEST_CASE(No_Edges_Graph_reorder) { BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices()); - std::vector perm(10, 0); std::iota(perm.begin(), perm.end(), 0); const std::vector &graph_perm = graph.get_pullback_permutation(); @@ -143,7 +142,15 @@ BOOST_AUTO_TEST_CASE(No_Edges_Graph_reorder) { } BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { - const std::vector> edges({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}}); + const std::vector> edges({ + {0, 1}, + {1, 2}, + {2, 3}, + {3, 4}, + {4, 5}, + {5, 6}, + {6, 7} + }); Compact_Sparse_Graph_EdgeDesc graph(8, edges); @@ -221,11 +228,10 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { BOOST_CHECK_EQUAL(graph.vertex_type(vert), 0); } - std::size_t edge_counter = 0; - for (const auto& edge : graph.edges()) { + for (const auto &edge : graph.edges()) { BOOST_CHECK_EQUAL(graph.source(edge), edge_counter); - BOOST_CHECK_EQUAL(graph.target(edge), edge_counter+1); + BOOST_CHECK_EQUAL(graph.target(edge), edge_counter + 1); BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); @@ -234,9 +240,9 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { BOOST_CHECK_EQUAL(edge_counter, graph.num_edges()); edge_counter = 0; - for (const auto& edge : osp::edges(graph)) { + for (const auto &edge : osp::edges(graph)) { BOOST_CHECK_EQUAL(source(edge, graph), edge_counter); - BOOST_CHECK_EQUAL(target(edge, graph), edge_counter+1); + BOOST_CHECK_EQUAL(target(edge, graph), edge_counter + 1); BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); @@ -245,7 +251,7 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { BOOST_CHECK_EQUAL(edge_counter, graph.num_edges()); std::size_t vert_counter = 0; - for (const auto& vert : graph.vertices()) { + for (const auto &vert : graph.vertices()) { for (const auto &edge : graph.in_edges(vert)) { BOOST_CHECK_EQUAL(graph.source(edge), vert - 1); BOOST_CHECK_EQUAL(graph.target(edge), vert); @@ -271,9 +277,16 @@ BOOST_AUTO_TEST_CASE(LineGraph_keep_order) { BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices()); } - BOOST_AUTO_TEST_CASE(LineGraph_reorder) { - const std::vector> edges({{0, 1}, {1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}}); + const std::vector> edges({ + {0, 1}, + {1, 2}, + {2, 3}, + {3, 4}, + {4, 5}, + {5, 6}, + {6, 7} + }); Compact_Sparse_Graph_EdgeDesc graph(8, edges); @@ -359,11 +372,10 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) { BOOST_CHECK_EQUAL(perm[vert], graph_perm[vert]); } - std::size_t edge_counter = 0; - for (const auto& edge : graph.edges()) { + for (const auto &edge : graph.edges()) { BOOST_CHECK_EQUAL(graph.source(edge), edge_counter); - BOOST_CHECK_EQUAL(graph.target(edge), edge_counter+1); + BOOST_CHECK_EQUAL(graph.target(edge), edge_counter + 1); BOOST_CHECK_EQUAL(edge, graph.edge(graph.source(edge), graph.target(edge))); @@ -372,7 +384,7 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) { BOOST_CHECK_EQUAL(edge_counter, graph.num_edges()); std::size_t vert_counter = 0; - for (const auto& vert : graph.vertices()) { + for (const auto &vert : graph.vertices()) { for (const auto &edge : graph.in_edges(vert)) { BOOST_CHECK_EQUAL(graph.source(edge), vert - 1); BOOST_CHECK_EQUAL(graph.target(edge), vert); @@ -387,9 +399,20 @@ BOOST_AUTO_TEST_CASE(LineGraph_reorder) { BOOST_CHECK_EQUAL(vert_counter, graph.num_vertices()); } - BOOST_AUTO_TEST_CASE(Graph1_keep_order) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); Compact_Sparse_Graph_EdgeDesc graph(11, edges); @@ -490,7 +513,7 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { ++edge_cntr; } BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges()); - + edge_cntr = 0; for (const auto &vert : graph.vertices()) { for (const auto &edge : graph.out_edges(vert)) { @@ -500,7 +523,6 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { } BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges()); - for (const auto &vert : graph.vertices()) { BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[vert].size()); } @@ -511,7 +533,19 @@ BOOST_AUTO_TEST_CASE(Graph1_keep_order) { } BOOST_AUTO_TEST_CASE(Graph1_reorder) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); Compact_Sparse_Graph_EdgeDesc graph(11, edges); @@ -545,9 +579,9 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { }); for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(graph.out_degree(vert), out_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_chld = 0; std::size_t cntr = 0; for (const auto &chld : graph.children(vert)) { @@ -555,7 +589,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { BOOST_CHECK_LE(previous_chld, chld); } - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[chld]) + != out_edges[ori_vert].cend()); previous_chld = chld; ++cntr; @@ -568,7 +603,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { } --cntr; - BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[*it]) != out_edges[ori_vert].cend()); + BOOST_CHECK(std::find(out_edges[ori_vert].cbegin(), out_edges[ori_vert].cend(), graph_perm[*it]) + != out_edges[ori_vert].cend()); previous_chld = *it; } @@ -598,9 +634,9 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { }); for (const auto &vert : graph.vertices()) { - BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[ graph_perm[vert] ].size()); + BOOST_CHECK_EQUAL(graph.in_degree(vert), in_edges[graph_perm[vert]].size()); std::size_t ori_vert = graph_perm[vert]; - + std::size_t previous_par = 0; std::size_t cntr = 0; for (const auto &par : graph.parents(vert)) { @@ -608,7 +644,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { BOOST_CHECK_LE(previous_par, par); } - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[par]) + != in_edges[ori_vert].cend()); previous_par = par; ++cntr; @@ -621,7 +658,8 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { } --cntr; - BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it]) != in_edges[ori_vert].cend()); + BOOST_CHECK(std::find(in_edges[ori_vert].cbegin(), in_edges[ori_vert].cend(), graph_perm[*it]) + != in_edges[ori_vert].cend()); previous_par = *it; } @@ -635,7 +673,7 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { } BOOST_CHECK_EQUAL(cntr, graph.in_degree(vert)); } - + for (const auto &vert : graph.vertices()) { BOOST_CHECK_EQUAL(graph.vertex_work_weight(vert), 1 + in_edges[graph_perm[vert]].size()); } @@ -653,7 +691,7 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { ++edge_cntr; } BOOST_CHECK_EQUAL(edge_cntr, graph.num_edges()); - + edge_cntr = 0; for (const auto &vert : graph.vertices()) { for (const auto &edge : graph.out_edges(vert)) { @@ -665,7 +703,19 @@ BOOST_AUTO_TEST_CASE(Graph1_reorder) { } BOOST_AUTO_TEST_CASE(Graph1_e_comm_keep_order) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); const std::vector edge_weights({3, 6, 12, 874, 134, 67, 234, 980, 123, 152, 34}); Compact_Sparse_Graph_EdgeDesc graph(11, edges); @@ -674,7 +724,7 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_keep_order) { BOOST_CHECK_EQUAL(graph.num_edges(), 11); for (std::size_t i = 0; i < edges.size(); ++i) { - const auto& [src, tgt] = edges[i]; + const auto &[src, tgt] = edges[i]; graph.set_edge_comm_weight(src, tgt, edge_weights[i]); } @@ -684,15 +734,26 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_keep_order) { auto it = std::find(edges.cbegin(), edges.cend(), std::make_pair(src, tgt)); BOOST_CHECK(it != edges.cend()); - + auto ind = std::distance(edges.cbegin(), it); BOOST_CHECK_EQUAL(edge_weights[static_cast(ind)], graph.edge_comm_weight(edge)); } } - BOOST_AUTO_TEST_CASE(Graph1_e_comm_reorder) { - const std::vector> edges({{0, 1}, {2, 3}, {6, 10}, {7, 9}, {0, 2}, {4, 6}, {1, 6}, {6, 7}, {5, 6}, {3, 7}, {1, 2}}); + const std::vector> edges({ + {0, 1}, + {2, 3}, + {6, 10}, + {7, 9}, + {0, 2}, + {4, 6}, + {1, 6}, + {6, 7}, + {5, 6}, + {3, 7}, + {1, 2} + }); const std::vector edge_weights({3, 6, 12, 874, 134, 67, 234, 980, 123, 152, 34}); Compact_Sparse_Graph_EdgeDesc graph(11, edges); @@ -706,7 +767,7 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_reorder) { BOOST_CHECK(std::is_permutation(perm.cbegin(), perm.cend(), graph_perm.cbegin(), graph_perm.cend())); for (std::size_t i = 0; i < edges.size(); ++i) { - const auto& [src, tgt] = edges[i]; + const auto &[src, tgt] = edges[i]; graph.set_edge_comm_weight(src, tgt, edge_weights[i]); } @@ -716,8 +777,8 @@ BOOST_AUTO_TEST_CASE(Graph1_e_comm_reorder) { auto it = std::find(edges.cbegin(), edges.cend(), std::make_pair(src, tgt)); BOOST_CHECK(it != edges.cend()); - + auto ind = std::distance(edges.cbegin(), it); BOOST_CHECK_EQUAL(edge_weights[static_cast(ind)], graph.edge_comm_weight(edge)); } -} \ No newline at end of file +} diff --git a/tests/connected_components_part.cpp b/tests/connected_components_part.cpp index 22360d2f..57031311 100644 --- a/tests/connected_components_part.cpp +++ b/tests/connected_components_part.cpp @@ -22,13 +22,12 @@ limitations under the License. #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/dag_divider/ConnectedComponentDivider.hpp" #include "osp/dag_divider/ConnectedComponentScheduler.hpp" -#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(ConnectedComponentPart_test) { - BspInstance instance; computational_dag_vector_impl_def_int_t &dag = instance.getComputationalDag(); using VertexType = vertex_idx_t; diff --git a/tests/cost_evaluation.cpp b/tests/cost_evaluation.cpp index 27f7660c..9375f4c8 100644 --- a/tests/cost_evaluation.cpp +++ b/tests/cost_evaluation.cpp @@ -30,7 +30,6 @@ limitations under the License. using namespace osp; BOOST_AUTO_TEST_CASE(test_cost_models_simple_dag) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; BspInstance instance; diff --git a/tests/cuthill_mckee.cpp b/tests/cuthill_mckee.cpp index c6b2f019..89cf42f0 100644 --- a/tests/cuthill_mckee.cpp +++ b/tests/cuthill_mckee.cpp @@ -17,13 +17,14 @@ limitations under the License. */ #define BOOST_TEST_MODULE cuthill_mckee +#include "osp/graph_algorithms/cuthill_mckee.hpp" + #include #include -#include "osp/graph_algorithms/cuthill_mckee.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "test_graphs.hpp" using namespace osp; @@ -32,7 +33,6 @@ using ComputationalDag = boost_graph_int_t; using VertexType = vertex_idx_t; BOOST_AUTO_TEST_CASE(cuthill_mckee_1) { - ComputationalDag dag; dag.add_vertex(2, 9); @@ -56,32 +56,32 @@ BOOST_AUTO_TEST_CASE(cuthill_mckee_1) { std::vector cm_wavefront = cuthill_mckee_wavefront(dag); std::vector expected_cm_wavefront = {0, 3, 1, 2, 6, 4, 5, 7}; - BOOST_CHECK_EQUAL_COLLECTIONS(cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(), - expected_cm_wavefront.end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(), expected_cm_wavefront.end()); cm_wavefront = cuthill_mckee_wavefront(dag, true); expected_cm_wavefront = {0, 2, 3, 1, 5, 6, 4, 7}; - BOOST_CHECK_EQUAL_COLLECTIONS(cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(), - expected_cm_wavefront.end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + cm_wavefront.begin(), cm_wavefront.end(), expected_cm_wavefront.begin(), expected_cm_wavefront.end()); std::vector cm_undirected; std::vector expected_cm_undirected; cm_undirected = cuthill_mckee_undirected(dag, true); expected_cm_undirected = {7, 3, 4, 0, 1, 2, 6, 5}; - BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), - expected_cm_undirected.end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); cm_undirected = cuthill_mckee_undirected(dag, false); expected_cm_undirected = {0, 3, 1, 2, 7, 6, 4, 5}; - BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), - expected_cm_undirected.end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); cm_undirected = cuthill_mckee_undirected(dag, true, true); expected_cm_undirected = {3, 4, 5, 1, 2, 7, 6, 0}; - BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), - expected_cm_undirected.end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); std::vector top_sort; for (const auto &vertex : priority_vec_top_sort_view(dag, cm_undirected)) { @@ -93,21 +93,21 @@ BOOST_AUTO_TEST_CASE(cuthill_mckee_1) { cm_undirected = cuthill_mckee_undirected(dag, false, true); expected_cm_undirected = {0, 2, 3, 1, 6, 7, 5, 4}; - BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), - expected_cm_undirected.end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); dag.add_edge(8, 9); dag.add_edge(9, 10); cm_undirected = cuthill_mckee_undirected(dag, true); expected_cm_undirected = {7, 3, 4, 0, 1, 2, 6, 5, 10, 9, 8}; - BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), - expected_cm_undirected.end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); cm_undirected = cuthill_mckee_undirected(dag, false); expected_cm_undirected = {0, 3, 1, 2, 7, 6, 4, 5, 8, 9, 10}; - BOOST_CHECK_EQUAL_COLLECTIONS(cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), - expected_cm_undirected.end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + cm_undirected.begin(), cm_undirected.end(), expected_cm_undirected.begin(), expected_cm_undirected.end()); } bool is_permutation(const std::vector &vec) { @@ -128,7 +128,6 @@ bool is_top_sort(const std::vector &vec, const ComputationalDag &dag } for (const auto &vertex : dag.vertices()) { - for (const auto &child : dag.children(vertex)) { if (position[vertex] > position[child]) { return false; @@ -140,7 +139,6 @@ bool is_top_sort(const std::vector &vec, const ComputationalDag &dag } BOOST_AUTO_TEST_CASE(cuthill_mckee_2) { - std::vector filenames_graph = tiny_spaa_graphs(); // Getting root git directory @@ -152,12 +150,10 @@ BOOST_AUTO_TEST_CASE(cuthill_mckee_2) { } for (auto &filename_graph : filenames_graph) { - ComputationalDag graph; auto status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), graph); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } else { @@ -182,4 +178,4 @@ BOOST_AUTO_TEST_CASE(cuthill_mckee_2) { BOOST_CHECK(is_permutation(top_sort)); BOOST_CHECK(is_top_sort(top_sort, graph)); } -} \ No newline at end of file +} diff --git a/tests/debug_merkle_divider.cpp b/tests/debug_merkle_divider.cpp index 5763d840..a9a7ed1e 100644 --- a/tests/debug_merkle_divider.cpp +++ b/tests/debug_merkle_divider.cpp @@ -16,6 +16,8 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ +#include + #include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/auxiliary/io/dot_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp" @@ -27,11 +29,10 @@ limitations under the License. #include "osp/coarser/coarser_util.hpp" #include "osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include using namespace osp; -template +template void check_partition_type_homogeneity(const GraphT &dag, const std::vector> &partition) { // Group partitions by their ID std::map, std::vector>> partitions; @@ -41,8 +42,9 @@ void check_partition_type_homogeneity(const GraphT &dag, const std::vector>(instance.getComputationalDag().vertex_comm_weight(v) * 0.01)); + instance.getComputationalDag().set_vertex_comm_weight( + v, static_cast>(instance.getComputationalDag().vertex_comm_weight(v) * 0.01)); } // Set up architecture @@ -105,7 +108,7 @@ int main(int argc, char *argv[]) { iso_scheduler.setCriticalPathThreshold(500); iso_scheduler.setOrbitLockRatio(0.5); iso_scheduler.setAllowTrimmedScheduler(false); - iso_scheduler.set_plot_dot_graphs(true); // Enable plotting for debug + iso_scheduler.set_plot_dot_graphs(true); // Enable plotting for debug std::cout << "Starting partition computation..." << std::endl; @@ -120,7 +123,8 @@ int main(int argc, char *argv[]) { std::cout << "Partition is " << (acyc ? "acyclic." : "not acyclic."); std::cout << "Partition computation finished." << std::endl; - std::cout << "Generated " << std::set>(partition.begin(), partition.end()).size() << " partitions." << std::endl; + std::cout << "Generated " << std::set>(partition.begin(), partition.end()).size() << " partitions." + << std::endl; return 0; } diff --git a/tests/directed_graph_algorithms.cpp b/tests/directed_graph_algorithms.cpp index 0b246503..da141811 100644 --- a/tests/directed_graph_algorithms.cpp +++ b/tests/directed_graph_algorithms.cpp @@ -22,6 +22,7 @@ limitations under the License. #include #include +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/graph_algorithms/computational_dag_util.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util_parallel.hpp" @@ -31,15 +32,12 @@ limitations under the License. #include "osp/graph_algorithms/directed_graph_util.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "test_utils.hpp" #include "test_graphs.hpp" +#include "test_utils.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(longest_edge_triangle_parallel) { - - using graph_t = boost_graph_int_t; // static_assert(std::is_base_of::value, "Class is not a scheduler!"); @@ -50,9 +48,7 @@ BOOST_AUTO_TEST_CASE(longest_edge_triangle_parallel) { for (auto &filename_graph : filenames_graph) { graph_t graph; - - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((project_root / filename_graph).string(), - graph); + bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((project_root / filename_graph).string(), graph); BOOST_CHECK(status_graph); @@ -63,16 +59,14 @@ BOOST_AUTO_TEST_CASE(longest_edge_triangle_parallel) { std::cout << "\n" << filename_graph << std::endl; std::cout << "Time for long_edges_in_triangles: " - << std::chrono::duration_cast(finish_time - start_time).count() << "ms" - << std::endl; + << std::chrono::duration_cast(finish_time - start_time).count() << "ms" << std::endl; start_time = std::chrono::high_resolution_clock::now(); auto deleted_edges_parallel = long_edges_in_triangles_parallel(graph); finish_time = std::chrono::high_resolution_clock::now(); std::cout << "Time for long_edges_in_triangles_parallel: " - << std::chrono::duration_cast(finish_time - start_time).count() << "ms" - << std::endl; + << std::chrono::duration_cast(finish_time - start_time).count() << "ms" << std::endl; BOOST_CHECK_EQUAL(deleted_edges.size(), deleted_edges_parallel.size()); @@ -84,4 +78,4 @@ BOOST_AUTO_TEST_CASE(longest_edge_triangle_parallel) { BOOST_CHECK(deleted_edges.find(edge) != deleted_edges.cend()); } } -} \ No newline at end of file +} diff --git a/tests/directed_graph_top_sort.cpp b/tests/directed_graph_top_sort.cpp index 0925d1ae..3b2703f9 100644 --- a/tests/directed_graph_top_sort.cpp +++ b/tests/directed_graph_top_sort.cpp @@ -18,6 +18,8 @@ limitations under the License. #define BOOST_TEST_MODULE ApproxEdgeReduction +#include "osp/graph_algorithms/directed_graph_top_sort.hpp" + #include #include #include @@ -25,7 +27,6 @@ limitations under the License. #include "osp/graph_algorithms/computational_dag_util.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" #include "osp/graph_algorithms/directed_graph_path_util.hpp" -#include "osp/graph_algorithms/directed_graph_top_sort.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" #include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" @@ -34,7 +35,6 @@ limitations under the License. using namespace osp; computational_dag_vector_impl_def_t constr_graph_1() { - computational_dag_vector_impl_def_t graph; using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; @@ -63,21 +63,26 @@ computational_dag_vector_impl_def_t constr_graph_1() { } BOOST_AUTO_TEST_CASE(test_util_1) { - const computational_dag_vector_impl_def_t graph = constr_graph_1(); // using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; } BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { - using VertexType = vertex_idx_t; - const std::vector> out( - - {{7}, {}, {0}, {2}, {}, {2, 0}, {1, 2, 0}, {}, {4}, {6, 1, 5}} - - ); + const std::vector> out({ + {7}, + {}, + {0}, + {2}, + {}, + {2, 0}, + {1, 2, 0}, + {}, + {4}, + {6, 1, 5} + }); const std::vector workW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); const std::vector commW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); @@ -205,7 +210,6 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { std::vector loc_view_top_sort; for (const auto &v : locality_top_sort_view(graph)) { - loc_view_top_sort.push_back(v); } @@ -270,14 +274,20 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { } BOOST_AUTO_TEST_CASE(top_sort_template_overload_csr) { - using VertexType = vertex_idx_t; - const std::vector> out( - - {{7}, {}, {0}, {2}, {}, {2, 0}, {1, 2, 0}, {}, {4}, {6, 1, 5}} - - ); + const std::vector> out({ + {7}, + {}, + {0}, + {2}, + {}, + {2, 0}, + {1, 2, 0}, + {}, + {4}, + {6, 1, 5} + }); const std::vector workW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); const std::vector commW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); @@ -299,4 +309,4 @@ BOOST_AUTO_TEST_CASE(top_sort_template_overload_csr) { BOOST_CHECK_EQUAL(expected_top_order[idx], v); ++idx; } -} \ No newline at end of file +} diff --git a/tests/directed_graph_util.cpp b/tests/directed_graph_util.cpp index 492f61e8..fe2c53bc 100644 --- a/tests/directed_graph_util.cpp +++ b/tests/directed_graph_util.cpp @@ -18,6 +18,8 @@ limitations under the License. #define BOOST_TEST_MODULE ApproxEdgeReduction +#include "osp/graph_algorithms/directed_graph_util.hpp" + #include #include #include @@ -27,14 +29,12 @@ limitations under the License. #include "osp/graph_algorithms/directed_graph_edge_view.hpp" #include "osp/graph_algorithms/directed_graph_path_util.hpp" #include "osp/graph_algorithms/directed_graph_top_sort.hpp" -#include "osp/graph_algorithms/directed_graph_util.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" using namespace osp; computational_dag_vector_impl_def_t constr_graph_1() { - computational_dag_vector_impl_def_t graph; using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; @@ -63,7 +63,6 @@ computational_dag_vector_impl_def_t constr_graph_1() { } BOOST_AUTO_TEST_CASE(test_empty_graph) { - computational_dag_vector_impl_def_t graph; using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; @@ -82,7 +81,6 @@ BOOST_AUTO_TEST_CASE(test_empty_graph) { } BOOST_AUTO_TEST_CASE(test_util_1) { - computational_dag_vector_impl_def_t graph = constr_graph_1(); using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; @@ -403,7 +401,6 @@ BOOST_AUTO_TEST_CASE(test_util_1) { size_t i = 0; for (const auto &e : edge_view(graph)) { - BOOST_CHECK_EQUAL(e.source, edge_source[i]); BOOST_CHECK_EQUAL(e.target, edge_target[i]); @@ -423,14 +420,20 @@ BOOST_AUTO_TEST_CASE(test_util_1) { } BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { - using VertexType = vertex_idx_t; - const std::vector> out( - - {{7}, {}, {0}, {2}, {}, {2, 0}, {1, 2, 0}, {}, {4}, {6, 1, 5}} - - ); + const std::vector> out({ + {7}, + {}, + {0}, + {2}, + {}, + {2, 0}, + {1, 2, 0}, + {}, + {4}, + {6, 1, 5} + }); const std::vector workW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); const std::vector commW({1, 1, 1, 1, 2, 3, 2, 1, 1, 1}); @@ -480,16 +483,15 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { for (const auto &vertex : graph.vertices()) { num_edges += graph.out_degree(vertex); for (const auto &parent : graph.parents(vertex)) { - BOOST_CHECK(std::any_of(graph.children(parent).cbegin(), graph.children(parent).cend(), - [vertex](VertexType k) { return k == vertex; })); + BOOST_CHECK(std::any_of( + graph.children(parent).cbegin(), graph.children(parent).cend(), [vertex](VertexType k) { return k == vertex; })); } } for (const auto &vertex : graph.vertices()) { for (const auto &child : graph.children(vertex)) { - - BOOST_CHECK(std::any_of(graph.parents(child).cbegin(), graph.parents(child).cend(), - [vertex](VertexType k) { return k == vertex; })); + BOOST_CHECK(std::any_of( + graph.parents(child).cbegin(), graph.parents(child).cend(), [vertex](VertexType k) { return k == vertex; })); } } @@ -563,8 +565,8 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { bool_c[i] = true; } - BOOST_CHECK(GetFilteredTopOrder(bool_a, graph) == std::vector({0, 8}) || - GetFilteredTopOrder(bool_a, graph) == std::vector({8, 0})); + BOOST_CHECK(GetFilteredTopOrder(bool_a, graph) == std::vector({0, 8}) + || GetFilteredTopOrder(bool_a, graph) == std::vector({8, 0})); BOOST_CHECK(GetFilteredTopOrder(bool_b, graph)[3] == 2); BOOST_CHECK(GetFilteredTopOrder(bool_c, graph) == std::vector({9, 6, 1})); @@ -626,7 +628,6 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { for (unsigned loops = 0; loops < 10; loops++) { for (unsigned noise = 0; noise < 6; noise++) { for (auto &pois_para : poisson_params) { - std::vector poset_int_map = get_strict_poset_integer_map(noise, pois_para, graph); for (const auto &vertex : graph.vertices()) { @@ -642,7 +643,13 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { auto wavefronts = compute_wavefronts(graph); - std::vector> expected_wavefronts = {{3, 8, 9}, {4, 6, 5}, {1, 2}, {0}, {7}}; + std::vector> expected_wavefronts = { + {3, 8, 9}, + {4, 6, 5}, + {1, 2}, + {0}, + {7} + }; size_t size = 0; size_t counter = 0; @@ -650,8 +657,8 @@ BOOST_AUTO_TEST_CASE(ComputationalDagConstructor) { size += wavefront.size(); BOOST_CHECK(!wavefront.empty()); - BOOST_CHECK_EQUAL_COLLECTIONS(wavefront.begin(), wavefront.end(), expected_wavefronts[counter].begin(), - expected_wavefronts[counter].end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + wavefront.begin(), wavefront.end(), expected_wavefronts[counter].begin(), expected_wavefronts[counter].end()); counter++; } @@ -707,4 +714,4 @@ BOOST_AUTO_TEST_CASE(test_edge_view_indexed_access) { // Check out of bounds auto oob_it = decltype(all_edges)::iterator(graph.num_edges() + 5, graph); BOOST_CHECK(oob_it == all_edges.end()); -} \ No newline at end of file +} diff --git a/tests/divisors.cpp b/tests/divisors.cpp index 74eb43d7..f9f7956c 100644 --- a/tests/divisors.cpp +++ b/tests/divisors.cpp @@ -17,10 +17,10 @@ limitations under the License. */ #define BOOST_TEST_MODULE Divisor -#include - #include "osp/auxiliary/math/divisors.hpp" +#include + using namespace osp; BOOST_AUTO_TEST_CASE(IntegerSqrt) { @@ -45,7 +45,7 @@ BOOST_AUTO_TEST_CASE(Divisors) { BOOST_CHECK_EQUAL(num % div, 0U); } std::cout << "\n"; - + auto it = divs.begin(); for (std::size_t i = 1U; i <= num; ++i) { if (num % i == 0) { diff --git a/tests/eft_subgraph_scheduler.cpp b/tests/eft_subgraph_scheduler.cpp index e8dec670..3869b8ec 100644 --- a/tests/eft_subgraph_scheduler.cpp +++ b/tests/eft_subgraph_scheduler.cpp @@ -19,24 +19,23 @@ limitations under the License. #define BOOST_TEST_MODULE EftSubgraphScheduler #include +#include "osp/bsp/model/BspInstance.hpp" #include "osp/dag_divider/isomorphism_divider/EftSubgraphScheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/bsp/model/BspInstance.hpp" using namespace osp; -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_SimpleChain) -{ +BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_SimpleChain) { using graph_t = computational_dag_vector_impl_def_t; // 1. Setup Instance BspInstance instance; - auto& dag = instance.getComputationalDag(); + auto &dag = instance.getComputationalDag(); // Create a simple coarse-grained DAG: 0 -> 1 -> 2 - dag.add_vertex(100, 1, 0); // node 0 - dag.add_vertex(200, 1, 0); // node 1 - dag.add_vertex(300, 1, 0); // node 2 + dag.add_vertex(100, 1, 0); // node 0 + dag.add_vertex(200, 1, 0); // node 1 + dag.add_vertex(300, 1, 0); // node 2 dag.add_edge(0, 1); dag.add_edge(1, 2); @@ -48,7 +47,7 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_SimpleChain) std::vector multiplicities = {1, 2, 1}; std::vector max_procs = {100, 100, 100}; std::vector>> required_proc_types(3); - + // Node 0: work 100, mult 1. Needs type 0. required_proc_types[0] = {100, 0}; // Node 1: work 200, mult 2. Needs type 0 and 1. @@ -75,19 +74,18 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_SimpleChain) BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[2][1], 2); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin) -{ +BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin) { using graph_t = computational_dag_vector_impl_def_t; // 1. Setup Instance BspInstance instance; - auto& dag = instance.getComputationalDag(); + auto &dag = instance.getComputationalDag(); // Create a fork-join DAG: 0 -> {1,2} -> 3 - dag.add_vertex(100, 1, 0); // node 0 - dag.add_vertex(200, 1, 0); // node 1 - dag.add_vertex(300, 1, 0); // node 2 - dag.add_vertex(100, 1, 0); // node 3 + dag.add_vertex(100, 1, 0); // node 0 + dag.add_vertex(200, 1, 0); // node 1 + dag.add_vertex(300, 1, 0); // node 2 + dag.add_vertex(100, 1, 0); // node 3 dag.add_edge(0, 1); dag.add_edge(0, 2); dag.add_edge(1, 3); @@ -101,7 +99,7 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin) std::vector multiplicities = {1, 2, 1, 4}; std::vector max_procs = {100, 100, 100, 100}; std::vector>> required_proc_types(4); - + // All nodes need type 0 required_proc_types[0] = {100}; required_proc_types[1] = {200}; @@ -139,16 +137,15 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ForkJoin) BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[3][0], 1); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_Deadlock) -{ +BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_Deadlock) { using graph_t = computational_dag_vector_impl_def_t; // 1. Setup Instance BspInstance instance; - auto& dag = instance.getComputationalDag(); + auto &dag = instance.getComputationalDag(); // Create a single-node DAG - dag.add_vertex(100, 1, 0); // node 0 + dag.add_vertex(100, 1, 0); // node 0 // Setup Architecture: 1 processor of type 0 instance.getArchitecture().setProcessorsWithTypes({0}); @@ -171,20 +168,19 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_Deadlock) BOOST_CHECK_LT(schedule.makespan, 0.0); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ComplexDAG) -{ +BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ComplexDAG) { using graph_t = computational_dag_vector_impl_def_t; // 1. Setup Instance BspInstance instance; - auto& dag = instance.getComputationalDag(); - - dag.add_vertex(50, 1, 0); // 0 - dag.add_vertex(100, 1, 0); // 1 - dag.add_vertex(150, 1, 0); // 2 - dag.add_vertex(80, 1, 0); // 3 - dag.add_vertex(120, 1, 0); // 4 - dag.add_vertex(60, 1, 0); // 5 + auto &dag = instance.getComputationalDag(); + + dag.add_vertex(50, 1, 0); // 0 + dag.add_vertex(100, 1, 0); // 1 + dag.add_vertex(150, 1, 0); // 2 + dag.add_vertex(80, 1, 0); // 3 + dag.add_vertex(120, 1, 0); // 4 + dag.add_vertex(60, 1, 0); // 5 dag.add_edge(0, 1); dag.add_edge(0, 2); dag.add_edge(1, 3); @@ -199,14 +195,14 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ComplexDAG) // 2. Setup Scheduler Inputs std::vector multiplicities = {1, 2, 1, 4, 2, 1}; - std::vector max_procs = {100, 100, 100, 100, 100, 100}; + std::vector max_procs = {100, 100, 100, 100, 100, 100}; std::vector>> required_proc_types(6); - required_proc_types[0] = {50, 0}; // Job 0: needs T0 - required_proc_types[1] = {100, 0}; // Job 1: needs T0 - required_proc_types[2] = {0, 150}; // Job 2: needs T1 - required_proc_types[3] = {40, 40}; // Job 3: needs T0 & T1 - required_proc_types[4] = {0, 120}; // Job 4: needs T1 - required_proc_types[5] = {60, 0}; // Job 5: needs T0 + required_proc_types[0] = {50, 0}; // Job 0: needs T0 + required_proc_types[1] = {100, 0}; // Job 1: needs T0 + required_proc_types[2] = {0, 150}; // Job 2: needs T1 + required_proc_types[3] = {40, 40}; // Job 3: needs T0 & T1 + required_proc_types[4] = {0, 120}; // Job 4: needs T1 + required_proc_types[5] = {60, 0}; // Job 5: needs T0 // 3. Run Scheduler EftSubgraphScheduler scheduler; @@ -225,20 +221,19 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ComplexDAG) BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[5][0], 4); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ResourceContention) -{ +BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ResourceContention) { using graph_t = computational_dag_vector_impl_def_t; // 1. Setup Instance BspInstance instance; - auto& dag = instance.getComputationalDag(); + auto &dag = instance.getComputationalDag(); // Create a fork-join DAG: 0 -> {1,2,3} -> 4 - dag.add_vertex(10, 1, 0); // 0 - dag.add_vertex(100, 1, 0); // 1 (high rank) - dag.add_vertex(50, 1, 0); // 2 (mid rank) - dag.add_vertex(20, 1, 0); // 3 (low rank) - dag.add_vertex(10, 1, 0); // 4 + dag.add_vertex(10, 1, 0); // 0 + dag.add_vertex(100, 1, 0); // 1 (high rank) + dag.add_vertex(50, 1, 0); // 2 (mid rank) + dag.add_vertex(20, 1, 0); // 3 (low rank) + dag.add_vertex(10, 1, 0); // 4 dag.add_edge(0, 1); dag.add_edge(0, 2); dag.add_edge(0, 3); @@ -286,18 +281,17 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ResourceContention) BOOST_CHECK_EQUAL(schedule.node_assigned_worker_per_type[4][0], 4); } -BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ProportionalAllocation) -{ +BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ProportionalAllocation) { using graph_t = computational_dag_vector_impl_def_t; // 1. Setup Instance BspInstance instance; - auto& dag = instance.getComputationalDag(); + auto &dag = instance.getComputationalDag(); // Create a fork DAG: 0 -> {1,2} - dag.add_vertex(10, 1, 0); // 0 - dag.add_vertex(300, 1, 0); // 1 (high rank) - dag.add_vertex(100, 1, 0); // 2 (low rank) + dag.add_vertex(10, 1, 0); // 0 + dag.add_vertex(300, 1, 0); // 1 (high rank) + dag.add_vertex(100, 1, 0); // 2 (low rank) dag.add_edge(0, 1); dag.add_edge(0, 2); @@ -330,7 +324,7 @@ BOOST_AUTO_TEST_CASE(EftSubgraphScheduler_ProportionalAllocation) // Job 1 finishes at 1 + 300/7 = 1 + 42.857... = 43.857... // Job 2 finishes at 1 + 100/3 = 1 + 33.333... = 34.333... // Makespan is 43.857... - BOOST_CHECK_CLOSE(schedule.makespan, 1.0 + 300.0/7.0, 1e-9); + BOOST_CHECK_CLOSE(schedule.makespan, 1.0 + 300.0 / 7.0, 1e-9); BOOST_REQUIRE_EQUAL(schedule.node_assigned_worker_per_type.size(), 3); // Job 0: 10 workers diff --git a/tests/filereader.cpp b/tests/filereader.cpp index 6e64fd63..e95ad03c 100644 --- a/tests/filereader.cpp +++ b/tests/filereader.cpp @@ -18,22 +18,20 @@ limitations under the License. #define BOOST_TEST_MODULE File_Reader #include +#include +#include -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" #include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/auxiliary/io/dot_graph_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/auxiliary/io/mtx_graph_file_reader.hpp" -#include -#include +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" using namespace osp; - BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) { - // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); std::cout << cwd << std::endl; @@ -44,23 +42,23 @@ BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) { computational_dag_vector_impl_def_t graph; - bool status = - file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph); + bool status + = file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph); std::cout << "STATUS:" << status << std::endl; BOOST_CHECK(status); BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(graph.num_edges(), 19); + BOOST_CHECK_EQUAL(graph.num_edges(), 19); // ---- Node 0 - std::vector p0{ }; + std::vector p0{}; std::vector c0{4, 6, 3, 5, 2}; BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(0).begin(), graph.parents(0).end(), p0.begin(), p0.end()); BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(0).begin(), graph.children(0).end(), c0.begin(), c0.end()); // ---- Node 1 - std::vector p1{ }; + std::vector p1{}; std::vector c1{3, 5, 2, 6}; BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(1).begin(), graph.parents(1).end(), p1.begin(), p1.end()); BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(1).begin(), graph.children(1).end(), c1.begin(), c1.end()); @@ -85,7 +83,7 @@ BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) { // ---- Node 5 std::vector p5{0, 1, 2, 3, 4}; - std::vector c5{ }; + std::vector c5{}; BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(5).begin(), graph.parents(5).end(), p5.begin(), p5.end()); BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(5).begin(), graph.children(5).end(), c5.begin(), c5.end()); @@ -97,15 +95,12 @@ BOOST_AUTO_TEST_CASE(test_mtx_computational_dag_vector_impl) { // ---- Node 7 std::vector p7{3, 4, 6}; - std::vector c7{ }; + std::vector c7{}; BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(7).begin(), graph.parents(7).end(), p7.begin(), p7.end()); BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(7).begin(), graph.children(7).end(), c7.begin(), c7.end()); - } - BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) { - // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); std::cout << cwd << std::endl; @@ -116,23 +111,23 @@ BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) { boost_graph_int_t graph; - bool status = - file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph); + bool status + = file_reader::readComputationalDagMartixMarketFormat((cwd / "data/mtx_tests/ErdosRenyi_8_19_A.mtx").string(), graph); std::cout << "STATUS:" << status << std::endl; BOOST_CHECK(status); BOOST_CHECK_EQUAL(graph.num_vertices(), 8); - BOOST_CHECK_EQUAL(graph.num_edges(), 19); + BOOST_CHECK_EQUAL(graph.num_edges(), 19); // ---- Node 0 - std::vector p0{ }; + std::vector p0{}; std::vector c0{4, 6, 3, 5, 2}; BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(0).begin(), graph.parents(0).end(), p0.begin(), p0.end()); BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(0).begin(), graph.children(0).end(), c0.begin(), c0.end()); // ---- Node 1 - std::vector p1{ }; + std::vector p1{}; std::vector c1{3, 5, 2, 6}; BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(1).begin(), graph.parents(1).end(), p1.begin(), p1.end()); BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(1).begin(), graph.children(1).end(), c1.begin(), c1.end()); @@ -157,7 +152,7 @@ BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) { // ---- Node 5 std::vector p5{0, 1, 2, 3, 4}; - std::vector c5{ }; + std::vector c5{}; BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(5).begin(), graph.parents(5).end(), p5.begin(), p5.end()); BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(5).begin(), graph.children(5).end(), c5.begin(), c5.end()); @@ -169,16 +164,12 @@ BOOST_AUTO_TEST_CASE(test_mtx_boost_graph) { // ---- Node 7 std::vector p7{3, 4, 6}; - std::vector c7{ }; + std::vector c7{}; BOOST_CHECK_EQUAL_COLLECTIONS(graph.parents(7).begin(), graph.parents(7).end(), p7.begin(), p7.end()); BOOST_CHECK_EQUAL_COLLECTIONS(graph.children(7).begin(), graph.children(7).end(), c7.begin(), c7.end()); - } - - BOOST_AUTO_TEST_CASE(test_bicgstab) { - // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); std::cout << cwd << std::endl; @@ -189,15 +180,14 @@ BOOST_AUTO_TEST_CASE(test_bicgstab) { computational_dag_vector_impl_def_t graph; - bool status = - file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph); + bool status + = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph); BOOST_CHECK(status); BOOST_CHECK_EQUAL(graph.num_vertices(), 54); } BOOST_AUTO_TEST_CASE(test_hdag_boost) { - // Getting root git directory std::filesystem::path cwd = std::filesystem::current_path(); std::cout << cwd << std::endl; @@ -208,15 +198,14 @@ BOOST_AUTO_TEST_CASE(test_hdag_boost) { boost_graph_int_t graph; - bool status = - file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph); + bool status + = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph); BOOST_CHECK(status); BOOST_CHECK_EQUAL(graph.num_vertices(), 54); } BOOST_AUTO_TEST_CASE(test_arch_smpl) { - std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { @@ -232,11 +221,9 @@ BOOST_AUTO_TEST_CASE(test_arch_smpl) { BOOST_CHECK_EQUAL(arch.communicationCosts(), 3); BOOST_CHECK_EQUAL(arch.synchronisationCosts(), 5); BOOST_CHECK_EQUAL(arch.getMemoryConstraintType(), MEMORY_CONSTRAINT_TYPE::NONE); - } BOOST_AUTO_TEST_CASE(test_arch_smpl_signed) { - std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { @@ -252,12 +239,9 @@ BOOST_AUTO_TEST_CASE(test_arch_smpl_signed) { BOOST_CHECK_EQUAL(arch.communicationCosts(), 3); BOOST_CHECK_EQUAL(arch.synchronisationCosts(), 5); BOOST_CHECK_EQUAL(arch.getMemoryConstraintType(), MEMORY_CONSTRAINT_TYPE::NONE); - } BOOST_AUTO_TEST_CASE(test_k_means) { - - std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { @@ -271,8 +255,7 @@ BOOST_AUTO_TEST_CASE(test_k_means) { computational_dag_vector_impl_def_t graph; - bool status = - file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph); + bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph); BOOST_CHECK(status); BOOST_CHECK_EQUAL(graph.num_vertices(), 40); @@ -285,8 +268,7 @@ BOOST_AUTO_TEST_CASE(test_k_means) { computational_dag_edge_idx_vector_impl_def_t graph2; - status = - file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph2); + status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_k-means.hdag").string(), graph2); BOOST_CHECK(status); BOOST_CHECK_EQUAL(graph2.num_vertices(), 40); @@ -299,8 +281,6 @@ BOOST_AUTO_TEST_CASE(test_k_means) { } BOOST_AUTO_TEST_CASE(test_dot_graph) { - - std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { @@ -314,8 +294,7 @@ BOOST_AUTO_TEST_CASE(test_dot_graph) { computational_dag_vector_impl_def_t graph; - bool status = - file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph); + bool status = file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph); BOOST_CHECK(status); BOOST_CHECK_EQUAL(graph.num_vertices(), 11); @@ -328,13 +307,9 @@ BOOST_AUTO_TEST_CASE(test_dot_graph) { BOOST_CHECK_EQUAL(graph.vertex_mem_weight(v), mem[v]); BOOST_CHECK_EQUAL(graph.vertex_type(v), type[v]); } - - } BOOST_AUTO_TEST_CASE(test_dot_graph_boost) { - - std::filesystem::path cwd = std::filesystem::current_path(); while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { @@ -348,8 +323,7 @@ BOOST_AUTO_TEST_CASE(test_dot_graph_boost) { boost_graph_int_t graph; - bool status = - file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph); + bool status = file_reader::readComputationalDagDotFormat((cwd / "data/dot/smpl_dot_graph_1.dot").string(), graph); BOOST_CHECK(status); BOOST_CHECK_EQUAL(graph.num_vertices(), 11); @@ -362,6 +336,4 @@ BOOST_AUTO_TEST_CASE(test_dot_graph_boost) { BOOST_CHECK_EQUAL(graph.vertex_mem_weight(v), mem[v]); BOOST_CHECK_EQUAL(graph.vertex_type(v), type[v]); } - - -} \ No newline at end of file +} diff --git a/tests/graph_vector_adapter.cpp b/tests/graph_vector_adapter.cpp index e96a6be6..66fd7595 100644 --- a/tests/graph_vector_adapter.cpp +++ b/tests/graph_vector_adapter.cpp @@ -22,40 +22,65 @@ limitations under the License. #include #include -#include "osp/graph_algorithms/directed_graph_util.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp" -#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" -#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp" -#include "osp/bsp/scheduler/Serial.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyChildren.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyMetaScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" +#include "osp/bsp/scheduler/Serial.hpp" +#include "osp/coarser/Sarkar/Sarkar.hpp" +#include "osp/coarser/Sarkar/SarkarMul.hpp" #include "osp/coarser/coarser_util.hpp" #include "osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp" +#include "osp/graph_algorithms/directed_graph_util.hpp" +#include "osp/graph_implementations/adj_list_impl/compact_sparse_graph.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/coarser/Sarkar/Sarkar.hpp" -#include "osp/coarser/Sarkar/SarkarMul.hpp" +#include "osp/graph_implementations/adj_list_impl/dag_vector_adapter.hpp" +#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" using namespace osp; - BOOST_AUTO_TEST_CASE(test_dag_vector_adapter_edge) { - - std::vector> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}}; - - std::vector> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {4, 3}}; + std::vector> out_neighbors{ + {1, 2, 3}, + {4, 6}, + {4, 5}, + {7}, + {7}, + {}, + {}, + {} + }; + + std::vector> in_neighbors{ + {}, + {0}, + {0}, + {0}, + {1, 2}, + {2}, + {1}, + {4, 3} + }; using v_impl = cdag_vertex_impl; - using graph_t = dag_vector_adapter; + using graph_t = dag_vector_adapter; using graph_constr_t = computational_dag_edge_idx_vector_impl; - using CoarseGraphType = Compact_Sparse_Graph, std::size_t, v_workw_t, v_workw_t, v_workw_t, v_type_t>; + using CoarseGraphType = Compact_Sparse_Graph, + std::size_t, + v_workw_t, + v_workw_t, + v_workw_t, + v_type_t>; graph_t graph(out_neighbors, in_neighbors); - + for (auto v : graph.vertices()) { graph.set_vertex_work_weight(v, 10); } @@ -63,13 +88,13 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter_edge) { BspInstance instance; instance.getComputationalDag() = graph; - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); instance.setDiagonalCompatibilityMatrix(2); instance.setSynchronisationCosts(1000); instance.setCommunicationCosts(1); - - // Set up the scheduler GrowLocalAutoCores growlocal; BspLocking locking; @@ -105,20 +130,46 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter_edge) { BOOST_CHECK(acyc); } - BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { - - std::vector> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}}; - - std::vector> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {4, 3}}; + std::vector> out_neighbors{ + {1, 2, 3}, + {4, 6}, + {4, 5}, + {7}, + {7}, + {}, + {}, + {} + }; + + std::vector> in_neighbors{ + {}, + {0}, + {0}, + {0}, + {1, 2}, + {2}, + {1}, + {4, 3} + }; using v_impl = cdag_vertex_impl; - using graph_t = dag_vector_adapter; + using graph_t = dag_vector_adapter; using graph_constr_t = computational_dag_vector_impl; - using CoarseGraphType = Compact_Sparse_Graph, std::size_t, v_workw_t, v_workw_t, v_workw_t, v_type_t>; + using CoarseGraphType = Compact_Sparse_Graph, + std::size_t, + v_workw_t, + v_workw_t, + v_workw_t, + v_type_t>; graph_t graph(out_neighbors, in_neighbors); - + for (auto v : graph.vertices()) { graph.set_vertex_work_weight(v, 10); } @@ -126,13 +177,13 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { BspInstance instance; instance.getComputationalDag() = graph; - instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1 , 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); + instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}); instance.setDiagonalCompatibilityMatrix(2); instance.setSynchronisationCosts(1000); instance.setCommunicationCosts(1); - - // Set up the scheduler GrowLocalAutoCores growlocal; BspLocking locking; @@ -166,4 +217,4 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { acyc = is_acyclic(coarse_dag); BOOST_CHECK(acyc); -} \ No newline at end of file +} diff --git a/tests/graph_vector_edge_desc_impl.cpp b/tests/graph_vector_edge_desc_impl.cpp index d079bf1a..1c6770c9 100644 --- a/tests/graph_vector_edge_desc_impl.cpp +++ b/tests/graph_vector_edge_desc_impl.cpp @@ -22,16 +22,15 @@ limitations under the License. #include #include -#include "osp/graph_algorithms/directed_graph_util.hpp" -#include "osp/graph_algorithms/directed_graph_path_util.hpp" #include "osp/graph_algorithms/directed_graph_edge_desc_util.hpp" +#include "osp/graph_algorithms/directed_graph_path_util.hpp" +#include "osp/graph_algorithms/directed_graph_util.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" using namespace osp; computational_dag_edge_idx_vector_impl_def_t constr_graph_1() { - computational_dag_edge_idx_vector_impl_def_t graph; using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx; @@ -79,14 +78,12 @@ computational_dag_edge_idx_vector_impl_def_t constr_graph_1() { } BOOST_AUTO_TEST_CASE(test_empty_dag_edge_idx) { - computational_dag_edge_idx_vector_impl_def_t graph; BOOST_CHECK_EQUAL(graph.num_edges(), 0); BOOST_CHECK_EQUAL(graph.num_vertices(), 0); } BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { - computational_dag_edge_idx_vector_impl_def_t graph = constr_graph_1(); using vertex_idx = computational_dag_edge_idx_vector_impl_def_t::vertex_idx; @@ -96,7 +93,6 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { size_t edge_idx = 0; for (const auto &edge : graph.edges()) { - BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); edge_idx++; @@ -104,7 +100,6 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { edge_idx = 0; for (const auto &edge : edges(graph)) { - BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); edge_idx++; @@ -112,16 +107,13 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { edge_idx = 0; for (auto &edge : edges(graph)) { - BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); edge_idx++; } - edge_idx = 0; for (const auto edge : edges(graph)) { - BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); edge_idx++; @@ -129,7 +121,6 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { edge_idx = 0; for (auto edge : edges(graph)) { - BOOST_CHECK_EQUAL(edge.source, edge_sources[edge_idx]); BOOST_CHECK_EQUAL(edge.target, edge_targets[edge_idx]); edge_idx++; @@ -137,14 +128,31 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; - std::vector> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}}; - - std::vector> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {3, 4}}; + std::vector> out_neighbors{ + {1, 2, 3}, + {4, 6}, + {4, 5}, + {7}, + {7}, + {}, + {}, + {} + }; + + std::vector> in_neighbors{ + {}, + {0}, + {0}, + {0}, + {1, 2}, + {2}, + {1}, + {3, 4} + }; size_t idx = 0; for (const auto &v : graph.vertices()) { - BOOST_CHECK_EQUAL(v, vertices[idx++]); size_t i = 0; @@ -203,7 +211,6 @@ BOOST_AUTO_TEST_CASE(test_dag_edge_idx) { } BOOST_AUTO_TEST_CASE(test_util_1) { - const computational_dag_edge_idx_vector_impl_def_t graph = constr_graph_1(); BOOST_CHECK_EQUAL(graph.num_edges(), 9); @@ -221,18 +228,18 @@ BOOST_AUTO_TEST_CASE(test_util_1) { const auto pair = edge_desc(0, 1, graph); BOOST_CHECK_EQUAL(pair.second, true); - BOOST_CHECK_EQUAL(source(pair.first,graph), 0); - BOOST_CHECK_EQUAL(target(pair.first,graph), 1); + BOOST_CHECK_EQUAL(source(pair.first, graph), 0); + BOOST_CHECK_EQUAL(target(pair.first, graph), 1); BOOST_CHECK_EQUAL(edge(0, 1, graph), true); - + const auto pair2 = edge_desc(0, 4, graph); BOOST_CHECK_EQUAL(pair2.second, false); BOOST_CHECK_EQUAL(edge(0, 4, graph), false); const auto pair3 = edge_desc(1, 4, graph); BOOST_CHECK_EQUAL(pair3.second, true); - BOOST_CHECK_EQUAL(source(pair3.first,graph), 1); - BOOST_CHECK_EQUAL(target(pair3.first,graph), 4); + BOOST_CHECK_EQUAL(source(pair3.first, graph), 1); + BOOST_CHECK_EQUAL(target(pair3.first, graph), 4); BOOST_CHECK_EQUAL(edge(1, 4, graph), true); BOOST_CHECK_EQUAL(has_path(0, 1, graph), true); @@ -294,11 +301,9 @@ BOOST_AUTO_TEST_CASE(test_util_1) { const auto long_edges = long_edges_in_triangles(graph); BOOST_CHECK_EQUAL(long_edges.size(), 0); - } BOOST_AUTO_TEST_CASE(test_constr_dag) { - computational_dag_edge_idx_vector_impl_def_int_t graph; graph.add_vertex(1, 2, 3); @@ -343,4 +348,4 @@ BOOST_AUTO_TEST_CASE(test_constr_dag) { BOOST_CHECK_EQUAL(graph_3.vertex_work_weight(1), 5); BOOST_CHECK_EQUAL(graph_3.vertex_comm_weight(1), 6); BOOST_CHECK_EQUAL(graph_3.vertex_mem_weight(1), 7); -} \ No newline at end of file +} diff --git a/tests/graph_vector_impl.cpp b/tests/graph_vector_impl.cpp index 9e2131e7..e0677f93 100644 --- a/tests/graph_vector_impl.cpp +++ b/tests/graph_vector_impl.cpp @@ -30,7 +30,6 @@ limitations under the License. using namespace osp; computational_dag_vector_impl_def_t constr_graph_1() { - computational_dag_vector_impl_def_t graph; using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; @@ -59,7 +58,6 @@ computational_dag_vector_impl_def_t constr_graph_1() { } BOOST_AUTO_TEST_CASE(test_empty_dag) { - computational_dag_vector_impl_def_t graph; BOOST_CHECK_EQUAL(graph.num_edges(), 0); BOOST_CHECK_EQUAL(graph.num_vertices(), 0); @@ -73,7 +71,6 @@ BOOST_AUTO_TEST_CASE(test_empty_dag) { } BOOST_AUTO_TEST_CASE(test_dag) { - const computational_dag_vector_impl_def_t graph = constr_graph_1(); using vertex_idx = computational_dag_vector_impl_def_t::vertex_idx; @@ -83,14 +80,31 @@ BOOST_AUTO_TEST_CASE(test_dag) { std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; - std::vector> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}}; - - std::vector> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {4, 3}}; + std::vector> out_neighbors{ + {1, 2, 3}, + {4, 6}, + {4, 5}, + {7}, + {7}, + {}, + {}, + {} + }; + + std::vector> in_neighbors{ + {}, + {0}, + {0}, + {0}, + {1, 2}, + {2}, + {1}, + {4, 3} + }; size_t idx = 0; for (const auto &v : graph.vertices()) { - BOOST_CHECK_EQUAL(v, vertices[idx++]); size_t i = 0; @@ -105,35 +119,27 @@ BOOST_AUTO_TEST_CASE(test_dag) { i = 0; for (const auto &e : out_edges(v, graph)) { - BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[v][i++]); - } i = 0; for (const auto &e : in_edges(v, graph)) { - BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[v][i++]); - } BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[v].size()); BOOST_CHECK_EQUAL(graph.out_degree(v), out_neighbors[v].size()); } - unsigned count = 0; - for (const auto & e: edges(graph)) { - + for (const auto &e : edges(graph)) { std::cout << e.source << " -> " << e.target << std::endl; count++; } BOOST_CHECK_EQUAL(count, 9); - } BOOST_AUTO_TEST_CASE(test_constr_dag) { - computational_dag_vector_impl_def_int_t graph; graph.add_vertex(1, 2, 3); @@ -206,22 +212,38 @@ BOOST_AUTO_TEST_CASE(test_constr_dag) { } BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { - std::vector vertices{0, 1, 2, 3, 4, 5, 6, 7}; - std::vector> out_neighbors{{1, 2, 3}, {4, 6}, {4, 5}, {7}, {7}, {}, {}, {}}; - - std::vector> in_neighbors{{}, {0}, {0}, {0}, {1, 2}, {2}, {1}, {4, 3}}; + std::vector> out_neighbors{ + {1, 2, 3}, + {4, 6}, + {4, 5}, + {7}, + {7}, + {}, + {}, + {} + }; + + std::vector> in_neighbors{ + {}, + {0}, + {0}, + {0}, + {1, 2}, + {2}, + {1}, + {4, 3} + }; using v_impl = cdag_vertex_impl; - using graph_t = dag_vector_adapter; + using graph_t = dag_vector_adapter; graph_t graph(out_neighbors, in_neighbors); size_t idx = 0; for (const auto &v : graph.vertices()) { - BOOST_CHECK_EQUAL(v, vertices[idx++]); unsigned vv = static_cast(v); @@ -238,12 +260,12 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { i = 0; for (const auto &e : out_edges(v, graph)) { - BOOST_CHECK_EQUAL(target(e,graph), out_neighbors[vv][i++]); + BOOST_CHECK_EQUAL(target(e, graph), out_neighbors[vv][i++]); } i = 0; for (const auto &e : in_edges(v, graph)) { - BOOST_CHECK_EQUAL(source(e,graph), in_neighbors[vv][i++]); + BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[vv][i++]); } BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[vv].size()); @@ -251,10 +273,9 @@ BOOST_AUTO_TEST_CASE(test_dag_vector_adapter) { } unsigned count = 0; - for (const auto & e: edges(graph)) { - + for (const auto &e : edges(graph)) { std::cout << e.source << " -> " << e.target << std::endl; count++; } BOOST_CHECK_EQUAL(count, 9); -} \ No newline at end of file +} diff --git a/tests/hash_pair.cpp b/tests/hash_pair.cpp index 6f9cb7fe..070c7406 100644 --- a/tests/hash_pair.cpp +++ b/tests/hash_pair.cpp @@ -24,23 +24,22 @@ limitations under the License. using namespace osp; BOOST_AUTO_TEST_CASE(Hash_Pair) { - std::pair p1({0,0}); - std::pair p2({1,1}); - std::pair p3({1,2}); - std::pair p4({2,1}); - std::pair p5({1,3}); - std::pair p6({2,6}); + std::pair p1({0, 0}); + std::pair p2({1, 1}); + std::pair p3({1, 2}); + std::pair p4({2, 1}); + std::pair p5({1, 3}); + std::pair p6({2, 6}); std::pair p7 = p6; pair_hash hasher; - - BOOST_CHECK( hasher(p7) == hasher(p6) ); + BOOST_CHECK(hasher(p7) == hasher(p6)); // Can technically fail, but should not - BOOST_CHECK( hasher(p1) != hasher(p2) ); - BOOST_CHECK( hasher(p3) != hasher(p4) ); - BOOST_CHECK( hasher(p2) != hasher(p3) ); - BOOST_CHECK( hasher(p2) != hasher(p5) ); - BOOST_CHECK( hasher(p4) != hasher(p6) ); -} \ No newline at end of file + BOOST_CHECK(hasher(p1) != hasher(p2)); + BOOST_CHECK(hasher(p3) != hasher(p4)); + BOOST_CHECK(hasher(p2) != hasher(p3)); + BOOST_CHECK(hasher(p2) != hasher(p5)); + BOOST_CHECK(hasher(p4) != hasher(p6)); +} diff --git a/tests/heaps.cpp b/tests/heaps.cpp index 2036b18c..de177ed1 100644 --- a/tests/heaps.cpp +++ b/tests/heaps.cpp @@ -17,38 +17,37 @@ limitations under the License. */ #define BOOST_TEST_MODULE HeapTest -#include - -#include "osp/auxiliary/datastructures/heaps/DaryHeap.hpp" -#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp" -#include - #include +#include +#include +#include +#include +#include #include #include #include #include -#include -#include -#include + +#include "osp/auxiliary/datastructures/heaps/DaryHeap.hpp" +#include "osp/auxiliary/datastructures/heaps/PairingHeap.hpp" namespace osp::test { // Wrapper for boost::heap::fibonacci_heap to match the test interface template class BoostFibonacciHeapWrapper { -private: + private: struct Node { Key key; Value value; }; struct NodeCompare { - bool operator()(const Node& a, const Node& b) const { + bool operator()(const Node &a, const Node &b) const { if constexpr (IsMinHeap) { - return a.value > b.value; // For min-heap + return a.value > b.value; // For min-heap } else { - return a.value < b.value; // For max-heap + return a.value < b.value; // For max-heap } } }; @@ -59,46 +58,60 @@ class BoostFibonacciHeapWrapper { BoostHeap heap; std::unordered_map handles; -public: + public: BoostFibonacciHeapWrapper() = default; bool is_empty() const { return heap.empty(); } + size_t size() const { return heap.size(); } - bool contains(const Key& key) const { return handles.count(key); } - const Key& top() const { - if (is_empty()) throw std::out_of_range("Heap is empty"); + bool contains(const Key &key) const { return handles.count(key); } + + const Key &top() const { + if (is_empty()) { + throw std::out_of_range("Heap is empty"); + } return heap.top().key; } Key pop() { - if (is_empty()) throw std::out_of_range("Heap is empty"); + if (is_empty()) { + throw std::out_of_range("Heap is empty"); + } Key top_key = heap.top().key; heap.pop(); handles.erase(top_key); return top_key; } - void push(const Key& key, const Value& value) { - if (contains(key)) throw std::invalid_argument("Key already exists"); + void push(const Key &key, const Value &value) { + if (contains(key)) { + throw std::invalid_argument("Key already exists"); + } handle_type handle = heap.push({key, value}); handles[key] = handle; } - Value get_value(const Key& key) const { - if (!contains(key)) throw std::out_of_range("Key not found"); + Value get_value(const Key &key) const { + if (!contains(key)) { + throw std::out_of_range("Key not found"); + } return (*handles.at(key)).value; } - void update(const Key& key, const Value& new_value) { - if (!contains(key)) throw std::invalid_argument("Key not found for update"); + void update(const Key &key, const Value &new_value) { + if (!contains(key)) { + throw std::invalid_argument("Key not found for update"); + } handle_type handle = handles.at(key); (*handle).value = new_value; heap.update(handle); } - void erase(const Key& key) { - if (!contains(key)) throw std::invalid_argument("Key not found for erase"); + void erase(const Key &key) { + if (!contains(key)) { + throw std::invalid_argument("Key not found for erase"); + } heap.erase(handles.at(key)); handles.erase(key); } @@ -118,17 +131,17 @@ using MaxBoostFibonacciHeap = BoostFibonacciHeapWrapper; // Wrapper for std::set to match the test interface template class StdSetWrapper { -private: + private: struct NodeCompare { - bool operator()(const std::pair& a, const std::pair& b) const { + bool operator()(const std::pair &a, const std::pair &b) const { if (a.first != b.first) { if constexpr (IsMinHeap) { - return a.first < b.first; // For min-heap + return a.first < b.first; // For min-heap } else { - return a.first > b.first; // For max-heap + return a.first > b.first; // For max-heap } } - return a.second < b.second; // Tie-breaking + return a.second < b.second; // Tie-breaking } }; @@ -136,48 +149,64 @@ class StdSetWrapper { SetType data_set; std::unordered_map value_map; -public: + public: StdSetWrapper() = default; bool is_empty() const { return data_set.empty(); } + size_t size() const { return data_set.size(); } - bool contains(const Key& key) const { return value_map.count(key); } - const Key& top() const { - if (is_empty()) throw std::out_of_range("Heap is empty"); + bool contains(const Key &key) const { return value_map.count(key); } + + const Key &top() const { + if (is_empty()) { + throw std::out_of_range("Heap is empty"); + } return data_set.begin()->second; } Key pop() { - if (is_empty()) throw std::out_of_range("Heap is empty"); + if (is_empty()) { + throw std::out_of_range("Heap is empty"); + } auto top_node = *data_set.begin(); data_set.erase(data_set.begin()); value_map.erase(top_node.second); return top_node.second; } - void push(const Key& key, const Value& value) { - if (contains(key)) throw std::invalid_argument("Key already exists"); + void push(const Key &key, const Value &value) { + if (contains(key)) { + throw std::invalid_argument("Key already exists"); + } data_set.insert({value, key}); value_map[key] = value; } - Value get_value(const Key& key) const { - if (!contains(key)) throw std::out_of_range("Key not found"); + Value get_value(const Key &key) const { + if (!contains(key)) { + throw std::out_of_range("Key not found"); + } return value_map.at(key); } - void update(const Key& key, const Value& new_value) { - if (!contains(key)) throw std::invalid_argument("Key not found for update"); + void update(const Key &key, const Value &new_value) { + if (!contains(key)) { + throw std::invalid_argument("Key not found for update"); + } Value old_value = value_map.at(key); - if (old_value == new_value) return; + if (old_value == new_value) { + return; + } data_set.erase({old_value, key}); data_set.insert({new_value, key}); value_map[key] = new_value; } - void erase(const Key& key) { - if (!contains(key)) throw std::invalid_argument("Key not found for erase"); + void erase(const Key &key) { + if (!contains(key)) { + throw std::invalid_argument("Key not found for erase"); + } Value value = value_map.at(key); data_set.erase({value, key}); value_map.erase(key); @@ -196,7 +225,8 @@ template using MaxStdSetHeap = StdSetWrapper; // Generic test suite for any min-heap implementation that follows the API. -template void test_min_heap_functionality() { +template +void test_min_heap_functionality() { HeapType heap; // Basic properties of an empty heap @@ -247,12 +277,12 @@ template void test_min_heap_functionality() { BOOST_CHECK_THROW(heap.get_value("Z"), std::out_of_range); // Test update (decrease-key) - heap.update("B", 1); // B: 5 -> 1. Should be new top. + heap.update("B", 1); // B: 5 -> 1. Should be new top. BOOST_CHECK_EQUAL(heap.top(), "B"); BOOST_CHECK_EQUAL(heap.get_value("B"), 1); // Test update (increase-key) - heap.update("B", 25); // B: 1 -> 25. D (2) should be new top. + heap.update("B", 25); // B: 1 -> 25. D (2) should be new top. BOOST_CHECK_EQUAL(heap.top(), "D"); BOOST_CHECK_EQUAL(heap.get_value("B"), 25); @@ -261,12 +291,12 @@ template void test_min_heap_functionality() { BOOST_CHECK_EQUAL(heap.get_value("A"), 10); // Test erase - heap.erase("D"); // Erase top element + heap.erase("D"); // Erase top element BOOST_CHECK_EQUAL(heap.size(), 4); BOOST_CHECK(!heap.contains("D")); - BOOST_CHECK_EQUAL(heap.top(), "A"); // A (10) is new top + BOOST_CHECK_EQUAL(heap.top(), "A"); // A (10) is new top - heap.erase("E"); // Erase non-top element + heap.erase("E"); // Erase non-top element BOOST_CHECK_EQUAL(heap.size(), 3); BOOST_CHECK(!heap.contains("E")); BOOST_CHECK_THROW(heap.erase("Z"), std::invalid_argument); @@ -277,7 +307,8 @@ template void test_min_heap_functionality() { BOOST_CHECK_EQUAL(heap.size(), 0); } -template void test_max_heap_functionality() { +template +void test_max_heap_functionality() { HeapType heap; heap.push("A", 10); heap.push("B", 5); @@ -292,7 +323,8 @@ template void test_max_heap_functionality() { } // Stress test with a larger number of elements -template void stress_test_heap() { +template +void stress_test_heap() { HeapType heap; const int num_items = 1000; @@ -363,37 +395,35 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t std::cout << "Bulk Pop (" << num_items << " items): " << duration.count() << " ms" << std::endl; BOOST_CHECK(heap.is_empty()); - - // Scenario 4: Random Operations (Push, Erase, Update) heap.clear(); std::vector present_keys; present_keys.reserve(num_items); std::vector key_in_heap(num_items, false); - std::uniform_int_distribution op_dist(0, 2); // 0: push, 1: erase, 2: update + std::uniform_int_distribution op_dist(0, 2); // 0: push, 1: erase, 2: update start = std::chrono::high_resolution_clock::now(); for (size_t i = 0; i < num_random_ops; ++i) { int op = op_dist(gen); - if (op == 0 || present_keys.empty()) { // Push + if (op == 0 || present_keys.empty()) { // Push size_t key_idx = key_distrib(gen); if (!key_in_heap[key_idx]) { heap.push(keys[key_idx], priorities[key_idx]); present_keys.push_back(keys[key_idx]); key_in_heap[key_idx] = true; } - } else { // Erase or Update + } else { // Erase or Update std::uniform_int_distribution present_key_dist(0, present_keys.size() - 1); size_t present_key_vec_idx = present_key_dist(gen); std::string key_to_op = present_keys[present_key_vec_idx]; - if (op == 1) { // Erase a random element + if (op == 1) { // Erase a random element heap.erase(key_to_op); key_in_heap[std::stoul(key_to_op)] = false; std::swap(present_keys[present_key_vec_idx], present_keys.back()); present_keys.pop_back(); - } else { // op == 2, Update a random element (decrease key) + } else { // op == 2, Update a random element (decrease key) int new_prio = heap.get_value(key_to_op) - dec_dist(gen); heap.update(key_to_op, new_prio); } @@ -401,8 +431,7 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t } end = std::chrono::high_resolution_clock::now(); duration = end - start; - std::cout << "Random Ops (" << num_random_ops << " ops of push/erase/update): " << duration.count() << " ms" - << std::endl; + std::cout << "Random Ops (" << num_random_ops << " ops of push/erase/update): " << duration.count() << " ms" << std::endl; // Scenario 5: Mixed Workload with Re-initialization const size_t num_outer_loops_s5 = 500; @@ -412,8 +441,7 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t const size_t num_updates_per_iter_s5 = 25; // A large pool of keys to draw from for pushes, to avoid collisions. - const size_t key_pool_size_s5 = - num_outer_loops_s5 * (num_initial_pushes_s5 + num_inner_loops_s5 * num_pushes_per_iter_s5); + const size_t key_pool_size_s5 = num_outer_loops_s5 * (num_initial_pushes_s5 + num_inner_loops_s5 * num_pushes_per_iter_s5); std::vector keys_s5(key_pool_size_s5); std::vector priorities_s5(key_pool_size_s5); for (size_t i = 0; i < key_pool_size_s5; ++i) { @@ -477,23 +505,33 @@ void run_performance_test(const std::string &heap_name, size_t num_items, size_t BOOST_AUTO_TEST_SUITE(HeapTests) BOOST_AUTO_TEST_CASE(PairingHeapTest) { test_min_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(MaxPairingHeapTest) { test_max_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(PairingHeapStressTest) { stress_test_heap>(); } BOOST_AUTO_TEST_CASE(BoostFibonacciHeapTest) { test_min_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(MaxBoostFibonacciHeapTest) { test_max_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(BoostFibonacciHeapStressTest) { stress_test_heap>(); } BOOST_AUTO_TEST_CASE(StdSetHeapTest) { test_min_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(MaxStdSetHeapTest) { test_max_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(StdSetHeapStressTest) { stress_test_heap>(); } BOOST_AUTO_TEST_CASE(DaryHeap_D2_Test) { test_min_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(MaxDaryHeap_D2_Test) { test_max_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(DaryHeap_D2_StressTest) { stress_test_heap>(); } BOOST_AUTO_TEST_CASE(DaryHeap_D4_Test) { test_min_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(MaxDaryHeap_D4_Test) { test_max_heap_functionality>(); } + BOOST_AUTO_TEST_CASE(DaryHeap_D4_StressTest) { stress_test_heap>(); } BOOST_AUTO_TEST_SUITE_END() @@ -506,15 +544,13 @@ BOOST_AUTO_TEST_CASE(HeapPerformanceComparison) { const size_t num_random_ops = 40000; run_performance_test>("Pairing Heap", num_items, num_updates, num_random_ops); - run_performance_test>("Boost Fibonacci Heap", num_items, num_updates, - num_random_ops); + run_performance_test>("Boost Fibonacci Heap", num_items, num_updates, num_random_ops); run_performance_test>("std::set", num_items, num_updates, num_random_ops); - run_performance_test>("Binary Heap (d=2)", num_items, num_updates, - num_random_ops); + run_performance_test>("Binary Heap (d=2)", num_items, num_updates, num_random_ops); run_performance_test>("4-ary Heap (d=4)", num_items, num_updates, num_random_ops); run_performance_test>("8-ary Heap (d=8)", num_items, num_updates, num_random_ops); } BOOST_AUTO_TEST_SUITE_END() -} // namespace osp::test +} // namespace osp::test diff --git a/tests/heavy_edge_preprocessing.cpp b/tests/heavy_edge_preprocessing.cpp index 3c8c0cf7..6fcda0c2 100644 --- a/tests/heavy_edge_preprocessing.cpp +++ b/tests/heavy_edge_preprocessing.cpp @@ -1,19 +1,17 @@ #define BOOST_TEST_MODULE heavy_edge_partitioning #include - #include #include #include +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/LoadBalanceScheduler/HeavyEdgePreProcess.hpp" #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "test_graphs.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) { - using Graph_t = boost_graph_int_t; std::vector filenames_graph = test_graphs(); @@ -27,8 +25,7 @@ BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) { } for (auto &filename_graph : filenames_graph) { - std::string name_graph = - filename_graph.substr(filename_graph.find_last_of("/\\") + 1, filename_graph.find_last_of(".")); + std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1, filename_graph.find_last_of(".")); std::cout << std::endl << "Graph: " << name_graph << std::endl; @@ -37,7 +34,6 @@ BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) { bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), graph); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -59,4 +55,4 @@ BOOST_AUTO_TEST_CASE(HeavyEdgePartitioning) { BOOST_CHECK(value); } } -} \ No newline at end of file +} diff --git a/tests/hill_climbing.cpp b/tests/hill_climbing.cpp index 7a108ba9..3bbfcde1 100644 --- a/tests/hill_climbing.cpp +++ b/tests/hill_climbing.cpp @@ -17,21 +17,20 @@ limitations under the License. */ #define BOOST_TEST_MODULE HILL_CLIMBING +#include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" + #include +#include +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include -#include "test_graphs.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "test_graphs.hpp" using namespace osp; - BOOST_AUTO_TEST_CASE(hill_climbing) { - using graph = computational_dag_vector_impl_def_t; BspInstance instance; @@ -75,11 +74,9 @@ BOOST_AUTO_TEST_CASE(hill_climbing) { BspSchedule schedule4 = bsp_initial; scheduler.improveScheduleWithStepLimit(schedule4, 5); BOOST_CHECK_EQUAL(schedule4.satisfiesPrecedenceConstraints(), true); - } BOOST_AUTO_TEST_CASE(hill_climbing_for_comm_schedule) { - using graph = computational_dag_vector_impl_def_t; BspInstance instance; @@ -111,7 +108,7 @@ BOOST_AUTO_TEST_CASE(hill_climbing_for_comm_schedule) { BspSchedule schedule = initial; BspScheduleCS initial_cs(std::move(initial)); - //initial_cs.setAutoCommunicationSchedule(); + // initial_cs.setAutoCommunicationSchedule(); initial_cs.setEagerCommunicationSchedule(); BOOST_CHECK_EQUAL(initial_cs.hasValidCommSchedule(), true); @@ -124,5 +121,4 @@ BOOST_AUTO_TEST_CASE(hill_climbing_for_comm_schedule) { hc_cs.setSteepestAscend(true); hc_cs.improveSchedule(schedule2); BOOST_CHECK_EQUAL(schedule2.hasValidCommSchedule(), true); - -} \ No newline at end of file +} diff --git a/tests/hypergraph_and_partition.cpp b/tests/hypergraph_and_partition.cpp index 10e4cb4a..4d934454 100644 --- a/tests/hypergraph_and_partition.cpp +++ b/tests/hypergraph_and_partition.cpp @@ -18,24 +18,22 @@ limitations under the License. #define BOOST_TEST_MODULE HYPERGRAPH_AND_PARTITION #include - #include #include #include -#include "osp/partitioning/model/partitioning.hpp" -#include "osp/partitioning/model/partitioning_replication.hpp" -#include "osp/partitioning/model/hypergraph_utility.hpp" -#include "osp/partitioning/partitioners/generic_FM.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/auxiliary/io/mtx_hypergraph_file_reader.hpp" #include "osp/auxiliary/io/partitioning_file_writer.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "osp/partitioning/model/hypergraph_utility.hpp" +#include "osp/partitioning/model/partitioning.hpp" +#include "osp/partitioning/model/partitioning_replication.hpp" +#include "osp/partitioning/partitioners/generic_FM.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { - using graph = computational_dag_vector_impl_def_int_t; using hypergraph = Hypergraph_def_t; @@ -45,12 +43,11 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { cwd = cwd.parent_path(); std::cout << cwd << std::endl; - } + } graph DAG; - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG); + bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG); BOOST_CHECK(status); @@ -66,35 +63,38 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { Hgraph = convert_from_cdag_as_dag(DAG); BOOST_CHECK_EQUAL(DAG.num_vertices(), Hgraph.num_vertices()); BOOST_CHECK_EQUAL(DAG.num_edges(), Hgraph.num_hyperedges()); - BOOST_CHECK_EQUAL(DAG.num_edges()*2, Hgraph.num_pins()); + BOOST_CHECK_EQUAL(DAG.num_edges() * 2, Hgraph.num_pins()); // HyperDAG format, one hypredge for each non-sink node unsigned nr_of_non_sinks = 0; - for(const auto &node : DAG.vertices()) - if(DAG.out_degree(node) > 0) - ++ nr_of_non_sinks; + for (const auto &node : DAG.vertices()) { + if (DAG.out_degree(node) > 0) { + ++nr_of_non_sinks; + } + } Hgraph = convert_from_cdag_as_hyperdag(DAG); BOOST_CHECK_EQUAL(DAG.num_vertices(), Hgraph.num_vertices()); BOOST_CHECK_EQUAL(nr_of_non_sinks, Hgraph.num_hyperedges()); BOOST_CHECK_EQUAL(DAG.num_edges() + nr_of_non_sinks, Hgraph.num_pins()); - // Dummy partitioning PartitioningProblem instance(Hgraph, 3, 30); Partitioning partition(instance); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { partition.setAssignedPartition(node, node % 3); + } BOOST_CHECK(partition.satisfiesBalanceConstraint()); int cutNetCost = partition.computeCutNetCost(); int connectivityCost = partition.computeConnectivityCost(); BOOST_CHECK(connectivityCost >= cutNetCost); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { instance.getHypergraph().set_vertex_work_weight(node, 1); + } instance.setMaxWorkWeightViaImbalanceFactor(0); BOOST_CHECK(partition.satisfiesBalanceConstraint()); @@ -103,44 +103,48 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { instance.setMaxWorkWeightViaImbalanceFactor(0); BOOST_CHECK(!partition.satisfiesBalanceConstraint()); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { partition.setAssignedPartition(node, node % 5); + } BOOST_CHECK(partition.satisfiesBalanceConstraint()); BOOST_CHECK(partition.computeConnectivityCost() >= partition.computeCutNetCost()); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { instance.getHypergraph().set_vertex_memory_weight(node, 1); + } instance.setMaxMemoryWeightExplicitly(10); BOOST_CHECK(partition.satisfiesBalanceConstraint() == false); instance.setMaxMemoryWeightExplicitly(std::numeric_limits::max()); file_writer::write_txt(std::cout, partition); - // Dummy partitioning with replication instance.setHypergraph(convert_from_cdag_as_hyperdag(DAG)); instance.setNumberOfPartitions(3); instance.setMaxWorkWeightExplicitly(30); PartitioningWithReplication partition_with_rep(instance); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { partition_with_rep.setAssignedPartitions(node, {node % 3}); + } BOOST_CHECK(partition_with_rep.satisfiesBalanceConstraint()); BOOST_CHECK(partition_with_rep.computeCutNetCost() == cutNetCost); BOOST_CHECK(partition_with_rep.computeConnectivityCost() == connectivityCost); instance.setMaxWorkWeightExplicitly(60); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) - partition_with_rep.setAssignedPartitions(node, {node % 3, (node+1)%3}); + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { + partition_with_rep.setAssignedPartitions(node, {node % 3, (node + 1) % 3}); + } BOOST_CHECK(partition_with_rep.satisfiesBalanceConstraint()); BOOST_CHECK(partition_with_rep.computeConnectivityCost() >= partition_with_rep.computeCutNetCost()); instance.setMaxWorkWeightExplicitly(compute_total_vertex_work_weight(Hgraph)); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { partition_with_rep.setAssignedPartitions(node, {0, 1, 2}); + } BOOST_CHECK(partition_with_rep.satisfiesBalanceConstraint()); BOOST_CHECK(partition_with_rep.computeConnectivityCost() == 0); @@ -148,17 +152,18 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { file_writer::write_txt(std::cout, partition_with_rep); - // Generic FM instance.setNumberOfPartitions(2); instance.setMaxWorkWeightExplicitly(35); - for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) + for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { instance.getHypergraph().set_vertex_work_weight(node, 1); + } Partitioning partition_to_improve(instance); - for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) + for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { partition_to_improve.setAssignedPartition(node, node % 2); + } int original_cost = partition_to_improve.computeConnectivityCost(); @@ -168,20 +173,22 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { BOOST_CHECK(partition_to_improve.satisfiesBalanceConstraint()); BOOST_CHECK(new_cost <= original_cost); - std::cout< "< " << new_cost << std::endl; graph larger_DAG; - file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(), larger_DAG); + file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag").string(), + larger_DAG); instance.setHypergraph(convert_from_cdag_as_hyperdag(larger_DAG)); instance.setMaxWorkWeightExplicitly(4000); - for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) + for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { instance.getHypergraph().set_vertex_work_weight(node, 1); + } partition_to_improve.resetPartition(); - for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) + for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { partition_to_improve.setAssignedPartition(node, node % 2); + } original_cost = partition_to_improve.computeConnectivityCost(); @@ -191,14 +198,15 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { BOOST_CHECK(partition_to_improve.satisfiesBalanceConstraint()); BOOST_CHECK(new_cost <= original_cost); - std::cout< "< " << new_cost << std::endl; // Recursive FM instance.setNumberOfPartitions(16); instance.setMaxWorkWeightViaImbalanceFactor(0.3); - for(unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) + for (unsigned node = 0; node < instance.getHypergraph().num_vertices(); ++node) { partition_to_improve.setAssignedPartition(node, node % 16); + } original_cost = partition_to_improve.computeConnectivityCost(); @@ -208,7 +216,5 @@ BOOST_AUTO_TEST_CASE(Hypergraph_and_Partition_test) { BOOST_CHECK(partition_to_improve.satisfiesBalanceConstraint()); BOOST_CHECK(new_cost <= original_cost); - std::cout< "< " << new_cost << std::endl; +} diff --git a/tests/ilp_bsp_scheduler.cpp b/tests/ilp_bsp_scheduler.cpp index fc6934b4..201dd393 100644 --- a/tests/ilp_bsp_scheduler.cpp +++ b/tests/ilp_bsp_scheduler.cpp @@ -18,26 +18,24 @@ limitations under the License. #define BOOST_TEST_MODULE COPT_ILP_SCHEDULING #include +#include +#include +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/model/BspInstance.hpp" #include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include -#include - -#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp" -#include "osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp" #include "osp/bsp/scheduler/IlpSchedulers/CoptCommScheduleOptimizer.hpp" +#include "osp/bsp/scheduler/IlpSchedulers/CoptFullScheduler.hpp" #include "osp/bsp/scheduler/IlpSchedulers/CoptPartialScheduler.hpp" +#include "osp/bsp/scheduler/IlpSchedulers/TotalCommunicationScheduler.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(test_total) { - using graph = computational_dag_edge_idx_vector_impl_def_t; BspInstance instance; @@ -78,7 +76,6 @@ BOOST_AUTO_TEST_CASE(test_total) { }; BOOST_AUTO_TEST_CASE(test_full) { - using graph = computational_dag_edge_idx_vector_impl_def_t; BspInstance instance; @@ -142,7 +139,7 @@ BOOST_AUTO_TEST_CASE(test_full) { BOOST_CHECK(schedule_improved2.satisfiesConstraints()); // initialize with recomputing schedule, return recomputing schedule - BspScheduleRecomp schedule_improved3(instance),schedule_init3(schedule_init_cs); + BspScheduleRecomp schedule_improved3(instance), schedule_init3(schedule_init_cs); CoptFullScheduler scheduler_init3(schedule_init3); scheduler_init3.setTimeLimitSeconds(10); const auto result_init3 = scheduler_init3.computeScheduleRecomp(schedule_improved3); @@ -153,8 +150,9 @@ BOOST_AUTO_TEST_CASE(test_full) { BspInstance instance_typed = instance; instance_typed.getArchitecture().setProcessorType(0, 1); instance_typed.getArchitecture().setProcessorType(1, 1); - for(vertex_idx_t node = 0; node < static_cast >(instance_typed.numberOfVertices()); ++node) - instance_typed.getComputationalDag().set_vertex_type(node, node%2); + for (vertex_idx_t node = 0; node < static_cast >(instance_typed.numberOfVertices()); ++node) { + instance_typed.getComputationalDag().set_vertex_type(node, node % 2); + } instance_typed.setDiagonalCompatibilityMatrix(2); BspSchedule schedule_typed(instance_typed); @@ -198,7 +196,6 @@ BOOST_AUTO_TEST_CASE(test_full) { }; BOOST_AUTO_TEST_CASE(test_cs) { - using graph = computational_dag_edge_idx_vector_impl_def_t; BspInstance instance; @@ -214,8 +211,8 @@ BOOST_AUTO_TEST_CASE(test_cs) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_pregel.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(), + instance.getComputationalDag()); BOOST_CHECK(status); @@ -232,7 +229,7 @@ BOOST_AUTO_TEST_CASE(test_cs) { const auto result = scheduler.improveSchedule(schedule_cs); BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); const auto after = schedule_cs.compute_cs_communication_costs(); - std::cout< "< " << after << std::endl; BOOST_CHECK(schedule_cs.satisfiesPrecedenceConstraints()); BOOST_CHECK(schedule_cs.hasValidCommSchedule()); @@ -240,7 +237,6 @@ BOOST_AUTO_TEST_CASE(test_cs) { }; BOOST_AUTO_TEST_CASE(test_partial) { - using graph = computational_dag_edge_idx_vector_impl_def_t; BspInstance instance; @@ -256,8 +252,8 @@ BOOST_AUTO_TEST_CASE(test_partial) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_pregel.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(), + instance.getComputationalDag()); BOOST_CHECK(status); @@ -285,6 +281,4 @@ BOOST_AUTO_TEST_CASE(test_partial) { BOOST_CHECK(schedule.hasValidCommSchedule()); auto cost_after = schedule.computeCosts(); BOOST_CHECK(cost_after <= cost_mid); - }; - diff --git a/tests/ilp_hypergraph_partitioning.cpp b/tests/ilp_hypergraph_partitioning.cpp index b42bbc29..636c6545 100644 --- a/tests/ilp_hypergraph_partitioning.cpp +++ b/tests/ilp_hypergraph_partitioning.cpp @@ -18,19 +18,17 @@ limitations under the License. #define BOOST_TEST_MODULE HYPERGRAPH_PARTITIONING_ILP #include - #include + +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/partitioning/model/hypergraph_utility.hpp" #include "osp/partitioning/partitioners/partitioning_ILP.hpp" #include "osp/partitioning/partitioners/partitioning_ILP_replication.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" - using namespace osp; BOOST_AUTO_TEST_CASE(test_full) { - using graph = computational_dag_vector_impl_def_int_t; using Hypergraph = Hypergraph_def_t; @@ -40,12 +38,11 @@ BOOST_AUTO_TEST_CASE(test_full) { while ((!cwd.empty()) && (cwd.filename() != "OneStopParallel")) { cwd = cwd.parent_path(); std::cout << cwd << std::endl; - } + } graph DAG; - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG); + bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), DAG); BOOST_CHECK(status); @@ -55,9 +52,8 @@ BOOST_AUTO_TEST_CASE(test_full) { PartitioningProblem instance(Hgraph, 3, 35); Partitioning partition(instance); - // ILP without replication - + HypergraphPartitioningILP partitioner; partitioner.setTimeLimitSeconds(60); partitioner.computePartitioning(partition); @@ -65,8 +61,9 @@ BOOST_AUTO_TEST_CASE(test_full) { BOOST_CHECK(partition.satisfiesBalanceConstraint()); BOOST_CHECK(partition.computeConnectivityCost() >= partition.computeCutNetCost()); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { partition.setAssignedPartition(node, node % 3); + } partitioner.setUseInitialSolution(true); partitioner.computePartitioning(partition); @@ -94,16 +91,18 @@ BOOST_AUTO_TEST_CASE(test_full) { BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); partitioner_rep.setUseInitialSolution(true); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { partition_rep.setAssignedPartitions(node, {node % 3}); + } partitioner_rep.computePartitioning(partition_rep); BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); instance.setMaxWorkWeightExplicitly(60); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) - partition_rep.setAssignedPartitions(node, {node % 3, (node+1)%3}); + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { + partition_rep.setAssignedPartitions(node, {node % 3, (node + 1) % 3}); + } partitioner_rep.computePartitioning(partition_rep); BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); @@ -119,19 +118,20 @@ BOOST_AUTO_TEST_CASE(test_full) { BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); partitioner_rep.setUseInitialSolution(true); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { partition_rep.setAssignedPartitions(node, {node % 3}); + } partitioner_rep.computePartitioning(partition_rep); BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); instance.setMaxWorkWeightExplicitly(60); - for(unsigned node = 0; node < Hgraph.num_vertices(); ++node) - partition_rep.setAssignedPartitions(node, {node % 3, (node+1)%3}); + for (unsigned node = 0; node < Hgraph.num_vertices(); ++node) { + partition_rep.setAssignedPartitions(node, {node % 3, (node + 1) % 3}); + } partitioner_rep.computePartitioning(partition_rep); BOOST_CHECK(partition_rep.satisfiesBalanceConstraint()); BOOST_CHECK(partition_rep.computeConnectivityCost() == 0); - -}; \ No newline at end of file +}; diff --git a/tests/ilp_pebbling_scheduler.cpp b/tests/ilp_pebbling_scheduler.cpp index 8c186919..0d8b810f 100644 --- a/tests/ilp_pebbling_scheduler.cpp +++ b/tests/ilp_pebbling_scheduler.cpp @@ -18,22 +18,20 @@ limitations under the License. #define BOOST_TEST_MODULE PEBBLING_ILP #include - -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include #include +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "osp/graph_implementations/boost_graphs/boost_graph.hpp" #include "osp/pebbling/pebblers/pebblingILP/MultiProcessorPebbling.hpp" #include "osp/pebbling/pebblers/pebblingILP/PebblingPartialILP.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(test_full) { - using graph = computational_dag_vector_impl_def_t; BspInstance instance; @@ -58,7 +56,8 @@ BOOST_AUTO_TEST_CASE(test_full) { BspSchedule bsp_initial(instance); BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, greedy.computeSchedule(bsp_initial)); - std::vector > minimum_memory_required_vector = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); + std::vector > minimum_memory_required_vector + = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); v_memw_t max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end()); instance.getArchitecture().setMemoryBound(max_required); @@ -71,11 +70,9 @@ BOOST_AUTO_TEST_CASE(test_full) { mpp.computePebblingWithInitialSolution(initial_sol, schedule); schedule.cleanSchedule(); BOOST_CHECK(schedule.isValid()); - }; BOOST_AUTO_TEST_CASE(test_partial) { - using graph = computational_dag_vector_impl_def_t; BspInstance instance; @@ -96,7 +93,8 @@ BOOST_AUTO_TEST_CASE(test_partial) { BOOST_CHECK(status); - std::vector > minimum_memory_required_vector = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); + std::vector > minimum_memory_required_vector + = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); v_memw_t max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end()); instance.getArchitecture().setMemoryBound(max_required); @@ -106,5 +104,4 @@ BOOST_AUTO_TEST_CASE(test_partial) { PebblingSchedule schedule(instance); mpp.computePebbling(schedule); BOOST_CHECK(schedule.isValid()); - -}; \ No newline at end of file +}; diff --git a/tests/intpower.cpp b/tests/intpower.cpp index de5f973c..53ab140d 100644 --- a/tests/intpower.cpp +++ b/tests/intpower.cpp @@ -21,7 +21,6 @@ limitations under the License. #include "osp/auxiliary/misc.hpp" - using namespace osp; BOOST_AUTO_TEST_CASE(IntegerPowers) { diff --git a/tests/isomorphic_subgraph_scheduler.cpp b/tests/isomorphic_subgraph_scheduler.cpp index 9165c5a5..e5abd52b 100644 --- a/tests/isomorphic_subgraph_scheduler.cpp +++ b/tests/isomorphic_subgraph_scheduler.cpp @@ -18,14 +18,13 @@ limitations under the License. #define BOOST_TEST_MODULE IsomorphicSubgraphScheduler #include +#include +#include -#include "test_graphs.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/dag_divider/isomorphism_divider/IsomorphicSubgraphScheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" - -#include -#include +#include "test_graphs.hpp" using namespace osp; @@ -40,14 +39,14 @@ class IsomorphicSubgraphSchedulerTester : public IsomorphicSubgraphScheduler::IsomorphicSubgraphScheduler; - void test_trim_subgraph_groups(std::vector& isomorphic_groups, - const BspInstance& instance, - std::vector& was_trimmed) { + void test_trim_subgraph_groups(std::vector &isomorphic_groups, + const BspInstance &instance, + std::vector &was_trimmed) { this->trim_subgraph_groups(isomorphic_groups, instance, was_trimmed); } void test_schedule_isomorphic_group(const BspInstance &instance, - const std::vector& isomorphic_groups, + const std::vector &isomorphic_groups, const SubgraphSchedule &sub_sched, std::vector> &partition) { this->schedule_isomorphic_group(instance, isomorphic_groups, sub_sched, partition); @@ -57,7 +56,6 @@ class IsomorphicSubgraphSchedulerTester : public IsomorphicSubgraphScheduler instance; instance.getArchitecture().setNumberOfProcessors(4); @@ -73,16 +71,16 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_NoTrim) { IsomorphicSubgraphSchedulerTester tester(greedy_scheduler); BspInstance instance; - auto& dag = instance.getComputationalDag(); - dag.add_vertex(1, 1, 1, 0); // 0 - dag.add_vertex(1, 1, 1, 0); // 1 - dag.add_vertex(1, 1, 1, 0); // 2 - dag.add_vertex(1, 1, 1, 0); // 3 - instance.getArchitecture().setProcessorsWithTypes({0,0,0,0,0,0,0,0}); // 8 processors of type 0 + auto &dag = instance.getComputationalDag(); + dag.add_vertex(1, 1, 1, 0); // 0 + dag.add_vertex(1, 1, 1, 0); // 1 + dag.add_vertex(1, 1, 1, 0); // 2 + dag.add_vertex(1, 1, 1, 0); // 3 + instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0}); // 8 processors of type 0 instance.setDiagonalCompatibilityMatrix(1); // A single group with 4 subgraphs, each with 1 node. - std::vector iso_groups = { group_t{ { {0}, {1}, {2}, {3} } } }; + std::vector iso_groups = {group_t{{{0}, {1}, {2}, {3}}}}; std::vector was_trimmed(iso_groups.size()); // Group size (4) is a divisor of processor count for type 0 (8), so no trim. @@ -91,28 +89,27 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_NoTrim) { BOOST_REQUIRE_EQUAL(was_trimmed.size(), 1); BOOST_CHECK(!was_trimmed[0]); BOOST_CHECK_EQUAL(iso_groups.size(), 1); - BOOST_CHECK_EQUAL(iso_groups[0].subgraphs.size(), 4); // Still 4 subgraphs in the group + BOOST_CHECK_EQUAL(iso_groups[0].subgraphs.size(), 4); // Still 4 subgraphs in the group } BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_WithTrim) { GreedyBspScheduler greedy_scheduler; IsomorphicSubgraphSchedulerTester tester(greedy_scheduler); tester.setAllowTrimmedScheduler(false); - BspInstance instance; - auto& dag = instance.getComputationalDag(); - dag.add_vertex(10, 1, 1, 0); // 0 - dag.add_vertex(10, 1, 1, 0); // 1 - dag.add_vertex(10, 1, 1, 0); // 2 - dag.add_vertex(10, 1, 1, 0); // 3 - dag.add_vertex(10, 1, 1, 0); // 4 - dag.add_vertex(10, 1, 1, 0); // 5 - instance.getArchitecture().setProcessorsWithTypes({0,0,0,0,0,0,0,0}); // 8 processors of type 0 + auto &dag = instance.getComputationalDag(); + dag.add_vertex(10, 1, 1, 0); // 0 + dag.add_vertex(10, 1, 1, 0); // 1 + dag.add_vertex(10, 1, 1, 0); // 2 + dag.add_vertex(10, 1, 1, 0); // 3 + dag.add_vertex(10, 1, 1, 0); // 4 + dag.add_vertex(10, 1, 1, 0); // 5 + instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0}); // 8 processors of type 0 instance.setDiagonalCompatibilityMatrix(1); // 6 subgraphs, each with 1 node and work weight 10. - std::vector iso_groups = { group_t{ { {0}, {1}, {2}, {3}, {4}, {5} } } }; + std::vector iso_groups = {group_t{{{0}, {1}, {2}, {3}, {4}, {5}}}}; std::vector was_trimmed(iso_groups.size()); // Group size (6) is not a divisor of processor count for type 0 (8). @@ -124,13 +121,13 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_WithTrim) { BOOST_REQUIRE_EQUAL(was_trimmed.size(), 1); BOOST_CHECK(was_trimmed[0]); BOOST_CHECK_EQUAL(iso_groups.size(), 1); - BOOST_REQUIRE_EQUAL(iso_groups[0].subgraphs.size(), 2); // Group now contains 2 merged subgraphs + BOOST_REQUIRE_EQUAL(iso_groups[0].subgraphs.size(), 2); // Group now contains 2 merged subgraphs // Check that the new subgraphs are correctly merged. BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[0].size(), 3); BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[1].size(), 3); - const auto& final_sgs = iso_groups[0].subgraphs; + const auto &final_sgs = iso_groups[0].subgraphs; std::set vertices_sg0(final_sgs[0].begin(), final_sgs[0].end()); std::set vertices_sg1(final_sgs[1].begin(), final_sgs[1].end()); std::set expected_sg0 = {0, 1, 2}; @@ -144,28 +141,39 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_MultipleGroups) { IsomorphicSubgraphSchedulerTester tester(greedy_scheduler); tester.setAllowTrimmedScheduler(false); - BspInstance instance; - auto& dag = instance.getComputationalDag(); - for (int i = 0; i < 6; ++i) dag.add_vertex(1,1,1,0); // 0-5 - for (int i = 0; i < 3; ++i) dag.add_vertex(1,1,1,0); // 6-8, but we will use 10-12 in test - for (int i = 0; i < 2; ++i) dag.add_vertex(1,1,1,0); // 9-10 - for (int i = 0; i < 2; ++i) dag.add_vertex(1,1,1,0); // 11-12 - for (int i = 0; i < 8; ++i) dag.add_vertex(1,1,1,0); // 13-20 - for (int i = 0; i < 5; ++i) dag.add_vertex(1,1,1,0); // 21-25 + auto &dag = instance.getComputationalDag(); + for (int i = 0; i < 6; ++i) { + dag.add_vertex(1, 1, 1, 0); // 0-5 + } + for (int i = 0; i < 3; ++i) { + dag.add_vertex(1, 1, 1, 0); // 6-8, but we will use 10-12 in test + } + for (int i = 0; i < 2; ++i) { + dag.add_vertex(1, 1, 1, 0); // 9-10 + } + for (int i = 0; i < 2; ++i) { + dag.add_vertex(1, 1, 1, 0); // 11-12 + } + for (int i = 0; i < 8; ++i) { + dag.add_vertex(1, 1, 1, 0); // 13-20 + } + for (int i = 0; i < 5; ++i) { + dag.add_vertex(1, 1, 1, 0); // 21-25 + } // Make sure all vertices used in iso_groups exist. // All are type 0. - instance.getArchitecture().setProcessorsWithTypes({0,0,0,0,0,0,0,0,0}); // 9 processors of type 0 + instance.getArchitecture().setProcessorsWithTypes({0, 0, 0, 0, 0, 0, 0, 0, 0}); // 9 processors of type 0 instance.setDiagonalCompatibilityMatrix(1); // Group 1: size 6. gcd(6, 9) = 3. merge_size = 6/3 = 2. -> 3 subgraphs of size 2. // Group 2: size 3. gcd(3, 9) = 3. merge_size = 3/3 = 1. -> no trim. // Group 3: size 5. gcd(5, 9) = 1. merge_size = 5/1 = 5. -> 1 subgraph of size 5. std::vector iso_groups = { - group_t{ { {0}, {1}, {2}, {3}, {4}, {5} } }, // Group 1 - group_t{ { {10}, {11}, {12} } }, // Group 2 - group_t{ { {20}, {21}, {22}, {23}, {24} } } // Group 3 + group_t{{{0}, {1}, {2}, {3}, {4}, {5}}}, // Group 1 + group_t{{{10}, {11}, {12}}}, // Group 2 + group_t{{{20}, {21}, {22}, {23}, {24}}} // Group 3 }; std::vector was_trimmed(iso_groups.size()); @@ -174,9 +182,9 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_MultipleGroups) { BOOST_REQUIRE_EQUAL(iso_groups.size(), 3); BOOST_REQUIRE_EQUAL(was_trimmed.size(), 3); - BOOST_CHECK(was_trimmed[0]); // Group 1 should be trimmed - BOOST_CHECK(!was_trimmed[1]); // Group 2 should not be trimmed - BOOST_CHECK(was_trimmed[2]); // Group 3 should be trimmed + BOOST_CHECK(was_trimmed[0]); // Group 1 should be trimmed + BOOST_CHECK(!was_trimmed[1]); // Group 2 should not be trimmed + BOOST_CHECK(was_trimmed[2]); // Group 3 should be trimmed // Check Group 1 BOOST_REQUIRE_EQUAL(iso_groups[0].subgraphs.size(), 3); BOOST_CHECK_EQUAL(iso_groups[0].subgraphs[0].size(), 2); @@ -195,34 +203,35 @@ BOOST_AUTO_TEST_CASE(TrimSubgraphGroupsTest_MultipleGroups) { BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_HeterogeneousArch) { // --- Setup --- BspInstance instance; - auto& dag = instance.getComputationalDag(); + auto &dag = instance.getComputationalDag(); // Two isomorphic groups: // Group 0: {0,1}, {2,3} (type 0) // Group 1: {4}, {5} (type 1) - dag.add_vertex(10, 1, 1, 0); dag.add_vertex(10, 1, 1, 0); // 0, 1 - dag.add_vertex(10, 1, 1, 0); dag.add_vertex(10, 1, 1, 0); // 2, 3 - dag.add_vertex(20, 1, 1, 1); // 4 - dag.add_vertex(20, 1, 1, 1); // 5 - dag.add_edge(0, 1); dag.add_edge(2, 3); - dag.add_edge(1, 4); dag.add_edge(3, 5); + dag.add_vertex(10, 1, 1, 0); + dag.add_vertex(10, 1, 1, 0); // 0, 1 + dag.add_vertex(10, 1, 1, 0); + dag.add_vertex(10, 1, 1, 0); // 2, 3 + dag.add_vertex(20, 1, 1, 1); // 4 + dag.add_vertex(20, 1, 1, 1); // 5 + dag.add_edge(0, 1); + dag.add_edge(2, 3); + dag.add_edge(1, 4); + dag.add_edge(3, 5); // 2 procs of type 0, 2 procs of type 1 instance.getArchitecture().setProcessorsWithTypes({0, 0, 1, 1}); instance.setDiagonalCompatibilityMatrix(2); - std::vector iso_groups = { - group_t{ { {0, 1}, {2, 3} } }, - group_t{ { {4}, {5} } } - }; + std::vector iso_groups = {group_t{{{0, 1}, {2, 3}}}, group_t{{{4}, {5}}}}; // Mock SubgraphSchedule from EFT scheduler // Group 0 (2 subgraphs) gets 2 workers of type 0 // Group 1 (2 subgraphs) gets 2 workers of type 1 SubgraphSchedule sub_sched; sub_sched.node_assigned_worker_per_type.resize(2); - sub_sched.node_assigned_worker_per_type[0] = {2, 0}; // 2xT0 for group 0 - sub_sched.node_assigned_worker_per_type[1] = {0, 2}; // 2xT1 for group 1 - sub_sched.was_trimmed = {false, false}; // No trimming occurred + sub_sched.node_assigned_worker_per_type[0] = {2, 0}; // 2xT0 for group 0 + sub_sched.node_assigned_worker_per_type[1] = {0, 2}; // 2xT1 for group 1 + sub_sched.was_trimmed = {false, false}; // No trimming occurred std::vector> partition(dag.num_vertices()); @@ -252,7 +261,9 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_HeterogeneousArch) { // Verify all partitions are unique as expected std::set> partition_ids; - for(const auto& p_id : partition) partition_ids.insert(p_id); + for (const auto &p_id : partition) { + partition_ids.insert(p_id); + } BOOST_CHECK_EQUAL(partition_ids.size(), 4); } @@ -261,16 +272,16 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_ShuffledIDs) { // This test ensures that the isomorphism mapping works correctly even if // the vertex IDs of isomorphic subgraphs are not in the same relative order. BspInstance instance; - auto& dag = instance.getComputationalDag(); + auto &dag = instance.getComputationalDag(); // Group 0, Subgraph 1: 0 -> 1 - dag.add_vertex(10, 1, 1, 0); // 0 - dag.add_vertex(20, 1, 1, 0); // 1 + dag.add_vertex(10, 1, 1, 0); // 0 + dag.add_vertex(20, 1, 1, 0); // 1 dag.add_edge(0, 1); // Group 0, Subgraph 2 (isomorphic to 1, but with shuffled IDs): 3 -> 2 - dag.add_vertex(20, 1, 1, 0); // 2 (work 20, corresponds to node 1) - dag.add_vertex(10, 1, 1, 0); // 3 (work 10, corresponds to node 0) + dag.add_vertex(20, 1, 1, 0); // 2 (work 20, corresponds to node 1) + dag.add_vertex(10, 1, 1, 0); // 3 (work 10, corresponds to node 0) dag.add_edge(3, 2); // Architecture: 2 processors, so each subgraph gets its own partition space. @@ -280,15 +291,13 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_ShuffledIDs) { // Manually define the isomorphic groups. // Subgraph 1 vertices: {0, 1} // Subgraph 2 vertices: {2, 3} - std::vector iso_groups = { - group_t{ { {0, 1}, {2, 3} } } - }; + std::vector iso_groups = {group_t{{{0, 1}, {2, 3}}}}; // Mock SubgraphSchedule: The single group gets all 2 processors. SubgraphSchedule sub_sched; sub_sched.node_assigned_worker_per_type.resize(1); sub_sched.node_assigned_worker_per_type[0] = {2}; - sub_sched.was_trimmed = {false}; // No trimming occurred + sub_sched.was_trimmed = {false}; // No trimming occurred std::vector> partition(dag.num_vertices()); @@ -395,4 +404,4 @@ BOOST_AUTO_TEST_CASE(ScheduleIsomorphicGroup_ShuffledIDs) { // BOOST_CHECK_NE(partition[3], partition[6]); // Sinks // } -BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file +BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/isomorphism_mapper.cpp b/tests/isomorphism_mapper.cpp index a64e7b99..72b6ecbb 100644 --- a/tests/isomorphism_mapper.cpp +++ b/tests/isomorphism_mapper.cpp @@ -18,15 +18,14 @@ limitations under the License. #define BOOST_TEST_MODULE IsomorphismMapper #include +#include +#include +#include #include "osp/dag_divider/isomorphism_divider/IsomorphismMapper.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "test_graphs.hpp" -#include -#include -#include - using namespace osp; BOOST_AUTO_TEST_SUITE(IsomorphismMapperTestSuite) @@ -37,16 +36,20 @@ using constr_graph_t = computational_dag_vector_impl_def_t; BOOST_AUTO_TEST_CASE(Mapper_SimpleChain) { // Rep: 0 -> 1 -> 2 constr_graph_t rep_graph; - rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); rep_graph.add_vertex(30,1,1); - rep_graph.add_edge(0, 1); rep_graph.add_edge(1, 2); + rep_graph.add_vertex(10, 1, 1); + rep_graph.add_vertex(20, 1, 1); + rep_graph.add_vertex(30, 1, 1); + rep_graph.add_edge(0, 1); + rep_graph.add_edge(1, 2); std::vector> rep_map = {100, 101, 102}; // Current: 2 -> 0 -> 1 (isomorphic, but different local IDs) constr_graph_t current_graph; - current_graph.add_vertex(20,1,1); // local 0 (work 20) - current_graph.add_vertex(30,1,1); // local 1 (work 30) - current_graph.add_vertex(10,1,1); // local 2 (work 10) - current_graph.add_edge(2, 0); current_graph.add_edge(0, 1); + current_graph.add_vertex(20, 1, 1); // local 0 (work 20) + current_graph.add_vertex(30, 1, 1); // local 1 (work 30) + current_graph.add_vertex(10, 1, 1); // local 2 (work 10) + current_graph.add_edge(2, 0); + current_graph.add_edge(0, 1); std::vector> current_map = {201, 202, 200}; IsomorphismMapper mapper(rep_graph); @@ -54,7 +57,7 @@ BOOST_AUTO_TEST_CASE(Mapper_SimpleChain) { // Translate local map to global map for the test std::unordered_map, vertex_idx_t> result_map; - for(const auto& [curr_local, rep_local] : result_map_local) { + for (const auto &[curr_local, rep_local] : result_map_local) { result_map[current_map[curr_local]] = rep_map[rep_local]; } @@ -71,24 +74,33 @@ BOOST_AUTO_TEST_CASE(Mapper_SimpleChain) { BOOST_AUTO_TEST_CASE(Mapper_ForkJoin) { // Rep: 0 -> {1,2} -> 3 constr_graph_t rep_graph; - rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); rep_graph.add_vertex(20,1,1); rep_graph.add_vertex(30,1,1); - rep_graph.add_edge(0,1); rep_graph.add_edge(0,2); rep_graph.add_edge(1,3); rep_graph.add_edge(2,3); + rep_graph.add_vertex(10, 1, 1); + rep_graph.add_vertex(20, 1, 1); + rep_graph.add_vertex(20, 1, 1); + rep_graph.add_vertex(30, 1, 1); + rep_graph.add_edge(0, 1); + rep_graph.add_edge(0, 2); + rep_graph.add_edge(1, 3); + rep_graph.add_edge(2, 3); std::vector> rep_map = {10, 11, 12, 13}; // Current: 3 -> {0,2} -> 1 constr_graph_t current_graph; - current_graph.add_vertex(20,1,1); // local 0 - current_graph.add_vertex(30,1,1); // local 1 - current_graph.add_vertex(20,1,1); // local 2 - current_graph.add_vertex(10,1,1); // local 3 - current_graph.add_edge(3,0); current_graph.add_edge(3,2); current_graph.add_edge(0,1); current_graph.add_edge(2,1); + current_graph.add_vertex(20, 1, 1); // local 0 + current_graph.add_vertex(30, 1, 1); // local 1 + current_graph.add_vertex(20, 1, 1); // local 2 + current_graph.add_vertex(10, 1, 1); // local 3 + current_graph.add_edge(3, 0); + current_graph.add_edge(3, 2); + current_graph.add_edge(0, 1); + current_graph.add_edge(2, 1); std::vector> current_map = {21, 23, 22, 20}; IsomorphismMapper mapper(rep_graph); auto result_map_local = mapper.find_mapping(current_graph); std::unordered_map, vertex_idx_t> result_map; - for(const auto& [curr_local, rep_local] : result_map_local) { + for (const auto &[curr_local, rep_local] : result_map_local) { result_map[current_map[curr_local]] = rep_map[rep_local]; } @@ -109,23 +121,29 @@ BOOST_AUTO_TEST_CASE(Mapper_ForkJoin) { BOOST_AUTO_TEST_CASE(Mapper_DisconnectedComponents) { // Rep: {0->1}, {2->3}. Two identical but disconnected components. constr_graph_t rep_graph; - rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); // 0, 1 - rep_graph.add_vertex(10,1,1); rep_graph.add_vertex(20,1,1); // 2, 3 - rep_graph.add_edge(0,1); rep_graph.add_edge(2,3); + rep_graph.add_vertex(10, 1, 1); + rep_graph.add_vertex(20, 1, 1); // 0, 1 + rep_graph.add_vertex(10, 1, 1); + rep_graph.add_vertex(20, 1, 1); // 2, 3 + rep_graph.add_edge(0, 1); + rep_graph.add_edge(2, 3); std::vector> rep_map = {10, 11, 12, 13}; // Current: {2->3}, {0->1}. Same components, but different local IDs. constr_graph_t current_graph; - current_graph.add_vertex(10,1,1); current_graph.add_vertex(20,1,1); // 0, 1 - current_graph.add_vertex(10,1,1); current_graph.add_vertex(20,1,1); // 2, 3 - current_graph.add_edge(2,3); current_graph.add_edge(0,1); + current_graph.add_vertex(10, 1, 1); + current_graph.add_vertex(20, 1, 1); // 0, 1 + current_graph.add_vertex(10, 1, 1); + current_graph.add_vertex(20, 1, 1); // 2, 3 + current_graph.add_edge(2, 3); + current_graph.add_edge(0, 1); std::vector> current_map = {22, 23, 20, 21}; IsomorphismMapper mapper(rep_graph); auto result_map_local = mapper.find_mapping(current_graph); std::unordered_map, vertex_idx_t> result_map; - for(const auto& [curr_local, rep_local] : result_map_local) { + for (const auto &[curr_local, rep_local] : result_map_local) { result_map[current_map[curr_local]] = rep_map[rep_local]; } @@ -137,14 +155,12 @@ BOOST_AUTO_TEST_CASE(Mapper_DisconnectedComponents) { // Mapping Option 1: // rep {10,11} -> current {20,21} // rep {12,13} -> current {22,23} - bool mapping1 = (result_map.at(20) == 12 && result_map.at(21) == 13 && - result_map.at(22) == 10 && result_map.at(23) == 11); + bool mapping1 = (result_map.at(20) == 12 && result_map.at(21) == 13 && result_map.at(22) == 10 && result_map.at(23) == 11); // Mapping Option 2: // rep {10,11} -> current {22,23} // rep {12,13} -> current {20,21} - bool mapping2 = (result_map.at(22) == 12 && result_map.at(23) == 13 && - result_map.at(20) == 10 && result_map.at(21) == 11); + bool mapping2 = (result_map.at(22) == 12 && result_map.at(23) == 13 && result_map.at(20) == 10 && result_map.at(21) == 11); BOOST_CHECK(mapping1 || mapping2); } @@ -162,21 +178,23 @@ BOOST_AUTO_TEST_CASE(Mapper_MultiPipeline) { // Pipeline 1 (local IDs 0,1,2) corresponds to rep pipeline 2 (global 20,21,22) // Pipeline 2 (local IDs 3,4,5) corresponds to rep pipeline 1 (global 10,11,12) constr_graph_t current_graph; - current_graph.add_vertex(10,1,1); // local 0, stage 0 - current_graph.add_vertex(20,1,1); // local 1, stage 1 - current_graph.add_vertex(30,1,1); // local 2, stage 2 - current_graph.add_vertex(10,1,1); // local 3, stage 0 - current_graph.add_vertex(20,1,1); // local 4, stage 1 - current_graph.add_vertex(30,1,1); // local 5, stage 2 - current_graph.add_edge(0, 1); current_graph.add_edge(1, 2); // First pipeline - current_graph.add_edge(3, 4); current_graph.add_edge(4, 5); // Second pipeline + current_graph.add_vertex(10, 1, 1); // local 0, stage 0 + current_graph.add_vertex(20, 1, 1); // local 1, stage 1 + current_graph.add_vertex(30, 1, 1); // local 2, stage 2 + current_graph.add_vertex(10, 1, 1); // local 3, stage 0 + current_graph.add_vertex(20, 1, 1); // local 4, stage 1 + current_graph.add_vertex(30, 1, 1); // local 5, stage 2 + current_graph.add_edge(0, 1); + current_graph.add_edge(1, 2); // First pipeline + current_graph.add_edge(3, 4); + current_graph.add_edge(4, 5); // Second pipeline std::vector> current_map = {120, 121, 122, 110, 111, 112}; IsomorphismMapper mapper(rep_graph); auto result_map_local = mapper.find_mapping(current_graph); std::unordered_map, vertex_idx_t> result_map; - for(const auto& [curr_local, rep_local] : result_map_local) { + for (const auto &[curr_local, rep_local] : result_map_local) { result_map[current_map[curr_local]] = rep_map[rep_local]; } @@ -185,12 +203,12 @@ BOOST_AUTO_TEST_CASE(Mapper_MultiPipeline) { // The two pipelines are symmetric, so the mapping can go either way. // Mapping Option 1: current pipeline 1 -> rep pipeline 1, current pipeline 2 -> rep pipeline 2 - bool mapping1 = (result_map.at(110) == 10 && result_map.at(111) == 11 && result_map.at(112) == 12 && - result_map.at(120) == 20 && result_map.at(121) == 21 && result_map.at(122) == 22); + bool mapping1 = (result_map.at(110) == 10 && result_map.at(111) == 11 && result_map.at(112) == 12 && result_map.at(120) == 20 + && result_map.at(121) == 21 && result_map.at(122) == 22); // Mapping Option 2: current pipeline 1 -> rep pipeline 2, current pipeline 2 -> rep pipeline 1 - bool mapping2 = (result_map.at(110) == 20 && result_map.at(111) == 21 && result_map.at(112) == 22 && - result_map.at(120) == 10 && result_map.at(121) == 11 && result_map.at(122) == 12); + bool mapping2 = (result_map.at(110) == 20 && result_map.at(111) == 21 && result_map.at(112) == 22 && result_map.at(120) == 10 + && result_map.at(121) == 11 && result_map.at(122) == 12); BOOST_CHECK(mapping1 || mapping2); } @@ -211,18 +229,22 @@ BOOST_AUTO_TEST_CASE(Mapper_ShuffledSymmetric) { // A naive mapping of local IDs (0->0, 1->1, etc.) would be incorrect // because the work weights would not match. constr_graph_t current_graph; - current_graph.add_vertex(20,1,1); // local 0 (work 20, right) - current_graph.add_vertex(10,1,1); // local 1 (work 10, left) - current_graph.add_vertex(20,1,1); // local 2 (work 20, right) - current_graph.add_vertex(10,1,1); // local 3 (work 10, left) - current_graph.add_vertex(20,1,1); // local 4 (work 20, right) - current_graph.add_vertex(10,1,1); // local 5 (work 10, left) + current_graph.add_vertex(20, 1, 1); // local 0 (work 20, right) + current_graph.add_vertex(10, 1, 1); // local 1 (work 10, left) + current_graph.add_vertex(20, 1, 1); // local 2 (work 20, right) + current_graph.add_vertex(10, 1, 1); // local 3 (work 10, left) + current_graph.add_vertex(20, 1, 1); // local 4 (work 20, right) + current_graph.add_vertex(10, 1, 1); // local 5 (work 10, left) // Edges for {5,0} -> {3,2} -> {1,4} - current_graph.add_edge(5, 3); current_graph.add_edge(5, 2); // Rung 1 - current_graph.add_edge(0, 3); current_graph.add_edge(0, 2); + current_graph.add_edge(5, 3); + current_graph.add_edge(5, 2); // Rung 1 + current_graph.add_edge(0, 3); + current_graph.add_edge(0, 2); - current_graph.add_edge(3, 1); current_graph.add_edge(3, 4); // Rung 2 - current_graph.add_edge(2, 1); current_graph.add_edge(2, 4); + current_graph.add_edge(3, 1); + current_graph.add_edge(3, 4); // Rung 2 + current_graph.add_edge(2, 1); + current_graph.add_edge(2, 4); std::vector> current_map = {111, 114, 113, 112, 115, 110}; @@ -230,15 +252,15 @@ BOOST_AUTO_TEST_CASE(Mapper_ShuffledSymmetric) { auto result_map_local = mapper.find_mapping(current_graph); std::unordered_map, vertex_idx_t> result_map; - for(const auto& [curr_local, rep_local] : result_map_local) { + for (const auto &[curr_local, rep_local] : result_map_local) { result_map[current_map[curr_local]] = rep_map[rep_local]; } BOOST_REQUIRE_EQUAL(result_map.size(), 6); // Check that structurally identical nodes are mapped, regardless of their original IDs. // E.g., current global 110 (from local 5, work 10) must map to a rep node with work 10. - BOOST_CHECK_EQUAL(result_map.at(110), 10); // current 5 (work 10) -> rep 0 (work 10) - BOOST_CHECK_EQUAL(result_map.at(111), 11); // current 0 (work 20) -> rep 1 (work 20) + BOOST_CHECK_EQUAL(result_map.at(110), 10); // current 5 (work 10) -> rep 0 (work 10) + BOOST_CHECK_EQUAL(result_map.at(111), 11); // current 0 (work 20) -> rep 1 (work 20) } BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/iterators.cpp b/tests/iterators.cpp index 04812cb8..55c9052e 100644 --- a/tests/iterators.cpp +++ b/tests/iterators.cpp @@ -24,10 +24,9 @@ limitations under the License. using namespace osp; BOOST_AUTO_TEST_CASE(integral_range_test) { - integral_range range(0, 10); BOOST_CHECK_EQUAL(range.size(), 10); - + int count = 0; for (auto it = range.begin(); it != range.end(); ++it) { BOOST_CHECK_EQUAL(*it, count); @@ -52,7 +51,7 @@ BOOST_AUTO_TEST_CASE(integral_range_test) { --count; } BOOST_CHECK_EQUAL(count, -1); - + count = 0; integral_range range2(10); BOOST_CHECK_EQUAL(range2.size(), 10); @@ -69,8 +68,6 @@ BOOST_AUTO_TEST_CASE(integral_range_test) { --count; } BOOST_CHECK_EQUAL(count, -1); - - count = 5; integral_range range3(5, 15); @@ -87,7 +84,4 @@ BOOST_AUTO_TEST_CASE(integral_range_test) { --count; } BOOST_CHECK_EQUAL(count, 4); - - - } diff --git a/tests/kl.cpp b/tests/kl.cpp index 773fe6b5..3a8a506f 100644 --- a/tests/kl.cpp +++ b/tests/kl.cpp @@ -20,32 +20,29 @@ limitations under the License. #include #include +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "test_graphs.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "test_graphs.hpp" using namespace osp; -template +template void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; int comm_weight = 1; for (const auto &v : dag.vertices()) { - dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 3 + 1)); dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 3 + 1)); } } BOOST_AUTO_TEST_CASE(kl_base_1) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -82,12 +79,11 @@ BOOST_AUTO_TEST_CASE(kl_base_1) { using kl_move = kl_move; - kl_total_comm_test kl; + kl_total_comm_test kl; kl.test_setup_schedule(schedule); - auto &kl_current_schedule = - kl.get_current_schedule(); + auto &kl_current_schedule = kl.get_current_schedule(); BOOST_CHECK_EQUAL(kl_current_schedule.step_max_work[0], 44.0); BOOST_CHECK_EQUAL(kl_current_schedule.step_second_max_work[0], 0.0); @@ -145,7 +141,6 @@ BOOST_AUTO_TEST_CASE(kl_base_1) { }; BOOST_AUTO_TEST_CASE(kl_total_comm_test_1) { - std::vector filenames_graph = test_graphs(); using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -161,18 +156,16 @@ BOOST_AUTO_TEST_CASE(kl_total_comm_test_1) { GreedyBspScheduler test_scheduler; for (auto &filename_graph : filenames_graph) { - BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph + = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); instance.getArchitecture().setNumberOfProcessors(4); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -194,7 +187,6 @@ BOOST_AUTO_TEST_CASE(kl_total_comm_test_1) { } BOOST_AUTO_TEST_CASE(kl_total_comm_test_2) { - std::vector filenames_graph = test_graphs(); using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -210,18 +202,16 @@ BOOST_AUTO_TEST_CASE(kl_total_comm_test_2) { GreedyBspScheduler test_scheduler; for (auto &filename_graph : filenames_graph) { - BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph + = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); instance.getArchitecture().setNumberOfProcessors(4); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -243,7 +233,6 @@ BOOST_AUTO_TEST_CASE(kl_total_comm_test_2) { } BOOST_AUTO_TEST_CASE(kl_total_cut_test_1) { - std::vector filenames_graph = test_graphs(); using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -259,17 +248,15 @@ BOOST_AUTO_TEST_CASE(kl_total_cut_test_1) { GreedyBspScheduler test_scheduler; for (auto &filename_graph : filenames_graph) { - BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph + = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); instance.getArchitecture().setNumberOfProcessors(4); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -291,7 +278,6 @@ BOOST_AUTO_TEST_CASE(kl_total_cut_test_1) { } BOOST_AUTO_TEST_CASE(kl_total_cut_test_2) { - std::vector filenames_graph = test_graphs(); using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -307,17 +293,15 @@ BOOST_AUTO_TEST_CASE(kl_total_cut_test_2) { GreedyBspScheduler test_scheduler; for (auto &filename_graph : filenames_graph) { - BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph + = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); instance.getArchitecture().setNumberOfProcessors(4); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -337,4 +321,3 @@ BOOST_AUTO_TEST_CASE(kl_total_cut_test_2) { BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); } } - diff --git a/tests/kl_bsp_affinity_test.cpp b/tests/kl_bsp_affinity_test.cpp index 9d67de8e..24418309 100644 --- a/tests/kl_bsp_affinity_test.cpp +++ b/tests/kl_bsp_affinity_test.cpp @@ -16,20 +16,20 @@ BOOST_AUTO_TEST_CASE(simple_parent_child_test) { using VertexType = graph::vertex_idx; graph dag; - const VertexType v0 = dag.add_vertex(10, 5, 2); // work=10, mem=5, comm=2 - const VertexType v1 = dag.add_vertex(8, 4, 1); // work=8, mem=4, comm=1 - dag.add_edge(v0, v1, 3); // edge weight=3 + const VertexType v0 = dag.add_vertex(10, 5, 2); // work=10, mem=5, comm=2 + const VertexType v1 = dag.add_vertex(8, 4, 1); // work=8, mem=4, comm=1 + dag.add_edge(v0, v1, 3); // edge weight=3 BspArchitecture arch; arch.setNumberOfProcessors(2); BspInstance instance(dag, arch); - instance.setCommunicationCosts(10); // comm multiplier + instance.setCommunicationCosts(10); // comm multiplier instance.setSynchronisationCosts(5); BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1}); // v0 on p0, v1 on p1 - schedule.setAssignedSupersteps({0, 1}); // v0 in step 0, v1 in step 1 + schedule.setAssignedProcessors({0, 1}); // v0 on p0, v1 on p1 + schedule.setAssignedSupersteps({0, 1}); // v0 in step 0, v1 in step 1 schedule.updateNumberOfSupersteps(); using comm_cost_t = kl_bsp_comm_cost_function; @@ -54,11 +54,11 @@ BOOST_AUTO_TEST_CASE(simple_parent_child_test) { /** * Helper to validate comm datastructures by comparing with freshly computed ones */ -template -bool validate_comm_datastructures( - const max_comm_datastructure &comm_ds_incremental, - kl_active_schedule_t &active_sched, const BspInstance &instance, const std::string &context) { - +template +bool validate_comm_datastructures(const max_comm_datastructure &comm_ds_incremental, + kl_active_schedule_t &active_sched, + const BspInstance &instance, + const std::string &context) { // 1. Clone Schedule BspSchedule current_schedule(instance); active_sched.write_schedule(current_schedule); @@ -122,12 +122,10 @@ bool validate_comm_datastructures( /** * Helper to validate affinity tables by comparing with freshly computed ones */ -template -bool validate_affinity_tables( - kl_improver_test &kl_incremental, - const BspInstance &instance, const std::string &context) { - +template +bool validate_affinity_tables(kl_improver_test &kl_incremental, + const BspInstance &instance, + const std::string &context) { // 1. Get current schedule from incremental BspSchedule current_schedule(instance); kl_incremental.get_active_schedule_test(current_schedule); @@ -138,12 +136,11 @@ bool validate_affinity_tables( // Get selected nodes from incremental std::vector> selected_nodes; - + const size_t active_count = kl_incremental.get_affinity_table().size(); for (size_t i = 0; i < active_count; ++i) { selected_nodes.push_back(kl_incremental.get_affinity_table().get_selected_nodes()[i]); } - std::cout << "\n [" << context << "] Validating " << selected_nodes.size() << " selected nodes: { "; for (const auto n : selected_nodes) { @@ -159,17 +156,17 @@ bool validate_affinity_tables( const unsigned num_steps = kl_incremental.get_active_schedule().num_steps(); // 3. Compare affinity tables for each selected node - - for (const auto & node : selected_nodes) { - + + for (const auto &node : selected_nodes) { const auto &affinity_inc = kl_incremental.get_affinity_table().get_affinity_table(node); const auto &affinity_fresh = kl_fresh.get_affinity_table().get_affinity_table(node); unsigned node_step = kl_incremental.get_active_schedule().assigned_superstep(node); for (unsigned p = 0; p < num_procs; ++p) { - if (p >= affinity_inc.size() || p >= affinity_fresh.size()) + if (p >= affinity_inc.size() || p >= affinity_fresh.size()) { continue; + } for (unsigned idx = 0; idx < affinity_inc[p].size() && idx < affinity_fresh[p].size(); ++idx) { int step_offset = static_cast(idx) - static_cast(window_size); @@ -203,12 +200,12 @@ BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) { graph dag; // Create 6 vertices with specific comm weights - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 5, 1); // 2 - dag.add_vertex(1, 1, 1); // 3 - dag.add_vertex(1, 2, 1); // 4 - dag.add_vertex(1, 1, 1); // 5 + dag.add_vertex(1, 10, 1); // 0 + dag.add_vertex(1, 1, 1); // 1 + dag.add_vertex(1, 5, 1); // 2 + dag.add_vertex(1, 1, 1); // 3 + dag.add_vertex(1, 2, 1); // 4 + dag.add_vertex(1, 1, 1); // 5 // Add edges dag.add_edge(0, 1, 1); @@ -244,8 +241,8 @@ BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) { double after_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "test_update_datastructure_after_move")); + BOOST_CHECK(validate_comm_datastructures( + kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_update_datastructure_after_move")); BOOST_CHECK_CLOSE(after_recomputed, after_tracked, 0.00001); } @@ -253,10 +250,10 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) { graph dag; // Create a linear chain: 0 -> 1 -> 2 -> 3 - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 8, 1); // 1 - dag.add_vertex(1, 6, 1); // 2 - dag.add_vertex(1, 4, 1); // 3 + dag.add_vertex(1, 10, 1); // 0 + dag.add_vertex(1, 8, 1); // 1 + dag.add_vertex(1, 6, 1); // 2 + dag.add_vertex(1, 4, 1); // 3 dag.add_edge(0, 1, 1); dag.add_edge(1, 2, 1); @@ -285,24 +282,24 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) { double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "test_multiple_sequential_moves_1")); + BOOST_CHECK(validate_comm_datastructures( + kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_multiple_sequential_moves_1")); BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "test_multiple_sequential_moves_2")); + BOOST_CHECK(validate_comm_datastructures( + kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_multiple_sequential_moves_2")); BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "test_multiple_sequential_moves_3")); + BOOST_CHECK(validate_comm_datastructures( + kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_multiple_sequential_moves_3")); BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); // After: Node 0 has 3 local children @@ -315,10 +312,10 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) { graph dag; // Tree structure: Node 0 has three children (1, 2, 3) - dag.add_vertex(1, 1, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 1, 1); // 2 - dag.add_vertex(1, 1, 1); // 3 + dag.add_vertex(1, 1, 1); // 0 + dag.add_vertex(1, 1, 1); // 1 + dag.add_vertex(1, 1, 1); // 2 + dag.add_vertex(1, 1, 1); // 3 dag.add_edge(0, 1, 1); dag.add_edge(0, 2, 1); @@ -348,24 +345,24 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) { double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "test_node_with_multiple_children")); + BOOST_CHECK(validate_comm_datastructures( + kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_node_with_multiple_children")); BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "test_node_with_multiple_children_2")); + BOOST_CHECK(validate_comm_datastructures( + kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_node_with_multiple_children_2")); BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "test_node_with_multiple_children_3")); + BOOST_CHECK(validate_comm_datastructures( + kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_node_with_multiple_children_3")); BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); // After: Node 0 has 3 local children @@ -378,9 +375,9 @@ BOOST_AUTO_TEST_CASE(test_cross_step_moves) { graph dag; // 0 -> 1 -> 2 - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 8, 1); // 1 - dag.add_vertex(1, 6, 1); // 2 + dag.add_vertex(1, 10, 1); // 0 + dag.add_vertex(1, 8, 1); // 1 + dag.add_vertex(1, 6, 1); // 2 dag.add_edge(0, 1, 1); dag.add_edge(1, 2, 1); @@ -408,8 +405,8 @@ BOOST_AUTO_TEST_CASE(test_cross_step_moves) { double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "test_cross_step_moves_1")); + BOOST_CHECK(validate_comm_datastructures( + kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "test_cross_step_moves_1")); BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); } @@ -423,8 +420,8 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario) { const auto v4 = dag.add_vertex(5, 6, 2); const auto v5 = dag.add_vertex(6, 5, 6); const auto v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); // v7 (index 6) - const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) + dag.add_vertex(8, 3, 4); // v7 (index 6) + const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) dag.add_edge(v1, v2, 2); dag.add_edge(v1, v3, 2); @@ -436,7 +433,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario) { dag.add_edge(v4, v8, 9); BspArchitecture arch; - arch.setNumberOfProcessors(2); // P0, P1 + arch.setNumberOfProcessors(2); // P0, P1 arch.setCommunicationCosts(1); arch.setSynchronisationCosts(1); @@ -458,16 +455,14 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario) { double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "complex_move1")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move1")); BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "complex_move2")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move2")); BOOST_CHECK(validate_affinity_tables(kl, instance, "complex_move2")); BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); @@ -475,24 +470,21 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario) { double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "complex_move3")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move3")); BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move4_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "complex_move4")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move4")); BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move5_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move5_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "complex_move5")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move5")); BOOST_CHECK_CLOSE(after_move5_recomputed, after_move5_tracked, 0.00001); } @@ -505,8 +497,8 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { const auto v4 = dag.add_vertex(5, 6, 2); const auto v5 = dag.add_vertex(6, 5, 6); const auto v6 = dag.add_vertex(7, 4, 2); - const auto v7 = dag.add_vertex(8, 3, 4); // v7 (index 6) - const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) + const auto v7 = dag.add_vertex(8, 3, 4); // v7 (index 6) + const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) dag.add_edge(v1, v2, 2); dag.add_edge(v1, v3, 2); @@ -518,7 +510,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { dag.add_edge(v4, v8, 9); BspArchitecture arch; - arch.setNumberOfProcessors(2); // P0, P1 + arch.setNumberOfProcessors(2); // P0, P1 arch.setCommunicationCosts(1); arch.setSynchronisationCosts(1); @@ -538,8 +530,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { kl.insert_gain_heap_test({v1}); kl.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "complex_move1")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move1")); BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); kl_improver_test kl2; @@ -548,8 +539,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { kl2.insert_gain_heap_test({v2}); kl2.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance, - "complex_move2")); + BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance, "complex_move2")); BOOST_CHECK_CLOSE(kl2.get_comm_cost_f().compute_schedule_cost_test(), kl2.get_current_cost(), 0.00001); kl_improver_test kl3; @@ -558,8 +548,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { kl3.insert_gain_heap_test({v3}); kl3.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance, - "complex_move3")); + BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance, "complex_move3")); BOOST_CHECK_CLOSE(kl3.get_comm_cost_f().compute_schedule_cost_test(), kl3.get_current_cost(), 0.00001); kl_improver_test kl4; @@ -568,8 +557,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { kl4.insert_gain_heap_test({v4}); kl4.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance, - "complex_move4")); + BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance, "complex_move4")); BOOST_CHECK_CLOSE(kl4.get_comm_cost_f().compute_schedule_cost_test(), kl4.get_current_cost(), 0.00001); kl_improver_test kl5; @@ -578,8 +566,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { kl5.insert_gain_heap_test({v5}); kl5.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance, - "complex_move5")); + BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance, "complex_move5")); BOOST_CHECK_CLOSE(kl5.get_comm_cost_f().compute_schedule_cost_test(), kl5.get_current_cost(), 0.00001); kl_improver_test kl6; @@ -588,8 +575,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { kl6.insert_gain_heap_test({v6}); kl6.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance, - "complex_move6")); + BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance, "complex_move6")); BOOST_CHECK_CLOSE(kl6.get_comm_cost_f().compute_schedule_cost_test(), kl6.get_current_cost(), 0.00001); kl_improver_test kl7; @@ -598,8 +584,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { kl7.insert_gain_heap_test({v7}); kl7.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance, - "complex_move7")); + BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance, "complex_move7")); BOOST_CHECK_CLOSE(kl7.get_comm_cost_f().compute_schedule_cost_test(), kl7.get_current_cost(), 0.00001); kl_improver_test kl8; @@ -608,8 +593,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute) { kl8.insert_gain_heap_test({v8}); kl8.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance, - "complex_move8")); + BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance, "complex_move8")); BOOST_CHECK_CLOSE(kl8.get_comm_cost_f().compute_schedule_cost_test(), kl8.get_current_cost(), 0.00001); } @@ -622,8 +606,8 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { const auto v4 = dag.add_vertex(5, 6, 2); const auto v5 = dag.add_vertex(6, 5, 6); const auto v6 = dag.add_vertex(7, 4, 2); - const auto v7 = dag.add_vertex(8, 3, 4); // v7 (index 6) - const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) + const auto v7 = dag.add_vertex(8, 3, 4); // v7 (index 6) + const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) dag.add_edge(v1, v2, 2); dag.add_edge(v1, v5, 2); @@ -645,7 +629,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { dag.add_edge(v7, v8, 2); BspArchitecture arch; - arch.setNumberOfProcessors(2); // P0, P1 + arch.setNumberOfProcessors(2); // P0, P1 arch.setCommunicationCosts(1); arch.setSynchronisationCosts(1); @@ -665,8 +649,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { kl.insert_gain_heap_test({v1}); kl.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "complex_move1")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "complex_move1")); BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); kl_improver_test kl2; @@ -675,8 +658,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { kl2.insert_gain_heap_test({v2}); kl2.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance, - "complex_move2")); + BOOST_CHECK(validate_comm_datastructures(kl2.get_comm_cost_f().comm_ds, kl2.get_active_schedule(), instance, "complex_move2")); BOOST_CHECK_CLOSE(kl2.get_comm_cost_f().compute_schedule_cost_test(), kl2.get_current_cost(), 0.00001); kl_improver_test kl3; @@ -685,8 +667,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { kl3.insert_gain_heap_test({v3}); kl3.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance, - "complex_move3")); + BOOST_CHECK(validate_comm_datastructures(kl3.get_comm_cost_f().comm_ds, kl3.get_active_schedule(), instance, "complex_move3")); BOOST_CHECK_CLOSE(kl3.get_comm_cost_f().compute_schedule_cost_test(), kl3.get_current_cost(), 0.00001); kl_improver_test kl4; @@ -695,8 +676,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { kl4.insert_gain_heap_test({v4}); kl4.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance, - "complex_move4")); + BOOST_CHECK(validate_comm_datastructures(kl4.get_comm_cost_f().comm_ds, kl4.get_active_schedule(), instance, "complex_move4")); BOOST_CHECK_CLOSE(kl4.get_comm_cost_f().compute_schedule_cost_test(), kl4.get_current_cost(), 0.00001); kl_improver_test kl5; @@ -705,8 +685,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { kl5.insert_gain_heap_test({v5}); kl5.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance, - "complex_move5")); + BOOST_CHECK(validate_comm_datastructures(kl5.get_comm_cost_f().comm_ds, kl5.get_active_schedule(), instance, "complex_move5")); BOOST_CHECK_CLOSE(kl5.get_comm_cost_f().compute_schedule_cost_test(), kl5.get_current_cost(), 0.00001); kl_improver_test kl6; @@ -715,8 +694,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { kl6.insert_gain_heap_test({v6}); kl6.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance, - "complex_move6")); + BOOST_CHECK(validate_comm_datastructures(kl6.get_comm_cost_f().comm_ds, kl6.get_active_schedule(), instance, "complex_move6")); BOOST_CHECK_CLOSE(kl6.get_comm_cost_f().compute_schedule_cost_test(), kl6.get_current_cost(), 0.00001); kl_improver_test kl7; @@ -725,8 +703,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { kl7.insert_gain_heap_test({v7}); kl7.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance, - "complex_move7")); + BOOST_CHECK(validate_comm_datastructures(kl7.get_comm_cost_f().comm_ds, kl7.get_active_schedule(), instance, "complex_move7")); BOOST_CHECK_CLOSE(kl7.get_comm_cost_f().compute_schedule_cost_test(), kl7.get_current_cost(), 0.00001); kl_improver_test kl8; @@ -735,8 +712,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_only_compute_2) { kl8.insert_gain_heap_test({v8}); kl8.run_inner_iteration_test(); - BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance, - "complex_move8")); + BOOST_CHECK(validate_comm_datastructures(kl8.get_comm_cost_f().comm_ds, kl8.get_active_schedule(), instance, "complex_move8")); BOOST_CHECK_CLOSE(kl8.get_comm_cost_f().compute_schedule_cost_test(), kl8.get_current_cost(), 0.00001); } @@ -745,7 +721,7 @@ BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) { graph dag = osp::construct_grid_dag(5, 5); BspArchitecture arch; - arch.setNumberOfProcessors(4); // P0..P3 + arch.setNumberOfProcessors(4); // P0..P3 arch.setCommunicationCosts(1); arch.setSynchronisationCosts(1); @@ -791,32 +767,28 @@ BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) { double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK( - validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move1")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move1")); BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK( - validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move2")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move2")); BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK( - validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move3")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move3")); BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move4_tracked = kl.get_current_cost(); - BOOST_CHECK( - validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move4")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "grid_move4")); BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001); } @@ -867,32 +839,28 @@ BOOST_AUTO_TEST_CASE(test_butterfly_graph_moves) { double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "butterfly_move1")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move1")); BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "butterfly_move2")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move2")); BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "butterfly_move3")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move3")); BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move4_tracked = kl.get_current_cost(); - BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, - "butterfly_move4")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "butterfly_move4")); BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001); } @@ -937,31 +905,27 @@ BOOST_AUTO_TEST_CASE(test_ladder_graph_moves) { double after_move1_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move1_tracked = kl.get_current_cost(); - BOOST_CHECK( - validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move1")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move1")); BOOST_CHECK_CLOSE(after_move1_recomputed, after_move1_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move2_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move2_tracked = kl.get_current_cost(); - BOOST_CHECK( - validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move2")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move2")); BOOST_CHECK_CLOSE(after_move2_recomputed, after_move2_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move3_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move3_tracked = kl.get_current_cost(); - BOOST_CHECK( - validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move3")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move3")); BOOST_CHECK_CLOSE(after_move3_recomputed, after_move3_tracked, 0.00001); kl.run_inner_iteration_test(); double after_move4_recomputed = kl.get_comm_cost_f().compute_schedule_cost_test(); double after_move4_tracked = kl.get_current_cost(); - BOOST_CHECK( - validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move4")); + BOOST_CHECK(validate_comm_datastructures(kl.get_comm_cost_f().comm_ds, kl.get_active_schedule(), instance, "ladder_move4")); BOOST_CHECK_CLOSE(after_move4_recomputed, after_move4_tracked, 0.00001); -} \ No newline at end of file +} diff --git a/tests/kl_bsp_cost.cpp b/tests/kl_bsp_cost.cpp index 05a5882c..01779f45 100644 --- a/tests/kl_bsp_cost.cpp +++ b/tests/kl_bsp_cost.cpp @@ -75,12 +75,12 @@ BOOST_AUTO_TEST_CASE(test_arrange_superstep_comm_data) { comm_ds.arrange_superstep_comm_data(step); BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 1); // Only proc 0 has 10 - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 8); // Next highest is 8 (from recv) + BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 1); // Only proc 0 has 10 + BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 8); // Next highest is 8 (from recv) // Case 2: Shared Max comm_ds.reset_superstep(step); - comm_ds.step_proc_send(step, 0) = 10; // Need to re-set this as reset clears it + comm_ds.step_proc_send(step, 0) = 10; // Need to re-set this as reset clears it comm_ds.step_proc_send(step, 1) = 10; comm_ds.step_proc_send(step, 2) = 2; comm_ds.step_proc_send(step, 3) = 1; @@ -92,8 +92,8 @@ BOOST_AUTO_TEST_CASE(test_arrange_superstep_comm_data) { comm_ds.arrange_superstep_comm_data(step); BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 2); // Proc 0 and 1 - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 5); // Next highest is 5 (from recv) + BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 2); // Proc 0 and 1 + BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 5); // Next highest is 5 (from recv) // Case 3: Max in Recv comm_ds.reset_superstep(step); @@ -124,8 +124,8 @@ BOOST_AUTO_TEST_CASE(test_arrange_superstep_comm_data) { comm_ds.arrange_superstep_comm_data(step); BOOST_CHECK_EQUAL(comm_ds.step_max_comm(step), 10); - BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 8); // 4 sends + 4 recvs - BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 0); // If all removed, 0. + BOOST_CHECK_EQUAL(comm_ds.step_max_comm_count(step), 8); // 4 sends + 4 recvs + BOOST_CHECK_EQUAL(comm_ds.step_second_max_comm(step), 0); // If all removed, 0. // Case 5: Max removed, second max is from same type (Send) comm_ds.reset_superstep(step); @@ -134,8 +134,9 @@ BOOST_AUTO_TEST_CASE(test_arrange_superstep_comm_data) { comm_ds.step_proc_send(step, 2) = 2; comm_ds.step_proc_send(step, 3) = 1; - for (unsigned i = 0; i < 4; ++i) + for (unsigned i = 0; i < 4; ++i) { comm_ds.step_proc_receive(step, i) = 5; + } comm_ds.arrange_superstep_comm_data(step); @@ -182,7 +183,7 @@ BOOST_AUTO_TEST_CASE(test_compute_comm_datastructures) { // Add edges // 0 -> 1 - dag.add_edge(0, 1, 1); // Edge weight ignored by max_comm_datastructure + dag.add_edge(0, 1, 1); // Edge weight ignored by max_comm_datastructure // 2 -> 3 dag.add_edge(2, 3, 1); // 4 -> 5 @@ -258,11 +259,11 @@ BOOST_AUTO_TEST_CASE(test_compute_comm_datastructures) { /** * Helper to validate comm datastructures by comparing with freshly computed ones */ -template -bool validate_comm_datastructures( - const max_comm_datastructure &comm_ds_incremental, - kl_active_schedule_t &active_sched, const BspInstance &instance, const std::string &context) { - +template +bool validate_comm_datastructures(const max_comm_datastructure &comm_ds_incremental, + kl_active_schedule_t &active_sched, + const BspInstance &instance, + const std::string &context) { // 1. Clone Schedule BspSchedule current_schedule(instance); active_sched.write_schedule(current_schedule); @@ -327,12 +328,12 @@ BOOST_AUTO_TEST_CASE(test_update_datastructure_after_move) { graph dag; // Create 6 vertices with specific comm weights - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 5, 1); // 2 - dag.add_vertex(1, 1, 1); // 3 - dag.add_vertex(1, 2, 1); // 4 - dag.add_vertex(1, 1, 1); // 5 + dag.add_vertex(1, 10, 1); // 0 + dag.add_vertex(1, 1, 1); // 1 + dag.add_vertex(1, 5, 1); // 2 + dag.add_vertex(1, 1, 1); // 3 + dag.add_vertex(1, 2, 1); // 4 + dag.add_vertex(1, 1, 1); // 5 // Add edges dag.add_edge(0, 1, 1); @@ -413,10 +414,10 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) { graph dag; // Create a linear chain: 0 -> 1 -> 2 -> 3 - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 8, 1); // 1 - dag.add_vertex(1, 6, 1); // 2 - dag.add_vertex(1, 4, 1); // 3 + dag.add_vertex(1, 10, 1); // 0 + dag.add_vertex(1, 8, 1); // 1 + dag.add_vertex(1, 6, 1); // 2 + dag.add_vertex(1, 4, 1); // 3 dag.add_edge(0, 1, 1); dag.add_edge(1, 2, 1); @@ -460,9 +461,9 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) { comm_ds.update_datastructure_after_move(move1, 0, 0); BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_multiple_sequential_moves_1")); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 8); // Node 1 sends - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0); // Node was moved away - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); // No receives at P0 + BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 8); // Node 1 sends + BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 1), 0); // Node was moved away + BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); // No receives at P0 // Move 2: Move node 2 from P2 to P0 (chain more local) kl_move move2(2, 0.0, 2, 0, 0, 0); @@ -471,9 +472,9 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) { BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_multiple_sequential_moves_2")); // After move2: Nodes 0,1,2 all at P0, only 3 at P3 - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 6); // Only node 2 sends off-proc - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 2), 0); // Node moved away - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 6); // P3 receives from node 2 + BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 6); // Only node 2 sends off-proc + BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 2), 0); // Node moved away + BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 6); // P3 receives from node 2 // Move 3: Move node 3 to P0 (everything local) kl_move move3(3, 0.0, 3, 0, 0, 0); @@ -482,18 +483,18 @@ BOOST_AUTO_TEST_CASE(test_multiple_sequential_moves) { BOOST_CHECK(validate_comm_datastructures(comm_ds, kl_sched, instance, "test_multiple_sequential_moves_3")); // After move3: All nodes at P0, all communication is local - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // All local - BOOST_CHECK_EQUAL(comm_ds.step_max_comm(0), 0); // No communication cost + BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // All local + BOOST_CHECK_EQUAL(comm_ds.step_max_comm(0), 0); // No communication cost } BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) { graph dag; // Tree structure: Node 0 has three children (1, 2, 3) - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 1, 1); // 2 - dag.add_vertex(1, 1, 1); // 3 + dag.add_vertex(1, 10, 1); // 0 + dag.add_vertex(1, 1, 1); // 1 + dag.add_vertex(1, 1, 1); // 2 + dag.add_vertex(1, 1, 1); // 3 dag.add_edge(0, 1, 1); dag.add_edge(0, 2, 1); @@ -538,7 +539,7 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) { // After: Node 0 has 1 local child, 2 off-proc (P2, P3) // Send cost = 10 * 2 = 20 BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 20); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 0); // No longer receives + BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 1), 0); // No longer receives BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 2), 10); BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 10); @@ -550,7 +551,7 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) { // After: Node 0 has 2 local children, 1 off-proc (P3) // Send cost = 10 * 1 = 10 BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 10); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 2), 0); // No longer receives + BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 2), 0); // No longer receives BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 10); // Move child 3 to P0 (all local) @@ -562,16 +563,16 @@ BOOST_AUTO_TEST_CASE(test_node_with_multiple_children) { // After: Node 0 has 3 local children // Send cost = 10 * 0 = 0 (all local) BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 0); // No longer receives + BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 3), 0); // No longer receives } BOOST_AUTO_TEST_CASE(test_cross_step_moves) { graph dag; // 0 -> 1 -> 2 - dag.add_vertex(1, 10, 1); // 0 - dag.add_vertex(1, 8, 1); // 1 - dag.add_vertex(1, 6, 1); // 2 + dag.add_vertex(1, 10, 1); // 0 + dag.add_vertex(1, 8, 1); // 1 + dag.add_vertex(1, 6, 1); // 2 dag.add_edge(0, 1, 1); dag.add_edge(1, 2, 1); @@ -610,17 +611,17 @@ BOOST_AUTO_TEST_CASE(test_cross_step_moves) { kl_sched.apply_move(move1, active_schedule_data); comm_ds.update_datastructure_after_move(move1, 0, 2); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // Local (same processor) - BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); // No receive needed + BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // Local (same processor) + BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(0, 0), 0); // No receive needed - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0); // Local (same processor) - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 0); // Node moved away + BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 0), 0); // Local (same processor) + BOOST_CHECK_EQUAL(comm_ds.step_proc_send(1, 1), 0); // Node moved away kl_move move2(1, 0.0, 0, 1, 0, 0); kl_sched.apply_move(move2, active_schedule_data); comm_ds.update_datastructure_after_move(move2, 0, 2); - BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // All local at P0 + BOOST_CHECK_EQUAL(comm_ds.step_proc_send(0, 0), 0); // All local at P0 } BOOST_AUTO_TEST_CASE(test_complex_scenario_user_provided) { @@ -634,8 +635,8 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_user_provided) { const auto v4 = dag.add_vertex(5, 6, 2); const auto v5 = dag.add_vertex(6, 5, 6); const auto v6 = dag.add_vertex(7, 4, 2); - dag.add_vertex(8, 3, 4); // v7 (index 6) - const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) + dag.add_vertex(8, 3, 4); // v7 (index 6) + const auto v8 = dag.add_vertex(9, 2, 1); // v8 (index 7) // Edges dag.add_edge(v1, v2, 2); @@ -648,7 +649,7 @@ BOOST_AUTO_TEST_CASE(test_complex_scenario_user_provided) { dag.add_edge(v4, v8, 9); BspArchitecture arch; - arch.setNumberOfProcessors(2); // P0, P1 + arch.setNumberOfProcessors(2); // P0, P1 arch.setCommunicationCosts(1); arch.setSynchronisationCosts(1); @@ -756,7 +757,7 @@ BOOST_AUTO_TEST_CASE(test_grid_graph_complex_moves) { graph dag = osp::construct_grid_dag(5, 5); BspArchitecture arch; - arch.setNumberOfProcessors(4); // P0..P3 + arch.setNumberOfProcessors(4); // P0..P3 arch.setCommunicationCosts(1); arch.setSynchronisationCosts(1); @@ -1187,7 +1188,7 @@ BOOST_AUTO_TEST_CASE(test_lazy_and_buffered_modes) { BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0); using kl_move = osp::kl_move_struct; - kl_move move(1, 0.0, 1, 2, 1, 3); // Node 1, Step 2->3, Proc 1->1 + kl_move move(1, 0.0, 1, 2, 1, 3); // Node 1, Step 2->3, Proc 1->1 kl_sched.apply_move(move, active_schedule_data); comm_ds.update_datastructure_after_move(move, 0, 4); @@ -1254,7 +1255,7 @@ BOOST_AUTO_TEST_CASE(test_lazy_and_buffered_modes) { BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0); using kl_move = osp::kl_move_struct; - kl_move move(1, 0.0, 1, 2, 1, 3); // Node 1, Step 2->3, Proc 1->1 + kl_move move(1, 0.0, 1, 2, 1, 3); // Node 1, Step 2->3, Proc 1->1 kl_sched.apply_move(move, active_schedule_data); comm_ds.update_datastructure_after_move(move, 0, 4); @@ -1271,4 +1272,4 @@ BOOST_AUTO_TEST_CASE(test_lazy_and_buffered_modes) { BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(3, 1), 0); BOOST_CHECK_EQUAL(comm_ds.step_proc_receive(4, 1), 0); } -} \ No newline at end of file +} diff --git a/tests/kl_bsp_improver_test.cpp b/tests/kl_bsp_improver_test.cpp index 6e1611ec..b0a0a3db 100644 --- a/tests/kl_bsp_improver_test.cpp +++ b/tests/kl_bsp_improver_test.cpp @@ -17,14 +17,12 @@ using namespace osp; -template +template void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; int comm_weight = 7; for (const auto &v : dag.vertices()) { - dag.set_vertex_work_weight(v, static_cast>(mem_weight++ % 10 + 2)); dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 10 + 2)); dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 10 + 2)); @@ -32,7 +30,6 @@ void add_mem_weights(Graph_t &dag) { } BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -247,4 +244,4 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // BOOST_CHECK_EQUAL(schedule_cs_2.satisfiesPrecedenceConstraints(), true); // } -// } \ No newline at end of file +// } diff --git a/tests/kl_lambda.cpp b/tests/kl_lambda.cpp index 31f86130..8090fa1d 100644 --- a/tests/kl_lambda.cpp +++ b/tests/kl_lambda.cpp @@ -20,14 +20,12 @@ limitations under the License. #include #include +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" - -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_improver_test.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include_mt.hpp" @@ -36,21 +34,19 @@ limitations under the License. using namespace osp; -template +template void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; int comm_weight = 7; for (const auto &v : dag.vertices()) { - dag.set_vertex_work_weight(v, static_cast>(mem_weight++ % 10 + 2)); dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 10 + 2)); dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 10 + 2)); } } -template +template void add_node_types(Graph_t &dag) { unsigned node_type = 0; @@ -59,46 +55,50 @@ void add_node_types(Graph_t &dag) { } } -template +template void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::set &nodes) { - for (auto i : nodes) { BOOST_CHECK_EQUAL(table_1[i].size(), table_2[i].size()); - if (table_1[i].size() != table_2[i].size()) + if (table_1[i].size() != table_2[i].size()) { continue; + } for (size_t j = 0; j < table_1[i].size(); ++j) { BOOST_CHECK_EQUAL(table_1[i][j].size(), table_2[i][j].size()); - if (table_1[i][j].size() != table_2[i][j].size()) + if (table_1[i][j].size() != table_2[i][j].size()) { continue; + } for (size_t k = 0; k < table_1[i][j].size(); ++k) { BOOST_CHECK(std::abs(table_1[i][j][k] - table_2[i][j][k]) < 0.000001); if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) { - std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] << ", table_2=" << table_2[i][j][k] << std::endl; + std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] + << ", table_2=" << table_2[i][j][k] << std::endl; } } } } } -void check_equal_lambda_map(const std::vector> &map_1, const std::vector> &map_2) { +void check_equal_lambda_map(const std::vector> &map_1, + const std::vector> &map_2) { BOOST_CHECK_EQUAL(map_1.size(), map_2.size()); - if (map_1.size() != map_2.size()) + if (map_1.size() != map_2.size()) { return; + } for (size_t i = 0; i < map_1.size(); ++i) { for (const auto &[key, value] : map_1[i]) { BOOST_CHECK_EQUAL(value, map_2[i].at(key)); if (value != map_2[i].at(key)) { - std::cout << "Mismatch at [" << i << "][" << key << "]: map_1=" << value << ", map_2=" << map_2[i].at(key) << std::endl; + std::cout << "Mismatch at [" << i << "][" << key << "]: map_1=" << value << ", map_2=" << map_2[i].at(key) + << std::endl; } } } } BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) { - std::vector filenames_graph = test_graphs(); using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -114,18 +114,16 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) { GreedyBspScheduler test_scheduler; for (auto &filename_graph : filenames_graph) { - BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph + = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); instance.getArchitecture().setNumberOfProcessors(4); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -158,7 +156,6 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_with_node_types_test) { } BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { - std::vector filenames_graph = test_graphs(); using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -174,18 +171,16 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { GreedyBspScheduler test_scheduler; for (auto &filename_graph : filenames_graph) { - BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph + = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); instance.getArchitecture().setNumberOfProcessors(4); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -495,7 +490,6 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_on_test_graphs) { // }; BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -552,7 +546,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) { auto node_selection = kl.insert_gain_heap_test_penalty({2, 3}); - auto recompute_max_gain = kl.run_inner_iteration_test(); // best move 3 + auto recompute_max_gain = kl.run_inner_iteration_test(); // best move 3 std::cout << "------------------------recompute max_gain: { "; for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; @@ -561,7 +555,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) { BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); // best move 0 + recompute_max_gain = kl.run_inner_iteration_test(); // best move 0 std::cout << "recompute max_gain: { "; for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; @@ -570,7 +564,7 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) { BOOST_CHECK_CLOSE(kl.get_comm_cost_f().compute_schedule_cost_test(), kl.get_current_cost(), 0.00001); - recompute_max_gain = kl.run_inner_iteration_test(); // best move 1 + recompute_max_gain = kl.run_inner_iteration_test(); // best move 1 std::cout << "recompute max_gain: { "; for (const auto &[key, value] : recompute_max_gain) { std::cout << key << " "; @@ -590,7 +584,6 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_penalty_test) { } BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -732,7 +725,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // schedule.updateNumberOfSupersteps(); -// std::cout << "initial scedule with costs: " << schedule.computeTotalLambdaCosts() << " and " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// std::cout << "initial scedule with costs: " << schedule.computeTotalLambdaCosts() << " and " << +// schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BspSchedule schedule_2(schedule); @@ -746,7 +740,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // auto finish_time = std::chrono::high_resolution_clock::now(); // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); -// std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " +// << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); @@ -759,7 +754,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // // duration = std::chrono::duration_cast(finish_time - start_time).count(); -// // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; +// // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " +// << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; // // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); @@ -808,7 +804,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // schedule.updateNumberOfSupersteps(); -// std::cout << "initial scedule with costs: " << schedule.computeTotalLambdaCosts() << " and " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// std::cout << "initial scedule with costs: " << schedule.computeTotalLambdaCosts() << " and " << +// schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BspSchedule schedule_2(schedule); @@ -822,7 +819,8 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // auto finish_time = std::chrono::high_resolution_clock::now(); // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); -// std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalLambdaCosts() << " with " +// << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); @@ -835,10 +833,11 @@ BOOST_AUTO_TEST_CASE(kl_lambda_improver_inner_loop_lambda_map_test) { // // duration = std::chrono::duration_cast(finish_time - start_time).count(); -// // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; +// // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " +// << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; // // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); // } -// } \ No newline at end of file +// } diff --git a/tests/kl_mem_constr.cpp b/tests/kl_mem_constr.cpp index 7f4c0ef4..1e2de362 100644 --- a/tests/kl_mem_constr.cpp +++ b/tests/kl_mem_constr.cpp @@ -20,33 +20,30 @@ limitations under the License. #include #include +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_base.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "test_graphs.hpp" +#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_include.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" +#include "test_graphs.hpp" using namespace osp; -template +template void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; int comm_weight = 1; for (const auto &v : dag.vertices()) { - dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 3 + 1)); dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 3 + 1)); } } BOOST_AUTO_TEST_CASE(kl_local_memconst) { - std::vector filenames_graph = test_graphs(); using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -62,12 +59,11 @@ BOOST_AUTO_TEST_CASE(kl_local_memconst) { GreedyBspScheduler> test_scheduler; for (auto &filename_graph : filenames_graph) { - std::cout << filename_graph << std::endl; BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph + = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(10); instance.getArchitecture().setCommunicationCosts(5); instance.getArchitecture().setNumberOfProcessors(4); @@ -79,13 +75,11 @@ BOOST_AUTO_TEST_CASE(kl_local_memconst) { add_mem_weights(instance.getComputationalDag()); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } for (const auto &bound : bounds_to_test) { - instance.getArchitecture().setMemoryBound(bound); BspSchedule schedule(instance); @@ -104,4 +98,4 @@ BOOST_AUTO_TEST_CASE(kl_local_memconst) { BOOST_CHECK(schedule.satisfiesMemoryConstraints()); } } -} \ No newline at end of file +} diff --git a/tests/kl_total.cpp b/tests/kl_total.cpp index 58421144..21b565c0 100644 --- a/tests/kl_total.cpp +++ b/tests/kl_total.cpp @@ -20,8 +20,6 @@ limitations under the License. #include #include -#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" - #include "osp/auxiliary/io/arch_file_reader.hpp" #include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" @@ -33,21 +31,19 @@ limitations under the License. using namespace osp; -template +template void add_mem_weights(Graph_t &dag) { - int mem_weight = 1; int comm_weight = 7; for (const auto &v : dag.vertices()) { - dag.set_vertex_work_weight(v, static_cast>(mem_weight++ % 10 + 2)); dag.set_vertex_mem_weight(v, static_cast>(mem_weight++ % 10 + 2)); dag.set_vertex_comm_weight(v, static_cast>(comm_weight++ % 10 + 2)); } } -template +template void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::set &nodes) { BOOST_CHECK_EQUAL(table_1.size(), table_2.size()); @@ -57,7 +53,8 @@ void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::s BOOST_CHECK(std::abs(table_1[i][j][k] - table_2[i][j][k]) < 0.000001); if (std::abs(table_1[i][j][k] - table_2[i][j][k]) > 0.000001) { - std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] << ", table_2=" << table_2[i][j][k] << std::endl; + std::cout << "Mismatch at [" << i << "][" << j << "][" << k << "]: table_1=" << table_1[i][j][k] + << ", table_2=" << table_2[i][j][k] << std::endl; } } } @@ -65,7 +62,6 @@ void check_equal_affinity_table(table_t &table_1, table_t &table_2, const std::s } BOOST_AUTO_TEST_CASE(kl_improver_smoke_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -110,7 +106,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_smoke_test) { } BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) { - std::vector filenames_graph = test_graphs(); using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -126,18 +121,16 @@ BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) { GreedyBspScheduler test_scheduler; for (auto &filename_graph : filenames_graph) { - BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), - instance.getComputationalDag()); + bool status_graph + = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); instance.getArchitecture().setSynchronisationCosts(5); instance.getArchitecture().setCommunicationCosts(5); instance.getArchitecture().setNumberOfProcessors(4); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -161,7 +154,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_on_test_graphs) { } BOOST_AUTO_TEST_CASE(kl_improver_superstep_removal_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -209,7 +201,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_superstep_removal_test) { } BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -320,7 +311,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_test) { } BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_penalty_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -405,7 +395,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_inner_loop_penalty_test) { } BOOST_AUTO_TEST_CASE(kl_improver_violation_handling_test) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -435,7 +424,7 @@ BOOST_AUTO_TEST_CASE(kl_improver_violation_handling_test) { BspSchedule schedule(instance); - schedule.setAssignedProcessors({0, 1, 0, 0, 1, 0, 0, 1}); // v1->v2 is on same step, different procs + schedule.setAssignedProcessors({0, 1, 0, 0, 1, 0, 0, 1}); // v1->v2 is on same step, different procs schedule.setAssignedSupersteps({0, 0, 2, 1, 2, 2, 3, 3}); schedule.updateNumberOfSupersteps(); @@ -456,7 +445,6 @@ BOOST_AUTO_TEST_CASE(kl_improver_violation_handling_test) { } BOOST_AUTO_TEST_CASE(kl_base_1) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -521,8 +509,8 @@ BOOST_AUTO_TEST_CASE(kl_base_1) { kl.apply_move_test(move_2); - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 39.0); // 42-3 - BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 5.0); // 2+3 + BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_max_work(0), 39.0); // 42-3 + BOOST_CHECK_EQUAL(kl_active_schedule.work_datastructures.step_second_max_work(0), 5.0); // 2+3 BOOST_CHECK_EQUAL(kl_active_schedule.num_steps(), 1); BOOST_CHECK_EQUAL(kl.is_feasible(), false); BOOST_CHECK_CLOSE(kl.get_current_cost(), kl.get_comm_cost_f().compute_schedule_cost(), 0.00001); @@ -551,7 +539,6 @@ BOOST_AUTO_TEST_CASE(kl_base_1) { } BOOST_AUTO_TEST_CASE(kl_base_2) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -713,7 +700,6 @@ BOOST_AUTO_TEST_CASE(kl_base_2) { } BOOST_AUTO_TEST_CASE(kl_base_3) { - using graph = computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; @@ -994,7 +980,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // schedule.updateNumberOfSupersteps(); -// std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps() +// << " number of supersteps"<< std::endl; // BspSchedule schedule_2(schedule); @@ -1010,7 +997,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); -// std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << +// schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); @@ -1023,7 +1011,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // // duration = std::chrono::duration_cast(finish_time - start_time).count(); -// // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; +// // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " +// << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; // // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); @@ -1073,7 +1062,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // schedule.updateNumberOfSupersteps(); -// std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// std::cout << "initial scedule with costs: " << schedule.computeTotalCosts() << " and " << schedule.numberOfSupersteps() +// << " number of supersteps"<< std::endl; // BspSchedule schedule_2(schedule); @@ -1089,7 +1079,8 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // auto duration = std::chrono::duration_cast(finish_time - start_time).count(); -// std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; +// std::cout << "kl new finished in " << duration << " seconds, costs: " << schedule.computeTotalCosts() << " with " << +// schedule.numberOfSupersteps() << " number of supersteps"<< std::endl; // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // BOOST_CHECK_EQUAL(schedule.satisfiesPrecedenceConstraints(), true); @@ -1102,10 +1093,11 @@ BOOST_AUTO_TEST_CASE(kl_base_3) { // // duration = std::chrono::duration_cast(finish_time - start_time).count(); -// // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; +// // std::cout << "kl old finished in " << duration << " seconds, costs: " << schedule_2.computeTotalCosts() << " with " +// << schedule_2.numberOfSupersteps() << " number of supersteps"<< std::endl; // // BOOST_CHECK(status == RETURN_STATUS::OSP_SUCCESS || status == RETURN_STATUS::BEST_FOUND); // // BOOST_CHECK_EQUAL(schedule_2.satisfiesPrecedenceConstraints(), true); // } -// } \ No newline at end of file +// } diff --git a/tests/kl_util.cpp b/tests/kl_util.cpp index a275f425..0057779f 100644 --- a/tests/kl_util.cpp +++ b/tests/kl_util.cpp @@ -17,15 +17,15 @@ limitations under the License. */ #define BOOST_TEST_MODULE kl_util +#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp" + #include #include #include - -#include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_util.hpp" +#include "osp/bsp/model/BspSchedule.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin_v2/kl_active_schedule.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/bsp/model/BspSchedule.hpp" using namespace osp; using graph = computational_dag_edge_idx_vector_impl_def_int_t; @@ -39,7 +39,7 @@ struct ScheduleFixture { ScheduleFixture() : schedule(instance) { // Setup a simple graph and schedule - auto& dag = instance.getComputationalDag(); + auto &dag = instance.getComputationalDag(); for (int i = 0; i < 20; ++i) { dag.add_vertex(i + 1, i + 1, i + 1); } @@ -83,7 +83,7 @@ BOOST_AUTO_TEST_CASE(reward_penalty_strategy_test) { } // Tests for lock managers -template +template void test_lock_manager() { LockManager lm; lm.initialize(10); @@ -123,7 +123,7 @@ BOOST_AUTO_TEST_CASE(adaptive_affinity_table_test) { BOOST_CHECK_EQUAL(table.size(), 1); BOOST_CHECK(table.is_selected(0)); BOOST_CHECK(!table.is_selected(1)); - BOOST_CHECK(!table.insert(0)); // already present + BOOST_CHECK(!table.insert(0)); // already present // Remove table.remove(0); @@ -151,13 +151,13 @@ BOOST_AUTO_TEST_CASE(adaptive_affinity_table_test) { // After trim, the gaps should be filled. std::set expected_selected = {0, 1, 2, 4, 6, 8, 9}; std::set actual_selected; - const auto& selected_nodes_vec = table.get_selected_nodes(); - for(size_t i = 0; i < table.size(); ++i) { + const auto &selected_nodes_vec = table.get_selected_nodes(); + for (size_t i = 0; i < table.size(); ++i) { actual_selected.insert(static_cast(selected_nodes_vec[i])); } BOOST_CHECK(expected_selected == actual_selected); - for(unsigned i = 0; i < 20; ++i) { + for (unsigned i = 0; i < 20; ++i) { if (expected_selected.count(i)) { BOOST_CHECK(table.is_selected(i)); } else { @@ -166,7 +166,7 @@ BOOST_AUTO_TEST_CASE(adaptive_affinity_table_test) { } // Check that indices are correct - for(size_t i = 0; i < table.size(); ++i) { + for (size_t i = 0; i < table.size(); ++i) { BOOST_CHECK_EQUAL(table.get_selected_nodes_idx(selected_nodes_vec[i]), i); } @@ -181,7 +181,7 @@ BOOST_AUTO_TEST_CASE(adaptive_affinity_table_test) { BOOST_AUTO_TEST_CASE(static_affinity_table_test) { using affinity_table_t = static_affinity_table; affinity_table_t table; - table.initialize(active_schedule, 0); // size is ignored + table.initialize(active_schedule, 0); // size is ignored BOOST_CHECK_EQUAL(table.size(), 0); @@ -190,7 +190,7 @@ BOOST_AUTO_TEST_CASE(static_affinity_table_test) { BOOST_CHECK_EQUAL(table.size(), 1); BOOST_CHECK(table.is_selected(0)); BOOST_CHECK(!table.is_selected(1)); - table.insert(0); // should be a no-op on size + table.insert(0); // should be a no-op on size BOOST_CHECK_EQUAL(table.size(), 1); // Remove @@ -210,12 +210,11 @@ BOOST_AUTO_TEST_CASE(static_affinity_table_test) { BOOST_CHECK(!table.is_selected(0)); } - // Tests for vertex_selection_strategy BOOST_AUTO_TEST_CASE(vertex_selection_strategy_test) { using affinity_table_t = adaptive_affinity_table; using selection_strategy_t = vertex_selection_strategy; - + selection_strategy_t strategy; std::mt19937 gen(0); const unsigned end_step = active_schedule.num_steps() - 1; @@ -239,7 +238,7 @@ BOOST_AUTO_TEST_CASE(vertex_selection_strategy_test) { strategy.select_nodes_permutation_threshold(15, table); BOOST_CHECK_EQUAL(table.size(), 20); - BOOST_CHECK_EQUAL(strategy.permutation_idx, 0); // should wrap around and reshuffle + BOOST_CHECK_EQUAL(strategy.permutation_idx, 0); // should wrap around and reshuffle table.reset_node_selection(); strategy.max_work_counter = 0; @@ -276,7 +275,7 @@ BOOST_AUTO_TEST_CASE(kl_move_struct_test) { } BOOST_AUTO_TEST_CASE(work_datastructures_initialization_test) { - auto& wd = active_schedule.work_datastructures; + auto &wd = active_schedule.work_datastructures; // Step 0: node 0 on proc 0, work 1. Other procs have 0 work. BOOST_CHECK_EQUAL(wd.step_proc_work(0, 0), 1); @@ -298,13 +297,13 @@ BOOST_AUTO_TEST_CASE(work_datastructures_initialization_test) { } BOOST_AUTO_TEST_CASE(work_datastructures_apply_move_test) { - auto& wd = active_schedule.work_datastructures; + auto &wd = active_schedule.work_datastructures; using kl_move = kl_move_struct; // Move within same superstep // Move node 0 (work 1) from proc 0 to proc 3 in step 0 kl_move move1(0, 0.0, 0, 0, 3, 0); - wd.apply_move(move1, 1); // work_weight of node 0 is 1 + wd.apply_move(move1, 1); // work_weight of node 0 is 1 // Before: {1,0,0,0}, After: {0,0,0,1} BOOST_CHECK_EQUAL(wd.step_proc_work(0, 0), 0); @@ -318,7 +317,7 @@ BOOST_AUTO_TEST_CASE(work_datastructures_apply_move_test) { // Move to different superstep // Move node 4 (work 5) from proc 0, step 4 to proc 1, step 0 kl_move move2(4, 0.0, 0, 4, 1, 0); - wd.apply_move(move2, 5); // work_weight of node 4 is 5 + wd.apply_move(move2, 5); // work_weight of node 4 is 5 // Step 0 state after move1: {0,0,0,1}. max=1 // After move2: {0,5,0,1}. max=5 @@ -338,7 +337,7 @@ BOOST_AUTO_TEST_CASE(work_datastructures_apply_move_test) { BOOST_CHECK_EQUAL(wd.step_proc_work(4, 3), 0); BOOST_CHECK_EQUAL(wd.step_max_work(4), 0); BOOST_CHECK_EQUAL(wd.step_second_max_work(4), 0); - BOOST_CHECK_EQUAL(wd.step_max_work_processor_count[4], 3); // All 4 procs have work 0, so count is 3. + BOOST_CHECK_EQUAL(wd.step_max_work_processor_count[4], 3); // All 4 procs have work 0, so count is 3. } BOOST_AUTO_TEST_CASE(active_schedule_initialization_test) { @@ -376,11 +375,11 @@ BOOST_AUTO_TEST_CASE(active_schedule_compute_violations_test) { thread_data_t thread_data; // Manually create a violation - schedule.setAssignedSuperstep(1, 0); // node 1 is now in step 0 (was 1) - schedule.setAssignedSuperstep(0, 1); // node 0 is now in step 1 (was 0) + schedule.setAssignedSuperstep(1, 0); // node 1 is now in step 0 (was 1) + schedule.setAssignedSuperstep(0, 1); // node 0 is now in step 1 (was 0) // Now we have a violation for edge 0 -> 1, since step(0) > step(1) active_schedule.initialize(schedule); - + active_schedule.compute_violations(thread_data); BOOST_CHECK(!thread_data.feasible); @@ -390,7 +389,7 @@ BOOST_AUTO_TEST_CASE(active_schedule_compute_violations_test) { BOOST_AUTO_TEST_CASE(active_schedule_revert_moves_test) { using kl_move = kl_move_struct; using thread_data_t = thread_local_active_schedule_data; - + kl_active_schedule_t original_schedule; original_schedule.initialize(schedule); @@ -406,7 +405,7 @@ BOOST_AUTO_TEST_CASE(active_schedule_revert_moves_test) { BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(1), 1); struct dummy_comm_ds { - void update_datastructure_after_move(const kl_move&, unsigned, unsigned) {} + void update_datastructure_after_move(const kl_move &, unsigned, unsigned) {} } comm_ds; // Revert both moves @@ -426,46 +425,46 @@ BOOST_AUTO_TEST_CASE(active_schedule_revert_to_best_schedule_test) { thread_data.initialize_cost(100); // Apply 3 moves - kl_move move1(0, 0.0, 0, 0, 1, 0); // node 0 from (p0,s0) to (p1,s0) + kl_move move1(0, 0.0, 0, 0, 1, 0); // node 0 from (p0,s0) to (p1,s0) active_schedule.apply_move(move1, thread_data); - thread_data.update_cost(-10); // cost 90 + thread_data.update_cost(-10); // cost 90 - kl_move move2(1, 0.0, 1, 1, 2, 1); // node 1 from (p1,s1) to (p2,s1) + kl_move move2(1, 0.0, 1, 1, 2, 1); // node 1 from (p1,s1) to (p2,s1) active_schedule.apply_move(move2, thread_data); - thread_data.update_cost(-10); // cost 80, best is here + thread_data.update_cost(-10); // cost 80, best is here - kl_move move3(2, 0.0, 2, 2, 3, 2); // node 2 from (p2,s2) to (p3,s2) + kl_move move3(2, 0.0, 2, 2, 3, 2); // node 2 from (p2,s2) to (p3,s2) active_schedule.apply_move(move3, thread_data); - thread_data.update_cost(+5); // cost 85 + thread_data.update_cost(+5); // cost 85 BOOST_CHECK_EQUAL(thread_data.best_schedule_idx, 2); BOOST_CHECK_EQUAL(thread_data.applied_moves.size(), 3); struct dummy_comm_ds { - void update_datastructure_after_move(const kl_move&, unsigned, unsigned) {} + void update_datastructure_after_move(const kl_move &, unsigned, unsigned) {} } comm_ds; - + unsigned end_step = active_schedule.num_steps() - 1; // Revert to best. start_move=0 means no step removal logic is triggered. active_schedule.revert_to_best_schedule(0, 0, comm_ds, thread_data, 0, end_step); - BOOST_CHECK_EQUAL(thread_data.cost, 80.0); // Check cost is reverted to best + BOOST_CHECK_EQUAL(thread_data.cost, 80.0); // Check cost is reverted to best BOOST_CHECK_EQUAL(thread_data.applied_moves.size(), 0); - BOOST_CHECK_EQUAL(thread_data.best_schedule_idx, 0); // Reset for next iteration + BOOST_CHECK_EQUAL(thread_data.best_schedule_idx, 0); // Reset for next iteration // Check schedule state is after move2 - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(0), 1); // from move1 + BOOST_CHECK_EQUAL(active_schedule.assigned_processor(0), 1); // from move1 BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(0), 0); - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(1), 2); // from move2 + BOOST_CHECK_EQUAL(active_schedule.assigned_processor(1), 2); // from move2 BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(1), 1); - BOOST_CHECK_EQUAL(active_schedule.assigned_processor(2), 2); // Reverted, so original - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(2), 2); // Reverted, so original + BOOST_CHECK_EQUAL(active_schedule.assigned_processor(2), 2); // Reverted, so original + BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(2), 2); // Reverted, so original } BOOST_AUTO_TEST_CASE(active_schedule_swap_empty_step_fwd_test) { // Make step 1 empty by moving node 1 to step 0 active_schedule.getVectorSchedule().setAssignedSuperstep(1, 0); - active_schedule.initialize(active_schedule.getVectorSchedule()); // re-init to update set_schedule and work_ds + active_schedule.initialize(active_schedule.getVectorSchedule()); // re-init to update set_schedule and work_ds BOOST_CHECK_EQUAL(active_schedule.get_step_total_work(1), 0); @@ -486,15 +485,15 @@ BOOST_AUTO_TEST_CASE(active_schedule_remove_empty_step_test) { active_schedule.initialize(active_schedule.getVectorSchedule()); unsigned original_num_steps = active_schedule.num_steps(); - unsigned original_step_of_node_8 = active_schedule.assigned_superstep(8); // should be 2 + unsigned original_step_of_node_8 = active_schedule.assigned_superstep(8); // should be 2 active_schedule.remove_empty_step(1); BOOST_CHECK_EQUAL(active_schedule.num_steps(), original_num_steps - 1); // Node 8 should be shifted back by one step - BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(8), original_step_of_node_8 - 1); // 8 -> 7 + BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(8), original_step_of_node_8 - 1); // 8 -> 7 // Node 3 (in step 3) should be shifted back by one step BOOST_CHECK_EQUAL(active_schedule.assigned_superstep(3), 2); } -BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file +BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/max_bsp_schedulers.cpp b/tests/max_bsp_schedulers.cpp index 5d86b6b7..cb13a16d 100644 --- a/tests/max_bsp_schedulers.cpp +++ b/tests/max_bsp_schedulers.cpp @@ -18,27 +18,25 @@ limitations under the License. #define BOOST_TEST_MODULE BSP_SCHEDULERS #include - #include #include #include - +#include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/general_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/bsp/scheduler/GreedySchedulers/GreedyVarianceSspScheduler.hpp" #include "osp/bsp/scheduler/MaxBspScheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/arch_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "osp/auxiliary/io/general_file_reader.hpp" #include "test_graphs.hpp" using namespace osp; std::vector test_architectures() { return {"data/machine_params/p3.arch"}; } -template +template void run_test(Scheduler *test_scheduler) { // static_assert(std::is_base_of::value, "Class is not a scheduler!"); std::vector filenames_graph = tiny_spaa_graphs(); @@ -65,13 +63,11 @@ void run_test(Scheduler *test_scheduler) { BspInstance instance; - bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), - instance.getComputationalDag()); - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), - instance.getArchitecture()); + bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), instance.getComputationalDag()); + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); if (!status_graph || !status_architecture) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -85,8 +81,8 @@ void run_test(Scheduler *test_scheduler) { } } -template -void run_test_max_bsp(MaxBspScheduler* test_scheduler) { +template +void run_test_max_bsp(MaxBspScheduler *test_scheduler) { std::vector filenames_graph = tiny_spaa_graphs(); std::vector filenames_architectures = test_architectures(); @@ -96,8 +92,8 @@ void run_test_max_bsp(MaxBspScheduler* test_scheduler) { cwd = cwd.parent_path(); } - for (auto& filename_graph : filenames_graph) { - for (auto& filename_machine : filenames_architectures) { + for (auto &filename_graph : filenames_graph) { + for (auto &filename_machine : filenames_architectures) { std::string name_graph = filename_graph.substr(filename_graph.find_last_of("/\\") + 1); name_graph = name_graph.substr(0, name_graph.find_last_of(".")); std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); @@ -112,8 +108,7 @@ void run_test_max_bsp(MaxBspScheduler* test_scheduler) { BspArchitecture arch; bool status_graph = file_reader::readGraph((cwd / filename_graph).string(), graph); - bool status_architecture = - file_reader::readBspArchitecture((cwd / filename_machine).string(), arch); + bool status_architecture = file_reader::readBspArchitecture((cwd / filename_machine).string(), arch); BOOST_REQUIRE_MESSAGE(status_graph, "Failed to read graph: " << filename_graph); BOOST_REQUIRE_MESSAGE(status_architecture, "Failed to read architecture: " << filename_machine); diff --git a/tests/maxbsp_converter_and_hc.cpp b/tests/maxbsp_converter_and_hc.cpp index 1ecb25d8..a36fa7c4 100644 --- a/tests/maxbsp_converter_and_hc.cpp +++ b/tests/maxbsp_converter_and_hc.cpp @@ -18,20 +18,18 @@ limitations under the License. #define BOOST_TEST_MODULE MAXBSP_SCHEDULERS #include +#include -#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspToMaxBspConverter.hpp" +#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing.hpp" #include "osp/bsp/scheduler/LocalSearch/HillClimbing/hill_climbing_for_comm_schedule.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" using namespace osp; - BOOST_AUTO_TEST_CASE(maxbsp_scheduling) { - using graph = computational_dag_vector_impl_def_t; BspInstance instance; @@ -57,15 +55,14 @@ BOOST_AUTO_TEST_CASE(maxbsp_scheduling) { BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, greedy.computeSchedule(bsp_initial)); BOOST_CHECK(bsp_initial.satisfiesPrecedenceConstraints()); - // PART I: from BspSchedule to MaxBspSchedule conversion - std::cout<<"Original Bsp Cost: "< converter; MaxBspSchedule maxbsp = converter.Convert(bsp_initial); BOOST_CHECK(maxbsp.satisfiesPrecedenceConstraints()); auto cost_conversion = maxbsp.computeCosts(); - std::cout<<"Cost after maxBsp conversion: "< bsp_initial_cs(bsp_initial); BOOST_CHECK(bsp_initial_cs.hasValidCommSchedule()); - std::cout<<"Original BspCS Cost: "< maxbsp_cs = converter.Convert(bsp_initial_cs); BOOST_CHECK(maxbsp_cs.satisfiesPrecedenceConstraints()); BOOST_CHECK(maxbsp_cs.hasValidCommSchedule()); auto cost_conversion_cs = maxbsp_cs.computeCosts(); - std::cout<<"Cost after maxBsp(CS) conversion: "< bsp_initial_large_cs(bsp_initial_large); BOOST_CHECK(bsp_initial_large_cs.hasValidCommSchedule()); - std::cout<<"Original Bsp Cost on large DAG: "< maxbsp_cs_large = converter.Convert(bsp_initial_large_cs); BOOST_CHECK(maxbsp_cs_large.satisfiesPrecedenceConstraints()); BOOST_CHECK(maxbsp_cs_large.hasValidCommSchedule()); auto cost_maxbsp_cs_large = maxbsp_cs_large.computeCosts(); - std::cout<<"Cost after maxBsp conversion on large DAG: "< +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "test_utils.hpp" using namespace osp; -BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) -{ +BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) { using graph_t = computational_dag_vector_impl_def_t; graph_t graph; @@ -38,19 +37,18 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) MerkleHashComputer>> m_hash(graph); BOOST_CHECK_EQUAL(m_hash.get_vertex_hashes().size(), graph.num_vertices()); - - for (const auto& v : source_vertices_view(graph)) { + + for (const auto &v : source_vertices_view(graph)) { BOOST_CHECK_EQUAL(m_hash.get_vertex_hash(v), 11); } size_t num = 0; - for (const auto& pair : m_hash.get_orbits()) { - + for (const auto &pair : m_hash.get_orbits()) { num += pair.second.size(); std::cout << "orbit " << pair.first << ": "; - for (const auto& v : pair.second) { + for (const auto &v : pair.second) { std::cout << v << ", "; - } + } std::cout << std::endl; } @@ -62,43 +60,38 @@ BOOST_AUTO_TEST_CASE(BspScheduleRecomp_test) BOOST_CHECK_EQUAL(m_hash.get_vertex_hash(29), m_hash.get_vertex_hash(22)); BOOST_CHECK(m_hash.get_vertex_hash(3) != m_hash.get_vertex_hash(12)); BOOST_CHECK(m_hash.get_vertex_hash(53) != m_hash.get_vertex_hash(29)); - } -BOOST_AUTO_TEST_CASE(MerkleHashComputer_test_fw_bw_precomp) -{ +BOOST_AUTO_TEST_CASE(MerkleHashComputer_test_fw_bw_precomp) { using graph_t = computational_dag_vector_impl_def_t; graph_t graph_test; const auto project_root = get_project_root(); - file_reader::readComputationalDagHyperdagFormatDB((project_root / "data/spaa/tiny/instance_bicgstab.hdag").string(), graph_test); + file_reader::readComputationalDagHyperdagFormatDB((project_root / "data/spaa/tiny/instance_bicgstab.hdag").string(), + graph_test); std::vector precom_node_hashes(graph_test.num_vertices(), 5); MerkleHashComputer> m_hash(graph_test, graph_test, precom_node_hashes); BOOST_CHECK_EQUAL(m_hash.get_vertex_hashes().size(), graph_test.num_vertices()); - - size_t num = 0; - for (const auto& pair : m_hash.get_orbits()) { + size_t num = 0; + for (const auto &pair : m_hash.get_orbits()) { num += pair.second.size(); std::cout << "orbit " << pair.first << ": "; - for (const auto& v : pair.second) { + for (const auto &v : pair.second) { std::cout << v << ", "; - } + } std::cout << std::endl; } BOOST_CHECK_EQUAL(num, graph_test.num_vertices()); - } - using graphType = computational_dag_vector_impl_def_t; using VertexType = vertex_idx_t; - BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_IdenticalGraphsAreIsomorphic) { graphType dag1; const auto v1 = dag1.add_vertex(0, 10, 1); @@ -134,14 +127,14 @@ BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_DifferentVertexCount) { // Test case 3: Graphs with the same size but different structures should not be isomorphic. BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_SameSizeDifferentStructure) { - graphType dag1; // A -> B -> C + graphType dag1; // A -> B -> C const auto v1_1 = dag1.add_vertex(0, 1, 1); const auto v1_2 = dag1.add_vertex(0, 1, 1); const auto v1_3 = dag1.add_vertex(0, 1, 1); dag1.add_edge(v1_1, v1_2); dag1.add_edge(v1_2, v1_3); - graphType dag2; // A -> B, A -> C + graphType dag2; // A -> B, A -> C const auto v2_1 = dag2.add_vertex(0, 1, 1); const auto v2_2 = dag2.add_vertex(0, 1, 1); const auto v2_3 = dag2.add_vertex(0, 1, 1); @@ -154,16 +147,16 @@ BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_SameSizeDifferentStructure) { // Test case 4: Structurally identical graphs with different vertex labeling should be isomorphic. BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_IsomorphicWithDifferentLabels) { graphType dag1; - const auto v1_1 = dag1.add_vertex(0, 1, 1); // Source + const auto v1_1 = dag1.add_vertex(0, 1, 1); // Source const auto v1_2 = dag1.add_vertex(0, 1, 1); - const auto v1_3 = dag1.add_vertex(0, 1, 1); // Sink + const auto v1_3 = dag1.add_vertex(0, 1, 1); // Sink dag1.add_edge(v1_1, v1_2); dag1.add_edge(v1_2, v1_3); graphType dag2; // Same structure as dag1, but vertices are added in a different order. - const auto v2_3 = dag2.add_vertex(0, 1, 1); // Sink - const auto v2_1 = dag2.add_vertex(0, 1, 1); // Source + const auto v2_3 = dag2.add_vertex(0, 1, 1); // Sink + const auto v2_1 = dag2.add_vertex(0, 1, 1); // Source const auto v2_2 = dag2.add_vertex(0, 1, 1); dag2.add_edge(v2_1, v2_2); dag2.add_edge(v2_2, v2_3); @@ -175,14 +168,26 @@ BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_IsomorphicWithDifferentLabels) { BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_ComplexIsomorphicGraphs) { graphType dag1; { - const auto v1 = dag1.add_vertex(2, 9, 2); const auto v2 = dag1.add_vertex(3, 8, 4); - const auto v3 = dag1.add_vertex(4, 7, 3); const auto v4 = dag1.add_vertex(5, 6, 2); - const auto v5 = dag1.add_vertex(6, 5, 6); const auto v6 = dag1.add_vertex(7, 4, 2); - dag1.add_vertex(8, 3, 4); const auto v8 = dag1.add_vertex(9, 2, 1); - dag1.add_edge(v1, v2); dag1.add_edge(v1, v3); dag1.add_edge(v1, v4); - dag1.add_edge(v1, v5); dag1.add_edge(v1, v8); dag1.add_edge(v2, v5); - dag1.add_edge(v2, v6); dag1.add_edge(v2, v8); dag1.add_edge(v3, v5); - dag1.add_edge(v3, v6); dag1.add_edge(v5, v8); dag1.add_edge(v4, v8); + const auto v1 = dag1.add_vertex(2, 9, 2); + const auto v2 = dag1.add_vertex(3, 8, 4); + const auto v3 = dag1.add_vertex(4, 7, 3); + const auto v4 = dag1.add_vertex(5, 6, 2); + const auto v5 = dag1.add_vertex(6, 5, 6); + const auto v6 = dag1.add_vertex(7, 4, 2); + dag1.add_vertex(8, 3, 4); + const auto v8 = dag1.add_vertex(9, 2, 1); + dag1.add_edge(v1, v2); + dag1.add_edge(v1, v3); + dag1.add_edge(v1, v4); + dag1.add_edge(v1, v5); + dag1.add_edge(v1, v8); + dag1.add_edge(v2, v5); + dag1.add_edge(v2, v6); + dag1.add_edge(v2, v8); + dag1.add_edge(v3, v5); + dag1.add_edge(v3, v6); + dag1.add_edge(v5, v8); + dag1.add_edge(v4, v8); } graphType dag2; @@ -190,14 +195,25 @@ BOOST_AUTO_TEST_CASE(MerkleIsomorphismTest_ComplexIsomorphicGraphs) { // Same structure, different vertex variable names and creation order. const auto n8 = dag2.add_vertex(9, 2, 1); dag2.add_vertex(8, 3, 4); - const auto n6 = dag2.add_vertex(7, 4, 2); const auto n5 = dag2.add_vertex(6, 5, 6); - const auto n4 = dag2.add_vertex(5, 6, 2); const auto n3 = dag2.add_vertex(4, 7, 3); - const auto n2 = dag2.add_vertex(3, 8, 4); const auto n1 = dag2.add_vertex(2, 9, 2); - dag2.add_edge(n1, n2); dag2.add_edge(n1, n3); dag2.add_edge(n1, n4); - dag2.add_edge(n1, n5); dag2.add_edge(n1, n8); dag2.add_edge(n2, n5); - dag2.add_edge(n2, n6); dag2.add_edge(n2, n8); dag2.add_edge(n3, n5); - dag2.add_edge(n3, n6); dag2.add_edge(n5, n8); dag2.add_edge(n4, n8); + const auto n6 = dag2.add_vertex(7, 4, 2); + const auto n5 = dag2.add_vertex(6, 5, 6); + const auto n4 = dag2.add_vertex(5, 6, 2); + const auto n3 = dag2.add_vertex(4, 7, 3); + const auto n2 = dag2.add_vertex(3, 8, 4); + const auto n1 = dag2.add_vertex(2, 9, 2); + dag2.add_edge(n1, n2); + dag2.add_edge(n1, n3); + dag2.add_edge(n1, n4); + dag2.add_edge(n1, n5); + dag2.add_edge(n1, n8); + dag2.add_edge(n2, n5); + dag2.add_edge(n2, n6); + dag2.add_edge(n2, n8); + dag2.add_edge(n3, n5); + dag2.add_edge(n3, n6); + dag2.add_edge(n5, n8); + dag2.add_edge(n4, n8); } - + BOOST_CHECK(are_isomorphic_by_merkle_hash(dag1, dag2)); -} \ No newline at end of file +} diff --git a/tests/orbit_graph_processor.cpp b/tests/orbit_graph_processor.cpp index d79a83a9..6ac34228 100644 --- a/tests/orbit_graph_processor.cpp +++ b/tests/orbit_graph_processor.cpp @@ -18,28 +18,27 @@ limitations under the License. #define BOOST_TEST_MODULE OrbitGraphProcessor #include -#include "test_utils.hpp" -#include "test_graphs.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" +#include +#include +#include +#include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/auxiliary/io/dot_graph_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/dag_divider/isomorphism_divider/MerkleHashComputer.hpp" -#include "osp/auxiliary/io/DotFileWriter.hpp" #include "osp/dag_divider/isomorphism_divider/OrbitGraphProcessor.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" - -#include -#include -#include +#include "test_graphs.hpp" +#include "test_utils.hpp" using namespace osp; using graph_t = computational_dag_vector_impl_def_t; template -void check_partitioning(const Graph_t& dag, const OrbitGraphProcessor& processor) { - const auto& final_coarse_graph = processor.get_final_coarse_graph(); - const auto& final_groups = processor.get_final_groups(); +void check_partitioning(const Graph_t &dag, const OrbitGraphProcessor &processor) { + const auto &final_coarse_graph = processor.get_final_coarse_graph(); + const auto &final_groups = processor.get_final_groups(); // Check that the final coarse graph is acyclic BOOST_CHECK(is_acyclic(final_coarse_graph)); @@ -47,10 +46,10 @@ void check_partitioning(const Graph_t& dag, const OrbitGraphProcessor vertex_counts(dag.num_vertices(), 0); size_t total_vertices_in_groups = 0; - for (const auto& group : final_groups) { - for (const auto& subgraph : group.subgraphs) { + for (const auto &group : final_groups) { + for (const auto &subgraph : group.subgraphs) { total_vertices_in_groups += subgraph.size(); - for (const auto& vertex : subgraph) { + for (const auto &vertex : subgraph) { BOOST_REQUIRE_LT(vertex, dag.num_vertices()); vertex_counts[vertex]++; } @@ -111,10 +110,10 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_SimpleMerge) { // Two parallel pipelines that are structurally identical // 0 -> 1 // 2 -> 3 - dag.add_vertex(10, 1, 1); // 0 - dag.add_vertex(10, 1, 1); // 1 - dag.add_vertex(10, 1, 1); // 2 - dag.add_vertex(10, 1, 1); // 3 + dag.add_vertex(10, 1, 1); // 0 + dag.add_vertex(10, 1, 1); // 1 + dag.add_vertex(10, 1, 1); // 2 + dag.add_vertex(10, 1, 1); // 3 dag.add_edge(0, 1); dag.add_edge(2, 3); @@ -124,8 +123,8 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_SimpleMerge) { MerkleHashComputer, true> hasher(dag, dag); processor.discover_isomorphic_groups(dag, hasher); - const auto& final_coarse_graph = processor.get_final_coarse_graph(); - const auto& final_groups = processor.get_final_groups(); + const auto &final_coarse_graph = processor.get_final_coarse_graph(); + const auto &final_groups = processor.get_final_groups(); // Expect a single node in the final coarse graph BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 1); @@ -146,10 +145,10 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_SimpleMerge) { BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ForkJoinNoMerge) { graph_t dag; // 0 -> {1, 2} -> 3. Nodes 1 and 2 are in the same orbit. - dag.add_vertex(10, 1, 1); // 0 - dag.add_vertex(20, 1, 1); // 1 - dag.add_vertex(20, 1, 1); // 2 - dag.add_vertex(30, 1, 1); // 3 + dag.add_vertex(10, 1, 1); // 0 + dag.add_vertex(20, 1, 1); // 1 + dag.add_vertex(20, 1, 1); // 2 + dag.add_vertex(30, 1, 1); // 3 dag.add_edge(0, 1); dag.add_edge(0, 2); dag.add_edge(1, 3); @@ -162,8 +161,8 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ForkJoinNoMerge) { MerkleHashComputer, true> hasher(dag, dag); processor.discover_isomorphic_groups(dag, hasher); - const auto& final_coarse_graph = processor.get_final_coarse_graph(); - const auto& final_groups = processor.get_final_groups(); + const auto &final_coarse_graph = processor.get_final_coarse_graph(); + const auto &final_groups = processor.get_final_groups(); // Expect no merges, so final graph is same as initial coarse graph. BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 3); @@ -175,9 +174,13 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ForkJoinNoMerge) { // Group 2: {{3}} size_t group_of_1_count = 0; size_t group_of_2_count = 0; - for(const auto& group : final_groups) { - if (group.subgraphs.size() == 1) group_of_1_count++; - if (group.subgraphs.size() == 2) group_of_2_count++; + for (const auto &group : final_groups) { + if (group.subgraphs.size() == 1) { + group_of_1_count++; + } + if (group.subgraphs.size() == 2) { + group_of_2_count++; + } } BOOST_CHECK_EQUAL(group_of_1_count, 2); BOOST_CHECK_EQUAL(group_of_2_count, 1); @@ -208,12 +211,12 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_MultiPipelineMerge) { const auto dag = construct_multi_pipeline_dag(5, 4); BOOST_REQUIRE_EQUAL(dag.num_vertices(), 20); - OrbitGraphProcessor processor; // Set threshold to match pipeline count + OrbitGraphProcessor processor; // Set threshold to match pipeline count MerkleHashComputer, true> hasher(dag, dag); processor.discover_isomorphic_groups(dag, hasher); - const auto& final_coarse_graph = processor.get_final_coarse_graph(); - const auto& final_groups = processor.get_final_groups(); + const auto &final_coarse_graph = processor.get_final_coarse_graph(); + const auto &final_groups = processor.get_final_groups(); // Expect a single node in the final coarse graph BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 1); @@ -239,9 +242,9 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_LadderNoMerge) { OrbitGraphProcessor processor; MerkleHashComputer, true> hasher(dag, dag); processor.discover_isomorphic_groups(dag, hasher); - - const auto& initial_coarse_graph = processor.get_coarse_graph(); - const auto& final_coarse_graph = processor.get_final_coarse_graph(); + + const auto &initial_coarse_graph = processor.get_coarse_graph(); + const auto &final_coarse_graph = processor.get_final_coarse_graph(); // Expect no merges, so final graph is the same as the initial coarse graph. BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), initial_coarse_graph.num_vertices()); @@ -260,7 +263,7 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_AsymmetricNoMerge) { MerkleHashComputer, true> hasher(dag, dag); processor.discover_isomorphic_groups(dag, hasher); - const auto& final_coarse_graph = processor.get_final_coarse_graph(); + const auto &final_coarse_graph = processor.get_final_coarse_graph(); // Expect all nodes to be merged into a single coarse node. BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 1); @@ -282,7 +285,7 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_BinaryTreeNoMerge) { MerkleHashComputer, true> hasher(dag, dag); processor.discover_isomorphic_groups(dag, hasher); - const auto& final_coarse_graph = processor.get_final_coarse_graph(); + const auto &final_coarse_graph = processor.get_final_coarse_graph(); BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 3); @@ -297,8 +300,8 @@ BOOST_AUTO_TEST_CASE(OrbitGraphProcessor_ButterflyMerge) { MerkleHashComputer, true> hasher(dag, dag); processor.discover_isomorphic_groups(dag, hasher); - const auto& final_coarse_graph = processor.get_final_coarse_graph(); + const auto &final_coarse_graph = processor.get_final_coarse_graph(); BOOST_CHECK_EQUAL(final_coarse_graph.num_vertices(), 4); check_partitioning(dag, processor); -} \ No newline at end of file +} diff --git a/tests/pebbling_schedule_class.cpp b/tests/pebbling_schedule_class.cpp index ce180370..097b1b7f 100644 --- a/tests/pebbling_schedule_class.cpp +++ b/tests/pebbling_schedule_class.cpp @@ -18,19 +18,17 @@ limitations under the License. #define BOOST_TEST_MODULE BSP_MEM_SCHEDULERS #include - #include #include #include -#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" -#include "osp/pebbling/PebblingSchedule.hpp" -#include "osp/bsp/scheduler/Scheduler.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/auxiliary/io/pebbling_schedule_file_writer.hpp" - +#include "osp/bsp/scheduler/GreedySchedulers/GreedyBspScheduler.hpp" +#include "osp/bsp/scheduler/Scheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "osp/pebbling/PebblingSchedule.hpp" using namespace osp; @@ -55,7 +53,7 @@ std::vector tiny_spaa_graphs() { std::vector test_architectures() { return {"data/machine_params/p3.arch"}; } -template +template void run_test(Scheduler *test_scheduler) { std::vector filenames_graph = tiny_spaa_graphs(); std::vector filenames_architectures = test_architectures(); @@ -70,8 +68,8 @@ void run_test(Scheduler *test_scheduler) { for (auto &filename_graph : filenames_graph) { for (auto &filename_machine : filenames_architectures) { - std::string name_graph = - filename_graph.substr(filename_machine.find_last_of("/\\") + 1, filename_graph.find_last_of(".")); + std::string name_graph + = filename_graph.substr(filename_machine.find_last_of("/\\") + 1, filename_graph.find_last_of(".")); std::string name_machine = filename_machine.substr(filename_machine.find_last_of("/\\") + 1); name_machine = name_machine.substr(0, name_machine.rfind(".")); @@ -80,12 +78,13 @@ void run_test(Scheduler *test_scheduler) { BspInstance instance; - bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), instance.getComputationalDag()); - - bool status_architecture = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + bool status_graph = file_reader::readComputationalDagHyperdagFormatDB((cwd / filename_graph).string(), + instance.getComputationalDag()); - if (!status_graph || !status_architecture) { + bool status_architecture + = file_reader::readBspArchitecture((cwd / "data/machine_params/p3.arch").string(), instance.getArchitecture()); + if (!status_graph || !status_architecture) { std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } @@ -95,15 +94,18 @@ void run_test(Scheduler *test_scheduler) { RETURN_STATUS result = test_scheduler->computeSchedule(bsp_schedule); BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - std::vector > minimum_memory_required_vector = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); - v_memw_t max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end()); + std::vector > minimum_memory_required_vector + = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); + v_memw_t max_required + = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end()); instance.getArchitecture().setMemoryBound(max_required); PebblingSchedule memSchedule1(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::LARGEST_ID); BOOST_CHECK_EQUAL(&memSchedule1.getInstance(), &instance); - BOOST_CHECK(memSchedule1.isValid()); + BOOST_CHECK(memSchedule1.isValid()); - PebblingSchedule memSchedule3(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED); + PebblingSchedule memSchedule3(bsp_schedule, + PebblingSchedule::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED); BOOST_CHECK(memSchedule3.isValid()); PebblingSchedule memSchedule5(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::FORESIGHT); @@ -114,7 +116,8 @@ void run_test(Scheduler *test_scheduler) { PebblingSchedule memSchedule2(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::LARGEST_ID); BOOST_CHECK(memSchedule2.isValid()); - PebblingSchedule memSchedule4(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED); + PebblingSchedule memSchedule4(bsp_schedule, + PebblingSchedule::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED); BOOST_CHECK(memSchedule4.isValid()); PebblingSchedule memSchedule6(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::FORESIGHT); @@ -123,14 +126,12 @@ void run_test(Scheduler *test_scheduler) { } } - BOOST_AUTO_TEST_CASE(GreedyBspScheduler_test) { GreedyBspScheduler test; run_test(&test); } BOOST_AUTO_TEST_CASE(test_pebbling_schedule_writer) { - using graph = computational_dag_vector_impl_def_int_t; BspInstance instance; @@ -146,8 +147,8 @@ BOOST_AUTO_TEST_CASE(test_pebbling_schedule_writer) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_bicgstab.hdag").string(), + instance.getComputationalDag()); BOOST_CHECK(status); BOOST_CHECK_EQUAL(instance.getComputationalDag().num_vertices(), 54); @@ -159,13 +160,14 @@ BOOST_AUTO_TEST_CASE(test_pebbling_schedule_writer) { RETURN_STATUS result = scheduler.computeSchedule(bsp_schedule); BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); - std::vector > minimum_memory_required_vector = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); + std::vector > minimum_memory_required_vector + = PebblingSchedule::minimumMemoryRequiredPerNodeType(instance); v_memw_t max_required = *std::max_element(minimum_memory_required_vector.begin(), minimum_memory_required_vector.end()); - instance.getArchitecture().setMemoryBound(max_required + 3); + instance.getArchitecture().setMemoryBound(max_required + 3); PebblingSchedule memSchedule(bsp_schedule, PebblingSchedule::CACHE_EVICTION_STRATEGY::LEAST_RECENTLY_USED); BOOST_CHECK(memSchedule.isValid()); std::cout << "Writing pebbling schedule" << std::endl; file_writer::write_txt(std::cout, memSchedule); -} \ No newline at end of file +} diff --git a/tests/permutations.cpp b/tests/permutations.cpp index 67eb7e71..05622968 100644 --- a/tests/permutations.cpp +++ b/tests/permutations.cpp @@ -17,13 +17,11 @@ limitations under the License. */ #define BOOST_TEST_MODULE permutations -#include - #include +#include #include #include - #include "osp/auxiliary/permute.hpp" namespace osp { @@ -50,7 +48,7 @@ BOOST_AUTO_TEST_CASE(In_Place_Permutation_random) { BOOST_AUTO_TEST_CASE(In_Place_Permutation_char) { std::vector vec({'a', 'b', 'c', 'd', 'e', 'f', 'g'}); - std::vector perm({4,0,1,2,3,6,5}); + std::vector perm({4, 0, 1, 2, 3, 6, 5}); std::vector sol({'b', 'c', 'd', 'e', 'a', 'g', 'f'}); std::vector perm_sol(perm.size()); std::iota(perm_sol.begin(), perm_sol.end(), 0); @@ -62,7 +60,6 @@ BOOST_AUTO_TEST_CASE(In_Place_Permutation_char) { } } - BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_random) { std::vector vec(20); std::iota(vec.begin(), vec.end(), 0); @@ -89,7 +86,7 @@ BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_random) { BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_char) { std::vector vec({'a', 'b', 'c', 'd', 'e', 'f', 'g'}); - std::vector perm({4,0,1,2,3,6,5}); + std::vector perm({4, 0, 1, 2, 3, 6, 5}); std::vector sol({'e', 'a', 'b', 'c', 'd', 'g', 'f'}); std::vector perm_sol(perm.size()); std::iota(perm_sol.begin(), perm_sol.end(), 0); @@ -101,7 +98,4 @@ BOOST_AUTO_TEST_CASE(In_Place_Inverse_Permutation_char) { } } - - - -} // namespace osp \ No newline at end of file +} // namespace osp diff --git a/tests/random_graph_gen.cpp b/tests/random_graph_gen.cpp index 7f3a1b01..7a7fce52 100644 --- a/tests/random_graph_gen.cpp +++ b/tests/random_graph_gen.cpp @@ -18,7 +18,6 @@ limitations under the License. #define BOOST_TEST_MODULE coarse_refine_scheduler #include - #include #include #include @@ -28,13 +27,12 @@ limitations under the License. #include "osp/auxiliary/random_graph_generator/Erdos_Renyi_graph.hpp" #include "osp/auxiliary/random_graph_generator/near_diagonal_random_graph.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/graph_algorithms/directed_graph_util.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(Erdos_Renyi_graph_test) { - std::vector graph_sizes({100, 500, 500}); std::vector graph_chances({10, 8, 20}); @@ -48,10 +46,9 @@ BOOST_AUTO_TEST_CASE(Erdos_Renyi_graph_test) { } BOOST_AUTO_TEST_CASE(near_diag_random_graph_test) { - std::vector graph_sizes({100, 500, 500}); std::vector graph_bw({10, 20, 30}); - std::vector graph_prob({0.14, 0.02 , 0.07}); + std::vector graph_prob({0.14, 0.02, 0.07}); for (size_t i = 0; i < graph_sizes.size(); i++) { computational_dag_vector_impl_def_int_t graph; diff --git a/tests/set_operations.cpp b/tests/set_operations.cpp index 6252aa5f..73496e6e 100644 --- a/tests/set_operations.cpp +++ b/tests/set_operations.cpp @@ -18,13 +18,11 @@ limitations under the License. #define BOOST_TEST_MODULE Sets #include - -#include "osp/auxiliary/misc.hpp" - #include #include #include +#include "osp/auxiliary/misc.hpp" using namespace osp; @@ -60,16 +58,16 @@ BOOST_AUTO_TEST_CASE(SetIntersectionLarge) { std::unordered_set iota_0_to_10k_set(iota_0_to_10k.begin(), iota_0_to_10k.end()); - { // Intersection of [0,10k] and [10k,20k] --> [] + { // Intersection of [0,10k] and [10k,20k] --> [] std::unordered_set iota_10k_to_20k_set(iota_10k_to_20k.begin(), iota_10k_to_20k.end()); BOOST_CHECK(get_intersection(iota_0_to_10k_set, iota_10k_to_20k_set).empty()); } - { // Intersection of [0,10k] and [0k,10k] --> [0k,10k] + { // Intersection of [0,10k] and [0k,10k] --> [0k,10k] BOOST_CHECK(get_intersection(iota_0_to_10k_set, iota_0_to_10k_set) == iota_0_to_10k_set); } - { // Intersection of [0,10k] and [5k,10k] --> [5k,10k] + { // Intersection of [0,10k] and [5k,10k] --> [5k,10k] std::vector iota_5k_to_10k(5'000); std::iota(iota_5k_to_10k.begin(), iota_5k_to_10k.end(), 5'000); std::unordered_set iota_5k_to_10k_set(iota_5k_to_10k.begin(), iota_5k_to_10k.end()); @@ -112,18 +110,18 @@ BOOST_AUTO_TEST_CASE(SetUnionLarge) { std::unordered_set iota_0_to_10k_set(iota_0_to_10k.begin(), iota_0_to_10k.end()); - { // Union of [0,10k] and [10k,20k] --> [0k,20k] + { // Union of [0,10k] and [10k,20k] --> [0k,20k] std::unordered_set iota_10k_to_20k_set(iota_10k_to_20k.begin(), iota_10k_to_20k.end()); std::unordered_set expected_union(iota_0_to_10k.begin(), iota_0_to_10k.end()); expected_union.insert(iota_10k_to_20k.begin(), iota_10k_to_20k.end()); BOOST_CHECK(get_union(iota_0_to_10k_set, iota_10k_to_20k_set) == expected_union); } - { // Union of [0,10k] and [0k,10k] --> [0k,10k] + { // Union of [0,10k] and [0k,10k] --> [0k,10k] BOOST_CHECK(get_union(iota_0_to_10k_set, iota_0_to_10k_set) == iota_0_to_10k_set); } - { // Union of [0,10k] and [5k,15k] --> [0k,15k] + { // Union of [0,10k] and [5k,15k] --> [0k,15k] std::vector iota_5k_to_15k(10'000); std::iota(iota_5k_to_15k.begin(), iota_5k_to_15k.end(), 5'000); std::unordered_set iota_5k_to_15k_set(iota_5k_to_15k.begin(), iota_5k_to_15k.end()); diff --git a/tests/sorts_and_arrangements.cpp b/tests/sorts_and_arrangements.cpp index 5e2d1582..328f228e 100644 --- a/tests/sorts_and_arrangements.cpp +++ b/tests/sorts_and_arrangements.cpp @@ -18,7 +18,6 @@ limitations under the License. #define BOOST_TEST_MODULE Sorts_and_Arrangements #include - #include #include #include diff --git a/tests/sparse_matrix_impl.cpp b/tests/sparse_matrix_impl.cpp index fd23aa84..8957fae7 100644 --- a/tests/sparse_matrix_impl.cpp +++ b/tests/sparse_matrix_impl.cpp @@ -18,45 +18,45 @@ limitations under the License. #ifdef EIGEN_FOUND -#define BOOST_TEST_MODULE SparseMatrixImpl +# define BOOST_TEST_MODULE SparseMatrixImpl -#include -#include -#include +# include +# include +# include -#include "osp/graph_algorithms/directed_graph_util.hpp" -#include "osp/graph_algorithms/directed_graph_path_util.hpp" -#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" +# include "osp/graph_algorithms/directed_graph_path_util.hpp" +# include "osp/graph_algorithms/directed_graph_util.hpp" +# include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(test_sparse_matrix_adapter_1) { -/* - - ---0 - / / | \ - --|--1 2--|-\ - | | | \ | | - | | 4 <-- 3 / - | \ | / - \ 5 / - \ | /---- - \|/ - 6 - - - j→ 0 1 2 3 4 5 6 -i ↓ - ------------------------------------------- - 0 | 0 0 0 0 0 0 0 - 1 | 2.0 0 0 0 0 0 0 - 2 | 3.0 0 0 0 0 0 0 - 3 | 4.0 0 5.0 0 0 0 0 - 4 | 0.0 6.0 0 7.0 0 0 0 - 5 | 8.0 0 0 0 9.0 0 0 - 6 | 0.0 10.0 11.0 0 0 12.0 0 - -*/ + /* + + ---0 + / / | \ + --|--1 2--|-\ + | | | \ | | + | | 4 <-- 3 / + | \ | / + \ 5 / + \ | /---- + \|/ + 6 + + + j→ 0 1 2 3 4 5 6 + i ↓ + ------------------------------------------- + 0 | 0 0 0 0 0 0 0 + 1 | 2.0 0 0 0 0 0 0 + 2 | 3.0 0 0 0 0 0 0 + 3 | 4.0 0 5.0 0 0 0 0 + 4 | 0.0 6.0 0 7.0 0 0 0 + 5 | 8.0 0 0 0 9.0 0 0 + 6 | 0.0 10.0 11.0 0 0 12.0 0 + + */ using SM_csr = Eigen::SparseMatrix; using SM_csc = Eigen::SparseMatrix; using Triplet = Eigen::Triplet; @@ -64,21 +64,22 @@ i ↓ std::vector triplets; // Diagonal entries - for (int i = 0; i < size; ++i) + for (int i = 0; i < size; ++i) { triplets.emplace_back(i, i, 1.0); + } // Dependencies (i depends on j if L(i,j) ≠ 0, j < i) - triplets.emplace_back(1, 0, 2.0); // x1 ← x0 - triplets.emplace_back(2, 0, 3.0); // x2 ← x0 - triplets.emplace_back(3, 0, 4.0); // x3 ← x0 - triplets.emplace_back(3, 2, 5.0); // x3 ← x2 - triplets.emplace_back(4, 1, 6.0); // x4 ← x1 - triplets.emplace_back(4, 3, 7.0); // x4 ← x3 - triplets.emplace_back(5, 0, 8.0); // x5 ← x0 - triplets.emplace_back(5, 4, 9.0); // x5 ← x4 - triplets.emplace_back(6, 1, 10.0); // x6 ← x1 - triplets.emplace_back(6, 2, 11.0); // x6 ← x2 - triplets.emplace_back(6, 5, 12.0); // x6 ← x5 + triplets.emplace_back(1, 0, 2.0); // x1 ← x0 + triplets.emplace_back(2, 0, 3.0); // x2 ← x0 + triplets.emplace_back(3, 0, 4.0); // x3 ← x0 + triplets.emplace_back(3, 2, 5.0); // x3 ← x2 + triplets.emplace_back(4, 1, 6.0); // x4 ← x1 + triplets.emplace_back(4, 3, 7.0); // x4 ← x3 + triplets.emplace_back(5, 0, 8.0); // x5 ← x0 + triplets.emplace_back(5, 4, 9.0); // x5 ← x4 + triplets.emplace_back(6, 1, 10.0); // x6 ← x1 + triplets.emplace_back(6, 2, 11.0); // x6 ← x2 + triplets.emplace_back(6, 5, 12.0); // x6 ← x5 // Construct matrix SM_csr L_csr(size, size); @@ -105,16 +106,31 @@ i ↓ std::vector vertices{0, 1, 2, 3, 4, 5, 6}; - std::vector> out_neighbors{{1, 2, 3, 5}, {4, 6}, {3, 6}, {4}, {5}, {6}, {}}; - - std::vector> in_neighbors{{}, {0}, {0}, {0, 2}, {1, 3}, {0, 4}, {1, 2, 5}}; + std::vector> out_neighbors{ + {1, 2, 3, 5}, + {4, 6}, + {3, 6}, + {4}, + {5}, + {6}, + {} + }; + + std::vector> in_neighbors{ + {}, + {0}, + {0}, + {0, 2}, + {1, 3}, + {0, 4}, + {1, 2, 5} + }; size_t idx = 0; for (const long unsigned int &v : graph.vertices()) { - BOOST_CHECK_EQUAL(v, vertices[idx++]); - + size_t i = 0; const size_t vi = static_cast(v); @@ -137,20 +153,16 @@ i ↓ BOOST_CHECK_EQUAL(source(e, graph), in_neighbors[vi][i++]); } - BOOST_CHECK_EQUAL(graph.in_degree(v), in_neighbors[vi].size()); BOOST_CHECK_EQUAL(graph.out_degree(v), out_neighbors[vi].size()); - } unsigned count = 0; - for (const auto & e: edges(graph)) { - + for (const auto &e : edges(graph)) { std::cout << e.source << " -> " << e.target << std::endl; count++; } BOOST_CHECK_EQUAL(count, 11); - } #endif diff --git a/tests/sptrsv.cpp b/tests/sptrsv.cpp index f6dc8e17..6bc49c93 100644 --- a/tests/sptrsv.cpp +++ b/tests/sptrsv.cpp @@ -18,30 +18,27 @@ limitations under the License. // #define EIGEN_FOUND 1 - #ifdef EIGEN_FOUND -#define BOOST_TEST_MODULE SPTRSV +# define BOOST_TEST_MODULE SPTRSV -#include -#include -#include -#include -#include -#include -#include +# include "osp/auxiliary/sptrsv_simulator/sptrsv.hpp" +# include +# include +# include +# include +# include +# include -#include "osp/graph_algorithms/directed_graph_util.hpp" -#include "osp/graph_algorithms/directed_graph_path_util.hpp" -#include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" -#include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp" -#include "osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp" -#include "osp/auxiliary/sptrsv_simulator/sptrsv.hpp" +# include "osp/auxiliary/sptrsv_simulator/ScheduleNodePermuter.hpp" +# include "osp/bsp/scheduler/GreedySchedulers/GrowLocalAutoCores.hpp" +# include "osp/graph_algorithms/directed_graph_path_util.hpp" +# include "osp/graph_algorithms/directed_graph_util.hpp" +# include "osp/graph_implementations/eigen_matrix_adapter/sparse_matrix.hpp" using namespace osp; - bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { std::cout << std::fixed; std::cout << std::setprecision(15); @@ -49,9 +46,9 @@ bool compare_vectors(Eigen::VectorXd &v1, Eigen::VectorXd &v2) { assert(v1.size() == v2.size()); bool same = true; const double epsilon = 1e-10; - for (long long int i=0; i < v1.size(); ++i){ - //std::cout << "Ind: " << i << ": | " << v1[i] << " - " << v2[i] << " | = " << abs(v1[i]-v2[i]) << "\n"; - if( std::abs(v1[i] - v2[i]) / (std::abs(v1[i]) + std::abs(v2[i]) + epsilon) > epsilon ){ + for (long long int i = 0; i < v1.size(); ++i) { + // std::cout << "Ind: " << i << ": | " << v1[i] << " - " << v2[i] << " | = " << abs(v1[i]-v2[i]) << "\n"; + if (std::abs(v1[i] - v2[i]) / (std::abs(v1[i]) + std::abs(v2[i]) + epsilon) > epsilon) { std::cout << "We have differences in the matrix in position: " << i << std::endl; std::cout << v1[i] << " , " << v2[i] << std::endl; same = false; @@ -72,8 +69,8 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { cwd = cwd.parent_path(); std::cout << cwd << std::endl; } - const std::string filename = (cwd / "data/mtx_tests/ErdosRenyi_2k_14k_A.mtx").string(); - + const std::string filename = (cwd / "data/mtx_tests/ErdosRenyi_2k_14k_A.mtx").string(); + SparseMatrixImp graph; SM_csr L_csr; @@ -85,8 +82,8 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { return; } - std::cout << "Loaded matrix of size " << L_csr.rows() << " x " << L_csr.cols() - << " with " << L_csr.nonZeros() << " non-zeros.\n"; + std::cout << "Loaded matrix of size " << L_csr.rows() << " x " << L_csr.cols() << " with " << L_csr.nonZeros() + << " non-zeros.\n"; graph.setCSR(&L_csr); SM_csc L_csc{}; @@ -115,20 +112,20 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { std::cout << std::endl; } */ - + BOOST_CHECK_EQUAL(result_cs, RETURN_STATUS::OSP_SUCCESS); BOOST_CHECK(schedule_cs.hasValidCommSchedule()); - //std::cout << "Scheduling Costs:" << schedule_cs.computeCosts() << std::endl; - //std::cout << "lazy com Costs:" <(); - L_b_ref.setOnes(); // Initialize RHS vector with all ones + L_b_ref.setOnes(); // Initialize RHS vector with all ones L_x_ref.setZero(); L_x_ref = L_view.solve(L_b_ref); @@ -136,16 +133,15 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { Sptrsv sim{instance}; sim.setup_csr_no_permutation(schedule_cs); - - //osp no permutation L_solve + // osp no permutation L_solve auto L_x_osp = L_x_ref; auto L_b_osp = L_b_ref; L_b_osp.setOnes(); - //L_x_osp.setZero(); + // L_x_osp.setZero(); sim.x = &L_x_osp[0]; sim.b = &L_b_osp[0]; sim.lsolve_no_permutation(); - BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp)); + BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); // Comparisson with osp serial L solve // Eigen @@ -154,10 +150,9 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { L_x_ref = L_view.solve(L_b_ref); // OSP L_b_osp.setOnes(); - //L_x_osp.setZero(); + // L_x_osp.setZero(); sim.lsolve_serial(); - BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp)); - + BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); // INPLACE case eigen L solve vs osp L solve // Eigen @@ -166,9 +161,9 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { L_x_ref = L_view.solve(L_b_ref); // OSP L_x_osp.setConstant(0.1); - L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values + L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values sim.lsolve_no_permutation_in_place(); - BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp)); + BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); // Comparisson with osp serial in place L solve // Eigen @@ -177,13 +172,13 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { L_x_ref = L_view.solve(L_b_ref); // OSP L_x_osp.setConstant(0.1); - L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values + L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values sim.lsolve_serial_in_place(); - BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp)); + BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); // Upper Solve SM_csr U_csr = L_csc.transpose(); - SM_csc U_csc = U_csr; // Convert to column-major + SM_csc U_csc = U_csr; // Convert to column-major Eigen::VectorXd U_b_ref(n), U_x_ref(n); Eigen::VectorXd U_b_osp(n), U_x_osp(n); // Eigen reference U solve @@ -208,8 +203,8 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { U_b_osp.setOnes(); U_x_osp.setZero(); sim.usolve_serial(); - BOOST_CHECK(compare_vectors(U_x_ref,U_x_osp)); - + BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp)); + // INPLACE case eigen U solve vs osp U solve // Eigen U_b_ref.setConstant(0.1); @@ -217,9 +212,9 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { U_x_ref = U_view.solve(U_b_ref); // OSP U_x_osp.setConstant(0.1); - U_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values + U_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values sim.usolve_no_permutation_in_place(); - BOOST_CHECK(compare_vectors(U_x_ref,U_x_osp)); + BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp)); // Comparisson with osp serial in place U solve // Eigen @@ -228,14 +223,13 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { U_x_ref = U_view.solve(U_b_ref); // OSP U_x_osp.setConstant(0.1); - U_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values + U_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values sim.usolve_serial_in_place(); - BOOST_CHECK(compare_vectors(U_x_ref,U_x_osp)); - + BOOST_CHECK(compare_vectors(U_x_ref, U_x_osp)); // Lsolve in-place With PERMUTATION std::vector perm = schedule_node_permuter_basic(schedule_cs, LOOP_PROCESSORS); - sim.setup_csr_with_permutation (schedule_cs, perm); + sim.setup_csr_with_permutation(schedule_cs, perm); // Comparisson with osp serial in place L solve // Eigen @@ -244,17 +238,14 @@ BOOST_AUTO_TEST_CASE(test_eigen_sptrsv) { L_x_ref = L_view.solve(L_b_ref); // OSP L_x_osp.setConstant(0.1); - L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values + L_b_osp.setZero(); // this will not be used as x will take the values that already has instead of the b values sim.x = &L_x_osp[0]; sim.b = &L_b_osp[0]; - //sim.permute_x_vector(perm); + // sim.permute_x_vector(perm); sim.lsolve_with_permutation_in_place(); sim.permute_x_vector(perm); - BOOST_CHECK(compare_vectors(L_x_ref,L_x_osp)); - + BOOST_CHECK(compare_vectors(L_x_ref, L_x_osp)); } - - #endif diff --git a/tests/stepbystep_coarsen_and_multilevel.cpp b/tests/stepbystep_coarsen_and_multilevel.cpp index bfc4931f..270893e9 100644 --- a/tests/stepbystep_coarsen_and_multilevel.cpp +++ b/tests/stepbystep_coarsen_and_multilevel.cpp @@ -19,24 +19,21 @@ limitations under the License. #define BOOST_TEST_MODULE STEPBYSTEP_AND_MULTILEVEL #include - #include #include #include -#include "osp/coarser/StepByStep/StepByStepCoarser.hpp" -#include "osp/bsp/scheduler/Scheduler.hpp" -#include "osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/auxiliary/io/arch_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" +#include "osp/bsp/scheduler/CoarsenRefineSchedulers/MultiLevelHillClimbing.hpp" +#include "osp/bsp/scheduler/Scheduler.hpp" +#include "osp/coarser/StepByStep/StepByStepCoarser.hpp" #include "osp/coarser/coarser_util.hpp" - #include "osp/graph_implementations/boost_graphs/boost_graph.hpp" using namespace osp; BOOST_AUTO_TEST_CASE(StepByStepCoarser_test) { - using graph = boost_graph_uint_t; StepByStepCoarser test; @@ -57,7 +54,7 @@ BOOST_AUTO_TEST_CASE(StepByStepCoarser_test) { StepByStepCoarser coarser; - coarser.setTargetNumberOfNodes(static_cast(DAG.num_vertices())/2); + coarser.setTargetNumberOfNodes(static_cast(DAG.num_vertices()) / 2); graph coarsened_dag1, coarsened_dag2; std::vector>> old_vertex_ids; @@ -66,14 +63,12 @@ BOOST_AUTO_TEST_CASE(StepByStepCoarser_test) { coarser.coarsenDag(DAG, coarsened_dag1, new_vertex_id); old_vertex_ids = coarser_util::invert_vertex_contraction_map(new_vertex_id); - coarser.setTargetNumberOfNodes(static_cast(DAG.num_vertices())*2/3); + coarser.setTargetNumberOfNodes(static_cast(DAG.num_vertices()) * 2 / 3); coarser.coarsenForPebbling(DAG, coarsened_dag2, new_vertex_id); old_vertex_ids = coarser_util::invert_vertex_contraction_map(new_vertex_id); - } BOOST_AUTO_TEST_CASE(Multilevel_test) { - using graph = boost_graph_uint_t; StepByStepCoarser test; @@ -90,27 +85,25 @@ BOOST_AUTO_TEST_CASE(Multilevel_test) { std::cout << cwd << std::endl; } - bool status = file_reader::readComputationalDagHyperdagFormatDB( - (cwd / "data/spaa/tiny/instance_pregel.hdag").string(), instance.getComputationalDag()); + bool status = file_reader::readComputationalDagHyperdagFormatDB((cwd / "data/spaa/tiny/instance_pregel.hdag").string(), + instance.getComputationalDag()); BOOST_CHECK(status); - MultiLevelHillClimbingScheduler multi1, multi2; BspSchedule schedule1(instance), schedule2(instance); - multi1.setContractionRate(0.3); + multi1.setContractionRate(0.3); multi1.useLinearRefinementSteps(5); auto result = multi1.computeSchedule(schedule1); BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); BOOST_CHECK(schedule1.satisfiesPrecedenceConstraints()); - multi2.setContractionRate(0.3); + multi2.setContractionRate(0.3); multi2.useExponentialRefinementPoints(1.2); result = multi2.computeSchedule(schedule2); BOOST_CHECK_EQUAL(RETURN_STATUS::OSP_SUCCESS, result); BOOST_CHECK(schedule2.satisfiesPrecedenceConstraints()); - -} \ No newline at end of file +} diff --git a/tests/strongly_connected_components.cpp b/tests/strongly_connected_components.cpp index 8e85a8ff..65e294e8 100644 --- a/tests/strongly_connected_components.cpp +++ b/tests/strongly_connected_components.cpp @@ -17,19 +17,18 @@ limitations under the License. */ #define BOOST_TEST_MODULE StronglyConnectedComponentsTest -#include - #include "osp/graph_algorithms/strongly_connected_components.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" #include +#include #include #include +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" + // Helper function to compare SCC results. template -void check_sccs_equal(const std::vector> &result, - const std::vector> &expected) { +void check_sccs_equal(const std::vector> &result, const std::vector> &expected) { auto to_set_of_sets = [](const std::vector> &vec_of_vecs) { std::set> set_of_sets; for (const auto &inner_vec : vec_of_vecs) { @@ -57,9 +56,9 @@ BOOST_AUTO_TEST_CASE(EmptyGraphTest) { BOOST_AUTO_TEST_CASE(NoEdgesTest) { graph g; - g.add_vertex(1,1,1); - g.add_vertex(1,1,1); - g.add_vertex(1,1,1); + g.add_vertex(1, 1, 1); + g.add_vertex(1, 1, 1); + g.add_vertex(1, 1, 1); auto sccs = osp::strongly_connected_components(g); std::vector> expected = {{0}, {1}, {2}}; @@ -68,10 +67,10 @@ BOOST_AUTO_TEST_CASE(NoEdgesTest) { BOOST_AUTO_TEST_CASE(LineGraphTest) { graph g; - g.add_vertex(1,1,1); - g.add_vertex(1,1,1); - g.add_vertex(1,1,1); - g.add_vertex(1,1,1); + g.add_vertex(1, 1, 1); + g.add_vertex(1, 1, 1); + g.add_vertex(1, 1, 1); + g.add_vertex(1, 1, 1); g.add_edge(0, 1); g.add_edge(1, 2); g.add_edge(2, 3); @@ -83,23 +82,25 @@ BOOST_AUTO_TEST_CASE(LineGraphTest) { BOOST_AUTO_TEST_CASE(SimpleCycleTest) { graph g; - g.add_vertex(1,1,1); - g.add_vertex(1,1,1); - g.add_vertex(1,1,1); + g.add_vertex(1, 1, 1); + g.add_vertex(1, 1, 1); + g.add_vertex(1, 1, 1); g.add_edge(0, 1); g.add_edge(1, 2); g.add_edge(2, 0); auto sccs = osp::strongly_connected_components(g); - std::vector> expected = {{0, 1, 2}}; + std::vector> expected = { + {0, 1, 2} + }; check_sccs_equal(sccs, expected); } BOOST_AUTO_TEST_CASE(FullGraphIsSCCTest) { graph g; - g.add_vertex(1,1,1); - g.add_vertex(1,1,1); - g.add_vertex(1,1,1); + g.add_vertex(1, 1, 1); + g.add_vertex(1, 1, 1); + g.add_vertex(1, 1, 1); g.add_edge(0, 1); g.add_edge(1, 0); g.add_edge(1, 2); @@ -108,40 +109,69 @@ BOOST_AUTO_TEST_CASE(FullGraphIsSCCTest) { g.add_edge(2, 0); auto sccs = osp::strongly_connected_components(g); - std::vector> expected = {{0, 1, 2}}; + std::vector> expected = { + {0, 1, 2} + }; check_sccs_equal(sccs, expected); } BOOST_AUTO_TEST_CASE(MultipleSCCsTest) { - graph g; - for (int i = 0; i < 8; ++i) - g.add_vertex(1,1,1); + for (int i = 0; i < 8; ++i) { + g.add_vertex(1, 1, 1); + } - g.add_edge(0, 1); g.add_edge(1, 2); g.add_edge(2, 0); // SCC {0,1,2} - g.add_edge(3, 4); g.add_edge(4, 3); // SCC {3,4} - g.add_edge(5, 6); g.add_edge(6, 5); // SCC {5,6} + g.add_edge(0, 1); + g.add_edge(1, 2); + g.add_edge(2, 0); // SCC {0,1,2} + g.add_edge(3, 4); + g.add_edge(4, 3); // SCC {3,4} + g.add_edge(5, 6); + g.add_edge(6, 5); // SCC {5,6} // SCC {7} - g.add_edge(2, 3); g.add_edge(3, 5); g.add_edge(4, 6); g.add_edge(5, 7); + g.add_edge(2, 3); + g.add_edge(3, 5); + g.add_edge(4, 6); + g.add_edge(5, 7); auto sccs = osp::strongly_connected_components(g); - std::vector> expected = {{0, 1, 2}, {3, 4}, {5, 6}, {7}}; + std::vector> expected = { + {0, 1, 2}, + {3, 4}, + {5, 6}, + {7} + }; check_sccs_equal(sccs, expected); } BOOST_AUTO_TEST_CASE(ComplexGraphFromPaperTest) { - graph g; - for (int i = 0; i < 8; ++i) g.add_vertex(1,1,1); - g.add_edge(0, 1); g.add_edge(1, 2); g.add_edge(1, 4); g.add_edge(1, 5); - g.add_edge(2, 3); g.add_edge(2, 6); g.add_edge(3, 2); g.add_edge(3, 7); - g.add_edge(4, 0); g.add_edge(4, 5); g.add_edge(5, 6); g.add_edge(6, 5); - g.add_edge(7, 3); g.add_edge(7, 6); + for (int i = 0; i < 8; ++i) { + g.add_vertex(1, 1, 1); + } + g.add_edge(0, 1); + g.add_edge(1, 2); + g.add_edge(1, 4); + g.add_edge(1, 5); + g.add_edge(2, 3); + g.add_edge(2, 6); + g.add_edge(3, 2); + g.add_edge(3, 7); + g.add_edge(4, 0); + g.add_edge(4, 5); + g.add_edge(5, 6); + g.add_edge(6, 5); + g.add_edge(7, 3); + g.add_edge(7, 6); auto sccs = osp::strongly_connected_components(g); - std::vector> expected = {{0, 1, 4}, {2, 3, 7}, {5, 6}}; + std::vector> expected = { + {0, 1, 4}, + {2, 3, 7}, + {5, 6} + }; check_sccs_equal(sccs, expected); } -BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file +BOOST_AUTO_TEST_SUITE_END() diff --git a/tests/test_graphs.hpp b/tests/test_graphs.hpp index 8db94585..fd31c74a 100644 --- a/tests/test_graphs.hpp +++ b/tests/test_graphs.hpp @@ -18,10 +18,11 @@ limitations under the License. #pragma once -#include "osp/concepts/constructable_computational_dag_concept.hpp" #include #include +#include "osp/concepts/constructable_computational_dag_concept.hpp" + namespace osp { std::vector tiny_spaa_graphs() { @@ -40,25 +41,22 @@ std::vector tiny_spaa_graphs() { "data/spaa/tiny/instance_pregel.hdag", "data/spaa/tiny/instance_spmv_N6_nzP0d4.hdag", "data/spaa/tiny/instance_spmv_N7_nzP0d35.hdag", - "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag" - }; + "data/spaa/tiny/instance_spmv_N10_nzP0d25.hdag"}; } std::vector large_spaa_graphs() { return {"data/spaa/large/instance_exp_N50_K12_nzP0d15.hdag", "data/spaa/large/instance_CG_N24_K22_nzP0d2.hdag", "data/spaa/large/instance_kNN_N45_K15_nzP0d16.hdag", - "data/spaa/large/instance_spmv_N120_nzP0d18.hdag" -}; + "data/spaa/large/instance_spmv_N120_nzP0d18.hdag"}; } std::vector test_graphs() { - return {"data/spaa/tiny/instance_k-means.hdag", - "data/spaa/tiny/instance_bicgstab.hdag", + return {"data/spaa/tiny/instance_k-means.hdag", + "data/spaa/tiny/instance_bicgstab.hdag", "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag"}; } - /** * @brief Constructs a DAG with multiple identical, parallel pipelines. * @@ -73,11 +71,13 @@ std::vector test_graphs() { * @param pipeline_len The length of each pipeline. * @return A Graph_t object representing the DAG. */ -template +template inline Graph_t construct_multi_pipeline_dag(unsigned num_pipelines, unsigned pipeline_len) { static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); Graph_t dag; - if (num_pipelines == 0 || pipeline_len == 0) return dag; + if (num_pipelines == 0 || pipeline_len == 0) { + return dag; + } for (unsigned i = 0; i < num_pipelines; ++i) { for (unsigned j = 0; j < pipeline_len; ++j) { @@ -104,15 +104,17 @@ inline Graph_t construct_multi_pipeline_dag(unsigned num_pipelines, unsigned pip * @param num_rungs The number of rungs in the ladder. * @return A Graph_t object representing the DAG. */ -template +template inline Graph_t construct_ladder_dag(unsigned num_rungs) { static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); Graph_t dag; - if (num_rungs == 0) return dag; + if (num_rungs == 0) { + return dag; + } for (unsigned i = 0; i < num_rungs + 1; ++i) { - dag.add_vertex(10, 1, 1); // Left side node - dag.add_vertex(20, 1, 1); // Right side node + dag.add_vertex(10, 1, 1); // Left side node + dag.add_vertex(20, 1, 1); // Right side node } for (unsigned i = 0; i < num_rungs; ++i) { @@ -138,7 +140,7 @@ inline Graph_t construct_ladder_dag(unsigned num_rungs) { * @param num_nodes The number of nodes in the chain. * @return A Graph_t object representing the DAG. */ -template +template inline Graph_t construct_asymmetric_dag(unsigned num_nodes) { static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); Graph_t dag; @@ -157,12 +159,14 @@ inline Graph_t construct_asymmetric_dag(unsigned num_nodes) { * @param height The height of the tree. A height of 0 is a single node. Total nodes: 2^(height+1) - 1. * @return A Graph_t object representing the out-tree. */ -template +template inline Graph_t construct_binary_out_tree(unsigned height) { static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); Graph_t dag; unsigned num_nodes = (1U << (height + 1)) - 1; - if (num_nodes == 0) return dag; + if (num_nodes == 0) { + return dag; + } for (unsigned i = 0; i < num_nodes; ++i) { dag.add_vertex(10, 1, 1); @@ -181,12 +185,14 @@ inline Graph_t construct_binary_out_tree(unsigned height) { * @param height The height of the tree. A height of 0 is a single node. Total nodes: 2^(height+1) - 1. * @return A Graph_t object representing the in-tree. */ -template +template inline Graph_t construct_binary_in_tree(unsigned height) { static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); Graph_t dag; unsigned num_nodes = (1U << (height + 1)) - 1; - if (num_nodes == 0) return dag; + if (num_nodes == 0) { + return dag; + } for (unsigned i = 0; i < num_nodes; ++i) { dag.add_vertex(10, 1, 1); @@ -206,11 +212,13 @@ inline Graph_t construct_binary_in_tree(unsigned height) { * @param cols The number of columns in the grid. * @return A Graph_t object representing the grid. */ -template +template inline Graph_t construct_grid_dag(unsigned rows, unsigned cols) { static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); Graph_t dag; - if (rows == 0 || cols == 0) return dag; + if (rows == 0 || cols == 0) { + return dag; + } for (unsigned i = 0; i < rows * cols; ++i) { dag.add_vertex(10, 1, 1); @@ -218,8 +226,12 @@ inline Graph_t construct_grid_dag(unsigned rows, unsigned cols) { for (unsigned r = 0; r < rows; ++r) { for (unsigned c = 0; c < cols; ++c) { - if (r + 1 < rows) dag.add_edge(r * cols + c, (r + 1) * cols + c); - if (c + 1 < cols) dag.add_edge(r * cols + c, r * cols + (c + 1)); + if (r + 1 < rows) { + dag.add_edge(r * cols + c, (r + 1) * cols + c); + } + if (c + 1 < cols) { + dag.add_edge(r * cols + c, r * cols + (c + 1)); + } } } return dag; @@ -231,11 +243,13 @@ inline Graph_t construct_grid_dag(unsigned rows, unsigned cols) { * @param stages The number of stages (log2 of the number of inputs). Total nodes: (stages+1) * 2^stages. * @return A Graph_t object representing the butterfly graph. */ -template +template inline Graph_t construct_butterfly_dag(unsigned stages) { static_assert(is_constructable_cdag_v, "Graph_t must be a constructable computational DAG"); Graph_t dag; - if (stages == 0) return dag; + if (stages == 0) { + return dag; + } unsigned N = 1U << stages; for (unsigned i = 0; i < (stages + 1) * N; ++i) { @@ -254,4 +268,4 @@ inline Graph_t construct_butterfly_dag(unsigned stages) { return dag; } -} // namespace osp::test \ No newline at end of file +} // namespace osp diff --git a/tests/transitive_reduction.cpp b/tests/transitive_reduction.cpp index b2758f31..272b3807 100644 --- a/tests/transitive_reduction.cpp +++ b/tests/transitive_reduction.cpp @@ -17,12 +17,13 @@ limitations under the License. */ #define BOOST_TEST_MODULE TransitiveReduction +#include "osp/graph_algorithms/transitive_reduction.hpp" + #include -#include "test_graphs.hpp" -#include "osp/graph_algorithms/transitive_reduction.hpp" -#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" #include "osp/graph_algorithms/subgraph_algorithms.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_vector_impl.hpp" +#include "test_graphs.hpp" using namespace osp; using graph_t = computational_dag_vector_impl_def_t; @@ -32,12 +33,12 @@ BOOST_AUTO_TEST_SUITE(TransitiveReduction) // Test with a simple chain graph that has a transitive edge BOOST_AUTO_TEST_CASE(SimpleTransitiveEdge) { graph_t dag; - dag.add_vertex(1, 1, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 1, 1); // 2 + dag.add_vertex(1, 1, 1); // 0 + dag.add_vertex(1, 1, 1); // 1 + dag.add_vertex(1, 1, 1); // 2 dag.add_edge(0, 1); dag.add_edge(1, 2); - dag.add_edge(0, 2); // Transitive edge + dag.add_edge(0, 2); // Transitive edge BOOST_REQUIRE_EQUAL(dag.num_vertices(), 3); BOOST_REQUIRE_EQUAL(dag.num_edges(), 3); @@ -56,7 +57,7 @@ BOOST_AUTO_TEST_CASE(SimpleTransitiveEdge) { // Test with a graph that has no transitive edges BOOST_AUTO_TEST_CASE(NoTransitiveEdges) { - const auto dag = construct_ladder_dag(3); // A ladder graph has no transitive edges + const auto dag = construct_ladder_dag(3); // A ladder graph has no transitive edges BOOST_REQUIRE_EQUAL(dag.num_vertices(), 8); BOOST_REQUIRE_EQUAL(dag.num_edges(), 11); @@ -78,11 +79,11 @@ BOOST_AUTO_TEST_CASE(ComplexGraph) { // 2 -> 3 // 3 -> 4 // 0 -> 4 (transitive) - dag.add_vertex(1, 1, 1); // 0 - dag.add_vertex(1, 1, 1); // 1 - dag.add_vertex(1, 1, 1); // 2 - dag.add_vertex(1, 1, 1); // 3 - dag.add_vertex(1, 1, 1); // 4 + dag.add_vertex(1, 1, 1); // 0 + dag.add_vertex(1, 1, 1); // 1 + dag.add_vertex(1, 1, 1); // 2 + dag.add_vertex(1, 1, 1); // 3 + dag.add_vertex(1, 1, 1); // 4 dag.add_edge(0, 1); dag.add_edge(0, 2); @@ -90,8 +91,8 @@ BOOST_AUTO_TEST_CASE(ComplexGraph) { dag.add_edge(2, 3); dag.add_edge(3, 4); // Add transitive edges - dag.add_edge(0, 3); // transitive via 0->1->3 or 0->2->3 - dag.add_edge(0, 4); // transitive via 0->...->3->4 + dag.add_edge(0, 3); // transitive via 0->1->3 or 0->2->3 + dag.add_edge(0, 4); // transitive via 0->...->3->4 BOOST_REQUIRE_EQUAL(dag.num_vertices(), 5); BOOST_REQUIRE_EQUAL(dag.num_edges(), 7); diff --git a/tests/trimmed_group_scheduler.cpp b/tests/trimmed_group_scheduler.cpp index ccbfee8a..63ba9ed9 100644 --- a/tests/trimmed_group_scheduler.cpp +++ b/tests/trimmed_group_scheduler.cpp @@ -30,7 +30,7 @@ using namespace osp; using graph_t = computational_dag_vector_impl_def_t; // Mock SubScheduler for TrimmedGroupScheduler tests -template +template class MockSubScheduler : public Scheduler { public: // This mock scheduler assigns all nodes to local processor 0 and superstep 0. @@ -57,7 +57,7 @@ struct TrimmedGroupSchedulerFixture { // Default architecture: 1 processor type, 100 memory bound arch.setCommunicationCosts(1); arch.setSynchronisationCosts(1); - instance.setAllOnesCompatibilityMatrix(); // All node types compatible with all processor types + instance.setAllOnesCompatibilityMatrix(); // All node types compatible with all processor types } }; @@ -78,9 +78,9 @@ BOOST_AUTO_TEST_CASE(EmptyGraphTest) { BOOST_AUTO_TEST_CASE(SingleComponentSingleProcessorTypeTest) { // Graph: 0-1-2 (single component) - dag.add_vertex(1, 1, 1, 0); // 0 - dag.add_vertex(1, 1, 1, 0); // 1 - dag.add_vertex(1, 1, 1, 0); // 2 + dag.add_vertex(1, 1, 1, 0); // 0 + dag.add_vertex(1, 1, 1, 0); // 1 + dag.add_vertex(1, 1, 1, 0); // 2 dag.add_edge(0, 1); dag.add_edge(1, 2); instance.getComputationalDag() = dag; @@ -109,10 +109,10 @@ BOOST_AUTO_TEST_CASE(SingleComponentSingleProcessorTypeTest) { BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeEvenDistributionTest) { // Graph: 0-1 (component 0), 2-3 (component 1) - dag.add_vertex(1, 1, 1, 0); // 0 - dag.add_vertex(1, 1, 1, 0); // 1 - dag.add_vertex(1, 1, 1, 0); // 2 - dag.add_vertex(1, 1, 1, 0); // 3 + dag.add_vertex(1, 1, 1, 0); // 0 + dag.add_vertex(1, 1, 1, 0); // 1 + dag.add_vertex(1, 1, 1, 0); // 2 + dag.add_vertex(1, 1, 1, 0); // 3 dag.add_edge(0, 1); dag.add_edge(2, 3); instance.getComputationalDag() = dag; @@ -147,9 +147,9 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeEvenDistributionTest) BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeUnevenDistributionTest) { // Graph: 0 (component 0), 1 (component 1), 2 (component 2) - all isolated - dag.add_vertex(1, 1, 1, 0); // 0 - dag.add_vertex(1, 1, 1, 0); // 1 - dag.add_vertex(1, 1, 1, 0); // 2 + dag.add_vertex(1, 1, 1, 0); // 0 + dag.add_vertex(1, 1, 1, 0); // 1 + dag.add_vertex(1, 1, 1, 0); // 2 instance.getComputationalDag() = dag; // Architecture: 6 processors of type 0 @@ -184,14 +184,14 @@ BOOST_AUTO_TEST_CASE(MultipleComponentsSingleProcessorTypeUnevenDistributionTest BOOST_AUTO_TEST_CASE(MultipleComponentsHeterogeneousArchitectureTest) { // Graph: 0 (type 0), 1 (type 1) - isolated nodes - dag.add_vertex(1, 1, 1, 0); // 0 (component 0, type 0) - dag.add_vertex(1, 1, 1, 1); // 1 (component 1, type 1) + dag.add_vertex(1, 1, 1, 0); // 0 (component 0, type 0) + dag.add_vertex(1, 1, 1, 1); // 1 (component 1, type 1) instance.getComputationalDag() = dag; // Architecture: 2 processors of type 0 (global 0,1), 2 processors of type 1 (global 2,3) arch.setProcessorsWithTypes({0, 0, 1, 1}); instance.getArchitecture() = arch; - instance.setDiagonalCompatibilityMatrix(2); // Node type 0 compatible with proc type 0, etc. + instance.setDiagonalCompatibilityMatrix(2); // Node type 0 compatible with proc type 0, etc. // min_non_zero_procs_ = 2 (2 components, 2 groups) // sub_proc_counts for type 0: 2 / 2 = 1 diff --git a/tests/uf_structures.cpp b/tests/uf_structures.cpp index 61245443..f19bc9be 100644 --- a/tests/uf_structures.cpp +++ b/tests/uf_structures.cpp @@ -16,15 +16,14 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ - #define BOOST_TEST_MODULE Union_Find #include - -#include "osp/auxiliary/datastructures/union_find.hpp" #include #include #include +#include "osp/auxiliary/datastructures/union_find.hpp" + using namespace osp; BOOST_AUTO_TEST_CASE(Union_find_structure1) { @@ -175,21 +174,20 @@ BOOST_AUTO_TEST_CASE(Union_find_weight_structure) { BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("e"), 2); BOOST_CHECK_EQUAL(test_universe.get_memory_of_component_by_name("b"), 7); - std::vector, unsigned>> components_n_weights = - test_universe.get_connected_components_and_weights(); + std::vector, unsigned>> components_n_weights + = test_universe.get_connected_components_and_weights(); unsigned total_comp_weights = 0; unsigned total_elements = 0; for (auto &[comp, wt] : components_n_weights) { total_comp_weights += wt; total_elements += static_cast(comp.size()); for (auto &name : comp) { - BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), - [name](std::string other_name) { return name == other_name; })); + BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; })); } } - std::vector, unsigned, unsigned>> components_n_weights_n_memory = - test_universe.get_connected_components_weights_and_memories(); + std::vector, unsigned, unsigned>> components_n_weights_n_memory + = test_universe.get_connected_components_weights_and_memories(); unsigned total_comp_weights_2 = 0; unsigned total_comp_memory = 0; unsigned total_elements_2 = 0; @@ -198,8 +196,7 @@ BOOST_AUTO_TEST_CASE(Union_find_weight_structure) { total_comp_memory += mem; total_elements_2 += static_cast(comp.size()); for (auto &name : comp) { - BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), - [name](std::string other_name) { return name == other_name; })); + BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; })); } } @@ -215,9 +212,11 @@ BOOST_AUTO_TEST_CASE(Union_find_weight_structure) { BOOST_CHECK_EQUAL(total_weight, total_comp_memory); for (auto &name : names) { - BOOST_CHECK(std::any_of(components_n_weights.cbegin(), components_n_weights.cend(), + BOOST_CHECK(std::any_of(components_n_weights.cbegin(), + components_n_weights.cend(), [name](std::pair, unsigned> comp_pair) { - return std::any_of(comp_pair.first.cbegin(), comp_pair.first.cend(), + return std::any_of(comp_pair.first.cbegin(), + comp_pair.first.cend(), [name](std::string other_name) { return name == other_name; }); })); } @@ -263,36 +262,35 @@ BOOST_AUTO_TEST_CASE(Union_find_structure_weight_comp_count) { BOOST_CHECK_NE(test_universe.find_origin_by_name("a"), test_universe.find_origin_by_name("e")); BOOST_CHECK_NE(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("d")); - std::vector, unsigned>> comp_n_weights = - test_universe.get_connected_components_and_weights(); + std::vector, unsigned>> comp_n_weights + = test_universe.get_connected_components_and_weights(); BOOST_CHECK(comp_n_weights.size() == 2); BOOST_CHECK(comp_n_weights.size() == test_universe.get_number_of_connected_components()); BOOST_CHECK(comp_n_weights[0].first.size() == 3); BOOST_CHECK(comp_n_weights[1].first.size() == 3); - BOOST_CHECK((comp_n_weights[0].second == 4 && comp_n_weights[1].second == 5) || - (comp_n_weights[0].second == 5 && comp_n_weights[1].second == 4)); + BOOST_CHECK((comp_n_weights[0].second == 4 && comp_n_weights[1].second == 5) + || (comp_n_weights[0].second == 5 && comp_n_weights[1].second == 4)); - std::vector, unsigned, unsigned>> comp_n_weight_n_memory = - test_universe.get_connected_components_weights_and_memories(); + std::vector, unsigned, unsigned>> comp_n_weight_n_memory + = test_universe.get_connected_components_weights_and_memories(); BOOST_CHECK(comp_n_weight_n_memory.size() == 2); BOOST_CHECK(comp_n_weight_n_memory.size() == test_universe.get_number_of_connected_components()); BOOST_CHECK(std::get<0>(comp_n_weight_n_memory[0]).size() == 3); BOOST_CHECK(std::get<0>(comp_n_weight_n_memory[1]).size() == 3); - BOOST_CHECK((std::get<1>(comp_n_weight_n_memory[0]) == 4 && std::get<1>(comp_n_weight_n_memory[1]) == 5) || - (std::get<1>(comp_n_weight_n_memory[0]) == 5 && std::get<1>(comp_n_weight_n_memory[1]) == 4)); - BOOST_CHECK((std::get<2>(comp_n_weight_n_memory[0]) == 4 && std::get<2>(comp_n_weight_n_memory[1]) == 5) || - (std::get<2>(comp_n_weight_n_memory[0]) == 5 && std::get<2>(comp_n_weight_n_memory[1]) == 4)); + BOOST_CHECK((std::get<1>(comp_n_weight_n_memory[0]) == 4 && std::get<1>(comp_n_weight_n_memory[1]) == 5) + || (std::get<1>(comp_n_weight_n_memory[0]) == 5 && std::get<1>(comp_n_weight_n_memory[1]) == 4)); + BOOST_CHECK((std::get<2>(comp_n_weight_n_memory[0]) == 4 && std::get<2>(comp_n_weight_n_memory[1]) == 5) + || (std::get<2>(comp_n_weight_n_memory[0]) == 5 && std::get<2>(comp_n_weight_n_memory[1]) == 4)); - std::vector, unsigned>> components_n_weights = - test_universe.get_connected_components_and_weights(); + std::vector, unsigned>> components_n_weights + = test_universe.get_connected_components_and_weights(); unsigned total_comp_weights = 0; unsigned total_elements = 0; for (auto &[comp, wt] : components_n_weights) { total_comp_weights += wt; total_elements += static_cast(comp.size()); for (auto &name : comp) { - BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), - [name](std::string other_name) { return name == other_name; })); + BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; })); } } @@ -304,9 +302,11 @@ BOOST_AUTO_TEST_CASE(Union_find_structure_weight_comp_count) { BOOST_CHECK_EQUAL(total_elements, names.size()); BOOST_CHECK_EQUAL(total_weight, total_comp_weights); for (auto &name : names) { - BOOST_CHECK(std::any_of(components_n_weights.cbegin(), components_n_weights.cend(), + BOOST_CHECK(std::any_of(components_n_weights.cbegin(), + components_n_weights.cend(), [name](std::pair, unsigned> comp_pair) { - return std::any_of(comp_pair.first.cbegin(), comp_pair.first.cend(), + return std::any_of(comp_pair.first.cbegin(), + comp_pair.first.cend(), [name](std::string other_name) { return name == other_name; }); })); } @@ -366,16 +366,15 @@ BOOST_AUTO_TEST_CASE(Union_find_structure_weight_chains_comp_count) { BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("e"), test_universe.find_origin_by_name("h")); BOOST_CHECK_EQUAL(test_universe.find_origin_by_name("b"), test_universe.find_origin_by_name("i")); - std::vector, unsigned>> components_n_weights = - test_universe.get_connected_components_and_weights(); + std::vector, unsigned>> components_n_weights + = test_universe.get_connected_components_and_weights(); unsigned total_comp_weights = 0; unsigned total_elements = 0; for (auto &[comp, wt] : components_n_weights) { total_comp_weights += wt; total_elements += static_cast(comp.size()); for (auto &name : comp) { - BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), - [name](std::string other_name) { return name == other_name; })); + BOOST_CHECK(std::any_of(names.cbegin(), names.cend(), [name](std::string other_name) { return name == other_name; })); } } @@ -387,9 +386,11 @@ BOOST_AUTO_TEST_CASE(Union_find_structure_weight_chains_comp_count) { BOOST_CHECK_EQUAL(total_elements, names.size()); BOOST_CHECK_EQUAL(total_weight, total_comp_weights); for (auto &name : names) { - BOOST_CHECK(std::any_of(components_n_weights.cbegin(), components_n_weights.cend(), + BOOST_CHECK(std::any_of(components_n_weights.cbegin(), + components_n_weights.cend(), [name](std::pair, unsigned> comp_pair) { - return std::any_of(comp_pair.first.cbegin(), comp_pair.first.cend(), + return std::any_of(comp_pair.first.cbegin(), + comp_pair.first.cend(), [name](std::string other_name) { return name == other_name; }); })); } diff --git a/tests/wavefront_component_divider.cpp b/tests/wavefront_component_divider.cpp index e2e3be52..37cd28fa 100644 --- a/tests/wavefront_component_divider.cpp +++ b/tests/wavefront_component_divider.cpp @@ -16,15 +16,15 @@ limitations under the License. @author Toni Boehnlein, Benjamin Lozes, Pal Andras Papp, Raphael S. Steiner */ - #define BOOST_TEST_MODULE SequenceSplitterTest #include -#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" -#include "osp/dag_divider/wavefront_divider/SequenceSplitter.hpp" -#include "osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp" -#include "osp/dag_divider/wavefront_divider/SequenceGenerator.hpp" -#include "osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp" + #include "osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp" +#include "osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp" +#include "osp/dag_divider/wavefront_divider/SequenceGenerator.hpp" +#include "osp/dag_divider/wavefront_divider/SequenceSplitter.hpp" +#include "osp/dag_divider/wavefront_divider/WavefrontStatisticsCollector.hpp" +#include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" BOOST_AUTO_TEST_CASE(VarianceSplitterTest) { osp::VarianceSplitter splitter(0.8, 0.1); @@ -76,7 +76,7 @@ BOOST_AUTO_TEST_CASE(LargestStepSplitterTest) { std::vector splits3 = splitter.split(seq3); std::vector expected3 = {3}; BOOST_CHECK_EQUAL_COLLECTIONS(splits3.begin(), splits3.end(), expected3.begin(), expected3.end()); - + // Test case 4: Sequence too short std::vector seq4 = {1, 10}; std::vector splits4 = splitter.split(seq4); @@ -114,7 +114,7 @@ BOOST_AUTO_TEST_CASE(ThresholdScanSplitterTest) { std::vector seq4 = {1, 2, 3, 4, 5}; std::vector splits4 = splitter.split(seq4); BOOST_CHECK(splits4.empty()); - + // Test case 5: Empty sequence std::vector seq5 = {}; std::vector splits5 = splitter.split(seq5); @@ -125,7 +125,6 @@ using graph = osp::computational_dag_edge_idx_vector_impl_def_int_t; using VertexType = graph::vertex_idx; BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) { - graph dag; const auto v1 = dag.add_vertex(2, 1, 9); const auto v2 = dag.add_vertex(3, 1, 8); @@ -133,7 +132,7 @@ BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) { const auto v4 = dag.add_vertex(5, 1, 6); const auto v5 = dag.add_vertex(6, 1, 5); const auto v6 = dag.add_vertex(7, 1, 4); - const auto v7 = dag.add_vertex(8, 1, 3); // Note: v7 is not connected in the example + const auto v7 = dag.add_vertex(8, 1, 3); // Note: v7 is not connected in the example const auto v8 = dag.add_vertex(9, 1, 2); dag.add_edge(v1, v2); @@ -148,11 +147,11 @@ BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) { // Manually defined level sets for this DAG const std::vector> level_sets = { - {v1}, // Level 0 + {v1}, // Level 0 {v2, v3, v4}, // Level 1 - {v5, v6}, // Level 2 - {v8}, // Level 3 - {v7} // Level 4 (isolated vertex) + {v5, v6}, // Level 2 + {v8}, // Level 3 + {v7} // Level 4 (isolated vertex) }; osp::WavefrontStatisticsCollector collector(dag, level_sets); @@ -168,43 +167,42 @@ BOOST_AUTO_TEST_CASE(ForwardAndBackwardPassTest) { // Level 1 BOOST_CHECK_EQUAL(forward_stats[1].connected_components_vertices.size(), 1); - BOOST_CHECK_EQUAL(forward_stats[1].connected_components_weights[0], 2 + 3 + 4 + 5); // v1,v2,v3,v4 + BOOST_CHECK_EQUAL(forward_stats[1].connected_components_weights[0], 2 + 3 + 4 + 5); // v1,v2,v3,v4 BOOST_CHECK_EQUAL(forward_stats[1].connected_components_memories[0], 9 + 8 + 7 + 6); // Level 2 BOOST_CHECK_EQUAL(forward_stats[2].connected_components_vertices.size(), 1); - BOOST_CHECK_EQUAL(forward_stats[2].connected_components_weights[0], 14 + 6 + 7); // v1-v6 + BOOST_CHECK_EQUAL(forward_stats[2].connected_components_weights[0], 14 + 6 + 7); // v1-v6 BOOST_CHECK_EQUAL(forward_stats[2].connected_components_memories[0], 30 + 5 + 4); // Level 3 BOOST_CHECK_EQUAL(forward_stats[3].connected_components_vertices.size(), 1); - BOOST_CHECK_EQUAL(forward_stats[3].connected_components_weights[0], 27 + 9); // v1-v6, v8 + BOOST_CHECK_EQUAL(forward_stats[3].connected_components_weights[0], 27 + 9); // v1-v6, v8 BOOST_CHECK_EQUAL(forward_stats[3].connected_components_memories[0], 39 + 2); // Level 4 (isolated vertex shows up as a new component) BOOST_CHECK_EQUAL(forward_stats[4].connected_components_vertices.size(), 2); - // --- Test Backward Pass --- auto backward_stats = collector.compute_backward(); BOOST_REQUIRE_EQUAL(backward_stats.size(), 5); // Level 4 BOOST_CHECK_EQUAL(backward_stats[4].connected_components_vertices.size(), 1); - BOOST_CHECK_EQUAL(backward_stats[4].connected_components_weights[0], 8); // v7 + BOOST_CHECK_EQUAL(backward_stats[4].connected_components_weights[0], 8); // v7 BOOST_CHECK_EQUAL(backward_stats[4].connected_components_memories[0], 3); // Level 3 - BOOST_CHECK_EQUAL(backward_stats[3].connected_components_vertices.size(), 2); // {v8}, {v7} + BOOST_CHECK_EQUAL(backward_stats[3].connected_components_vertices.size(), 2); // {v8}, {v7} // Level 2 - BOOST_CHECK_EQUAL(backward_stats[2].connected_components_vertices.size(), 3); // {v5,v8}, {v6}, {v7} + BOOST_CHECK_EQUAL(backward_stats[2].connected_components_vertices.size(), 3); // {v5,v8}, {v6}, {v7} // Level 1 - BOOST_CHECK_EQUAL(backward_stats[1].connected_components_vertices.size(), 2); // {v2,v3,v4,v5,v6,v8}, {v7} + BOOST_CHECK_EQUAL(backward_stats[1].connected_components_vertices.size(), 2); // {v2,v3,v4,v5,v6,v8}, {v7} // Level 0 - BOOST_CHECK_EQUAL(backward_stats[0].connected_components_vertices.size(), 2); // {v1-v6,v8}, {v7} + BOOST_CHECK_EQUAL(backward_stats[0].connected_components_vertices.size(), 2); // {v1-v6,v8}, {v7} } BOOST_AUTO_TEST_CASE(SequenceGenerationTest) { @@ -216,7 +214,7 @@ BOOST_AUTO_TEST_CASE(SequenceGenerationTest) { const auto v4 = dag.add_vertex(5, 1, 6); const auto v5 = dag.add_vertex(6, 1, 5); const auto v6 = dag.add_vertex(7, 1, 4); - const auto v7 = dag.add_vertex(8, 1, 3); // Isolated vertex + const auto v7 = dag.add_vertex(8, 1, 3); // Isolated vertex const auto v8 = dag.add_vertex(9, 1, 2); dag.add_edge(v1, v2); @@ -230,7 +228,11 @@ BOOST_AUTO_TEST_CASE(SequenceGenerationTest) { dag.add_edge(v4, v8); const std::vector> level_sets = { - {v1}, {v2, v3, v4}, {v5, v6}, {v8}, {v7} + {v1}, + {v2, v3, v4}, + {v5, v6}, + {v8}, + {v7} }; osp::SequenceGenerator generator(dag, level_sets); @@ -238,12 +240,12 @@ BOOST_AUTO_TEST_CASE(SequenceGenerationTest) { // --- Test Component Count --- auto component_seq = generator.generate(osp::SequenceMetric::COMPONENT_COUNT); std::vector expected_components = {1.0, 1.0, 1.0, 1.0, 2.0}; - BOOST_CHECK_EQUAL_COLLECTIONS(component_seq.begin(), component_seq.end(), - expected_components.begin(), expected_components.end()); + BOOST_CHECK_EQUAL_COLLECTIONS( + component_seq.begin(), component_seq.end(), expected_components.begin(), expected_components.end()); // --- Test Available Parallelism --- auto parallelism_seq = generator.generate(osp::SequenceMetric::AVAILABLE_PARALLELISM); - + // Manual calculation for expected values: // L0: 2 / 1 = 2 // L1: (2 + 3+4+5) / 2 = 14 / 2 = 7 @@ -276,9 +278,9 @@ struct TestFixture { const auto v4 = dag.add_vertex(5, 1, 6); const auto v5 = dag.add_vertex(6, 1, 5); const auto v6 = dag.add_vertex(7, 1, 4); - const auto v7 = dag.add_vertex(8, 1, 3); // Isolated vertex + const auto v7 = dag.add_vertex(8, 1, 3); // Isolated vertex const auto v8 = dag.add_vertex(9, 1, 2); - + vertices = {v1, v2, v3, v4, v5, v6, v7, v8}; dag.add_edge(v1, v2); @@ -298,7 +300,7 @@ BOOST_FIXTURE_TEST_SUITE(ScanWavefrontDividerTestSuite, TestFixture) BOOST_AUTO_TEST_CASE(LargestStepDivisionTest) { osp::ScanWavefrontDivider divider; divider.set_metric(osp::SequenceMetric::AVAILABLE_PARALLELISM); - divider.use_largest_step_splitter(0.9,1); + divider.use_largest_step_splitter(0.9, 1); auto sections = divider.divide(dag); @@ -310,14 +312,14 @@ BOOST_AUTO_TEST_CASE(LargestStepDivisionTest) { // Section 2: levels 1, 2, 3. The rest of the main component. BOOST_REQUIRE_EQUAL(sections[1].size(), 1); - BOOST_CHECK_EQUAL(sections[1][0].size(), 6); // v2,v3,v4,v5,v6,v8 + BOOST_CHECK_EQUAL(sections[1][0].size(), 6); // v2,v3,v4,v5,v6,v8 } BOOST_AUTO_TEST_CASE(ThresholdScanDivisionTest) { osp::ScanWavefrontDivider divider; divider.set_metric(osp::SequenceMetric::AVAILABLE_PARALLELISM); divider.use_threshold_scan_splitter(2.0, 11.5); - + auto sections = divider.divide(dag); // A cut is expected when the sequence crosses 11.5 (at level 2) and crosses back (at level 3) @@ -333,17 +335,16 @@ BOOST_AUTO_TEST_CASE(ThresholdScanDivisionTest) { BOOST_REQUIRE_EQUAL(sections[2].size(), 1); } - BOOST_AUTO_TEST_CASE(NoCutDivisionTest) { osp::ScanWavefrontDivider divider; divider.set_metric(osp::SequenceMetric::COMPONENT_COUNT); - divider.use_largest_step_splitter(2.0, 2); + divider.use_largest_step_splitter(2.0, 2); auto sections = divider.divide(dag); // Expecting a single section containing all components BOOST_REQUIRE_EQUAL(sections.size(), 1); - BOOST_REQUIRE_EQUAL(sections[0].size(), 2); // Two final components + BOOST_REQUIRE_EQUAL(sections[0].size(), 2); // Two final components } BOOST_AUTO_TEST_CASE(EmptyGraphTest) { @@ -371,7 +372,7 @@ struct TestFixture_2 { const auto v5 = dag.add_vertex(1, 1, 1); const auto v6 = dag.add_vertex(1, 1, 1); const auto v7 = dag.add_vertex(1, 1, 1); - + vertices = {v1, v2, v3, v4, v5, v6, v7}; dag.add_edge(v1, v3); @@ -399,7 +400,7 @@ struct TestFixture_SimpleMerge { const auto v4 = dag.add_vertex(1, 1, 1); const auto v5 = dag.add_vertex(1, 1, 1); const auto v6 = dag.add_vertex(1, 1, 1); - + dag.add_edge(v0, v2); dag.add_edge(v1, v3); dag.add_edge(v2, v4); @@ -412,9 +413,8 @@ struct TestFixture_SimpleMerge { BOOST_FIXTURE_TEST_SUITE(SimpleMergeTests, TestFixture_SimpleMerge) BOOST_AUTO_TEST_CASE(BasicRecursionTest) { - osp::RecursiveWavefrontDivider divider; - divider.use_largest_step_splitter(0.5, 1); + divider.use_largest_step_splitter(0.5, 1); auto sections = divider.divide(dag); // Expecting a cut after level 2, where component count drops from 2 to 1. @@ -453,8 +453,7 @@ BOOST_AUTO_TEST_CASE(MinSubsequenceLengthTest) { BOOST_AUTO_TEST_CASE(MaxDepthTest) { // Setting max_depth to 0 should prevent any recursion. osp::RecursiveWavefrontDivider divider; - divider.use_largest_step_splitter(0.5, 2) - .set_max_depth(0); + divider.use_largest_step_splitter(0.5, 2).set_max_depth(0); auto sections = divider.divide(dag); BOOST_REQUIRE_EQUAL(sections.size(), 1); @@ -468,8 +467,7 @@ BOOST_AUTO_TEST_CASE(EmptyGraphTest) { BOOST_CHECK(sections.empty()); } -BOOST_AUTO_TEST_SUITE_END() - +BOOST_AUTO_TEST_SUITE_END() // --- Test Fixture 2: A DAG with multiple merge points for deeper recursion --- struct TestFixture_MultiMerge { @@ -479,18 +477,27 @@ struct TestFixture_MultiMerge { // Sequence: {4, 4, 2, 2, 1, 1}. Two significant drops. // L0: 4 comp -> L2: 2 comp (drop of 2) // L2: 2 comp -> L4: 1 comp (drop of 1) - const auto v_l0_1 = dag.add_vertex(1,1,1), v_l0_2 = dag.add_vertex(1,1,1), v_l0_3 = dag.add_vertex(1,1,1), v_l0_4 = dag.add_vertex(1,1,1); - const auto v_l1_1 = dag.add_vertex(1,1,1), v_l1_2 = dag.add_vertex(1,1,1), v_l1_3 = dag.add_vertex(1,1,1), v_l1_4 = dag.add_vertex(1,1,1); - const auto v_l2_1 = dag.add_vertex(1,1,1), v_l2_2 = dag.add_vertex(1,1,1); - const auto v_l3_1 = dag.add_vertex(1,1,1), v_l3_2 = dag.add_vertex(1,1,1); - const auto v_l4_1 = dag.add_vertex(1,1,1); - const auto v_l5_1 = dag.add_vertex(1,1,1); - - dag.add_edge(v_l0_1, v_l1_1); dag.add_edge(v_l0_2, v_l1_2); dag.add_edge(v_l0_3, v_l1_3); dag.add_edge(v_l0_4, v_l1_4); - dag.add_edge(v_l1_1, v_l2_1); dag.add_edge(v_l1_2, v_l2_1); - dag.add_edge(v_l1_3, v_l2_2); dag.add_edge(v_l1_4, v_l2_2); - dag.add_edge(v_l2_1, v_l3_1); dag.add_edge(v_l2_2, v_l3_2); - dag.add_edge(v_l3_1, v_l4_1); dag.add_edge(v_l3_2, v_l4_1); + const auto v_l0_1 = dag.add_vertex(1, 1, 1), v_l0_2 = dag.add_vertex(1, 1, 1), v_l0_3 = dag.add_vertex(1, 1, 1), + v_l0_4 = dag.add_vertex(1, 1, 1); + const auto v_l1_1 = dag.add_vertex(1, 1, 1), v_l1_2 = dag.add_vertex(1, 1, 1), v_l1_3 = dag.add_vertex(1, 1, 1), + v_l1_4 = dag.add_vertex(1, 1, 1); + const auto v_l2_1 = dag.add_vertex(1, 1, 1), v_l2_2 = dag.add_vertex(1, 1, 1); + const auto v_l3_1 = dag.add_vertex(1, 1, 1), v_l3_2 = dag.add_vertex(1, 1, 1); + const auto v_l4_1 = dag.add_vertex(1, 1, 1); + const auto v_l5_1 = dag.add_vertex(1, 1, 1); + + dag.add_edge(v_l0_1, v_l1_1); + dag.add_edge(v_l0_2, v_l1_2); + dag.add_edge(v_l0_3, v_l1_3); + dag.add_edge(v_l0_4, v_l1_4); + dag.add_edge(v_l1_1, v_l2_1); + dag.add_edge(v_l1_2, v_l2_1); + dag.add_edge(v_l1_3, v_l2_2); + dag.add_edge(v_l1_4, v_l2_2); + dag.add_edge(v_l2_1, v_l3_1); + dag.add_edge(v_l2_2, v_l3_2); + dag.add_edge(v_l3_1, v_l4_1); + dag.add_edge(v_l3_2, v_l4_1); dag.add_edge(v_l4_1, v_l5_1); } }; @@ -530,7 +537,6 @@ BOOST_AUTO_TEST_CASE(VarianceSplitterTest) { BOOST_REQUIRE_EQUAL(sections.size(), 3); } +BOOST_AUTO_TEST_SUITE_END() // End of MultiMergeTests -BOOST_AUTO_TEST_SUITE_END() // End of MultiMergeTests - -BOOST_AUTO_TEST_SUITE_END() // End of DagDividerTestSuite \ No newline at end of file +BOOST_AUTO_TEST_SUITE_END() // End of DagDividerTestSuite diff --git a/tests/wavefront_divider.cpp b/tests/wavefront_divider.cpp index 09caf82b..40b48783 100644 --- a/tests/wavefront_divider.cpp +++ b/tests/wavefront_divider.cpp @@ -19,40 +19,36 @@ limitations under the License. #define BOOST_TEST_MODULE wavefront_divider #include +#include "osp/auxiliary/io/dot_graph_file_reader.hpp" +#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" #include "osp/bsp/scheduler/GreedySchedulers/BspLocking.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_comm.hpp" #include "osp/bsp/scheduler/LocalSearch/KernighanLin/kl_total_cut.hpp" -#include "osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp" -#include "osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp" #include "osp/dag_divider/WavefrontComponentScheduler.hpp" -#include "osp/auxiliary/io/dot_graph_file_reader.hpp" -#include "osp/auxiliary/io/hdag_graph_file_reader.hpp" -#include "test_utils.hpp" - +#include "osp/dag_divider/wavefront_divider/RecursiveWavefrontDivider.hpp" +#include "osp/dag_divider/wavefront_divider/ScanWavefrontDivider.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" - +#include "test_utils.hpp" using namespace osp; std::vector test_graphs_dot() { return {"data/dot/smpl_dot_graph_1.dot"}; } std::vector tiny_spaa_graphs() { - return { - "data/spaa/tiny/instance_bicgstab.hdag", "data/spaa/tiny/instance_CG_N2_K2_nzP0d75.hdag", - "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag", - "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag" - }; + return {"data/spaa/tiny/instance_bicgstab.hdag", + "data/spaa/tiny/instance_CG_N2_K2_nzP0d75.hdag", + "data/spaa/tiny/instance_CG_N3_K1_nzP0d5.hdag", + "data/spaa/tiny/instance_CG_N4_K1_nzP0d35.hdag"}; } -template +template bool check_vertex_maps(const std::vector>>> &maps, const Graph_t &dag) { - std::unordered_set> all_vertices; for (const auto &step : maps) { for (const auto &subgraph : step) { - - for (const auto &vertex : subgraph) + for (const auto &vertex : subgraph) { all_vertices.insert(vertex); + } } } @@ -60,7 +56,6 @@ bool check_vertex_maps(const std::vector filenames_graph = test_graphs_dot(); const auto project_root = get_project_root(); @@ -74,25 +69,22 @@ BOOST_AUTO_TEST_CASE(wavefront_component_divider) { auto status_graph = file_reader::readComputationalDagDotFormat((project_root / filename_graph).string(), graph); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } else { std::cout << "File read:" << filename_graph << std::endl; } - ScanWavefrontDivider wavefront; + ScanWavefrontDivider wavefront; auto maps = wavefront.divide(graph); if (!maps.empty()) { - BOOST_CHECK(check_vertex_maps(maps, graph)); } } } BOOST_AUTO_TEST_CASE(wavefront_component_parallelism_divider) { - std::vector filenames_graph = tiny_spaa_graphs(); const auto project_root = get_project_root(); @@ -106,7 +98,6 @@ BOOST_AUTO_TEST_CASE(wavefront_component_parallelism_divider) { auto status_graph = file_reader::readComputationalDagHyperdagFormatDB((project_root / filename_graph).string(), graph); if (!status_graph) { - std::cout << "Reading files failed." << std::endl; BOOST_CHECK(false); } else { @@ -115,12 +106,11 @@ BOOST_AUTO_TEST_CASE(wavefront_component_parallelism_divider) { ScanWavefrontDivider wavefront; wavefront.set_metric(SequenceMetric::AVAILABLE_PARALLELISM); - wavefront.use_variance_splitter(1.0,1.0,1); + wavefront.use_variance_splitter(1.0, 1.0, 1); auto maps = wavefront.divide(graph); if (!maps.empty()) { - BOOST_CHECK(check_vertex_maps(maps, graph)); } } diff --git a/tests/wavefront_scheduler.cpp b/tests/wavefront_scheduler.cpp index 3e1e2a29..3fb2b203 100644 --- a/tests/wavefront_scheduler.cpp +++ b/tests/wavefront_scheduler.cpp @@ -18,52 +18,51 @@ limitations under the License. #define BOOST_TEST_MODULE AbstractWavefrontSchedulerTest #include + #include "osp/dag_divider/AbstractWavefrontScheduler.hpp" +#include "osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp" #include "osp/dag_divider/WavefrontComponentScheduler.hpp" -#include "osp/dag_divider/IsomorphicWavefrontComponentScheduler.hpp" #include "osp/graph_implementations/adj_list_impl/computational_dag_edge_idx_vector_impl.hpp" using graph_t = osp::computational_dag_edge_idx_vector_impl_def_t; - -template +template class ConcreteWavefrontScheduler : public osp::AbstractWavefrontScheduler { -public: - ConcreteWavefrontScheduler(osp::IDagDivider& div, osp::Scheduler& sched) + public: + ConcreteWavefrontScheduler(osp::IDagDivider &div, osp::Scheduler &sched) : osp::AbstractWavefrontScheduler(div, sched) {} - + // Expose the protected method for testing with the new signature - bool test_distributeProcessors( - unsigned total_processors, - const std::vector& work_weights, - std::vector& allocation) const { + bool test_distributeProcessors(unsigned total_processors, + const std::vector &work_weights, + std::vector &allocation) const { return this->distributeProcessors(total_processors, work_weights, allocation); } // Dummy implementation for the pure virtual method - osp::RETURN_STATUS computeSchedule(osp::BspSchedule&) override { - return osp::RETURN_STATUS::OSP_SUCCESS; - } + osp::RETURN_STATUS computeSchedule(osp::BspSchedule &) override { return osp::RETURN_STATUS::OSP_SUCCESS; } + std::string getScheduleName() const override { return "ConcreteScheduler"; } }; // Mock dependencies for the test struct MockDivider : public osp::IDagDivider { - std::vector>> divide(const graph_t&) override { return {}; } + std::vector>> divide(const graph_t &) override { return {}; } }; + struct MockScheduler : public osp::Scheduler { - osp::RETURN_STATUS computeSchedule(osp::BspSchedule&) override { return osp::RETURN_STATUS::OSP_SUCCESS; } + osp::RETURN_STATUS computeSchedule(osp::BspSchedule &) override { return osp::RETURN_STATUS::OSP_SUCCESS; } + std::string getScheduleName() const override { return "Mock"; } }; - BOOST_AUTO_TEST_SUITE(AbstractWavefrontSchedulerTestSuite) BOOST_AUTO_TEST_CASE(DistributeProcessorsTest) { MockDivider mock_divider; MockScheduler mock_scheduler; ConcreteWavefrontScheduler scheduler(mock_divider, mock_scheduler); - + std::vector allocation; bool starvation_hit; @@ -108,7 +107,7 @@ BOOST_AUTO_TEST_CASE(DistributeProcessorsTest) { std::vector expected6 = {0, 0, 0}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected6.begin(), expected6.end()); BOOST_CHECK(!starvation_hit); - + // Test 7: Inactive components (work is zero) std::vector work7 = {100.0, 0.0, 300.0, 0.0}; starvation_hit = scheduler.test_distributeProcessors(8, work7, allocation); @@ -139,12 +138,10 @@ BOOST_AUTO_TEST_CASE(DistributeProcessorsTest) { std::vector expected10 = {0, 1, 0}; BOOST_CHECK_EQUAL_COLLECTIONS(allocation.begin(), allocation.end(), expected10.begin(), expected10.end()); BOOST_CHECK(starvation_hit); - } BOOST_AUTO_TEST_SUITE_END() - // Mock implementations for dependencies using graph_t = osp::computational_dag_edge_idx_vector_impl_def_t; using VertexType = graph_t::vertex_idx; @@ -152,14 +149,13 @@ using VertexType = graph_t::vertex_idx; // A mock divider that returns a predictable set of sections. struct MockDivider_2 : public osp::IDagDivider { std::vector>> sections_to_return; - std::vector>> divide(const graph_t&) override { - return sections_to_return; - } + + std::vector>> divide(const graph_t &) override { return sections_to_return; } }; // A mock sub-scheduler that returns a simple, predictable schedule. struct MockSubScheduler : public osp::Scheduler { - osp::RETURN_STATUS computeSchedule(osp::BspSchedule& schedule) override { + osp::RETURN_STATUS computeSchedule(osp::BspSchedule &schedule) override { // Assign all tasks to the first processor in a single superstep for (VertexType v = 0; v < schedule.getInstance().getComputationalDag().num_vertices(); ++v) { schedule.setAssignedProcessor(v, 0); @@ -168,6 +164,7 @@ struct MockSubScheduler : public osp::Scheduler { schedule.setNumberOfSupersteps(1); return osp::RETURN_STATUS::OSP_SUCCESS; } + std::string getScheduleName() const override { return "MockSubScheduler"; } }; @@ -180,10 +177,10 @@ struct TestFixture { TestFixture() { // A simple DAG: v0 -> v1, v2 -> v3 // Two components that will be in the same wavefront set. - dag.add_vertex(10, 1, 1); // v0 - dag.add_vertex(20, 1, 1); // v1 - dag.add_vertex(30, 1, 1); // v2 - dag.add_vertex(40, 1, 1); // v3 + dag.add_vertex(10, 1, 1); // v0 + dag.add_vertex(20, 1, 1); // v1 + dag.add_vertex(30, 1, 1); // v2 + dag.add_vertex(40, 1, 1); // v3 dag.add_edge(0, 1); dag.add_edge(2, 3); @@ -214,13 +211,16 @@ BOOST_AUTO_TEST_CASE(BasicSchedulingTest) { BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0); BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 1); BOOST_CHECK_EQUAL(schedule.assignedSuperstep(3), 1); - + BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 2); } BOOST_AUTO_TEST_CASE(MultipleSectionsTest) { // Setup the mock divider to return two separate sections - mock_divider.sections_to_return = { {{0},{1}}, {{2}, {3}} }; + mock_divider.sections_to_return = { + {{0}, {1}}, + {{2}, {3}} + }; osp::WavefrontComponentScheduler scheduler(mock_divider, mock_sub_scheduler); osp::BspInstance instance(dag, arch); @@ -228,13 +228,12 @@ BOOST_AUTO_TEST_CASE(MultipleSectionsTest) { auto status = scheduler.computeSchedule(schedule); BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::OSP_SUCCESS); - + BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 3); BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); BOOST_CHECK_EQUAL(schedule.assignedSuperstep(1), 0); - BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 0); BOOST_CHECK_EQUAL(schedule.assignedProcessor(3), 4); BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 1); @@ -249,7 +248,10 @@ BOOST_AUTO_TEST_CASE(StarvationReturnsErrorTest) { scarce_arch.setNumberOfProcessors(1); // Setup the mock divider to return one section with two components - mock_divider.sections_to_return = {{{0}, {1}}, {{2, 3}}}; + mock_divider.sections_to_return = { + {{0}, {1}}, + {{2, 3}} + }; osp::WavefrontComponentScheduler scheduler(mock_divider, mock_sub_scheduler); osp::BspInstance instance(dag, scarce_arch); @@ -262,7 +264,6 @@ BOOST_AUTO_TEST_CASE(StarvationReturnsErrorTest) { BOOST_AUTO_TEST_SUITE_END() - // struct TestFixture_2 { // graph_t dag; // osp::BspArchitecture arch; @@ -291,12 +292,12 @@ BOOST_AUTO_TEST_SUITE_END() // auto status = scheduler.computeSchedule(schedule); // BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::OSP_SUCCESS); - + // // Member 1 of iso group {0,1} gets 1 proc (global proc 0) // BOOST_CHECK_EQUAL(schedule.assignedProcessor(0), 0); // BOOST_CHECK_EQUAL(schedule.assignedProcessor(1), 0); // BOOST_CHECK_EQUAL(schedule.assignedSuperstep(0), 0); - + // // Member 2 of iso group {2,3} gets 1 proc (global proc 1) // BOOST_CHECK_EQUAL(schedule.assignedProcessor(2), 1); // BOOST_CHECK_EQUAL(schedule.assignedProcessor(3), 1); @@ -310,7 +311,6 @@ BOOST_AUTO_TEST_SUITE_END() // BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 1); // } - // BOOST_AUTO_TEST_CASE(IndivisibleScarcitySchedulingTest) { // // 2 isomorphic components, 1 unique. 3 processors available. // arch.setNumberOfProcessors(3); @@ -329,7 +329,7 @@ BOOST_AUTO_TEST_SUITE_END() // BOOST_CHECK_EQUAL(schedule.assignedSuperstep(2), 1); // Sequential // // Unique group scheduled on its 2 processors (global procs 1, 2) -// BOOST_CHECK_EQUAL(schedule.assignedProcessor(4), 1); +// BOOST_CHECK_EQUAL(schedule.assignedProcessor(4), 1); // BOOST_CHECK_EQUAL(schedule.assignedSuperstep(4), 0); // BOOST_CHECK_EQUAL(schedule.numberOfSupersteps(), 2); @@ -338,7 +338,7 @@ BOOST_AUTO_TEST_SUITE_END() // BOOST_AUTO_TEST_CASE(StarvationReturnsErrorTest) { // // IsomorphismGroups will find 2 groups: {{0,1}, {2,3}} and {{4,5}}. // // With only 1 processor, this is a starvation scenario. -// arch.setNumberOfProcessors(1); +// arch.setNumberOfProcessors(1); // mock_divider.sections_to_return = {{{0, 1}, {2, 3}, {4, 5}}}; // osp::IsomorphicWavefrontComponentScheduler scheduler(mock_divider, mock_sub_scheduler); @@ -350,5 +350,4 @@ BOOST_AUTO_TEST_SUITE_END() // BOOST_CHECK_EQUAL(status, osp::RETURN_STATUS::ERROR); // } - // BOOST_AUTO_TEST_SUITE_END()